From 8acfa2097a86296063bdbb31baac7cea68c70a0e Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Mon, 15 Feb 2021 19:27:01 -0800 Subject: [PATCH 001/901] kram/kramv - increase label area, and simplify decode with encode/decode routines that go to ram instead of disk. --- kramv/KramLoader.mm | 69 ++++------------- kramv/KramViewerMain.mm | 2 +- libkram/kram/KTXImage.cpp | 16 ++++ libkram/kram/KTXImage.h | 4 + libkram/kram/KramImage.cpp | 155 ++++++++++++++++++++++++------------- libkram/kram/KramImage.h | 9 +++ 6 files changed, 147 insertions(+), 108 deletions(-) diff --git a/kramv/KramLoader.mm b/kramv/KramLoader.mm index 7e2c15fa..3ef9c0cb 100644 --- a/kramv/KramLoader.mm +++ b/kramv/KramLoader.mm @@ -48,69 +48,31 @@ @implementation KramLoader { // on macOS/arm, the M1 supports all 3 encode formats #define DO_DECODE TARGET_CPU_X86_64 -- (BOOL)decodeImageIfNeeded:(KTXImage&)image data:(vector&)data +- (BOOL)decodeImageIfNeeded:(KTXImage&)image imageDecoded:(KTXImage&)imageDecoded useImageDecoded:(bool&)useImageDecoded { #if DO_DECODE - MyMTLPixelFormat format = image.pixelFormat; - - // decode to disk, and then load that in place of original - // MacIntel can only open BC and explicit formats. - FileHelper decodedTmpFile; - - bool useDecode = false; - if (isETCFormat(format)) { - if (!decodedTmpFile.openTemporaryFile(".ktx", "w+")) { - return NO; - } - - Image imageDecode; - if (!imageDecode.decode(image, decodedTmpFile.pointer(), kTexEncoderEtcenc, false, "")) { + useImageDecoded = false; + + Image imageUnused; // TODO: move to only using KTXImage, decode needs to move there + + if (isETCFormat(image.pixelFormat)) { + if (!imageUnused.decode(image, imageDecoded, kTexEncoderEtcenc, false, "")) { return NO; } - useDecode = true; + useImageDecoded = true; } - else if (isASTCFormat(format)) { - if (!decodedTmpFile.openTemporaryFile(".ktx", "w+")) { - return NO; - } - - Image imageDecode; - if (!imageDecode.decode(image, decodedTmpFile.pointer(), kTexEncoderAstcenc, false, "")) { + else if (isASTCFormat(image.pixelFormat)) { + if (!imageUnused.decode(image, imageDecoded, kTexEncoderAstcenc, false, "")) { return NO; } - useDecode = true; + useImageDecoded = true; } // TODO: decode BC format on iOS when not supported, but viewer only on macOS for now - - if (useDecode) { - FILE* fp = decodedTmpFile.pointer(); - - size_t size = decodedTmpFile.size(); - if (size <= 0) { - return NO; - } - - data.resize(size); - - // have to pull into buffer, this only works with sync load path for now - rewind(fp); - - size_t readBytes = fread(data.data(), 1, size, fp); - if (readBytes != size) { - fprintf(stderr, "%s\n", strerror(errno)); - - return NO; - } - - image.skipImageLength = false; - if (!image.open(data.data(), (int32_t)size)) { // doesn't fail - return NO; - } - } #endif + return YES; } @@ -126,12 +88,13 @@ - (BOOL)decodeImageIfNeeded:(KTXImage&)image data:(vector&)data *originalFormat = (MTLPixelFormat)image.pixelFormat; } - vector data; - if (![self decodeImageIfNeeded:image data:data]) { + KTXImage imageDecoded; + bool useImageDecoded = false; + if (![self decodeImageIfNeeded:image imageDecoded:imageDecoded useImageDecoded:useImageDecoded]) { return nil; } - return [self loadTextureFromImage:image]; + return [self loadTextureFromImage:useImageDecoded ? 
imageDecoded : image]; } static int32_t numberOfMipmapLevels(const Image& image) { diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index 1280bc08..e75e8893 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -371,7 +371,7 @@ - (nonnull ShowSettings*)showSettings { - (NSTextField*)_addHud:(BOOL)isShadow { // add a label for the hud - NSTextField *label = [[NSTextField alloc] initWithFrame:NSMakeRect(isShadow ? 11 : 10, isShadow ? 11 : 10, 400, 200)]; + NSTextField *label = [[NSTextField alloc] initWithFrame:NSMakeRect(isShadow ? 11 : 10, isShadow ? 11 : 10, 800, 300)]; label.drawsBackground = NO; label.textColor = !isShadow ? [NSColor colorWithSRGBRed:0 green:1 blue:0 alpha:1] : diff --git a/libkram/kram/KTXImage.cpp b/libkram/kram/KTXImage.cpp index b915a447..fba3e0ad 100644 --- a/libkram/kram/KTXImage.cpp +++ b/libkram/kram/KTXImage.cpp @@ -1458,5 +1458,21 @@ bool KTXImage::openKTX2(const uint8_t* imageData, size_t imageDataLength) return true; } +vector& KTXImage::imageData() { + return imageDataFromKTX2; +} + +void KTXImage::reserveImageData() { + int32_t numChunks = totalChunks(); + const auto& lastMip = mipLevels[header.numberOfMipmapLevels-1]; + size_t totalKTXSize = + lastMip.offset + lastMip.length * numChunks; + imageDataFromKTX2.resize(totalKTXSize); + memset(imageDataFromKTX2.data(), 0, totalKTXSize); + + fileDataLength = totalKTXSize; + fileData = imageDataFromKTX2.data(); +} + } // namespace kram diff --git a/libkram/kram/KTXImage.h b/libkram/kram/KTXImage.h index 8da3b1c7..6ae660df 100644 --- a/libkram/kram/KTXImage.h +++ b/libkram/kram/KTXImage.h @@ -207,6 +207,10 @@ class KTXImage { //int totalMipLevels() const; uint32_t totalChunks() const; + // this is where KTXImage holds all mip data internally + void reserveImageData(); + vector& imageData(); + private: bool openKTX2(const uint8_t* imageData, size_t imageDataLength); diff --git a/libkram/kram/KramImage.cpp b/libkram/kram/KramImage.cpp index a1c17174..f32439e2 100644 --- a/libkram/kram/KramImage.cpp +++ b/libkram/kram/KramImage.cpp @@ -428,7 +428,32 @@ void Image::averageChannelsInBlock( } } +// this can return on failure to write +static bool writeDataAtOffset(const uint8_t* data, size_t dataSize, size_t dataOffset, FILE* dstFile, KTXImage& dstImage) +{ + if (dstFile) { + fseek(dstFile, dataOffset, SEEK_SET); + if (!FileHelper::writeBytes(dstFile, data, dataSize)) + return false; + } + else { + memcpy(dstImage.imageData().data() + dataOffset, data, dataSize); + } + return true; +} + bool Image::decode(const KTXImage& srcImage, FILE* dstFile, TexEncoder decoder, bool isVerbose, const string& swizzleText) const +{ + KTXImage dstImage; + return decodeImpl(srcImage, dstFile, dstImage, decoder, isVerbose, swizzleText); +} + +bool Image::decode(const KTXImage& srcImage, KTXImage& dstImage, TexEncoder decoder, bool isVerbose, const string& swizzleText) const +{ + return decodeImpl(srcImage, nullptr, dstImage, decoder, isVerbose, swizzleText); +} + +bool Image::decodeImpl(const KTXImage& srcImage, FILE* dstFile, KTXImage& dstImage, TexEncoder decoder, bool isVerbose, const string& swizzleText) const { // read existing KTX file into mip offset, then start decoding the blocks // and write these to 8u,16f,32f ktx with mips @@ -436,13 +461,14 @@ bool Image::decode(const KTXImage& srcImage, FILE* dstFile, TexEncoder decoder, // Image sorta represents uncompressed Image mips, not compressed. // But wriing things out to dstFile. 
- + int32_t numChunks = srcImage.totalChunks(); + MyMTLPixelFormat pixelFormat = srcImage.pixelFormat; bool isSrgb = isSrgbFormat(pixelFormat); bool isHDR = isHdrFormat(pixelFormat); // setup dstImage - KTXImage dstImage; + //KTXImage dstImage; dstImage = srcImage; // copy src (name-value pairs copied too) // important otherwise offsets are wrong if src is ktx2 @@ -473,6 +499,11 @@ bool Image::decode(const KTXImage& srcImage, FILE* dstFile, TexEncoder decoder, return false; } + // allocate to hold props and entire image to write out + if (!dstFile) { + dstImage.reserveImageData(); + } + bool success = false; // 1d textures need to write out 0 width @@ -483,13 +514,14 @@ bool Image::decode(const KTXImage& srcImage, FILE* dstFile, TexEncoder decoder, headerCopy.pixelDepth = 0; } + // write the header out - if (!FileHelper::writeBytes(dstFile, (const uint8_t*)&headerCopy, sizeof(headerCopy))) { + if (!writeDataAtOffset((const uint8_t*)&headerCopy, sizeof(headerCopy), 0, dstFile, dstImage)) { return false; } - + // write out the props - if (!FileHelper::writeBytes(dstFile, propsData.data(), propsData.size())) { + if (!writeDataAtOffset(propsData.data(), propsData.size(), sizeof(KTXHeader), dstFile, dstImage)) { return false; } @@ -517,7 +549,6 @@ bool Image::decode(const KTXImage& srcImage, FILE* dstFile, TexEncoder decoder, // DONE: walk chunks here and seek to src and dst offsets in conversion // make sure to walk chunks in the exact same order they are written, array then face, or slice - int32_t numChunks = srcImage.totalChunks(); int32_t w = srcImage.width; int32_t h = srcImage.height; @@ -803,19 +834,15 @@ bool Image::decode(const KTXImage& srcImage, FILE* dstFile, TexEncoder decoder, levelSize *= numChunks; } - fseek(dstFile, dstMipOffset - sizeof(levelSize), SEEK_SET); // from begin - - if (!FileHelper::writeBytes(dstFile, (const uint8_t*)&levelSize, sizeof(levelSize))) { + if (!writeDataAtOffset((const uint8_t*)&levelSize, sizeof(levelSize), dstMipOffset - sizeof(levelSize), dstFile, dstImage)) { return false; } } - fseek(dstFile, dstMipOffset, SEEK_SET); // from begin - - if (!FileHelper::writeBytes(dstFile, outputTexture.data(), dstMipLevel.length)) { + if (!writeDataAtOffset(outputTexture.data(), dstMipLevel.length, dstMipOffset, dstFile, dstImage)) { return false; } - + // next mip level mipDown(w, h); } @@ -945,10 +972,23 @@ void Image::heightToNormals(float scale) } } +bool Image::encode(ImageInfo& info, KTXImage& dstImage) const +{ + return encodeImpl(info, nullptr, dstImage); +} + bool Image::encode(ImageInfo& info, FILE* dstFile) const { - KTXImage image; - KTXHeader& header = image.header; + // this will be throw out + KTXImage dstImage; + return encodeImpl(info, dstFile, dstImage); +} + + +bool Image::encodeImpl(ImageInfo& info, FILE* dstFile, KTXImage& dstImage) const +{ + //KTXImage image; + KTXHeader& header = dstImage.header; vector chunkOffsets; @@ -969,10 +1009,10 @@ bool Image::encode(ImageInfo& info, FILE* dstFile) const // work out how much memory we need to load header.initFormatGL(info.pixelFormat); - image.pixelFormat = info.pixelFormat; - image.textureType = info.textureType; + dstImage.pixelFormat = info.pixelFormat; + dstImage.textureType = info.textureType; - image.addFormatProps(); + dstImage.addFormatProps(); // TODO: caller should really set post swizzle string postSwizzleText; @@ -983,20 +1023,20 @@ bool Image::encode(ImageInfo& info, FILE* dstFile) const else if (info.swizzleText == "rrr1") postSwizzleText = "r001"; // to match up with BC4/EAC_R11 - 
image.addSwizzleProps(info.swizzleText.c_str(), postSwizzleText.c_str()); + dstImage.addSwizzleProps(info.swizzleText.c_str(), postSwizzleText.c_str()); // TODO: caller should really set this, channels and address/filter // three letter codes for the channel names so viewer/game can interpret them if (info.isNormal) { - image.addChannelProps("Nrm.x,Nrm.y,X,X"); + dstImage.addChannelProps("Nrm.x,Nrm.y,X,X"); } else if (info.isSRGB) { // !hasAlpha doesn't change the channel designation if (info.isPremultiplied) { - image.addChannelProps("Alb.ra,Alb.ga,Alb.ba,Alb.a"); + dstImage.addChannelProps("Alb.ra,Alb.ga,Alb.ba,Alb.a"); } else { - image.addChannelProps("Alb.r,Alb.g,Alb.b,Alb.a"); + dstImage.addChannelProps("Alb.r,Alb.g,Alb.b,Alb.a"); } } @@ -1005,21 +1045,21 @@ bool Image::encode(ImageInfo& info, FILE* dstFile) const // address: Wrap, Clamp, MirrorWrap, MirrorClamp, BorderClamp, BorderClamp0 // filter: Point, Linear, None (Mip only), TODO: what about Aniso (Mip only + level?) // min/maxLOD too for which range of mips to use, atlas should stop before entries merge - if (image.textureType == MyMTLTextureType1DArray) { - image.addAddressProps("Rep,X,X"); + if (dstImage.textureType == MyMTLTextureType1DArray) { + dstImage.addAddressProps("Rep,X,X"); } - else if (image.textureType == MyMTLTextureType3D) { - image.addAddressProps("Rep,Rep,Rep"); + else if (dstImage.textureType == MyMTLTextureType3D) { + dstImage.addAddressProps("Rep,Rep,Rep"); } else { - image.addAddressProps("Rep,Rep,X"); + dstImage.addAddressProps("Rep,Rep,X"); } if (info.doMipmaps) { - image.addFilterProps("Lin,Lin,Lin"); // min,mag,mip + dstImage.addFilterProps("Lin,Lin,Lin"); // min,mag,mip } else { - image.addFilterProps("Lin,Lin,X"); // min,mag,mip + dstImage.addFilterProps("Lin,Lin,X"); // min,mag,mip } // This is hash of source png/ktx file (use xxhash32 or crc32) @@ -1029,13 +1069,13 @@ bool Image::encode(ImageInfo& info, FILE* dstFile) const // convert props into a data blob that can be written out vector propsData; - image.toPropsData(propsData); + dstImage.toPropsData(propsData); header.bytesOfKeyValueData = (uint32_t)propsData.size(); //ktxImage.bytesPerBlock = header.blockSize(); //ktxImage.blockDims = header.blockDims(); - int32_t storageSize = image.mipLevelSize(w, h); + int32_t storageSize = dstImage.mipLevelSize(w, h); // how much to store to store biggest level of ktx (will in-place mip to // this) @@ -1047,7 +1087,7 @@ bool Image::encode(ImageInfo& info, FILE* dstFile) const int32_t numMipLevels = 0; // header only holds pixelFormat, but can generate block info from that - computeMipStorage(image, w, h, // pixelFormat, + computeMipStorage(dstImage, w, h, // pixelFormat, info.doMipmaps, info.mipMinSize, info.mipMaxSize, storageSize, storageSizeTotal, mipStorageSizes, numDstMipLevels, numMipLevels); @@ -1102,9 +1142,9 @@ bool Image::encode(ImageInfo& info, FILE* dstFile) const } // update image to match - image.width = header.pixelWidth; - image.height = header.pixelHeight; - image.depth = header.pixelDepth; + dstImage.width = header.pixelWidth; + dstImage.height = header.pixelHeight; + dstImage.depth = header.pixelDepth; // ---------------------------------------------------- @@ -1196,7 +1236,14 @@ bool Image::encode(ImageInfo& info, FILE* dstFile) const srcImage.pixelsHalf = halfImage.data(); } } + + int32_t numChunks = (int32_t)chunkOffsets.size(); + // allocate to hold props and entire image to write out + if (!dstFile) { + dstImage.reserveImageData(); + } + // 
---------------------------------------------------- Mipper mipper; @@ -1204,20 +1251,20 @@ bool Image::encode(ImageInfo& info, FILE* dstFile) const // write the header out KTXHeader headerCopy = header; - if (image.textureType == MyMTLTextureType1DArray) { + if (dstImage.textureType == MyMTLTextureType1DArray) { headerCopy.pixelHeight = 0; headerCopy.pixelDepth = 0; } - if (!FileHelper::writeBytes(dstFile, (const uint8_t*)&headerCopy, sizeof(headerCopy))) { + if (!writeDataAtOffset((const uint8_t*)&headerCopy, sizeof(headerCopy), 0, dstFile, dstImage)) { return false; } // write out the props - if (!FileHelper::writeBytes(dstFile, propsData.data(), propsData.size())) { + if (!writeDataAtOffset(propsData.data(), propsData.size(), sizeof(KTXHeader), dstFile, dstImage)) { return false; } - for (int32_t chunk = 0; chunk < (int32_t)chunkOffsets.size(); ++chunk) { + for (int32_t chunk = 0; chunk < numChunks; ++chunk) { // this needs to append before chunkOffset copy below w = modifiedWidth; h = modifiedHeight; @@ -1276,7 +1323,7 @@ bool Image::encode(ImageInfo& info, FILE* dstFile) const } // doing in-place mips - ImageData dstImage = srcImage; + ImageData dstImageData = srcImage; //---------------------------------------------- @@ -1303,23 +1350,23 @@ bool Image::encode(ImageInfo& info, FILE* dstFile) const if (!skipMip) { // sdf mipper has to build from origin sourceImage // but it can in-place write to the same dstImage - sdfMipper.mipmap(dstImage, mipLevel); + sdfMipper.mipmap(dstImageData, mipLevel); - w = dstImage.width; - h = dstImage.height; + w = dstImageData.width; + h = dstImageData.height; } } else { // can export existing image for mip 0 if (mipLevel > 0) { // have to build the submips even with skipMip - mipper.mipmap(srcImage, dstImage); + mipper.mipmap(srcImage, dstImageData); // dst becomes src for next in-place mipmap - srcImage = dstImage; + srcImage = dstImageData; - w = dstImage.width; - h = dstImage.height; + w = dstImageData.width; + h = dstImageData.height; } } @@ -1336,11 +1383,11 @@ bool Image::encode(ImageInfo& info, FILE* dstFile) const //KLOGI("Image", "chunk:%d %d\n", chunk, mipOffset); // average channels per block if requested (mods 8-bit data on a per block basis) - ImageData mipImage = dstImage; + ImageData mipImage = dstImageData; if (!info.averageChannels.empty()) { // this isn't applied to srgb data (what about premul?) 
- averageChannelsInBlock(info.averageChannels.c_str(), image, + averageChannelsInBlock(info.averageChannels.c_str(), dstImage, mipImage, tmpImageData8); mipImage.pixels = tmpImageData8.data(); @@ -1349,7 +1396,7 @@ bool Image::encode(ImageInfo& info, FILE* dstFile) const Timer timer; bool success = - compressMipLevel(info, image, + compressMipLevel(info, dstImage, mipImage, outputTexture, mipStorageSize); assert(success); @@ -1377,19 +1424,19 @@ bool Image::encode(ImageInfo& info, FILE* dstFile) const int32_t levelSizeOf = sizeof(levelSize); assert(levelSizeOf == 4); - fseek(dstFile, mipOffset - levelSizeOf, SEEK_SET); // from begin + //fseek(dstFile, mipOffset - levelSizeOf, SEEK_SET); // from begin - if (!FileHelper::writeBytes(dstFile, (const uint8_t*)&levelSize, levelSizeOf)) { + if (!writeDataAtOffset((const uint8_t*)&levelSize, levelSizeOf, mipOffset - levelSizeOf, dstFile, dstImage)) { return false; } } - fseek(dstFile, mipOffset, SEEK_SET); // from begin + //fseek(dstFile, mipOffset, SEEK_SET); // from begin // Note that default ktx alignment is 4, so r8u, r16f mips need to be padded out to 4 bytes // may need to write these out row by row, and let fseek pad the rows to 4. - if (!FileHelper::writeBytes(dstFile, outputTexture.data.data(), mipStorageSize)) { + if (!writeDataAtOffset(outputTexture.data.data(), mipStorageSize, mipOffset, dstFile, dstImage)) { return false; } } diff --git a/libkram/kram/KramImage.h b/libkram/kram/KramImage.h index cf83409c..2c82f8e0 100644 --- a/libkram/kram/KramImage.h +++ b/libkram/kram/KramImage.h @@ -41,9 +41,15 @@ class Image { bool loadImageFromKTX(const KTXImage& image); + // encode/ecode to a file bool encode(ImageInfo& info, FILE* dstFile) const; bool decode(const KTXImage& image, FILE* dstFile, TexEncoder decoder, bool isVerbose, const string& swizzleText) const; + + // encode/decode to a memory block (TODO: change over to returning dstImage holding all data inside) + bool encode(ImageInfo& info, KTXImage& dstImage) const; + + bool decode(const KTXImage& image, KTXImage& dstImage, TexEncoder decoder, bool isVerbose, const string& swizzleText) const; // this is only for 2d images bool resizeImage(int32_t wResize, int32_t hResize, bool resizePow2, ImageResizeFilter filter = kImageResizeFilterPoint); @@ -59,6 +65,9 @@ class Image { bool hasAlpha() const { return _hasAlpha; } private: + bool encodeImpl(ImageInfo& info, FILE* dstFile, KTXImage& dstImage) const; + bool decodeImpl(const KTXImage& srcImage, FILE* dstFile, KTXImage& dstImage, TexEncoder decoder, bool isVerbose, const string& swizzleText) const; + // compute how big mips will be void computeMipStorage(const KTXImage& image, int32_t w, int32_t h, bool doMipmaps, int32_t mipMinSize, int32_t mipMaxSize, From 449f551ede8e65d97385e449d22484289ff5ea74 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sat, 20 Feb 2021 00:31:33 -0800 Subject: [PATCH 002/901] Kram - add non-pow2 downsample for odd -> even in mipmapLevelOdd This is needed to prevent a shift in the image. Also simplifies the fast path which can ignore the odd case. This is done single pass with 3x3 pixel area. 9 -> 4, 11 -> 5, etc. Can see the difference when stepping through mips on Toof-a image. 
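A rough sketch of the odd-axis tap pattern described above (illustration only, not the code in the KTXMipper.cpp diff below, which also applies per-tap weights that shift toward the outer pixels):

```cpp
// Shows how an odd source axis folds into the round-down mip without
// skipping the last pixel: output i reads source taps {2i, 2i+1, 2i+2},
// while an even axis keeps the plain 2-tap (2x2 in 2D) box filter.
#include <algorithm>
#include <cstdio>

static void printAxisTaps(int srcDim)
{
    int mipDim = std::max(1, srcDim / 2); // round-down: 9 -> 4, 11 -> 5
    bool isOdd = (srcDim & 1) != 0;
    printf("src %d -> mip %d\n", srcDim, mipDim);

    for (int i = 0; i < mipDim; ++i) {
        if (isOdd) {
            int c = 2 * i + 1; // centers advance by 2, starting at 1
            printf("  dst %d <- src {%d, %d, %d}\n", i, c - 1, c, c + 1);
        }
        else {
            printf("  dst %d <- src {%d, %d}\n", i, 2 * i, 2 * i + 1);
        }
    }
}

int main()
{
    printAxisTaps(9);  // last output reads 6,7,8 - no rightward shift
    printAxisTaps(11); // 11 -> 5
    printAxisTaps(8);  // even fast path
    return 0;
}
```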
--- libkram/kram/KTXMipper.cpp | 237 ++++++++++++++++++++++++++++++++----- libkram/kram/KTXMipper.h | 2 + 2 files changed, 207 insertions(+), 32 deletions(-) diff --git a/libkram/kram/KTXMipper.cpp b/libkram/kram/KTXMipper.cpp index dc732a4d..0718d19e 100644 --- a/libkram/kram/KTXMipper.cpp +++ b/libkram/kram/KTXMipper.cpp @@ -304,7 +304,7 @@ void Mipper::mipmap(const ImageData& srcImage, ImageData& dstImage) const mipmapLevel(srcImage, dstImage); } -void Mipper::mipmapLevel(const ImageData& srcImage, ImageData& dstImage) const +void Mipper::mipmapLevelOdd(const ImageData& srcImage, ImageData& dstImage) const { int32_t width = srcImage.width; int32_t height = srcImage.height; @@ -324,50 +324,223 @@ void Mipper::mipmapLevel(const ImageData& srcImage, ImageData& dstImage) const int32_t dstIndex = 0; - // To see the downsampled mip dimensions enable this - // int32_t wDst = width; - // int32_t hDst = height; - // mipDown(wDst, hDst); - - // 535 produces 267.5 -> 267, last pixel in an odd width or height is skipped - // this code was incrementing too often at the end bool isOddX = width & 1; bool isOddY = height & 1; - for (int32_t y = 0; y < height; y += 2) { - // last y row is skipped if odd, this causes a shift - if (isOddY) { - if (y == (height - 1)) { - break; - } - } - + // advance always by 2, but sample from neighbors + int32_t mipWidth = std::max(1, width / 2); + int32_t mipHeight = std::max(1, height / 2); + + float invWidth = 1.0f/width; + float invHeight = 1.0f/height; + + for (int32_t y = isOddY ? 1 : 0; y < height; y += 2) { + int32_t ym = y - 1; int32_t y0 = y; int32_t y1 = y + 1; - if (y1 == height) { - y1 = y; + + // weights + int32_t mipY = y/2; + float ymw = (mipHeight - mipY - 1) * invHeight; + float y0w = mipHeight * invHeight; + float y1w = mipY * invHeight; + + if (!isOddY) { + ym = y; // weight is 0 + + ymw = 0.0f; + y0w = 0.5f; + y1w = 0.5f; } + + ym *= width; y0 *= width; y1 *= width; - for (int32_t x = 0; x < width; x += 2) { - // last x column is skipped if odd, this causes a shift - if (isOddX) { - if (x == (width - 1)) { - break; + for (int32_t x = isOddX ? 
1 : 0; x < width; x += 2) { + + int32_t xm = x - 1; + int32_t x0 = x; + int32_t x1 = x + 1; + + // weights + int32_t mipX = x/2; + float xmw = (mipWidth - mipX - 1) * invWidth; + float x0w = mipWidth * invWidth; + float x1w = mipX * invWidth; + + if (!isOddX) { + xm = x; // weight is 0 + + xmw = 0.0f; + x0w = 0.5f; + x1w = 0.5f; + } + + // we have 3x2, 2x3 or 3x3 pattern to weight + // now lookup the 9 values from the buffer + + float4 c[9]; + + if (srcHalf) { + c[0] = toFloat4(srcHalf[ym + xm]); + c[1] = toFloat4(srcHalf[ym + x0]); + c[2] = toFloat4(srcHalf[ym + x1]); + + c[3] = toFloat4(srcHalf[y0 + xm]); + c[4] = toFloat4(srcHalf[y0 + x0]); + c[5] = toFloat4(srcHalf[y0 + x1]); + + c[6] = toFloat4(srcHalf[y1 + xm]); + c[7] = toFloat4(srcHalf[y1 + x0]); + c[8] = toFloat4(srcHalf[y1 + x1]); + } + else if (srcFloat) { + c[0] = srcFloat[ym + xm]; + c[1] = srcFloat[ym + x0]; + c[2] = srcFloat[ym + x1]; + + c[3] = srcFloat[y0 + xm]; + c[4] = srcFloat[y0 + x0]; + c[5] = srcFloat[y0 + x1]; + + c[6] = srcFloat[y1 + xm]; + c[7] = srcFloat[y1 + x0]; + c[8] = srcFloat[y1 + x1]; + } + else { + c[0] = ColorToUnormFloat4(srcColor[ym + xm]); + c[1] = ColorToUnormFloat4(srcColor[ym + x0]); + c[2] = ColorToUnormFloat4(srcColor[ym + x1]); + + c[3] = ColorToUnormFloat4(srcColor[y0 + xm]); + c[4] = ColorToUnormFloat4(srcColor[y0 + x0]); + c[5] = ColorToUnormFloat4(srcColor[y0 + x1]); + + c[6] = ColorToUnormFloat4(srcColor[y1 + xm]); + c[7] = ColorToUnormFloat4(srcColor[y1 + x0]); + c[8] = ColorToUnormFloat4(srcColor[y1 + x1]); + } + + // apply weights to columns/rows + for (int32_t i = 0; i < 3; i++) { + c[3*i+0] *= xmw; + c[3*i+1] *= x0w; + c[3*i+2] *= x1w; + } + + for (int32_t i = 0; i < 3; i++) { + c[0+i] *= ymw; + c[3+i] *= y0w; + c[6+i] *= y1w; + } + + // add them all up + float4 cFloat = c[0]; + for (int32_t i = 1; i < 9; ++i) { + cFloat += c[i]; + } + + if (srcHalf) { + + // overwrite float4 image + cDstHalf[dstIndex] = toHalf4(cFloat); + + // assume hdr pulls from half/float data + if (!srcImage.isHDR) { + // convert back to srgb for encode + if (srcImage.isSRGB) { + cFloat.x = linearToSRGBFunc(cFloat.x); + cFloat.y = linearToSRGBFunc(cFloat.y); + cFloat.z = linearToSRGBFunc(cFloat.z); + } + + // override rgba8u version, since this is what is encoded + Color c = Unormfloat4ToColor(cFloat); + + // can only skip this if cSrc = cDst + cDstColor[dstIndex] = c; } } + else if (srcFloat) { - int32_t x1 = x + 1; - if (x1 == width) { - x1 = x; + // overwrite float4 image + cDstFloat[dstIndex] = cFloat; + + // assume hdr pulls from half/float data + if (!srcImage.isHDR) { + // convert back to srgb for encode + if (srcImage.isSRGB) { + cFloat.x = linearToSRGBFunc(cFloat.x); + cFloat.y = linearToSRGBFunc(cFloat.y); + cFloat.z = linearToSRGBFunc(cFloat.z); + } + + // Overwrite the RGBA8u image too (this will go out to + // encoder) that means BC/ASTC are linearly fit to + // non-linear srgb colors - ick + Color c = Unormfloat4ToColor(cFloat); + cDstColor[dstIndex] = c; + } } + else { + + // can overwrite memory on linear image, some precision loss, but fast + Color c = Unormfloat4ToColor(cFloat); + cDstColor[dstIndex] = c; + } + + dstIndex++; + } + } +} + + +void Mipper::mipmapLevel(const ImageData& srcImage, ImageData& dstImage) const +{ + int32_t width = srcImage.width; + int32_t height = srcImage.height; + + bool isOddX = width & 1; + bool isOddY = height & 1; + + if (isOddX || isOddY) { + mipmapLevelOdd(srcImage, dstImage); + return; + } + + // fast path for 2x2 downsample below, can do in 4 taps + + // this 
can receive premul, srgb data + // the mip chain is linear data only + Color* cDstColor = dstImage.pixels; + const Color* srcColor = srcImage.pixels; + + float4* cDstFloat = dstImage.pixelsFloat; + const float4* srcFloat = srcImage.pixelsFloat; + + half4* cDstHalf = dstImage.pixelsHalf; + const half4* srcHalf = srcImage.pixelsHalf; + + // Note the ptrs above may point to same memory + + int32_t dstIndex = 0; + + for (int32_t y = 0; y < height; y += 2) { + int32_t y0 = y; + int32_t y1 = y + 1; + y0 *= width; + y1 *= width; + + for (int32_t x = 0; x < width; x += 2) { + int32_t x0 = x; + int32_t x1 = x + 1; if (srcHalf) { float4 c0, c1, c2, c3; - c0 = toFloat4(srcHalf[y0 + x]); + c0 = toFloat4(srcHalf[y0 + x0]); c1 = toFloat4(srcHalf[y0 + x1]); - c2 = toFloat4(srcHalf[y1 + x]); + c2 = toFloat4(srcHalf[y1 + x0]); c3 = toFloat4(srcHalf[y1 + x1]); // mip filter is simple box filter @@ -394,10 +567,10 @@ void Mipper::mipmapLevel(const ImageData& srcImage, ImageData& dstImage) const } } else if (srcFloat) { - const float4& c0 = srcFloat[y0 + x]; + const float4& c0 = srcFloat[y0 + x0]; const float4& c1 = srcFloat[y0 + x1]; - const float4& c2 = srcFloat[y1 + x]; + const float4& c2 = srcFloat[y1 + x0]; const float4& c3 = srcFloat[y1 + x1]; // mip filter is simple box filter @@ -425,10 +598,10 @@ void Mipper::mipmapLevel(const ImageData& srcImage, ImageData& dstImage) const } else { // faster 8-bit only path for LDR and unmultiplied - const Color& c0 = srcColor[y0 + x]; + const Color& c0 = srcColor[y0 + x0]; const Color& c1 = srcColor[y0 + x1]; - const Color& c2 = srcColor[y1 + x]; + const Color& c2 = srcColor[y1 + x0]; const Color& c3 = srcColor[y1 + x1]; // 8-bit box filter, with +2/4 for rounding diff --git a/libkram/kram/KTXMipper.h b/libkram/kram/KTXMipper.h index 2a5a97f2..ca991a60 100644 --- a/libkram/kram/KTXMipper.h +++ b/libkram/kram/KTXMipper.h @@ -73,6 +73,8 @@ class Mipper { private: void mipmapLevel(const ImageData &srcImage, ImageData &dstImage) const; + + void mipmapLevelOdd(const ImageData& srcImage, ImageData& dstImage) const; }; } // namespace kram From 727160bd6af2306f4693582efc82d72209a4ebda Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sat, 20 Feb 2021 00:38:27 -0800 Subject: [PATCH 003/901] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 290a4bef..4acb0d48 100644 --- a/README.md +++ b/README.md @@ -302,7 +302,7 @@ kram includes additional open-source: * Tile command for SVT tiling * Block twiddling support for consoles * Merge command to combine images (similar to ImageMagick) -* Atlas command to atlas to 2D and 2D array textures. Display names, show bounds of atlases. +* Atlas command to atlas to 2D and 2D array textures. Display names, show bounds of atlases. Have -chunks arg now. * 3D chart flattening. * Motion vector direction analysis. * Split view comparison rendering. Move horizontal slider like ShaderToy. @@ -541,7 +541,7 @@ ASTC doesn't compress and RDO as tightly. ### On mip calculations and non-power-of-two textures -With the exception of PVRTC, the block encoded formats support non-power-of-two mipmaps. But very little literature talks about how mips are calculated. D3D first used round-down mips, GL followed suit, and Metal/Vulkan followed suit. Round down cuts out a mip level, and does a floor of the mip levels. Round-up mips generally have a better mapping to the upper with a simple box filter. 
kram hasn't adjusted it's box filter to adjust for this yet, but there are links into the code to articles about how to better weight pixels. The kram box filter is correct for power-of-two mipgen, but should be improved for these cases. +With the exception of PVRTC, the block encoded formats support non-power-of-two mipmaps. But very little literature talks about how mips are calculated. OpenGL/D3D first used round-down mips, and Metal/Vulkan had to follow suit. Round down cuts out a mip level, and does a floor of the mip levels. Round-up mips generally have a better mapping to the upper with a simple box filter. kram now has reasonable cases for pow2 and non-pow2 mip generation. Odd source pixel counts have to shift weights as leftmost/rightmost pixels contribute more on the left/right sides, and avoid a shift in image pixels. ``` Round Down From 6c4fb304f32d3b8ddb9f1a1a2910a683222a5e50 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 21 Feb 2021 01:29:29 -0800 Subject: [PATCH 004/901] Update README.md --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index 4acb0d48..81024c36 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,9 @@ C++11 library from 200 to 800KB in size depending on encoder options. Compiles # kramv.app ObjC++ Viewer for PNG/KTX supported files from kram. 530KB in size. Uses Metal compute and shaders, eyedropper, grids, debugging, preview. Supports HDR and all texture types. Mip, face, and array access. No dmg yet, just drop onto /Applications folder, and then run scripts/fixfinder.sh to flush LaunchServices (see below). Runs on macOS (ARM/Intel). +Diagrams and screenshots can be located here: +https://www.figma.com/file/bPmPSpBGTi2xTVnBDqVEq0/kram + #### Releases includes builds for macOS (Xcode 12.3 - arm64/x64) and Windows x64 (VS 2019 - x64). libkram can be built for iOS/Android. 
### About kram From 209b0c0878be73e5a698365efc1fc6e3fa784d95 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 21 Feb 2021 21:13:24 -0800 Subject: [PATCH 005/901] CMake - add SDK comparison, and use CACHE variable on deployment/arch --- CMakeLists.txt | 76 +++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 69 insertions(+), 7 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 692c2150..d643a537 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,11 +10,17 @@ endif() # https://cmake.org/cmake/help/latest/policy/CMP0077.html#policy:CMP0077 #cmake_policy(SET CMP0077 NEW) -set(UNIXBUILD FALSE) +set(BUILD_UNIX FALSE) +set(BUILD_IOS FALSE) if (APPLE) - message("build for macOS") + if (CMAKE_SYSTEM_NAME STREQUAL "iOS") + message("build for iOS") + set(BUILD_IOS TRUE) + else() + message("build for macOS") + endif() elseif (WIN32) - message("build for win") + message("build for win x64") elseif (UNIX AND NOT APPLE) message("build for unix") set(UNIXBUILD TRUE) @@ -45,8 +51,16 @@ set(CMAKE_CXX_EXTENSIONS NO) # set(CMAKE_OSX_SYSROOT macosx11.0) # set(CMAKE_OSX_SYSROOT macos) # this doesn't work -set(CMAKE_OSX_DEPLOYMENT_TARGET 10.14) -set(CMAKE_OSX_ARCHITECTURES "$(ARCHS_STANDARD)") +# CMAKE_OSX_DEPLOYMENT_TARGET must be set as a CACHE variable, or it will be stripped +if (APPLE) + if (BUILD_IOS) + set(CMAKE_OSX_DEPLOYMENT_TARGET "11.0" CACHE STRING "Minimum iOS") + set(CMAKE_OSX_ARCHITECTURES "$(ARCHS_STANDARD)" CACHE STRING "Architecture iOS") + else() + set(CMAKE_OSX_DEPLOYMENT_TARGET "10.14" CACHE STRING "Minimum macOS") + set(CMAKE_OSX_ARCHITECTURES "$(ARCHS_STANDARD)" CACHE STRING "Architecture macOS") + endif() +endif() set(CMAKE_CONFIGURATION_TYPES "Debug;Release") set(CMAKE_BUILD_TYPE Release) @@ -60,9 +74,9 @@ set(CMAKE_DEFAULT_STARTUP_PROJECT "kram") set(myTargetWorkspace kramWorkspace) if (APPLE) -project(${myTargetWorkspace} LANGUAGES C CXX OBJCXX) + project(${myTargetWorkspace} LANGUAGES C CXX OBJCXX) else() -project(${myTargetWorkspace} LANGUAGES C CXX) + project(${myTargetWorkspace} LANGUAGES C CXX) endif() # the kram static library libkram which should build on iOS/Android/Mac/Win @@ -79,6 +93,54 @@ endif() #----------------------------------------------------- +# https://discourse.cmake.org/t/specifying-cmake-osx-sysroot-breaks-xcode-projects-but-no-other-choice/2532/8 +# use snipet from Alian Martin to validate SDK + +if (APPLE) + if(NOT DEFINED CMAKE_OSX_SYSROOT) + message(FATAL_ERROR "Cannot check SDK version if CMAKE_OSX_SYSROOT is not defined." 
+ ) + endif() + + # check the Xcode app itself for it's version + set(XCODE_MIN_APP 12.2) + if(XCODE AND XCODE_VERSION VERSION_LESS XCODE_MIN_APP) + message(FATAL_ERROR "This project requires at least Xcode ${XCODE_MIN_APP}") + endif() + + # check the SDK + set(XCODE_MIN_SDK_IOS, 14.0) + set(XCODE_MIN_SDK_MACOS, 11.0) + + execute_process( + COMMAND xcrun --sdk "${CMAKE_OSX_SYSROOT}" --show-sdk-version + OUTPUT_VARIABLE SDK_VERSION + OUTPUT_STRIP_TRAILING_WHITESPACE + ) + + if (BUILD_IOS) + message("iOS SDK ${SDK_VERSION}") + message("iOS deploy ${CMAKE_OSX_DEPLOYMENT_TARGET}") + message("iOS arch ${CMAKE_OSX_ARCHITECTURES}") + + if (SDK_VERSION VERSION_LESS XCODE_MIN_SDK_IOS) + message(FATAL_ERROR "This project requires at least iPhoneOS ${XCODE_MIN_SDK_IOS}" + ) + endif() + else() + message("macOS SDK ${SDK_VERSION}") + message("macOS deploy ${CMAKE_OSX_DEPLOYMENT_TARGET}") + message("macOS arch ${CMAKE_OSX_ARCHITECTURES}") + + if (SDK_VERSION VERSION_LESS XCODE_MIN_SDK_MACOS) + message(FATAL_ERROR "This project requires at least macOS SDK ${XCODE_MIN_SDK_MACOS}" + ) + endif() + endif() +endif() + +#----------------------------------------------------- + set(BIN_DIR ${PROJECT_SOURCE_DIR}/bin) # install doesn't seem to do anything on WIN32, the build elements are not copied From dc45672b408b2539ac6e3931079462c3c07cb066 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Mon, 22 Feb 2021 09:44:22 -0800 Subject: [PATCH 006/901] kram - rename mipper --- kramv/KramViewerMain.mm | 14 +++++++++----- libkram/kram/KramImage.cpp | 7 ++++--- libkram/kram/KramImage.h | 2 +- libkram/kram/KramImageInfo.h | 2 +- libkram/kram/{KTXMipper.cpp => KramMipper.cpp} | 2 +- libkram/kram/{KTXMipper.h => KramMipper.h} | 0 libkram/kram/KramSDFMipper.cpp | 2 +- 7 files changed, 17 insertions(+), 12 deletions(-) rename libkram/kram/{KTXMipper.cpp => KramMipper.cpp} (99%) rename libkram/kram/{KTXMipper.h => KramMipper.h} (100%) diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index e75e8893..08b78d75 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -19,7 +19,7 @@ #import "KramRenderer.h" #import "KramShaders.h" #include "KramLog.h" -#include "KTXMipper.h" +#include "KramMipper.h" #include "KramMmapHelper.h" #include "KramImage.h" #include "KramViewerBase.h" @@ -261,7 +261,10 @@ MyMTLPixelFormat encodeSrcTextureAsFormat(MyMTLPixelFormat currentFormat, bool i void encodeSrcForEncodeComparisons(bool increment) { auto newFormat = encodeSrcTextureAsFormat(displayedFormat, increment); - // TODO: have to encode and then decode astc on macOS-Intel + // This is really only useful for variable block size formats like ASTC + // maybe some value in BC7 to BC1 comparison (original vs. BC7 vs. BC1) + + // TODO: have to encode and then decode astc/etc on macOS-Intel // load png and keep it around, and then call encode and then diff the image against the original pixels // 565 will always differ from the original. 
@@ -275,10 +278,11 @@ void encodeSrcForEncodeComparisons(bool increment) { // encode incremented format and cache, that way don't wait too long // and once all encode formats generated, can cycle through them until next image loaded - //KTXImage image; + // Could reuse the same buffer for all ASTC formats, larger blocks always need less mem + //KramImage image; // TODO: move encode to KTXImage, convert png to one layer KTXImage //image.open(...); - //image.encode(); - //decodeIfNeeded(...); + //image.encode(dstImage); + //decodeIfNeeded(dstImage, dstImageDecoded); //comparisonTexture = [createImage:image]; //set that onto the shader to diff against after recontruct diff --git a/libkram/kram/KramImage.cpp b/libkram/kram/KramImage.cpp index f32439e2..028f5ded 100644 --- a/libkram/kram/KramImage.cpp +++ b/libkram/kram/KramImage.cpp @@ -39,7 +39,7 @@ #include #include "KTXImage.h" -#include "KTXMipper.h" +#include "KramMipper.h" #include "KramFileHelper.h" #include "KramSDFMipper.h" #include "KramTimer.h" @@ -85,6 +85,7 @@ Image::Image() : _width(0), _height(0), _hasColor(false), _hasAlpha(false) { } +// TODO: eliminate this and Image class, use KTXImage everywhere so can have explicit mip chains bool Image::loadImageFromKTX(const KTXImage& image) { // copy the data into a contiguous array @@ -444,7 +445,7 @@ static bool writeDataAtOffset(const uint8_t* data, size_t dataSize, size_t dataO bool Image::decode(const KTXImage& srcImage, FILE* dstFile, TexEncoder decoder, bool isVerbose, const string& swizzleText) const { - KTXImage dstImage; + KTXImage dstImage; // thrown out, data written to file return decodeImpl(srcImage, dstFile, dstImage, decoder, isVerbose, swizzleText); } @@ -2087,7 +2088,7 @@ bool Image::compressMipLevel(const ImageInfo& info, KTXImage& image, #if 0 // This hackimproves L1 and LA block generating // even enabled dual-plane mode for LA. Otherwise rgb and rgba blocks - // are generated on data that only contains L or LA blocks. + // are generated on data that only contain L or LA blocks. bool useUniqueChannels = true; if (useUniqueChannels) { diff --git a/libkram/kram/KramImage.h b/libkram/kram/KramImage.h index 2c82f8e0..6fcdfdb9 100644 --- a/libkram/kram/KramImage.h +++ b/libkram/kram/KramImage.h @@ -8,7 +8,7 @@ #include #include "KTXImage.h" // for MyMTLTextureType -#include "KTXMipper.h" +#include "KramMipper.h" #include "KramConfig.h" #include "KramImageInfo.h" diff --git a/libkram/kram/KramImageInfo.h b/libkram/kram/KramImageInfo.h index bbb23088..2cef4c2b 100644 --- a/libkram/kram/KramImageInfo.h +++ b/libkram/kram/KramImageInfo.h @@ -8,7 +8,7 @@ #include #include "KTXImage.h" -#include "KTXMipper.h" // for Color +#include "KramMipper.h" // for Color #include "KramConfig.h" namespace kram { diff --git a/libkram/kram/KTXMipper.cpp b/libkram/kram/KramMipper.cpp similarity index 99% rename from libkram/kram/KTXMipper.cpp rename to libkram/kram/KramMipper.cpp index 0718d19e..f845a6d6 100644 --- a/libkram/kram/KTXMipper.cpp +++ b/libkram/kram/KramMipper.cpp @@ -2,7 +2,7 @@ // The license and copyright notice shall be included // in all copies or substantial portions of the Software. 
-#include "KTXMipper.h" +#include "KramMipper.h" #include #include diff --git a/libkram/kram/KTXMipper.h b/libkram/kram/KramMipper.h similarity index 100% rename from libkram/kram/KTXMipper.h rename to libkram/kram/KramMipper.h diff --git a/libkram/kram/KramSDFMipper.cpp b/libkram/kram/KramSDFMipper.cpp index 92d8a9bc..1d51e4d1 100644 --- a/libkram/kram/KramSDFMipper.cpp +++ b/libkram/kram/KramSDFMipper.cpp @@ -6,7 +6,7 @@ #include -#include "KTXMipper.h" +#include "KramMipper.h" namespace kram { using namespace heman; From d427f7c67be31fd14285f23510e5721b94564bae Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Mon, 22 Feb 2021 09:45:39 -0800 Subject: [PATCH 007/901] kramv - support DebugPosX on SDF and don't offset the comparison. Highlight color now purple. --- kramv/KramShaders.metal | 13 ++++++++++--- kramv/KramViewerBase.cpp | 4 ++-- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index 9e43e9cd..97e76179 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -337,8 +337,15 @@ float4 DrawPixels( // adding some slop here so that flat areas don't flood the visual with red else if (uniforms.debugMode == ShDebugModePosX) { // two channels here, would need to color each channel - if (c.r >= 0.5 + 0.05) { - isHighlighted = true; + if (uniforms.isSDF) { + if (c.r >= 0.5) { + isHighlighted = true; + } + } + else { + if (c.r >= 0.5 + 0.05) { + isHighlighted = true; + } } } else if (uniforms.debugMode == ShDebugModePosY) { @@ -350,7 +357,7 @@ float4 DrawPixels( // TODO: is it best to highlight the interest pixels in red // or the negation of that to see which ones aren't. if (isHighlighted) { - float3 highlightColor = float3(1.0f, 0.0f, 0.0f); + float3 highlightColor = float3(1.0f, 0.0f, 1.0f); c.rgb = highlightColor; } diff --git a/kramv/KramViewerBase.cpp b/kramv/KramViewerBase.cpp index e468f569..e1edac33 100644 --- a/kramv/KramViewerBase.cpp +++ b/kramv/KramViewerBase.cpp @@ -47,10 +47,10 @@ void ShowSettings::advanceDebugMode(bool isShiftKeyDown) { // if (_showSettings->debugMode == DebugModeGray && !hasColor) advanceDebugMode(isShiftKeyDown); // for normals show directions - if (debugMode == DebugModePosX && !isNormal) { + if (debugMode == DebugModePosX && !(isNormal || isSDF)) { advanceDebugMode(isShiftKeyDown); } - if (debugMode == DebugModePosY && !isNormal) { + if (debugMode == DebugModePosY && !(isNormal)) { advanceDebugMode(isShiftKeyDown); } From b5c52237b55bfec0d45d170addc9875de51be744 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Tue, 23 Feb 2021 10:07:35 -0800 Subject: [PATCH 008/901] Update README.md --- README.md | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 81024c36..425f342a 100644 --- a/README.md +++ b/README.md @@ -609,17 +609,19 @@ The encoders all have to encode non-linear srgb point clouds, which isn't correc ``` -### On texture alases (TODO:) +### On texture atlases and charts (TODO:) -2D atlas packing works for source textures, but suffers from many issues. Often packed by hand or algorithm, the results look great as PNG where there are no mips and no block encoding. But the images break down once textures are block encoded. These are some of the complex problems: +2D atlas packing works for source textures like particle flipbooks, but suffers from many issues. Often packed by hand or algorithm, the results look great as PNG, but break down once mipped and block encoded. 
These are some of the complex problems: -* Mip bleed - Solved with mip lod clamping. -* Alignment bleed - Solved with padding. +* Mip bleed - Solved with mip lod clamping or disabling mips. +* Alignment bleed - Solved with padding to smallest visible mip blocks. * Block bleed - Solved with pow2 blocks - 4x4 scales down to 2x2 and 1x1. 6x6 scales to non-integral 3x3 and 1.5x1.5. * Clamp only - Solved by disabling wrap/mirror modes and uv scaling. * Complex pack - stb_rect_pack tightly pack images to a 2d area without accounting for bleed issues kram will soon offer an atlas mode that uses ES3-level 2d array textures. These waste some space, but are much simpler to pack, provide a full encoded mip chain with any block type, and also avoid the 5 problems mentioned above. Named atlas entries reference a given array element, but could be repacked and remapped as used to a smaller atlas. Dropping mip levels can be done across all entries, but is a little harder for a single array element. Sparse textures work for 2d array textures, but often the min sparse size is 256x256 (64K) or 128x128 (16K) and the rest is the packed mip tail. Can draw many types of objects and particles with only a single texture array. -The idea is to copy all atlased images to a 2d vertical strip. This makes row-byte handling simpler. Then kram can already convert a vertical strip to a 2D array, and the output rectangle, array index, mip range, and altas names are tracked as well. But there is some subtlety to copy smaller textures to the smaller mips and use sampler mip clamping. +The idea is to copy all atlased images to a 2d vertical strip. This makes row-byte handling simpler. Then kram can already convert a vertical strip to a 2D array, and the output rectangle, array index, mip range, and altas names are tracked as well. But there is some subtlety to copy smaller textures to the smaller mips and use sampler mip clamping. Non-pow2 textures will have transparent fill around the sides. + +Apps like Substance Painter use charts of unwrapped UV. These need to be gapped and aligned to block sizes to avoid the problems above. Often times the gap is too small (1px) for the mipchain, and instead the algorithms cover up the issue by dilating colors into the gutter regions, so that black outlines are not visible. thelka_atlas, xatlas, and other utilities can build these charts. From f8e38d09e393082dd903fcb0916f8d9d46f476f0 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Wed, 3 Mar 2021 10:25:29 -0800 Subject: [PATCH 009/901] kram - fixes to memory encode for plugin, log format checks, expose KTX2 header These were fixes to support plugin. --- libkram/kram/KTXImage.cpp | 38 -------------------------- libkram/kram/KTXImage.h | 44 +++++++++++++++++++++++++++++- libkram/kram/Kram.cpp | 7 ++--- libkram/kram/KramImage.cpp | 20 ++++++++++++-- libkram/kram/KramImage.h | 2 +- libkram/kram/KramLog.cpp | 55 +++++++++++++++++++++++++++----------- libkram/kram/KramLog.h | 17 ++++++++++-- libkram/kram/sse2neon.h | 2 ++ 8 files changed, 123 insertions(+), 62 deletions(-) diff --git a/libkram/kram/KTXImage.cpp b/libkram/kram/KTXImage.cpp index fba3e0ad..34698a01 100644 --- a/libkram/kram/KTXImage.cpp +++ b/libkram/kram/KTXImage.cpp @@ -1205,45 +1205,7 @@ class KTX2ImageLevel { uint64_t length; // size of a single mip }; -// Mips are reversed from KTX1 (mips are smallest first for streaming), -// and this stores an array of supercompressed levels, and has dfds. 
-class KTX2Header { -public: - - uint8_t identifier[12] = { // same is kKTX2Identifier - 0xAB, 0x4B, 0x54, 0x58, 0x20, 0x32, 0x30, 0xBB, 0x0D, 0x0A, 0x1A, 0x0A - // '«', 'K', 'T', 'X', ' ', '2', '0', '»', '\r', '\n', '\x1A', '\n' - }; - - uint32_t vkFormat = 0; // invalid - uint32_t typeSize = 1; - - uint32_t pixelWidth = 1; - uint32_t pixelHeight = 0; - uint32_t pixelDepth = 0; - - uint32_t layerCount = 0; - uint32_t faceCount = 1; - uint32_t levelCount = 1; - uint32_t supercompressionScheme = 0; - // Index - - // dfd block - uint32_t dfdByteOffset = 0; - uint32_t dfdByteLength = 0; - - // key-value - uint32_t kvdByteOffset = 0; - uint32_t kvdByteLength = 0; - - // supercompress global data - uint64_t sgdByteOffset = 0; - uint64_t sgdByteLength = 0; - - // chunks hold levelCount of all mips of the same size - // KTX2ImageChunk* chunks; // [levelCount] -}; //// Data Format Descriptor //uint32_t dfdTotalSize = 0; diff --git a/libkram/kram/KTXImage.h b/libkram/kram/KTXImage.h index 6ae660df..ac11b27c 100644 --- a/libkram/kram/KTXImage.h +++ b/libkram/kram/KTXImage.h @@ -167,6 +167,48 @@ class KTXHeader { //--------------------------------------------- +// Mips are reversed from KTX1 (mips are smallest first for streaming), +// and this stores an array of supercompressed levels, and has dfds. +class KTX2Header { +public: + + uint8_t identifier[12] = { // same is kKTX2Identifier + 0xAB, 0x4B, 0x54, 0x58, 0x20, 0x32, 0x30, 0xBB, 0x0D, 0x0A, 0x1A, 0x0A + // '«', 'K', 'T', 'X', ' ', '2', '0', '»', '\r', '\n', '\x1A', '\n' + }; + + uint32_t vkFormat = 0; // invalid + uint32_t typeSize = 1; + + uint32_t pixelWidth = 1; + uint32_t pixelHeight = 0; + uint32_t pixelDepth = 0; + + uint32_t layerCount = 0; + uint32_t faceCount = 1; + uint32_t levelCount = 1; + uint32_t supercompressionScheme = 0; + + // Index + + // dfd block + uint32_t dfdByteOffset = 0; + uint32_t dfdByteLength = 0; + + // key-value + uint32_t kvdByteOffset = 0; + uint32_t kvdByteLength = 0; + + // supercompress global data + uint64_t sgdByteOffset = 0; + uint64_t sgdByteLength = 0; + + // chunks hold levelCount of all mips of the same size + // KTX2ImageChunk* chunks; // [levelCount] +}; + +//--------------------------------------------- + // This is one entire level of mipLevels. 
class KTXImageLevel { public: @@ -233,7 +275,7 @@ class KTXImage { KTXHeader header; // copy of KTXHeader, so can be modified and then written back // write out only string/string props, for easy of viewing - vector> props; + vector > props; vector mipLevels; // offsets into fileData diff --git a/libkram/kram/Kram.cpp b/libkram/kram/Kram.cpp index 3f97703e..ee375592 100644 --- a/libkram/kram/Kram.cpp +++ b/libkram/kram/Kram.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -1269,7 +1270,7 @@ string kramInfoPNGToString(const string& srcFilename, const uint8_t* data, uint6 bool isMB = (dataSize > (512 * 1024)); sprintf(tmp, "file: %s\n" - "size: %d\n" + "size: %" PRIu64 "\n" "sizm: %0.3f %s\n", srcFilename.c_str(), dataSize, @@ -1417,7 +1418,7 @@ string kramInfoKTXToString(const string& srcFilename, const KTXImage& srcImage, "mipd: %dx%d\n" "mips: %zu\n" "mipc: %dx\n" - "mipo: %zu\n", + "mipo: %" PRIu64 "\n", w, h, mipLevel++, mip.length, srcImage.totalChunks(), mip.offset); info += tmp; @@ -1991,7 +1992,7 @@ static int32_t kramAppEncode(vector& args) if (success) { success = srcImage.encode(info, tmpFileHelper.pointer()); - + if (!success) { KLOGE("Kram", "encode failed"); } diff --git a/libkram/kram/KramImage.cpp b/libkram/kram/KramImage.cpp index 028f5ded..338455a3 100644 --- a/libkram/kram/KramImage.cpp +++ b/libkram/kram/KramImage.cpp @@ -85,7 +85,21 @@ Image::Image() : _width(0), _height(0), _hasColor(false), _hasAlpha(false) { } -// TODO: eliminate this and Image class, use KTXImage everywhere so can have explicit mip chains +// TODO: use KTXImage everywhere so can have explicit mip chains +// this routine converts KTX to float4, but don't need if already matching 4 channels +// could do other formata conversions here on more supported formats (101010A2, etc). + +// TODO: handle loading KTXImage with custom mips +// TODO: handle loading KTXImage with other texture types (cube, array, etc) + +// TODO: image here is very specifically a single level of chunks of float4 or Color (RGBA8Unorm) +// the encoder is only written to deal with those types. + +// TODO: for png need to turn grid/horizontal strip into a vertical strip if not already +// that way can move through the chunks and overwrite them in-place. +// That would avoid copying each chunk out in the encode, but have to do in reodering. +// That way data is stored as KTX would instead of how PNG does. 
+ bool Image::loadImageFromKTX(const KTXImage& image) { // copy the data into a contiguous array @@ -186,7 +200,7 @@ bool Image::loadImageFromKTX(const KTXImage& image) int32_t srcX = (y0 + x) * numSrcChannels; int32_t dstX = (y0 + x) * numDstChannels; - // copy in available alues + // copy in available values for (int32_t i = 0; i < numSrcChannels; ++i) { srcPixel.v[i] = srcPixels[srcX + i]; } @@ -1242,6 +1256,8 @@ bool Image::encodeImpl(ImageInfo& info, FILE* dstFile, KTXImage& dstImage) const // allocate to hold props and entire image to write out if (!dstFile) { + dstImage.initMipLevels(false, mipOffset); + dstImage.reserveImageData(); } diff --git a/libkram/kram/KramImage.h b/libkram/kram/KramImage.h index 6fcdfdb9..535a74b2 100644 --- a/libkram/kram/KramImage.h +++ b/libkram/kram/KramImage.h @@ -46,7 +46,7 @@ class Image { bool decode(const KTXImage& image, FILE* dstFile, TexEncoder decoder, bool isVerbose, const string& swizzleText) const; - // encode/decode to a memory block (TODO: change over to returning dstImage holding all data inside) + // encode/decode to a memory block bool encode(ImageInfo& info, KTXImage& dstImage) const; bool decode(const KTXImage& image, KTXImage& dstImage, TexEncoder decoder, bool isVerbose, const string& swizzleText) const; diff --git a/libkram/kram/KramLog.cpp b/libkram/kram/KramLog.cpp index 881e3816..81d78051 100644 --- a/libkram/kram/KramLog.cpp +++ b/libkram/kram/KramLog.cpp @@ -19,29 +19,45 @@ using namespace std; // //} -static int32_t vsprintf(string& str, const char* format, va_list args) + + +static int32_t append_vsprintf(string& str, const char* format, va_list args) { - if (strchr(format, '%') == nullptr) { - str = format; - return (int32_t)str.length(); + if (strcmp(format, "%s") == 0) { + const char* firstArg = va_arg(args, const char*); + str += firstArg; + return strlen(firstArg); + } + if (strrchr(format, '%') == nullptr) { + str += format; + return strlen(format); } - // can't reuse args after vsnprintf + // format once to get length (without NULL at end) va_list argsCopy; va_copy(argsCopy, args); - - // format once to get length (without NULL at end) int32_t len = vsnprintf(NULL, 0, format, argsCopy); - + va_end(argsCopy); + if (len > 0) { + size_t existingLen = str.length(); + // resize and format again into string - str.resize(len); + str.resize(existingLen + len, 0); - vsnprintf(&str[0], len + 1, format, args); + vsnprintf((char*)str.data() + existingLen, len + 1, format, args); } + return len; } + +static int32_t vsprintf(string& str, const char* format, va_list args) +{ + str.clear(); + return append_vsprintf(str, format, args); +} + int32_t sprintf(string& str, const char* format, ...) { va_list args; @@ -52,6 +68,17 @@ int32_t sprintf(string& str, const char* format, ...) return len; } +int32_t append_sprintf(string& str, const char* format, ...) 
+{ + va_list args; + va_start(args, format); + int32_t len = append_vsprintf(str, format, args); + va_end(args); + + return len; +} + + bool startsWith(const char* str, const string& substring) { return strncmp(str, substring.c_str(), substring.size()) == 0; @@ -84,22 +111,20 @@ extern int32_t logMessage(const char* group, int32_t logLevel, // convert var ags to a msg const char* msg; + string str; - - va_list args; - va_start(args, fmt); if (strstr(fmt, "%") == nullptr) { msg = fmt; } else { + va_list args; va_start(args, fmt); vsprintf(str, fmt, args); va_end(args); msg = str.c_str(); } - va_end(args); - + // pipe to correct place, could even be file output FILE* fp = stdout; if (logLevel >= LogLevelWarning) diff --git a/libkram/kram/KramLog.h b/libkram/kram/KramLog.h index f6907e4c..d8493afe 100644 --- a/libkram/kram/KramLog.h +++ b/libkram/kram/KramLog.h @@ -18,9 +18,17 @@ enum LogLevel { LogLevelError = 3, }; +// these validate the inputs to any sprintf like format + args +#ifndef __printflike + #define __printflike(fmtIndex, varargIndex) +#endif +#ifndef __scanflike + #define __scanflike(fmtIndex, varargIndex) +#endif + extern int32_t logMessage(const char* group, int32_t logLevel, const char* file, int32_t line, const char* func, - const char* fmt, ...); + const char* fmt, ...) __printflike(6, 7); // verify leaves conditional code in the build #if KRAM_DEBUG @@ -38,7 +46,12 @@ extern int32_t logMessage(const char* group, int32_t logLevel, // TODO: move to Strings.h using namespace std; -int32_t sprintf(string& str, const char* format, ...); + +// returns length of string, -1 if failure +int32_t sprintf(string& str, const char* format, ...) __printflike(2, 3); + +// returns length of chars appended, -1 if failure +int32_t append_sprintf(string& str, const char* format, ...) __printflike(2, 3); bool startsWith(const char* str, const string& substring); bool endsWithExtension(const char* str, const string& substring); diff --git a/libkram/kram/sse2neon.h b/libkram/kram/sse2neon.h index 49a3c9e1..9ce4712a 100644 --- a/libkram/kram/sse2neon.h +++ b/libkram/kram/sse2neon.h @@ -5855,6 +5855,7 @@ FORCE_INLINE void _mm_clflush(void const *p) // no corollary for Neon? } +/* conflicts with mm_malloc.h // Allocate aligned blocks of memory. // https://software.intel.com/en-us/ // cpp-compiler-developer-guide-and-reference-allocating-and-freeing-aligned-memory-blocks @@ -5874,6 +5875,7 @@ FORCE_INLINE void _mm_free(void *addr) { free(addr); } +*/ // Starting with the initial value in crc, accumulates a CRC32 value for // unsigned 8-bit integer v. From bcb10717337ee87beade183405e70e67576346f8 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Wed, 3 Mar 2021 12:37:15 -0800 Subject: [PATCH 010/901] kram - add prezero to see if that helps with texture creation. This is meant to use with shaders that do premul post sampling. The issue is that if you feed premul to these shaders, then they apply alpha twice. But on zero pixel areas, these throw off the encoders if the rgb isn't also zero since they weight towards dilation or unseen rgb. 
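A minimal sketch of how -prezero differs from -premul (stand-in pixel type for illustration, not kram's Color/float4):

```cpp
// Full premultiply scales rgb by alpha everywhere; prezero only zeroes rgb
// where alpha == 0, so fully transparent texels can't pull the encoder's
// block fit toward colors that never display, while visible texels keep
// their unmultiplied rgb for shaders that premultiply after sampling.
#include <vector>

struct PixelF { float r, g, b, a; }; // stand-in type, not kram's float4

void premultiply(std::vector<PixelF>& pixels)
{
    for (auto& p : pixels) {
        p.r *= p.a;
        p.g *= p.a;
        p.b *= p.a;
    }
}

void prezero(std::vector<PixelF>& pixels)
{
    for (auto& p : pixels) {
        if (p.a == 0.0f) {          // only fully transparent texels
            p.r = p.g = p.b = 0.0f; // kill rgb so it can't bleed into blocks
        }
    }
}
```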
--- libkram/kram/Kram.cpp | 11 ++++++ libkram/kram/KramImage.cpp | 28 +++++++++++---- libkram/kram/KramImageInfo.cpp | 4 +++ libkram/kram/KramImageInfo.h | 3 ++ libkram/kram/KramMipper.cpp | 62 +++++++++++++++++++++++++++------- libkram/kram/KramMipper.h | 2 +- 6 files changed, 89 insertions(+), 21 deletions(-) diff --git a/libkram/kram/Kram.cpp b/libkram/kram/Kram.cpp index ee375592..e481badd 100644 --- a/libkram/kram/Kram.cpp +++ b/libkram/kram/Kram.cpp @@ -935,6 +935,7 @@ void kramEncodeUsage(bool showVersion = true) "\t [-avg rxbx]\n" "\t [-sdf]\n" "\t [-premul]\n" + "\t [-prezero]\n" "\t [-quality 0-100]\n" "\t [-optopaque]\n" "\t [-v]\n" @@ -1008,6 +1009,12 @@ void kramEncodeUsage(bool showVersion = true) "\tPremultiplied alpha to src pixels before output\n" "\n" + // This is meant to work with shaders that (incorrectly) premul after sampling. + // limits the rgb bleed in regions that should not display colors. Can stil have black color halos. + "\t-prezero" + "\tPremultiplied alpha to src pixels before output but only where a=0\n" + "\n" + "\t-optopaque" "\tChange format from bc7/3 to bc1, or etc2rgba to rgba if opaque\n" "\n" @@ -1825,6 +1832,10 @@ static int32_t kramAppEncode(vector& args) infoArgs.isPremultiplied = true; continue; } + else if (isStringEqual(word, "-prezero")) { + infoArgs.isPrezero = true; + continue; + } else if (isStringEqual(word, "-v") || isStringEqual(word, "-verbose")) { infoArgs.isVerbose = true; diff --git a/libkram/kram/KramImage.cpp b/libkram/kram/KramImage.cpp index 338455a3..516c9d56 100644 --- a/libkram/kram/KramImage.cpp +++ b/libkram/kram/KramImage.cpp @@ -1198,7 +1198,7 @@ bool Image::encodeImpl(ImageInfo& info, FILE* dstFile, KTXImage& dstImage) const vector halfImage; vector floatImage; - bool doPremultiply = info.hasAlpha && info.isPremultiplied; + bool doPremultiply = info.hasAlpha && (info.isPremultiplied || info.isPrezero); bool isMultichunk = chunkOffsets.size() > 1; if (info.isHDR) { @@ -1216,11 +1216,25 @@ bool Image::encodeImpl(ImageInfo& info, FILE* dstFile, KTXImage& dstImage) const // run this across all the source data // do this in-place before mips are generated if (doPremultiply) { - for (const auto& pixel : _pixelsFloat) { - float alpha = pixel.w; - float4& pixelChange = const_cast(pixel); - pixelChange *= alpha; - pixelChange.w = alpha; + if (info.isPrezero) { + for (const auto& pixel : _pixelsFloat) { + float alpha = pixel.w; + float4& pixelChange = const_cast(pixel); + + // only premul at 0 alpha regions + if (alpha == 0.0f) { + pixelChange *= alpha; + pixelChange.w = alpha; + } + } + } + else { + for (const auto& pixel : _pixelsFloat) { + float alpha = pixel.w; + float4& pixelChange = const_cast(pixel); + pixelChange *= alpha; + pixelChange.w = alpha; + } } } } @@ -1334,7 +1348,7 @@ bool Image::encodeImpl(ImageInfo& info, FILE* dstFile, KTXImage& dstImage) const // copy and convert to half4 or float4 image // srcImage already points to float data, so could modify that // only need doPremultiply at the top mip - mipper.initPixelsHalfIfNeeded(srcImage, doPremultiply, + mipper.initPixelsHalfIfNeeded(srcImage, doPremultiply && !info.isPrezero, info.isPrezero, halfImage); } } diff --git a/libkram/kram/KramImageInfo.cpp b/libkram/kram/KramImageInfo.cpp index 3304d032..04d339bf 100644 --- a/libkram/kram/KramImageInfo.cpp +++ b/libkram/kram/KramImageInfo.cpp @@ -991,7 +991,11 @@ void ImageInfo::initWithArgs(const ImageInfoArgs& args) textureEncoder = args.textureEncoder; textureType = args.textureType; + isPrezero = args.isPrezero; 
isPremultiplied = args.isPremultiplied; + if (!isPremultiplied) + isPrezero = false; + isNormal = args.isNormal; doSDF = args.doSDF; diff --git a/libkram/kram/KramImageInfo.h b/libkram/kram/KramImageInfo.h index 2cef4c2b..0d9b2ae5 100644 --- a/libkram/kram/KramImageInfo.h +++ b/libkram/kram/KramImageInfo.h @@ -56,12 +56,14 @@ class ImageInfoArgs { bool isVerbose = false; bool doSDF = false; bool isPremultiplied = false; + bool isPrezero = false; bool isNormal = false; // signed, but may be stored unorm and swizzled (f.e. astc/bc3nm gggr or rrrg) // can pick a smaller format if alpha = 1 (only for bc and etc) bool optimizeFormatForOpaque = false; // these and formatString set the pixelFormat + // if pixelFOrmat set directly, then these are updated off that format bool isSigned = false; bool isSRGB = false; bool isHDR = false; @@ -116,6 +118,7 @@ class ImageInfo { bool isSRGB = false; bool isColorWeighted = false; bool isPremultiplied = false; // don't premul + bool isPrezero = false; bool isHDR = false; //bool skipImageLength = false; // gen ktxa diff --git a/libkram/kram/KramMipper.cpp b/libkram/kram/KramMipper.cpp index f845a6d6..f2f4d300 100644 --- a/libkram/kram/KramMipper.cpp +++ b/libkram/kram/KramMipper.cpp @@ -164,9 +164,13 @@ void Mipper::initTables() #endif } -void Mipper::initPixelsHalfIfNeeded(ImageData& srcImage, bool doPremultiply, +void Mipper::initPixelsHalfIfNeeded(ImageData& srcImage, bool doPremultiply, bool doPrezero, vector& halfImage) const { + Color zeroColor = { 0, 0, 0, 0 }; + float4 zeroColorf = simd_make_float4(0.0, 0.0f, 0.0f, 0.f); // need a constant for this + half4 zeroColorh = toHalf4(zeroColorf); + int32_t w = srcImage.width; int32_t h = srcImage.height; @@ -177,6 +181,7 @@ void Mipper::initPixelsHalfIfNeeded(ImageData& srcImage, bool doPremultiply, assert(false); } else if (srcImage.isSRGB) { + // this does srgb and premul conversion for (int32_t y = 0; y < h; y++) { int32_t y0 = y * w; @@ -189,19 +194,29 @@ void Mipper::initPixelsHalfIfNeeded(ImageData& srcImage, bool doPremultiply, if (c0.a != 255) { float alpha = alphaToFloat[c0.a]; - if (!doPremultiply) { - cFloat.w = alpha; - } - else { + if (doPremultiply) { // premul and sets alpha cFloat *= alpha; } + else if (doPrezero) { + if (c0.a == 0) { + cFloat = zeroColorf; + c0 = zeroColor; + } + else { + cFloat.w = alpha; + } + } + else { + cFloat.w = alpha; + } } - // if (!floatImage.empty()) { - // floatImage[y0 + x] = cFloat; - // } - // else + // TODO: 32F path + // if (!floatImage.empty()) { + // floatImage[y0 + x] = cFloat; + // } + // else { halfImage[y0 + x] = toHalf4(cFloat); } @@ -219,6 +234,26 @@ void Mipper::initPixelsHalfIfNeeded(ImageData& srcImage, bool doPremultiply, } } } + else if (doPrezero) { + // do premul conversion + for (int32_t y = 0; y < h; y++) { + int32_t y0 = y * w; + for (int32_t x = 0; x < w; x++) { + Color& c0 = srcImage.pixels[y0 + x]; + + // TODO: assumes 16, need 32f path too + if (c0.a == 0) { + c0 = zeroColor; + halfImage[y0 + x] = zeroColorh; + } + else { + float4 cFloat = {alphaToFloat[c0.r], alphaToFloat[c0.g], + alphaToFloat[c0.b], alphaToFloat[c0.a]}; + halfImage[y0 + x] = toHalf4(cFloat); + } + } + } + } else if (doPremultiply) { // do premul conversion for (int32_t y = 0; y < h; y++) { @@ -234,10 +269,11 @@ void Mipper::initPixelsHalfIfNeeded(ImageData& srcImage, bool doPremultiply, cFloat *= alpha; } - // if (!floatImage.empty()) { - // floatImage[y0 + x] = cFloat; - // } - // else + // TODO: 32F path + // if (!floatImage.empty()) { + // floatImage[y0 + x] = 
cFloat; + // } + // else { halfImage[y0 + x] = toHalf4(cFloat); } diff --git a/libkram/kram/KramMipper.h b/libkram/kram/KramMipper.h index ca991a60..0751d440 100644 --- a/libkram/kram/KramMipper.h +++ b/libkram/kram/KramMipper.h @@ -68,7 +68,7 @@ class Mipper { // drop by 1 mip level by box filter void mipmap(const ImageData &srcImage, ImageData &dstImage) const; - void initPixelsHalfIfNeeded(ImageData &srcImage, bool doPremultiply, + void initPixelsHalfIfNeeded(ImageData &srcImage, bool doPremultiply, bool doPrezero, vector &halfImage) const; private: From 683de92199a26bff156bb211c6a281587ab97ad5 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Wed, 3 Mar 2021 16:03:28 -0800 Subject: [PATCH 011/901] kram - fix png info, sprintf format mismatch, and prezero setup --- libkram/kram/Kram.cpp | 6 +++--- libkram/kram/KramImageInfo.cpp | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/libkram/kram/Kram.cpp b/libkram/kram/Kram.cpp index e481badd..8a82e448 100644 --- a/libkram/kram/Kram.cpp +++ b/libkram/kram/Kram.cpp @@ -1204,9 +1204,9 @@ string kramInfoToString(const string& srcFilename, bool isVerbose) data = srcFileBuffer.data(); dataSize = (int32_t)srcFileBuffer.size(); - - info = kramInfoPNGToString(srcFilename, data, dataSize, isVerbose); } + info = kramInfoPNGToString(srcFilename, data, dataSize, isVerbose); + } else if (isKTX) { KTXImage srcImage; @@ -1423,7 +1423,7 @@ string kramInfoKTXToString(const string& srcFilename, const KTXImage& srcImage, sprintf(tmp, "mipn: %d\n" "mipd: %dx%d\n" - "mips: %zu\n" + "mips: %" PRIu64 "\n" "mipc: %dx\n" "mipo: %" PRIu64 "\n", w, h, mipLevel++, mip.length, srcImage.totalChunks(), mip.offset); diff --git a/libkram/kram/KramImageInfo.cpp b/libkram/kram/KramImageInfo.cpp index 04d339bf..85e5ba4e 100644 --- a/libkram/kram/KramImageInfo.cpp +++ b/libkram/kram/KramImageInfo.cpp @@ -993,7 +993,7 @@ void ImageInfo::initWithArgs(const ImageInfoArgs& args) isPrezero = args.isPrezero; isPremultiplied = args.isPremultiplied; - if (!isPremultiplied) + if (isPremultiplied) isPrezero = false; isNormal = args.isNormal; From 17fc34a391b9e2cb9954fef3953d07f39d3df4b6 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Wed, 3 Mar 2021 16:09:30 -0800 Subject: [PATCH 012/901] bc7enc - fix code to not use anonymous structs. On VS, these cannot contain functions. 
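A minimal illustration of the pattern being changed (names here are placeholders; the actual structs are color_quad_u8 and vec4F in the diff below):

    #include <cstdint>

    // before: an unnamed struct that only gets its name through the typedef;
    // per the note above, VS won't accept the member operators in this form
    typedef struct {
        uint8_t r, g, b, a;
        uint8_t& operator[](int i) { return (&r)[i]; }
    } quad_via_typedef;

    // after: the same layout as a named struct, which compilers accept everywhere
    struct quad_named {
        uint8_t r, g, b, a;
        uint8_t& operator[](int i) { return (&r)[i]; }
    };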
--- libkram/bc7enc/bc7enc.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/libkram/bc7enc/bc7enc.cpp b/libkram/bc7enc/bc7enc.cpp index 1a511d51..a8e32f15 100644 --- a/libkram/bc7enc/bc7enc.cpp +++ b/libkram/bc7enc/bc7enc.cpp @@ -33,11 +33,11 @@ static inline int32_t iabs32(int32_t v) { uint32_t msk = v >> 31; return (v ^ ms static inline void swapu(uint32_t* a, uint32_t* b) { uint32_t t = *a; *a = *b; *b = t; } //static inline void swapf(float* a, float* b) { float t = *a; *a = *b; *b = t; } -typedef struct { +struct color_quad_u8 { uint8_t r, g, b, a; inline const uint8_t& operator[](int index) const { return *(&r + index); } inline uint8_t& operator[](int index) { return *(&r + index); } -} color_quad_u8; +}; static inline color_quad_u8 *color_quad_u8_set_clamped(color_quad_u8 *pRes, int32_t r, int32_t g, int32_t b, int32_t a) { pRes->r = (uint8_t)clampi(r, 0, 255); pRes->g = (uint8_t)clampi(g, 0, 255); pRes->b = (uint8_t)clampi(b, 0, 255); pRes->a = (uint8_t)clampi(a, 0, 255); return pRes; } static inline color_quad_u8 *color_quad_u8_set(color_quad_u8 *pRes, int32_t r, int32_t g, int32_t b, int32_t a) { assert((uint32_t)(r | g | b | a) <= 255); pRes->r = (uint8_t)r; pRes->g = (uint8_t)g; pRes->b = (uint8_t)b; pRes->a = (uint8_t)a; return pRes; } @@ -60,11 +60,11 @@ static inline vec4F vec4F_mul(const vec4F *pLHS, float s) { vec4F res = *pLHS * static inline vec4F *vec4F_normalize_in_place(vec4F *pV) { *pV = normalize(*pV); return pV; } #else -typedef struct { +struct vec4F { float r, g, b, a; inline const float& operator[](int index) const { return *(&r + index); } inline float& operator[](int index) { return *(&r + index); } -} vec4F; +}; static inline vec4F *vec4F_set_scalar(vec4F *pV, float x) { pV->r = x; pV->g = x; pV->b = x; pV->a = x; return pV; } static inline vec4F *vec4F_set(vec4F *pV, float x, float y, float z, float w) { pV->r = x; pV->g = y; pV->b = z; pV->a = w; return pV; } From c12d931c54d0b66bf2b5a8eeaafdc180e9cfd145 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Fri, 5 Mar 2021 08:36:56 -0800 Subject: [PATCH 013/901] Update README.md --- README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 425f342a..29a3d94d 100644 --- a/README.md +++ b/README.md @@ -382,6 +382,7 @@ Usage: kram encode [-avg rxbx] [-sdf] [-premul] + [-prezero] [-quality 0-100] [-optopaque] [-v] @@ -413,8 +414,8 @@ OPTIONS -signed Signed r or rg for etc/bc formats, astc doesn't have signed format. -normal Normal map rg storage signed for etc/bc (rg01), only unsigned astc L+A (gggr). -sdf Generate single-channel SDF from a bitmap, can mip and drop large mips. Encode to r8, bc4, etc2r, astc4x4 (Unorm LLL1) to encode - -premul Premultiplied alpha to src pixels before output - + -premul Premultiplied alpha to src pixels before output. Disable multiply of alpha post-sampling. In kramv, view with "Premul off". + -prezero Premultiplied alpha only where 0, where shaders multiply alpha post-sampling. Not true premul and black halos if alpha ramp is fast. In kramv, view with "Premul on". -optopaque Change format from bc7/3 to bc1, or etc2rgba to rgba if opaque -chunks 4x4 Specifies how many chunks to split up texture into 2darray From 5a502b373f3b5cb2242331ebdaac31f41bd59345 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 7 Mar 2021 09:51:18 -0800 Subject: [PATCH 014/901] Kram - plugin fixes mipoffset needed to be recalculated on encode. This was causing files to be 2x bigger when using the memory path. 
Didn't affect kram encode, since it goes to dstFile. Fix up imageInfo init to honor a pixelFormat set onto args. Expose some more format functions. Expose the identifiers for file filtering. Can read the first 6 bytes to identify ktx vs. ktx2. Or 4 bytes to identify ktx and ktx2. --- libkram/kram/KTXImage.cpp | 16 ++++++++++++++-- libkram/kram/KTXImage.h | 10 ++++++++-- libkram/kram/KramImage.cpp | 3 +++ libkram/kram/KramImageInfo.cpp | 6 +++++- libkram/kram/KramLog.cpp | 9 ++++++++- libkram/kram/KramLog.h | 1 + 6 files changed, 39 insertions(+), 6 deletions(-) diff --git a/libkram/kram/KTXImage.cpp b/libkram/kram/KTXImage.cpp index 34698a01..754a0172 100644 --- a/libkram/kram/KTXImage.cpp +++ b/libkram/kram/KTXImage.cpp @@ -41,11 +41,11 @@ const char* kPropFilter = "KramFilter"; using namespace std; // These start each KTX file to indicate the type -const uint8_t kKTXIdentifier[12] = { +const uint8_t kKTXIdentifier[kKTXIdentifierSize] = { 0xAB, 0x4B, 0x54, 0x58, 0x20, 0x31, 0x31, 0xBB, 0x0D, 0x0A, 0x1A, 0x0A //'«', 'K', 'T', 'X', ' ', '1', '1', '»', '\r', '\n', '\x1A', '\n' }; -const uint8_t kKTX2Identifier[12] = { +const uint8_t kKTX2Identifier[kKTXIdentifierSize] = { 0xAB, 0x4B, 0x54, 0x58, 0x20, 0x32, 0x30, 0xBB, 0x0D, 0x0A, 0x1A, 0x0A // '«', 'K', 'T', 'X', ' ', '2', '0', '»', '\r', '\n', '\x1A', '\n' }; @@ -513,6 +513,12 @@ bool isFloatFormat(MyMTLPixelFormat format) return it.is16F() || it.is32F(); } +bool isHalfFormat(MyMTLPixelFormat format) +{ + const auto& it = formatInfo(format); + return it.is16F(); +} + bool isBCFormat(MyMTLPixelFormat format) { const auto& it = formatInfo(format); @@ -531,6 +537,12 @@ bool isASTCFormat(MyMTLPixelFormat format) return it.isASTC(); } +bool isExplicitFormat(MyMTLPixelFormat format) +{ + const auto& it = formatInfo(format); + return !(it.isASTC() || it.isETC() || it.isBC()); +} + bool isHdrFormat(MyMTLPixelFormat format) { const auto& it = formatInfo(format); diff --git a/libkram/kram/KTXImage.h b/libkram/kram/KTXImage.h index ac11b27c..02387293 100644 --- a/libkram/kram/KTXImage.h +++ b/libkram/kram/KTXImage.h @@ -123,13 +123,17 @@ struct Int2 { //--------------------------------------------- +constexpr int32_t kKTXIdentifierSize = 12; +extern const uint8_t kKTXIdentifier[kKTXIdentifierSize]; +extern const uint8_t kKTX2Identifier[kKTXIdentifierSize]; + class KTXHeader { public: // Don't add any date to this class. It's typically the top of a file cast to this. // As such, this doesn't have much functionality, other than to hold the header. // 64-byte header - uint8_t identifier[12] = { // same is kKTXIdentifier + uint8_t identifier[kKTXIdentifierSize] = { // same is kKTXIdentifier 0xAB, 0x4B, 0x54, 0x58, 0x20, 0x31, 0x31, 0xBB, 0x0D, 0x0A, 0x1A, 0x0A //'«', 'K', 'T', 'X', ' ', '1', '1', '»', '\r', '\n', '\x1A', '\n' }; @@ -172,7 +176,7 @@ class KTXHeader { class KTX2Header { public: - uint8_t identifier[12] = { // same is kKTX2Identifier + uint8_t identifier[kKTXIdentifierSize] = { // same is kKTX2Identifier 0xAB, 0x4B, 0x54, 0x58, 0x20, 0x32, 0x30, 0xBB, 0x0D, 0x0A, 0x1A, 0x0A // '«', 'K', 'T', 'X', ' ', '2', '0', '»', '\r', '\n', '\x1A', '\n' }; @@ -286,6 +290,7 @@ class KTXImage { // Generic format helpers. All based on the ubiquitous type. 
bool isFloatFormat(MyMTLPixelFormat format); +bool isHalfFormat(MyMTLPixelFormat format); bool isHdrFormat(MyMTLPixelFormat format); bool isSrgbFormat(MyMTLPixelFormat format); bool isColorFormat(MyMTLPixelFormat format); @@ -295,6 +300,7 @@ bool isSignedFormat(MyMTLPixelFormat format); bool isBCFormat(MyMTLPixelFormat format); bool isETCFormat(MyMTLPixelFormat format); bool isASTCFormat(MyMTLPixelFormat format); +bool isExplicitFormat(MyMTLPixelFormat format); Int2 blockDimsOfFormat(MyMTLPixelFormat format); uint32_t blockSizeOfFormat(MyMTLPixelFormat format); diff --git a/libkram/kram/KramImage.cpp b/libkram/kram/KramImage.cpp index 516c9d56..6e71f5ed 100644 --- a/libkram/kram/KramImage.cpp +++ b/libkram/kram/KramImage.cpp @@ -1270,6 +1270,9 @@ bool Image::encodeImpl(ImageInfo& info, FILE* dstFile, KTXImage& dstImage) const // allocate to hold props and entire image to write out if (!dstFile) { + // recompute, it's had mips added into it above + mipOffset = sizeof(KTXHeader) + header.bytesOfKeyValueData; + dstImage.initMipLevels(false, mipOffset); dstImage.reserveImageData(); diff --git a/libkram/kram/KramImageInfo.cpp b/libkram/kram/KramImageInfo.cpp index 85e5ba4e..2652c0c3 100644 --- a/libkram/kram/KramImageInfo.cpp +++ b/libkram/kram/KramImageInfo.cpp @@ -542,7 +542,11 @@ bool validateFormatAndEncoder(ImageInfoArgs& infoArgs) { bool error = false; - MyMTLPixelFormat format = parseFormat(infoArgs); + // caller an set or this can parse format from the format text + MyMTLPixelFormat format = infoArgs.pixelFormat; + if (format == MyMTLPixelFormatInvalid) { + format = parseFormat(infoArgs); + } if (format == MyMTLPixelFormatInvalid) { return false; } diff --git a/libkram/kram/KramLog.cpp b/libkram/kram/KramLog.cpp index 81d78051..89090557 100644 --- a/libkram/kram/KramLog.cpp +++ b/libkram/kram/KramLog.cpp @@ -19,15 +19,22 @@ using namespace std; // //} +// Note: careful with stdio sscanf. In clang, this does and initial strlen which for long buffers +// being parsed (f.e. mmapped Json) this can significantly slow a parser down. static int32_t append_vsprintf(string& str, const char* format, va_list args) { + // for KLOGE("group", "%s", "text") if (strcmp(format, "%s") == 0) { const char* firstArg = va_arg(args, const char*); str += firstArg; return strlen(firstArg); } + + // This is important for the case where ##VAR_ARGS only leaves the format. + // In this case "text" must be a compile time constant string to avoid security warning needed for above. 
+ // for KLOGE("group", "text") if (strrchr(format, '%') == nullptr) { str += format; return strlen(format); @@ -113,7 +120,7 @@ extern int32_t logMessage(const char* group, int32_t logLevel, const char* msg; string str; - if (strstr(fmt, "%") == nullptr) { + if (strrchr(fmt, '%') == nullptr) { msg = fmt; } else { diff --git a/libkram/kram/KramLog.h b/libkram/kram/KramLog.h index d8493afe..0b24d871 100644 --- a/libkram/kram/KramLog.h +++ b/libkram/kram/KramLog.h @@ -19,6 +19,7 @@ enum LogLevel { }; // these validate the inputs to any sprintf like format + args +// these come from sys/cdefs.h on Apple, but need to be define for __clang__ on other platforms #ifndef __printflike #define __printflike(fmtIndex, varargIndex) #endif From 9d2be269a4c974aabd4bbb1eca728f46d3876b44 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Mon, 8 Mar 2021 00:07:32 -0800 Subject: [PATCH 015/901] plugin - start of a PS plugin for loading/saving ktx/2 files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adapted DDS plugin from Brendan Bolles from here: https://github.com/fnordware/AdobeDDS Thanks Brendan for making so many open-source plugins! Replaced crn with libkam to support more input/output formats and also ktx/2 files. There's still a lot of work to support files that are different channels, bit depths, and texture types. Likely layer support is needed to track cube and array faces, mips, etc. It's unclear if this should support lossless formats, since I mostly just want 8u/16f/32f compressed as KTX2 source formats. This currently can load and decode ktx files, and also save them back out to disk as ktx only (not ktx2). Will likely need to build two plugins - one for ktx, the other for ktx2, First attempt to get CMake to build a plugin. But PS plugin api uses Carbon.r/CoreServices.r which are deprecated since macOS 10.8. Can get this to build and run with original project since it has Rez support, but new XCode removes that completely from newer projects. Can't get a command line Rez to generate anything but a 0 size file even though the command succeeds. I can't checkin the SDK due to licensing, but this is where to obtain the sdk. Just drop it into an plugin/ext/photoshopsdk folder. 
Download the "Adobe Photoshop Plug-In and Connection SDK" at https://console.adobe.io/downloads/ps --- .gitignore | 3 +- CMakeLists.txt | 19 +- kramv/CMakeLists.txt | 10 +- libkram/CMakeLists.txt | 1 + plugin/CMakeLists.txt | 260 ++++++ plugin/kps/KPS.cpp | 1140 +++++++++++++++++++++++++ plugin/kps/KPS.h | 214 +++++ plugin/kps/KPS.r | 456 ++++++++++ plugin/kps/KPSScripting.cpp | 296 +++++++ plugin/kps/KPSTerminology.h | 109 +++ plugin/kps/KPSUI.h | 167 ++++ plugin/kps/KPSVersion.h | 58 ++ plugin/kps/kram-ps.rsrc | Bin 0 -> 1399 bytes plugin/kps/mac/Info.plist | 22 + plugin/kps/mac/KPSAbout.xib | 72 ++ plugin/kps/mac/KPSAboutController.h | 58 ++ plugin/kps/mac/KPSAboutController.mm | 71 ++ plugin/kps/mac/KPSInput.xib | 112 +++ plugin/kps/mac/KPSInputController.h | 66 ++ plugin/kps/mac/KPSInputController.mm | 104 +++ plugin/kps/mac/KPSOutput.xib | 174 ++++ plugin/kps/mac/KPSOutputController.h | 95 +++ plugin/kps/mac/KPSOutputController.mm | 220 +++++ plugin/kps/mac/KPSUICocoa.mm | 252 ++++++ plugin/kps/win/KPSDialogs.rc | 168 ++++ plugin/kps/win/KPSInputDialog.cpp | 217 +++++ plugin/kps/win/KPSOutputDialog.cpp | 371 ++++++++ plugin/kps/win/resource.h | 20 + 28 files changed, 4749 insertions(+), 6 deletions(-) create mode 100644 plugin/CMakeLists.txt create mode 100755 plugin/kps/KPS.cpp create mode 100755 plugin/kps/KPS.h create mode 100755 plugin/kps/KPS.r create mode 100755 plugin/kps/KPSScripting.cpp create mode 100755 plugin/kps/KPSTerminology.h create mode 100644 plugin/kps/KPSUI.h create mode 100755 plugin/kps/KPSVersion.h create mode 100644 plugin/kps/kram-ps.rsrc create mode 100644 plugin/kps/mac/Info.plist create mode 100644 plugin/kps/mac/KPSAbout.xib create mode 100644 plugin/kps/mac/KPSAboutController.h create mode 100644 plugin/kps/mac/KPSAboutController.mm create mode 100644 plugin/kps/mac/KPSInput.xib create mode 100644 plugin/kps/mac/KPSInputController.h create mode 100644 plugin/kps/mac/KPSInputController.mm create mode 100644 plugin/kps/mac/KPSOutput.xib create mode 100644 plugin/kps/mac/KPSOutputController.h create mode 100644 plugin/kps/mac/KPSOutputController.mm create mode 100644 plugin/kps/mac/KPSUICocoa.mm create mode 100644 plugin/kps/win/KPSDialogs.rc create mode 100644 plugin/kps/win/KPSInputDialog.cpp create mode 100644 plugin/kps/win/KPSOutputDialog.cpp create mode 100644 plugin/kps/win/resource.h diff --git a/.gitignore b/.gitignore index 71ee634a..1b6d6ff2 100644 --- a/.gitignore +++ b/.gitignore @@ -41,4 +41,5 @@ out/ # Executables *.exe *.out -*.app + +plugin/ext/ diff --git a/CMakeLists.txt b/CMakeLists.txt index d643a537..5018cb5b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -91,6 +91,11 @@ if (APPLE) add_subdirectory(kramv) endif() +# ps plugin that uses libkram +if (APPLE) + add_subdirectory(plugin) +endif() + #----------------------------------------------------- # https://discourse.cmake.org/t/specifying-cmake-osx-sysroot-breaks-xcode-projects-but-no-other-choice/2532/8 @@ -109,8 +114,8 @@ if (APPLE) endif() # check the SDK - set(XCODE_MIN_SDK_IOS, 14.0) - set(XCODE_MIN_SDK_MACOS, 11.0) + set(XCODE_MIN_SDK_IOS 14.0) + set(XCODE_MIN_SDK_MACOS 11.0) execute_process( COMMAND xcrun --sdk "${CMAKE_OSX_SYSROOT}" --show-sdk-version @@ -143,13 +148,21 @@ endif() set(BIN_DIR ${PROJECT_SOURCE_DIR}/bin) +# So by default install depends on ALL_BUILD target, but that will fail if plugin +# does not have everything setup to build (or like now is not building). +# The plugin is currently setting EXCLUDE_FROM_ALL on the target so it's not built. 
+# https://stackoverflow.com/questions/17164731/installing-only-one-target-and-its-dependencies-out-of-a-complex-project-with + # install doesn't seem to do anything on WIN32, the build elements are not copied install(TARGETS libkram ARCHIVE DESTINATION ${BIN_DIR}) install(TARGETS kram RUNTIME DESTINATION ${BIN_DIR}) if (APPLE) install(TARGETS kramv BUNDLE DESTINATION ${BIN_DIR}) endif() - +# don't install this +#if (APPLE) +# install(TARGETS kram-ps BUNDLE DESTINATION ${BIN_DIR}) +#endif() diff --git a/kramv/CMakeLists.txt b/kramv/CMakeLists.txt index 918a1667..68599317 100644 --- a/kramv/CMakeLists.txt +++ b/kramv/CMakeLists.txt @@ -30,7 +30,9 @@ target_link_libraries(${myTargetApp} libkram "-framework Cocoa" "-framework Metal" - "-framework MetalKit" + "-framework MetalKit" + + # could eliminate this by replacing cube in kramv, but may want full 3d models for charts w/xatlas "-framework ModelIO" ) @@ -136,7 +138,11 @@ target_sources(${myTargetApp} PRIVATE # only these 2 resources are copied into the Resource, the other two are signed # Can't lowercase Resources or files don't go to correct place -set_source_files_properties(Assets.xcassets Base.lproj/Main.storyboard PROPERTIES +set_source_files_properties( + Assets.xcassets + Base.lproj/Main.storyboard + + PROPERTIES MACOSX_PACKAGE_LOCATION Resources ) diff --git a/libkram/CMakeLists.txt b/libkram/CMakeLists.txt index 7564b7c7..7f3173e7 100644 --- a/libkram/CMakeLists.txt +++ b/libkram/CMakeLists.txt @@ -43,6 +43,7 @@ endif() set(myTargetLib libkram) # **** This will create libkram.a or .so or kram.lib depending on platform. +# can also use OBJECT or SHARED, object cuts compile time add_library(${myTargetLib} STATIC) # turn off pch diff --git a/plugin/CMakeLists.txt b/plugin/CMakeLists.txt new file mode 100644 index 00000000..de2c32bc --- /dev/null +++ b/plugin/CMakeLists.txt @@ -0,0 +1,260 @@ +cmake_minimum_required(VERSION 3.19.1 FATAL_ERROR) + +# This is only configured for a Mac build, but see kram cli app +# for the Windows configuration. Eventually port to Win. 
+ +# have to add this to each file, or run with this +# -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON +# set(CMAKE_VERBOSE_MAKEFILE ON) + +#----------------------------------------------------- +# kramv + +# now setup the app project, and link to libkram +set(myTargetApp kram-ps) + +# the mac build has ObjC++ +project( + ${myTargetApp} + #VERSION 0.9.0 + LANGUAGES C CXX OBJCXX +) + +add_executable(${myTargetApp} EXCLUDE_FROM_ALL) + +#----------------------------------------------------- + +target_link_libraries(${myTargetApp} + ate + libkram + "-framework Cocoa" + "-framework AppKit" +) + +set_target_properties(${myTargetApp} PROPERTIES + # Note: match this up with CXX version + # c++11 min + XCODE_ATTRIBUTE_CLANG_CXX_LANGUAGE_STANDARD "c++14" + XCODE_ATTRIBUTE_CLANG_CXX_LIBRARY "libc++" + + # avx1 + XCODE_ATTRIBUTE_CLANG_X86_VECTOR_INSTRUCTIONS "avx" + + # turn off exceptions/rtti + XCODE_ATTRIBUTE_GCC_ENABLE_CPP_EXCEPTIONS NO + XCODE_ATTRIBUTE_GCC_ENABLE_CPP_RTTI NO + + # can't believe this isn't on by default in CMAKE + XCODE_ATTRIBUTE_CLANG_ENABLE_OBJC_ARC YES + + #------------------------- + + # libs can use dwarf, but apps need dSym generated + XCODE_ATTRIBUTE_DEBUG_INFORMATION_FORMAT "dwarf-with-dsym" + XCODE_ATTRIBUTE_ONLY_ACTIVE_ARCH "NO" + + # this drops app from 762KB to 174KB with only ATE enabled + # note about needing -gfull instead of -gused here or debug info messed up: + # https://gist.github.com/tkersey/39b4fe69e14b859889ffadccb009e397 + XCODE_ATTRIBUTE_DEAD_CODE_STRIPPING YES + XCODE_ATTRIBUTE_LLVM_LTO[variant=Release] "Incremental" + + #------------------------- + XCODE_ATTRIBUTE_PRODUCT_BUNDLE_IDENTIFIER "com.ba.kram-ps" + MACOSX_BUNDLE_GUI_IDENTIFIER "com.ba.kram-ps" + + # for now "sign to run locally", or entitlements can't be bundled + XCODE_ATTRIBUTE_CODE_SIGNING_REQUIRED "YES" + XCODE_ATTRIBUTE_CODE_SIGN_IDENTITY "-" + + # use the AssetCatalog for icons + XCODE_ATTRIBUTE_ASSETCATALOG_COMPILER_APPICON_NAME "AppIcon" + + # TODO: not sure how to set this, nothing online either ? + # MACOSX_BUNDLE_APP_CATEGORY "Developer Tools" +) + +target_compile_options(${myTargetApp} PRIVATE -W -Wall) + +#-------------- +# sdk + +# Don't ever use a , in a set call, it causes the glob to process entire hard drive +# and it never seems to return. Maybe it's building a glob of all files on HD. + +set(SDK_DIR "${PROJECT_SOURCE_DIR}/ext/photoshopsdk/pluginsdk") +set(SDK_SOURCE_DIR "${SDK_DIR}/photoshopapi") +set(SDK_COMMON_DIR "${SDK_DIR}/samplecode/common") + +file(GLOB_RECURSE sdkSources CONFIGURE_DEPENDS + "${SDK_SOURCE_DIR}/*.cpp" + "${SDK_SOURCE_DIR}/*.h" + "${SDK_SOURCE_DIR}/*.m" + "${SDK_SOURCE_DIR}/*.mm" + "${SDK_SOURCE_DIR}/*.r" +) + +file(GLOB_RECURSE sdkCommonSources CONFIGURE_DEPENDS + "${SDK_COMMON_DIR}/*.cpp" + "${SDK_COMMON_DIR}/*.h" + "${SDK_COMMON_DIR}/*.m" + "${SDK_COMMON_DIR}/*.mm" + "${SDK_COMMON_DIR}/*.r" +) + +# TODO: had to modify some files to not use exceptions +# pass those onto Adobe + +# TODO: can these be combined into one list? 
+# this is a win file +list(FILTER sdkCommonSources EXCLUDE REGEX ".pstypelibrary.cpp$") +list(FILTER sdkCommonSources EXCLUDE REGEX ".pstypelibrary.h$") + +list(FILTER sdkCommonSources EXCLUDE REGEX ".PIDLLInstance.cpp$") +list(FILTER sdkCommonSources EXCLUDE REGEX ".PIDLLInstance.h$") + +list(FILTER sdkCommonSources EXCLUDE REGEX ".PIUFile.cpp$") +list(FILTER sdkCommonSources EXCLUDE REGEX ".PIUFile.h$") + +list(FILTER sdkCommonSources EXCLUDE REGEX ".PSConstantArray.cpp$") +list(FILTER sdkCommonSources EXCLUDE REGEX ".*Win*.cpp$") +list(FILTER sdkCommonSources EXCLUDE REGEX ".PIWinUI.cpp$") + +# intermingled Win files in with Mac +list(FILTER sdkSources EXCLUDE REGEX ".*Win*.cpp$") + +source_group(TREE "${SDK_SOURCE_DIR}" PREFIX "sdk" FILES ${sdkSources}) +source_group(TREE "${SDK_COMMON_DIR}" PREFIX "sdkcommon" FILES ${sdkCommonSources}) + +set_target_properties(${myTargetApp} PROPERTIES + + XCODE_ATTRIBUTE_WRAPPER_EXTENSION "plugin" + + # these aren't supported anymore, only on archival projects with Rez support + #XCODE_ATTRIBUTE_REZ_PREFIX_FILE + # $(SDK_COMMON_DIR)/includes/MachOMacrezXcode.h + #XCODE_ATTRIBUTE_REZ_SEARCH_PATHS + # $(SDK_SOURCE_DIR)/resources/ + # $(SDK_SOURCE_DIR)/photoshop/ + # $(SDK_COMMON_DIR)/includes/ +) + + +#-------------- +# sources + +set(KPS_SOURCE_DIR "${PROJECT_SOURCE_DIR}/kps") + +file(GLOB_RECURSE appSources CONFIGURE_DEPENDS + "${KPS_SOURCE_DIR}/*.cpp" + "${KPS_SOURCE_DIR}/*.mm" + "${KPS_SOURCE_DIR}/*.h" + + # TODO: also include .r files from + "${KPS_SOURCE_DIR}/*.r" + "${KPS_SOURCE_DIR}/${myTargetApp}.rsrc" +) + +file(GLOB_RECURSE appNibSources CONFIGURE_DEPENDS + "${KPS_SOURCE_DIR}/*.xib" # TODO: move this to resource area below +) + +# win files +list(FILTER appSources EXCLUDE REGEX ".KPSInputDialog.cpp$") +list(FILTER appSources EXCLUDE REGEX ".KPSOutputDialog.cpp$") +list(FILTER appSources EXCLUDE REGEX ".resource.h$") + +source_group(TREE "${KPS_SOURCE_DIR}" PREFIX "source" FILES ${appSources}) + +target_sources(${myTargetApp} PRIVATE + ${appSources} + + ${sdkSources} + ${sdkCommonSources} +) + +target_include_directories(${myTargetApp} PRIVATE + "${KPS_SOURCE_DIR}" + + # the sdk includes and resources + "${SDK_SOURCE_DIR}/photoshop" + "${SDK_SOURCE_DIR}/pica_sp" + "${SDK_SOURCE_DIR}/resources" + + "${SDK_COMMON_DIR}/includes" + "${SDK_COMMON_DIR}/resources" +) + + +#-------------- +# resources + +# for some reason the Cmake template gens/add an Info.plist even though we override it +set_target_properties(${myTargetApp} PROPERTIES + MACOSX_BUNDLE TRUE + + MACOSX_BUNDLE_INFO_PLIST ${KPS_SOURCE_DIR}/mac/Info.plist + XCODE_ATTRIBUTE_INFOPLIST_FILE ${KPS_SOURCE_DIR}/mac/Info.plist + #XCODE_ATTRIBUTE_CODE_SIGN_ENTITLEMENTS ${KPS_SOURCE_DIR}/mac/kramv.entitlements +) + +target_sources(${myTargetApp} PRIVATE +# Assets.xcassets +# Base.lproj/Main.storyboard + ${appNibSources} + + ${KPS_SOURCE_DIR}/mac/Info.plist + +# ${KPS_SOURCE_DIR}/mac/Info.plist +# kramv.entitlements +) + +# only these 2 resources are copied into the Resource, the other two are signed +# Can't lowercase Resources or files don't go to correct place +set_source_files_properties( + ${appNibSources} + + # this is created in the PRE_BUILD step below + ${KPS_SOURCE_DIR}/${myTargetApp}.rsrc + + PROPERTIES + MACOSX_PACKAGE_LOCATION Resources +) + +#-------------- +# rez + +# note that despite the usage printed, -i and -s don't actually work +# for some reason only -I actually includes search paths. Ugh. +# But even though this succeeds, it gens a 0 size rsrc file. Ugh! 
+ +# turned off for now, and checking in pre-built resource +# but app still can't find _main entrpoint. + +if (FALSE) + +execute_process( + COMMAND xcrun -f Rez + OUTPUT_VARIABLE rezCompiler + OUTPUT_STRIP_TRAILING_WHITESPACE +) + +add_custom_command(TARGET ${myTargetApp} PRE_BUILD + DEPENDS ${KPS_SOURCE_DIR}/KPS.r + COMMAND ${rezCompiler} + -I ${SDK_SOURCE_DIR}/resources/ + -I ${SDK_SOURCE_DIR}/photoshop/ + -I ${SDK_COMMON_DIR}/includes/ + + -arch x86_64 + + # needs this for Carbon.r and CoreServices.r in the Adobe .r headers + #-F Carbon + #-F CoreServices + -F /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/System/Library/Frameworks/ + + -o "${KPS_SOURCE_DIR}/${myTargetApp}.rsrc" + ${KPS_SOURCE_DIR}/KPS.r +) + +endif() diff --git a/plugin/kps/KPS.cpp b/plugin/kps/KPS.cpp new file mode 100755 index 00000000..c45e07a0 --- /dev/null +++ b/plugin/kps/KPS.cpp @@ -0,0 +1,1140 @@ + +// kram - Copyright 2020 by Alec Miller. - MIT License +// The license and copyright notice shall be included +// in all copies or substantial portions of the Software. + +/////////////////////////////////////////////////////////////////////////// +// +// Copyright (c) 2014, Brendan Bolles +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +/////////////////////////////////////////////////////////////////////////// + +// ------------------------------------------------------------------------ +// +// DDS Photoshop plug-in +// +// by Brendan Bolles +// +// ------------------------------------------------------------------------ + +#include "KPS.h" + +#include "KPSVersion.h" +#include "KPSUI.h" + +//#include "crn_core.h" +//#include "crn_mipmapped_texture.h" + +#include +#include + +#include + +#ifdef __PIMac__ +#include +#endif + +#ifndef MIN +#define MIN(A,B) ( (A) < (B) ? 
(A) : (B)) +#endif + +#include "Kram.h" +#include "KTXImage.h" +#include "KramImage.h" +#include "KramImageInfo.h" + +// this is only on macOS +#include +// including FileUtilities pulls in ObjC crud to .cpp file +//#include "FileUtilities.h" +// these sporadically take intptr_t and int32 on Win, so fix signatures on port +extern OSErr PSSDKWrite(int32 refNum, int32 refFD, int16 usePOSIXIO, int32 * count, void * buffPtr); +extern OSErr PSSDKRead(int32 refNum, int32 refFD, int16 usePOSIXIO, int32 * count, void * buffPtr); +extern OSErr PSSDKSetFPos(int32 refNum, int32 refFD, int16 usePOSIXIO, short posMode, long posOff); + +using namespace kram; + +// take from KPSScripting.cpp +extern DialogFormat FormatToDialog(DDS_Format fmt); +extern DDS_Format DialogToFormat(DialogFormat fmt); +extern MyMTLPixelFormat FormatToPixelFormat(DDS_Format fmt); + +// this just strips args +#define macroUnusedArg(x) + +// global needed by a bunch of Photoshop SDK routines +SPBasicSuite *sSPBasic = NULL; + + +const char* kBundleIdentifier = "com.ba.kram-ps"; + +static void DoAbout(AboutRecordPtr aboutP) +{ +#ifdef __PIMac__ + const char * const plugHndl = kBundleIdentifier; + const void *hwnd = aboutP; +#else + const HINSTANCE const plugHndl = GetDLLInstance((SPPluginRef)aboutP->plugInRef); + HWND hwnd = (HWND)((PlatformData *)aboutP->platformData)->hwnd; +#endif + + DDS_About(DDS_Build_Complete_Manual, plugHndl, hwnd); +} + + +#pragma mark- + + +static void HandleError(GlobalsPtr globals, const char *errStr) +{ + const int size = MIN(255, strlen(errStr)); + + Str255 p_str; + p_str[0] = size; + strncpy((char *)&p_str[1], errStr, size); + + PIReportError(p_str); + gResult = errReportString; // macro uses globals +} + +#pragma mark- + +static Rect ConvertRect(VRect rect) { + Rect r; + r.left = rect.left; + r.right = rect.right; + r.top = rect.top; + r.bottom = rect.bottom; + return r; +} + + +#pragma mark- + +static void InitGlobals(GlobalsPtr globals) +{ + // create "globals" as a our struct global pointer so that any + // macros work: + //GlobalsPtr globals = (GlobalsPtr)globalPtr; + + // load options + memset(&gInOptions, 0, sizeof(gInOptions)); + strncpy(gInOptions.sig, "Krmi", 4); + gInOptions.version = 1; + gInOptions.alpha = DDS_ALPHA_CHANNEL; // ignored + + // save options + memset(&gOptions, 0, sizeof(gOptions)); + strncpy(gOptions.sig, "Krmo", 4); + gOptions.version = 1; + gOptions.format = DDS_FMT_RGBA8; + gOptions.alpha = DDS_ALPHA_CHANNEL; // ignored + gOptions.premultiply = FALSE; // ignored + gOptions.mipmap = FALSE; // ignored + gOptions.filter = DDS_FILTER_MITCHELL; // ignored + gOptions.cubemap = FALSE; // ignored +} + +// TODO: replace handles with buffers, but revertInfo is a HANDLE, so how to update that? 
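+// Note: these thin wrappers route Handle allocation, locking, and resizing
+// through the host's handleProcs suite, so Photoshop owns the memory behind
+// gStuff->revertInfo that ReadParams/WriteParams below copy the options into.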
+ +static Handle myNewHandle(GlobalsPtr globals, const int32 inSize) +{ + return gStuff->handleProcs->newProc(inSize); +} + +static Ptr myLockHandle(GlobalsPtr globals, Handle h) +{ + return gStuff->handleProcs->lockProc(h, TRUE); +} + +static void myUnlockHandle(GlobalsPtr globals, Handle h) +{ + gStuff->handleProcs->unlockProc(h); +} + +static int32 myGetHandleSize(GlobalsPtr globals, Handle h) +{ + return gStuff->handleProcs->getSizeProc(h); +} + +static void mySetHandleSize(GlobalsPtr globals, Handle h, const int32 inSize) +{ + gStuff->handleProcs->setSizeProc(h, inSize); +} + +// newHandle doesn't have matching call to this +//static void myDisposeHandle(GlobalsPtr globals, Handle h) +//{ +// gStuff->handleProcs->disposeProc(h); +//} + + +class PSStream +{ +public: + PSStream(int32_t fd); + virtual ~PSStream() {}; + + bool read(void* pBuf, int32_t len); + bool write(const void* pBuf, int32_t len); + //bool flush() { return true; }; + uint64_t size(); + //uint64_t tell(); + bool seek(uint64_t ofs); + +private: + int32_t _fd; +}; + +// posix not supported on Windows, why? + + +PSStream::PSStream(int32_t fd) + : _fd(fd) +{ + //seek(0); +} + +bool PSStream::read(void* pBuf, int32_t len) +{ + OSErr err = PSSDKRead(0, _fd, (int16_t)true, &len, pBuf); + return err == 0; +} + + +bool PSStream::write(const void* pBuf, int32_t len) +{ + OSErr err = PSSDKWrite(0, _fd, (int16_t)true, &len, (void*)pBuf); + return err == 0; + +} + +// not sure why this isn't a part of the api, and neither is tell? +uint64_t PSStream::size() +{ + struct stat st; + fstat(_fd, &st); + return st.st_size; + +} + +bool PSStream::seek(uint64_t offset) +{ + // seek from begnning + OSErr err = PSSDKSetFPos(0, _fd, (int16_t)true, 1, (long)offset); + return err == 0; +} + + +#pragma mark- + + + + +// Additional parameter functions +// These transfer settings to and from gStuff->revertInfo + +template +static bool ReadParams(GlobalsPtr globals, T *options) +{ + bool found_revert = FALSE; + + if ( gStuff->revertInfo != NULL ) + { + if( myGetHandleSize(globals, gStuff->revertInfo) == sizeof(T) ) + { + T *flat_options = (T *)myLockHandle(globals, gStuff->revertInfo); + + memcpy((char*)options, (char*)flat_options, sizeof(T) ); + + myUnlockHandle(globals, gStuff->revertInfo); + + found_revert = TRUE; + } + } + + return found_revert; +} + +template +static void WriteParams(GlobalsPtr globals, T *options) +{ + T *flat_options = NULL; + + if (gStuff->hostNewHdl != NULL) // we have the handle function + { + if (gStuff->revertInfo == NULL) + { + gStuff->revertInfo = myNewHandle(globals, sizeof(T) ); + } + else + { + if(myGetHandleSize(globals, gStuff->revertInfo) != sizeof(T) ) + mySetHandleSize(globals, gStuff->revertInfo, sizeof(T) ); + } + + flat_options = (T *)myLockHandle(globals, gStuff->revertInfo); + + memcpy((char*)flat_options, (char*)options, sizeof(T) ); + + myUnlockHandle(globals, gStuff->revertInfo); + } +} + + +// this is called first on read +static void DoReadPrepare(GlobalsPtr globals) +{ + // posix only on Mac + if (!gStuff->hostSupportsPOSIXIO) + { + //data->gResult = errPlugInHostInsufficient; + HandleError(globals, "Read - only support posix io"); + return; + } + + // set to indicate posixIO usage + gStuff->pluginUsingPOSIXIO = TRUE; + + + if (!gStuff->HostSupports32BitCoordinates) + { + HandleError(globals, "Read - only support imageSize32"); + return; + } + + // have to ack that plug supports 32-bit + gStuff->PluginUsing32BitCoordinates = TRUE; + + gStuff->maxData = 0; +} + +// read first 4 bytes and determine 
the file system +static void DoFilterFile(GlobalsPtr globals) +{ + // Note: for now only suppor KTX + //#define DDS_SIG "DDS " + + // note 6 instead of 4 chars + PSStream stream(gStuff->posixFileDescriptor); + + if (!stream.seek(0)) { + HandleError(globals, "Read - cannot rewind in filter"); + return; + } + + bool isKTX = false; + bool isKTX2 = false; + + uint8_t hdr[6]; + if (stream.read(hdr, kKTXIdentifierSize)) { + if (memcmp(hdr, kKTXIdentifier, kKTXIdentifierSize) == 0) + isKTX = true; + else if (memcmp(hdr, kKTX2Identifier, kKTXIdentifierSize) == 0) + isKTX2 = true; + } + + // TODO: should this also filter out ktx/ktx2 that are unsupported + // could mostly look at header except in case of ASTC HDR where ktx + // must also look for format prop. + + if (!(isKTX || isKTX2)) { + gResult = formatCannotRead; + } +} + + +static void DoReadStart(GlobalsPtr globals) +{ + gResult = noErr; + + // read it a second time, but only the header + bool isKTX = false; + bool isKTX2 = false; + + PSStream stream(gStuff->posixFileDescriptor); + if (!stream.seek(0)) { + HandleError(globals, "Read - cannot rewind"); + return; + } + + uint8_t hdr[6]; + if (stream.read(hdr, kKTXIdentifierSize)) { + + if (memcmp(hdr, kKTXIdentifier, kKTXIdentifierSize) == 0) + isKTX = true; + else if (memcmp(hdr, kKTX2Identifier, kKTXIdentifierSize) == 0) + isKTX2 = true; + } + + + if (!(isKTX || isKTX2)) { + HandleError(globals, "Read - no valid ktx/ktx2 signature"); + return; + } + + int32_t w, h; + MyMTLPixelFormat format; + KTXHeader header; + KTX2Header header2; + + if (!stream.seek(0)) { + HandleError(globals, "Read - cannot rewind after sig"); + return; + } + + if (isKTX) { + if (!stream.read(&header, sizeof(KTXHeader))) + { + HandleError(globals, "Read - couldn't read ktx header"); + return; + } + + w = header.pixelWidth; + h = header.pixelHeight; + format = header.metalFormat(); + } + else { + if (!stream.read(&header2, sizeof(KTX2Header))) + { + HandleError(globals, "Read - couldn't read ktx2 header"); + return; + } + + w = header2.pixelWidth; + h = header2.pixelHeight; + format = vulkanToMetalFormat(header2.vkFormat); + } + + gStuff->imageMode = plugInModeRGBColor; + + bool hasAlpha = isAlphaFormat(format); + int32_t numChannels = numChannelsOfFormat(format); + + gStuff->imageSize32.h = w; + gStuff->imageSize32.v = h; + + // plugin sets the numChannels here + // 3 for rgb, 4 for rgba, ... + gStuff->planes = numChannels; // (hasAlpha ? 4 : 3); + + if (numChannels == 4) { + bool isPremul = false; // TODO: hookup to premul state in props field (Alb.ra,Alb.ga,...) + gStuff->transparencyPlane = 3; + gStuff->transparencyMatting = isPremul ? 1 : 0; + } + + // 16f and 32f go to 32f + gStuff->depth = isFloatFormat(header.metalFormat()) ? 32 : 8; + + + bool reverting = ReadParams(globals, &gInOptions); + + if (!reverting && gStuff->hostSig != 'FXTC') + { + DDS_InUI_Data params; + + #ifdef __PIMac__ + const char * const plugHndl = kBundleIdentifier; + const void *hwnd = globals; + #else + const HINSTANCE const plugHndl = GetDLLInstance((SPPluginRef)gStuff->plugInRef); + HWND hwnd = (HWND)((PlatformData *)gStuff->platformData)->hwnd; + #endif + + // DDS_InUI is responsible for not popping a dialog if the user + // didn't request it. It still has to set the read settings from preferences though. 
+ bool result = DDS_InUI(¶ms, hasAlpha, plugHndl, hwnd); + + if(result) + { + gInOptions.alpha = params.alpha; + + WriteParams(globals, &gInOptions); + } + else + { + gResult = userCanceledErr; + } + } + +// the following was suppose to set alpha if it was set in the options +// but not using the option anymore. Honoring the format of the src file. +// if(gInOptions.alpha == DDS_ALPHA_TRANSPARENCY && gStuff->planes == 4) +} + + +void CopyImageRectToPS(GlobalsPtr globals, const KTXImage& image, int32_t mipLevel) +{ + // TODO: may need to decocde compressed KTX if want to support those + int32_t numPlanes = MAX(4, gStuff->planes); + + int32_t w = image.width; + //int32_t h = image.height; + //int32_t rowBytes = numPlanes * w; + const uint8_t* pixels = image.fileData + image.mipLevels[mipLevel].offset; + + gStuff->data = (void*)pixels; + + gStuff->planeBytes = 1; + gStuff->colBytes = gStuff->planeBytes * numPlanes; + gStuff->rowBytes = gStuff->colBytes * w; + + gStuff->loPlane = 0; + gStuff->hiPlane = numPlanes - 1; + + gStuff->theRect32.left = 0; + gStuff->theRect32.right = gStuff->imageSize32.h; + gStuff->theRect32.top = 0; + gStuff->theRect32.bottom = gStuff->imageSize32.v; + + gStuff->theRect = ConvertRect(gStuff->theRect32); + + // THis actuall writes the rectangle above from data + gResult = AdvanceState(); + + // very important! + gStuff->data = NULL; +} + +static void DoReadContinue(GlobalsPtr globals) +{ + gResult = noErr; + + PSStream stream(gStuff->posixFileDescriptor); + + if (!stream.seek(0)) { + HandleError(globals, "Read - cannot rewind after sig"); + return; + } + + // read it yet a third time, this time reading the first mip + uint64_t size = stream.size(); + + // read entire ktx/2 into memory (ideally mmap it) + std::vector data; + data.resize(size); + + if (!stream.read(data.data(), data.size())) { + HandleError(globals, "Read - Couldn't read file"); + return; + } + + KTXImage srcImage; + if (!srcImage.open(data.data(), data.size())) { + HandleError(globals, "Read - Couldn't parse file"); + return; + } + + auto pixelFormat = srcImage.pixelFormat; + + KTXImage* outputImage = &srcImage; + Image imageDecoder; + KTXImage decodedImage; + + if (isExplicitFormat(pixelFormat)) { + if (isFloatFormat(pixelFormat)) { + HandleError(globals, "Read - can't decode explicit texture format or type"); + return; + + // TODO: not sure that decode does this, code for this exists when a KTX file is imported to kram + // and then that's converted to an Image to feed to mip gen on the encode side. 
+// if (isHalfFormat(pixelFormat)) { +// TexEncoder decoderType = kTexEncoderUnknown; +// if (!validateFormatAndDecoder(srcImage.textureType, pixelFormat, decoderType)) { +// HandleError(globals, "Read - can't decode this texture format or type"); +// return; +// } +// +// // only need to decode 16f -> 32f, since PS only has 8u, 16u, and 32f +// TexEncoder decoder = kTexEncoderUnknown; +// if (!imageDecoder.decode(srcImage, decodedImage, decoder, false, "")) { +// HandleError(globals, "Read - Couldn't decode file"); +// return; +// } +// outputImage = &decodedImage; +// } + } + } + else if (isHdrFormat(pixelFormat)){ + // TODO: hdr block encoded formats must be decoded + // only ASTC and BC6 formats, but no BC6 support right now + HandleError(globals, "Read - can't decode hdr texture format or type"); + return; + } + else { + TexEncoder decoder = kTexEncoderUnknown; + if (!validateFormatAndDecoder(srcImage.textureType, pixelFormat, decoder)) { + HandleError(globals, "Read - can't decode this texture format or type"); + return; + } + + // ldr block encoded formats must be decoded + if (!imageDecoder.decode(srcImage, decodedImage, decoder, false, "")) { + HandleError(globals, "Read - Couldn't decode file"); + return; + } + outputImage = &decodedImage; + } + + CopyImageRectToPS(globals, *outputImage, 0); +} + + + + +static void DoReadFinish(GlobalsPtr macroUnusedArg(globals)) +{ + +} + +#pragma mark- + +static void DoOptionsPrepare(GlobalsPtr globals) +{ + gStuff->maxData = 0; +} + + +static void DoOptionsStart(GlobalsPtr globals) +{ + ReadParams(globals, &gOptions); + + if( ReadScriptParamsOnWrite(globals) ) + { + bool have_transparency = false; + const char *alpha_name = NULL; + + if (gStuff->hostSig == '8BIM') + // this is a PSD file? + have_transparency = (gStuff->documentInfo && gStuff->documentInfo->mergedTransparency); + else + // either rgba or la + have_transparency = (gStuff->planes == 2 || gStuff->planes == 4); + + + if (gStuff->documentInfo && gStuff->documentInfo->alphaChannels) + alpha_name = gStuff->documentInfo->alphaChannels->name; + + + DDS_OutUI_Data params; + + params.format = FormatToDialog(gOptions.format); + + params.alpha = (DialogAlpha)gOptions.alpha; + params.premultiply = gOptions.premultiply; + + params.mipmap = gOptions.mipmap; + + params.filter = (gOptions.filter == DDS_FILTER_BOX ? DIALOG_FILTER_BOX : + gOptions.filter == DDS_FILTER_TENT ? DIALOG_FILTER_TENT : + gOptions.filter == DDS_FILTER_LANCZOS4 ? DIALOG_FILTER_LANCZOS4 : + gOptions.filter == DDS_FILTER_MITCHELL ? DIALOG_FILTER_MITCHELL : + gOptions.filter == DDS_FILTER_KAISER ? DIALOG_FILTER_KAISER : + DIALOG_FILTER_MITCHELL); + + params.cubemap = gOptions.cubemap; + + #ifdef __PIMac__ + const char * const plugHndl = kBundleIdentifier; + const void *hwnd = globals; + #else + const HINSTANCE const plugHndl = GetDLLInstance((SPPluginRef)gStuff->plugInRef); + HWND hwnd = (HWND)((PlatformData *)gStuff->platformData)->hwnd; + #endif + + const bool ae_ui = (gStuff->hostSig == 'FXTC'); + + + bool result = DDS_OutUI(¶ms, have_transparency, alpha_name, ae_ui, plugHndl, hwnd); + + + if (result) + { + gOptions.format = DialogToFormat(params.format); + + gOptions.alpha = params.alpha; + gOptions.premultiply = params.premultiply; + + gOptions.mipmap = params.mipmap; + + gOptions.filter = (params.filter == DIALOG_FILTER_BOX ? DDS_FILTER_BOX : + params.filter == DIALOG_FILTER_TENT ? DDS_FILTER_TENT : + params.filter == DIALOG_FILTER_LANCZOS4 ? DDS_FILTER_LANCZOS4 : + params.filter == DIALOG_FILTER_MITCHELL ? 
DDS_FILTER_MITCHELL : + params.filter == DIALOG_FILTER_KAISER ? DDS_FILTER_KAISER : + DDS_FILTER_MITCHELL); + + gOptions.cubemap = params.cubemap; + + + WriteParams(globals, &gOptions); + WriteScriptParamsOnWrite(globals); + } + else + gResult = userCanceledErr; + } +} + + +static void DoOptionsContinue(GlobalsPtr macroUnusedArg(globals)) +{ + +} + + +static void DoOptionsFinish(GlobalsPtr macroUnusedArg(globals)) +{ + +} + +#pragma mark- + +// Tis is an esimate of memory use? + +static void DoEstimatePrepare(GlobalsPtr globals) +{ + if (!gStuff->HostSupports32BitCoordinates) + { + HandleError(globals, "only support imageSize32"); + return; + } + + // poxis only on Mac + if (!gStuff->hostSupportsPOSIXIO) + { + HandleError(globals, "only support posix io"); + return; + } + + // set to indicate posixIO usage + gStuff->pluginUsingPOSIXIO = TRUE; + + // have to ack that plug supports 32-bit + gStuff->PluginUsing32BitCoordinates = TRUE; + + gStuff->maxData = 0; +} + + +static void DoEstimateStart(GlobalsPtr globals) +{ + int64_t width = gStuff->imageSize32.h; + int64_t height = gStuff->imageSize32.v; + + // TODO: this assumes single 2d image in dds, and an 8-bit depth multiple + int64_t numPlanes = MAX(4, gStuff->planes); + int64_t depth = gStuff->depth; // this is in bits + + int64_t dataBytes = (width * height * numPlanes * depth + 7) >> 3; + + // this is how much space we need to write out data as KTX/KTX2 file + // KTX can precompute max size from width/height and mip count, but ktx2 is compressed + // I think PS will make sure there are enough + + // Can we get this number quickly out of kram based on mip setting, format, etc. + // May not always write out full depth. May have encoded format. + + size_t bytesToRead = dataBytes; + + gStuff->minDataBytes = MIN(bytesToRead / 2, INT_MAX); + gStuff->maxDataBytes = MIN(bytesToRead, INT_MAX); + + gStuff->data = NULL; +} + + +static void DoEstimateContinue(GlobalsPtr macroUnusedArg(globals)) +{ + +} + + +static void DoEstimateFinish(GlobalsPtr macroUnusedArg(globals)) +{ + +} + +#pragma mark- + +static void DoWritePrepare(GlobalsPtr globals) +{ + if (!gStuff->HostSupports32BitCoordinates) + { + HandleError(globals, "only support imageSize32"); + return; + } + + // poxis only on Mac + if (!gStuff->hostSupportsPOSIXIO) + { + HandleError(globals, "only support posix io"); + return; + } + + // set to indicate posixIO usage + gStuff->pluginUsingPOSIXIO = TRUE; + + // have to ack that plug supports 32-bit + gStuff->PluginUsing32BitCoordinates = TRUE; + + gStuff->maxData = 0; +} + +// TODO: extent to take a rect, and return he data +static bool CopyImageRectFromPS(GlobalsPtr globals, vector&pixels, int32_t numPlanes, int32_t width, int32_t height) +{ + int32_t rowBytes = width * numPlanes; + + // this is where data will go + pixels.resize(rowBytes * height); + + gStuff->loPlane = 0; + gStuff->hiPlane = numPlanes-1; // either b for rgb, or a for rgba are the last byte + gStuff->planeBytes = sizeof(unsigned char); // 1 for interleaved data + gStuff->colBytes = gStuff->planeBytes * numPlanes; + gStuff->rowBytes = rowBytes; // * gStuff->colBytes; // interleaved or non-interleaved data is why colBytes is here + + gStuff->theRect32.left = 0; + gStuff->theRect32.right = gStuff->theRect32.left + width; + gStuff->theRect32.top = 0; + gStuff->theRect32.bottom = gStuff->theRect32.top + height; + + // set deprecated rect + gStuff->theRect = ConvertRect(gStuff->theRect32); + + // This fills out the pixel data + gStuff->data = pixels.data(); + + gResult = 
AdvanceState(); + if (gResult != noErr) { + HandleError(globals, "Write - AdvanceState failed to read pixels"); + } + + // this pack alpha into 3rd channel? + bool have_alpha_channel = (gStuff->channelPortProcs && gStuff->documentInfo && gStuff->documentInfo->alphaChannels); + if (gResult == noErr && have_alpha_channel) // && gOptions.alpha == DDS_ALPHA_CHANNEL) + { + ReadPixelsProc ReadProc = gStuff->channelPortProcs->readPixelsProc; + + ReadChannelDesc *alpha_channel = gStuff->documentInfo->alphaChannels; + + VRect wroteRect; + VRect writeRect = { 0, 0, height, width }; // tlbr + + PSScaling scaling; + scaling.sourceRect = writeRect; + scaling.destinationRect = writeRect; + + // this is converting to bits + PixelMemoryDesc memDesc = { (char *)gStuff->data, gStuff->rowBytes * 8, gStuff->colBytes * 8, 3 * 8, gStuff->depth }; + + gResult = ReadProc(alpha_channel->port, &scaling, &writeRect, &memDesc, &wroteRect); + + if (gResult != noErr) { + HandleError(globals, "Write - convert layer to 4 channels failed"); + } + } + + // very important!, so it's not filled in with data again and again on remaining AdvanceState calls + gStuff->data = NULL; + + if (gResult != noErr) { + return false; + } + + return true; +} + +static void DoWriteStart(GlobalsPtr globals) +{ + ReadParams(globals, &gOptions); + ReadScriptParamsOnWrite(globals); + + // Xcode can't debug p gStuff->..., so must type p global->formatParamBlock->... + + int32_t numPlanes = MAX(4, gStuff->planes); + + if (gStuff->imageMode != plugInModeRGBColor) { + HandleError(globals, "Not rgb color"); + return; + } + + if (gStuff->depth != 8) { + HandleError(globals, "Not 8-bit color"); + return; + } + + // Note: loadImageFromPixels only works with 4 byte image right now + bool haveAlpha = (numPlanes == 4) || ((gStuff->channelPortProcs && gStuff->documentInfo && gStuff->documentInfo->alphaChannels)); + if ((numPlanes != 4) || (gStuff->planes == 3 && !haveAlpha)) + { + HandleError(globals, "Not 4 planes, or 3 with alpha"); + return; + } + + int width = gStuff->imageSize32.h; + int height = gStuff->imageSize32.v; + + // this is a potentiall large memory allocation for one level of the image + std::vector pixels; + if (!CopyImageRectFromPS(globals, pixels, numPlanes, width, height)) { + //return; + } + + Image srcImage; + ImageInfo dstImageInfo; + KTXImage dstImage; + + if (gResult == noErr) + { + // convert pixels into ktx with mips if needed in memory + // note: cannot roundtrip mips, so may want to not do mips or block encodes here + // try to support + // TODO: this is limiting since loadImage must be single 2D image + + if (!srcImage.loadImageFromPixels(pixels, width, height, true, true)) { + HandleError(globals, "Write - loadImageFromPixels failed"); + } + } + + if (gResult == noErr) + { + MyMTLPixelFormat pixelFormat = FormatToPixelFormat(gOptions.format); + + // setup all the data to generate dstImage + // now apply user picked format + ImageInfoArgs dstImageInfoArgs; + dstImageInfoArgs.pixelFormat = pixelFormat; + dstImageInfoArgs.doMipmaps = false; + + // ? 
+ // photoshop provides raw image as unmultiplied, but need to premul it +// if (haveAlpha) { +// dstImageInfoArgs.isPremultiplied = true; +// } + + if (!validateFormatAndEncoder(dstImageInfoArgs)) { + HandleError(globals, "Write - validate format failed"); + } + else { + dstImageInfo.initWithArgs(dstImageInfoArgs); + + if (!srcImage.encode(dstImageInfo, dstImage)) { + HandleError(globals, "Write - encode failed"); + } + } + } + + // testing only + //HandleError(globals, "Write - made it past encode"); + + if (gResult == noErr) { + // this needs to write ktx with mips and all to the memory, then copy it to dataFork + // is this dataFork even valid anymore + PSStream stream(gStuff->posixFileDescriptor); // write + + // TOOD: this is writing 1k x 1k image out as 8MB instead of 4MB + // see if validate above fixes that. + + if (!stream.write(dstImage.fileData, (int32_t)dstImage.fileDataLength)) { + HandleError(globals, "Write - stream write failed"); + } + } +} + + +static void DoWriteContinue(GlobalsPtr macroUnusedArg(globals)) +{ + +} + + +static void DoWriteFinish(GlobalsPtr globals) +{ + if (gStuff->hostSig != 'FXTC') + WriteScriptParamsOnWrite(globals); +} + + +#pragma mark- + + +DLLExport MACPASCAL void PluginMain(const short selector, + FormatRecord *formatParamBlock, + intptr_t *dataPointer, + short *result) +{ + if (selector == formatSelectorAbout) + { + sSPBasic = ((AboutRecordPtr)formatParamBlock)->sSPBasic; + + DoAbout((AboutRecordPtr)formatParamBlock); + } + else + { + sSPBasic = formatParamBlock->sSPBasic; //thanks Tom + + GlobalsPtr globals = (GlobalsPtr)*dataPointer; + if (globals == NULL) + { + globals = (GlobalsPtr)malloc(sizeof(Globals)); + + if(globals == NULL) { + *result = memFullErr; + return; + } + + InitGlobals(globals); + + *dataPointer = (intptr_t)globals; + } + + globals->result = result; + globals->formatParamBlock = formatParamBlock; + +#if 1 + static const FProc routineForSelector [] = + { + /* formatSelectorAbout DoAbout, */ + + /* formatSelectorReadPrepare */ DoReadPrepare, + /* formatSelectorReadStart */ DoReadStart, + /* formatSelectorReadContinue */ DoReadContinue, + /* formatSelectorReadFinish */ DoReadFinish, + + /* formatSelectorOptionsPrepare */ DoOptionsPrepare, + /* formatSelectorOptionsStart */ DoOptionsStart, + /* formatSelectorOptionsContinue */ DoOptionsContinue, + /* formatSelectorOptionsFinish */ DoOptionsFinish, + + /* formatSelectorEstimatePrepare */ DoEstimatePrepare, + /* formatSelectorEstimateStart */ DoEstimateStart, + /* formatSelectorEstimateContinue */ DoEstimateContinue, + /* formatSelectorEstimateFinish */ DoEstimateFinish, + + /* formatSelectorWritePrepare */ DoWritePrepare, + /* formatSelectorWriteStart */ DoWriteStart, + /* formatSelectorWriteContinue */ DoWriteContinue, + /* formatSelectorWriteFinish */ DoWriteFinish, + + /* formatSelectorFilterFile */ DoFilterFile + }; + + // Dispatch selector + if (selector > formatSelectorAbout && selector <= formatSelectorFilterFile) + (routineForSelector[selector-1])(globals); // dispatch using jump table + else + gResult = formatBadParameters; + +#else + // This explicit dispatch is much easier to follow + // can can set breakpoints, and step from a central dispatch point here. + + // Dispatch selector. 
+ switch (selector) { + case formatSelectorReadPrepare: + DoReadPrepare(format_record, data, result); + break; + case formatSelectorReadStart: + DoReadStart(format_record, data, result); + break; + case formatSelectorReadContinue: + DoReadContinue(format_record, data, result); + break; + case formatSelectorReadFinish: + DoReadFinish(format_record, data, result); + break; + + case formatSelectorOptionsPrepare: + DoOptionsPrepare(format_record, data, result); + break; + case formatSelectorOptionsStart: + DoOptionsStart(format_record, data, result, plugin_ref); + break; + case formatSelectorOptionsContinue: + DoOptionsContinue(format_record, data, result); + break; + case formatSelectorOptionsFinish: + DoOptionsFinish(format_record, data, result); + break; + + case formatSelectorEstimatePrepare: + DoEstimatePrepare(format_record, data, result); + break; + case formatSelectorEstimateStart: + DoEstimateStart(format_record, data, result); + break; + case formatSelectorEstimateContinue: + DoEstimateContinue(format_record, data, result); + break; + case formatSelectorEstimateFinish: + DoEstimateFinish(format_record, data, result); + break; + + case formatSelectorWritePrepare: + DoWritePrepare(format_record, data, result); + break; + case formatSelectorWriteStart: + DoWriteStart(format_record, data, result); + break; + case formatSelectorWriteContinue: + DoWriteContinue(format_record, data, result); + break; + case formatSelectorWriteFinish: + DoWriteFinish(format_record, data, result); + break; + + case formatSelectorReadLayerStart: + DoReadLayerStart(format_record, data, result); + break; + case formatSelectorReadLayerContinue: + DoReadLayerContinue(format_record, data, result); + break; + case formatSelectorReadLayerFinish: + DoReadLayerFinish(format_record, data, result); + break; + + case formatSelectorWriteLayerStart: + DoWriteLayerStart(format_record, data, result); + break; + case formatSelectorWriteLayerContinue: + DoWriteLayerContinue(format_record, data, result); + break; + case formatSelectorWriteLayerFinish: + DoWriteLayerFinish(format_record, data, result); + break; + + case formatSelectorFilterFile: + DoFilterFile(format_record, data, result); + break; + } + } +#endif + + } +} diff --git a/plugin/kps/KPS.h b/plugin/kps/KPS.h new file mode 100755 index 00000000..1399fe97 --- /dev/null +++ b/plugin/kps/KPS.h @@ -0,0 +1,214 @@ + +// kram - Copyright 2020 by Alec Miller. - MIT License +// The license and copyright notice shall be included +// in all copies or substantial portions of the Software. + +/////////////////////////////////////////////////////////////////////////// +// +// Copyright (c) 2014, Brendan Bolles +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +/////////////////////////////////////////////////////////////////////////// + +// ------------------------------------------------------------------------ +// +// DDS Photoshop plug-in +// +// by Brendan Bolles +// +// ------------------------------------------------------------------------ + +#pragma once + +#include "PIDefines.h" +#include "PIFormat.h" +#include "PIExport.h" +#include "PIUtilities.h" +#include "PIProperties.h" + + +// these are format settings for output +enum { + // lossy BC + DDS_FMT_BC1 = 0, + DDS_FMT_BC1S = 1, + + DDS_FMT_BC3 = 4, + DDS_FMT_BC3S = 5, + + DDS_FMT_BC4 = 6, + DDS_FMT_BC4S = 7, + + DDS_FMT_BC5 = 8, + DDS_FMT_BC5S = 9, + + DDS_FMT_BC7 = 12, + DDS_FMT_BC7S = 13, + + // lossless formats + DDS_FMT_R8 = 128, + DDS_FMT_RG8 = 138, + DDS_FMT_RGBA8 = 148, + DDS_FMT_RGBA8S = 158, + + DDS_FMT_R16F = 168, + DDS_FMT_RG16F = 178, + DDS_FMT_RGBA16F = 188, + + DDS_FMT_R32F = 198, + DDS_FMT_RG32F = 208, + DDS_FMT_RGBA32F = 218, + + // TODO: not sure this should allow lossy export + // TODO: add ASTC4x4, 5x5, 6x6, 8x8 + // TODO: add ETC2 + + // TODO: R16S format for depth/heighmaps?, PS can store this for edits + // PS stores data as 16S acco +}; +typedef uint8 DDS_Format; + + +//----------------------------- + +#if 1 // not used + +// not used +enum { + DDS_ALPHA_NONE = 0, + DDS_ALPHA_TRANSPARENCY, + DDS_ALPHA_CHANNEL +}; +typedef uint8 DDS_Alpha; + +// not used +enum{ + DDS_FILTER_BOX, + DDS_FILTER_TENT, + DDS_FILTER_LANCZOS4, + DDS_FILTER_MITCHELL, + DDS_FILTER_KAISER +}; +typedef uint8 DDS_Filter; + + +// TODO: revisit these options, these are mostly no longer used + +// Load options +typedef struct { + char sig[4]; + uint8 version; + DDS_Alpha alpha; + uint8 reserved[26]; + +} DDS_inData; + +// Save options +typedef struct { + char sig[4]; + uint8 version; + DDS_Format format; + DDS_Alpha alpha; + Boolean premultiply; + Boolean mipmap; + DDS_Filter filter; + Boolean cubemap; + uint8 reserved[245]; + +} DDS_outData; + +#else + +// no input settings + +struct DDS_outData +{ + DDS_Format format; + + // these need UI, should we only do lossless non-mipped import/export + // kram can do all this, but premul here is lossy + // much better to script presets for export, and pick from those + // the plugin could read the preset file (or embed it). + // + //Boolean premultiply; + //Boolean mipmap; +}; + +#endif + + +typedef struct Globals +{ // This is our structure that we use to pass globals between routines: + + short *result; // Must always be first in Globals. + FormatRecord *formatParamBlock; // Must always be second in Globals. 
+ + //Handle fileH; // stores the entire binary file + + DDS_inData in_options; + DDS_outData options; + +} Globals, *GlobalsPtr; + + + +// The routines that are dispatched to from the jump list should all be +// defined as +// void RoutineName (GPtr globals); +// And this typedef will be used as the type to create a jump list: +typedef void (* FProc)(GlobalsPtr globals); + + +//------------------------------------------------------------------------------- +// Globals -- definitions and macros +//------------------------------------------------------------------------------- + +#define gResult (*(globals->result)) +#define gStuff (globals->formatParamBlock) + +#define gInOptions (globals->in_options) +#define gOptions (globals->options) + +#define gAliasHandle (globals->aliasHandle) + +//------------------------------------------------------------------------------- +// Prototypes +//------------------------------------------------------------------------------- + + +// Everything comes in and out of PluginMain. It must be first routine in source: +DLLExport MACPASCAL void PluginMain (const short selector, + FormatRecord *formatParamBlock, + intptr_t *data, + short *result); + +// Scripting functions +Boolean ReadScriptParamsOnWrite (GlobalsPtr globals); // Read any scripting params. + +OSErr WriteScriptParamsOnWrite (GlobalsPtr globals); // Write any scripting params. + +//------------------------------------------------------------------------------- + diff --git a/plugin/kps/KPS.r b/plugin/kps/KPS.r new file mode 100755 index 00000000..be8da61b --- /dev/null +++ b/plugin/kps/KPS.r @@ -0,0 +1,456 @@ + +/////////////////////////////////////////////////////////////////////////// +// +// Copyright (c) 2014, Brendan Bolles +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +/////////////////////////////////////////////////////////////////////////// + +// ------------------------------------------------------------------------ +// +// DDS Photoshop plug-in +// +// by Brendan Bolles +// +// ------------------------------------------------------------------------ + +//------------------------------------------------------------------------------- +// Definitions -- Required by include files. 
+//------------------------------------------------------------------------------- + +#include "KPSVersion.h" + +// TODO: see if can simplify to kram +#define plugInName "kram" +#define plugInCopyrightYear DDS_Copyright_Year +#define plugInDescription DDS_Description +#define VersionString DDS_Version_String +#define ReleaseString DDS_Build_Date_Manual +#define CurrentYear DDS_Build_Year + +//------------------------------------------------------------------------------- +// Definitions -- Required by other resources in this rez file. +//------------------------------------------------------------------------------- + +// Dictionary (aete) resources: + +#define vendorName "kram" +#define plugInAETEComment DDS_Description + +// TODO: bump this sdk higher? +#define plugInSuiteID 'sdK4' +#define plugInClassID 'kram' +#define plugInEventID typeNull // must be this + +//------------------------------------------------------------------------------- +// Set up included files for Macintosh and Windows. +//------------------------------------------------------------------------------- + + +#if 0 +#include "PIDefines.h" +#else + + +/// Create a definition if we're on a Macintosh +#ifdef _WIN32 + #define __PIWin__ 1 + #define DLLExport extern "C" __declspec(dllexport) + +#else + // this pulls in Carbon.r, but trying not to use Carbon + // also CoreServices/CoreServices.r is pulled in from another Adobe header, which is also Carbon + //#include "MacOMacrezXcode.h" + +#define Macintosh 1 + +#ifndef TARGET_API_MAC_CARBON +#define TARGET_API_MAC_CARBON 1 +#endif + +#ifndef TARGET_MAC_OS +#define TARGET_MAC_OS 1 +#endif + +#include + + + #define __PIMac__ 1 + #define DLLExport extern "C" + + // instead of PIPlatform + #define PRAGMA_ONCE 1 + #define Macintosh 1 +#endif + +#ifdef __PIMac__ + #include "PIGeneral.r" +#elif defined(__PIWin__) + #include "PIGeneral.h" +#endif + + +//#include "PIUtilities.r" +#ifndef ResourceID + #define ResourceID 16000 +#endif + +#include "PITerminology.h" +#include "PIActions.h" + +#include "KPSTerminology.h" + +//------------------------------------------------------------------------------- +// PiPL resource +//------------------------------------------------------------------------------- + +#define USE_KTX 1 +#define USE_KTX2 0 + +resource 'PiPL' (ResourceID, plugInName " PiPL", purgeable) +{ + { + Kind { ImageFormat }, + Name { plugInName }, + + //Category { "KTX" }, + //Priority { 1 }, // Can use this to override a built-in Photoshop plug-in + + Version { (latestFormatVersion << 16) | latestFormatSubVersion }, + + #ifdef __PIMac__ + #if defined(__arm64__) + CodeMacARM64 { "PluginMain" }, + #endif + #if (defined(__x86_64__)) + CodeMacIntel64 { "PluginMain" }, + #endif + #else + #if defined(_WIN64) + CodeWin64X86 { "PluginMain" }, + #endif + + // kram-ps not supporting 32-bit PS + // CodeWin32X86 { "PluginMain" }, + #endif + + // ClassID, eventID, aete ID, uniqueString: + HasTerminology + { + plugInClassID, + plugInEventID, + ResourceID, + vendorName " " plugInName + }, + + SupportedModes + { + noBitmap, + noGrayScale, // TODO: add support + noIndexedColor, + doesSupportRGBColor, // this is the only supported + noCMYKColor, + noHSLColor, + noHSBColor, + noMultichannel, + noDuotone, + noLABColor + }, + + // Using this on macOS to avoid Carbon use. Uses file descriptor, + // but doesn't work on Win. Should really provide FILE* on all platforms. 
+ SupportsPOSIXIO {}, + + + EnableInfo { "in (PSHOP_ImageMode, RGBMode, RGBColorMode)" }, + + // TODO: can't get 'ktx2' extension files to show up + // tried add thta into list below + + #if USE_KTX + // ktx1 and 2 have the same 4 character start, then ' 1' or ' 2' + FmtFileType { 'KTX ', '8BIM' }, + ReadTypes { { 'KTX ', ' ' } }, + ReadExtensions { { 'ktx ' } }, + WriteExtensions { { 'ktx ' } }, + FilteredExtensions { { 'ktx ' } }, + #elif USE_KTX2 + FmtFileType { 'KTX2', '8BIM' }, + ReadTypes { { 'KTX2', ' ' } }, + ReadExtensions { { 'ktx2' } }, + WriteExtensions { { 'ktx2' } } // kram can't write KTX2, only read it + FilteredExtensions { { 'ktx2' } }, + #endif + + FormatFlags + { + fmtSavesImageResources, //(by saying we do, PS won't store them, thereby avoiding problems) + fmtCanRead, + fmtCanWrite, + fmtCanWriteIfRead, + fmtCanWriteTransparency, + fmtCannotCreateThumbnail + }, + + // commented these out, so can have larger array textures + //PlugInMaxSize { 8192, 8192 }, + //FormatMaxSize { { 8192, 8192 } }, + + // ? shouldn't this be 4, not 5? + FormatMaxChannels { { 0, 0, 0, 4, 0, 0, + 0, 0, 0, 0, 0, 0 } }, + + FormatICCFlags { iccCannotEmbedGray, + iccCannotEmbedIndexed, + iccCannotEmbedRGB, + iccCannotEmbedCMYK }, + + // consider this for reading chunks into layers, + // don't need writing can use CopyAllLayers to avoid wait for DoWriteLayer + // FormatLayerSupport{doesSupportFormatLayers}, + // FormatLayerSupportReadOnly{} + + }, +}; + +//------------------------------------------------------------------------------- +// Dictionary (scripting) resource +//------------------------------------------------------------------------------- + +resource 'aete' (ResourceID, plugInName " dictionary", purgeable) +{ + 1, 0, english, roman, /* aete version and language specifiers */ + { + vendorName, /* vendor suite name */ + "kram format", /* optional description */ + plugInSuiteID, /* suite ID */ + 1, /* suite code, must be 1 */ + 1, /* suite level, must be 1 */ + {}, /* structure for filters */ + { /* non-filter plug-in class here */ + "kram", /* unique class name */ + plugInClassID, /* class ID, must be unique or Suite ID */ + plugInAETEComment, /* optional description */ + { /* define inheritance */ + "", /* must be exactly this */ + keyInherits, /* must be keyInherits */ + classFormat, /* parent: Format, Import, Export */ + "parent class format", /* optional description */ + flagsSingleProperty, /* if properties, list below */ + + "Format", + keyDDSformat, + typeEnumerated, + "Output encode format", + flagsSingleProperty, + + // "Alpha Channel", + // keyDDSalpha, + // typeEnumerated, + // "Source of the alpha channel", + // flagsSingleProperty, + // + // "Premultiply", + // keyDDSpremult, + // typeBoolean, + // "Premultiply RGB by Alpha", + // flagsSingleProperty, +// + // "Mipmap", + // keyDDSmipmap, + // typeBoolean, + // "Create Mipmaps", + // flagsSingleProperty, + // + // "Filter", + // keyDDSfilter, + // typeEnumerated, + // "Mipmap filter", + // flagsSingleProperty, +// + // "Cube Map", + // keyDDScubemap, + // typeBoolean, + // "Convert vertical cross to cube map", + // flagsSingleProperty, + }, + {}, /* elements (not supported) */ + /* class descriptions */ + }, + {}, /* comparison ops (not supported) */ + { /* any enumerations */ + typeDDSformat, + { + // explicit + "E8r", + formatR8, + "RGBA8", + + "E8rg", + formatRG8, + "RGBA8", + + "E84", + formatRGBA8, + "RGBA8", + + "E84S", + formatRGBA8S, + "RGBA8 srgb", + + "EHr", + formatR16F, + "RGBA16F", + + "EHrg", + formatRG16F, + 
"RGBA16F", + + "EH4", + formatRGBA16F, + "RGBA16F", + + "EFr", + formatR32F, + "RGBA32F", + + "EFrg", + formatRG32F, + "RGBA32F", + + "EF4", + formatRGBA32F, + "RGBA32F", + + // BC with and without srgb + "BC1", + formatBC1, + "BC1", + + "BC3", + formatBC3, + "BC3", + + "BC4", + formatBC4, + "BC4", + + "BC5", + formatBC5, + "BC5", + + "BC7", + formatBC7, + "BC7", + + + "BC1S", + formatBC1S, + "BC1 srgb", + + "BC3S", + formatBC3S, + "BC3 srgb", + + "BC4S", + formatBC4S, + "BC4 srgb", + + "BC5S", + formatBC5S, + "BC5 srgb", + + "BC7S", + formatBC7S, + "BC7 srgb" + + // TODO: add other formats + } + //typeAlphaChannel, + //{ + // "None", + // alphaChannelNone, + // "No alpha channel", + + // "Transparency", + // alphaChannelTransparency, + // "Get alpha from Transparency", + + // "Channel", + // alphaChannelChannel, + // "Get alpha from channels palette" + //}, + //typeFilter, + //{ + // "Box", + // filterBox, + // "Box filter", + + // "Tent", + // filterTent, + // "Tent filter", + + // "Lanczos4", + // filterLanczos4, + // "Lanczos4 filter", + + // "Mitchell", + // filterMitchell, + // "Mitchell filter", + + // "Kaiser", + // filterKaiser, + // "Kaiser filter" + //} + } + } +}; + + +#ifdef __PIMac__ + +//------------------------------------------------------------------------------- +// Version 'vers' resources. +//------------------------------------------------------------------------------- + +resource 'vers' (1, plugInName " Version", purgeable) +{ + 5, 0x50, final, 0, verUs, + VersionString, + VersionString " ©" plugInCopyrightYear " kram" +}; + +resource 'vers' (2, plugInName " Version", purgeable) +{ + 5, 0x50, final, 0, verUs, + VersionString, + "by Alec Miller (based on DDS plugin by Brendan Bolles)" +}; + + +#endif // __PIMac__ + + diff --git a/plugin/kps/KPSScripting.cpp b/plugin/kps/KPSScripting.cpp new file mode 100755 index 00000000..bb8d2324 --- /dev/null +++ b/plugin/kps/KPSScripting.cpp @@ -0,0 +1,296 @@ + +// kram - Copyright 2020 by Alec Miller. - MIT License +// The license and copyright notice shall be included +// in all copies or substantial portions of the Software. + +/////////////////////////////////////////////////////////////////////////// +// +// Copyright (c) 2014, Brendan Bolles +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +/////////////////////////////////////////////////////////////////////////// + +// ------------------------------------------------------------------------ +// +// DDS Photoshop plug-in +// +// by Brendan Bolles +// +// ------------------------------------------------------------------------ + +#include "PIDefines.h" +#include "KPS.h" + +#include "KPSTerminology.h" +#include "KPSUI.h" +#include "KramImageInfo.h" + +using namespace kram; + +struct Format { + DDS_Format fileFormat; // map to MyMTLPixelFormat + DialogFormat uiFormat; // for UI + uint32_t signature; // 4 char code + MyMTLPixelFormat pixelFormat; // this is what kram uses +}; + +// Note export needs to offer a menu of what format to convert to +// Incoming data is 8u, 16u, or 32f +// Update .r, UI.h file if adding more formats +const Format kFormatTable[] = { + { DDS_FMT_BC1, DIALOG_FMT_BC1, formatBC1, MyMTLPixelFormatBC1_RGBA }, + { DDS_FMT_BC3, DIALOG_FMT_BC3, formatBC3, MyMTLPixelFormatBC3_RGBA }, + { DDS_FMT_BC4, DIALOG_FMT_BC4, formatBC4, MyMTLPixelFormatBC4_RUnorm }, + { DDS_FMT_BC5, DIALOG_FMT_BC5, formatBC5, MyMTLPixelFormatBC5_RGUnorm }, + { DDS_FMT_BC7, DIALOG_FMT_BC7, formatBC7, MyMTLPixelFormatBC7_RGBAUnorm }, + + { DDS_FMT_BC1S, DIALOG_FMT_BC1S, formatBC1S, MyMTLPixelFormatBC1_RGBA_sRGB }, + { DDS_FMT_BC3S, DIALOG_FMT_BC3S, formatBC3S, MyMTLPixelFormatBC3_RGBA_sRGB }, + { DDS_FMT_BC4S, DIALOG_FMT_BC4S, formatBC4S, MyMTLPixelFormatBC4_RSnorm }, + { DDS_FMT_BC5S, DIALOG_FMT_BC5S, formatBC5S, MyMTLPixelFormatBC5_RGSnorm }, + { DDS_FMT_BC7S, DIALOG_FMT_BC7S, formatBC7S, MyMTLPixelFormatBC7_RGBAUnorm_sRGB }, + + // TODO: add ASTC + // TODO: add ETC2 + + { DDS_FMT_R8, DIALOG_FMT_R8, formatR8, MyMTLPixelFormatR8Unorm }, + { DDS_FMT_RG8, DIALOG_FMT_RG8, formatRG8, MyMTLPixelFormatRG8Unorm }, + { DDS_FMT_RGBA8, DIALOG_FMT_RGBA8, formatRGBA8, MyMTLPixelFormatRGBA8Unorm }, + { DDS_FMT_RGBA8S, DIALOG_FMT_RGBA8S, formatRGBA8S, MyMTLPixelFormatRGBA8Unorm_sRGB }, + + { DDS_FMT_R16F, DIALOG_FMT_R16F, formatR16F, MyMTLPixelFormatR16Float }, + { DDS_FMT_RG16F, DIALOG_FMT_RG16F, formatRG16F, MyMTLPixelFormatRG16Float }, + { DDS_FMT_RGBA16F, DIALOG_FMT_RGBA16F, formatRGBA16F, MyMTLPixelFormatRGBA16Float }, + + { DDS_FMT_R32F, DIALOG_FMT_R32F, formatR32F, MyMTLPixelFormatR32Float }, + { DDS_FMT_RG32F, DIALOG_FMT_RG32F, formatRG32F, MyMTLPixelFormatRG32Float }, + { DDS_FMT_RGBA32F, DIALOG_FMT_RGBA32F, formatRGBA32F, MyMTLPixelFormatRGBA32Float }, +}; +const int32_t kFormatTableSize = sizeof(kFormatTable) / sizeof(kFormatTable[0]); + +static DDS_Format SignatureToFormat(OSType fmt) +{ + for (int32_t i = 0; i < kFormatTableSize; ++i) { + if (fmt == kFormatTable[i].signature) { + return kFormatTable[i].fileFormat; + } + } + + return DDS_FMT_RGBA8; +} + +static OSType FormatToSignature(DDS_Format fmt) +{ + for (int32_t i = 0; i < kFormatTableSize; ++i) { + if (fmt == kFormatTable[i].fileFormat) { + return kFormatTable[i].signature; + } + } + + return formatRGBA8; +} + +DialogFormat 
FormatToDialog(DDS_Format fmt)
+{
+    for (int32_t i = 0; i < kFormatTableSize; ++i) {
+        if (fmt == kFormatTable[i].fileFormat) {
+            return kFormatTable[i].uiFormat;
+        }
+    }
+
+    return DIALOG_FMT_RGBA8;
+}
+
+DDS_Format DialogToFormat(DialogFormat fmt)
+{
+    for (int32_t i = 0; i < kFormatTableSize; ++i) {
+        if (fmt == kFormatTable[i].uiFormat) {
+            return kFormatTable[i].fileFormat;
+        }
+    }
+
+    return DDS_FMT_RGBA8;
+}
+
+MyMTLPixelFormat FormatToPixelFormat(DDS_Format fmt)
+{
+    for (int32_t i = 0; i < kFormatTableSize; ++i) {
+        if (fmt == kFormatTable[i].fileFormat) {
+            return kFormatTable[i].pixelFormat;
+        }
+    }
+
+    return MyMTLPixelFormatRGBA8Unorm;
+}
+
+
+//static DDS_Alpha KeyToAlpha(OSType key)
+//{
+//    return (key == alphaChannelNone)          ? DDS_ALPHA_NONE :
+//            (key == alphaChannelTransparency) ? DDS_ALPHA_TRANSPARENCY :
+//            (key == alphaChannelChannel)      ? DDS_ALPHA_CHANNEL :
+//            DDS_ALPHA_TRANSPARENCY;
+//}
+//
+//static DDS_Filter KeyToFilter(OSType key)
+//{
+//    return (key == filterBox ? DDS_FILTER_BOX :
+//            key == filterTent ? DDS_FILTER_TENT :
+//            key == filterLanczos4 ? DDS_FILTER_LANCZOS4 :
+//            key == filterMitchell ? DDS_FILTER_MITCHELL :
+//            key == filterKaiser ? DDS_FILTER_KAISER :
+//            DDS_FILTER_MITCHELL);
+//}
+
+Boolean ReadScriptParamsOnWrite(GlobalsPtr globals)
+{
+    PIReadDescriptor token = NULL;
+    DescriptorKeyID key = 0;
+    DescriptorTypeID type = 0;
+    //OSType shape = 0, create = 0;
+    DescriptorKeyIDArray array = { NULLID };
+    int32 flags = 0;
+    OSErr //gotErr = noErr,
+          stickyError = noErr;
+    Boolean returnValue = true;
+    //int32 storeValue;
+    DescriptorEnumID ostypeStoreValue;
+    //Boolean boolStoreValue;
+
+    if (DescriptorAvailable(NULL))
+    {
+        token = OpenReader(array);
+        if (token)
+        {
+            while (PIGetKey(token, &key, &type, &flags))
+            {
+                switch (key)
+                {
+                    case keyDDSformat:
+                        PIGetEnum(token, &ostypeStoreValue);
+                        gOptions.format = SignatureToFormat(ostypeStoreValue);
+                        break;
+
+//                    case keyDDSalpha:
+//                        PIGetEnum(token, &ostypeStoreValue);
+//                        gOptions.alpha = KeyToAlpha(ostypeStoreValue);
+//                        break;
+//
+//                    case keyDDSpremult:
+//                        PIGetBool(token, &boolStoreValue);
+//                        gOptions.premultiply = boolStoreValue;
+//                        break;
+//
+//                    case keyDDSmipmap:
+//                        PIGetBool(token, &boolStoreValue);
+//                        gOptions.mipmap = boolStoreValue;
+//                        break;
+//
+//                    case keyDDSfilter:
+//                        PIGetEnum(token, &ostypeStoreValue);
+//                        gOptions.filter = KeyToFilter(ostypeStoreValue);
+//                        break;
+//
+//                    case keyDDScubemap:
+//                        PIGetBool(token, &boolStoreValue);
+//                        gOptions.cubemap = boolStoreValue;
+//                        break;
+                }
+            }
+
+            stickyError = CloseReader(&token); // closes & disposes.
+
+            if (stickyError)
+            {
+                if (stickyError == errMissingParameter) // missedParamErr == -1715
+                    ;
+                    /* (descriptorKeyIDArray != NULL)
+                       missing parameter somewhere. Walk IDarray to find which one. */
+                else
+                    gResult = stickyError;
+            }
+        }
+
+        returnValue = PlayDialog();
+        // return TRUE if we want to show our dialog
+    }
+
+    return returnValue;
+}
+
+
+
+//static OSType AlphaToKey(DDS_Alpha alpha)
+//{
+//    return (alpha == DDS_ALPHA_NONE)          ? alphaChannelNone :
+//            (alpha == DDS_ALPHA_TRANSPARENCY) ? alphaChannelTransparency :
+//            (alpha == DDS_ALPHA_CHANNEL)      ? alphaChannelChannel :
+//            alphaChannelTransparency;
+//}
+//
+//static OSType FilterToKey(DDS_Filter filter)
+//{
+//    return (filter == DDS_FILTER_BOX ? filterBox :
+//            filter == DDS_FILTER_TENT ? filterTent :
+//            filter == DDS_FILTER_LANCZOS4 ? filterLanczos4 :
+//            filter == DDS_FILTER_MITCHELL ? filterMitchell :
+//            filter == DDS_FILTER_KAISER ?
filterKaiser : +// filterMitchell); +//} + +OSErr WriteScriptParamsOnWrite(GlobalsPtr globals) +{ + PIWriteDescriptor token = nil; + OSErr gotErr = noErr; + + if (DescriptorAvailable(NULL)) + { + token = OpenWriter(); + if (token) + { + // write keys here + PIPutEnum(token, keyDDSformat, typeDDSformat, FormatToSignature(gOptions.format)); + + //PIPutEnum(token, keyDDSalpha, typeAlphaChannel, AlphaToKey(gOptions.alpha)); + +// if(gOptions.alpha != DDS_ALPHA_NONE) +// PIPutBool(token, keyDDSpremult, gOptions.premultiply); + +// PIPutBool(token, keyDDSmipmap, gOptions.mipmap); +// +// if(gOptions.mipmap) +// PIPutEnum(token, keyDDSfilter, typeFilter, FilterToKey(gOptions.filter)); +// +// PIPutBool(token, keyDDScubemap, gOptions.cubemap); + + gotErr = CloseWriter(&token); /* closes and sets dialog optional */ + /* done. Now pass handle on to Photoshop */ + } + } + return gotErr; +} + + diff --git a/plugin/kps/KPSTerminology.h b/plugin/kps/KPSTerminology.h new file mode 100755 index 00000000..8e249db3 --- /dev/null +++ b/plugin/kps/KPSTerminology.h @@ -0,0 +1,109 @@ + +// kram - Copyright 2020 by Alec Miller. - MIT License +// The license and copyright notice shall be included +// in all copies or substantial portions of the Software. + +/////////////////////////////////////////////////////////////////////////// +// +// Copyright (c) 2014, Brendan Bolles +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// +/////////////////////////////////////////////////////////////////////////// + +// ------------------------------------------------------------------------ +// +// DDS Photoshop plug-in +// +// by Brendan Bolles +// +// ------------------------------------------------------------------------ + +#ifndef DDSTerminology_H +#define DDSTerminology_H + +//------------------------------------------------------------------------------- +// Options +//------------------------------------------------------------------------------- + +//------------------------------------------------------------------------------- +// Definitions -- Scripting keys +//------------------------------------------------------------------------------- + +#define keyDDSformat 'kkfm' +//#define keyDDSalpha 'DDSa' +//#define keyDDSpremult 'DDSp' +//#define keyDDSmipmap 'DDSm' +//#define keyDDSfilter 'DDSq' +//#define keyDDScubemap 'DDSc' + +#define typeDDSformat 'ktfm' + +#define formatBC1 'BC1 ' +#define formatBC3 'BC3 ' +#define formatBC4 'BC4 ' +#define formatBC5 'BC5 ' +#define formatBC7 'BC7 ' + +// signed and srgb variants +#define formatBC1S 'BC1S' +#define formatBC3S 'BC3S' +#define formatBC4S 'BC4S' +#define formatBC5S 'BC5S' +#define formatBC7S 'BC7S' + +// explicit +#define formatR8 'U8r ' +#define formatRG8 'U8rg' +#define formatRGBA8 'U84 ' +#define formatRGBA8S 'U84S' + +#define formatR16F 'H4r ' +#define formatRG16F 'H4rg' +#define formatRGBA16F 'H4 ' + +#define formatR32F 'F4r ' +#define formatRG32F 'F4rg' +#define formatRGBA32F 'F4 ' + +// TODO: signed RGBA8? +// TODO: ASTC +// TODO: ETC + +//#define typeAlphaChannel 'alfT' +// +//#define alphaChannelNone 'Nalf' +//#define alphaChannelTransparency 'Talf' +//#define alphaChannelChannel 'Calf' +// +//#define typeFilter 'filT' +// +//#define filterBox 'Bfil' +//#define filterTent 'Tfil' +//#define filterLanczos4 'Lfil' +//#define filterMitchell 'Mfil' +//#define filterKaiser 'Kfil' + +#endif diff --git a/plugin/kps/KPSUI.h b/plugin/kps/KPSUI.h new file mode 100644 index 00000000..60238b81 --- /dev/null +++ b/plugin/kps/KPSUI.h @@ -0,0 +1,167 @@ + +// kram - Copyright 2020 by Alec Miller. - MIT License +// The license and copyright notice shall be included +// in all copies or substantial portions of the Software. + +/////////////////////////////////////////////////////////////////////////// +// +// Copyright (c) 2014, Brendan Bolles +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +/////////////////////////////////////////////////////////////////////////// + +// ------------------------------------------------------------------------ +// +// DDS Photoshop plug-in +// +// by Brendan Bolles +// +// ------------------------------------------------------------------------ + +#ifndef DDSUI_H +#define DDSUI_H + +typedef enum DialogFormat { + // bc + DIALOG_FMT_BC1, + DIALOG_FMT_BC3, + DIALOG_FMT_BC4, + DIALOG_FMT_BC5, + DIALOG_FMT_BC7, + + DIALOG_FMT_BC1S, + DIALOG_FMT_BC3S, + DIALOG_FMT_BC4S, + DIALOG_FMT_BC5S, + DIALOG_FMT_BC7S, + + // TODO: firt decide if lossy formats should be a part of plugin + // this encourages opening/saving and each time loss occurs + // TODO: ETC2 + // TODO: ASTC + + // TODO: consider 4.12 Valve type HDR, or 101010A2 or 111110 + + // lossless formats good for source + // explicit + DIALOG_FMT_R8, + DIALOG_FMT_RG8, + DIALOG_FMT_RGBA8, + DIALOG_FMT_RGBA8S, + + DIALOG_FMT_R16F, + DIALOG_FMT_RG16F, + DIALOG_FMT_RGBA16F, + + DIALOG_FMT_R32F, + DIALOG_FMT_RG32F, + DIALOG_FMT_RGBA32F, +} DialogFormat; + + +#if 1 // Not using any of these + +typedef enum { + DIALOG_ALPHA_NONE, + DIALOG_ALPHA_TRANSPARENCY, + DIALOG_ALPHA_CHANNEL +} DialogAlpha; + +typedef enum { + DIALOG_FILTER_BOX, + DIALOG_FILTER_TENT, + DIALOG_FILTER_LANCZOS4, + DIALOG_FILTER_MITCHELL, + DIALOG_FILTER_KAISER +} Dialog_Filter; + +typedef struct { + DialogAlpha alpha; +} DDS_InUI_Data; + +typedef struct { + DialogFormat format; + DialogAlpha alpha; + bool premultiply; + bool mipmap; + Dialog_Filter filter; + bool cubemap; +} DDS_OutUI_Data; + +#else + +// no real input, just want to drop ktx/2 onto PS and go + +struct DDS_OutUI_Data { + DialogFormat format; +}; +#endif + + +// DDS UI +// +// return true if user hit OK +// if user hit OK, params block will have been modified +// +// plugHndl is bundle identifier string on Mac, hInstance on win +// mwnd is the main window for Windows +// +bool +DDS_InUI( + DDS_InUI_Data *params, + bool has_alpha, + const void *plugHndl, + const void *mwnd); + +bool +DDS_OutUI( + DDS_OutUI_Data *params, + bool have_transparency, + const char *alpha_name, + bool ae_ui, + const void *plugHndl, + const void *mwnd); + +void +DDS_About( + const char *plugin_version_string, + const void *plugHndl, + const void *mwnd); + + +// Mac prefs keys +#define DDS_PREFS_ID "com.ba.kram-ps" +#define DDS_PREFS_ALPHA "Alpha Mode" +#define DDS_PREFS_AUTO "Auto" + + +// Windows registry keys +#define DDS_PREFIX "Software\\ba\\kram-ps" +#define DDS_ALPHA_KEY "Alpha" +#define DDS_AUTO_KEY "Auto" + + +#endif diff --git a/plugin/kps/KPSVersion.h b/plugin/kps/KPSVersion.h new file mode 100755 index 00000000..1c20922e --- /dev/null +++ b/plugin/kps/KPSVersion.h @@ -0,0 +1,58 @@ + +// kram - Copyright 2020 by Alec Miller. - MIT License +// The license and copyright notice shall be included +// in all copies or substantial portions of the Software. 
+ +/////////////////////////////////////////////////////////////////////////// +// +// Copyright (c) 2014, Brendan Bolles +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +/////////////////////////////////////////////////////////////////////////// + +// ------------------------------------------------------------------------ +// +// DDS Photoshop plug-in +// +// by Brendan Bolles +// +// ------------------------------------------------------------------------ + +#ifndef DDSVerion_H +#define DDSVerion_H + +#define DDS_Major_Version 0 +#define DDS_Minor_Version 6 +#define DDS_Version_String "0.9" +#define DDS_Build_Date __DATE__ +#define DDS_Build_Date_Manual "February 28 2021" +#define DDS_Build_Complete_Manual "v0.9 - " DDS_Build_Date +#define DDS_Copyright_Year "2021" +#define DDS_Build_Year "2021" + +#define DDS_Description "A import/output plugin for using ktx files from kram in Adobe Photoshop." + +#endif diff --git a/plugin/kps/kram-ps.rsrc b/plugin/kps/kram-ps.rsrc new file mode 100644 index 0000000000000000000000000000000000000000..d0022fea60dcc4a2da14dcdc552c6b91e4d513c5 GIT binary patch literal 1399 zcmd^<-EPw`6vt1R7Dfkj5;th#!V(i~Xj->6U5f-0?Ub--R77j}xB{h#(^^fODsiU{ zak0lhh!@~Bcnl=og>#&&Z14zJKJoegY#%$vP5=x5DE|U5(cmlEe^!hy#S7Ouz2p8= z$e_?=)DAJ}#k^pZ@-+2g6vWSXnq7{iG!l!&zE!Z*svFJ6LiRl&b>AdYG?mJIdQd1i zkD`MM@|cdSVqKY_rps zy`^)QL`h0&D~}fs4~w}gJ14{U6p!pQ7@l9{3`@@KMZwEcwb1Nc`Y#JZW&b@cj8#}L z<1;!GFJAbQg6UkKUn%sJ#`sCM6N{nqzN+&SCTZ*`$bu6KvrYr@UY*Z=mx*|mq^h3G z)oiZVOrHV6No<}8IcB)~I$DE;BH|ef(}NmP7Ee|AM2fdAs*2PanS-pot6RLqaO3>VPbU4t7=zVGQYjJcf0 zcWmk7aV-1u_nfLQhP$TSvEFnnR>2eOcFL#*fO=5Xnq}#D1g3pt9a)UsB(}Hjn6_g% zm%7<-%--Q1Rz@va6D#>mGz%;F7Fr7{`7N|IR`T0roHGD=tWXHO9A?M#9G8!OV%dVCf3dwzVCX)y}-uY-FaF=V?Vw(b{l*7hXBz1)4M-9XWfYVtS_R7 zr))F8BRJzhetN|bCf4K z#=!*uLqG-nDg)!276z;{Fh6O(1dq|zzm8k_R#LQ@_5$H6kw`B+zpeFK{7s;~CVm6I C90||> literal 0 HcmV?d00001 diff --git a/plugin/kps/mac/Info.plist b/plugin/kps/mac/Info.plist new file mode 100644 index 00000000..311d1a8c --- /dev/null +++ b/plugin/kps/mac/Info.plist @@ -0,0 +1,22 @@ + + + + + CFBundleDevelopmentRegion + English + CFBundleExecutable + $(PRODUCT_NAME) + CFBundleGetInfoString + 22.0 © 2020 Adobe. All rights reserved. + NSHumanReadableCopyright + © 2020 Adobe. 
All rights reserved. + CFBundleShortVersionString + 22.0.0 + CFBundleName + $(PRODUCT_NAME) + CFBundlePackageType + $(PLUGIN_TYPE) + CFBundleSignature + 8BIM + + diff --git a/plugin/kps/mac/KPSAbout.xib b/plugin/kps/mac/KPSAbout.xib new file mode 100644 index 00000000..b381b10f --- /dev/null +++ b/plugin/kps/mac/KPSAbout.xib @@ -0,0 +1,72 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/plugin/kps/mac/KPSAboutController.h b/plugin/kps/mac/KPSAboutController.h new file mode 100644 index 00000000..dfdc4374 --- /dev/null +++ b/plugin/kps/mac/KPSAboutController.h @@ -0,0 +1,58 @@ + +// kram - Copyright 2020 by Alec Miller. - MIT License +// The license and copyright notice shall be included +// in all copies or substantial portions of the Software. + +/////////////////////////////////////////////////////////////////////////// +// +// Copyright (c) 2014, Brendan Bolles +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +/////////////////////////////////////////////////////////////////////////// + +// ------------------------------------------------------------------------ +// +// DDS Photoshop plug-in +// +// by Brendan Bolles +// +// ------------------------------------------------------------------------ + + +#import + +@interface KPSAboutController : NSObject { + IBOutlet NSWindow *theWindow; + IBOutlet NSTextField *versionString; +} + +- (id)init:(const char *)version_string; + +- (IBAction)clickedOK:(id)sender; + +- (NSWindow *)getWindow; + +@end diff --git a/plugin/kps/mac/KPSAboutController.mm b/plugin/kps/mac/KPSAboutController.mm new file mode 100644 index 00000000..878dad6d --- /dev/null +++ b/plugin/kps/mac/KPSAboutController.mm @@ -0,0 +1,71 @@ + +// kram - Copyright 2020 by Alec Miller. - MIT License +// The license and copyright notice shall be included +// in all copies or substantial portions of the Software. + +/////////////////////////////////////////////////////////////////////////// +// +// Copyright (c) 2014, Brendan Bolles +// +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +/////////////////////////////////////////////////////////////////////////// + +// ------------------------------------------------------------------------ +// +// DDS Photoshop plug-in +// +// by Brendan Bolles +// +// ------------------------------------------------------------------------ + + +#import "KPSAboutController.h" + +@implementation KPSAboutController + +- (id)init:(const char *)version_string +{ + self = [super init]; + + if(!([[NSBundle mainBundle] loadNibNamed:@"DDSAbout" owner:self topLevelObjects:nil])) + return nil; + + [versionString setStringValue:[NSString stringWithUTF8String:version_string]]; + + [theWindow center]; + + return self; +} + +- (IBAction)clickedOK:(id)sender { + [NSApp stopModal]; +} + +- (NSWindow *)getWindow { + return theWindow; +} + +@end diff --git a/plugin/kps/mac/KPSInput.xib b/plugin/kps/mac/KPSInput.xib new file mode 100644 index 00000000..825a5183 --- /dev/null +++ b/plugin/kps/mac/KPSInput.xib @@ -0,0 +1,112 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/plugin/kps/mac/KPSInputController.h b/plugin/kps/mac/KPSInputController.h new file mode 100644 index 00000000..681f20ae --- /dev/null +++ b/plugin/kps/mac/KPSInputController.h @@ -0,0 +1,66 @@ + +// kram - Copyright 2020 by Alec Miller. - MIT License +// The license and copyright notice shall be included +// in all copies or substantial portions of the Software. + +/////////////////////////////////////////////////////////////////////////// +// +// Copyright (c) 2014, Brendan Bolles +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +/////////////////////////////////////////////////////////////////////////// + +// ------------------------------------------------------------------------ +// +// DDS Photoshop plug-in +// +// by Brendan Bolles +// +// ------------------------------------------------------------------------ + + +#import + +#include "KPSUI.h" + + +@interface KPSInputController : NSObject { + IBOutlet NSWindow *theWindow; + IBOutlet NSMatrix *alphaMatrix; + IBOutlet NSButton *autoCheckbox; +} +- (id)init:(DialogAlpha)the_alpha + autoDialog:(BOOL)autoDialog; + +- (IBAction)clickedOK:(id)sender; +- (IBAction)clickedCancel:(id)sender; +- (IBAction)clickedSetDefaults:(id)sender; + +- (NSWindow *)getWindow; + +- (DialogAlpha)getAlpha; +- (BOOL)getAuto; +@end diff --git a/plugin/kps/mac/KPSInputController.mm b/plugin/kps/mac/KPSInputController.mm new file mode 100644 index 00000000..872c1355 --- /dev/null +++ b/plugin/kps/mac/KPSInputController.mm @@ -0,0 +1,104 @@ + +// kram - Copyright 2020 by Alec Miller. - MIT License +// The license and copyright notice shall be included +// in all copies or substantial portions of the Software. + +/////////////////////////////////////////////////////////////////////////// +// +// Copyright (c) 2014, Brendan Bolles +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// +/////////////////////////////////////////////////////////////////////////// + +// ------------------------------------------------------------------------ +// +// DDS Photoshop plug-in +// +// by Brendan Bolles +// +// ------------------------------------------------------------------------ + + +#import "KPSInputController.h" + +@implementation KPSInputController + +- (id)init:(DialogAlpha)the_alpha autoDialog:(BOOL)autoDialog +{ + self = [super init]; + + if(!([[NSBundle mainBundle] loadNibNamed:@"KPSInput" owner:self topLevelObjects:nil])) + return nil; + + [alphaMatrix selectCellAtRow:(NSInteger)(the_alpha - 1) column:0]; + + [autoCheckbox setState:(autoDialog ? NSControlStateValueOn : NSControlStateValueOff)]; + + [theWindow center]; + + return self; +} + +- (IBAction)clickedOK:(id)sender { + [NSApp stopModal]; +} + +- (IBAction)clickedCancel:(id)sender { + [NSApp abortModal]; +} + +- (IBAction)clickedSetDefaults:(id)sender { + char alphaMode_char = [self getAlpha]; + CFNumberRef alphaMode = CFNumberCreate(kCFAllocatorDefault, kCFNumberCharType, &alphaMode_char); + CFBooleanRef autoRef = (([autoCheckbox state] == NSControlStateValueOn) ? kCFBooleanTrue : kCFBooleanFalse); + + CFPreferencesSetAppValue(CFSTR(DDS_PREFS_ALPHA), alphaMode, CFSTR(DDS_PREFS_ID)); + CFPreferencesSetAppValue(CFSTR(DDS_PREFS_AUTO), autoRef, CFSTR(DDS_PREFS_ID)); + + CFPreferencesAppSynchronize(CFSTR(DDS_PREFS_ID)); + + CFRelease(alphaMode); + CFRelease(autoRef); +} + +- (NSWindow *)getWindow { + return theWindow; +} + +- (DialogAlpha)getAlpha { + switch([alphaMatrix selectedRow]) + { + case 0: return DIALOG_ALPHA_TRANSPARENCY; + case 1: return DIALOG_ALPHA_CHANNEL; + default: return DIALOG_ALPHA_CHANNEL; + } +} + +- (BOOL)getAuto { + return [autoCheckbox state] == NSControlStateValueOn; +} + +@end diff --git a/plugin/kps/mac/KPSOutput.xib b/plugin/kps/mac/KPSOutput.xib new file mode 100644 index 00000000..af59ed13 --- /dev/null +++ b/plugin/kps/mac/KPSOutput.xib @@ -0,0 +1,174 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/plugin/kps/mac/KPSOutputController.h b/plugin/kps/mac/KPSOutputController.h new file mode 100644 index 00000000..0c1f11e8 --- /dev/null +++ b/plugin/kps/mac/KPSOutputController.h @@ -0,0 +1,95 @@ + +// kram - Copyright 2020 by Alec Miller. - MIT License +// The license and copyright notice shall be included +// in all copies or substantial portions of the Software. + +/////////////////////////////////////////////////////////////////////////// +// +// Copyright (c) 2014, Brendan Bolles +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +/////////////////////////////////////////////////////////////////////////// + +// ------------------------------------------------------------------------ +// +// DDS Photoshop plug-in +// +// by Brendan Bolles +// +// ------------------------------------------------------------------------ + + +#import + +#include "KPSUI.h" + +typedef enum { + DIALOG_RESULT_CONTINUE = 0, + DIALOG_RESULT_OK, + DIALOG_RESULT_CANCEL +} DialogResult; + +@interface KPSOutputController : NSObject { + IBOutlet NSWindow *theWindow; + IBOutlet NSPopUpButton *formatPulldown; + IBOutlet NSButton *mipmapCheck; + IBOutlet NSPopUpButton *filterPulldown; + IBOutlet NSTextField *filterLabel; + IBOutlet NSMatrix *alphaMatrix; + IBOutlet NSButton *premultiplyCheck; + IBOutlet NSBox *alphaBox; + IBOutlet NSButton *cubemapCheck; + IBOutlet NSButton *ok_button; + IBOutlet NSButton *cancel_button; + DialogResult theResult; +} +- (id)init:(DialogFormat)format + mipmap:(BOOL)mipmap + filter:(Dialog_Filter)filter + alpha:(DialogAlpha)alpha + premultiply:(BOOL)premultiply + cube_map:(BOOL)cube_map + have_transparency:(BOOL)has_transparency + alpha_name:(const char *)alphaName + ae_ui:(BOOL)ae_ui; + +- (IBAction)clickedOK:(id)sender; +- (IBAction)clickedCancel:(id)sender; + +- (IBAction)trackMipmap:(id)sender; +- (IBAction)trackAlpha:(id)sender; + +- (NSWindow *)getWindow; +- (DialogResult)getResult; + +- (DialogFormat)getFormat; +- (BOOL)getMipmap; +- (Dialog_Filter)getFilter; +- (DialogAlpha)getAlpha; +- (BOOL)getPremultiply; +- (BOOL)getCubeMap; + +@end diff --git a/plugin/kps/mac/KPSOutputController.mm b/plugin/kps/mac/KPSOutputController.mm new file mode 100644 index 00000000..76b1047b --- /dev/null +++ b/plugin/kps/mac/KPSOutputController.mm @@ -0,0 +1,220 @@ + +// kram - Copyright 2020 by Alec Miller. - MIT License +// The license and copyright notice shall be included +// in all copies or substantial portions of the Software. + +/////////////////////////////////////////////////////////////////////////// +// +// Copyright (c) 2014, Brendan Bolles +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +/////////////////////////////////////////////////////////////////////////// + +// ------------------------------------------------------------------------ +// +// DDS Photoshop plug-in +// +// by Brendan Bolles +// +// ------------------------------------------------------------------------ + + +#import "KPSOutputController.h" + +@implementation KPSOutputController + +- (id)init:(DialogFormat)format + mipmap:(BOOL)mipmap + filter:(Dialog_Filter)filter + alpha:(DialogAlpha)alpha + premultiply:(BOOL)premultiply + cube_map:(BOOL)cube_map + have_transparency:(BOOL)has_transparency + alpha_name:(const char *)alphaName + ae_ui:(BOOL)ae_ui +{ + self = [super init]; + + if(!([[NSBundle mainBundle] loadNibNamed:@"KPSOutput" owner:self topLevelObjects:nil])) + return nil; + + // TODO: strings are hardcode in this arrray, these need to line up with + // actual types now + [formatPulldown addItemsWithTitles: + [NSArray arrayWithObjects: + @"DXT1", + @"DXT1A", + @"DXT2", + @"DXT3", + @"DXT4", + @"DXT5", + @"DXT5A", + @"3Dc", + @"DXN", + @"Uncompressed", + nil] + ]; + [formatPulldown selectItem:[formatPulldown itemAtIndex:format]]; + + + [mipmapCheck setState:(mipmap ? NSControlStateValueOn : NSControlStateValueOff)]; + + + [filterPulldown addItemsWithTitles: + [NSArray arrayWithObjects:@"Box", @"Tent", @"Lanczos4", @"Mitchell", @"Kaiser", nil]]; + [filterPulldown selectItem:[filterPulldown itemAtIndex:filter]]; + + + if(!has_transparency) + { + [[alphaMatrix cellAtRow:1 column:0] setEnabled:FALSE]; + + if(alpha == DIALOG_ALPHA_TRANSPARENCY) + { + alpha = (alphaName ? DIALOG_ALPHA_CHANNEL : DIALOG_ALPHA_NONE); + } + } + + if(alphaName) + { + [[alphaMatrix cellAtRow:2 column:0] setTitle:[NSString stringWithUTF8String:alphaName]]; + } + else + { + [[alphaMatrix cellAtRow:2 column:0] setEnabled:FALSE]; + + if(alpha == DIALOG_ALPHA_CHANNEL) + { + alpha = (has_transparency ? DIALOG_ALPHA_TRANSPARENCY : DIALOG_ALPHA_NONE); + } + } + + [alphaMatrix selectCellAtRow:(NSInteger)alpha column:0]; + + + [premultiplyCheck setState:(premultiply ? NSControlStateValueOn : NSControlStateValueOff)]; + + + [cubemapCheck setState:(cube_map ? 
NSControlStateValueOn : NSControlStateValueOff)]; + + + [self trackMipmap:self]; + [self trackAlpha:self]; + + if(ae_ui) + { + [alphaMatrix setHidden:TRUE]; + [premultiplyCheck setHidden:TRUE]; + [alphaBox setHidden:TRUE]; + + const int shrink = 170; + + NSRect window_frame = [theWindow frame]; + NSRect cube_map_frame = [cubemapCheck frame]; + NSRect ok_frame = [ok_button frame]; + NSRect cancel_frame = [cancel_button frame]; + + window_frame.size.height -= shrink; + cube_map_frame.origin.y += shrink; + ok_frame.origin.y += shrink; + cancel_frame.origin.y += shrink; + + [cubemapCheck setFrame:cube_map_frame]; + [ok_button setFrame:ok_frame]; + [cancel_button setFrame:cancel_frame]; + [theWindow setFrame:window_frame display:TRUE]; + } + + [theWindow center]; + + theResult = DIALOG_RESULT_CONTINUE; + + return self; +} + +- (IBAction)clickedOK:(id)sender { + theResult = DIALOG_RESULT_OK; +} + +- (IBAction)clickedCancel:(id)sender { + theResult = DIALOG_RESULT_CANCEL; +} + +- (IBAction)trackMipmap:(id)sender { + const BOOL enabled = [self getMipmap]; + NSColor *label_color = (enabled ? [NSColor textColor] : [NSColor disabledControlTextColor]); + + [filterPulldown setEnabled:enabled]; + [filterLabel setTextColor:label_color]; + + //[label_color release]; +} + +- (IBAction)trackAlpha:(id)sender { + const BOOL enabled = ([self getAlpha] != DIALOG_ALPHA_NONE); + + [premultiplyCheck setEnabled:enabled]; +} + +- (NSWindow *)getWindow { + return theWindow; +} + +- (DialogResult)getResult { + return theResult; +} + +- (DialogFormat)getFormat { + return (DialogFormat)[formatPulldown indexOfSelectedItem]; +} + +- (BOOL)getMipmap { + return ([mipmapCheck state] == NSControlStateValueOn); +} + +- (Dialog_Filter)getFilter { + return (Dialog_Filter)[filterPulldown indexOfSelectedItem]; +} + +- (DialogAlpha)getAlpha { + switch([alphaMatrix selectedRow]) + { + case 0: return DIALOG_ALPHA_NONE; + case 1: return DIALOG_ALPHA_TRANSPARENCY; + case 2: return DIALOG_ALPHA_CHANNEL; + default: return DIALOG_ALPHA_CHANNEL; + } +} + +- (BOOL)getPremultiply { + return ([premultiplyCheck state] == NSControlStateValueOn); +} + +- (BOOL)getCubeMap { + return ([cubemapCheck state] == NSControlStateValueOn); +} + +@end diff --git a/plugin/kps/mac/KPSUICocoa.mm b/plugin/kps/mac/KPSUICocoa.mm new file mode 100644 index 00000000..a3627457 --- /dev/null +++ b/plugin/kps/mac/KPSUICocoa.mm @@ -0,0 +1,252 @@ + +/////////////////////////////////////////////////////////////////////////// +// +// Copyright (c) 2014, Brendan Bolles +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +/////////////////////////////////////////////////////////////////////////// + +// kram - Copyright 2020 by Alec Miller. - MIT License +// The license and copyright notice shall be included +// in all copies or substantial portions of the Software. + +// ------------------------------------------------------------------------ +// +// DDS Photoshop plug-in +// +// by Brendan Bolles +// +// ------------------------------------------------------------------------ + +#include "KPSUI.h" + +#import "KPSInputController.h" +#import "KPSOutputController.h" +#import "KPSAboutController.h" + +#include "KPSVersion.h" + +#include "PIUtilities.h" + + +bool +DDS_InUI( + DDS_InUI_Data *params, + bool has_alpha, + const void *plugHndl, + const void *mwnd) +{ + bool result = true; + + params->alpha = DIALOG_ALPHA_CHANNEL; + + // get the prefs + BOOL auto_dialog = FALSE; + + CFPropertyListRef alphaMode_val = CFPreferencesCopyAppValue(CFSTR(DDS_PREFS_ALPHA), CFSTR(DDS_PREFS_ID)); + CFPropertyListRef auto_val = CFPreferencesCopyAppValue(CFSTR(DDS_PREFS_AUTO), CFSTR(DDS_PREFS_ID)); + + if(alphaMode_val) + { + char alphaMode_char; + + if( CFNumberGetValue((CFNumberRef)alphaMode_val, kCFNumberCharType, &alphaMode_char) ) + { + params->alpha = (DialogAlpha)alphaMode_char; + } + + CFRelease(alphaMode_val); + } + + if(auto_val) + { + auto_dialog = CFBooleanGetValue((CFBooleanRef)auto_val); + + CFRelease(auto_val); + } + + + // user can force dialog open buy holding shift or option + const NSUInteger flags = [[NSApp currentEvent] modifierFlags]; + const bool shift_key = ( (flags & NSEventModifierFlagShift) || (flags & NSEventModifierFlagOption) ); + + if((has_alpha && auto_dialog) || shift_key) + { + // do the dialog (or maybe not (but we still load the object to get the prefs) + NSString *bundle_id = [NSString stringWithUTF8String:(const char *)plugHndl]; + + Class ui_controller_class = [[NSBundle bundleWithIdentifier:bundle_id] + classNamed:@"KPSInputController"]; + + if(ui_controller_class) + { + KPSInputController *ui_controller = [[ui_controller_class alloc] init:params->alpha + autoDialog:auto_dialog]; + + if(ui_controller) + { + NSWindow *my_window = [ui_controller getWindow]; + + if(my_window) + { + NSInteger modal_result = [NSApp runModalForWindow:my_window]; + + if(modal_result == NSModalResponseStop) + { + params->alpha = [ui_controller getAlpha]; + + result = true; + } + else + result = false; + + + // record the auto pref every time + CFBooleanRef autoRef = ([ui_controller getAuto] ? 
kCFBooleanTrue : kCFBooleanFalse); + CFPreferencesSetAppValue(CFSTR(DDS_PREFS_AUTO), autoRef, CFSTR(DDS_PREFS_ID)); + + CFPreferencesAppSynchronize(CFSTR(DDS_PREFS_ID)); + + + [my_window close]; + } + + //[ui_controller release]; + } + } + } + + + return result; +} + + +bool +DDS_OutUI( + DDS_OutUI_Data *params, + bool have_transparency, + const char *alpha_name, + bool ae_ui, + const void *plugHndl, + const void *mwnd) +{ + bool result = true; + + NSString *bundle_id = [NSString stringWithUTF8String:(const char *)plugHndl]; + + Class ui_controller_class = [[NSBundle bundleWithIdentifier:bundle_id] + classNamed:@"KPSOutputController"]; + + if(ui_controller_class) + { + KPSOutputController *ui_controller = [[ui_controller_class alloc] init: params->format + mipmap: params->mipmap + filter: params->filter + alpha: params->alpha + premultiply: params->premultiply + cube_map: params->cubemap + have_transparency: have_transparency + alpha_name: alpha_name + ae_ui: ae_ui ]; + + if(ui_controller) + { + NSWindow *my_window = [ui_controller getWindow]; + + if(my_window) + { + NSInteger modal_result; + DialogResult dialog_result; + + NSModalSession modal_session = [NSApp beginModalSessionForWindow:my_window]; + + do{ + modal_result = [NSApp runModalSession:modal_session]; + + dialog_result = [ui_controller getResult]; + } + while(dialog_result == DIALOG_RESULT_CONTINUE && modal_result == NSModalResponseContinue); + + [NSApp endModalSession:modal_session]; + + + if(dialog_result == DIALOG_RESULT_OK || modal_result == NSModalResponseStop) + { + params->format = [ui_controller getFormat]; + params->mipmap = [ui_controller getMipmap]; + params->filter = [ui_controller getFilter]; + params->alpha = [ui_controller getAlpha]; + params->premultiply = [ui_controller getPremultiply]; + params->cubemap = [ui_controller getCubeMap]; + + result = true; + } + else + result = false; + + [my_window close]; + } + + //[ui_controller release]; + } + } + + + return result; +} + + +void +DDS_About( + const char *plugin_version_string, + const void *plugHndl, + const void *mwnd) +{ + NSString *bundle_id = [NSString stringWithUTF8String:(const char *)plugHndl]; + + Class about_controller_class = [[NSBundle bundleWithIdentifier:bundle_id] + classNamed:@"KPSAboutController"]; + + if(about_controller_class) + { + KPSAboutController *about_controller = [[about_controller_class alloc] init:plugin_version_string]; + + if(about_controller) + { + NSWindow *the_window = [about_controller getWindow]; + + if(the_window) + { + [NSApp runModalForWindow:the_window]; + + [the_window close]; + } + + //[about_controller release]; + } + } +} + diff --git a/plugin/kps/win/KPSDialogs.rc b/plugin/kps/win/KPSDialogs.rc new file mode 100644 index 00000000..2ed37107 --- /dev/null +++ b/plugin/kps/win/KPSDialogs.rc @@ -0,0 +1,168 @@ +// Microsoft Visual C++ generated resource script. +// +#include "resource.h" + +#define APSTUDIO_READONLY_SYMBOLS +///////////////////////////////////////////////////////////////////////////// +// +// Generated from the TEXTINCLUDE 2 resource. +// +#include "afxres.h" + +///////////////////////////////////////////////////////////////////////////// +#undef APSTUDIO_READONLY_SYMBOLS + +///////////////////////////////////////////////////////////////////////////// +// English (U.S.) 
resources + +#if !defined(AFX_RESOURCE_DLL) || defined(AFX_TARG_ENU) +#ifdef _WIN32 +LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US +#pragma code_page(1252) +#endif //_WIN32 + +#ifdef APSTUDIO_INVOKED +///////////////////////////////////////////////////////////////////////////// +// +// TEXTINCLUDE +// + +1 TEXTINCLUDE +BEGIN + "resource.h\0" +END + +2 TEXTINCLUDE +BEGIN + "#include ""afxres.h""\r\n" + "\0" +END + +3 TEXTINCLUDE +BEGIN + "\r\n" + "\0" +END + +#endif // APSTUDIO_INVOKED + + +///////////////////////////////////////////////////////////////////////////// +// +// Dialog +// + +IN_DIALOG DIALOGEX 0, 0, 242, 127 +STYLE DS_SYSMODAL | DS_SETFONT | DS_MODALFRAME | DS_FIXEDSYS | DS_CENTER | WS_POPUP | WS_CAPTION | WS_SYSMENU +CAPTION "DDS Input Options" +FONT 8, "MS Shell Dlg", 400, 0, 0x1 +BEGIN + DEFPUSHBUTTON "OK",IDOK,131,105,50,14 + PUSHBUTTON "Cancel",IDCANCEL,185,105,50,14 + CONTROL "Transparency",4,"Button",BS_AUTORADIOBUTTON | WS_GROUP,83,21,113,10 + CONTROL "Channels Palette",5,"Button",BS_AUTORADIOBUTTON,83,36,85,10 + CONTROL "Automatically bring up this dialog",6,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,60,76,121,10 + PUSHBUTTON "Set Defaults",3,9,105,61,14 + GROUPBOX "Alpha Channel Handling",IDC_STATIC,68,7,107,51 +END + +OUT_DIALOG DIALOGEX 0, 0, 151, 234 +STYLE DS_SYSMODAL | DS_SETFONT | DS_MODALFRAME | DS_FIXEDSYS | DS_CENTER | WS_POPUP | WS_CAPTION | WS_SYSMENU +CAPTION "DDS Options" +FONT 8, "MS Shell Dlg", 400, 0, 0x1 +BEGIN + DEFPUSHBUTTON "OK",IDOK,38,214,50,14 + PUSHBUTTON "Cancel",IDCANCEL,95,214,50,14 + GROUPBOX "Alpha Channel",11,25,88,96,90 + CONTROL "None",7,"Button",BS_AUTORADIOBUTTON | WS_GROUP,37,102,33,10 + CONTROL "Transparency",8,"Button",BS_AUTORADIOBUTTON,37,122,60,10 + CONTROL "Channels Palette",9,"Button",BS_AUTORADIOBUTTON,37,142,82,10 + COMBOBOX 3,56,12,68,30,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP + RTEXT "Format:",IDC_STATIC,14,14,37,8 + CONTROL "Mipmap",4,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,56,40,40,10 + COMBOBOX 5,56,56,68,30,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP + RTEXT "Filter:",6,14,58,37,8 + CONTROL "Premultiply",10,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,56,160,53,10 + CONTROL "Cube Map",12,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,56,188,48,10 +END + +ABOUT_DIALOG DIALOGEX 0, 0, 242, 125 +STYLE DS_SYSMODAL | DS_SETFONT | DS_MODALFRAME | DS_FIXEDSYS | DS_CENTER | WS_POPUP | WS_CAPTION | WS_SYSMENU +CAPTION "About WebP" +FONT 8, "MS Shell Dlg", 400, 0, 0x1 +BEGIN + DEFPUSHBUTTON "OK",IDOK,95,100,50,14 + CTEXT "DDS Photoshop Plug-In",IDC_STATIC,74,40,98,8 + CTEXT "plug-in version",4,61,60,118,8 +END + + +///////////////////////////////////////////////////////////////////////////// +// +// Version +// + +VS_VERSION_INFO VERSIONINFO + FILEVERSION 0,6,0,0 + PRODUCTVERSION 0,6,0,0 + FILEFLAGSMASK 0x17L +#ifdef _DEBUG + FILEFLAGS 0x1L +#else + FILEFLAGS 0x0L +#endif + FILEOS 0x4L + FILETYPE 0x2L + FILESUBTYPE 0x0L +BEGIN + BLOCK "StringFileInfo" + BEGIN + BLOCK "040904b0" + BEGIN + VALUE "FileDescription", "DDS" + VALUE "FileVersion", "0.6" + VALUE "InternalName", "DDS" + VALUE "LegalCopyright", "Copyright (C) 2014-2018" + VALUE "OriginalFilename", "DDS.dll" + VALUE "ProductName", "DDS Photoshop Plug-in" + VALUE "ProductVersion", "2.0" + END + END + BLOCK "VarFileInfo" + BEGIN + VALUE "Translation", 0x409, 1200 + END +END + + +///////////////////////////////////////////////////////////////////////////// +// +// DESIGNINFO +// + +#ifdef APSTUDIO_INVOKED +GUIDELINES DESIGNINFO +BEGIN + "OUT_DIALOG", DIALOG + BEGIN + RIGHTMARGIN, 145 + BOTTOMMARGIN, 
228 + END +END +#endif // APSTUDIO_INVOKED + +#endif // English (U.S.) resources +///////////////////////////////////////////////////////////////////////////// + + + +#ifndef APSTUDIO_INVOKED +///////////////////////////////////////////////////////////////////////////// +// +// Generated from the TEXTINCLUDE 3 resource. +// + + +///////////////////////////////////////////////////////////////////////////// +#endif // not APSTUDIO_INVOKED + diff --git a/plugin/kps/win/KPSInputDialog.cpp b/plugin/kps/win/KPSInputDialog.cpp new file mode 100644 index 00000000..0200e495 --- /dev/null +++ b/plugin/kps/win/KPSInputDialog.cpp @@ -0,0 +1,217 @@ + +/////////////////////////////////////////////////////////////////////////// +// +// Copyright (c) 2014, Brendan Bolles +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// +/////////////////////////////////////////////////////////////////////////// + +// ------------------------------------------------------------------------ +// +// DDS Photoshop plug-in +// +// by Brendan Bolles +// +// ------------------------------------------------------------------------ + +#ifdef _WIN32 + +#include "DDS.h" + +#include "DDS_UI.h" + +#include + +enum { + IN_noUI = -1, + IN_OK = IDOK, + IN_Cancel = IDCANCEL, + IN_Set_Defaults_Button, + IN_Alpha_Radio_Transparent, + IN_Alpha_Radio_Channel, + IN_Auto_Checkbox +}; + +// sensible Win macros +#define GET_ITEM(ITEM) GetDlgItem(hwndDlg, (ITEM)) + +#define SET_CHECK(ITEM, VAL) SendMessage(GET_ITEM(ITEM), BM_SETCHECK, (WPARAM)(VAL), (LPARAM)0) +#define GET_CHECK(ITEM) SendMessage(GET_ITEM(ITEM), BM_GETCHECK, (WPARAM)0, (LPARAM)0) + +#define ENABLE_ITEM(ITEM, ENABLE) EnableWindow(GetDlgItem(hwndDlg, (ITEM)), (ENABLE)); + + +static DialogAlpha g_alpha = DIALOG_ALPHA_CHANNEL; +static bool g_autoD = false; + + +static void ReadPrefs() +{ + // read prefs from registry + HKEY dds_hkey; + LONG reg_error = RegOpenKeyEx(HKEY_CURRENT_USER, DDS_PREFIX, 0, KEY_READ, &dds_hkey); + + if(reg_error == ERROR_SUCCESS) + { + DWORD type; + DWORD size = sizeof(DWORD); + + DWORD alpha = g_alpha, + autoD = g_autoD; + + reg_error = RegQueryValueEx(dds_hkey, DDS_ALPHA_KEY, NULL, &type, (LPBYTE)&alpha, &size); + + reg_error = RegQueryValueEx(dds_hkey, DDS_AUTO_KEY, NULL, &type, (LPBYTE)&autoD, &size); + + if(reg_error == ERROR_SUCCESS && type == REG_DWORD) + g_autoD = autoD; + + reg_error = RegCloseKey(dds_hkey); + } +} + +static void WriteAlphaPrefs() +{ + HKEY dds_hkey; + + LONG reg_error = RegCreateKeyEx(HKEY_CURRENT_USER, DDS_PREFIX, NULL, NULL, REG_OPTION_NON_VOLATILE, KEY_WRITE, NULL, &dds_hkey, NULL); + + if(reg_error == ERROR_SUCCESS) + { + DWORD alpha = g_alpha; + + reg_error = RegSetValueEx(dds_hkey, DDS_ALPHA_KEY, NULL, REG_DWORD, (BYTE *)&alpha, sizeof(DWORD)); + + reg_error = RegCloseKey(dds_hkey); + } +} + +static void WriteAutoPrefs() +{ + HKEY dds_hkey; + + LONG reg_error = RegCreateKeyEx(HKEY_CURRENT_USER, DDS_PREFIX, NULL, NULL, REG_OPTION_NON_VOLATILE, KEY_WRITE, NULL, &dds_hkey, NULL); + + if(reg_error == ERROR_SUCCESS) + { + DWORD autoD = g_autoD; + + reg_error = RegSetValueEx(dds_hkey, DDS_AUTO_KEY, NULL, REG_DWORD, (BYTE *)&autoD, sizeof(DWORD)); + + reg_error = RegCloseKey(dds_hkey); + } +} + + +static WORD g_item_clicked = 0; + +static BOOL CALLBACK DialogProc(HWND hwndDlg, UINT message, WPARAM wParam, LPARAM lParam) +{ + BOOL fError; + + switch(message) + { + case WM_INITDIALOG: + SET_CHECK( (g_alpha == DIALOG_ALPHA_TRANSPARENCY ? IN_Alpha_Radio_Transparent : + g_alpha == DIALOG_ALPHA_CHANNEL ? IN_Alpha_Radio_Channel : + IN_Alpha_Radio_Transparent), TRUE); + + SET_CHECK(IN_Auto_Checkbox, g_autoD); + + return TRUE; + + case WM_NOTIFY: + return FALSE; + + case WM_COMMAND: + g_alpha = GET_CHECK(IN_Alpha_Radio_Transparent) ? DIALOG_ALPHA_TRANSPARENCY : + GET_CHECK(IN_Alpha_Radio_Channel) ? 
DIALOG_ALPHA_CHANNEL : + DIALOG_ALPHA_TRANSPARENCY; + + g_autoD = GET_CHECK(IN_Auto_Checkbox); + + g_item_clicked = LOWORD(wParam); + + switch(g_item_clicked) + { + case IN_OK: + case IN_Cancel: + EndDialog(hwndDlg, 0); + return TRUE; + + case IN_Set_Defaults_Button: + WriteAlphaPrefs(); + WriteAutoPrefs(); + return TRUE; + } + } + return FALSE; +} + + +static inline bool KeyIsDown(int vKey) +{ + return (GetAsyncKeyState(vKey) & 0x8000); +} + + +bool +DDS_InUI( + DDS_InUI_Data *params, + bool has_alpha, + const void *plugHndl, + const void *mwnd) +{ + bool continue_reading = true; + + g_alpha = DIALOG_ALPHA_CHANNEL; + g_autoD = false; + + ReadPrefs(); + + // check for that shift key + bool shift_key = ( KeyIsDown(VK_LSHIFT) || KeyIsDown(VK_RSHIFT) || KeyIsDown(VK_LMENU) || KeyIsDown(VK_RMENU) ); + + if((g_autoD && has_alpha) || shift_key) + { + int status = DialogBox((HINSTANCE)plugHndl, (LPSTR)"IN_DIALOG", (HWND)mwnd, (DLGPROC)DialogProc); + + if(g_item_clicked == IN_OK) + { + WriteAutoPrefs(); + + continue_reading = true; + } + else + continue_reading = false; + } + + params->alpha = g_alpha; + + return continue_reading; +} + +#endif diff --git a/plugin/kps/win/KPSOutputDialog.cpp b/plugin/kps/win/KPSOutputDialog.cpp new file mode 100644 index 00000000..13b04d76 --- /dev/null +++ b/plugin/kps/win/KPSOutputDialog.cpp @@ -0,0 +1,371 @@ + +/////////////////////////////////////////////////////////////////////////// +// +// Copyright (c) 2014, Brendan Bolles +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// +/////////////////////////////////////////////////////////////////////////// + +// ------------------------------------------------------------------------ +// +// DDS Photoshop plug-in +// +// by Brendan Bolles +// +// ------------------------------------------------------------------------ + +#if _WIN32 + +#include "DDS.h" + +#include "DDS_UI.h" +#include "DDS_version.h" + +#include + +#include + +enum { + OUT_noUI = -1, + OUT_OK = IDOK, + OUT_Cancel = IDCANCEL, + OUT_Format_Menu, + OUT_Mipmap_Check, + OUT_Filter_Menu, + OUT_Filter_Menu_Label, + OUT_Alpha_Radio_None, + OUT_Alpha_Radio_Transparency, + OUT_Alpha_Radio_Channel, + OUT_Premultiply_Check, + OUT_Alpha_Frame, + OUT_CubeMap_Check +}; + +// sensible Win macros +#define GET_ITEM(ITEM) GetDlgItem(hwndDlg, (ITEM)) + +#define SET_CHECK(ITEM, VAL) SendMessage(GET_ITEM(ITEM), BM_SETCHECK, (WPARAM)(VAL), (LPARAM)0) +#define GET_CHECK(ITEM) SendMessage(GET_ITEM(ITEM), BM_GETCHECK, (WPARAM)0, (LPARAM)0) + +#define ENABLE_ITEM(ITEM, ENABLE) EnableWindow(GetDlgItem(hwndDlg, (ITEM)), (ENABLE)); + +#define SHOW_ITEM(ITEM, SHOW) ShowWindow(GetDlgItem(hwndDlg, (ITEM)), (SHOW) ? SW_SHOW : SW_HIDE) + + + +static DialogFormat g_format = DIALOG_FMT_DXT5; +static DialogAlpha g_alpha = DIALOG_ALPHA_NONE; +static bool g_premultiply = false; +static bool g_mipmap = false; +static Dialog_Filter g_filter = DIALOG_FILTER_MITCHELL; +static bool g_cubemap = false; + +static bool g_have_transparency = false; +static const char *g_alpha_name = NULL; +static bool g_ae_ui = false; + +static WORD g_item_clicked = 0; + + +static void TrackMipmap(HWND hwndDlg) +{ + BOOL enable_state = GET_CHECK(OUT_Mipmap_Check); + ENABLE_ITEM(OUT_Filter_Menu, enable_state); + ENABLE_ITEM(OUT_Filter_Menu_Label, enable_state); +} + + +static void TrackAlpha(HWND hwndDlg) +{ + BOOL enable_state = !GET_CHECK(OUT_Alpha_Radio_None); + ENABLE_ITEM(OUT_Premultiply_Check, enable_state); +} + + +static BOOL CALLBACK DialogProc(HWND hwndDlg, UINT message, WPARAM wParam, LPARAM lParam) +{ + BOOL fError; + + switch(message) + { + case WM_INITDIALOG: + do{ + // set up the menu + // I prefer to do it programatically to insure that the compression types match the index + const char *opts[] = { "DXT1", + "DXT1A", + "DXT2", + "DXT3", + "DXT4", + "DXT5", + "DXT5A", + "3Dc", + "DXN", + "Uncompressed" }; + + HWND menu = GetDlgItem(hwndDlg, OUT_Format_Menu); + + for(int i=DIALOG_FMT_DXT1; i <= DIALOG_FMT_UNCOMPRESSED; i++) + { + SendMessage(menu, (UINT)CB_ADDSTRING, (WPARAM)wParam, (LPARAM)(LPCTSTR)opts[i] ); + SendMessage(menu, (UINT)CB_SETITEMDATA, (WPARAM)i, (LPARAM)(DWORD)i); // this is the compresion number + + if(i == g_format) + SendMessage(menu, CB_SETCURSEL, (WPARAM)i, (LPARAM)0); + } + + + const char *f_opts[] = {"Box", + "Tent", + "Lanczos4", + "Mitchell", + "Kaiser" }; + + HWND f_menu = GetDlgItem(hwndDlg, OUT_Filter_Menu); + + for(int i=DIALOG_FILTER_BOX; i <= DIALOG_FILTER_KAISER; i++) + { + SendMessage(f_menu, (UINT)CB_ADDSTRING, (WPARAM)wParam, (LPARAM)(LPCTSTR)f_opts[i] ); + SendMessage(f_menu, (UINT)CB_SETITEMDATA, (WPARAM)i, (LPARAM)(DWORD)i); // this is the compresion number + + if(i == g_filter) + SendMessage(f_menu, CB_SETCURSEL, (WPARAM)i, (LPARAM)0); + } + }while(0); + + SET_CHECK(OUT_Mipmap_Check, g_mipmap); + + if(!g_have_transparency) + { + ENABLE_ITEM(OUT_Alpha_Radio_Transparency, FALSE); + + if(g_alpha == DIALOG_ALPHA_TRANSPARENCY) + { + g_alpha = (g_alpha_name != NULL ? 
DIALOG_ALPHA_CHANNEL : DIALOG_ALPHA_NONE); + } + } + + if(g_alpha_name == NULL) + { + ENABLE_ITEM(OUT_Alpha_Radio_Channel, FALSE); + + if(g_alpha == DIALOG_ALPHA_CHANNEL) + { + g_alpha = (g_have_transparency ? DIALOG_ALPHA_TRANSPARENCY : DIALOG_ALPHA_NONE); + } + } + else + { + SetDlgItemText(hwndDlg, OUT_Alpha_Radio_Channel, g_alpha_name); + } + + SET_CHECK(OUT_Premultiply_Check, g_premultiply); + + SET_CHECK( (g_alpha == DIALOG_ALPHA_NONE ? OUT_Alpha_Radio_None : + g_alpha == DIALOG_ALPHA_TRANSPARENCY ? OUT_Alpha_Radio_Transparency : + g_alpha == DIALOG_ALPHA_CHANNEL ? OUT_Alpha_Radio_Channel : + OUT_Alpha_Radio_None), TRUE); + + SET_CHECK(OUT_CubeMap_Check, g_cubemap); + + TrackAlpha(hwndDlg); + TrackMipmap(hwndDlg); + + if(g_ae_ui) + { + for(int i = OUT_Alpha_Radio_None; i <= OUT_Alpha_Frame; i++) + SHOW_ITEM(i, false); + + WINDOWPLACEMENT winPlace, cubemapPlace, okPlace, cancelPlace; + winPlace.length = cubemapPlace.length = okPlace.length = cancelPlace.length = sizeof(WINDOWPLACEMENT); + + GetWindowPlacement(hwndDlg, &winPlace); + GetWindowPlacement(GET_ITEM(OUT_CubeMap_Check), &cubemapPlace); + GetWindowPlacement(GET_ITEM(OUT_OK), &okPlace); + GetWindowPlacement(GET_ITEM(OUT_Cancel), &cancelPlace); + + const int resize = 170; + + winPlace.rcNormalPosition.bottom -= resize; + cubemapPlace.rcNormalPosition.top -= resize; + cubemapPlace.rcNormalPosition.bottom -= resize; + okPlace.rcNormalPosition.top -= resize; + okPlace.rcNormalPosition.bottom -= resize; + cancelPlace.rcNormalPosition.top -= resize; + cancelPlace.rcNormalPosition.bottom -= resize; + + SetWindowPlacement(GET_ITEM(OUT_CubeMap_Check), &cubemapPlace); + SetWindowPlacement(GET_ITEM(OUT_Cancel), &cancelPlace); + SetWindowPlacement(GET_ITEM(OUT_OK), &okPlace); + SetWindowPlacement(hwndDlg, &winPlace); + } + + return TRUE; + + case WM_NOTIFY: + return FALSE; + + case WM_COMMAND: + g_item_clicked = LOWORD(wParam); + + switch(g_item_clicked) + { + case OUT_OK: + case OUT_Cancel: // do the same thing, but g_item_clicked will be different + do{ + HWND menu = GetDlgItem(hwndDlg, OUT_Format_Menu); + + // get the channel index associated with the selected menu item + LRESULT cur_sel = SendMessage(menu,(UINT)CB_GETCURSEL, (WPARAM)0, (LPARAM)0); + + g_format = (DialogFormat)SendMessage(menu, (UINT)CB_GETITEMDATA, (WPARAM)cur_sel, (LPARAM)0); + + g_alpha = GET_CHECK(OUT_Alpha_Radio_None) ? DIALOG_ALPHA_NONE : + GET_CHECK(OUT_Alpha_Radio_Transparency) ? DIALOG_ALPHA_TRANSPARENCY : + GET_CHECK(OUT_Alpha_Radio_Channel) ? 
DIALOG_ALPHA_CHANNEL : + DIALOG_ALPHA_TRANSPARENCY; + + g_premultiply = GET_CHECK(OUT_Premultiply_Check); + + g_mipmap = GET_CHECK(OUT_Mipmap_Check); + + HWND f_menu = GetDlgItem(hwndDlg, OUT_Filter_Menu); + cur_sel = SendMessage(f_menu,(UINT)CB_GETCURSEL, (WPARAM)0, (LPARAM)0); + g_filter = (Dialog_Filter)SendMessage(f_menu, (UINT)CB_GETITEMDATA, (WPARAM)cur_sel, (LPARAM)0); + + g_cubemap = GET_CHECK(OUT_CubeMap_Check); + + EndDialog(hwndDlg, 0); + return TRUE; + }while(0); + + case OUT_Alpha_Radio_None: + case OUT_Alpha_Radio_Transparency: + case OUT_Alpha_Radio_Channel: + TrackAlpha(hwndDlg); + return TRUE; + + + case OUT_Mipmap_Check: + TrackMipmap(hwndDlg); + return TRUE; + } + } + return FALSE; +} + +bool +DDS_OutUI( + DDS_OutUI_Data *params, + bool have_transparency, + const char *alpha_name, + bool ae_ui, + const void *plugHndl, + const void *mwnd) +{ + g_format = params->format; + g_alpha = params->alpha; + g_premultiply = params->premultiply; + g_mipmap = params->mipmap; + g_filter = params->filter; + g_mipmap = params->mipmap; + + g_have_transparency = have_transparency; + g_alpha_name = alpha_name; + g_ae_ui = ae_ui; + + if(ae_ui) + { + g_alpha = DIALOG_ALPHA_TRANSPARENCY; + g_premultiply = false; + assert(g_alpha_name == NULL); + } + + int status = DialogBox((HINSTANCE)plugHndl, (LPSTR)"OUT_DIALOG", (HWND)mwnd, (DLGPROC)DialogProc); + + + if(g_item_clicked == OUT_OK) + { + params->format = g_format; + params->alpha = g_alpha; + params->premultiply = g_premultiply; + params->mipmap = g_mipmap; + params->filter = g_filter; + params->cubemap = g_cubemap; + + return true; + } + else + return false; +} + + +enum { + ABOUT_noUI = -1, + ABOUT_OK = IDOK, + ABOUT_Plugin_Version_String = 4, +}; + +static const char *g_plugin_version_string = NULL; + +static BOOL CALLBACK AboutProc(HWND hwndDlg, UINT message, WPARAM wParam, LPARAM lParam) +{ + BOOL fError; + + switch(message) + { + case WM_INITDIALOG: + SetDlgItemText(hwndDlg, ABOUT_Plugin_Version_String, g_plugin_version_string); + + return TRUE; + + case WM_NOTIFY: + return FALSE; + + case WM_COMMAND: + switch(LOWORD(wParam)) + { + case OUT_OK: + case OUT_Cancel: + EndDialog(hwndDlg, 0); + return TRUE; + } + } + return FALSE; +} + +void +DDS_About( + const char *plugin_version_string, + const void *plugHndl, + const void *mwnd) +{ + g_plugin_version_string = plugin_version_string; + + int status = DialogBox((HINSTANCE)plugHndl, (LPSTR)"ABOUT_DIALOG", (HWND)mwnd, (DLGPROC)AboutProc); +} + +#endif diff --git a/plugin/kps/win/resource.h b/plugin/kps/win/resource.h new file mode 100644 index 00000000..a70236c9 --- /dev/null +++ b/plugin/kps/win/resource.h @@ -0,0 +1,20 @@ +//{{NO_DEPENDENCIES}} +// Microsoft Visual C++ generated include file. +// Used by DDS_Dialogs.rc +// +#define IDC_RADIO1 1001 +#define IDC_RADIO2 1002 +#define IDC_RADIO3 1003 +#define IDC_RADIO4 1004 +#define IDC_CHECK1 1016 + +// Next default values for new objects +// +#ifdef APSTUDIO_INVOKED +#ifndef APSTUDIO_READONLY_SYMBOLS +#define _APS_NEXT_RESOURCE_VALUE 103 +#define _APS_NEXT_COMMAND_VALUE 40001 +#define _APS_NEXT_CONTROL_VALUE 1017 +#define _APS_NEXT_SYMED_VALUE 101 +#endif +#endif From b399d163c7262fac59ae70613bdfb042b746ba43 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Tue, 9 Mar 2021 22:35:10 -0800 Subject: [PATCH 016/901] kram-ps - fix resource gen from cmake and correct Info.plist file Build is still not finding _main(). Will try adding one, even though other working plugin doesn't have one. 
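For anyone debugging the resource step by hand: the custom Rez rule touched in the diff below expands to roughly the command line sketched here. This is only a sketch, not the exact build command — SDK_SOURCE_DIR, the output name, and the sysroot are the values configured in plugin/CMakeLists.txt and a default Xcode install.

    # locate and run Apple's Rez resource compiler manually,
    # mirroring the include paths and sysroot used by the CMake rule
    xcrun Rez \
        -I ${SDK_SOURCE_DIR}/resources/ \
        -I ${SDK_SOURCE_DIR}/photoshop/ \
        -arch x86_64 \
        -isysroot /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/ \
        -o KPS.rsrc \
        KPS.r
    # if the output looks suspect, DeRez (xcrun DeRez KPS.rsrc) should dump the compiled resources

Running this from a terminal is a quick way to confirm KPS.rsrc is actually produced before wiring it into the plugin bundle.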
--- plugin/CMakeLists.txt | 6 +++--- plugin/kps/KPS.r | 34 +++++++++++++++++++++---------- plugin/kps/mac/Info.plist | 42 +++++++++++++++++++-------------------- 3 files changed, 46 insertions(+), 36 deletions(-) mode change 100644 => 100755 plugin/kps/mac/Info.plist diff --git a/plugin/CMakeLists.txt b/plugin/CMakeLists.txt index de2c32bc..98eab9a5 100644 --- a/plugin/CMakeLists.txt +++ b/plugin/CMakeLists.txt @@ -56,7 +56,7 @@ set_target_properties(${myTargetApp} PROPERTIES # this drops app from 762KB to 174KB with only ATE enabled # note about needing -gfull instead of -gused here or debug info messed up: # https://gist.github.com/tkersey/39b4fe69e14b859889ffadccb009e397 - XCODE_ATTRIBUTE_DEAD_CODE_STRIPPING YES + #XCODE_ATTRIBUTE_DEAD_CODE_STRIPPING YES XCODE_ATTRIBUTE_LLVM_LTO[variant=Release] "Incremental" #------------------------- @@ -244,14 +244,14 @@ add_custom_command(TARGET ${myTargetApp} PRE_BUILD COMMAND ${rezCompiler} -I ${SDK_SOURCE_DIR}/resources/ -I ${SDK_SOURCE_DIR}/photoshop/ - -I ${SDK_COMMON_DIR}/includes/ + # -I ${SDK_COMMON_DIR}/includes/ -arch x86_64 # needs this for Carbon.r and CoreServices.r in the Adobe .r headers #-F Carbon #-F CoreServices - -F /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/System/Library/Frameworks/ + -isysroot /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/ -o "${KPS_SOURCE_DIR}/${myTargetApp}.rsrc" ${KPS_SOURCE_DIR}/KPS.r diff --git a/plugin/kps/KPS.r b/plugin/kps/KPS.r index be8da61b..50dc6ca7 100755 --- a/plugin/kps/KPS.r +++ b/plugin/kps/KPS.r @@ -85,18 +85,30 @@ // also CoreServices/CoreServices.r is pulled in from another Adobe header, which is also Carbon //#include "MacOMacrezXcode.h" -#define Macintosh 1 - -#ifndef TARGET_API_MAC_CARBON -#define TARGET_API_MAC_CARBON 1 -#endif + #define Macintosh 1 + #define MSWindows 0 + #define Rez 1 + + #ifndef TARGET_MAC_OS + #define TARGET_MAC_OS 1 + #endif + + #ifndef DEBUG + #ifndef NDEBUG + #define DEBUG 1 + #else + #define DEBUG 0 + #endif + #endif -#ifndef TARGET_MAC_OS -#define TARGET_MAC_OS 1 -#endif + #define BUILDING_FOR_MACH 1 -#include + // can this carbon dependency be eliminated? + #ifndef TARGET_API_MAC_CARBON + #define TARGET_API_MAC_CARBON 1 + #endif + #include #define __PIMac__ 1 #define DLLExport extern "C" @@ -106,14 +118,14 @@ #define Macintosh 1 #endif -#ifdef __PIMac__ +#if defined(__PIMac__) #include "PIGeneral.r" #elif defined(__PIWin__) #include "PIGeneral.h" #endif - //#include "PIUtilities.r" + #ifndef ResourceID #define ResourceID 16000 #endif diff --git a/plugin/kps/mac/Info.plist b/plugin/kps/mac/Info.plist old mode 100644 new mode 100755 index 311d1a8c..0537f0ac --- a/plugin/kps/mac/Info.plist +++ b/plugin/kps/mac/Info.plist @@ -1,22 +1,20 @@ - - - - - CFBundleDevelopmentRegion - English - CFBundleExecutable - $(PRODUCT_NAME) - CFBundleGetInfoString - 22.0 © 2020 Adobe. All rights reserved. - NSHumanReadableCopyright - © 2020 Adobe. All rights reserved. 
- CFBundleShortVersionString - 22.0.0 - CFBundleName - $(PRODUCT_NAME) - CFBundlePackageType - $(PLUGIN_TYPE) - CFBundleSignature - 8BIM - - + + + + + CFBundleDevelopmentRegion + English + CFBundleExecutable + $(PRODUCT_NAME) + CFBundleGetInfoString + ©2021 kram-ps + CFBundleIdentifier + $(PRODUCT_BUNDLE_IDENTIFIER) + CFBundleName + $(PRODUCT_NAME) + CFBundlePackageType + $(PLUGIN_TYPE) + CFBundleSignature + 8BIM + + From e02ec584344a3ba824d0c54d020f0450d41c62c1 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Tue, 9 Mar 2021 22:45:08 -0800 Subject: [PATCH 017/901] kram-ps - add dummy main() to KPS.cpp to get the plugin to link PS doesn't recognize the plugin. Will have to make sure all entry points are correct. Something might ref DDS. --- plugin/kps/KPS.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/plugin/kps/KPS.cpp b/plugin/kps/KPS.cpp index c45e07a0..80562844 100755 --- a/plugin/kps/KPS.cpp +++ b/plugin/kps/KPS.cpp @@ -1138,3 +1138,10 @@ DLLExport MACPASCAL void PluginMain(const short selector, } } + +// Tthis is just to silence broken build. +// Even though this is a plugin, Xcode wants _main or won't link. +int main(int macroUnusedArg(argc), char** macroUnusedArg(argv)) +{ + return 0; +} From f8f22cac7267f5828a0f648f389b1609dd7d3848 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Wed, 10 Mar 2021 08:43:06 -0800 Subject: [PATCH 018/901] kram-ps - call PluginMain to prevent dead-strip of code, fix rsrc gen, turn on rsrc creation PS still doesn't see this plugin at all. I tried with and without the working rsrc file from the legacy project. Only the legacy project works, but that's not checked in. --- plugin/CMakeLists.txt | 16 +++++++++++++--- plugin/kps/KPS.cpp | 11 ++++++++++- 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/plugin/CMakeLists.txt b/plugin/CMakeLists.txt index 98eab9a5..c1ea92db 100644 --- a/plugin/CMakeLists.txt +++ b/plugin/CMakeLists.txt @@ -56,7 +56,7 @@ set_target_properties(${myTargetApp} PROPERTIES # this drops app from 762KB to 174KB with only ATE enabled # note about needing -gfull instead of -gused here or debug info messed up: # https://gist.github.com/tkersey/39b4fe69e14b859889ffadccb009e397 - #XCODE_ATTRIBUTE_DEAD_CODE_STRIPPING YES + XCODE_ATTRIBUTE_DEAD_CODE_STRIPPING YES XCODE_ATTRIBUTE_LLVM_LTO[variant=Release] "Incremental" #------------------------- @@ -203,6 +203,9 @@ target_sources(${myTargetApp} PRIVATE # Base.lproj/Main.storyboard ${appNibSources} + # this is created in the PRE_BUILD step below + ${KPS_SOURCE_DIR}/${myTargetApp}.rsrc + ${KPS_SOURCE_DIR}/mac/Info.plist # ${KPS_SOURCE_DIR}/mac/Info.plist @@ -231,7 +234,7 @@ set_source_files_properties( # turned off for now, and checking in pre-built resource # but app still can't find _main entrpoint. 
-if (FALSE) +if (TRUE) execute_process( COMMAND xcrun -f Rez @@ -242,17 +245,24 @@ execute_process( add_custom_command(TARGET ${myTargetApp} PRE_BUILD DEPENDS ${KPS_SOURCE_DIR}/KPS.r COMMAND ${rezCompiler} + + # several .r are located across the build -I ${SDK_SOURCE_DIR}/resources/ -I ${SDK_SOURCE_DIR}/photoshop/ # -I ${SDK_COMMON_DIR}/includes/ -arch x86_64 + # use the datafork + -useDF + # needs this for Carbon.r and CoreServices.r in the Adobe .r headers #-F Carbon #-F CoreServices + + # where to find framework files -isysroot /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/ - + -o "${KPS_SOURCE_DIR}/${myTargetApp}.rsrc" ${KPS_SOURCE_DIR}/KPS.r ) diff --git a/plugin/kps/KPS.cpp b/plugin/kps/KPS.cpp index 80562844..5239a7b8 100755 --- a/plugin/kps/KPS.cpp +++ b/plugin/kps/KPS.cpp @@ -989,6 +989,12 @@ DLLExport MACPASCAL void PluginMain(const short selector, intptr_t *dataPointer, short *result) { + // using this to keep dead-strip from removing all code + if (selector == formatSelectorAbout && formatParamBlock == nullptr) + { + return; + } + if (selector == formatSelectorAbout) { sSPBasic = ((AboutRecordPtr)formatParamBlock)->sSPBasic; @@ -1139,9 +1145,12 @@ DLLExport MACPASCAL void PluginMain(const short selector, } } -// Tthis is just to silence broken build. +// This is just to silence broken build. // Even though this is a plugin, Xcode wants _main or won't link. int main(int macroUnusedArg(argc), char** macroUnusedArg(argv)) { + // call this to prevent dead-stripping + PluginMain(formatSelectorAbout, nullptr, nullptr, nullptr); + return 0; } From 4bdcf7b65a80704867e6fd5291cbd754aabf009f Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Thu, 18 Mar 2021 10:24:19 -0700 Subject: [PATCH 019/901] kram - update Win builds settings for perf and faster builds, update lodepng --- kram/CMakeLists.txt | 28 +- kramv/CMakeLists.txt | 2 +- libkram/CMakeLists.txt | 11 +- libkram/lodepng/LICENSE | 46 +- libkram/lodepng/lodepng.cpp | 12641 +++++++++++++++++----------------- libkram/lodepng/lodepng.h | 3738 +++++----- 6 files changed, 8502 insertions(+), 7964 deletions(-) diff --git a/kram/CMakeLists.txt b/kram/CMakeLists.txt index 4d852cad..b2afa050 100644 --- a/kram/CMakeLists.txt +++ b/kram/CMakeLists.txt @@ -83,12 +83,28 @@ elseif (WIN32) string(REGEX REPLACE "/EHsc" "/EHs-c-" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") # don't need force with apps, since they only access kram folder files which include KramConfig - # force include and fix STL - #target_compile_options(${myTargetApp} PRIVATE /FIKramConfig.h) - target_compile_definitions(${myTargetApp} PRIVATE "-D_D_HAS_EXCEPTIONS=0") - - target_compile_options(${myTargetApp} PRIVATE /W3 /arch:AVX) -elseif (UNIXBUILD) + + # all warnings, AVX1, and multiprocess compiles + target_compile_options(${myTargetApp} PRIVATE /W3 /arch:AVX /MP /GF /FC) + + # fix STL + target_compile_definitions(${myTargetApp} PRIVATE "-D_D_HAS_EXCEPTIONS=0 -D_ITERATOR_DEBUG_LEVEL=0") + + if (CMAKE_BUILD_TYPE EQUAL "Debug") + target_compile_definitions(${myTargetLib} PRIVATE "/INCREMENTAL") + + elseif (CMAKE_BUILD_TYPE EQUAL "Release") + # only dead strip on Release builds since this disables Incremental linking, may want Profile build that doesn't use this + target_compile_definitions(${myTargetLib} PRIVATE "/OPT:REF") + + # other possibliities + # /GL - whole program optimization + # /Gy - edit and continue with function level linking + # /Oi - enable intrinsic functions + + endif() + +elseif (UNIXBUILD) 
target_link_libraries(${myTargetApp} libkram) # TODO: finish this diff --git a/kramv/CMakeLists.txt b/kramv/CMakeLists.txt index 68599317..cd290094 100644 --- a/kramv/CMakeLists.txt +++ b/kramv/CMakeLists.txt @@ -61,7 +61,7 @@ set_target_properties(${myTargetApp} PROPERTIES # this drops app from 762KB to 174KB with only ATE enabled # note about needing -gfull instead of -gused here or debug info messed up: # https://gist.github.com/tkersey/39b4fe69e14b859889ffadccb009e397 - XCODE_ATTRIBUTE_DEAD_CODE_STRIPPING YES + XCODE_ATTRIBUTE_DEAD_CODE_STRIPPING[variant=Release] YES XCODE_ATTRIBUTE_LLVM_LTO[variant=Release] "Incremental" #------------------------- diff --git a/libkram/CMakeLists.txt b/libkram/CMakeLists.txt index 7f3173e7..3c3b3be5 100644 --- a/libkram/CMakeLists.txt +++ b/libkram/CMakeLists.txt @@ -158,9 +158,14 @@ elseif (WIN32) string(REGEX REPLACE "/GR" "/GR-" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") string(REGEX REPLACE "/EHsc" "/EHs-c-" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") - # force include and fix STL - target_compile_options(${myTargetLib} PRIVATE /FIKramConfig.h /W3 /arch:AVX) - target_compile_definitions(${myTargetLib} PRIVATE "-D_D_HAS_EXCEPTIONS=0") + # force include + target_compile_options(${myTargetLib} PRIVATE /FIKramConfig.h) + + # all warnings, AVX1, and multiprocess compiles + target_compile_options(${myTargetLib} PRIVATE /W3 /arch:AVX /MP) + + # fix STL + target_compile_definitions(${myTargetLib} PRIVATE "-D_D_HAS_EXCEPTIONS=0 -D_ITERATOR_DEBUG_LEVEL=0") elseif (UNIXBUILD) # TODO: finish this diff --git a/libkram/lodepng/LICENSE b/libkram/lodepng/LICENSE index 9382c4d0..a5fb0603 100644 --- a/libkram/lodepng/LICENSE +++ b/libkram/lodepng/LICENSE @@ -1,25 +1,21 @@ -LodePNG version 20160124 - -Copyright (c) 2005-2016 Lode Vandevenne - -This software is provided 'as-is', without any express or implied -warranty. In no event will the authors be held liable for any damages -arising from the use of this software. - -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it -freely, subject to the following restrictions: - - 1. The origin of this software must not be misrepresented; you must not - claim that you wrote the original software. If you use this software - in a product, an acknowledgment in the product documentation would be - appreciated but is not required. - - 2. Altered source versions must be plainly marked as such, and must not be - misrepresented as being the original software. - - 3. This notice may not be removed or altered from any source - distribution. - -The manual and changelog are in the header file "lodepng.h" -Rename this file to lodepng.cpp to use it for C++, or to lodepng.c to use it for C. +Copyright (c) 2005-2018 Lode Vandevenne + +This software is provided 'as-is', without any express or implied +warranty. In no event will the authors be held liable for any damages +arising from the use of this software. + +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it +freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + + 2. 
Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + + 3. This notice may not be removed or altered from any source + distribution. + diff --git a/libkram/lodepng/lodepng.cpp b/libkram/lodepng/lodepng.cpp index a9f0e0c8..b08b0858 100644 --- a/libkram/lodepng/lodepng.cpp +++ b/libkram/lodepng/lodepng.cpp @@ -1,6168 +1,6473 @@ -/* -LodePNG version 20160124 - -Copyright (c) 2005-2016 Lode Vandevenne - -This software is provided 'as-is', without any express or implied -warranty. In no event will the authors be held liable for any damages -arising from the use of this software. - -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it -freely, subject to the following restrictions: - - 1. The origin of this software must not be misrepresented; you must not - claim that you wrote the original software. If you use this software - in a product, an acknowledgment in the product documentation would be - appreciated but is not required. - - 2. Altered source versions must be plainly marked as such, and must not be - misrepresented as being the original software. - - 3. This notice may not be removed or altered from any source - distribution. -*/ - -/* -The manual and changelog are in the header file "lodepng.h" -Rename this file to lodepng.cpp to use it for C++, or to lodepng.c to use it for C. -*/ - -#include "lodepng.h" - -#include -#include - -#ifdef LODEPNG_COMPILE_CPP -#include -#endif /*LODEPNG_COMPILE_CPP*/ - -#if defined(_MSC_VER) && (_MSC_VER >= 1310) /*Visual Studio: A few warning types are not desired here.*/ -#pragma warning( disable : 4244 ) /*implicit conversions: not warned by gcc -Wall -Wextra and requires too much casts*/ -#pragma warning( disable : 4996 ) /*VS does not like fopen, but fopen_s is not standard C so unusable here*/ -#endif /*_MSC_VER */ - -const char* LODEPNG_VERSION_STRING = "20160124"; - -/* -This source file is built up in the following large parts. The code sections -with the "LODEPNG_COMPILE_" #defines divide this up further in an intermixed way. --Tools for C and common code for PNG and Zlib --C Code for Zlib (huffman, deflate, ...) --C Code for PNG (file format chunks, adam7, PNG filters, color conversions, ...) --The C++ wrapper around all of the above -*/ - -/*The malloc, realloc and free functions defined here with "lodepng_" in front -of the name, so that you can easily change them to others related to your -platform if needed. Everything else in the code calls these. Pass --DLODEPNG_NO_COMPILE_ALLOCATORS to the compiler, or comment out -#define LODEPNG_COMPILE_ALLOCATORS in the header, to disable the ones here and -define them in your own project's source files without needing to change -lodepng source code. 
Don't forget to remove "static" if you copypaste them -from here.*/ - -#ifdef LODEPNG_COMPILE_ALLOCATORS -static void* lodepng_malloc(size_t size) -{ - return malloc(size); -} - -static void* lodepng_realloc(void* ptr, size_t new_size) -{ - return realloc(ptr, new_size); -} - -static void lodepng_free(void* ptr) -{ - free(ptr); -} -#else /*LODEPNG_COMPILE_ALLOCATORS*/ -void* lodepng_malloc(size_t size); -void* lodepng_realloc(void* ptr, size_t new_size); -void lodepng_free(void* ptr); -#endif /*LODEPNG_COMPILE_ALLOCATORS*/ - -/* ////////////////////////////////////////////////////////////////////////// */ -/* ////////////////////////////////////////////////////////////////////////// */ -/* // Tools for C, and common code for PNG and Zlib. // */ -/* ////////////////////////////////////////////////////////////////////////// */ -/* ////////////////////////////////////////////////////////////////////////// */ - -/* -Often in case of an error a value is assigned to a variable and then it breaks -out of a loop (to go to the cleanup phase of a function). This macro does that. -It makes the error handling code shorter and more readable. - -Example: if(!uivector_resizev(&frequencies_ll, 286, 0)) ERROR_BREAK(83); -*/ -#define CERROR_BREAK(errorvar, code)\ -{\ - errorvar = code;\ - break;\ -} - -/*version of CERROR_BREAK that assumes the common case where the error variable is named "error"*/ -#define ERROR_BREAK(code) CERROR_BREAK(error, code) - -/*Set error var to the error code, and return it.*/ -#define CERROR_RETURN_ERROR(errorvar, code)\ -{\ - errorvar = code;\ - return code;\ -} - -/*Try the code, if it returns error, also return the error.*/ -#define CERROR_TRY_RETURN(call)\ -{\ - unsigned error = call;\ - if(error) return error;\ -} - -/*Set error var to the error code, and return from the void function.*/ -#define CERROR_RETURN(errorvar, code)\ -{\ - errorvar = code;\ - return;\ -} - -/* -About uivector, ucvector and string: --All of them wrap dynamic arrays or text strings in a similar way. --LodePNG was originally written in C++. The vectors replace the std::vectors that were used in the C++ version. --The string tools are made to avoid problems with compilers that declare things like strncat as deprecated. --They're not used in the interface, only internally in this file as static functions. --As with many other structs in this file, the init and cleanup functions serve as ctor and dtor. -*/ - -#ifdef LODEPNG_COMPILE_ZLIB -/*dynamic vector of unsigned ints*/ -typedef struct uivector -{ - unsigned* data; - size_t size; /*size in number of unsigned longs*/ - size_t allocsize; /*allocated size in bytes*/ -} uivector; - -static void uivector_cleanup(void* p) -{ - ((uivector*)p)->size = ((uivector*)p)->allocsize = 0; - lodepng_free(((uivector*)p)->data); - ((uivector*)p)->data = NULL; -} - -/*returns 1 if success, 0 if failure ==> nothing done*/ -static unsigned uivector_reserve(uivector* p, size_t allocsize) -{ - if(allocsize > p->allocsize) - { - size_t newsize = (allocsize > p->allocsize * 2) ? 
allocsize : (allocsize * 3 / 2); - void* data = lodepng_realloc(p->data, newsize); - if(data) - { - p->allocsize = newsize; - p->data = (unsigned*)data; - } - else return 0; /*error: not enough memory*/ - } - return 1; -} - -/*returns 1 if success, 0 if failure ==> nothing done*/ -static unsigned uivector_resize(uivector* p, size_t size) -{ - if(!uivector_reserve(p, size * sizeof(unsigned))) return 0; - p->size = size; - return 1; /*success*/ -} - -/*resize and give all new elements the value*/ -static unsigned uivector_resizev(uivector* p, size_t size, unsigned value) -{ - size_t oldsize = p->size, i; - if(!uivector_resize(p, size)) return 0; - for(i = oldsize; i < size; ++i) p->data[i] = value; - return 1; -} - -static void uivector_init(uivector* p) -{ - p->data = NULL; - p->size = p->allocsize = 0; -} - -#ifdef LODEPNG_COMPILE_ENCODER -/*returns 1 if success, 0 if failure ==> nothing done*/ -static unsigned uivector_push_back(uivector* p, unsigned c) -{ - if(!uivector_resize(p, p->size + 1)) return 0; - p->data[p->size - 1] = c; - return 1; -} -#endif /*LODEPNG_COMPILE_ENCODER*/ -#endif /*LODEPNG_COMPILE_ZLIB*/ - -/* /////////////////////////////////////////////////////////////////////////// */ - -/*dynamic vector of unsigned chars*/ -typedef struct ucvector -{ - unsigned char* data; - size_t size; /*used size*/ - size_t allocsize; /*allocated size*/ -} ucvector; - -/*returns 1 if success, 0 if failure ==> nothing done*/ -static unsigned ucvector_reserve(ucvector* p, size_t allocsize) -{ - if(allocsize > p->allocsize) - { - size_t newsize = (allocsize > p->allocsize * 2) ? allocsize : (allocsize * 3 / 2); - void* data = lodepng_realloc(p->data, newsize); - if(data) - { - p->allocsize = newsize; - p->data = (unsigned char*)data; - } - else return 0; /*error: not enough memory*/ - } - return 1; -} - -/*returns 1 if success, 0 if failure ==> nothing done*/ -static unsigned ucvector_resize(ucvector* p, size_t size) -{ - if(!ucvector_reserve(p, size * sizeof(unsigned char))) return 0; - p->size = size; - return 1; /*success*/ -} - -#ifdef LODEPNG_COMPILE_PNG - -static void ucvector_cleanup(void* p) -{ - ((ucvector*)p)->size = ((ucvector*)p)->allocsize = 0; - lodepng_free(((ucvector*)p)->data); - ((ucvector*)p)->data = NULL; -} - -static void ucvector_init(ucvector* p) -{ - p->data = NULL; - p->size = p->allocsize = 0; -} -#endif /*LODEPNG_COMPILE_PNG*/ - -#ifdef LODEPNG_COMPILE_ZLIB -/*you can both convert from vector to buffer&size and vica versa. 
If you use -init_buffer to take over a buffer and size, it is not needed to use cleanup*/ -static void ucvector_init_buffer(ucvector* p, unsigned char* buffer, size_t size) -{ - p->data = buffer; - p->allocsize = p->size = size; -} -#endif /*LODEPNG_COMPILE_ZLIB*/ - -#if (defined(LODEPNG_COMPILE_PNG) && defined(LODEPNG_COMPILE_ANCILLARY_CHUNKS)) || defined(LODEPNG_COMPILE_ENCODER) -/*returns 1 if success, 0 if failure ==> nothing done*/ -static unsigned ucvector_push_back(ucvector* p, unsigned char c) -{ - if(!ucvector_resize(p, p->size + 1)) return 0; - p->data[p->size - 1] = c; - return 1; -} -#endif /*defined(LODEPNG_COMPILE_PNG) || defined(LODEPNG_COMPILE_ENCODER)*/ - - -/* ////////////////////////////////////////////////////////////////////////// */ - -#ifdef LODEPNG_COMPILE_PNG -#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS -/*returns 1 if success, 0 if failure ==> nothing done*/ -static unsigned string_resize(char** out, size_t size) -{ - char* data = (char*)lodepng_realloc(*out, size + 1); - if(data) - { - data[size] = 0; /*null termination char*/ - *out = data; - } - return data != 0; -} - -/*init a {char*, size_t} pair for use as string*/ -static void string_init(char** out) -{ - *out = NULL; - string_resize(out, 0); -} - -/*free the above pair again*/ -static void string_cleanup(char** out) -{ - lodepng_free(*out); - *out = NULL; -} - -static void string_set(char** out, const char* in) -{ - size_t insize = strlen(in), i; - if(string_resize(out, insize)) - { - for(i = 0; i != insize; ++i) - { - (*out)[i] = in[i]; - } - } -} -#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ -#endif /*LODEPNG_COMPILE_PNG*/ - -/* ////////////////////////////////////////////////////////////////////////// */ - -unsigned lodepng_read32bitInt(const unsigned char* buffer) -{ - return (unsigned)((buffer[0] << 24) | (buffer[1] << 16) | (buffer[2] << 8) | buffer[3]); -} - -#if defined(LODEPNG_COMPILE_PNG) || defined(LODEPNG_COMPILE_ENCODER) -/*buffer must have at least 4 allocated bytes available*/ -static void lodepng_set32bitInt(unsigned char* buffer, unsigned value) -{ - buffer[0] = (unsigned char)((value >> 24) & 0xff); - buffer[1] = (unsigned char)((value >> 16) & 0xff); - buffer[2] = (unsigned char)((value >> 8) & 0xff); - buffer[3] = (unsigned char)((value ) & 0xff); -} -#endif /*defined(LODEPNG_COMPILE_PNG) || defined(LODEPNG_COMPILE_ENCODER)*/ - -#ifdef LODEPNG_COMPILE_ENCODER -static void lodepng_add32bitInt(ucvector* buffer, unsigned value) -{ - ucvector_resize(buffer, buffer->size + 4); /*todo: give error if resize failed*/ - lodepng_set32bitInt(&buffer->data[buffer->size - 4], value); -} -#endif /*LODEPNG_COMPILE_ENCODER*/ - -/* ////////////////////////////////////////////////////////////////////////// */ -/* / File IO / */ -/* ////////////////////////////////////////////////////////////////////////// */ - -#ifdef LODEPNG_COMPILE_DISK - -unsigned lodepng_load_file(unsigned char** out, size_t* outsize, const char* filename) -{ - FILE* file; - long size; - - /*provide some proper output values if error will happen*/ - *out = 0; - *outsize = 0; - - file = fopen(filename, "rb"); - if(!file) return 78; - - /*get filesize:*/ - fseek(file , 0 , SEEK_END); - size = ftell(file); - rewind(file); - - /*read contents of the file into the vector*/ - *outsize = 0; - *out = (unsigned char*)lodepng_malloc((size_t)size); - if(size && (*out)) (*outsize) = fread(*out, 1, (size_t)size, file); - - fclose(file); - if(!(*out) && size) return 83; /*the above malloc failed*/ - return 0; -} - -/*write given buffer to the file, 
overwriting the file, it doesn't append to it.*/ -unsigned lodepng_save_file(const unsigned char* buffer, size_t buffersize, const char* filename) -{ - FILE* file; - file = fopen(filename, "wb" ); - if(!file) return 79; - fwrite((char*)buffer , 1 , buffersize, file); - fclose(file); - return 0; -} - -#endif /*LODEPNG_COMPILE_DISK*/ - -/* ////////////////////////////////////////////////////////////////////////// */ -/* ////////////////////////////////////////////////////////////////////////// */ -/* // End of common code and tools. Begin of Zlib related code. // */ -/* ////////////////////////////////////////////////////////////////////////// */ -/* ////////////////////////////////////////////////////////////////////////// */ - -#ifdef LODEPNG_COMPILE_ZLIB -#ifdef LODEPNG_COMPILE_ENCODER -/*TODO: this ignores potential out of memory errors*/ -#define addBitToStream(/*size_t**/ bitpointer, /*ucvector**/ bitstream, /*unsigned char*/ bit)\ -{\ - /*add a new byte at the end*/\ - if(((*bitpointer) & 7) == 0) ucvector_push_back(bitstream, (unsigned char)0);\ - /*earlier bit of huffman code is in a lesser significant bit of an earlier byte*/\ - (bitstream->data[bitstream->size - 1]) |= (bit << ((*bitpointer) & 0x7));\ - ++(*bitpointer);\ -} - -static void addBitsToStream(size_t* bitpointer, ucvector* bitstream, unsigned value, size_t nbits) -{ - size_t i; - for(i = 0; i != nbits; ++i) addBitToStream(bitpointer, bitstream, (unsigned char)((value >> i) & 1)); -} - -static void addBitsToStreamReversed(size_t* bitpointer, ucvector* bitstream, unsigned value, size_t nbits) -{ - size_t i; - for(i = 0; i != nbits; ++i) addBitToStream(bitpointer, bitstream, (unsigned char)((value >> (nbits - 1 - i)) & 1)); -} -#endif /*LODEPNG_COMPILE_ENCODER*/ - -#ifdef LODEPNG_COMPILE_DECODER - -#define READBIT(bitpointer, bitstream) ((bitstream[bitpointer >> 3] >> (bitpointer & 0x7)) & (unsigned char)1) - -static unsigned char readBitFromStream(size_t* bitpointer, const unsigned char* bitstream) -{ - unsigned char result = (unsigned char)(READBIT(*bitpointer, bitstream)); - ++(*bitpointer); - return result; -} - -static unsigned readBitsFromStream(size_t* bitpointer, const unsigned char* bitstream, size_t nbits) -{ - unsigned result = 0, i; - for(i = 0; i != nbits; ++i) - { - result += ((unsigned)READBIT(*bitpointer, bitstream)) << i; - ++(*bitpointer); - } - return result; -} -#endif /*LODEPNG_COMPILE_DECODER*/ - -/* ////////////////////////////////////////////////////////////////////////// */ -/* / Deflate - Huffman / */ -/* ////////////////////////////////////////////////////////////////////////// */ - -#define FIRST_LENGTH_CODE_INDEX 257 -#define LAST_LENGTH_CODE_INDEX 285 -/*256 literals, the end code, some length codes, and 2 unused codes*/ -#define NUM_DEFLATE_CODE_SYMBOLS 288 -/*the distance codes have their own symbols, 30 used, 2 unused*/ -#define NUM_DISTANCE_SYMBOLS 32 -/*the code length codes. 
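
The bit-stream helpers above pack bits LSB-first: bit (bp & 7) of byte (bp >> 3), so the low-order bits of a value land earliest in the stream. A small standalone sketch with a worked example (put_bits is an illustrative name, not a lodepng function):

    #include <stdio.h>

    /* illustrative only: same LSB-first packing as addBitsToStream above,
       but on a plain byte buffer instead of a ucvector */
    static void put_bits(unsigned char* buf, size_t* bp, unsigned value, size_t nbits)
    {
        size_t i;
        for(i = 0; i != nbits; ++i)
        {
            unsigned bit = (value >> i) & 1u;
            buf[*bp >> 3] |= (unsigned char)(bit << (*bp & 7u)); /*earlier bits go to lower positions*/
            ++(*bp);
        }
    }

    int main(void)
    {
        unsigned char buf[4] = {0};
        size_t bp = 0;
        put_bits(buf, &bp, 5, 3); /*writes bits 1,0,1*/
        put_bits(buf, &bp, 3, 2); /*writes bits 1,1*/
        printf("%02x\n", buf[0]); /*prints 1d, i.e. 0b00011101*/
        return 0;
    }
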
0-15: code lengths, 16: copy previous 3-6 times, 17: 3-10 zeros, 18: 11-138 zeros*/ -#define NUM_CODE_LENGTH_CODES 19 - -/*the base lengths represented by codes 257-285*/ -static const unsigned LENGTHBASE[29] - = {3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 35, 43, 51, 59, - 67, 83, 99, 115, 131, 163, 195, 227, 258}; - -/*the extra bits used by codes 257-285 (added to base length)*/ -static const unsigned LENGTHEXTRA[29] - = {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, - 4, 4, 4, 4, 5, 5, 5, 5, 0}; - -/*the base backwards distances (the bits of distance codes appear after length codes and use their own huffman tree)*/ -static const unsigned DISTANCEBASE[30] - = {1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513, - 769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577}; - -/*the extra bits of backwards distances (added to base)*/ -static const unsigned DISTANCEEXTRA[30] - = {0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, - 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13}; - -/*the order in which "code length alphabet code lengths" are stored, out of this -the huffman tree of the dynamic huffman tree lengths is generated*/ -static const unsigned CLCL_ORDER[NUM_CODE_LENGTH_CODES] - = {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; - -/* ////////////////////////////////////////////////////////////////////////// */ - -/* -Huffman tree struct, containing multiple representations of the tree -*/ -typedef struct HuffmanTree -{ - unsigned* tree2d; - unsigned* tree1d; - unsigned* lengths; /*the lengths of the codes of the 1d-tree*/ - unsigned maxbitlen; /*maximum number of bits a single code can get*/ - unsigned numcodes; /*number of symbols in the alphabet = number of codes*/ -} HuffmanTree; - -/*function used for debug purposes to draw the tree in ascii art with C++*/ -/* -static void HuffmanTree_draw(HuffmanTree* tree) -{ - std::cout << "tree. length: " << tree->numcodes << " maxbitlen: " << tree->maxbitlen << std::endl; - for(size_t i = 0; i != tree->tree1d.size; ++i) - { - if(tree->lengths.data[i]) - std::cout << i << " " << tree->tree1d.data[i] << " " << tree->lengths.data[i] << std::endl; - } - std::cout << std::endl; -}*/ - -static void HuffmanTree_init(HuffmanTree* tree) -{ - tree->tree2d = 0; - tree->tree1d = 0; - tree->lengths = 0; -} - -static void HuffmanTree_cleanup(HuffmanTree* tree) -{ - lodepng_free(tree->tree2d); - lodepng_free(tree->tree1d); - lodepng_free(tree->lengths); -} - -/*the tree representation used by the decoder. return value is error*/ -static unsigned HuffmanTree_make2DTree(HuffmanTree* tree) -{ - unsigned nodefilled = 0; /*up to which node it is filled*/ - unsigned treepos = 0; /*position in the tree (1 of the numcodes columns)*/ - unsigned n, i; - - tree->tree2d = (unsigned*)lodepng_malloc(tree->numcodes * 2 * sizeof(unsigned)); - if(!tree->tree2d) return 83; /*alloc fail*/ - - /* - convert tree1d[] to tree2d[][]. In the 2D array, a value of 32767 means - uninited, a value >= numcodes is an address to another bit, a value < numcodes - is a code. The 2 rows are the 2 possible bit values (0 or 1), there are as - many columns as codes - 1. - A good huffman tree has N * 2 - 1 nodes, of which N - 1 are internal nodes. - Here, the internal nodes are stored (what their 0 and 1 option point to). 
- There is only memory for such good tree currently, if there are more nodes - (due to too long length codes), error 55 will happen - */ - for(n = 0; n < tree->numcodes * 2; ++n) - { - tree->tree2d[n] = 32767; /*32767 here means the tree2d isn't filled there yet*/ - } - - for(n = 0; n < tree->numcodes; ++n) /*the codes*/ - { - for(i = 0; i != tree->lengths[n]; ++i) /*the bits for this code*/ - { - unsigned char bit = (unsigned char)((tree->tree1d[n] >> (tree->lengths[n] - i - 1)) & 1); - /*oversubscribed, see comment in lodepng_error_text*/ - if(treepos > 2147483647 || treepos + 2 > tree->numcodes) return 55; - if(tree->tree2d[2 * treepos + bit] == 32767) /*not yet filled in*/ - { - if(i + 1 == tree->lengths[n]) /*last bit*/ - { - tree->tree2d[2 * treepos + bit] = n; /*put the current code in it*/ - treepos = 0; - } - else - { - /*put address of the next step in here, first that address has to be found of course - (it's just nodefilled + 1)...*/ - ++nodefilled; - /*addresses encoded with numcodes added to it*/ - tree->tree2d[2 * treepos + bit] = nodefilled + tree->numcodes; - treepos = nodefilled; - } - } - else treepos = tree->tree2d[2 * treepos + bit] - tree->numcodes; - } - } - - for(n = 0; n < tree->numcodes * 2; ++n) - { - if(tree->tree2d[n] == 32767) tree->tree2d[n] = 0; /*remove possible remaining 32767's*/ - } - - return 0; -} - -/* -Second step for the ...makeFromLengths and ...makeFromFrequencies functions. -numcodes, lengths and maxbitlen must already be filled in correctly. return -value is error. -*/ -static unsigned HuffmanTree_makeFromLengths2(HuffmanTree* tree) -{ - uivector blcount; - uivector nextcode; - unsigned error = 0; - unsigned bits, n; - - uivector_init(&blcount); - uivector_init(&nextcode); - - tree->tree1d = (unsigned*)lodepng_malloc(tree->numcodes * sizeof(unsigned)); - if(!tree->tree1d) error = 83; /*alloc fail*/ - - if(!uivector_resizev(&blcount, tree->maxbitlen + 1, 0) - || !uivector_resizev(&nextcode, tree->maxbitlen + 1, 0)) - error = 83; /*alloc fail*/ - - if(!error) - { - /*step 1: count number of instances of each code length*/ - for(bits = 0; bits != tree->numcodes; ++bits) ++blcount.data[tree->lengths[bits]]; - /*step 2: generate the nextcode values*/ - for(bits = 1; bits <= tree->maxbitlen; ++bits) - { - nextcode.data[bits] = (nextcode.data[bits - 1] + blcount.data[bits - 1]) << 1; - } - /*step 3: generate all the codes*/ - for(n = 0; n != tree->numcodes; ++n) - { - if(tree->lengths[n] != 0) tree->tree1d[n] = nextcode.data[tree->lengths[n]]++; - } - } - - uivector_cleanup(&blcount); - uivector_cleanup(&nextcode); - - if(!error) return HuffmanTree_make2DTree(tree); - else return error; -} - -/* -given the code lengths (as stored in the PNG file), generate the tree as defined -by Deflate. maxbitlen is the maximum bits that a code in the tree can have. -return value is error. 
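
HuffmanTree_makeFromLengths2 above is the canonical code assignment from RFC 1951 section 3.2.2: count codes per length, derive the smallest code for each length, then hand out consecutive values in symbol order. A compact standalone sketch with a worked example (canonical_codes is an illustrative name, not a lodepng function):

    #include <stdio.h>

    /* illustrative only: the same three steps as HuffmanTree_makeFromLengths2 above,
       for maxbitlen <= 15 and a small symbol count */
    static void canonical_codes(const unsigned* lengths, unsigned numcodes,
                                unsigned maxbitlen, unsigned* codes)
    {
        unsigned blcount[16] = {0}, nextcode[16] = {0};
        unsigned bits, n;
        /*step 1: count how many codes have each length (length 0 = unused symbol)*/
        for(n = 0; n != numcodes; ++n) if(lengths[n] != 0) ++blcount[lengths[n]];
        /*step 2: smallest code value for each length*/
        for(bits = 1; bits <= maxbitlen; ++bits)
            nextcode[bits] = (nextcode[bits - 1] + blcount[bits - 1]) << 1;
        /*step 3: hand out consecutive codes per length, in symbol order*/
        for(n = 0; n != numcodes; ++n)
            if(lengths[n] != 0) codes[n] = nextcode[lengths[n]]++;
    }

    int main(void)
    {
        /*lengths {2,1,3,3} give codes 10, 0, 110, 111 (binary)*/
        unsigned lengths[4] = {2, 1, 3, 3}, codes[4] = {0}, i;
        canonical_codes(lengths, 4, 3, codes);
        for(i = 0; i != 4; ++i) printf("symbol %u: code %u, %u bits\n", i, codes[i], lengths[i]);
        return 0;
    }
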
-*/ -static unsigned HuffmanTree_makeFromLengths(HuffmanTree* tree, const unsigned* bitlen, - size_t numcodes, unsigned maxbitlen) -{ - unsigned i; - tree->lengths = (unsigned*)lodepng_malloc(numcodes * sizeof(unsigned)); - if(!tree->lengths) return 83; /*alloc fail*/ - for(i = 0; i != numcodes; ++i) tree->lengths[i] = bitlen[i]; - tree->numcodes = (unsigned)numcodes; /*number of symbols*/ - tree->maxbitlen = maxbitlen; - return HuffmanTree_makeFromLengths2(tree); -} - -#ifdef LODEPNG_COMPILE_ENCODER - -/*BPM: Boundary Package Merge, see "A Fast and Space-Economical Algorithm for Length-Limited Coding", -Jyrki Katajainen, Alistair Moffat, Andrew Turpin, 1995.*/ - -/*chain node for boundary package merge*/ -typedef struct BPMNode -{ - int weight; /*the sum of all weights in this chain*/ - unsigned index; /*index of this leaf node (called "count" in the paper)*/ - struct BPMNode* tail; /*the next nodes in this chain (null if last)*/ - int in_use; -} BPMNode; - -/*lists of chains*/ -typedef struct BPMLists -{ - /*memory pool*/ - unsigned memsize; - BPMNode* memory; - unsigned numfree; - unsigned nextfree; - BPMNode** freelist; - /*two heads of lookahead chains per list*/ - unsigned listsize; - BPMNode** chains0; - BPMNode** chains1; -} BPMLists; - -/*creates a new chain node with the given parameters, from the memory in the lists */ -static BPMNode* bpmnode_create(BPMLists* lists, int weight, unsigned index, BPMNode* tail) -{ - unsigned i; - BPMNode* result; - - /*memory full, so garbage collect*/ - if(lists->nextfree >= lists->numfree) - { - /*mark only those that are in use*/ - for(i = 0; i != lists->memsize; ++i) lists->memory[i].in_use = 0; - for(i = 0; i != lists->listsize; ++i) - { - BPMNode* node; - for(node = lists->chains0[i]; node != 0; node = node->tail) node->in_use = 1; - for(node = lists->chains1[i]; node != 0; node = node->tail) node->in_use = 1; - } - /*collect those that are free*/ - lists->numfree = 0; - for(i = 0; i != lists->memsize; ++i) - { - if(!lists->memory[i].in_use) lists->freelist[lists->numfree++] = &lists->memory[i]; - } - lists->nextfree = 0; - } - - result = lists->freelist[lists->nextfree++]; - result->weight = weight; - result->index = index; - result->tail = tail; - return result; -} - -static int bpmnode_compare(const void* a, const void* b) -{ - int wa = ((const BPMNode*)a)->weight; - int wb = ((const BPMNode*)b)->weight; - if(wa < wb) return -1; - if(wa > wb) return 1; - /*make the qsort a stable sort*/ - return ((const BPMNode*)a)->index < ((const BPMNode*)b)->index ? 
1 : -1; -} - -/*Boundary Package Merge step, numpresent is the amount of leaves, and c is the current chain.*/ -static void boundaryPM(BPMLists* lists, BPMNode* leaves, size_t numpresent, int c, int num) -{ - unsigned lastindex = lists->chains1[c]->index; - - if(c == 0) - { - if(lastindex >= numpresent) return; - lists->chains0[c] = lists->chains1[c]; - lists->chains1[c] = bpmnode_create(lists, leaves[lastindex].weight, lastindex + 1, 0); - } - else - { - /*sum of the weights of the head nodes of the previous lookahead chains.*/ - int sum = lists->chains0[c - 1]->weight + lists->chains1[c - 1]->weight; - lists->chains0[c] = lists->chains1[c]; - if(lastindex < numpresent && sum > leaves[lastindex].weight) - { - lists->chains1[c] = bpmnode_create(lists, leaves[lastindex].weight, lastindex + 1, lists->chains1[c]->tail); - return; - } - lists->chains1[c] = bpmnode_create(lists, sum, lastindex, lists->chains1[c - 1]); - /*in the end we are only interested in the chain of the last list, so no - need to recurse if we're at the last one (this gives measurable speedup)*/ - if(num + 1 < (int)(2 * numpresent - 2)) - { - boundaryPM(lists, leaves, numpresent, c - 1, num); - boundaryPM(lists, leaves, numpresent, c - 1, num); - } - } -} - -unsigned lodepng_huffman_code_lengths(unsigned* lengths, const unsigned* frequencies, - size_t numcodes, unsigned maxbitlen) -{ - unsigned error = 0; - unsigned i; - size_t numpresent = 0; /*number of symbols with non-zero frequency*/ - BPMNode* leaves; /*the symbols, only those with > 0 frequency*/ - - if(numcodes == 0) return 80; /*error: a tree of 0 symbols is not supposed to be made*/ - if((1ull << maxbitlen) < numcodes) return 80; /*error: represent all symbols*/ - - leaves = (BPMNode*)lodepng_malloc(numcodes * sizeof(*leaves)); - if(!leaves) return 83; /*alloc fail*/ - - for(i = 0; i != numcodes; ++i) - { - if(frequencies[i] > 0) - { - leaves[numpresent].weight = (int)frequencies[i]; - leaves[numpresent].index = i; - ++numpresent; - } - } - - for(i = 0; i != numcodes; ++i) lengths[i] = 0; - - /*ensure at least two present symbols. There should be at least one symbol - according to RFC 1951 section 3.2.7. Some decoders incorrectly require two. To - make these work as well ensure there are at least two symbols. The - Package-Merge code below also doesn't work correctly if there's only one - symbol, it'd give it the theoritical 0 bits but in practice zlib wants 1 bit*/ - if(numpresent == 0) - { - lengths[0] = lengths[1] = 1; /*note that for RFC 1951 section 3.2.7, only lengths[0] = 1 is needed*/ - } - else if(numpresent == 1) - { - lengths[leaves[0].index] = 1; - lengths[leaves[0].index == 0 ? 
1 : 0] = 1; - } - else - { - BPMLists lists; - BPMNode* node; - - qsort(leaves, numpresent, sizeof(BPMNode), bpmnode_compare); - - lists.listsize = maxbitlen; - lists.memsize = 2 * maxbitlen * (maxbitlen + 1); - lists.nextfree = 0; - lists.numfree = lists.memsize; - lists.memory = (BPMNode*)lodepng_malloc(lists.memsize * sizeof(*lists.memory)); - lists.freelist = (BPMNode**)lodepng_malloc(lists.memsize * sizeof(BPMNode*)); - lists.chains0 = (BPMNode**)lodepng_malloc(lists.listsize * sizeof(BPMNode*)); - lists.chains1 = (BPMNode**)lodepng_malloc(lists.listsize * sizeof(BPMNode*)); - if(!lists.memory || !lists.freelist || !lists.chains0 || !lists.chains1) error = 83; /*alloc fail*/ - - if(!error) - { - for(i = 0; i != lists.memsize; ++i) lists.freelist[i] = &lists.memory[i]; - - bpmnode_create(&lists, leaves[0].weight, 1, 0); - bpmnode_create(&lists, leaves[1].weight, 2, 0); - - for(i = 0; i != lists.listsize; ++i) - { - lists.chains0[i] = &lists.memory[0]; - lists.chains1[i] = &lists.memory[1]; - } - - /*each boundaryPM call adds one chain to the last list, and we need 2 * numpresent - 2 chains.*/ - for(i = 2; i != 2 * numpresent - 2; ++i) boundaryPM(&lists, leaves, numpresent, (int)maxbitlen - 1, (int)i); - - for(node = lists.chains1[maxbitlen - 1]; node; node = node->tail) - { - for(i = 0; i != node->index; ++i) ++lengths[leaves[i].index]; - } - } - - lodepng_free(lists.memory); - lodepng_free(lists.freelist); - lodepng_free(lists.chains0); - lodepng_free(lists.chains1); - } - - lodepng_free(leaves); - return error; -} - -/*Create the Huffman tree given the symbol frequencies*/ -static unsigned HuffmanTree_makeFromFrequencies(HuffmanTree* tree, const unsigned* frequencies, - size_t mincodes, size_t numcodes, unsigned maxbitlen) -{ - unsigned error = 0; - while(!frequencies[numcodes - 1] && numcodes > mincodes) --numcodes; /*trim zeroes*/ - tree->maxbitlen = maxbitlen; - tree->numcodes = (unsigned)numcodes; /*number of symbols*/ - tree->lengths = (unsigned*)lodepng_realloc(tree->lengths, numcodes * sizeof(unsigned)); - if(!tree->lengths) return 83; /*alloc fail*/ - /*initialize all lengths to 0*/ - memset(tree->lengths, 0, numcodes * sizeof(unsigned)); - - error = lodepng_huffman_code_lengths(tree->lengths, frequencies, numcodes, maxbitlen); - if(!error) error = HuffmanTree_makeFromLengths2(tree); - return error; -} - -static unsigned HuffmanTree_getCode(const HuffmanTree* tree, unsigned index) -{ - return tree->tree1d[index]; -} - -static unsigned HuffmanTree_getLength(const HuffmanTree* tree, unsigned index) -{ - return tree->lengths[index]; -} -#endif /*LODEPNG_COMPILE_ENCODER*/ - -/*get the literal and length code tree of a deflated block with fixed tree, as per the deflate specification*/ -static unsigned generateFixedLitLenTree(HuffmanTree* tree) -{ - unsigned i, error = 0; - unsigned* bitlen = (unsigned*)lodepng_malloc(NUM_DEFLATE_CODE_SYMBOLS * sizeof(unsigned)); - if(!bitlen) return 83; /*alloc fail*/ - - /*288 possible codes: 0-255=literals, 256=endcode, 257-285=lengthcodes, 286-287=unused*/ - for(i = 0; i <= 143; ++i) bitlen[i] = 8; - for(i = 144; i <= 255; ++i) bitlen[i] = 9; - for(i = 256; i <= 279; ++i) bitlen[i] = 7; - for(i = 280; i <= 287; ++i) bitlen[i] = 8; - - error = HuffmanTree_makeFromLengths(tree, bitlen, NUM_DEFLATE_CODE_SYMBOLS, 15); - - lodepng_free(bitlen); - return error; -} - -/*get the distance code tree of a deflated block with fixed tree, as specified in the deflate specification*/ -static unsigned generateFixedDistanceTree(HuffmanTree* tree) -{ - unsigned 
i, error = 0; - unsigned* bitlen = (unsigned*)lodepng_malloc(NUM_DISTANCE_SYMBOLS * sizeof(unsigned)); - if(!bitlen) return 83; /*alloc fail*/ - - /*there are 32 distance codes, but 30-31 are unused*/ - for(i = 0; i != NUM_DISTANCE_SYMBOLS; ++i) bitlen[i] = 5; - error = HuffmanTree_makeFromLengths(tree, bitlen, NUM_DISTANCE_SYMBOLS, 15); - - lodepng_free(bitlen); - return error; -} - -#ifdef LODEPNG_COMPILE_DECODER - -/* -returns the code, or (unsigned)(-1) if error happened -inbitlength is the length of the complete buffer, in bits (so its byte length times 8) -*/ -static unsigned huffmanDecodeSymbol(const unsigned char* in, size_t* bp, - const HuffmanTree* codetree, size_t inbitlength) -{ - unsigned treepos = 0, ct; - for(;;) - { - if(*bp >= inbitlength) return (unsigned)(-1); /*error: end of input memory reached without endcode*/ - /* - decode the symbol from the tree. The "readBitFromStream" code is inlined in - the expression below because this is the biggest bottleneck while decoding - */ - ct = codetree->tree2d[(treepos << 1) + READBIT(*bp, in)]; - ++(*bp); - if(ct < codetree->numcodes) return ct; /*the symbol is decoded, return it*/ - else treepos = ct - codetree->numcodes; /*symbol not yet decoded, instead move tree position*/ - - if(treepos >= codetree->numcodes) return (unsigned)(-1); /*error: it appeared outside the codetree*/ - } -} -#endif /*LODEPNG_COMPILE_DECODER*/ - -#ifdef LODEPNG_COMPILE_DECODER - -/* ////////////////////////////////////////////////////////////////////////// */ -/* / Inflator (Decompressor) / */ -/* ////////////////////////////////////////////////////////////////////////// */ - -/*get the tree of a deflated block with fixed tree, as specified in the deflate specification*/ -static void getTreeInflateFixed(HuffmanTree* tree_ll, HuffmanTree* tree_d) -{ - /*TODO: check for out of memory errors*/ - generateFixedLitLenTree(tree_ll); - generateFixedDistanceTree(tree_d); -} - -/*get the tree of a deflated block with dynamic tree, the tree itself is also Huffman compressed with a known tree*/ -static unsigned getTreeInflateDynamic(HuffmanTree* tree_ll, HuffmanTree* tree_d, - const unsigned char* in, size_t* bp, size_t inlength) -{ - /*make sure that length values that aren't filled in will be 0, or a wrong tree will be generated*/ - unsigned error = 0; - unsigned n, HLIT, HDIST, HCLEN, i; - size_t inbitlength = inlength * 8; - - /*see comments in deflateDynamic for explanation of the context and these variables, it is analogous*/ - unsigned* bitlen_ll = 0; /*lit,len code lengths*/ - unsigned* bitlen_d = 0; /*dist code lengths*/ - /*code length code lengths ("clcl"), the bit lengths of the huffman tree used to compress bitlen_ll and bitlen_d*/ - unsigned* bitlen_cl = 0; - HuffmanTree tree_cl; /*the code tree for code length codes (the huffman tree for compressed huffman trees)*/ - - if((*bp) + 14 > (inlength << 3)) return 49; /*error: the bit pointer is or will go past the memory*/ - - /*number of literal/length codes + 257. Unlike the spec, the value 257 is added to it here already*/ - HLIT = readBitsFromStream(bp, in, 5) + 257; - /*number of distance codes. Unlike the spec, the value 1 is added to it here already*/ - HDIST = readBitsFromStream(bp, in, 5) + 1; - /*number of code length codes. 
Unlike the spec, the value 4 is added to it here already*/ - HCLEN = readBitsFromStream(bp, in, 4) + 4; - - if((*bp) + HCLEN * 3 > (inlength << 3)) return 50; /*error: the bit pointer is or will go past the memory*/ - - HuffmanTree_init(&tree_cl); - - while(!error) - { - /*read the code length codes out of 3 * (amount of code length codes) bits*/ - - bitlen_cl = (unsigned*)lodepng_malloc(NUM_CODE_LENGTH_CODES * sizeof(unsigned)); - if(!bitlen_cl) ERROR_BREAK(83 /*alloc fail*/); - - for(i = 0; i != NUM_CODE_LENGTH_CODES; ++i) - { - if(i < HCLEN) bitlen_cl[CLCL_ORDER[i]] = readBitsFromStream(bp, in, 3); - else bitlen_cl[CLCL_ORDER[i]] = 0; /*if not, it must stay 0*/ - } - - error = HuffmanTree_makeFromLengths(&tree_cl, bitlen_cl, NUM_CODE_LENGTH_CODES, 7); - if(error) break; - - /*now we can use this tree to read the lengths for the tree that this function will return*/ - bitlen_ll = (unsigned*)lodepng_malloc(NUM_DEFLATE_CODE_SYMBOLS * sizeof(unsigned)); - bitlen_d = (unsigned*)lodepng_malloc(NUM_DISTANCE_SYMBOLS * sizeof(unsigned)); - if(!bitlen_ll || !bitlen_d) ERROR_BREAK(83 /*alloc fail*/); - for(i = 0; i != NUM_DEFLATE_CODE_SYMBOLS; ++i) bitlen_ll[i] = 0; - for(i = 0; i != NUM_DISTANCE_SYMBOLS; ++i) bitlen_d[i] = 0; - - /*i is the current symbol we're reading in the part that contains the code lengths of lit/len and dist codes*/ - i = 0; - while(i < HLIT + HDIST) - { - unsigned code = huffmanDecodeSymbol(in, bp, &tree_cl, inbitlength); - if(code <= 15) /*a length code*/ - { - if(i < HLIT) bitlen_ll[i] = code; - else bitlen_d[i - HLIT] = code; - ++i; - } - else if(code == 16) /*repeat previous*/ - { - unsigned replength = 3; /*read in the 2 bits that indicate repeat length (3-6)*/ - unsigned value; /*set value to the previous code*/ - - if(i == 0) ERROR_BREAK(54); /*can't repeat previous if i is 0*/ - - if((*bp + 2) > inbitlength) ERROR_BREAK(50); /*error, bit pointer jumps past memory*/ - replength += readBitsFromStream(bp, in, 2); - - if(i < HLIT + 1) value = bitlen_ll[i - 1]; - else value = bitlen_d[i - HLIT - 1]; - /*repeat this value in the next lengths*/ - for(n = 0; n < replength; ++n) - { - if(i >= HLIT + HDIST) ERROR_BREAK(13); /*error: i is larger than the amount of codes*/ - if(i < HLIT) bitlen_ll[i] = value; - else bitlen_d[i - HLIT] = value; - ++i; - } - } - else if(code == 17) /*repeat "0" 3-10 times*/ - { - unsigned replength = 3; /*read in the bits that indicate repeat length*/ - if((*bp + 3) > inbitlength) ERROR_BREAK(50); /*error, bit pointer jumps past memory*/ - replength += readBitsFromStream(bp, in, 3); - - /*repeat this value in the next lengths*/ - for(n = 0; n < replength; ++n) - { - if(i >= HLIT + HDIST) ERROR_BREAK(14); /*error: i is larger than the amount of codes*/ - - if(i < HLIT) bitlen_ll[i] = 0; - else bitlen_d[i - HLIT] = 0; - ++i; - } - } - else if(code == 18) /*repeat "0" 11-138 times*/ - { - unsigned replength = 11; /*read in the bits that indicate repeat length*/ - if((*bp + 7) > inbitlength) ERROR_BREAK(50); /*error, bit pointer jumps past memory*/ - replength += readBitsFromStream(bp, in, 7); - - /*repeat this value in the next lengths*/ - for(n = 0; n < replength; ++n) - { - if(i >= HLIT + HDIST) ERROR_BREAK(15); /*error: i is larger than the amount of codes*/ - - if(i < HLIT) bitlen_ll[i] = 0; - else bitlen_d[i - HLIT] = 0; - ++i; - } - } - else /*if(code == (unsigned)(-1))*/ /*huffmanDecodeSymbol returns (unsigned)(-1) in case of error*/ - { - if(code == (unsigned)(-1)) - { - /*return error code 10 or 11 depending on the situation that happened 
in huffmanDecodeSymbol - (10=no endcode, 11=wrong jump outside of tree)*/ - error = (*bp) > inbitlength ? 10 : 11; - } - else error = 16; /*unexisting code, this can never happen*/ - break; - } - } - if(error) break; - - if(bitlen_ll[256] == 0) ERROR_BREAK(64); /*the length of the end code 256 must be larger than 0*/ - - /*now we've finally got HLIT and HDIST, so generate the code trees, and the function is done*/ - error = HuffmanTree_makeFromLengths(tree_ll, bitlen_ll, NUM_DEFLATE_CODE_SYMBOLS, 15); - if(error) break; - error = HuffmanTree_makeFromLengths(tree_d, bitlen_d, NUM_DISTANCE_SYMBOLS, 15); - - break; /*end of error-while*/ - } - - lodepng_free(bitlen_cl); - lodepng_free(bitlen_ll); - lodepng_free(bitlen_d); - HuffmanTree_cleanup(&tree_cl); - - return error; -} - -/*inflate a block with dynamic of fixed Huffman tree*/ -static unsigned inflateHuffmanBlock(ucvector* out, const unsigned char* in, size_t* bp, - size_t* pos, size_t inlength, unsigned btype) -{ - unsigned error = 0; - HuffmanTree tree_ll; /*the huffman tree for literal and length codes*/ - HuffmanTree tree_d; /*the huffman tree for distance codes*/ - size_t inbitlength = inlength * 8; - - HuffmanTree_init(&tree_ll); - HuffmanTree_init(&tree_d); - - if(btype == 1) getTreeInflateFixed(&tree_ll, &tree_d); - else if(btype == 2) error = getTreeInflateDynamic(&tree_ll, &tree_d, in, bp, inlength); - - while(!error) /*decode all symbols until end reached, breaks at end code*/ - { - /*code_ll is literal, length or end code*/ - unsigned code_ll = huffmanDecodeSymbol(in, bp, &tree_ll, inbitlength); - if(code_ll <= 255) /*literal symbol*/ - { - /*ucvector_push_back would do the same, but for some reason the two lines below run 10% faster*/ - if(!ucvector_resize(out, (*pos) + 1)) ERROR_BREAK(83 /*alloc fail*/); - out->data[*pos] = (unsigned char)code_ll; - ++(*pos); - } - else if(code_ll >= FIRST_LENGTH_CODE_INDEX && code_ll <= LAST_LENGTH_CODE_INDEX) /*length code*/ - { - unsigned code_d, distance; - unsigned numextrabits_l, numextrabits_d; /*extra bits for length and distance*/ - size_t start, forward, backward, length; - - /*part 1: get length base*/ - length = LENGTHBASE[code_ll - FIRST_LENGTH_CODE_INDEX]; - - /*part 2: get extra bits and add the value of that to length*/ - numextrabits_l = LENGTHEXTRA[code_ll - FIRST_LENGTH_CODE_INDEX]; - if((*bp + numextrabits_l) > inbitlength) ERROR_BREAK(51); /*error, bit pointer will jump past memory*/ - length += readBitsFromStream(bp, in, numextrabits_l); - - /*part 3: get distance code*/ - code_d = huffmanDecodeSymbol(in, bp, &tree_d, inbitlength); - if(code_d > 29) - { - if(code_ll == (unsigned)(-1)) /*huffmanDecodeSymbol returns (unsigned)(-1) in case of error*/ - { - /*return error code 10 or 11 depending on the situation that happened in huffmanDecodeSymbol - (10=no endcode, 11=wrong jump outside of tree)*/ - error = (*bp) > inlength * 8 ? 
10 : 11; - } - else error = 18; /*error: invalid distance code (30-31 are never used)*/ - break; - } - distance = DISTANCEBASE[code_d]; - - /*part 4: get extra bits from distance*/ - numextrabits_d = DISTANCEEXTRA[code_d]; - if((*bp + numextrabits_d) > inbitlength) ERROR_BREAK(51); /*error, bit pointer will jump past memory*/ - distance += readBitsFromStream(bp, in, numextrabits_d); - - /*part 5: fill in all the out[n] values based on the length and dist*/ - start = (*pos); - if(distance > start) ERROR_BREAK(52); /*too long backward distance*/ - backward = start - distance; - - if(!ucvector_resize(out, (*pos) + length)) ERROR_BREAK(83 /*alloc fail*/); - if (distance < length) { - for(forward = 0; forward < length; ++forward) - { - out->data[(*pos)++] = out->data[backward++]; - } - } else { - memcpy(out->data + *pos, out->data + backward, length); - *pos += length; - } - } - else if(code_ll == 256) - { - break; /*end code, break the loop*/ - } - else /*if(code == (unsigned)(-1))*/ /*huffmanDecodeSymbol returns (unsigned)(-1) in case of error*/ - { - /*return error code 10 or 11 depending on the situation that happened in huffmanDecodeSymbol - (10=no endcode, 11=wrong jump outside of tree)*/ - error = ((*bp) > inlength * 8) ? 10 : 11; - break; - } - } - - HuffmanTree_cleanup(&tree_ll); - HuffmanTree_cleanup(&tree_d); - - return error; -} - -static unsigned inflateNoCompression(ucvector* out, const unsigned char* in, size_t* bp, size_t* pos, size_t inlength) -{ - size_t p; - unsigned LEN, NLEN, n, error = 0; - - /*go to first boundary of byte*/ - while(((*bp) & 0x7) != 0) ++(*bp); - p = (*bp) / 8; /*byte position*/ - - /*read LEN (2 bytes) and NLEN (2 bytes)*/ - if(p + 4 >= inlength) return 52; /*error, bit pointer will jump past memory*/ - LEN = in[p] + 256u * in[p + 1]; p += 2; - NLEN = in[p] + 256u * in[p + 1]; p += 2; - - /*check if 16-bit NLEN is really the one's complement of LEN*/ - if(LEN + NLEN != 65535) return 21; /*error: NLEN is not one's complement of LEN*/ - - if(!ucvector_resize(out, (*pos) + LEN)) return 83; /*alloc fail*/ - - /*read the literal data: LEN bytes are now stored in the out buffer*/ - if(p + LEN > inlength) return 23; /*error: reading outside of in buffer*/ - for(n = 0; n < LEN; ++n) out->data[(*pos)++] = in[p++]; - - (*bp) = p * 8; - - return error; -} - -static unsigned lodepng_inflatev(ucvector* out, - const unsigned char* in, size_t insize, - const LodePNGDecompressSettings* settings) -{ - /*bit pointer in the "in" data, current byte is bp >> 3, current bit is bp & 0x7 (from lsb to msb of the byte)*/ - size_t bp = 0; - unsigned BFINAL = 0; - size_t pos = 0; /*byte position in the out buffer*/ - unsigned error = 0; - - (void)settings; - - while(!BFINAL) - { - unsigned BTYPE; - if(bp + 2 >= insize * 8) return 52; /*error, bit pointer will jump past memory*/ - BFINAL = readBitFromStream(&bp, in); - BTYPE = 1u * readBitFromStream(&bp, in); - BTYPE += 2u * readBitFromStream(&bp, in); - - if(BTYPE == 3) return 20; /*error: invalid BTYPE*/ - else if(BTYPE == 0) error = inflateNoCompression(out, in, &bp, &pos, insize); /*no compression*/ - else error = inflateHuffmanBlock(out, in, &bp, &pos, insize, BTYPE); /*compression, BTYPE 01 or 10*/ - - if(error) return error; - } - - return error; -} - -unsigned lodepng_inflate(unsigned char** out, size_t* outsize, - const unsigned char* in, size_t insize, - const LodePNGDecompressSettings* settings) -{ - unsigned error; - ucvector v; - ucvector_init_buffer(&v, *out, *outsize); - error = lodepng_inflatev(&v, in, insize, 
settings); - *out = v.data; - *outsize = v.size; - return error; -} - -static unsigned inflate(unsigned char** out, size_t* outsize, - const unsigned char* in, size_t insize, - const LodePNGDecompressSettings* settings) -{ - if(settings->custom_inflate) - { - return settings->custom_inflate(out, outsize, in, insize, settings); - } - else - { - return lodepng_inflate(out, outsize, in, insize, settings); - } -} - -#endif /*LODEPNG_COMPILE_DECODER*/ - -#ifdef LODEPNG_COMPILE_ENCODER - -/* ////////////////////////////////////////////////////////////////////////// */ -/* / Deflator (Compressor) / */ -/* ////////////////////////////////////////////////////////////////////////// */ - -static const size_t MAX_SUPPORTED_DEFLATE_LENGTH = 258; - -/*bitlen is the size in bits of the code*/ -static void addHuffmanSymbol(size_t* bp, ucvector* compressed, unsigned code, unsigned bitlen) -{ - addBitsToStreamReversed(bp, compressed, code, bitlen); -} - -/*search the index in the array, that has the largest value smaller than or equal to the given value, -given array must be sorted (if no value is smaller, it returns the size of the given array)*/ -static size_t searchCodeIndex(const unsigned* array, size_t array_size, size_t value) -{ - /*binary search (only small gain over linear). TODO: use CPU log2 instruction for getting symbols instead*/ - size_t left = 1; - size_t right = array_size - 1; - - while(left <= right) { - size_t mid = (left + right) >> 1; - if (array[mid] >= value) right = mid - 1; - else left = mid + 1; - } - if(left >= array_size || array[left] > value) left--; - return left; -} - -static void addLengthDistance(uivector* values, size_t length, size_t distance) -{ - /*values in encoded vector are those used by deflate: - 0-255: literal bytes - 256: end - 257-285: length/distance pair (length code, followed by extra length bits, distance code, extra distance bits) - 286-287: invalid*/ - - unsigned length_code = (unsigned)searchCodeIndex(LENGTHBASE, 29, length); - unsigned extra_length = (unsigned)(length - LENGTHBASE[length_code]); - unsigned dist_code = (unsigned)searchCodeIndex(DISTANCEBASE, 30, distance); - unsigned extra_distance = (unsigned)(distance - DISTANCEBASE[dist_code]); - - uivector_push_back(values, length_code + FIRST_LENGTH_CODE_INDEX); - uivector_push_back(values, extra_length); - uivector_push_back(values, dist_code); - uivector_push_back(values, extra_distance); -} - -/*3 bytes of data get encoded into two bytes. The hash cannot use more than 3 -bytes as input because 3 is the minimum match length for deflate*/ -static const unsigned HASH_NUM_VALUES = 65536; -static const unsigned HASH_BIT_MASK = 65535; /*HASH_NUM_VALUES - 1, but C90 does not like that as initializer*/ - -typedef struct Hash -{ - int* head; /*hash value to head circular pos - can be outdated if went around window*/ - /*circular pos to prev circular pos*/ - unsigned short* chain; - int* val; /*circular pos to hash value*/ - - /*TODO: do this not only for zeros but for any repeated byte. 
However for PNG - it's always going to be the zeros that dominate, so not important for PNG*/ - int* headz; /*similar to head, but for chainz*/ - unsigned short* chainz; /*those with same amount of zeros*/ - unsigned short* zeros; /*length of zeros streak, used as a second hash chain*/ -} Hash; - -static unsigned hash_init(Hash* hash, unsigned windowsize) -{ - unsigned i; - hash->head = (int*)lodepng_malloc(sizeof(int) * HASH_NUM_VALUES); - hash->val = (int*)lodepng_malloc(sizeof(int) * windowsize); - hash->chain = (unsigned short*)lodepng_malloc(sizeof(unsigned short) * windowsize); - - hash->zeros = (unsigned short*)lodepng_malloc(sizeof(unsigned short) * windowsize); - hash->headz = (int*)lodepng_malloc(sizeof(int) * (MAX_SUPPORTED_DEFLATE_LENGTH + 1)); - hash->chainz = (unsigned short*)lodepng_malloc(sizeof(unsigned short) * windowsize); - - if(!hash->head || !hash->chain || !hash->val || !hash->headz|| !hash->chainz || !hash->zeros) - { - return 83; /*alloc fail*/ - } - - /*initialize hash table*/ - for(i = 0; i != HASH_NUM_VALUES; ++i) hash->head[i] = -1; - for(i = 0; i != windowsize; ++i) hash->val[i] = -1; - for(i = 0; i != windowsize; ++i) hash->chain[i] = i; /*same value as index indicates uninitialized*/ - - for(i = 0; i <= MAX_SUPPORTED_DEFLATE_LENGTH; ++i) hash->headz[i] = -1; - for(i = 0; i != windowsize; ++i) hash->chainz[i] = i; /*same value as index indicates uninitialized*/ - - return 0; -} - -static void hash_cleanup(Hash* hash) -{ - lodepng_free(hash->head); - lodepng_free(hash->val); - lodepng_free(hash->chain); - - lodepng_free(hash->zeros); - lodepng_free(hash->headz); - lodepng_free(hash->chainz); -} - - - -static unsigned getHash(const unsigned char* data, size_t size, size_t pos) -{ - unsigned result = 0; - if(pos + 2 < size) - { - /*A simple shift and xor hash is used. Since the data of PNGs is dominated - by zeroes due to the filters, a better hash does not have a significant - effect on speed in traversing the chain, and causes more time spend on - calculating the hash.*/ - result ^= (unsigned)(data[pos + 0] << 0u); - result ^= (unsigned)(data[pos + 1] << 4u); - result ^= (unsigned)(data[pos + 2] << 8u); - } else { - size_t amount, i; - if(pos >= size) return 0; - amount = size - pos; - for(i = 0; i != amount; ++i) result ^= (unsigned)(data[pos + i] << (i * 8u)); - } - return result & HASH_BIT_MASK; -} - -static unsigned countZeros(const unsigned char* data, size_t size, size_t pos) -{ - const unsigned char* start = data + pos; - const unsigned char* end = start + MAX_SUPPORTED_DEFLATE_LENGTH; - if(end > data + size) end = data + size; - data = start; - while(data != end && *data == 0) ++data; - /*subtracting two addresses returned as 32-bit number (max value is MAX_SUPPORTED_DEFLATE_LENGTH)*/ - return (unsigned)(data - start); -} - -/*wpos = pos & (windowsize - 1)*/ -static void updateHashChain(Hash* hash, size_t wpos, unsigned hashval, unsigned short numzeros) -{ - hash->val[wpos] = (int)hashval; - if(hash->head[hashval] != -1) hash->chain[wpos] = hash->head[hashval]; - hash->head[hashval] = (int)wpos; - - hash->zeros[wpos] = numzeros; - if(hash->headz[numzeros] != -1) hash->chainz[wpos] = hash->headz[numzeros]; - hash->headz[numzeros] = (int)wpos; -} - -/* -LZ77-encode the data. Return value is error code. The input are raw bytes, the output -is in the form of unsigned integers with codes representing for example literal bytes, or -length/distance pairs. -It uses a hash table technique to let it encode faster. 
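
For orientation, the sketch below finds the same kind of matches encodeLZ77 produces, but by brute force over the window instead of by walking the hash->chain / hash->chainz lists, and without lazy matching (lz77_naive is an illustrative name, not a lodepng function):

    #include <stdio.h>
    #include <string.h>

    /* illustrative only: emit a literal byte, or a (length, distance) pair with
       length >= 3 and length <= 258, as deflate requires */
    static void lz77_naive(const unsigned char* in, size_t insize, size_t windowsize)
    {
        size_t pos = 0;
        while(pos < insize)
        {
            size_t bestlen = 0, bestdist = 0, dist;
            size_t maxlen = insize - pos;
            if(maxlen > 258) maxlen = 258; /*MAX_SUPPORTED_DEFLATE_LENGTH*/
            for(dist = 1; dist <= pos && dist <= windowsize; ++dist)
            {
                size_t len = 0;
                while(len < maxlen && in[pos + len] == in[pos - dist + len]) ++len;
                if(len > bestlen) { bestlen = len; bestdist = dist; }
            }
            if(bestlen >= 3)
            {
                printf("match: length %zu, distance %zu\n", bestlen, bestdist);
                pos += bestlen;
            }
            else
            {
                printf("literal: %c\n", in[pos]);
                ++pos;
            }
        }
    }

    int main(void)
    {
        const char* s = "abcabcabcabd";
        /*prints 3 literals, then a self-overlapping match of length 8, distance 3, then a literal*/
        lz77_naive((const unsigned char*)s, strlen(s), 32768);
        return 0;
    }
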
When doing LZ77 encoding, a -sliding window (of windowsize) is used, and all past bytes in that window can be used as -the "dictionary". A brute force search through all possible distances would be slow, and -this hash technique is one out of several ways to speed this up. -*/ -static unsigned encodeLZ77(uivector* out, Hash* hash, - const unsigned char* in, size_t inpos, size_t insize, unsigned windowsize, - unsigned minmatch, unsigned nicematch, unsigned lazymatching) -{ - size_t pos; - unsigned i, error = 0; - /*for large window lengths, assume the user wants no compression loss. Otherwise, max hash chain length speedup.*/ - unsigned maxchainlength = windowsize >= 8192 ? windowsize : windowsize / 8; - unsigned maxlazymatch = windowsize >= 8192 ? MAX_SUPPORTED_DEFLATE_LENGTH : 64; - - unsigned usezeros = 1; /*not sure if setting it to false for windowsize < 8192 is better or worse*/ - unsigned numzeros = 0; - - unsigned offset; /*the offset represents the distance in LZ77 terminology*/ - unsigned length; - unsigned lazy = 0; - unsigned lazylength = 0, lazyoffset = 0; - unsigned hashval; - unsigned current_offset, current_length; - unsigned prev_offset; - const unsigned char *lastptr, *foreptr, *backptr; - unsigned hashpos; - - if(windowsize == 0 || windowsize > 32768) return 60; /*error: windowsize smaller/larger than allowed*/ - if((windowsize & (windowsize - 1)) != 0) return 90; /*error: must be power of two*/ - - if(nicematch > MAX_SUPPORTED_DEFLATE_LENGTH) nicematch = MAX_SUPPORTED_DEFLATE_LENGTH; - - for(pos = inpos; pos < insize; ++pos) - { - size_t wpos = pos & (windowsize - 1); /*position for in 'circular' hash buffers*/ - unsigned chainlength = 0; - - hashval = getHash(in, insize, pos); - - if(usezeros && hashval == 0) - { - if(numzeros == 0) numzeros = countZeros(in, insize, pos); - else if(pos + numzeros > insize || in[pos + numzeros - 1] != 0) --numzeros; - } - else - { - numzeros = 0; - } - - updateHashChain(hash, wpos, hashval, numzeros); - - /*the length and offset found for the current position*/ - length = 0; - offset = 0; - - hashpos = hash->chain[wpos]; - - lastptr = &in[insize < pos + MAX_SUPPORTED_DEFLATE_LENGTH ? insize : pos + MAX_SUPPORTED_DEFLATE_LENGTH]; - - /*search for the longest string*/ - prev_offset = 0; - for(;;) - { - if(chainlength++ >= maxchainlength) break; - current_offset = hashpos <= wpos ? (unsigned int)(wpos - hashpos) : (unsigned int)(wpos - hashpos + windowsize); - - if(current_offset < prev_offset) break; /*stop when went completely around the circular buffer*/ - prev_offset = current_offset; - if(current_offset > 0) - { - /*test the next characters*/ - foreptr = &in[pos]; - backptr = &in[pos - current_offset]; - - /*common case in PNGs is lots of zeros. Quickly skip over them as a speedup*/ - if(numzeros >= 3) - { - unsigned skip = hash->zeros[hashpos]; - if(skip > numzeros) skip = numzeros; - backptr += skip; - foreptr += skip; - } - - while(foreptr != lastptr && *backptr == *foreptr) /*maximum supported length by deflate is max length*/ - { - ++backptr; - ++foreptr; - } - current_length = (unsigned)(foreptr - &in[pos]); - - if(current_length > length) - { - length = current_length; /*the longest length*/ - offset = current_offset; /*the offset that is related to this longest length*/ - /*jump out once a length of max length is found (speed gain). 
This also jumps - out if length is MAX_SUPPORTED_DEFLATE_LENGTH*/ - if(current_length >= nicematch) break; - } - } - - if(hashpos == hash->chain[hashpos]) break; - - if(numzeros >= 3 && length > numzeros) - { - hashpos = hash->chainz[hashpos]; - if(hash->zeros[hashpos] != numzeros) break; - } - else - { - hashpos = hash->chain[hashpos]; - /*outdated hash value, happens if particular value was not encountered in whole last window*/ - if(hash->val[hashpos] != (int)hashval) break; - } - } - - if(lazymatching) - { - if(!lazy && length >= 3 && length <= maxlazymatch && length < MAX_SUPPORTED_DEFLATE_LENGTH) - { - lazy = 1; - lazylength = length; - lazyoffset = offset; - continue; /*try the next byte*/ - } - if(lazy) - { - lazy = 0; - if(pos == 0) ERROR_BREAK(81); - if(length > lazylength + 1) - { - /*push the previous character as literal*/ - if(!uivector_push_back(out, in[pos - 1])) ERROR_BREAK(83 /*alloc fail*/); - } - else - { - length = lazylength; - offset = lazyoffset; - hash->head[hashval] = -1; /*the same hashchain update will be done, this ensures no wrong alteration*/ - hash->headz[numzeros] = -1; /*idem*/ - --pos; - } - } - } - if(length >= 3 && offset > windowsize) ERROR_BREAK(86 /*too big (or overflown negative) offset*/); - - /*encode it as length/distance pair or literal value*/ - if(length < 3) /*only lengths of 3 or higher are supported as length/distance pair*/ - { - if(!uivector_push_back(out, in[pos])) ERROR_BREAK(83 /*alloc fail*/); - } - else if(length < minmatch || (length == 3 && offset > 4096)) - { - /*compensate for the fact that longer offsets have more extra bits, a - length of only 3 may be not worth it then*/ - if(!uivector_push_back(out, in[pos])) ERROR_BREAK(83 /*alloc fail*/); - } - else - { - addLengthDistance(out, length, offset); - for(i = 1; i < length; ++i) - { - ++pos; - wpos = pos & (windowsize - 1); - hashval = getHash(in, insize, pos); - if(usezeros && hashval == 0) - { - if(numzeros == 0) numzeros = countZeros(in, insize, pos); - else if(pos + numzeros > insize || in[pos + numzeros - 1] != 0) --numzeros; - } - else - { - numzeros = 0; - } - updateHashChain(hash, wpos, hashval, numzeros); - } - } - } /*end of the loop through each character of input*/ - - return error; -} - -/* /////////////////////////////////////////////////////////////////////////// */ - -static unsigned deflateNoCompression(ucvector* out, const unsigned char* data, size_t datasize) -{ - /*non compressed deflate block data: 1 bit BFINAL,2 bits BTYPE,(5 bits): it jumps to start of next byte, - 2 bytes LEN, 2 bytes NLEN, LEN bytes literal DATA*/ - - size_t i, j, numdeflateblocks = (datasize + 65534) / 65535; - unsigned datapos = 0; - for(i = 0; i != numdeflateblocks; ++i) - { - unsigned BFINAL, BTYPE, LEN, NLEN; - unsigned char firstbyte; - - BFINAL = (i == numdeflateblocks - 1); - BTYPE = 0; - - firstbyte = (unsigned char)(BFINAL + ((BTYPE & 1) << 1) + ((BTYPE & 2) << 1)); - ucvector_push_back(out, firstbyte); - - LEN = 65535; - if(datasize - datapos < 65535) LEN = (unsigned)datasize - datapos; - NLEN = 65535 - LEN; - - ucvector_push_back(out, (unsigned char)(LEN & 255)); - ucvector_push_back(out, (unsigned char)(LEN >> 8)); - ucvector_push_back(out, (unsigned char)(NLEN & 255)); - ucvector_push_back(out, (unsigned char)(NLEN >> 8)); - - /*Decompressed data*/ - for(j = 0; j < 65535 && datapos < datasize; ++j) - { - ucvector_push_back(out, data[datapos++]); - } - } - - return 0; -} - -/* -write the lz77-encoded data, which has lit, len and dist codes, to compressed stream using huffman 
trees. -tree_ll: the tree for lit and len codes. -tree_d: the tree for distance codes. -*/ -static void writeLZ77data(size_t* bp, ucvector* out, const uivector* lz77_encoded, - const HuffmanTree* tree_ll, const HuffmanTree* tree_d) -{ - size_t i = 0; - for(i = 0; i != lz77_encoded->size; ++i) - { - unsigned val = lz77_encoded->data[i]; - addHuffmanSymbol(bp, out, HuffmanTree_getCode(tree_ll, val), HuffmanTree_getLength(tree_ll, val)); - if(val > 256) /*for a length code, 3 more things have to be added*/ - { - unsigned length_index = val - FIRST_LENGTH_CODE_INDEX; - unsigned n_length_extra_bits = LENGTHEXTRA[length_index]; - unsigned length_extra_bits = lz77_encoded->data[++i]; - - unsigned distance_code = lz77_encoded->data[++i]; - - unsigned distance_index = distance_code; - unsigned n_distance_extra_bits = DISTANCEEXTRA[distance_index]; - unsigned distance_extra_bits = lz77_encoded->data[++i]; - - addBitsToStream(bp, out, length_extra_bits, n_length_extra_bits); - addHuffmanSymbol(bp, out, HuffmanTree_getCode(tree_d, distance_code), - HuffmanTree_getLength(tree_d, distance_code)); - addBitsToStream(bp, out, distance_extra_bits, n_distance_extra_bits); - } - } -} - -/*Deflate for a block of type "dynamic", that is, with freely, optimally, created huffman trees*/ -static unsigned deflateDynamic(ucvector* out, size_t* bp, Hash* hash, - const unsigned char* data, size_t datapos, size_t dataend, - const LodePNGCompressSettings* settings, unsigned final) -{ - unsigned error = 0; - - /* - A block is compressed as follows: The PNG data is lz77 encoded, resulting in - literal bytes and length/distance pairs. This is then huffman compressed with - two huffman trees. One huffman tree is used for the lit and len values ("ll"), - another huffman tree is used for the dist values ("d"). These two trees are - stored using their code lengths, and to compress even more these code lengths - are also run-length encoded and huffman compressed. This gives a huffman tree - of code lengths "cl". The code lenghts used to describe this third tree are - the code length code lengths ("clcl"). - */ - - /*The lz77 encoded data, represented with integers since there will also be length and distance codes in it*/ - uivector lz77_encoded; - HuffmanTree tree_ll; /*tree for lit,len values*/ - HuffmanTree tree_d; /*tree for distance codes*/ - HuffmanTree tree_cl; /*tree for encoding the code lengths representing tree_ll and tree_d*/ - uivector frequencies_ll; /*frequency of lit,len codes*/ - uivector frequencies_d; /*frequency of dist codes*/ - uivector frequencies_cl; /*frequency of code length codes*/ - uivector bitlen_lld; /*lit,len,dist code lenghts (int bits), literally (without repeat codes).*/ - uivector bitlen_lld_e; /*bitlen_lld encoded with repeat codes (this is a rudemtary run length compression)*/ - /*bitlen_cl is the code length code lengths ("clcl"). The bit lengths of codes to represent tree_cl - (these are written as is in the file, it would be crazy to compress these using yet another huffman - tree that needs to be represented by yet another set of code lengths)*/ - uivector bitlen_cl; - size_t datasize = dataend - datapos; - - /* - Due to the huffman compression of huffman tree representations ("two levels"), there are some anologies: - bitlen_lld is to tree_cl what data is to tree_ll and tree_d. - bitlen_lld_e is to bitlen_lld what lz77_encoded is to data. - bitlen_cl is to bitlen_lld_e what bitlen_lld is to lz77_encoded. 
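
The repeat codes used for that run-length step are 16 (copy the previous length 3-6 times), 17 (3-10 zeros) and 18 (11-138 zeros), mirroring what getTreeInflateDynamic reads back on the decoder side. A small standalone sketch of how such (symbol, extra-bits value) pairs expand into plain code lengths (ClSym and expand_code_lengths are illustrative names, not lodepng types):

    #include <stdio.h>

    typedef struct { unsigned symbol; unsigned extra; } ClSym;

    /* illustrative only: out must be large enough for the expanded lengths */
    static unsigned expand_code_lengths(const ClSym* in, unsigned count, unsigned* out)
    {
        unsigned i, r, n = 0;
        for(i = 0; i != count; ++i)
        {
            if(in[i].symbol <= 15) out[n++] = in[i].symbol;               /*a literal code length*/
            else if(in[i].symbol == 16)                                    /*repeat previous 3-6 times*/
            {
                unsigned prev = out[n - 1];
                for(r = 0; r != in[i].extra + 3; ++r) out[n++] = prev;
            }
            else if(in[i].symbol == 17)                                    /*3-10 zeros*/
                for(r = 0; r != in[i].extra + 3; ++r) out[n++] = 0;
            else                                                           /*18: 11-138 zeros*/
                for(r = 0; r != in[i].extra + 11; ++r) out[n++] = 0;
        }
        return n; /*number of code lengths produced*/
    }

    int main(void)
    {
        /*{8}, {16, extra 1}, {18, extra 2}, {7} -> 8 repeated 5 times, 13 zeros, 7*/
        ClSym syms[4] = { {8, 0}, {16, 1}, {18, 2}, {7, 0} };
        unsigned lengths[32], i;
        unsigned n = expand_code_lengths(syms, 4, lengths);
        for(i = 0; i != n; ++i) printf("%u ", lengths[i]);
        printf("\n");
        return 0;
    }
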
- */ - - unsigned BFINAL = final; - size_t numcodes_ll, numcodes_d, i; - unsigned HLIT, HDIST, HCLEN; - - uivector_init(&lz77_encoded); - HuffmanTree_init(&tree_ll); - HuffmanTree_init(&tree_d); - HuffmanTree_init(&tree_cl); - uivector_init(&frequencies_ll); - uivector_init(&frequencies_d); - uivector_init(&frequencies_cl); - uivector_init(&bitlen_lld); - uivector_init(&bitlen_lld_e); - uivector_init(&bitlen_cl); - - /*This while loop never loops due to a break at the end, it is here to - allow breaking out of it to the cleanup phase on error conditions.*/ - while(!error) - { - if(settings->use_lz77) - { - error = encodeLZ77(&lz77_encoded, hash, data, datapos, dataend, settings->windowsize, - settings->minmatch, settings->nicematch, settings->lazymatching); - if(error) break; - } - else - { - if(!uivector_resize(&lz77_encoded, datasize)) ERROR_BREAK(83 /*alloc fail*/); - for(i = datapos; i < dataend; ++i) lz77_encoded.data[i - datapos] = data[i]; /*no LZ77, but still will be Huffman compressed*/ - } - - if(!uivector_resizev(&frequencies_ll, 286, 0)) ERROR_BREAK(83 /*alloc fail*/); - if(!uivector_resizev(&frequencies_d, 30, 0)) ERROR_BREAK(83 /*alloc fail*/); - - /*Count the frequencies of lit, len and dist codes*/ - for(i = 0; i != lz77_encoded.size; ++i) - { - unsigned symbol = lz77_encoded.data[i]; - ++frequencies_ll.data[symbol]; - if(symbol > 256) - { - unsigned dist = lz77_encoded.data[i + 2]; - ++frequencies_d.data[dist]; - i += 3; - } - } - frequencies_ll.data[256] = 1; /*there will be exactly 1 end code, at the end of the block*/ - - /*Make both huffman trees, one for the lit and len codes, one for the dist codes*/ - error = HuffmanTree_makeFromFrequencies(&tree_ll, frequencies_ll.data, 257, frequencies_ll.size, 15); - if(error) break; - /*2, not 1, is chosen for mincodes: some buggy PNG decoders require at least 2 symbols in the dist tree*/ - error = HuffmanTree_makeFromFrequencies(&tree_d, frequencies_d.data, 2, frequencies_d.size, 15); - if(error) break; - - numcodes_ll = tree_ll.numcodes; if(numcodes_ll > 286) numcodes_ll = 286; - numcodes_d = tree_d.numcodes; if(numcodes_d > 30) numcodes_d = 30; - /*store the code lengths of both generated trees in bitlen_lld*/ - for(i = 0; i != numcodes_ll; ++i) uivector_push_back(&bitlen_lld, HuffmanTree_getLength(&tree_ll, (unsigned)i)); - for(i = 0; i != numcodes_d; ++i) uivector_push_back(&bitlen_lld, HuffmanTree_getLength(&tree_d, (unsigned)i)); - - /*run-length compress bitlen_ldd into bitlen_lld_e by using repeat codes 16 (copy length 3-6 times), - 17 (3-10 zeroes), 18 (11-138 zeroes)*/ - for(i = 0; i != (unsigned)bitlen_lld.size; ++i) - { - unsigned j = 0; /*amount of repititions*/ - while(i + j + 1 < (unsigned)bitlen_lld.size && bitlen_lld.data[i + j + 1] == bitlen_lld.data[i]) ++j; - - if(bitlen_lld.data[i] == 0 && j >= 2) /*repeat code for zeroes*/ - { - ++j; /*include the first zero*/ - if(j <= 10) /*repeat code 17 supports max 10 zeroes*/ - { - uivector_push_back(&bitlen_lld_e, 17); - uivector_push_back(&bitlen_lld_e, j - 3); - } - else /*repeat code 18 supports max 138 zeroes*/ - { - if(j > 138) j = 138; - uivector_push_back(&bitlen_lld_e, 18); - uivector_push_back(&bitlen_lld_e, j - 11); - } - i += (j - 1); - } - else if(j >= 3) /*repeat code for value other than zero*/ - { - size_t k; - unsigned num = j / 6, rest = j % 6; - uivector_push_back(&bitlen_lld_e, bitlen_lld.data[i]); - for(k = 0; k < num; ++k) - { - uivector_push_back(&bitlen_lld_e, 16); - uivector_push_back(&bitlen_lld_e, 6 - 3); - } - if(rest >= 3) - { - 
uivector_push_back(&bitlen_lld_e, 16); - uivector_push_back(&bitlen_lld_e, rest - 3); - } - else j -= rest; - i += j; - } - else /*too short to benefit from repeat code*/ - { - uivector_push_back(&bitlen_lld_e, bitlen_lld.data[i]); - } - } - - /*generate tree_cl, the huffmantree of huffmantrees*/ - - if(!uivector_resizev(&frequencies_cl, NUM_CODE_LENGTH_CODES, 0)) ERROR_BREAK(83 /*alloc fail*/); - for(i = 0; i != bitlen_lld_e.size; ++i) - { - ++frequencies_cl.data[bitlen_lld_e.data[i]]; - /*after a repeat code come the bits that specify the number of repetitions, - those don't need to be in the frequencies_cl calculation*/ - if(bitlen_lld_e.data[i] >= 16) ++i; - } - - error = HuffmanTree_makeFromFrequencies(&tree_cl, frequencies_cl.data, - frequencies_cl.size, frequencies_cl.size, 7); - if(error) break; - - if(!uivector_resize(&bitlen_cl, tree_cl.numcodes)) ERROR_BREAK(83 /*alloc fail*/); - for(i = 0; i != tree_cl.numcodes; ++i) - { - /*lenghts of code length tree is in the order as specified by deflate*/ - bitlen_cl.data[i] = HuffmanTree_getLength(&tree_cl, CLCL_ORDER[i]); - } - while(bitlen_cl.data[bitlen_cl.size - 1] == 0 && bitlen_cl.size > 4) - { - /*remove zeros at the end, but minimum size must be 4*/ - if(!uivector_resize(&bitlen_cl, bitlen_cl.size - 1)) ERROR_BREAK(83 /*alloc fail*/); - } - if(error) break; - - /* - Write everything into the output - - After the BFINAL and BTYPE, the dynamic block consists out of the following: - - 5 bits HLIT, 5 bits HDIST, 4 bits HCLEN - - (HCLEN+4)*3 bits code lengths of code length alphabet - - HLIT + 257 code lenghts of lit/length alphabet (encoded using the code length - alphabet, + possible repetition codes 16, 17, 18) - - HDIST + 1 code lengths of distance alphabet (encoded using the code length - alphabet, + possible repetition codes 16, 17, 18) - - compressed data - - 256 (end code) - */ - - /*Write block type*/ - addBitToStream(bp, out, BFINAL); - addBitToStream(bp, out, 0); /*first bit of BTYPE "dynamic"*/ - addBitToStream(bp, out, 1); /*second bit of BTYPE "dynamic"*/ - - /*write the HLIT, HDIST and HCLEN values*/ - HLIT = (unsigned)(numcodes_ll - 257); - HDIST = (unsigned)(numcodes_d - 1); - HCLEN = (unsigned)bitlen_cl.size - 4; - /*trim zeroes for HCLEN. 
HLIT and HDIST were already trimmed at tree creation*/ - while(!bitlen_cl.data[HCLEN + 4 - 1] && HCLEN > 0) --HCLEN; - addBitsToStream(bp, out, HLIT, 5); - addBitsToStream(bp, out, HDIST, 5); - addBitsToStream(bp, out, HCLEN, 4); - - /*write the code lenghts of the code length alphabet*/ - for(i = 0; i != HCLEN + 4; ++i) addBitsToStream(bp, out, bitlen_cl.data[i], 3); - - /*write the lenghts of the lit/len AND the dist alphabet*/ - for(i = 0; i != bitlen_lld_e.size; ++i) - { - addHuffmanSymbol(bp, out, HuffmanTree_getCode(&tree_cl, bitlen_lld_e.data[i]), - HuffmanTree_getLength(&tree_cl, bitlen_lld_e.data[i])); - /*extra bits of repeat codes*/ - if(bitlen_lld_e.data[i] == 16) addBitsToStream(bp, out, bitlen_lld_e.data[++i], 2); - else if(bitlen_lld_e.data[i] == 17) addBitsToStream(bp, out, bitlen_lld_e.data[++i], 3); - else if(bitlen_lld_e.data[i] == 18) addBitsToStream(bp, out, bitlen_lld_e.data[++i], 7); - } - - /*write the compressed data symbols*/ - writeLZ77data(bp, out, &lz77_encoded, &tree_ll, &tree_d); - /*error: the length of the end code 256 must be larger than 0*/ - if(HuffmanTree_getLength(&tree_ll, 256) == 0) ERROR_BREAK(64); - - /*write the end code*/ - addHuffmanSymbol(bp, out, HuffmanTree_getCode(&tree_ll, 256), HuffmanTree_getLength(&tree_ll, 256)); - - break; /*end of error-while*/ - } - - /*cleanup*/ - uivector_cleanup(&lz77_encoded); - HuffmanTree_cleanup(&tree_ll); - HuffmanTree_cleanup(&tree_d); - HuffmanTree_cleanup(&tree_cl); - uivector_cleanup(&frequencies_ll); - uivector_cleanup(&frequencies_d); - uivector_cleanup(&frequencies_cl); - uivector_cleanup(&bitlen_lld_e); - uivector_cleanup(&bitlen_lld); - uivector_cleanup(&bitlen_cl); - - return error; -} - -static unsigned deflateFixed(ucvector* out, size_t* bp, Hash* hash, - const unsigned char* data, - size_t datapos, size_t dataend, - const LodePNGCompressSettings* settings, unsigned final) -{ - HuffmanTree tree_ll; /*tree for literal values and length codes*/ - HuffmanTree tree_d; /*tree for distance codes*/ - - unsigned BFINAL = final; - unsigned error = 0; - size_t i; - - HuffmanTree_init(&tree_ll); - HuffmanTree_init(&tree_d); - - generateFixedLitLenTree(&tree_ll); - generateFixedDistanceTree(&tree_d); - - addBitToStream(bp, out, BFINAL); - addBitToStream(bp, out, 1); /*first bit of BTYPE*/ - addBitToStream(bp, out, 0); /*second bit of BTYPE*/ - - if(settings->use_lz77) /*LZ77 encoded*/ - { - uivector lz77_encoded; - uivector_init(&lz77_encoded); - error = encodeLZ77(&lz77_encoded, hash, data, datapos, dataend, settings->windowsize, - settings->minmatch, settings->nicematch, settings->lazymatching); - if(!error) writeLZ77data(bp, out, &lz77_encoded, &tree_ll, &tree_d); - uivector_cleanup(&lz77_encoded); - } - else /*no LZ77, but still will be Huffman compressed*/ - { - for(i = datapos; i < dataend; ++i) - { - addHuffmanSymbol(bp, out, HuffmanTree_getCode(&tree_ll, data[i]), HuffmanTree_getLength(&tree_ll, data[i])); - } - } - /*add END code*/ - if(!error) addHuffmanSymbol(bp, out, HuffmanTree_getCode(&tree_ll, 256), HuffmanTree_getLength(&tree_ll, 256)); - - /*cleanup*/ - HuffmanTree_cleanup(&tree_ll); - HuffmanTree_cleanup(&tree_d); - - return error; -} - -static unsigned lodepng_deflatev(ucvector* out, const unsigned char* in, size_t insize, - const LodePNGCompressSettings* settings) -{ - unsigned error = 0; - size_t i, blocksize, numdeflateblocks; - size_t bp = 0; /*the bit pointer*/ - Hash hash; - - if(settings->btype > 2) return 61; - else if(settings->btype == 0) return deflateNoCompression(out, in, 
insize); - else if(settings->btype == 1) blocksize = insize; - else /*if(settings->btype == 2)*/ - { - /*on PNGs, deflate blocks of 65-262k seem to give most dense encoding*/ - blocksize = insize / 8 + 8; - if(blocksize < 65536) blocksize = 65536; - if(blocksize > 262144) blocksize = 262144; - } - - numdeflateblocks = (insize + blocksize - 1) / blocksize; - if(numdeflateblocks == 0) numdeflateblocks = 1; - - error = hash_init(&hash, settings->windowsize); - if(error) return error; - - for(i = 0; i != numdeflateblocks && !error; ++i) - { - unsigned final = (i == numdeflateblocks - 1); - size_t start = i * blocksize; - size_t end = start + blocksize; - if(end > insize) end = insize; - - if(settings->btype == 1) error = deflateFixed(out, &bp, &hash, in, start, end, settings, final); - else if(settings->btype == 2) error = deflateDynamic(out, &bp, &hash, in, start, end, settings, final); - } - - hash_cleanup(&hash); - - return error; -} - -unsigned lodepng_deflate(unsigned char** out, size_t* outsize, - const unsigned char* in, size_t insize, - const LodePNGCompressSettings* settings) -{ - unsigned error; - ucvector v; - ucvector_init_buffer(&v, *out, *outsize); - error = lodepng_deflatev(&v, in, insize, settings); - *out = v.data; - *outsize = v.size; - return error; -} - -static unsigned deflate(unsigned char** out, size_t* outsize, - const unsigned char* in, size_t insize, - const LodePNGCompressSettings* settings) -{ - if(settings->custom_deflate) - { - return settings->custom_deflate(out, outsize, in, insize, settings); - } - else - { - return lodepng_deflate(out, outsize, in, insize, settings); - } -} - -#endif /*LODEPNG_COMPILE_DECODER*/ - -/* ////////////////////////////////////////////////////////////////////////// */ -/* / Adler32 */ -/* ////////////////////////////////////////////////////////////////////////// */ - -static unsigned update_adler32(unsigned adler, const unsigned char* data, unsigned len) -{ - unsigned s1 = adler & 0xffff; - unsigned s2 = (adler >> 16) & 0xffff; - - while(len > 0) - { - /*at least 5550 sums can be done before the sums overflow, saving a lot of module divisions*/ - unsigned amount = len > 5550 ? 
5550 : len; - len -= amount; - while(amount > 0) - { - s1 += (*data++); - s2 += s1; - --amount; - } - s1 %= 65521; - s2 %= 65521; - } - - return (s2 << 16) | s1; -} - -/*Return the adler32 of the bytes data[0..len-1]*/ -static unsigned adler32(const unsigned char* data, unsigned len) -{ - return update_adler32(1L, data, len); -} - -/* ////////////////////////////////////////////////////////////////////////// */ -/* / Zlib / */ -/* ////////////////////////////////////////////////////////////////////////// */ - -#ifdef LODEPNG_COMPILE_DECODER - -unsigned lodepng_zlib_decompress(unsigned char** out, size_t* outsize, const unsigned char* in, - size_t insize, const LodePNGDecompressSettings* settings) -{ - unsigned error = 0; - unsigned CM, CINFO, FDICT; - - if(insize < 2) return 53; /*error, size of zlib data too small*/ - /*read information from zlib header*/ - if((in[0] * 256 + in[1]) % 31 != 0) - { - /*error: 256 * in[0] + in[1] must be a multiple of 31, the FCHECK value is supposed to be made that way*/ - return 24; - } - - CM = in[0] & 15; - CINFO = (in[0] >> 4) & 15; - /*FCHECK = in[1] & 31;*/ /*FCHECK is already tested above*/ - FDICT = (in[1] >> 5) & 1; - /*FLEVEL = (in[1] >> 6) & 3;*/ /*FLEVEL is not used here*/ - - if(CM != 8 || CINFO > 7) - { - /*error: only compression method 8: inflate with sliding window of 32k is supported by the PNG spec*/ - return 25; - } - if(FDICT != 0) - { - /*error: the specification of PNG says about the zlib stream: - "The additional flags shall not specify a preset dictionary."*/ - return 26; - } - - error = inflate(out, outsize, in + 2, insize - 2, settings); - if(error) return error; - - if(!settings->ignore_adler32) - { - unsigned ADLER32 = lodepng_read32bitInt(&in[insize - 4]); - unsigned checksum = adler32(*out, (unsigned)(*outsize)); - if(checksum != ADLER32) return 58; /*error, adler checksum not correct, data must be corrupted*/ - } - - return 0; /*no error*/ -} - -static unsigned zlib_decompress(unsigned char** out, size_t* outsize, const unsigned char* in, - size_t insize, const LodePNGDecompressSettings* settings) -{ - if(settings->custom_zlib) - { - return settings->custom_zlib(out, outsize, in, insize, settings); - } - else - { - return lodepng_zlib_decompress(out, outsize, in, insize, settings); - } -} - -#endif /*LODEPNG_COMPILE_DECODER*/ - -#ifdef LODEPNG_COMPILE_ENCODER - -unsigned lodepng_zlib_compress(unsigned char** out, size_t* outsize, const unsigned char* in, - size_t insize, const LodePNGCompressSettings* settings) -{ - /*initially, *out must be NULL and outsize 0, if you just give some random *out - that's pointing to a non allocated buffer, this'll crash*/ - ucvector outv; - size_t i; - unsigned error; - unsigned char* deflatedata = 0; - size_t deflatesize = 0; - - /*zlib data: 1 byte CMF (CM+CINFO), 1 byte FLG, deflate data, 4 byte ADLER32 checksum of the Decompressed data*/ - unsigned CMF = 120; /*0b01111000: CM 8, CINFO 7. 
With CINFO 7, any window size up to 32768 can be used.*/ - unsigned FLEVEL = 0; - unsigned FDICT = 0; - unsigned CMFFLG = 256 * CMF + FDICT * 32 + FLEVEL * 64; - unsigned FCHECK = 31 - CMFFLG % 31; - CMFFLG += FCHECK; - - /*ucvector-controlled version of the output buffer, for dynamic array*/ - ucvector_init_buffer(&outv, *out, *outsize); - - ucvector_push_back(&outv, (unsigned char)(CMFFLG >> 8)); - ucvector_push_back(&outv, (unsigned char)(CMFFLG & 255)); - - error = deflate(&deflatedata, &deflatesize, in, insize, settings); - - if(!error) - { - unsigned ADLER32 = adler32(in, (unsigned)insize); - for(i = 0; i != deflatesize; ++i) ucvector_push_back(&outv, deflatedata[i]); - lodepng_free(deflatedata); - lodepng_add32bitInt(&outv, ADLER32); - } - - *out = outv.data; - *outsize = outv.size; - - return error; -} - -/* compress using the default or custom zlib function */ -static unsigned zlib_compress(unsigned char** out, size_t* outsize, const unsigned char* in, - size_t insize, const LodePNGCompressSettings* settings) -{ - if(settings->custom_zlib) - { - return settings->custom_zlib(out, outsize, in, insize, settings); - } - else - { - return lodepng_zlib_compress(out, outsize, in, insize, settings); - } -} - -#endif /*LODEPNG_COMPILE_ENCODER*/ - -#else /*no LODEPNG_COMPILE_ZLIB*/ - -#ifdef LODEPNG_COMPILE_DECODER -static unsigned zlib_decompress(unsigned char** out, size_t* outsize, const unsigned char* in, - size_t insize, const LodePNGDecompressSettings* settings) -{ - if(!settings->custom_zlib) return 87; /*no custom zlib function provided */ - return settings->custom_zlib(out, outsize, in, insize, settings); -} -#endif /*LODEPNG_COMPILE_DECODER*/ -#ifdef LODEPNG_COMPILE_ENCODER -static unsigned zlib_compress(unsigned char** out, size_t* outsize, const unsigned char* in, - size_t insize, const LodePNGCompressSettings* settings) -{ - if(!settings->custom_zlib) return 87; /*no custom zlib function provided */ - return settings->custom_zlib(out, outsize, in, insize, settings); -} -#endif /*LODEPNG_COMPILE_ENCODER*/ - -#endif /*LODEPNG_COMPILE_ZLIB*/ - -/* ////////////////////////////////////////////////////////////////////////// */ - -#ifdef LODEPNG_COMPILE_ENCODER - -/*this is a good tradeoff between speed and compression ratio*/ -#define DEFAULT_WINDOWSIZE 2048 - -void lodepng_compress_settings_init(LodePNGCompressSettings* settings) -{ - /*compress with dynamic huffman tree (not in the mathematical sense, just not the predefined one)*/ - settings->btype = 2; - settings->use_lz77 = 1; - settings->windowsize = DEFAULT_WINDOWSIZE; - settings->minmatch = 3; - settings->nicematch = 128; - settings->lazymatching = 1; - - settings->custom_zlib = 0; - settings->custom_deflate = 0; - settings->custom_context = 0; -} - -const LodePNGCompressSettings lodepng_default_compress_settings = {2, 1, DEFAULT_WINDOWSIZE, 3, 128, 1, 0, 0, 0}; - - -#endif /*LODEPNG_COMPILE_ENCODER*/ - -#ifdef LODEPNG_COMPILE_DECODER - -void lodepng_decompress_settings_init(LodePNGDecompressSettings* settings) -{ - settings->ignore_adler32 = 0; - - settings->custom_zlib = 0; - settings->custom_inflate = 0; - settings->custom_context = 0; -} - -const LodePNGDecompressSettings lodepng_default_decompress_settings = {0, 0, 0, 0}; - -#endif /*LODEPNG_COMPILE_DECODER*/ - -/* ////////////////////////////////////////////////////////////////////////// */ -/* ////////////////////////////////////////////////////////////////////////// */ -/* // End of Zlib related code. Begin of PNG related code. 
// */ -/* ////////////////////////////////////////////////////////////////////////// */ -/* ////////////////////////////////////////////////////////////////////////// */ - -#ifdef LODEPNG_COMPILE_PNG - -/* ////////////////////////////////////////////////////////////////////////// */ -/* / CRC32 / */ -/* ////////////////////////////////////////////////////////////////////////// */ - - -#ifndef LODEPNG_NO_COMPILE_CRC -/* CRC polynomial: 0xedb88320 */ -static unsigned lodepng_crc32_table[256] = { - 0u, 1996959894u, 3993919788u, 2567524794u, 124634137u, 1886057615u, 3915621685u, 2657392035u, - 249268274u, 2044508324u, 3772115230u, 2547177864u, 162941995u, 2125561021u, 3887607047u, 2428444049u, - 498536548u, 1789927666u, 4089016648u, 2227061214u, 450548861u, 1843258603u, 4107580753u, 2211677639u, - 325883990u, 1684777152u, 4251122042u, 2321926636u, 335633487u, 1661365465u, 4195302755u, 2366115317u, - 997073096u, 1281953886u, 3579855332u, 2724688242u, 1006888145u, 1258607687u, 3524101629u, 2768942443u, - 901097722u, 1119000684u, 3686517206u, 2898065728u, 853044451u, 1172266101u, 3705015759u, 2882616665u, - 651767980u, 1373503546u, 3369554304u, 3218104598u, 565507253u, 1454621731u, 3485111705u, 3099436303u, - 671266974u, 1594198024u, 3322730930u, 2970347812u, 795835527u, 1483230225u, 3244367275u, 3060149565u, - 1994146192u, 31158534u, 2563907772u, 4023717930u, 1907459465u, 112637215u, 2680153253u, 3904427059u, - 2013776290u, 251722036u, 2517215374u, 3775830040u, 2137656763u, 141376813u, 2439277719u, 3865271297u, - 1802195444u, 476864866u, 2238001368u, 4066508878u, 1812370925u, 453092731u, 2181625025u, 4111451223u, - 1706088902u, 314042704u, 2344532202u, 4240017532u, 1658658271u, 366619977u, 2362670323u, 4224994405u, - 1303535960u, 984961486u, 2747007092u, 3569037538u, 1256170817u, 1037604311u, 2765210733u, 3554079995u, - 1131014506u, 879679996u, 2909243462u, 3663771856u, 1141124467u, 855842277u, 2852801631u, 3708648649u, - 1342533948u, 654459306u, 3188396048u, 3373015174u, 1466479909u, 544179635u, 3110523913u, 3462522015u, - 1591671054u, 702138776u, 2966460450u, 3352799412u, 1504918807u, 783551873u, 3082640443u, 3233442989u, - 3988292384u, 2596254646u, 62317068u, 1957810842u, 3939845945u, 2647816111u, 81470997u, 1943803523u, - 3814918930u, 2489596804u, 225274430u, 2053790376u, 3826175755u, 2466906013u, 167816743u, 2097651377u, - 4027552580u, 2265490386u, 503444072u, 1762050814u, 4150417245u, 2154129355u, 426522225u, 1852507879u, - 4275313526u, 2312317920u, 282753626u, 1742555852u, 4189708143u, 2394877945u, 397917763u, 1622183637u, - 3604390888u, 2714866558u, 953729732u, 1340076626u, 3518719985u, 2797360999u, 1068828381u, 1219638859u, - 3624741850u, 2936675148u, 906185462u, 1090812512u, 3747672003u, 2825379669u, 829329135u, 1181335161u, - 3412177804u, 3160834842u, 628085408u, 1382605366u, 3423369109u, 3138078467u, 570562233u, 1426400815u, - 3317316542u, 2998733608u, 733239954u, 1555261956u, 3268935591u, 3050360625u, 752459403u, 1541320221u, - 2607071920u, 3965973030u, 1969922972u, 40735498u, 2617837225u, 3943577151u, 1913087877u, 83908371u, - 2512341634u, 3803740692u, 2075208622u, 213261112u, 2463272603u, 3855990285u, 2094854071u, 198958881u, - 2262029012u, 4057260610u, 1759359992u, 534414190u, 2176718541u, 4139329115u, 1873836001u, 414664567u, - 2282248934u, 4279200368u, 1711684554u, 285281116u, 2405801727u, 4167216745u, 1634467795u, 376229701u, - 2685067896u, 3608007406u, 1308918612u, 956543938u, 2808555105u, 3495958263u, 1231636301u, 1047427035u, - 2932959818u, 3654703836u, 1088359270u, 
936918000u, 2847714899u, 3736837829u, 1202900863u, 817233897u, - 3183342108u, 3401237130u, 1404277552u, 615818150u, 3134207493u, 3453421203u, 1423857449u, 601450431u, - 3009837614u, 3294710456u, 1567103746u, 711928724u, 3020668471u, 3272380065u, 1510334235u, 755167117u -}; - -/*Return the CRC of the bytes buf[0..len-1].*/ -unsigned lodepng_crc32(const unsigned char* data, size_t length) -{ - unsigned r = 0xffffffffu; - size_t i; - for(i = 0; i < length; ++i) - { - r = lodepng_crc32_table[(r ^ data[i]) & 0xff] ^ (r >> 8); - } - return r ^ 0xffffffffu; -} -#else /* !LODEPNG_NO_COMPILE_CRC */ -unsigned lodepng_crc32(const unsigned char* data, size_t length); -#endif /* !LODEPNG_NO_COMPILE_CRC */ - -/* ////////////////////////////////////////////////////////////////////////// */ -/* / Reading and writing single bits and bytes from/to stream for LodePNG / */ -/* ////////////////////////////////////////////////////////////////////////// */ - -static unsigned char readBitFromReversedStream(size_t* bitpointer, const unsigned char* bitstream) -{ - unsigned char result = (unsigned char)((bitstream[(*bitpointer) >> 3] >> (7 - ((*bitpointer) & 0x7))) & 1); - ++(*bitpointer); - return result; -} - -static unsigned readBitsFromReversedStream(size_t* bitpointer, const unsigned char* bitstream, size_t nbits) -{ - unsigned result = 0; - size_t i; - for(i = nbits - 1; i < nbits; --i) - { - result += (unsigned)readBitFromReversedStream(bitpointer, bitstream) << i; - } - return result; -} - -#ifdef LODEPNG_COMPILE_DECODER -static void setBitOfReversedStream0(size_t* bitpointer, unsigned char* bitstream, unsigned char bit) -{ - /*the current bit in bitstream must be 0 for this to work*/ - if(bit) - { - /*earlier bit of huffman code is in a lesser significant bit of an earlier byte*/ - bitstream[(*bitpointer) >> 3] |= (bit << (7 - ((*bitpointer) & 0x7))); - } - ++(*bitpointer); -} -#endif /*LODEPNG_COMPILE_DECODER*/ - -static void setBitOfReversedStream(size_t* bitpointer, unsigned char* bitstream, unsigned char bit) -{ - /*the current bit in bitstream may be 0 or 1 for this to work*/ - if(bit == 0) bitstream[(*bitpointer) >> 3] &= (unsigned char)(~(1 << (7 - ((*bitpointer) & 0x7)))); - else bitstream[(*bitpointer) >> 3] |= (1 << (7 - ((*bitpointer) & 0x7))); - ++(*bitpointer); -} - -/* ////////////////////////////////////////////////////////////////////////// */ -/* / PNG chunks / */ -/* ////////////////////////////////////////////////////////////////////////// */ - -unsigned lodepng_chunk_length(const unsigned char* chunk) -{ - return lodepng_read32bitInt(&chunk[0]); -} - -void lodepng_chunk_type(char type[5], const unsigned char* chunk) -{ - unsigned i; - for(i = 0; i != 4; ++i) type[i] = (char)chunk[4 + i]; - type[4] = 0; /*null termination char*/ -} - -unsigned char lodepng_chunk_type_equals(const unsigned char* chunk, const char* type) -{ - if(strlen(type) != 4) return 0; - return (chunk[4] == type[0] && chunk[5] == type[1] && chunk[6] == type[2] && chunk[7] == type[3]); -} - -unsigned char lodepng_chunk_ancillary(const unsigned char* chunk) -{ - return((chunk[4] & 32) != 0); -} - -unsigned char lodepng_chunk_private(const unsigned char* chunk) -{ - return((chunk[6] & 32) != 0); -} - -unsigned char lodepng_chunk_safetocopy(const unsigned char* chunk) -{ - return((chunk[7] & 32) != 0); -} - -unsigned char* lodepng_chunk_data(unsigned char* chunk) -{ - return &chunk[8]; -} - -const unsigned char* lodepng_chunk_data_const(const unsigned char* chunk) -{ - return &chunk[8]; -} - -unsigned 
lodepng_chunk_check_crc(const unsigned char* chunk) -{ - unsigned length = lodepng_chunk_length(chunk); - unsigned CRC = lodepng_read32bitInt(&chunk[length + 8]); - /*the CRC is taken of the data and the 4 chunk type letters, not the length*/ - unsigned checksum = lodepng_crc32(&chunk[4], length + 4); - if(CRC != checksum) return 1; - else return 0; -} - -void lodepng_chunk_generate_crc(unsigned char* chunk) -{ - unsigned length = lodepng_chunk_length(chunk); - unsigned CRC = lodepng_crc32(&chunk[4], length + 4); - lodepng_set32bitInt(chunk + 8 + length, CRC); -} - -unsigned char* lodepng_chunk_next(unsigned char* chunk) -{ - unsigned total_chunk_length = lodepng_chunk_length(chunk) + 12; - return &chunk[total_chunk_length]; -} - -const unsigned char* lodepng_chunk_next_const(const unsigned char* chunk) -{ - unsigned total_chunk_length = lodepng_chunk_length(chunk) + 12; - return &chunk[total_chunk_length]; -} - -unsigned lodepng_chunk_append(unsigned char** out, size_t* outlength, const unsigned char* chunk) -{ - unsigned i; - unsigned total_chunk_length = lodepng_chunk_length(chunk) + 12; - unsigned char *chunk_start, *new_buffer; - size_t new_length = (*outlength) + total_chunk_length; - if(new_length < total_chunk_length || new_length < (*outlength)) return 77; /*integer overflow happened*/ - - new_buffer = (unsigned char*)lodepng_realloc(*out, new_length); - if(!new_buffer) return 83; /*alloc fail*/ - (*out) = new_buffer; - (*outlength) = new_length; - chunk_start = &(*out)[new_length - total_chunk_length]; - - for(i = 0; i != total_chunk_length; ++i) chunk_start[i] = chunk[i]; - - return 0; -} - -unsigned lodepng_chunk_create(unsigned char** out, size_t* outlength, unsigned length, - const char* type, const unsigned char* data) -{ - unsigned i; - unsigned char *chunk, *new_buffer; - size_t new_length = (*outlength) + length + 12; - if(new_length < length + 12 || new_length < (*outlength)) return 77; /*integer overflow happened*/ - new_buffer = (unsigned char*)lodepng_realloc(*out, new_length); - if(!new_buffer) return 83; /*alloc fail*/ - (*out) = new_buffer; - (*outlength) = new_length; - chunk = &(*out)[(*outlength) - length - 12]; - - /*1: length*/ - lodepng_set32bitInt(chunk, (unsigned)length); - - /*2: chunk name (4 letters)*/ - chunk[4] = (unsigned char)type[0]; - chunk[5] = (unsigned char)type[1]; - chunk[6] = (unsigned char)type[2]; - chunk[7] = (unsigned char)type[3]; - - /*3: the data*/ - for(i = 0; i != length; ++i) chunk[8 + i] = data[i]; - - /*4: CRC (of the chunkname characters and the data)*/ - lodepng_chunk_generate_crc(chunk); - - return 0; -} - -/* ////////////////////////////////////////////////////////////////////////// */ -/* / Color types and such / */ -/* ////////////////////////////////////////////////////////////////////////// */ - -/*return type is a LodePNG error code*/ -static unsigned checkColorValidity(LodePNGColorType colortype, unsigned bd) /*bd = bitdepth*/ -{ - switch(colortype) - { - case 0: if(!(bd == 1 || bd == 2 || bd == 4 || bd == 8 || bd == 16)) return 37; break; /*grey*/ - case 2: if(!( bd == 8 || bd == 16)) return 37; break; /*RGB*/ - case 3: if(!(bd == 1 || bd == 2 || bd == 4 || bd == 8 )) return 37; break; /*palette*/ - case 4: if(!( bd == 8 || bd == 16)) return 37; break; /*grey + alpha*/ - case 6: if(!( bd == 8 || bd == 16)) return 37; break; /*RGBA*/ - default: return 31; - } - return 0; /*allowed color type / bits combination*/ -} - -static unsigned getNumColorChannels(LodePNGColorType colortype) -{ - switch(colortype) - { - case 0: 
return 1; /*grey*/ - case 2: return 3; /*RGB*/ - case 3: return 1; /*palette*/ - case 4: return 2; /*grey + alpha*/ - case 6: return 4; /*RGBA*/ - } - return 0; /*unexisting color type*/ -} - -static unsigned lodepng_get_bpp_lct(LodePNGColorType colortype, unsigned bitdepth) -{ - /*bits per pixel is amount of channels * bits per channel*/ - return getNumColorChannels(colortype) * bitdepth; -} - -/* ////////////////////////////////////////////////////////////////////////// */ - -void lodepng_color_mode_init(LodePNGColorMode* info) -{ - info->key_defined = 0; - info->key_r = info->key_g = info->key_b = 0; - info->colortype = LCT_RGBA; - info->bitdepth = 8; - info->palette = 0; - info->palettesize = 0; -} - -void lodepng_color_mode_cleanup(LodePNGColorMode* info) -{ - lodepng_palette_clear(info); -} - -unsigned lodepng_color_mode_copy(LodePNGColorMode* dest, const LodePNGColorMode* source) -{ - size_t i; - lodepng_color_mode_cleanup(dest); - *dest = *source; - if(source->palette) - { - dest->palette = (unsigned char*)lodepng_malloc(1024); - if(!dest->palette && source->palettesize) return 83; /*alloc fail*/ - for(i = 0; i != source->palettesize * 4; ++i) dest->palette[i] = source->palette[i]; - } - return 0; -} - -static int lodepng_color_mode_equal(const LodePNGColorMode* a, const LodePNGColorMode* b) -{ - size_t i; - if(a->colortype != b->colortype) return 0; - if(a->bitdepth != b->bitdepth) return 0; - if(a->key_defined != b->key_defined) return 0; - if(a->key_defined) - { - if(a->key_r != b->key_r) return 0; - if(a->key_g != b->key_g) return 0; - if(a->key_b != b->key_b) return 0; - } - /*if one of the palette sizes is 0, then we consider it to be the same as the - other: it means that e.g. the palette was not given by the user and should be - considered the same as the palette inside the PNG.*/ - if(1/*a->palettesize != 0 && b->palettesize != 0*/) { - if(a->palettesize != b->palettesize) return 0; - for(i = 0; i != a->palettesize * 4; ++i) - { - if(a->palette[i] != b->palette[i]) return 0; - } - } - return 1; -} - -void lodepng_palette_clear(LodePNGColorMode* info) -{ - if(info->palette) lodepng_free(info->palette); - info->palette = 0; - info->palettesize = 0; -} - -unsigned lodepng_palette_add(LodePNGColorMode* info, - unsigned char r, unsigned char g, unsigned char b, unsigned char a) -{ - unsigned char* data; - /*the same resize technique as C++ std::vectors is used, and here it's made so that for a palette with - the max of 256 colors, it'll have the exact alloc size*/ - if(!info->palette) /*allocate palette if empty*/ - { - /*room for 256 colors with 4 bytes each*/ - data = (unsigned char*)lodepng_realloc(info->palette, 1024); - if(!data) return 83; /*alloc fail*/ - else info->palette = data; - } - info->palette[4 * info->palettesize + 0] = r; - info->palette[4 * info->palettesize + 1] = g; - info->palette[4 * info->palettesize + 2] = b; - info->palette[4 * info->palettesize + 3] = a; - ++info->palettesize; - return 0; -} - -unsigned lodepng_get_bpp(const LodePNGColorMode* info) -{ - /*calculate bits per pixel out of colortype and bitdepth*/ - return lodepng_get_bpp_lct(info->colortype, info->bitdepth); -} - -unsigned lodepng_get_channels(const LodePNGColorMode* info) -{ - return getNumColorChannels(info->colortype); -} - -unsigned lodepng_is_greyscale_type(const LodePNGColorMode* info) -{ - return info->colortype == LCT_GREY || info->colortype == LCT_GREY_ALPHA; -} - -unsigned lodepng_is_alpha_type(const LodePNGColorMode* info) -{ - return (info->colortype & 4) != 0; /*4 or 6*/ -} 
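The color-mode helpers above reduce every LodePNG color type to a channel count multiplied by a bit depth. A minimal usage sketch of that public API, assuming the usual "lodepng.h" header (the include path inside this tree may differ):

/* Illustrative sketch only: exercises lodepng_get_channels()/lodepng_get_bpp()
   from the helpers above; assumes the public "lodepng.h" header is available. */
#include <stdio.h>
#include "lodepng.h"

static void print_mode_info(LodePNGColorType type, unsigned bitdepth)
{
    LodePNGColorMode mode;
    lodepng_color_mode_init(&mode);    /* defaults: LCT_RGBA, bitdepth 8, no palette */
    mode.colortype = type;
    mode.bitdepth = bitdepth;

    /* bits per pixel is simply channels * bitdepth, as lodepng_get_bpp_lct() computes */
    printf("colortype %d, bitdepth %u: %u channel(s), %u bpp\n",
           (int)type, bitdepth,
           lodepng_get_channels(&mode), lodepng_get_bpp(&mode));

    lodepng_color_mode_cleanup(&mode); /* frees the palette if one was added */
}

int main(void)
{
    print_mode_info(LCT_GREY, 1);  /* 1 channel,  1 bpp  */
    print_mode_info(LCT_RGB, 8);   /* 3 channels, 24 bpp */
    print_mode_info(LCT_RGBA, 16); /* 4 channels, 64 bpp */
    return 0;
}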
- -unsigned lodepng_is_palette_type(const LodePNGColorMode* info) -{ - return info->colortype == LCT_PALETTE; -} - -unsigned lodepng_has_palette_alpha(const LodePNGColorMode* info) -{ - size_t i; - for(i = 0; i != info->palettesize; ++i) - { - if(info->palette[i * 4 + 3] < 255) return 1; - } - return 0; -} - -unsigned lodepng_can_have_alpha(const LodePNGColorMode* info) -{ - return info->key_defined - || lodepng_is_alpha_type(info) - || lodepng_has_palette_alpha(info); -} - -size_t lodepng_get_raw_size(unsigned w, unsigned h, const LodePNGColorMode* color) -{ - /*will not overflow for any color type if roughly w * h < 268435455*/ - int bpp = lodepng_get_bpp(color); - size_t n = w * h; - return ((n / 8) * bpp) + ((n & 7) * bpp + 7) / 8; -} - -size_t lodepng_get_raw_size_lct(unsigned w, unsigned h, LodePNGColorType colortype, unsigned bitdepth) -{ - /*will not overflow for any color type if roughly w * h < 268435455*/ - int bpp = lodepng_get_bpp_lct(colortype, bitdepth); - size_t n = w * h; - return ((n / 8) * bpp) + ((n & 7) * bpp + 7) / 8; -} - - -#ifdef LODEPNG_COMPILE_PNG -#ifdef LODEPNG_COMPILE_DECODER -/*in an idat chunk, each scanline is a multiple of 8 bits, unlike the lodepng output buffer*/ -static size_t lodepng_get_raw_size_idat(unsigned w, unsigned h, const LodePNGColorMode* color) -{ - /*will not overflow for any color type if roughly w * h < 268435455*/ - int bpp = lodepng_get_bpp(color); - size_t line = ((w / 8) * bpp) + ((w & 7) * bpp + 7) / 8; - return h * line; -} -#endif /*LODEPNG_COMPILE_DECODER*/ -#endif /*LODEPNG_COMPILE_PNG*/ - -#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS - -static void LodePNGUnknownChunks_init(LodePNGInfo* info) -{ - unsigned i; - for(i = 0; i != 3; ++i) info->unknown_chunks_data[i] = 0; - for(i = 0; i != 3; ++i) info->unknown_chunks_size[i] = 0; -} - -static void LodePNGUnknownChunks_cleanup(LodePNGInfo* info) -{ - unsigned i; - for(i = 0; i != 3; ++i) lodepng_free(info->unknown_chunks_data[i]); -} - -static unsigned LodePNGUnknownChunks_copy(LodePNGInfo* dest, const LodePNGInfo* src) -{ - unsigned i; - - LodePNGUnknownChunks_cleanup(dest); - - for(i = 0; i != 3; ++i) - { - size_t j; - dest->unknown_chunks_size[i] = src->unknown_chunks_size[i]; - dest->unknown_chunks_data[i] = (unsigned char*)lodepng_malloc(src->unknown_chunks_size[i]); - if(!dest->unknown_chunks_data[i] && dest->unknown_chunks_size[i]) return 83; /*alloc fail*/ - for(j = 0; j < src->unknown_chunks_size[i]; ++j) - { - dest->unknown_chunks_data[i][j] = src->unknown_chunks_data[i][j]; - } - } - - return 0; -} - -/******************************************************************************/ - -static void LodePNGText_init(LodePNGInfo* info) -{ - info->text_num = 0; - info->text_keys = NULL; - info->text_strings = NULL; -} - -static void LodePNGText_cleanup(LodePNGInfo* info) -{ - size_t i; - for(i = 0; i != info->text_num; ++i) - { - string_cleanup(&info->text_keys[i]); - string_cleanup(&info->text_strings[i]); - } - lodepng_free(info->text_keys); - lodepng_free(info->text_strings); -} - -static unsigned LodePNGText_copy(LodePNGInfo* dest, const LodePNGInfo* source) -{ - size_t i = 0; - dest->text_keys = 0; - dest->text_strings = 0; - dest->text_num = 0; - for(i = 0; i != source->text_num; ++i) - { - CERROR_TRY_RETURN(lodepng_add_text(dest, source->text_keys[i], source->text_strings[i])); - } - return 0; -} - -void lodepng_clear_text(LodePNGInfo* info) -{ - LodePNGText_cleanup(info); -} - -unsigned lodepng_add_text(LodePNGInfo* info, const char* key, const char* str) -{ - char** 
new_keys = (char**)(lodepng_realloc(info->text_keys, sizeof(char*) * (info->text_num + 1))); - char** new_strings = (char**)(lodepng_realloc(info->text_strings, sizeof(char*) * (info->text_num + 1))); - if(!new_keys || !new_strings) - { - lodepng_free(new_keys); - lodepng_free(new_strings); - return 83; /*alloc fail*/ - } - - ++info->text_num; - info->text_keys = new_keys; - info->text_strings = new_strings; - - string_init(&info->text_keys[info->text_num - 1]); - string_set(&info->text_keys[info->text_num - 1], key); - - string_init(&info->text_strings[info->text_num - 1]); - string_set(&info->text_strings[info->text_num - 1], str); - - return 0; -} - -/******************************************************************************/ - -static void LodePNGIText_init(LodePNGInfo* info) -{ - info->itext_num = 0; - info->itext_keys = NULL; - info->itext_langtags = NULL; - info->itext_transkeys = NULL; - info->itext_strings = NULL; -} - -static void LodePNGIText_cleanup(LodePNGInfo* info) -{ - size_t i; - for(i = 0; i != info->itext_num; ++i) - { - string_cleanup(&info->itext_keys[i]); - string_cleanup(&info->itext_langtags[i]); - string_cleanup(&info->itext_transkeys[i]); - string_cleanup(&info->itext_strings[i]); - } - lodepng_free(info->itext_keys); - lodepng_free(info->itext_langtags); - lodepng_free(info->itext_transkeys); - lodepng_free(info->itext_strings); -} - -static unsigned LodePNGIText_copy(LodePNGInfo* dest, const LodePNGInfo* source) -{ - size_t i = 0; - dest->itext_keys = 0; - dest->itext_langtags = 0; - dest->itext_transkeys = 0; - dest->itext_strings = 0; - dest->itext_num = 0; - for(i = 0; i != source->itext_num; ++i) - { - CERROR_TRY_RETURN(lodepng_add_itext(dest, source->itext_keys[i], source->itext_langtags[i], - source->itext_transkeys[i], source->itext_strings[i])); - } - return 0; -} - -void lodepng_clear_itext(LodePNGInfo* info) -{ - LodePNGIText_cleanup(info); -} - -unsigned lodepng_add_itext(LodePNGInfo* info, const char* key, const char* langtag, - const char* transkey, const char* str) -{ - char** new_keys = (char**)(lodepng_realloc(info->itext_keys, sizeof(char*) * (info->itext_num + 1))); - char** new_langtags = (char**)(lodepng_realloc(info->itext_langtags, sizeof(char*) * (info->itext_num + 1))); - char** new_transkeys = (char**)(lodepng_realloc(info->itext_transkeys, sizeof(char*) * (info->itext_num + 1))); - char** new_strings = (char**)(lodepng_realloc(info->itext_strings, sizeof(char*) * (info->itext_num + 1))); - if(!new_keys || !new_langtags || !new_transkeys || !new_strings) - { - lodepng_free(new_keys); - lodepng_free(new_langtags); - lodepng_free(new_transkeys); - lodepng_free(new_strings); - return 83; /*alloc fail*/ - } - - ++info->itext_num; - info->itext_keys = new_keys; - info->itext_langtags = new_langtags; - info->itext_transkeys = new_transkeys; - info->itext_strings = new_strings; - - string_init(&info->itext_keys[info->itext_num - 1]); - string_set(&info->itext_keys[info->itext_num - 1], key); - - string_init(&info->itext_langtags[info->itext_num - 1]); - string_set(&info->itext_langtags[info->itext_num - 1], langtag); - - string_init(&info->itext_transkeys[info->itext_num - 1]); - string_set(&info->itext_transkeys[info->itext_num - 1], transkey); - - string_init(&info->itext_strings[info->itext_num - 1]); - string_set(&info->itext_strings[info->itext_num - 1], str); - - return 0; -} -#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ - -void lodepng_info_init(LodePNGInfo* info) -{ - lodepng_color_mode_init(&info->color); - info->interlace_method = 
0; - info->compression_method = 0; - info->filter_method = 0; -#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS - info->background_defined = 0; - info->background_r = info->background_g = info->background_b = 0; - - LodePNGText_init(info); - LodePNGIText_init(info); - - info->time_defined = 0; - info->phys_defined = 0; - - LodePNGUnknownChunks_init(info); -#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ -} - -void lodepng_info_cleanup(LodePNGInfo* info) -{ - lodepng_color_mode_cleanup(&info->color); -#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS - LodePNGText_cleanup(info); - LodePNGIText_cleanup(info); - - LodePNGUnknownChunks_cleanup(info); -#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ -} - -unsigned lodepng_info_copy(LodePNGInfo* dest, const LodePNGInfo* source) -{ - lodepng_info_cleanup(dest); - *dest = *source; - lodepng_color_mode_init(&dest->color); - CERROR_TRY_RETURN(lodepng_color_mode_copy(&dest->color, &source->color)); - -#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS - CERROR_TRY_RETURN(LodePNGText_copy(dest, source)); - CERROR_TRY_RETURN(LodePNGIText_copy(dest, source)); - - LodePNGUnknownChunks_init(dest); - CERROR_TRY_RETURN(LodePNGUnknownChunks_copy(dest, source)); -#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ - return 0; -} - -void lodepng_info_swap(LodePNGInfo* a, LodePNGInfo* b) -{ - LodePNGInfo temp = *a; - *a = *b; - *b = temp; -} - -/* ////////////////////////////////////////////////////////////////////////// */ - -/*index: bitgroup index, bits: bitgroup size(1, 2 or 4), in: bitgroup value, out: octet array to add bits to*/ -static void addColorBits(unsigned char* out, size_t index, unsigned bits, unsigned in) -{ - unsigned m = bits == 1 ? 7 : bits == 2 ? 3 : 1; /*8 / bits - 1*/ - /*p = the partial index in the byte, e.g. with 4 palettebits it is 0 for first half or 1 for second half*/ - unsigned p = index & m; - in &= (1u << bits) - 1u; /*filter out any other bits of the input value*/ - in = in << (bits * (m - p)); - if(p == 0) out[index * bits / 8] = in; - else out[index * bits / 8] |= in; -} - -typedef struct ColorTree ColorTree; - -/* -One node of a color tree -This is the data structure used to count the number of unique colors and to get a palette -index for a color. It's like an octree, but because the alpha channel is used too, each -node has 16 instead of 8 children. -*/ -struct ColorTree -{ - ColorTree* children[16]; /*up to 16 pointers to ColorTree of next level*/ - int index; /*the payload. Only has a meaningful value if this is in the last level*/ -}; - -static void color_tree_init(ColorTree* tree) -{ - int i; - for(i = 0; i != 16; ++i) tree->children[i] = 0; - tree->index = -1; -} - -static void color_tree_cleanup(ColorTree* tree) -{ - int i; - for(i = 0; i != 16; ++i) - { - if(tree->children[i]) - { - color_tree_cleanup(tree->children[i]); - lodepng_free(tree->children[i]); - } - } -} - -/*returns -1 if color not present, its index otherwise*/ -static int color_tree_get(ColorTree* tree, unsigned char r, unsigned char g, unsigned char b, unsigned char a) -{ - int bit = 0; - for(bit = 0; bit < 8; ++bit) - { - int i = 8 * ((r >> bit) & 1) + 4 * ((g >> bit) & 1) + 2 * ((b >> bit) & 1) + 1 * ((a >> bit) & 1); - if(!tree->children[i]) return -1; - else tree = tree->children[i]; - } - return tree ? 
tree->index : -1; -} - -#ifdef LODEPNG_COMPILE_ENCODER -static int color_tree_has(ColorTree* tree, unsigned char r, unsigned char g, unsigned char b, unsigned char a) -{ - return color_tree_get(tree, r, g, b, a) >= 0; -} -#endif /*LODEPNG_COMPILE_ENCODER*/ - -/*color is not allowed to already exist. -Index should be >= 0 (it's signed to be compatible with using -1 for "doesn't exist")*/ -static void color_tree_add(ColorTree* tree, - unsigned char r, unsigned char g, unsigned char b, unsigned char a, unsigned index) -{ - int bit; - for(bit = 0; bit < 8; ++bit) - { - int i = 8 * ((r >> bit) & 1) + 4 * ((g >> bit) & 1) + 2 * ((b >> bit) & 1) + 1 * ((a >> bit) & 1); - if(!tree->children[i]) - { - tree->children[i] = (ColorTree*)lodepng_malloc(sizeof(ColorTree)); - color_tree_init(tree->children[i]); - } - tree = tree->children[i]; - } - tree->index = (int)index; -} - -/*put a pixel, given its RGBA color, into image of any color type*/ -static unsigned rgba8ToPixel(unsigned char* out, size_t i, - const LodePNGColorMode* mode, ColorTree* tree /*for palette*/, - unsigned char r, unsigned char g, unsigned char b, unsigned char a) -{ - if(mode->colortype == LCT_GREY) - { - unsigned char grey = r; /*((unsigned short)r + g + b) / 3*/; - if(mode->bitdepth == 8) out[i] = grey; - else if(mode->bitdepth == 16) out[i * 2 + 0] = out[i * 2 + 1] = grey; - else - { - /*take the most significant bits of grey*/ - grey = (grey >> (8 - mode->bitdepth)) & ((1 << mode->bitdepth) - 1); - addColorBits(out, i, mode->bitdepth, grey); - } - } - else if(mode->colortype == LCT_RGB) - { - if(mode->bitdepth == 8) - { - out[i * 3 + 0] = r; - out[i * 3 + 1] = g; - out[i * 3 + 2] = b; - } - else - { - out[i * 6 + 0] = out[i * 6 + 1] = r; - out[i * 6 + 2] = out[i * 6 + 3] = g; - out[i * 6 + 4] = out[i * 6 + 5] = b; - } - } - else if(mode->colortype == LCT_PALETTE) - { - int index = color_tree_get(tree, r, g, b, a); - if(index < 0) return 82; /*color not in palette*/ - if(mode->bitdepth == 8) out[i] = index; - else addColorBits(out, i, mode->bitdepth, (unsigned)index); - } - else if(mode->colortype == LCT_GREY_ALPHA) - { - unsigned char grey = r; /*((unsigned short)r + g + b) / 3*/; - if(mode->bitdepth == 8) - { - out[i * 2 + 0] = grey; - out[i * 2 + 1] = a; - } - else if(mode->bitdepth == 16) - { - out[i * 4 + 0] = out[i * 4 + 1] = grey; - out[i * 4 + 2] = out[i * 4 + 3] = a; - } - } - else if(mode->colortype == LCT_RGBA) - { - if(mode->bitdepth == 8) - { - out[i * 4 + 0] = r; - out[i * 4 + 1] = g; - out[i * 4 + 2] = b; - out[i * 4 + 3] = a; - } - else - { - out[i * 8 + 0] = out[i * 8 + 1] = r; - out[i * 8 + 2] = out[i * 8 + 3] = g; - out[i * 8 + 4] = out[i * 8 + 5] = b; - out[i * 8 + 6] = out[i * 8 + 7] = a; - } - } - - return 0; /*no error*/ -} - -/*put a pixel, given its RGBA16 color, into image of any color 16-bitdepth type*/ -static void rgba16ToPixel(unsigned char* out, size_t i, - const LodePNGColorMode* mode, - unsigned short r, unsigned short g, unsigned short b, unsigned short a) -{ - if(mode->colortype == LCT_GREY) - { - unsigned short grey = r; /*((unsigned)r + g + b) / 3*/; - out[i * 2 + 0] = (grey >> 8) & 255; - out[i * 2 + 1] = grey & 255; - } - else if(mode->colortype == LCT_RGB) - { - out[i * 6 + 0] = (r >> 8) & 255; - out[i * 6 + 1] = r & 255; - out[i * 6 + 2] = (g >> 8) & 255; - out[i * 6 + 3] = g & 255; - out[i * 6 + 4] = (b >> 8) & 255; - out[i * 6 + 5] = b & 255; - } - else if(mode->colortype == LCT_GREY_ALPHA) - { - unsigned short grey = r; /*((unsigned)r + g + b) / 3*/; - out[i * 4 + 0] = (grey >> 8) & 
255; - out[i * 4 + 1] = grey & 255; - out[i * 4 + 2] = (a >> 8) & 255; - out[i * 4 + 3] = a & 255; - } - else if(mode->colortype == LCT_RGBA) - { - out[i * 8 + 0] = (r >> 8) & 255; - out[i * 8 + 1] = r & 255; - out[i * 8 + 2] = (g >> 8) & 255; - out[i * 8 + 3] = g & 255; - out[i * 8 + 4] = (b >> 8) & 255; - out[i * 8 + 5] = b & 255; - out[i * 8 + 6] = (a >> 8) & 255; - out[i * 8 + 7] = a & 255; - } -} - -/*Get RGBA8 color of pixel with index i (y * width + x) from the raw image with given color type.*/ -static void getPixelColorRGBA8(unsigned char* r, unsigned char* g, - unsigned char* b, unsigned char* a, - const unsigned char* in, size_t i, - const LodePNGColorMode* mode) -{ - if(mode->colortype == LCT_GREY) - { - if(mode->bitdepth == 8) - { - *r = *g = *b = in[i]; - if(mode->key_defined && *r == mode->key_r) *a = 0; - else *a = 255; - } - else if(mode->bitdepth == 16) - { - *r = *g = *b = in[i * 2 + 0]; - if(mode->key_defined && 256U * in[i * 2 + 0] + in[i * 2 + 1] == mode->key_r) *a = 0; - else *a = 255; - } - else - { - unsigned highest = ((1U << mode->bitdepth) - 1U); /*highest possible value for this bit depth*/ - size_t j = i * mode->bitdepth; - unsigned value = readBitsFromReversedStream(&j, in, mode->bitdepth); - *r = *g = *b = (value * 255) / highest; - if(mode->key_defined && value == mode->key_r) *a = 0; - else *a = 255; - } - } - else if(mode->colortype == LCT_RGB) - { - if(mode->bitdepth == 8) - { - *r = in[i * 3 + 0]; *g = in[i * 3 + 1]; *b = in[i * 3 + 2]; - if(mode->key_defined && *r == mode->key_r && *g == mode->key_g && *b == mode->key_b) *a = 0; - else *a = 255; - } - else - { - *r = in[i * 6 + 0]; - *g = in[i * 6 + 2]; - *b = in[i * 6 + 4]; - if(mode->key_defined && 256U * in[i * 6 + 0] + in[i * 6 + 1] == mode->key_r - && 256U * in[i * 6 + 2] + in[i * 6 + 3] == mode->key_g - && 256U * in[i * 6 + 4] + in[i * 6 + 5] == mode->key_b) *a = 0; - else *a = 255; - } - } - else if(mode->colortype == LCT_PALETTE) - { - unsigned index; - if(mode->bitdepth == 8) index = in[i]; - else - { - size_t j = i * mode->bitdepth; - index = readBitsFromReversedStream(&j, in, mode->bitdepth); - } - - if(index >= mode->palettesize) - { - /*This is an error according to the PNG spec, but common PNG decoders make it black instead. - Done here too, slightly faster due to no error handling needed.*/ - *r = *g = *b = 0; - *a = 255; - } - else - { - *r = mode->palette[index * 4 + 0]; - *g = mode->palette[index * 4 + 1]; - *b = mode->palette[index * 4 + 2]; - *a = mode->palette[index * 4 + 3]; - } - } - else if(mode->colortype == LCT_GREY_ALPHA) - { - if(mode->bitdepth == 8) - { - *r = *g = *b = in[i * 2 + 0]; - *a = in[i * 2 + 1]; - } - else - { - *r = *g = *b = in[i * 4 + 0]; - *a = in[i * 4 + 2]; - } - } - else if(mode->colortype == LCT_RGBA) - { - if(mode->bitdepth == 8) - { - *r = in[i * 4 + 0]; - *g = in[i * 4 + 1]; - *b = in[i * 4 + 2]; - *a = in[i * 4 + 3]; - } - else - { - *r = in[i * 8 + 0]; - *g = in[i * 8 + 2]; - *b = in[i * 8 + 4]; - *a = in[i * 8 + 6]; - } - } -} - -/*Similar to getPixelColorRGBA8, but with all the for loops inside of the color -mode test cases, optimized to convert the colors much faster, when converting -to RGBA or RGB with 8 bit per cannel. buffer must be RGBA or RGB output with -enough memory, if has_alpha is true the output is RGBA. 
mode has the color mode -of the input buffer.*/ -static void getPixelColorsRGBA8(unsigned char* buffer, size_t numpixels, - unsigned has_alpha, const unsigned char* in, - const LodePNGColorMode* mode) -{ - unsigned num_channels = has_alpha ? 4 : 3; - size_t i; - if(mode->colortype == LCT_GREY) - { - if(mode->bitdepth == 8) - { - for(i = 0; i != numpixels; ++i, buffer += num_channels) - { - buffer[0] = buffer[1] = buffer[2] = in[i]; - if(has_alpha) buffer[3] = mode->key_defined && in[i] == mode->key_r ? 0 : 255; - } - } - else if(mode->bitdepth == 16) - { - for(i = 0; i != numpixels; ++i, buffer += num_channels) - { - buffer[0] = buffer[1] = buffer[2] = in[i * 2]; - if(has_alpha) buffer[3] = mode->key_defined && 256U * in[i * 2 + 0] + in[i * 2 + 1] == mode->key_r ? 0 : 255; - } - } - else - { - unsigned highest = ((1U << mode->bitdepth) - 1U); /*highest possible value for this bit depth*/ - size_t j = 0; - for(i = 0; i != numpixels; ++i, buffer += num_channels) - { - unsigned value = readBitsFromReversedStream(&j, in, mode->bitdepth); - buffer[0] = buffer[1] = buffer[2] = (value * 255) / highest; - if(has_alpha) buffer[3] = mode->key_defined && value == mode->key_r ? 0 : 255; - } - } - } - else if(mode->colortype == LCT_RGB) - { - if(mode->bitdepth == 8) - { - for(i = 0; i != numpixels; ++i, buffer += num_channels) - { - buffer[0] = in[i * 3 + 0]; - buffer[1] = in[i * 3 + 1]; - buffer[2] = in[i * 3 + 2]; - if(has_alpha) buffer[3] = mode->key_defined && buffer[0] == mode->key_r - && buffer[1]== mode->key_g && buffer[2] == mode->key_b ? 0 : 255; - } - } - else - { - for(i = 0; i != numpixels; ++i, buffer += num_channels) - { - buffer[0] = in[i * 6 + 0]; - buffer[1] = in[i * 6 + 2]; - buffer[2] = in[i * 6 + 4]; - if(has_alpha) buffer[3] = mode->key_defined - && 256U * in[i * 6 + 0] + in[i * 6 + 1] == mode->key_r - && 256U * in[i * 6 + 2] + in[i * 6 + 3] == mode->key_g - && 256U * in[i * 6 + 4] + in[i * 6 + 5] == mode->key_b ? 0 : 255; - } - } - } - else if(mode->colortype == LCT_PALETTE) - { - unsigned index; - size_t j = 0; - for(i = 0; i != numpixels; ++i, buffer += num_channels) - { - if(mode->bitdepth == 8) index = in[i]; - else index = readBitsFromReversedStream(&j, in, mode->bitdepth); - - if(index >= mode->palettesize) - { - /*This is an error according to the PNG spec, but most PNG decoders make it black instead. 
- Done here too, slightly faster due to no error handling needed.*/ - buffer[0] = buffer[1] = buffer[2] = 0; - if(has_alpha) buffer[3] = 255; - } - else - { - buffer[0] = mode->palette[index * 4 + 0]; - buffer[1] = mode->palette[index * 4 + 1]; - buffer[2] = mode->palette[index * 4 + 2]; - if(has_alpha) buffer[3] = mode->palette[index * 4 + 3]; - } - } - } - else if(mode->colortype == LCT_GREY_ALPHA) - { - if(mode->bitdepth == 8) - { - for(i = 0; i != numpixels; ++i, buffer += num_channels) - { - buffer[0] = buffer[1] = buffer[2] = in[i * 2 + 0]; - if(has_alpha) buffer[3] = in[i * 2 + 1]; - } - } - else - { - for(i = 0; i != numpixels; ++i, buffer += num_channels) - { - buffer[0] = buffer[1] = buffer[2] = in[i * 4 + 0]; - if(has_alpha) buffer[3] = in[i * 4 + 2]; - } - } - } - else if(mode->colortype == LCT_RGBA) - { - if(mode->bitdepth == 8) - { - for(i = 0; i != numpixels; ++i, buffer += num_channels) - { - buffer[0] = in[i * 4 + 0]; - buffer[1] = in[i * 4 + 1]; - buffer[2] = in[i * 4 + 2]; - if(has_alpha) buffer[3] = in[i * 4 + 3]; - } - } - else - { - for(i = 0; i != numpixels; ++i, buffer += num_channels) - { - buffer[0] = in[i * 8 + 0]; - buffer[1] = in[i * 8 + 2]; - buffer[2] = in[i * 8 + 4]; - if(has_alpha) buffer[3] = in[i * 8 + 6]; - } - } - } -} - -/*Get RGBA16 color of pixel with index i (y * width + x) from the raw image with -given color type, but the given color type must be 16-bit itself.*/ -static void getPixelColorRGBA16(unsigned short* r, unsigned short* g, unsigned short* b, unsigned short* a, - const unsigned char* in, size_t i, const LodePNGColorMode* mode) -{ - if(mode->colortype == LCT_GREY) - { - *r = *g = *b = 256 * in[i * 2 + 0] + in[i * 2 + 1]; - if(mode->key_defined && 256U * in[i * 2 + 0] + in[i * 2 + 1] == mode->key_r) *a = 0; - else *a = 65535; - } - else if(mode->colortype == LCT_RGB) - { - *r = 256u * in[i * 6 + 0] + in[i * 6 + 1]; - *g = 256u * in[i * 6 + 2] + in[i * 6 + 3]; - *b = 256u * in[i * 6 + 4] + in[i * 6 + 5]; - if(mode->key_defined - && 256u * in[i * 6 + 0] + in[i * 6 + 1] == mode->key_r - && 256u * in[i * 6 + 2] + in[i * 6 + 3] == mode->key_g - && 256u * in[i * 6 + 4] + in[i * 6 + 5] == mode->key_b) *a = 0; - else *a = 65535; - } - else if(mode->colortype == LCT_GREY_ALPHA) - { - *r = *g = *b = 256u * in[i * 4 + 0] + in[i * 4 + 1]; - *a = 256u * in[i * 4 + 2] + in[i * 4 + 3]; - } - else if(mode->colortype == LCT_RGBA) - { - *r = 256u * in[i * 8 + 0] + in[i * 8 + 1]; - *g = 256u * in[i * 8 + 2] + in[i * 8 + 3]; - *b = 256u * in[i * 8 + 4] + in[i * 8 + 5]; - *a = 256u * in[i * 8 + 6] + in[i * 8 + 7]; - } -} - -unsigned lodepng_convert(unsigned char* out, const unsigned char* in, - const LodePNGColorMode* mode_out, const LodePNGColorMode* mode_in, - unsigned w, unsigned h) -{ - int i; - ColorTree tree; - size_t numpixels = w * h; - - if(lodepng_color_mode_equal(mode_out, mode_in)) - { - size_t numbytes = lodepng_get_raw_size(w, h, mode_in); - for(i = 0; i != (int)numbytes; ++i) out[i] = in[i]; - return 0; - } - - if(mode_out->colortype == LCT_PALETTE) - { - size_t palettesize = mode_out->palettesize; - const unsigned char* palette = mode_out->palette; - size_t palsize = 1ull << (size_t)mode_out->bitdepth; - /*if the user specified output palette but did not give the values, assume - they want the values of the input color type (assuming that one is palette). 
- Note that we never create a new palette ourselves.*/ - if(palettesize == 0) - { - palettesize = mode_in->palettesize; - palette = mode_in->palette; - } - if(palettesize < palsize) palsize = palettesize; - color_tree_init(&tree); - for(i = 0; i != (int)palsize; ++i) - { - const unsigned char* p = &palette[i * 4]; - color_tree_add(&tree, p[0], p[1], p[2], p[3], i); - } - } - - if(mode_in->bitdepth == 16 && mode_out->bitdepth == 16) - { - for(i = 0; i != (int)numpixels; ++i) - { - unsigned short r = 0, g = 0, b = 0, a = 0; - getPixelColorRGBA16(&r, &g, &b, &a, in, i, mode_in); - rgba16ToPixel(out, i, mode_out, r, g, b, a); - } - } - else if(mode_out->bitdepth == 8 && mode_out->colortype == LCT_RGBA) - { - getPixelColorsRGBA8(out, numpixels, 1, in, mode_in); - } - else if(mode_out->bitdepth == 8 && mode_out->colortype == LCT_RGB) - { - getPixelColorsRGBA8(out, numpixels, 0, in, mode_in); - } - else - { - unsigned char r = 0, g = 0, b = 0, a = 0; - for(i = 0; i != (int)numpixels; ++i) - { - getPixelColorRGBA8(&r, &g, &b, &a, in, i, mode_in); - CERROR_TRY_RETURN(rgba8ToPixel(out, i, mode_out, &tree, r, g, b, a)); - } - } - - if(mode_out->colortype == LCT_PALETTE) - { - color_tree_cleanup(&tree); - } - - return 0; /*no error*/ -} - -#ifdef LODEPNG_COMPILE_ENCODER - -void lodepng_color_profile_init(LodePNGColorProfile* profile) -{ - profile->colored = 0; - profile->key = 0; - profile->alpha = 0; - profile->key_r = profile->key_g = profile->key_b = 0; - profile->numcolors = 0; - profile->bits = 1; -} - -/*function used for debug purposes with C++*/ -/*void printColorProfile(LodePNGColorProfile* p) -{ - std::cout << "colored: " << (int)p->colored << ", "; - std::cout << "key: " << (int)p->key << ", "; - std::cout << "key_r: " << (int)p->key_r << ", "; - std::cout << "key_g: " << (int)p->key_g << ", "; - std::cout << "key_b: " << (int)p->key_b << ", "; - std::cout << "alpha: " << (int)p->alpha << ", "; - std::cout << "numcolors: " << (int)p->numcolors << ", "; - std::cout << "bits: " << (int)p->bits << std::endl; -}*/ - -/*Returns how many bits needed to represent given value (max 8 bit)*/ -static unsigned getValueRequiredBits(unsigned char value) -{ - if(value == 0 || value == 255) return 1; - /*The scaling of 2-bit and 4-bit values uses multiples of 85 and 17*/ - if(value % 17 == 0) return value % 85 == 0 ? 2 : 4; - return 8; -} - -/*profile must already have been inited with mode. -It's ok to set some parameters of profile to done already.*/ -unsigned lodepng_get_color_profile(LodePNGColorProfile* profile, - const unsigned char* in, unsigned w, unsigned h, - const LodePNGColorMode* mode) -{ - unsigned error = 0; - size_t i; - ColorTree tree; - size_t numpixels = w * h; - - unsigned colored_done = lodepng_is_greyscale_type(mode) ? 1 : 0; - unsigned alpha_done = lodepng_can_have_alpha(mode) ? 0 : 1; - unsigned numcolors_done = 0; - unsigned bpp = lodepng_get_bpp(mode); - unsigned bits_done = bpp == 1 ? 1 : 0; - unsigned maxnumcolors = 257; - unsigned sixteen = 0; - if(bpp <= 8) maxnumcolors = bpp == 1 ? 2 : (bpp == 2 ? 4 : (bpp == 4 ? 
16 : 256)); - - color_tree_init(&tree); - - /*Check if the 16-bit input is truly 16-bit*/ - if(mode->bitdepth == 16) - { - unsigned short r, g, b, a; - for(i = 0; i != numpixels; ++i) - { - getPixelColorRGBA16(&r, &g, &b, &a, in, i, mode); - if((r & 255) != ((r >> 8) & 255) || (g & 255) != ((g >> 8) & 255) || - (b & 255) != ((b >> 8) & 255) || (a & 255) != ((a >> 8) & 255)) /*first and second byte differ*/ - { - sixteen = 1; - break; - } - } - } - - if(sixteen) - { - unsigned short r = 0, g = 0, b = 0, a = 0; - profile->bits = 16; - bits_done = numcolors_done = 1; /*counting colors no longer useful, palette doesn't support 16-bit*/ - - for(i = 0; i != numpixels; ++i) - { - getPixelColorRGBA16(&r, &g, &b, &a, in, i, mode); - - if(!colored_done && (r != g || r != b)) - { - profile->colored = 1; - colored_done = 1; - } - - if(!alpha_done) - { - unsigned matchkey = (r == profile->key_r && g == profile->key_g && b == profile->key_b); - if(a != 65535 && (a != 0 || (profile->key && !matchkey))) - { - profile->alpha = 1; - alpha_done = 1; - if(profile->bits < 8) profile->bits = 8; /*PNG has no alphachannel modes with less than 8-bit per channel*/ - } - else if(a == 0 && !profile->alpha && !profile->key) - { - profile->key = 1; - profile->key_r = r; - profile->key_g = g; - profile->key_b = b; - } - else if(a == 65535 && profile->key && matchkey) - { - /* Color key cannot be used if an opaque pixel also has that RGB color. */ - profile->alpha = 1; - alpha_done = 1; - } - } - - if(alpha_done && numcolors_done && colored_done && bits_done) break; - } - } - else /* < 16-bit */ - { - for(i = 0; i != numpixels; ++i) - { - unsigned char r = 0, g = 0, b = 0, a = 0; - getPixelColorRGBA8(&r, &g, &b, &a, in, i, mode); - - if(!bits_done && profile->bits < 8) - { - /*only r is checked, < 8 bits is only relevant for greyscale*/ - unsigned bits = getValueRequiredBits(r); - if(bits > profile->bits) profile->bits = bits; - } - bits_done = (profile->bits >= bpp); - - if(!colored_done && (r != g || r != b)) - { - profile->colored = 1; - colored_done = 1; - if(profile->bits < 8) profile->bits = 8; /*PNG has no colored modes with less than 8-bit per channel*/ - } - - if(!alpha_done) - { - unsigned matchkey = (r == profile->key_r && g == profile->key_g && b == profile->key_b); - if(a != 255 && (a != 0 || (profile->key && !matchkey))) - { - profile->alpha = 1; - alpha_done = 1; - if(profile->bits < 8) profile->bits = 8; /*PNG has no alphachannel modes with less than 8-bit per channel*/ - } - else if(a == 0 && !profile->alpha && !profile->key) - { - profile->key = 1; - profile->key_r = r; - profile->key_g = g; - profile->key_b = b; - } - else if(a == 255 && profile->key && matchkey) - { - /* Color key cannot be used if an opaque pixel also has that RGB color. 
*/ - profile->alpha = 1; - alpha_done = 1; - if(profile->bits < 8) profile->bits = 8; /*PNG has no alphachannel modes with less than 8-bit per channel*/ - } - } - - if(!numcolors_done) - { - if(!color_tree_has(&tree, r, g, b, a)) - { - color_tree_add(&tree, r, g, b, a, profile->numcolors); - if(profile->numcolors < 256) - { - unsigned char* p = profile->palette; - unsigned n = profile->numcolors; - p[n * 4 + 0] = r; - p[n * 4 + 1] = g; - p[n * 4 + 2] = b; - p[n * 4 + 3] = a; - } - ++profile->numcolors; - numcolors_done = profile->numcolors >= maxnumcolors; - } - } - - if(alpha_done && numcolors_done && colored_done && bits_done) break; - } - - /*make the profile's key always 16-bit for consistency - repeat each byte twice*/ - profile->key_r += (profile->key_r << 8); - profile->key_g += (profile->key_g << 8); - profile->key_b += (profile->key_b << 8); - } - - color_tree_cleanup(&tree); - return error; -} - -/*Automatically chooses color type that gives smallest amount of bits in the -output image, e.g. grey if there are only greyscale pixels, palette if there -are less than 256 colors, ... -Updates values of mode with a potentially smaller color model. mode_out should -contain the user chosen color model, but will be overwritten with the new chosen one.*/ -unsigned lodepng_auto_choose_color(LodePNGColorMode* mode_out, - const unsigned char* image, unsigned w, unsigned h, - const LodePNGColorMode* mode_in) -{ - LodePNGColorProfile prof; - unsigned error = 0; - unsigned i, n, palettebits, grey_ok, palette_ok; - - lodepng_color_profile_init(&prof); - error = lodepng_get_color_profile(&prof, image, w, h, mode_in); - if(error) return error; - mode_out->key_defined = 0; - - if(prof.key && w * h <= 16) - { - prof.alpha = 1; /*too few pixels to justify tRNS chunk overhead*/ - if(prof.bits < 8) prof.bits = 8; /*PNG has no alphachannel modes with less than 8-bit per channel*/ - } - grey_ok = !prof.colored && !prof.alpha; /*grey without alpha, with potentially low bits*/ - n = prof.numcolors; - palettebits = n <= 2 ? 1 : (n <= 4 ? 2 : (n <= 16 ? 4 : 8)); - palette_ok = n <= 256 && (n * 2 < w * h) && prof.bits <= 8; - if(w * h < n * 2) palette_ok = 0; /*don't add palette overhead if image has only a few pixels*/ - if(grey_ok && prof.bits <= palettebits) palette_ok = 0; /*grey is less overhead*/ - - if(palette_ok) - { - unsigned char* p = prof.palette; - lodepng_palette_clear(mode_out); /*remove potential earlier palette*/ - for(i = 0; i != prof.numcolors; ++i) - { - error = lodepng_palette_add(mode_out, p[i * 4 + 0], p[i * 4 + 1], p[i * 4 + 2], p[i * 4 + 3]); - if(error) break; - } - - mode_out->colortype = LCT_PALETTE; - mode_out->bitdepth = palettebits; - - if(mode_in->colortype == LCT_PALETTE && mode_in->palettesize >= mode_out->palettesize - && mode_in->bitdepth == mode_out->bitdepth) - { - /*If input should have same palette colors, keep original to preserve its order and prevent conversion*/ - lodepng_color_mode_cleanup(mode_out); - lodepng_color_mode_copy(mode_out, mode_in); - } - } - else /*8-bit or 16-bit per channel*/ - { - mode_out->bitdepth = prof.bits; - mode_out->colortype = prof.alpha ? (prof.colored ? LCT_RGBA : LCT_GREY_ALPHA) - : (prof.colored ? 
LCT_RGB : LCT_GREY); - - if(prof.key && !prof.alpha) - { - unsigned mask = (1u << mode_out->bitdepth) - 1u; /*profile always uses 16-bit, mask converts it*/ - mode_out->key_r = prof.key_r & mask; - mode_out->key_g = prof.key_g & mask; - mode_out->key_b = prof.key_b & mask; - mode_out->key_defined = 1; - } - } - - return error; -} - -#endif /* #ifdef LODEPNG_COMPILE_ENCODER */ - -/* -Paeth predicter, used by PNG filter type 4 -The parameters are of type short, but should come from unsigned chars, the shorts -are only needed to make the paeth calculation correct. -*/ -static unsigned char paethPredictor(short a, short b, short c) -{ - short pa = abs(b - c); - short pb = abs(a - c); - short pc = abs(a + b - c - c); - - if(pc < pa && pc < pb) return (unsigned char)c; - else if(pb < pa) return (unsigned char)b; - else return (unsigned char)a; -} - -/*shared values used by multiple Adam7 related functions*/ - -static const unsigned ADAM7_IX[7] = { 0, 4, 0, 2, 0, 1, 0 }; /*x start values*/ -static const unsigned ADAM7_IY[7] = { 0, 0, 4, 0, 2, 0, 1 }; /*y start values*/ -static const unsigned ADAM7_DX[7] = { 8, 8, 4, 4, 2, 2, 1 }; /*x delta values*/ -static const unsigned ADAM7_DY[7] = { 8, 8, 8, 4, 4, 2, 2 }; /*y delta values*/ - -/* -Outputs various dimensions and positions in the image related to the Adam7 reduced images. -passw: output containing the width of the 7 passes -passh: output containing the height of the 7 passes -filter_passstart: output containing the index of the start and end of each - reduced image with filter bytes -padded_passstart output containing the index of the start and end of each - reduced image when without filter bytes but with padded scanlines -passstart: output containing the index of the start and end of each reduced - image without padding between scanlines, but still padding between the images -w, h: width and height of non-interlaced image -bpp: bits per pixel -"padded" is only relevant if bpp is less than 8 and a scanline or image does not - end at a full byte -*/ -static void Adam7_getpassvalues(unsigned passw[7], unsigned passh[7], size_t filter_passstart[8], - size_t padded_passstart[8], size_t passstart[8], unsigned w, unsigned h, unsigned bpp) -{ - /*the passstart values have 8 values: the 8th one indicates the byte after the end of the 7th (= last) pass*/ - unsigned i; - - /*calculate width and height in pixels of each pass*/ - for(i = 0; i != 7; ++i) - { - passw[i] = (w + ADAM7_DX[i] - ADAM7_IX[i] - 1) / ADAM7_DX[i]; - passh[i] = (h + ADAM7_DY[i] - ADAM7_IY[i] - 1) / ADAM7_DY[i]; - if(passw[i] == 0) passh[i] = 0; - if(passh[i] == 0) passw[i] = 0; - } - - filter_passstart[0] = padded_passstart[0] = passstart[0] = 0; - for(i = 0; i != 7; ++i) - { - /*if passw[i] is 0, it's 0 bytes, not 1 (no filtertype-byte)*/ - filter_passstart[i + 1] = filter_passstart[i] - + ((passw[i] && passh[i]) ? passh[i] * (1 + (passw[i] * bpp + 7) / 8) : 0); - /*bits padded if needed to fill full byte at end of each scanline*/ - padded_passstart[i + 1] = padded_passstart[i] + passh[i] * ((passw[i] * bpp + 7) / 8); - /*only padded at end of reduced image*/ - passstart[i + 1] = passstart[i] + (passh[i] * passw[i] * bpp + 7) / 8; - } -} - -#ifdef LODEPNG_COMPILE_DECODER - -/* ////////////////////////////////////////////////////////////////////////// */ -/* / PNG Decoder / */ -/* ////////////////////////////////////////////////////////////////////////// */ - -/*read the information from the header and store it in the LodePNGInfo. 
return value is error*/ -unsigned lodepng_inspect(unsigned* w, unsigned* h, LodePNGState* state, - const unsigned char* in, size_t insize) -{ - LodePNGInfo* info = &state->info_png; - if(insize == 0 || in == 0) - { - CERROR_RETURN_ERROR(state->error, 48); /*error: the given data is empty*/ - } - if(insize < 33) - { - CERROR_RETURN_ERROR(state->error, 27); /*error: the data length is smaller than the length of a PNG header*/ - } - - /*when decoding a new PNG image, make sure all parameters created after previous decoding are reset*/ - lodepng_info_cleanup(info); - lodepng_info_init(info); - - if(in[0] != 137 || in[1] != 80 || in[2] != 78 || in[3] != 71 - || in[4] != 13 || in[5] != 10 || in[6] != 26 || in[7] != 10) - { - CERROR_RETURN_ERROR(state->error, 28); /*error: the first 8 bytes are not the correct PNG signature*/ - } - if(lodepng_chunk_length(in + 8) != 13) - { - CERROR_RETURN_ERROR(state->error, 94); /*error: header size must be 13 bytes*/ - } - if(!lodepng_chunk_type_equals(in + 8, "IHDR")) - { - CERROR_RETURN_ERROR(state->error, 29); /*error: it doesn't start with a IHDR chunk!*/ - } - - /*read the values given in the header*/ - *w = lodepng_read32bitInt(&in[16]); - *h = lodepng_read32bitInt(&in[20]); - info->color.bitdepth = in[24]; - info->color.colortype = (LodePNGColorType)in[25]; - info->compression_method = in[26]; - info->filter_method = in[27]; - info->interlace_method = in[28]; - - if(*w == 0 || *h == 0) - { - CERROR_RETURN_ERROR(state->error, 93); - } - - if(!state->decoder.ignore_crc) - { - unsigned CRC = lodepng_read32bitInt(&in[29]); - unsigned checksum = lodepng_crc32(&in[12], 17); - if(CRC != checksum) - { - CERROR_RETURN_ERROR(state->error, 57); /*invalid CRC*/ - } - } - - /*error: only compression method 0 is allowed in the specification*/ - if(info->compression_method != 0) CERROR_RETURN_ERROR(state->error, 32); - /*error: only filter method 0 is allowed in the specification*/ - if(info->filter_method != 0) CERROR_RETURN_ERROR(state->error, 33); - /*error: only interlace methods 0 and 1 exist in the specification*/ - if(info->interlace_method > 1) CERROR_RETURN_ERROR(state->error, 34); - - state->error = checkColorValidity(info->color.colortype, info->color.bitdepth); - return state->error; -} - -static unsigned unfilterScanline(unsigned char* recon, const unsigned char* scanline, const unsigned char* precon, - size_t bytewidth, unsigned char filterType, size_t length) -{ - /* - For PNG filter method 0 - unfilter a PNG image scanline by scanline. when the pixels are smaller than 1 byte, - the filter works byte per byte (bytewidth = 1) - precon is the previous unfiltered scanline, recon the result, scanline the current one - the incoming scanlines do NOT include the filtertype byte, that one is given in the parameter filterType instead - recon and scanline MAY be the same memory address! precon must be disjoint. 
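Because lodepng_inspect stops after the signature and the 13-byte IHDR payload, it is a cheap way to query dimensions and color format without decoding any pixel data. A sketch of using it together with lodepng_load_file (the file name is a placeholder):

    #include <stdio.h>
    #include <stdlib.h>
    #include "lodepng.h"

    int main(void)
    {
        unsigned char* buffer = 0;
        size_t buffersize = 0;
        unsigned w = 0, h = 0;

        unsigned error = lodepng_load_file(&buffer, &buffersize, "in.png");

        LodePNGState state;
        lodepng_state_init(&state);

        /* parses only the 8-byte signature plus the IHDR chunk */
        if(!error) error = lodepng_inspect(&w, &h, &state, buffer, buffersize);
        if(!error)
            printf("%ux%u, colortype %d, bitdepth %u, interlace %u\n",
                   w, h, state.info_png.color.colortype,
                   state.info_png.color.bitdepth, state.info_png.interlace_method);
        else
            printf("error %u\n", error);

        lodepng_state_cleanup(&state);
        free(buffer); /* lodepng_load_file allocates with lodepng_malloc, plain malloc unless overridden */
        return 0;
    }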
- */ - - size_t i; - switch(filterType) - { - case 0: - for(i = 0; i != length; ++i) recon[i] = scanline[i]; - break; - case 1: - for(i = 0; i != bytewidth; ++i) recon[i] = scanline[i]; - for(i = bytewidth; i < length; ++i) recon[i] = scanline[i] + recon[i - bytewidth]; - break; - case 2: - if(precon) - { - for(i = 0; i != length; ++i) recon[i] = scanline[i] + precon[i]; - } - else - { - for(i = 0; i != length; ++i) recon[i] = scanline[i]; - } - break; - case 3: - if(precon) - { - for(i = 0; i != bytewidth; ++i) recon[i] = scanline[i] + (precon[i] >> 1); - for(i = bytewidth; i < length; ++i) recon[i] = scanline[i] + ((recon[i - bytewidth] + precon[i]) >> 1); - } - else - { - for(i = 0; i != bytewidth; ++i) recon[i] = scanline[i]; - for(i = bytewidth; i < length; ++i) recon[i] = scanline[i] + (recon[i - bytewidth] >> 1); - } - break; - case 4: - if(precon) - { - for(i = 0; i != bytewidth; ++i) - { - recon[i] = (scanline[i] + precon[i]); /*paethPredictor(0, precon[i], 0) is always precon[i]*/ - } - for(i = bytewidth; i < length; ++i) - { - recon[i] = (scanline[i] + paethPredictor(recon[i - bytewidth], precon[i], precon[i - bytewidth])); - } - } - else - { - for(i = 0; i != bytewidth; ++i) - { - recon[i] = scanline[i]; - } - for(i = bytewidth; i < length; ++i) - { - /*paethPredictor(recon[i - bytewidth], 0, 0) is always recon[i - bytewidth]*/ - recon[i] = (scanline[i] + recon[i - bytewidth]); - } - } - break; - default: return 36; /*error: unexisting filter type given*/ - } - return 0; -} - -static unsigned unfilter(unsigned char* out, const unsigned char* in, unsigned w, unsigned h, unsigned bpp) -{ - /* - For PNG filter method 0 - this function unfilters a single image (e.g. without interlacing this is called once, with Adam7 seven times) - out must have enough bytes allocated already, in must have the scanlines + 1 filtertype byte per scanline - w and h are image dimensions or dimensions of reduced image, bpp is bits per pixel - in and out are allowed to be the same memory address (but aren't the same size since in has the extra filter bytes) - */ - - unsigned y; - unsigned char* prevline = 0; - - /*bytewidth is used for filtering, is 1 when bpp < 8, number of bytes per pixel otherwise*/ - size_t bytewidth = (bpp + 7) / 8; - size_t linebytes = (w * bpp + 7) / 8; - - for(y = 0; y < h; ++y) - { - size_t outindex = linebytes * y; - size_t inindex = (1 + linebytes) * y; /*the extra filterbyte added to each row*/ - unsigned char filterType = in[inindex]; - - CERROR_TRY_RETURN(unfilterScanline(&out[outindex], &in[inindex + 1], prevline, bytewidth, filterType, linebytes)); - - prevline = &out[outindex]; - } - - return 0; -} - -/* -in: Adam7 interlaced image, with no padding bits between scanlines, but between - reduced images so that each reduced image starts at a byte. -out: the same pixels, but re-ordered so that they're now a non-interlaced image with size w*h -bpp: bits per pixel -out has the following size in bits: w * h * bpp. -in is possibly bigger due to padding bits between reduced images. 
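Each case above reconstructs a byte as the raw (filtered) value plus a prediction taken from bytes that were already reconstructed. For the Sub filter (type 1) the prediction is simply the byte one pixel to the left, which is why the first bytewidth bytes are copied as-is. A tiny standalone sketch of just that case, with an invented 3-pixel RGB scanline:

    #include <stdio.h>

    int main(void)
    {
        const unsigned bytewidth = 3, length = 9; /* 3 pixels of 8-bit RGB */
        /* the scanline as it comes out of the zlib stream (filter-type byte already stripped) */
        unsigned char filtered[9] = { 10, 20, 30,  5, 5, 5,  1, 1, 1 };
        unsigned char recon[9];
        unsigned i;

        for(i = 0; i < bytewidth; ++i) recon[i] = filtered[i];
        for(i = bytewidth; i < length; ++i)
            recon[i] = (unsigned char)(filtered[i] + recon[i - bytewidth]);

        for(i = 0; i < length; ++i) printf("%u ", recon[i]);
        printf("\n"); /* prints: 10 20 30 15 25 35 16 26 36 */
        return 0;
    }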
-out must be big enough AND must be 0 everywhere if bpp < 8 in the current implementation -(because that's likely a little bit faster) -NOTE: comments about padding bits are only relevant if bpp < 8 -*/ -static void Adam7_deinterlace(unsigned char* out, const unsigned char* in, unsigned w, unsigned h, unsigned bpp) -{ - unsigned passw[7], passh[7]; - size_t filter_passstart[8], padded_passstart[8], passstart[8]; - unsigned i; - - Adam7_getpassvalues(passw, passh, filter_passstart, padded_passstart, passstart, w, h, bpp); - - if(bpp >= 8) - { - for(i = 0; i != 7; ++i) - { - unsigned x, y, b; - size_t bytewidth = bpp / 8; - for(y = 0; y < passh[i]; ++y) - for(x = 0; x < passw[i]; ++x) - { - size_t pixelinstart = passstart[i] + (y * passw[i] + x) * bytewidth; - size_t pixeloutstart = ((ADAM7_IY[i] + y * ADAM7_DY[i]) * w + ADAM7_IX[i] + x * ADAM7_DX[i]) * bytewidth; - for(b = 0; b < bytewidth; ++b) - { - out[pixeloutstart + b] = in[pixelinstart + b]; - } - } - } - } - else /*bpp < 8: Adam7 with pixels < 8 bit is a bit trickier: with bit pointers*/ - { - for(i = 0; i != 7; ++i) - { - unsigned x, y, b; - unsigned ilinebits = bpp * passw[i]; - unsigned olinebits = bpp * w; - size_t obp, ibp; /*bit pointers (for out and in buffer)*/ - for(y = 0; y < passh[i]; ++y) - for(x = 0; x < passw[i]; ++x) - { - ibp = (8 * passstart[i]) + (y * ilinebits + x * bpp); - obp = (ADAM7_IY[i] + y * ADAM7_DY[i]) * olinebits + (ADAM7_IX[i] + x * ADAM7_DX[i]) * bpp; - for(b = 0; b < bpp; ++b) - { - unsigned char bit = readBitFromReversedStream(&ibp, in); - /*note that this function assumes the out buffer is completely 0, use setBitOfReversedStream otherwise*/ - setBitOfReversedStream0(&obp, out, bit); - } - } - } - } -} - -static void removePaddingBits(unsigned char* out, const unsigned char* in, - size_t olinebits, size_t ilinebits, unsigned h) -{ - /* - After filtering there are still padding bits if scanlines have non multiple of 8 bit amounts. They need - to be removed (except at last scanline of (Adam7-reduced) image) before working with pure image buffers - for the Adam7 code, the color convert code and the output to the user. - in and out are allowed to be the same buffer, in may also be higher but still overlapping; in must - have >= ilinebits*h bits, out must have >= olinebits*h bits, olinebits must be <= ilinebits - also used to move bits after earlier such operations happened, e.g. in a sequence of reduced images from Adam7 - only useful if (ilinebits - olinebits) is a value in the range 1..7 - */ - unsigned y; - size_t diff = ilinebits - olinebits; - size_t ibp = 0, obp = 0; /*input and output bit pointers*/ - for(y = 0; y < h; ++y) - { - size_t x; - for(x = 0; x < olinebits; ++x) - { - unsigned char bit = readBitFromReversedStream(&ibp, in); - setBitOfReversedStream(&obp, out, bit); - } - ibp += diff; - } -} - -/*out must be buffer big enough to contain full image, and in must contain the full decompressed data from -the IDAT chunks (with filter index bytes and possible padding bits) -return value is error*/ -static unsigned postProcessScanlines(unsigned char* out, unsigned char* in, - unsigned w, unsigned h, const LodePNGInfo* info_png) -{ - /* - This function converts the filtered-padded-interlaced data into pure 2D image buffer with the PNG's colortype. - Steps: - *) if no Adam7: 1) unfilter 2) remove padding bits (= posible extra bits per scanline if bpp < 8) - *) if adam7: 1) 7x unfilter 2) 7x remove padding bits 3) Adam7_deinterlace - NOTE: the in buffer will be overwritten with intermediate data! 
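The four ADAM7_* tables fully determine the interlace pattern: pass i owns the pixels at x = ADAM7_IX[i] + k*ADAM7_DX[i], y = ADAM7_IY[i] + m*ADAM7_DY[i], and that is exactly the per-pixel mapping Adam7_deinterlace applies. A standalone sketch that labels each pixel of an 8x8 tile with its pass number, using the same constants:

    #include <stdio.h>

    static const unsigned IX[7] = { 0, 4, 0, 2, 0, 1, 0 };
    static const unsigned IY[7] = { 0, 0, 4, 0, 2, 0, 1 };
    static const unsigned DX[7] = { 8, 8, 4, 4, 2, 2, 1 };
    static const unsigned DY[7] = { 8, 8, 8, 4, 4, 2, 2 };

    int main(void)
    {
        unsigned pass[8][8];
        unsigned i, x, y;

        /* the 7 grids are disjoint and together cover the whole 8x8 tile */
        for(i = 0; i < 7; ++i)
            for(y = IY[i]; y < 8; y += DY[i])
                for(x = IX[i]; x < 8; x += DX[i])
                    pass[y][x] = i + 1;

        for(y = 0; y < 8; ++y)
        {
            for(x = 0; x < 8; ++x) printf("%u ", pass[y][x]);
            printf("\n");
        }
        /* the first row comes out as: 1 6 4 6 2 6 4 6 */
        return 0;
    }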
- */ - unsigned bpp = lodepng_get_bpp(&info_png->color); - if(bpp == 0) return 31; /*error: invalid colortype*/ - - if(info_png->interlace_method == 0) - { - if(bpp < 8 && w * bpp != ((w * bpp + 7) / 8) * 8) - { - CERROR_TRY_RETURN(unfilter(in, in, w, h, bpp)); - removePaddingBits(out, in, w * bpp, ((w * bpp + 7) / 8) * 8, h); - } - /*we can immediately filter into the out buffer, no other steps needed*/ - else CERROR_TRY_RETURN(unfilter(out, in, w, h, bpp)); - } - else /*interlace_method is 1 (Adam7)*/ - { - unsigned passw[7], passh[7]; size_t filter_passstart[8], padded_passstart[8], passstart[8]; - unsigned i; - - Adam7_getpassvalues(passw, passh, filter_passstart, padded_passstart, passstart, w, h, bpp); - - for(i = 0; i != 7; ++i) - { - CERROR_TRY_RETURN(unfilter(&in[padded_passstart[i]], &in[filter_passstart[i]], passw[i], passh[i], bpp)); - /*TODO: possible efficiency improvement: if in this reduced image the bits fit nicely in 1 scanline, - move bytes instead of bits or move not at all*/ - if(bpp < 8) - { - /*remove padding bits in scanlines; after this there still may be padding - bits between the different reduced images: each reduced image still starts nicely at a byte*/ - removePaddingBits(&in[passstart[i]], &in[padded_passstart[i]], passw[i] * bpp, - ((passw[i] * bpp + 7) / 8) * 8, passh[i]); - } - } - - Adam7_deinterlace(out, in, w, h, bpp); - } - - return 0; -} - -static unsigned readChunk_PLTE(LodePNGColorMode* color, const unsigned char* data, size_t chunkLength) -{ - unsigned pos = 0, i; - if(color->palette) lodepng_free(color->palette); - color->palettesize = chunkLength / 3; - color->palette = (unsigned char*)lodepng_malloc(4 * color->palettesize); - if(!color->palette && color->palettesize) - { - color->palettesize = 0; - return 83; /*alloc fail*/ - } - if(color->palettesize > 256) return 38; /*error: palette too big*/ - - for(i = 0; i != color->palettesize; ++i) - { - color->palette[4 * i + 0] = data[pos++]; /*R*/ - color->palette[4 * i + 1] = data[pos++]; /*G*/ - color->palette[4 * i + 2] = data[pos++]; /*B*/ - color->palette[4 * i + 3] = 255; /*alpha*/ - } - - return 0; /* OK */ -} - -static unsigned readChunk_tRNS(LodePNGColorMode* color, const unsigned char* data, size_t chunkLength) -{ - unsigned i; - if(color->colortype == LCT_PALETTE) - { - /*error: more alpha values given than there are palette entries*/ - if(chunkLength > color->palettesize) return 38; - - for(i = 0; i != chunkLength; ++i) color->palette[4 * i + 3] = data[i]; - } - else if(color->colortype == LCT_GREY) - { - /*error: this chunk must be 2 bytes for greyscale image*/ - if(chunkLength != 2) return 30; - - color->key_defined = 1; - color->key_r = color->key_g = color->key_b = 256u * data[0] + data[1]; - } - else if(color->colortype == LCT_RGB) - { - /*error: this chunk must be 6 bytes for RGB image*/ - if(chunkLength != 6) return 41; - - color->key_defined = 1; - color->key_r = 256u * data[0] + data[1]; - color->key_g = 256u * data[2] + data[3]; - color->key_b = 256u * data[4] + data[5]; - } - else return 42; /*error: tRNS chunk not allowed for other color models*/ - - return 0; /* OK */ -} - - -#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS -/*background color chunk (bKGD)*/ -static unsigned readChunk_bKGD(LodePNGInfo* info, const unsigned char* data, size_t chunkLength) -{ - if(info->color.colortype == LCT_PALETTE) - { - /*error: this chunk must be 1 byte for indexed color image*/ - if(chunkLength != 1) return 43; - - info->background_defined = 1; - info->background_r = info->background_g = 
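readChunk_PLTE stores every palette entry as 4 bytes RGBA with alpha preset to 255, and readChunk_tRNS then overwrites only the alpha bytes for the entries the chunk covers. That layout is visible to callers through the palette and palettesize fields of LodePNGColorMode, so a decoded palette can be inspected directly. A sketch, assuming a palette-based test file (the name is a placeholder) and decoding with color conversion turned off so the PNG's own mode is kept:

    #include <stdio.h>
    #include <stdlib.h>
    #include "lodepng.h"

    int main(void)
    {
        unsigned char* buffer = 0; size_t buffersize = 0;
        unsigned char* image = 0; unsigned w = 0, h = 0;

        LodePNGState state;
        lodepng_state_init(&state);
        state.decoder.color_convert = 0; /* keep LCT_PALETTE instead of expanding to RGBA */

        unsigned error = lodepng_load_file(&buffer, &buffersize, "paletted.png");
        if(!error) error = lodepng_decode(&image, &w, &h, &state, buffer, buffersize);

        if(!error && state.info_png.color.colortype == LCT_PALETTE)
        {
            size_t i;
            for(i = 0; i < state.info_png.color.palettesize; ++i)
            {
                const unsigned char* p = &state.info_png.color.palette[4 * i];
                printf("%u: R=%u G=%u B=%u A=%u\n", (unsigned)i, p[0], p[1], p[2], p[3]);
            }
        }

        lodepng_state_cleanup(&state);
        free(image);
        free(buffer);
        return 0;
    }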
info->background_b = data[0]; - } - else if(info->color.colortype == LCT_GREY || info->color.colortype == LCT_GREY_ALPHA) - { - /*error: this chunk must be 2 bytes for greyscale image*/ - if(chunkLength != 2) return 44; - - info->background_defined = 1; - info->background_r = info->background_g = info->background_b = 256u * data[0] + data[1]; - } - else if(info->color.colortype == LCT_RGB || info->color.colortype == LCT_RGBA) - { - /*error: this chunk must be 6 bytes for greyscale image*/ - if(chunkLength != 6) return 45; - - info->background_defined = 1; - info->background_r = 256u * data[0] + data[1]; - info->background_g = 256u * data[2] + data[3]; - info->background_b = 256u * data[4] + data[5]; - } - - return 0; /* OK */ -} - -/*text chunk (tEXt)*/ -static unsigned readChunk_tEXt(LodePNGInfo* info, const unsigned char* data, size_t chunkLength) -{ - unsigned error = 0; - char *key = 0, *str = 0; - unsigned i; - - while(!error) /*not really a while loop, only used to break on error*/ - { - unsigned length, string2_begin; - - length = 0; - while(length < chunkLength && data[length] != 0) ++length; - /*even though it's not allowed by the standard, no error is thrown if - there's no null termination char, if the text is empty*/ - if(length < 1 || length > 79) CERROR_BREAK(error, 89); /*keyword too short or long*/ - - key = (char*)lodepng_malloc(length + 1); - if(!key) CERROR_BREAK(error, 83); /*alloc fail*/ - - key[length] = 0; - for(i = 0; i != length; ++i) key[i] = (char)data[i]; - - string2_begin = length + 1; /*skip keyword null terminator*/ - - length = chunkLength < string2_begin ? 0 : (unsigned int)(chunkLength - string2_begin); - str = (char*)lodepng_malloc(length + 1); - if(!str) CERROR_BREAK(error, 83); /*alloc fail*/ - - str[length] = 0; - for(i = 0; i != length; ++i) str[i] = (char)data[string2_begin + i]; - - error = lodepng_add_text(info, key, str); - - break; - } - - lodepng_free(key); - lodepng_free(str); - - return error; -} - -/*compressed text chunk (zTXt)*/ -static unsigned readChunk_zTXt(LodePNGInfo* info, const LodePNGDecompressSettings* zlibsettings, - const unsigned char* data, size_t chunkLength) -{ - unsigned error = 0; - unsigned i; - - unsigned length, string2_begin; - char *key = 0; - ucvector decoded; - - ucvector_init(&decoded); - - while(!error) /*not really a while loop, only used to break on error*/ - { - for(length = 0; length < chunkLength && data[length] != 0; ++length) ; - if(length + 2 >= chunkLength) CERROR_BREAK(error, 75); /*no null termination, corrupt?*/ - if(length < 1 || length > 79) CERROR_BREAK(error, 89); /*keyword too short or long*/ - - key = (char*)lodepng_malloc(length + 1); - if(!key) CERROR_BREAK(error, 83); /*alloc fail*/ - - key[length] = 0; - for(i = 0; i != length; ++i) key[i] = (char)data[i]; - - if(data[length + 1] != 0) CERROR_BREAK(error, 72); /*the 0 byte indicating compression must be 0*/ - - string2_begin = length + 2; - if(string2_begin > chunkLength) CERROR_BREAK(error, 75); /*no null termination, corrupt?*/ - - length = (unsigned int)(chunkLength - string2_begin); - /*will fail if zlib error, e.g. 
if length is too small*/ - error = zlib_decompress(&decoded.data, &decoded.size, - (unsigned char*)(&data[string2_begin]), - length, zlibsettings); - if(error) break; - ucvector_push_back(&decoded, 0); - - error = lodepng_add_text(info, key, (char*)decoded.data); - - break; - } - - lodepng_free(key); - ucvector_cleanup(&decoded); - - return error; -} - -/*international text chunk (iTXt)*/ -static unsigned readChunk_iTXt(LodePNGInfo* info, const LodePNGDecompressSettings* zlibsettings, - const unsigned char* data, size_t chunkLength) -{ - unsigned error = 0; - unsigned i; - - unsigned length, begin, compressed; - char *key = 0, *langtag = 0, *transkey = 0; - ucvector decoded; - ucvector_init(&decoded); - - while(!error) /*not really a while loop, only used to break on error*/ - { - /*Quick check if the chunk length isn't too small. Even without check - it'd still fail with other error checks below if it's too short. This just gives a different error code.*/ - if(chunkLength < 5) CERROR_BREAK(error, 30); /*iTXt chunk too short*/ - - /*read the key*/ - for(length = 0; length < chunkLength && data[length] != 0; ++length) ; - if(length + 3 >= chunkLength) CERROR_BREAK(error, 75); /*no null termination char, corrupt?*/ - if(length < 1 || length > 79) CERROR_BREAK(error, 89); /*keyword too short or long*/ - - key = (char*)lodepng_malloc(length + 1); - if(!key) CERROR_BREAK(error, 83); /*alloc fail*/ - - key[length] = 0; - for(i = 0; i != length; ++i) key[i] = (char)data[i]; - - /*read the compression method*/ - compressed = data[length + 1]; - if(data[length + 2] != 0) CERROR_BREAK(error, 72); /*the 0 byte indicating compression must be 0*/ - - /*even though it's not allowed by the standard, no error is thrown if - there's no null termination char, if the text is empty for the next 3 texts*/ - - /*read the langtag*/ - begin = length + 3; - length = 0; - for(i = begin; i < chunkLength && data[i] != 0; ++i) ++length; - - langtag = (char*)lodepng_malloc(length + 1); - if(!langtag) CERROR_BREAK(error, 83); /*alloc fail*/ - - langtag[length] = 0; - for(i = 0; i != length; ++i) langtag[i] = (char)data[begin + i]; - - /*read the transkey*/ - begin += length + 1; - length = 0; - for(i = begin; i < chunkLength && data[i] != 0; ++i) ++length; - - transkey = (char*)lodepng_malloc(length + 1); - if(!transkey) CERROR_BREAK(error, 83); /*alloc fail*/ - - transkey[length] = 0; - for(i = 0; i != length; ++i) transkey[i] = (char)data[begin + i]; - - /*read the actual text*/ - begin += length + 1; - - length = chunkLength < begin ? 0 : (unsigned int)(chunkLength - begin); - - if(compressed) - { - /*will fail if zlib error, e.g. 
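All three text readers end up in lodepng_add_text / lodepng_add_itext, which store the strings on the LodePNGInfo; the encoder-side addChunk_tEXt/zTXt/iTXt further below serialize that same storage. So attaching text on encode is just a call before lodepng_encode. A sketch; the 2x2 test image, keyword, text and output name are all invented, and lodepng_save_file (a disk helper not part of this excerpt) is assumed to be compiled in:

    #include <stdio.h>
    #include <stdlib.h>
    #include "lodepng.h"

    int main(void)
    {
        /* 2x2 opaque red RGBA image */
        unsigned char image[16] = { 255,0,0,255, 255,0,0,255, 255,0,0,255, 255,0,0,255 };
        unsigned char* png = 0; size_t pngsize = 0;

        LodePNGState state;
        lodepng_state_init(&state);

        /* keyword must be 1..79 bytes, the same limit the readers above enforce */
        unsigned error = lodepng_add_text(&state.info_png, "Comment", "written by a sketch");
        if(!error) error = lodepng_encode(&png, &pngsize, image, 2, 2, &state);
        if(!error) error = lodepng_save_file(png, pngsize, "out.png");
        printf("error %u\n", error);

        lodepng_state_cleanup(&state);
        free(png);
        return 0;
    }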
if length is too small*/ - error = zlib_decompress(&decoded.data, &decoded.size, - (unsigned char*)(&data[begin]), - length, zlibsettings); - if(error) break; - if(decoded.allocsize < decoded.size) decoded.allocsize = decoded.size; - ucvector_push_back(&decoded, 0); - } - else - { - if(!ucvector_resize(&decoded, length + 1)) CERROR_BREAK(error, 83 /*alloc fail*/); - - decoded.data[length] = 0; - for(i = 0; i != length; ++i) decoded.data[i] = data[begin + i]; - } - - error = lodepng_add_itext(info, key, langtag, transkey, (char*)decoded.data); - - break; - } - - lodepng_free(key); - lodepng_free(langtag); - lodepng_free(transkey); - ucvector_cleanup(&decoded); - - return error; -} - -static unsigned readChunk_tIME(LodePNGInfo* info, const unsigned char* data, size_t chunkLength) -{ - if(chunkLength != 7) return 73; /*invalid tIME chunk size*/ - - info->time_defined = 1; - info->time.year = 256u * data[0] + data[1]; - info->time.month = data[2]; - info->time.day = data[3]; - info->time.hour = data[4]; - info->time.minute = data[5]; - info->time.second = data[6]; - - return 0; /* OK */ -} - -static unsigned readChunk_pHYs(LodePNGInfo* info, const unsigned char* data, size_t chunkLength) -{ - if(chunkLength != 9) return 74; /*invalid pHYs chunk size*/ - - info->phys_defined = 1; - info->phys_x = 16777216u * data[0] + 65536u * data[1] + 256u * data[2] + data[3]; - info->phys_y = 16777216u * data[4] + 65536u * data[5] + 256u * data[6] + data[7]; - info->phys_unit = data[8]; - - return 0; /* OK */ -} -#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ - -/*read a PNG, the result will be in the same color type as the PNG (hence "generic")*/ -static void decodeGeneric(unsigned char** out, unsigned* w, unsigned* h, - LodePNGState* state, - const unsigned char* in, size_t insize) -{ - unsigned char IEND = 0; - const unsigned char* chunk; - size_t i; - ucvector idat; /*the data from idat chunks*/ - ucvector scanlines; - size_t predict; - size_t numpixels; - - /*for unknown chunk order*/ - unsigned unknown = 0; -#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS - unsigned critical_pos = 1; /*1 = after IHDR, 2 = after PLTE, 3 = after IDAT*/ -#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ - - /*provide some proper output values if error will happen*/ - *out = 0; - - state->error = lodepng_inspect(w, h, state, in, insize); /*reads header and resets other parameters in state->info_png*/ - if(state->error) return; - - numpixels = *w * *h; - - /*multiplication overflow*/ - if(*h != 0 && numpixels / *h != *w) CERROR_RETURN(state->error, 92); - /*multiplication overflow possible further below. Allows up to 2^31-1 pixel - bytes with 16-bit RGBA, the rest is room for filter bytes.*/ - if(numpixels > 268435455) CERROR_RETURN(state->error, 92); - - ucvector_init(&idat); - chunk = &in[33]; /*first byte of the first chunk after the header*/ - - /*loop through the chunks, ignoring unknown chunks and stopping at IEND chunk. 
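readChunk_pHYs is nothing more than two big-endian 32-bit reads plus a unit byte; the 16777216u/65536u/256u multiplications are the byte-weight form of that. The same read, written with shifts, for a concrete payload:

    #include <stdio.h>

    /* big-endian 32-bit read, equivalent to 16777216u*p[0] + 65536u*p[1] + 256u*p[2] + p[3] */
    static unsigned read_be32(const unsigned char* p)
    {
        return ((unsigned)p[0] << 24) | ((unsigned)p[1] << 16) |
               ((unsigned)p[2] << 8)  |  (unsigned)p[3];
    }

    int main(void)
    {
        /* 2835 pixels per metre (roughly 72 DPI), as it would appear in a pHYs payload */
        const unsigned char payload[4] = { 0x00, 0x00, 0x0B, 0x13 };
        printf("%u\n", read_be32(payload)); /* prints 2835 */
        return 0;
    }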
- IDAT data is put at the start of the in buffer*/ - while(!IEND && !state->error) - { - unsigned chunkLength; - const unsigned char* data; /*the data in the chunk*/ - - /*error: size of the in buffer too small to contain next chunk*/ - if((size_t)((chunk - in) + 12) > insize || chunk < in) CERROR_BREAK(state->error, 30); - - /*length of the data of the chunk, excluding the length bytes, chunk type and CRC bytes*/ - chunkLength = lodepng_chunk_length(chunk); - /*error: chunk length larger than the max PNG chunk size*/ - if(chunkLength > 2147483647) CERROR_BREAK(state->error, 63); - - if((size_t)((chunk - in) + chunkLength + 12) > insize || (chunk + chunkLength + 12) < in) - { - CERROR_BREAK(state->error, 64); /*error: size of the in buffer too small to contain next chunk*/ - } - - data = lodepng_chunk_data_const(chunk); - - /*IDAT chunk, containing compressed image data*/ - if(lodepng_chunk_type_equals(chunk, "IDAT")) - { - size_t oldsize = idat.size; - if(!ucvector_resize(&idat, oldsize + chunkLength)) CERROR_BREAK(state->error, 83 /*alloc fail*/); - for(i = 0; i != chunkLength; ++i) idat.data[oldsize + i] = data[i]; -#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS - critical_pos = 3; -#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ - } - /*IEND chunk*/ - else if(lodepng_chunk_type_equals(chunk, "IEND")) - { - IEND = 1; - } - /*palette chunk (PLTE)*/ - else if(lodepng_chunk_type_equals(chunk, "PLTE")) - { - state->error = readChunk_PLTE(&state->info_png.color, data, chunkLength); - if(state->error) break; -#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS - critical_pos = 2; -#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ - } - /*palette transparency chunk (tRNS)*/ - else if(lodepng_chunk_type_equals(chunk, "tRNS")) - { - state->error = readChunk_tRNS(&state->info_png.color, data, chunkLength); - if(state->error) break; - } -#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS - /*background color chunk (bKGD)*/ - else if(lodepng_chunk_type_equals(chunk, "bKGD")) - { - state->error = readChunk_bKGD(&state->info_png, data, chunkLength); - if(state->error) break; - } - /*text chunk (tEXt)*/ - else if(lodepng_chunk_type_equals(chunk, "tEXt")) - { - if(state->decoder.read_text_chunks) - { - state->error = readChunk_tEXt(&state->info_png, data, chunkLength); - if(state->error) break; - } - } - /*compressed text chunk (zTXt)*/ - else if(lodepng_chunk_type_equals(chunk, "zTXt")) - { - if(state->decoder.read_text_chunks) - { - state->error = readChunk_zTXt(&state->info_png, &state->decoder.zlibsettings, data, chunkLength); - if(state->error) break; - } - } - /*international text chunk (iTXt)*/ - else if(lodepng_chunk_type_equals(chunk, "iTXt")) - { - if(state->decoder.read_text_chunks) - { - state->error = readChunk_iTXt(&state->info_png, &state->decoder.zlibsettings, data, chunkLength); - if(state->error) break; - } - } - else if(lodepng_chunk_type_equals(chunk, "tIME")) - { - state->error = readChunk_tIME(&state->info_png, data, chunkLength); - if(state->error) break; - } - else if(lodepng_chunk_type_equals(chunk, "pHYs")) - { - state->error = readChunk_pHYs(&state->info_png, data, chunkLength); - if(state->error) break; - } -#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ - else /*it's not an implemented chunk type, so ignore it: skip over the data*/ - { - /*error: unknown critical chunk (5th bit of first byte of chunk type is 0)*/ - if(!lodepng_chunk_ancillary(chunk)) CERROR_BREAK(state->error, 69); - - unknown = 1; -#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS - if(state->decoder.remember_unknown_chunks) - { - state->error = 
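The loop above only ever advances through the file with lodepng_chunk_length and lodepng_chunk_next_const, re-checking on every step that the declared length still fits in the buffer. Those helpers are public, so the same walk works outside the decoder, for example to list a file's chunks. A sketch that assumes a well-formed file (the name is a placeholder) and reads the 4 type bytes straight from the standard length-then-type layout:

    #include <stdio.h>
    #include <stdlib.h>
    #include "lodepng.h"

    int main(void)
    {
        unsigned char* png = 0; size_t pngsize = 0;
        unsigned error = lodepng_load_file(&png, &pngsize, "in.png");

        if(!error && pngsize >= 8)
        {
            const unsigned char* chunk = png + 8; /* first chunk follows the 8-byte signature */
            while((size_t)(chunk - png) + 12 <= pngsize)
            {
                unsigned length = lodepng_chunk_length(chunk);
                printf("%c%c%c%c  %u bytes  ancillary=%u\n",
                       chunk[4], chunk[5], chunk[6], chunk[7],
                       length, (unsigned)lodepng_chunk_ancillary(chunk));
                if(lodepng_chunk_type_equals(chunk, "IEND")) break;
                chunk = lodepng_chunk_next_const(chunk);
            }
        }

        free(png);
        return 0;
    }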
lodepng_chunk_append(&state->info_png.unknown_chunks_data[critical_pos - 1], - &state->info_png.unknown_chunks_size[critical_pos - 1], chunk); - if(state->error) break; - } -#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ - } - - if(!state->decoder.ignore_crc && !unknown) /*check CRC if wanted, only on known chunk types*/ - { - if(lodepng_chunk_check_crc(chunk)) CERROR_BREAK(state->error, 57); /*invalid CRC*/ - } - - if(!IEND) chunk = lodepng_chunk_next_const(chunk); - } - - ucvector_init(&scanlines); - /*predict output size, to allocate exact size for output buffer to avoid more dynamic allocation. - If the decompressed size does not match the prediction, the image must be corrupt.*/ - if(state->info_png.interlace_method == 0) - { - /*The extra *h is added because this are the filter bytes every scanline starts with*/ - predict = lodepng_get_raw_size_idat(*w, *h, &state->info_png.color) + *h; - } - else - { - /*Adam-7 interlaced: predicted size is the sum of the 7 sub-images sizes*/ - const LodePNGColorMode* color = &state->info_png.color; - predict = 0; - predict += lodepng_get_raw_size_idat((*w + 7) >> 3, (*h + 7) >> 3, color) + ((*h + 7) >> 3); - if(*w > 4) predict += lodepng_get_raw_size_idat((*w + 3) >> 3, (*h + 7) >> 3, color) + ((*h + 7) >> 3); - predict += lodepng_get_raw_size_idat((*w + 3) >> 2, (*h + 3) >> 3, color) + ((*h + 3) >> 3); - if(*w > 2) predict += lodepng_get_raw_size_idat((*w + 1) >> 2, (*h + 3) >> 2, color) + ((*h + 3) >> 2); - predict += lodepng_get_raw_size_idat((*w + 1) >> 1, (*h + 1) >> 2, color) + ((*h + 1) >> 2); - if(*w > 1) predict += lodepng_get_raw_size_idat((*w + 0) >> 1, (*h + 1) >> 1, color) + ((*h + 1) >> 1); - predict += lodepng_get_raw_size_idat((*w + 0), (*h + 0) >> 1, color) + ((*h + 0) >> 1); - } - if(!state->error && !ucvector_reserve(&scanlines, predict)) state->error = 83; /*alloc fail*/ - if(!state->error) - { - state->error = zlib_decompress(&scanlines.data, &scanlines.size, idat.data, - idat.size, &state->decoder.zlibsettings); - if(!state->error && scanlines.size != predict) state->error = 91; /*decompressed size doesn't match prediction*/ - } - ucvector_cleanup(&idat); - - if(!state->error) - { - size_t outsize = lodepng_get_raw_size(*w, *h, &state->info_png.color); - *out = (unsigned char*)lodepng_malloc(outsize); - if(!*out) state->error = 83; /*alloc fail*/ - for(i = 0; i < outsize; i++) (*out)[i] = 0; - if(!state->error) state->error = postProcessScanlines(*out, scanlines.data, *w, *h, &state->info_png); - } - ucvector_cleanup(&scanlines); -} - -unsigned lodepng_decode(unsigned char** out, unsigned* w, unsigned* h, - LodePNGState* state, - const unsigned char* in, size_t insize) -{ - *out = 0; - decodeGeneric(out, w, h, state, in, insize); - if(state->error) return state->error; - if(!state->decoder.color_convert || lodepng_color_mode_equal(&state->info_raw, &state->info_png.color)) - { - /*same color type, no copying or converting of data needed*/ - /*store the info_png color settings on the info_raw so that the info_raw still reflects what colortype - the raw image has to the end user*/ - if(!state->decoder.color_convert) - { - state->error = lodepng_color_mode_copy(&state->info_raw, &state->info_png.color); - if(state->error) return state->error; - } - } - else - { - /*color conversion needed; sort of copy of the data*/ - unsigned char* data = *out; - size_t outsize; - - /*TODO: check if this works according to the statement in the documentation: "The converter can convert - from greyscale input color type, to 8-bit greyscale or 
greyscale with alpha"*/ - if(!(state->info_raw.colortype == LCT_RGB || state->info_raw.colortype == LCT_RGBA) - && !(state->info_raw.bitdepth == 8)) - { - return 56; /*unsupported color mode conversion*/ - } - - outsize = lodepng_get_raw_size(*w, *h, &state->info_raw); - *out = (unsigned char*)lodepng_malloc(outsize); - if(!(*out)) - { - state->error = 83; /*alloc fail*/ - } - else state->error = lodepng_convert(*out, data, &state->info_raw, - &state->info_png.color, *w, *h); - lodepng_free(data); - } - return state->error; -} - -unsigned lodepng_decode_memory(unsigned char** out, unsigned* w, unsigned* h, const unsigned char* in, - size_t insize, LodePNGColorType colortype, unsigned bitdepth) -{ - unsigned error; - LodePNGState state; - lodepng_state_init(&state); - state.info_raw.colortype = colortype; - state.info_raw.bitdepth = bitdepth; - error = lodepng_decode(out, w, h, &state, in, insize); - lodepng_state_cleanup(&state); - return error; -} - -unsigned lodepng_decode32(unsigned char** out, unsigned* w, unsigned* h, const unsigned char* in, size_t insize) -{ - return lodepng_decode_memory(out, w, h, in, insize, LCT_RGBA, 8); -} - -unsigned lodepng_decode24(unsigned char** out, unsigned* w, unsigned* h, const unsigned char* in, size_t insize) -{ - return lodepng_decode_memory(out, w, h, in, insize, LCT_RGB, 8); -} - -#ifdef LODEPNG_COMPILE_DISK -unsigned lodepng_decode_file(unsigned char** out, unsigned* w, unsigned* h, const char* filename, - LodePNGColorType colortype, unsigned bitdepth) -{ - unsigned char* buffer; - size_t buffersize; - unsigned error; - error = lodepng_load_file(&buffer, &buffersize, filename); - if(!error) error = lodepng_decode_memory(out, w, h, buffer, buffersize, colortype, bitdepth); - lodepng_free(buffer); - return error; -} - -unsigned lodepng_decode32_file(unsigned char** out, unsigned* w, unsigned* h, const char* filename) -{ - return lodepng_decode_file(out, w, h, filename, LCT_RGBA, 8); -} - -unsigned lodepng_decode24_file(unsigned char** out, unsigned* w, unsigned* h, const char* filename) -{ - return lodepng_decode_file(out, w, h, filename, LCT_RGB, 8); -} -#endif /*LODEPNG_COMPILE_DISK*/ - -void lodepng_decoder_settings_init(LodePNGDecoderSettings* settings) -{ - settings->color_convert = 1; -#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS - settings->read_text_chunks = 1; - settings->remember_unknown_chunks = 0; -#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ - settings->ignore_crc = 0; - lodepng_decompress_settings_init(&settings->zlibsettings); -} - -#endif /*LODEPNG_COMPILE_DECODER*/ - -#if defined(LODEPNG_COMPILE_DECODER) || defined(LODEPNG_COMPILE_ENCODER) - -void lodepng_state_init(LodePNGState* state) -{ -#ifdef LODEPNG_COMPILE_DECODER - lodepng_decoder_settings_init(&state->decoder); -#endif /*LODEPNG_COMPILE_DECODER*/ -#ifdef LODEPNG_COMPILE_ENCODER - lodepng_encoder_settings_init(&state->encoder); -#endif /*LODEPNG_COMPILE_ENCODER*/ - lodepng_color_mode_init(&state->info_raw); - lodepng_info_init(&state->info_png); - state->error = 1; -} - -void lodepng_state_cleanup(LodePNGState* state) -{ - lodepng_color_mode_cleanup(&state->info_raw); - lodepng_info_cleanup(&state->info_png); -} - -void lodepng_state_copy(LodePNGState* dest, const LodePNGState* source) -{ - lodepng_state_cleanup(dest); - *dest = *source; - lodepng_color_mode_init(&dest->info_raw); - lodepng_info_init(&dest->info_png); - dest->error = lodepng_color_mode_copy(&dest->info_raw, &source->info_raw); if(dest->error) return; - dest->error = lodepng_info_copy(&dest->info_png, 
&source->info_png); if(dest->error) return; -} - -#endif /* defined(LODEPNG_COMPILE_DECODER) || defined(LODEPNG_COMPILE_ENCODER) */ - -#ifdef LODEPNG_COMPILE_ENCODER - -/* ////////////////////////////////////////////////////////////////////////// */ -/* / PNG Encoder / */ -/* ////////////////////////////////////////////////////////////////////////// */ - -/*chunkName must be string of 4 characters*/ -static unsigned addChunk(ucvector* out, const char* chunkName, const unsigned char* data, size_t length) -{ - CERROR_TRY_RETURN(lodepng_chunk_create(&out->data, &out->size, (unsigned)length, chunkName, data)); - out->allocsize = out->size; /*fix the allocsize again*/ - return 0; -} - -static void writeSignature(ucvector* out) -{ - /*8 bytes PNG signature, aka the magic bytes*/ - ucvector_push_back(out, 137); - ucvector_push_back(out, 80); - ucvector_push_back(out, 78); - ucvector_push_back(out, 71); - ucvector_push_back(out, 13); - ucvector_push_back(out, 10); - ucvector_push_back(out, 26); - ucvector_push_back(out, 10); -} - -static unsigned addChunk_IHDR(ucvector* out, unsigned w, unsigned h, - LodePNGColorType colortype, unsigned bitdepth, unsigned interlace_method) -{ - unsigned error = 0; - ucvector header; - ucvector_init(&header); - - lodepng_add32bitInt(&header, w); /*width*/ - lodepng_add32bitInt(&header, h); /*height*/ - ucvector_push_back(&header, (unsigned char)bitdepth); /*bit depth*/ - ucvector_push_back(&header, (unsigned char)colortype); /*color type*/ - ucvector_push_back(&header, 0); /*compression method*/ - ucvector_push_back(&header, 0); /*filter method*/ - ucvector_push_back(&header, interlace_method); /*interlace method*/ - - error = addChunk(out, "IHDR", header.data, header.size); - ucvector_cleanup(&header); - - return error; -} - -static unsigned addChunk_PLTE(ucvector* out, const LodePNGColorMode* info) -{ - unsigned error = 0; - size_t i; - ucvector PLTE; - ucvector_init(&PLTE); - for(i = 0; i != info->palettesize * 4; ++i) - { - /*add all channels except alpha channel*/ - if(i % 4 != 3) ucvector_push_back(&PLTE, info->palette[i]); - } - error = addChunk(out, "PLTE", PLTE.data, PLTE.size); - ucvector_cleanup(&PLTE); - - return error; -} - -static unsigned addChunk_tRNS(ucvector* out, const LodePNGColorMode* info) -{ - unsigned error = 0; - size_t i; - ucvector tRNS; - ucvector_init(&tRNS); - if(info->colortype == LCT_PALETTE) - { - size_t amount = info->palettesize; - /*the tail of palette values that all have 255 as alpha, does not have to be encoded*/ - for(i = info->palettesize; i != 0; --i) - { - if(info->palette[4 * (i - 1) + 3] == 255) --amount; - else break; - } - /*add only alpha channel*/ - for(i = 0; i != amount; ++i) ucvector_push_back(&tRNS, info->palette[4 * i + 3]); - } - else if(info->colortype == LCT_GREY) - { - if(info->key_defined) - { - ucvector_push_back(&tRNS, (unsigned char)(info->key_r >> 8)); - ucvector_push_back(&tRNS, (unsigned char)(info->key_r & 255)); - } - } - else if(info->colortype == LCT_RGB) - { - if(info->key_defined) - { - ucvector_push_back(&tRNS, (unsigned char)(info->key_r >> 8)); - ucvector_push_back(&tRNS, (unsigned char)(info->key_r & 255)); - ucvector_push_back(&tRNS, (unsigned char)(info->key_g >> 8)); - ucvector_push_back(&tRNS, (unsigned char)(info->key_g & 255)); - ucvector_push_back(&tRNS, (unsigned char)(info->key_b >> 8)); - ucvector_push_back(&tRNS, (unsigned char)(info->key_b & 255)); - } - } - - error = addChunk(out, "tRNS", tRNS.data, tRNS.size); - ucvector_cleanup(&tRNS); - - return error; -} - -static unsigned 
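addChunk is a thin wrapper over the public lodepng_chunk_create, which writes the 4-byte length, the 4-byte type, the payload and the CRC in one go. A sketch of building a single chunk into a fresh buffer just to look at that layout (the "prVt" type name and the payload are invented):

    #include <stdio.h>
    #include <stdlib.h>
    #include "lodepng.h"

    int main(void)
    {
        unsigned char* buffer = 0;
        size_t size = 0;
        const unsigned char payload[4] = { 1, 2, 3, 4 };

        /* "prVt": lowercase 1st letter = ancillary, lowercase 2nd letter = private */
        unsigned error = lodepng_chunk_create(&buffer, &size, 4, "prVt", payload);
        if(!error)
            printf("chunk occupies %u bytes (4 length + 4 type + 4 data + 4 CRC), declared length %u\n",
                   (unsigned)size, lodepng_chunk_length(buffer));

        free(buffer);
        return 0;
    }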
addChunk_IDAT(ucvector* out, const unsigned char* data, size_t datasize, - LodePNGCompressSettings* zlibsettings) -{ - ucvector zlibdata; - unsigned error = 0; - - /*compress with the Zlib compressor*/ - ucvector_init(&zlibdata); - error = zlib_compress(&zlibdata.data, &zlibdata.size, data, datasize, zlibsettings); - if(!error) error = addChunk(out, "IDAT", zlibdata.data, zlibdata.size); - ucvector_cleanup(&zlibdata); - - return error; -} - -static unsigned addChunk_IEND(ucvector* out) -{ - unsigned error = 0; - error = addChunk(out, "IEND", 0, 0); - return error; -} - -#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS - -static unsigned addChunk_tEXt(ucvector* out, const char* keyword, const char* textstring) -{ - unsigned error = 0; - size_t i; - ucvector text; - ucvector_init(&text); - for(i = 0; keyword[i] != 0; ++i) ucvector_push_back(&text, (unsigned char)keyword[i]); - if(i < 1 || i > 79) return 89; /*error: invalid keyword size*/ - ucvector_push_back(&text, 0); /*0 termination char*/ - for(i = 0; textstring[i] != 0; ++i) ucvector_push_back(&text, (unsigned char)textstring[i]); - error = addChunk(out, "tEXt", text.data, text.size); - ucvector_cleanup(&text); - - return error; -} - -static unsigned addChunk_zTXt(ucvector* out, const char* keyword, const char* textstring, - LodePNGCompressSettings* zlibsettings) -{ - unsigned error = 0; - ucvector data, compressed; - size_t i, textsize = strlen(textstring); - - ucvector_init(&data); - ucvector_init(&compressed); - for(i = 0; keyword[i] != 0; ++i) ucvector_push_back(&data, (unsigned char)keyword[i]); - if(i < 1 || i > 79) return 89; /*error: invalid keyword size*/ - ucvector_push_back(&data, 0); /*0 termination char*/ - ucvector_push_back(&data, 0); /*compression method: 0*/ - - error = zlib_compress(&compressed.data, &compressed.size, - (unsigned char*)textstring, textsize, zlibsettings); - if(!error) - { - for(i = 0; i != compressed.size; ++i) ucvector_push_back(&data, compressed.data[i]); - error = addChunk(out, "zTXt", data.data, data.size); - } - - ucvector_cleanup(&compressed); - ucvector_cleanup(&data); - return error; -} - -static unsigned addChunk_iTXt(ucvector* out, unsigned compressed, const char* keyword, const char* langtag, - const char* transkey, const char* textstring, LodePNGCompressSettings* zlibsettings) -{ - unsigned error = 0; - ucvector data; - size_t i, textsize = strlen(textstring); - - ucvector_init(&data); - - for(i = 0; keyword[i] != 0; ++i) ucvector_push_back(&data, (unsigned char)keyword[i]); - if(i < 1 || i > 79) return 89; /*error: invalid keyword size*/ - ucvector_push_back(&data, 0); /*null termination char*/ - ucvector_push_back(&data, compressed ? 
1 : 0); /*compression flag*/ - ucvector_push_back(&data, 0); /*compression method*/ - for(i = 0; langtag[i] != 0; ++i) ucvector_push_back(&data, (unsigned char)langtag[i]); - ucvector_push_back(&data, 0); /*null termination char*/ - for(i = 0; transkey[i] != 0; ++i) ucvector_push_back(&data, (unsigned char)transkey[i]); - ucvector_push_back(&data, 0); /*null termination char*/ - - if(compressed) - { - ucvector compressed_data; - ucvector_init(&compressed_data); - error = zlib_compress(&compressed_data.data, &compressed_data.size, - (unsigned char*)textstring, textsize, zlibsettings); - if(!error) - { - for(i = 0; i != compressed_data.size; ++i) ucvector_push_back(&data, compressed_data.data[i]); - } - ucvector_cleanup(&compressed_data); - } - else /*not compressed*/ - { - for(i = 0; textstring[i] != 0; ++i) ucvector_push_back(&data, (unsigned char)textstring[i]); - } - - if(!error) error = addChunk(out, "iTXt", data.data, data.size); - ucvector_cleanup(&data); - return error; -} - -static unsigned addChunk_bKGD(ucvector* out, const LodePNGInfo* info) -{ - unsigned error = 0; - ucvector bKGD; - ucvector_init(&bKGD); - if(info->color.colortype == LCT_GREY || info->color.colortype == LCT_GREY_ALPHA) - { - ucvector_push_back(&bKGD, (unsigned char)(info->background_r >> 8)); - ucvector_push_back(&bKGD, (unsigned char)(info->background_r & 255)); - } - else if(info->color.colortype == LCT_RGB || info->color.colortype == LCT_RGBA) - { - ucvector_push_back(&bKGD, (unsigned char)(info->background_r >> 8)); - ucvector_push_back(&bKGD, (unsigned char)(info->background_r & 255)); - ucvector_push_back(&bKGD, (unsigned char)(info->background_g >> 8)); - ucvector_push_back(&bKGD, (unsigned char)(info->background_g & 255)); - ucvector_push_back(&bKGD, (unsigned char)(info->background_b >> 8)); - ucvector_push_back(&bKGD, (unsigned char)(info->background_b & 255)); - } - else if(info->color.colortype == LCT_PALETTE) - { - ucvector_push_back(&bKGD, (unsigned char)(info->background_r & 255)); /*palette index*/ - } - - error = addChunk(out, "bKGD", bKGD.data, bKGD.size); - ucvector_cleanup(&bKGD); - - return error; -} - -static unsigned addChunk_tIME(ucvector* out, const LodePNGTime* time) -{ - unsigned error = 0; - unsigned char* data = (unsigned char*)lodepng_malloc(7); - if(!data) return 83; /*alloc fail*/ - data[0] = (unsigned char)(time->year >> 8); - data[1] = (unsigned char)(time->year & 255); - data[2] = (unsigned char)time->month; - data[3] = (unsigned char)time->day; - data[4] = (unsigned char)time->hour; - data[5] = (unsigned char)time->minute; - data[6] = (unsigned char)time->second; - error = addChunk(out, "tIME", data, 7); - lodepng_free(data); - return error; -} - -static unsigned addChunk_pHYs(ucvector* out, const LodePNGInfo* info) -{ - unsigned error = 0; - ucvector data; - ucvector_init(&data); - - lodepng_add32bitInt(&data, info->phys_x); - lodepng_add32bitInt(&data, info->phys_y); - ucvector_push_back(&data, info->phys_unit); - - error = addChunk(out, "pHYs", data.data, data.size); - ucvector_cleanup(&data); - - return error; -} - -#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ - -static void filterScanline(unsigned char* out, const unsigned char* scanline, const unsigned char* prevline, - size_t length, size_t bytewidth, unsigned char filterType) -{ - size_t i; - switch(filterType) - { - case 0: /*None*/ - for(i = 0; i != length; ++i) out[i] = scanline[i]; - break; - case 1: /*Sub*/ - for(i = 0; i != bytewidth; ++i) out[i] = scanline[i]; - for(i = bytewidth; i < length; ++i) out[i] = 
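addChunk_bKGD, addChunk_tIME and addChunk_pHYs serialize the same LodePNGInfo fields that the corresponding readers filled in on the decode side, so emitting this metadata is a matter of setting those fields before encoding. A sketch; the 1x1 image, the timestamp and the resolution value are invented, and it is assumed (not shown in this excerpt) that lodepng_encode writes these chunks whenever the *_defined flags are set:

    #include <stdio.h>
    #include <stdlib.h>
    #include "lodepng.h"

    int main(void)
    {
        unsigned char image[4] = { 0, 0, 0, 255 }; /* 1x1 opaque black */
        unsigned char* png = 0; size_t pngsize = 0;

        LodePNGState state;
        lodepng_state_init(&state);

        /* pHYs: 2835 pixels per metre (~72 DPI) in both directions, unit 1 = metre */
        state.info_png.phys_defined = 1;
        state.info_png.phys_x = 2835;
        state.info_png.phys_y = 2835;
        state.info_png.phys_unit = 1;

        /* tIME: an arbitrary timestamp */
        state.info_png.time_defined = 1;
        state.info_png.time.year = 2021;
        state.info_png.time.month = 2;
        state.info_png.time.day = 15;
        state.info_png.time.hour = 19;
        state.info_png.time.minute = 27;
        state.info_png.time.second = 0;

        unsigned error = lodepng_encode(&png, &pngsize, image, 1, 1, &state);
        printf("error %u, %u bytes\n", error, (unsigned)pngsize);

        lodepng_state_cleanup(&state);
        free(png);
        return 0;
    }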
scanline[i] - scanline[i - bytewidth]; - break; - case 2: /*Up*/ - if(prevline) - { - for(i = 0; i != length; ++i) out[i] = scanline[i] - prevline[i]; - } - else - { - for(i = 0; i != length; ++i) out[i] = scanline[i]; - } - break; - case 3: /*Average*/ - if(prevline) - { - for(i = 0; i != bytewidth; ++i) out[i] = scanline[i] - (prevline[i] >> 1); - for(i = bytewidth; i < length; ++i) out[i] = scanline[i] - ((scanline[i - bytewidth] + prevline[i]) >> 1); - } - else - { - for(i = 0; i != bytewidth; ++i) out[i] = scanline[i]; - for(i = bytewidth; i < length; ++i) out[i] = scanline[i] - (scanline[i - bytewidth] >> 1); - } - break; - case 4: /*Paeth*/ - if(prevline) - { - /*paethPredictor(0, prevline[i], 0) is always prevline[i]*/ - for(i = 0; i != bytewidth; ++i) out[i] = (scanline[i] - prevline[i]); - for(i = bytewidth; i < length; ++i) - { - out[i] = (scanline[i] - paethPredictor(scanline[i - bytewidth], prevline[i], prevline[i - bytewidth])); - } - } - else - { - for(i = 0; i != bytewidth; ++i) out[i] = scanline[i]; - /*paethPredictor(scanline[i - bytewidth], 0, 0) is always scanline[i - bytewidth]*/ - for(i = bytewidth; i < length; ++i) out[i] = (scanline[i] - scanline[i - bytewidth]); - } - break; - default: return; /*unexisting filter type given*/ - } -} - -/* log2 approximation. A slight bit faster than std::log. */ -static float flog2(float f) -{ - float result = 0; - while(f > 32) { result += 4; f /= 16; } - while(f > 2) { ++result; f /= 2; } - return result + 1.442695f * (f * f * f / 3 - 3 * f * f / 2 + 3 * f - 1.83333f); -} - -static unsigned filter(unsigned char* out, const unsigned char* in, unsigned w, unsigned h, - const LodePNGColorMode* info, const LodePNGEncoderSettings* settings) -{ - /* - For PNG filter method 0 - out must be a buffer with as size: h + (w * h * bpp + 7) / 8, because there are - the scanlines with 1 extra byte per scanline - */ - - unsigned bpp = lodepng_get_bpp(info); - /*the width of a scanline in bytes, not including the filter type*/ - size_t linebytes = (w * bpp + 7) / 8; - /*bytewidth is used for filtering, is 1 when bpp < 8, number of bytes per pixel otherwise*/ - size_t bytewidth = (bpp + 7) / 8; - const unsigned char* prevline = 0; - unsigned x, y; - unsigned error = 0; - LodePNGFilterStrategy strategy = settings->filter_strategy; - - /* - There is a heuristic called the minimum sum of absolute differences heuristic, suggested by the PNG standard: - * If the image type is Palette, or the bit depth is smaller than 8, then do not filter the image (i.e. - use fixed filtering, with the filter None). - * (The other case) If the image type is Grayscale or RGB (with or without Alpha), and the bit depth is - not smaller than 8, then use adaptive filtering heuristic as follows: independently for each row, apply - all five filters and select the filter that produces the smallest sum of absolute values per row. - This heuristic is used if filter strategy is LFS_MINSUM and filter_palette_zero is true. - - If filter_palette_zero is true and filter_strategy is not LFS_MINSUM, the above heuristic is followed, - but for "the other case", whatever strategy filter_strategy is set to instead of the minimum sum - heuristic is used. 
- */ - if(settings->filter_palette_zero && - (info->colortype == LCT_PALETTE || info->bitdepth < 8)) strategy = LFS_ZERO; - - if(bpp == 0) return 31; /*error: invalid color type*/ - - if(strategy == LFS_ZERO) - { - for(y = 0; y != h; ++y) - { - size_t outindex = (1 + linebytes) * y; /*the extra filterbyte added to each row*/ - size_t inindex = linebytes * y; - out[outindex] = 0; /*filter type byte*/ - filterScanline(&out[outindex + 1], &in[inindex], prevline, linebytes, bytewidth, 0); - prevline = &in[inindex]; - } - } - else if(strategy == LFS_MINSUM) - { - /*adaptive filtering*/ - size_t sum[5]; - unsigned char* attempt[5]; /*five filtering attempts, one for each filter type*/ - size_t smallest = 0; - unsigned char type, bestType = 0; - - for(type = 0; type != 5; ++type) - { - attempt[type] = (unsigned char*)lodepng_malloc(linebytes); - if(!attempt[type]) return 83; /*alloc fail*/ - } - - if(!error) - { - for(y = 0; y != h; ++y) - { - /*try the 5 filter types*/ - for(type = 0; type != 5; ++type) - { - filterScanline(attempt[type], &in[y * linebytes], prevline, linebytes, bytewidth, type); - - /*calculate the sum of the result*/ - sum[type] = 0; - if(type == 0) - { - for(x = 0; x != linebytes; ++x) sum[type] += (unsigned char)(attempt[type][x]); - } - else - { - for(x = 0; x != linebytes; ++x) - { - /*For differences, each byte should be treated as signed, values above 127 are negative - (converted to signed char). Filtertype 0 isn't a difference though, so use unsigned there. - This means filtertype 0 is almost never chosen, but that is justified.*/ - unsigned char s = attempt[type][x]; - sum[type] += s < 128 ? s : (255U - s); - } - } - - /*check if this is smallest sum (or if type == 0 it's the first case so always store the values)*/ - if(type == 0 || sum[type] < smallest) - { - bestType = type; - smallest = sum[type]; - } - } - - prevline = &in[y * linebytes]; - - /*now fill the out values*/ - out[y * (linebytes + 1)] = bestType; /*the first byte of a scanline will be the filter type*/ - for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[bestType][x]; - } - } - - for(type = 0; type != 5; ++type) lodepng_free(attempt[type]); - } - else if(strategy == LFS_ENTROPY) - { - float sum[5]; - unsigned char* attempt[5]; /*five filtering attempts, one for each filter type*/ - float smallest = 0; - unsigned type, bestType = 0; - unsigned count[256]; - - for(type = 0; type != 5; ++type) - { - attempt[type] = (unsigned char*)lodepng_malloc(linebytes); - if(!attempt[type]) return 83; /*alloc fail*/ - } - - for(y = 0; y != h; ++y) - { - /*try the 5 filter types*/ - for(type = 0; type != 5; ++type) - { - filterScanline(attempt[type], &in[y * linebytes], prevline, linebytes, bytewidth, type); - for(x = 0; x != 256; ++x) count[x] = 0; - for(x = 0; x != linebytes; ++x) ++count[attempt[type][x]]; - ++count[type]; /*the filter type itself is part of the scanline*/ - sum[type] = 0; - for(x = 0; x != 256; ++x) - { - float p = count[x] / (float)(linebytes + 1); - sum[type] += count[x] == 0 ? 
0 : flog2(1 / p) * p; - } - /*check if this is smallest sum (or if type == 0 it's the first case so always store the values)*/ - if(type == 0 || sum[type] < smallest) - { - bestType = type; - smallest = sum[type]; - } - } - - prevline = &in[y * linebytes]; - - /*now fill the out values*/ - out[y * (linebytes + 1)] = bestType; /*the first byte of a scanline will be the filter type*/ - for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[bestType][x]; - } - - for(type = 0; type != 5; ++type) lodepng_free(attempt[type]); - } - else if(strategy == LFS_PREDEFINED) - { - for(y = 0; y != h; ++y) - { - size_t outindex = (1 + linebytes) * y; /*the extra filterbyte added to each row*/ - size_t inindex = linebytes * y; - unsigned char type = settings->predefined_filters[y]; - out[outindex] = type; /*filter type byte*/ - filterScanline(&out[outindex + 1], &in[inindex], prevline, linebytes, bytewidth, type); - prevline = &in[inindex]; - } - } - else if(strategy == LFS_BRUTE_FORCE) - { - /*brute force filter chooser. - deflate the scanline after every filter attempt to see which one deflates best. - This is very slow and gives only slightly smaller, sometimes even larger, result*/ - size_t size[5]; - unsigned char* attempt[5]; /*five filtering attempts, one for each filter type*/ - size_t smallest = 0; - unsigned type = 0, bestType = 0; - unsigned char* dummy; - LodePNGCompressSettings zlibsettings = settings->zlibsettings; - /*use fixed tree on the attempts so that the tree is not adapted to the filtertype on purpose, - to simulate the true case where the tree is the same for the whole image. Sometimes it gives - better result with dynamic tree anyway. Using the fixed tree sometimes gives worse, but in rare - cases better compression. It does make this a bit less slow, so it's worth doing this.*/ - zlibsettings.btype = 1; - /*a custom encoder likely doesn't read the btype setting and is optimized for complete PNG - images only, so disable it*/ - zlibsettings.custom_zlib = 0; - zlibsettings.custom_deflate = 0; - for(type = 0; type != 5; ++type) - { - attempt[type] = (unsigned char*)lodepng_malloc(linebytes); - if(!attempt[type]) return 83; /*alloc fail*/ - } - for(y = 0; y != h; ++y) /*try the 5 filter types*/ - { - for(type = 0; type != 5; ++type) - { - size_t testsize = linebytes; - /*if(testsize > 8) testsize /= 8;*/ /*it already works good enough by testing a part of the row*/ - - filterScanline(attempt[type], &in[y * linebytes], prevline, linebytes, bytewidth, type); - size[type] = 0; - dummy = 0; - zlib_compress(&dummy, &size[type], attempt[type], testsize, &zlibsettings); - lodepng_free(dummy); - /*check if this is smallest size (or if type == 0 it's the first case so always store the values)*/ - if(type == 0 || size[type] < smallest) - { - bestType = type; - smallest = size[type]; - } - } - prevline = &in[y * linebytes]; - out[y * (linebytes + 1)] = bestType; /*the first byte of a scanline will be the filter type*/ - for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[bestType][x]; - } - for(type = 0; type != 5; ++type) free(attempt[type]); - } - else return 88; /* unknown filter strategy */ - - return error; -} - -static void addPaddingBits(unsigned char* out, const unsigned char* in, - size_t olinebits, size_t ilinebits, unsigned h) -{ - /*The opposite of the removePaddingBits function - olinebits must be >= ilinebits*/ - unsigned y; - size_t diff = olinebits - ilinebits; - size_t obp = 0, ibp = 0; /*bit pointers*/ - for(y = 0; y != h; ++y) - { - 
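Every strategy above is selected through the encoder settings: filter_palette_zero forces filter 0 for palette and low-bit-depth images (the rule the PNG standard suggests), filter_strategy chooses between zero, minimum-sum, entropy, predefined and brute force, and predefined_filters supplies one filter byte per scanline for LFS_PREDEFINED. A sketch that encodes the same invented gradient with three strategies to compare the resulting sizes (the numbers themselves will vary with the image content):

    #include <stdio.h>
    #include <stdlib.h>
    #include "lodepng.h"

    static size_t encode_with(LodePNGFilterStrategy strategy,
                              const unsigned char* image, unsigned w, unsigned h)
    {
        unsigned char* png = 0;
        size_t pngsize = 0;

        LodePNGState state;
        lodepng_state_init(&state);
        state.encoder.filter_strategy = strategy;

        unsigned error = lodepng_encode(&png, &pngsize, image, w, h, &state);
        lodepng_state_cleanup(&state);
        free(png);
        return error ? 0 : pngsize;
    }

    int main(void)
    {
        enum { W = 64, H = 64 };
        unsigned char* image = (unsigned char*)malloc(W * H * 4);
        unsigned x, y;
        if(!image) return 1;

        /* RGBA gradient, just to give the filters something to chew on */
        for(y = 0; y < H; ++y)
            for(x = 0; x < W; ++x)
            {
                unsigned char* p = image + 4 * (y * W + x);
                p[0] = (unsigned char)(x * 4);
                p[1] = (unsigned char)(y * 4);
                p[2] = 128;
                p[3] = 255;
            }

        printf("LFS_ZERO:    %u bytes\n", (unsigned)encode_with(LFS_ZERO, image, W, H));
        printf("LFS_MINSUM:  %u bytes\n", (unsigned)encode_with(LFS_MINSUM, image, W, H));
        printf("LFS_ENTROPY: %u bytes\n", (unsigned)encode_with(LFS_ENTROPY, image, W, H));

        free(image);
        return 0;
    }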
size_t x; - for(x = 0; x < ilinebits; ++x) - { - unsigned char bit = readBitFromReversedStream(&ibp, in); - setBitOfReversedStream(&obp, out, bit); - } - /*obp += diff; --> no, fill in some value in the padding bits too, to avoid - "Use of uninitialised value of size ###" warning from valgrind*/ - for(x = 0; x != diff; ++x) setBitOfReversedStream(&obp, out, 0); - } -} - -/* -in: non-interlaced image with size w*h -out: the same pixels, but re-ordered according to PNG's Adam7 interlacing, with - no padding bits between scanlines, but between reduced images so that each - reduced image starts at a byte. -bpp: bits per pixel -there are no padding bits, not between scanlines, not between reduced images -in has the following size in bits: w * h * bpp. -out is possibly bigger due to padding bits between reduced images -NOTE: comments about padding bits are only relevant if bpp < 8 -*/ -static void Adam7_interlace(unsigned char* out, const unsigned char* in, unsigned w, unsigned h, unsigned bpp) -{ - unsigned passw[7], passh[7]; - size_t filter_passstart[8], padded_passstart[8], passstart[8]; - unsigned i; - - Adam7_getpassvalues(passw, passh, filter_passstart, padded_passstart, passstart, w, h, bpp); - - if(bpp >= 8) - { - for(i = 0; i != 7; ++i) - { - unsigned x, y, b; - size_t bytewidth = bpp / 8; - for(y = 0; y < passh[i]; ++y) - for(x = 0; x < passw[i]; ++x) - { - size_t pixelinstart = ((ADAM7_IY[i] + y * ADAM7_DY[i]) * w + ADAM7_IX[i] + x * ADAM7_DX[i]) * bytewidth; - size_t pixeloutstart = passstart[i] + (y * passw[i] + x) * bytewidth; - for(b = 0; b < bytewidth; ++b) - { - out[pixeloutstart + b] = in[pixelinstart + b]; - } - } - } - } - else /*bpp < 8: Adam7 with pixels < 8 bit is a bit trickier: with bit pointers*/ - { - for(i = 0; i != 7; ++i) - { - unsigned x, y, b; - unsigned ilinebits = bpp * passw[i]; - unsigned olinebits = bpp * w; - size_t obp, ibp; /*bit pointers (for out and in buffer)*/ - for(y = 0; y < passh[i]; ++y) - for(x = 0; x < passw[i]; ++x) - { - ibp = (ADAM7_IY[i] + y * ADAM7_DY[i]) * olinebits + (ADAM7_IX[i] + x * ADAM7_DX[i]) * bpp; - obp = (8 * passstart[i]) + (y * ilinebits + x * bpp); - for(b = 0; b < bpp; ++b) - { - unsigned char bit = readBitFromReversedStream(&ibp, in); - setBitOfReversedStream(&obp, out, bit); - } - } - } - } -} - -/*out must be buffer big enough to contain uncompressed IDAT chunk data, and in must contain the full image. -return value is error**/ -static unsigned preProcessScanlines(unsigned char** out, size_t* outsize, const unsigned char* in, - unsigned w, unsigned h, - const LodePNGInfo* info_png, const LodePNGEncoderSettings* settings) -{ - /* - This function converts the pure 2D image with the PNG's colortype, into filtered-padded-interlaced data. 
Steps: - *) if no Adam7: 1) add padding bits (= posible extra bits per scanline if bpp < 8) 2) filter - *) if adam7: 1) Adam7_interlace 2) 7x add padding bits 3) 7x filter - */ - unsigned bpp = lodepng_get_bpp(&info_png->color); - unsigned error = 0; - - if(info_png->interlace_method == 0) - { - *outsize = h + (h * ((w * bpp + 7) / 8)); /*image size plus an extra byte per scanline + possible padding bits*/ - *out = (unsigned char*)lodepng_malloc(*outsize); - if(!(*out) && (*outsize)) error = 83; /*alloc fail*/ - - if(!error) - { - /*non multiple of 8 bits per scanline, padding bits needed per scanline*/ - if(bpp < 8 && w * bpp != ((w * bpp + 7) / 8) * 8) - { - unsigned char* padded = (unsigned char*)lodepng_malloc(h * ((w * bpp + 7) / 8)); - if(!padded) error = 83; /*alloc fail*/ - if(!error) - { - addPaddingBits(padded, in, ((w * bpp + 7) / 8) * 8, w * bpp, h); - error = filter(*out, padded, w, h, &info_png->color, settings); - } - lodepng_free(padded); - } - else - { - /*we can immediately filter into the out buffer, no other steps needed*/ - error = filter(*out, in, w, h, &info_png->color, settings); - } - } - } - else /*interlace_method is 1 (Adam7)*/ - { - unsigned passw[7], passh[7]; - size_t filter_passstart[8], padded_passstart[8], passstart[8]; - unsigned char* adam7; - - Adam7_getpassvalues(passw, passh, filter_passstart, padded_passstart, passstart, w, h, bpp); - - *outsize = filter_passstart[7]; /*image size plus an extra byte per scanline + possible padding bits*/ - *out = (unsigned char*)lodepng_malloc(*outsize); - if(!(*out)) error = 83; /*alloc fail*/ - - adam7 = (unsigned char*)lodepng_malloc(passstart[7]); - if(!adam7 && passstart[7]) error = 83; /*alloc fail*/ - - if(!error) - { - unsigned i; - - Adam7_interlace(adam7, in, w, h, bpp); - for(i = 0; i != 7; ++i) - { - if(bpp < 8) - { - unsigned char* padded = (unsigned char*)lodepng_malloc(padded_passstart[i + 1] - padded_passstart[i]); - if(!padded) ERROR_BREAK(83); /*alloc fail*/ - addPaddingBits(padded, &adam7[passstart[i]], - ((passw[i] * bpp + 7) / 8) * 8, passw[i] * bpp, passh[i]); - error = filter(&(*out)[filter_passstart[i]], padded, - passw[i], passh[i], &info_png->color, settings); - lodepng_free(padded); - } - else - { - error = filter(&(*out)[filter_passstart[i]], &adam7[padded_passstart[i]], - passw[i], passh[i], &info_png->color, settings); - } - - if(error) break; - } - } - - lodepng_free(adam7); - } - - return error; -} - -/* -palette must have 4 * palettesize bytes allocated, and given in format RGBARGBARGBARGBA... -returns 0 if the palette is opaque, -returns 1 if the palette has a single color with alpha 0 ==> color key -returns 2 if the palette is semi-translucent. 
-*/ -static unsigned getPaletteTranslucency(const unsigned char* palette, size_t palettesize) -{ - size_t i; - unsigned key = 0; - unsigned r = 0, g = 0, b = 0; /*the value of the color with alpha 0, so long as color keying is possible*/ - for(i = 0; i != palettesize; ++i) - { - if(!key && palette[4 * i + 3] == 0) - { - r = palette[4 * i + 0]; g = palette[4 * i + 1]; b = palette[4 * i + 2]; - key = 1; - i = (size_t)(-1); /*restart from beginning, to detect earlier opaque colors with key's value*/ - } - else if(palette[4 * i + 3] != 255) return 2; - /*when key, no opaque RGB may have key's RGB*/ - else if(key && r == palette[i * 4 + 0] && g == palette[i * 4 + 1] && b == palette[i * 4 + 2]) return 2; - } - return key; -} - -#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS -static unsigned addUnknownChunks(ucvector* out, unsigned char* data, size_t datasize) -{ - unsigned char* inchunk = data; - while((size_t)(inchunk - data) < datasize) - { - CERROR_TRY_RETURN(lodepng_chunk_append(&out->data, &out->size, inchunk)); - out->allocsize = out->size; /*fix the allocsize again*/ - inchunk = lodepng_chunk_next(inchunk); - } - return 0; -} -#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ - -unsigned lodepng_encode(unsigned char** out, size_t* outsize, - const unsigned char* image, unsigned w, unsigned h, - LodePNGState* state) -{ - LodePNGInfo info; - ucvector outv; - unsigned char* data = 0; /*uncompressed version of the IDAT chunk data*/ - size_t datasize = 0; - - /*provide some proper output values if error will happen*/ - *out = 0; - *outsize = 0; - state->error = 0; - - lodepng_info_init(&info); - lodepng_info_copy(&info, &state->info_png); - - if((info.color.colortype == LCT_PALETTE || state->encoder.force_palette) - && (info.color.palettesize == 0 || info.color.palettesize > 256)) - { - state->error = 68; /*invalid palette size, it is only allowed to be 1-256*/ - return state->error; - } - - if(state->encoder.auto_convert) - { - state->error = lodepng_auto_choose_color(&info.color, image, w, h, &state->info_raw); - } - if(state->error) return state->error; - - if(state->encoder.zlibsettings.btype > 2) - { - CERROR_RETURN_ERROR(state->error, 61); /*error: unexisting btype*/ - } - if(state->info_png.interlace_method > 1) - { - CERROR_RETURN_ERROR(state->error, 71); /*error: unexisting interlace mode*/ - } - - state->error = checkColorValidity(info.color.colortype, info.color.bitdepth); - if(state->error) return state->error; /*error: unexisting color type given*/ - state->error = checkColorValidity(state->info_raw.colortype, state->info_raw.bitdepth); - if(state->error) return state->error; /*error: unexisting color type given*/ - - if(!lodepng_color_mode_equal(&state->info_raw, &info.color)) - { - unsigned char* converted; - size_t size = (w * h * lodepng_get_bpp(&info.color) + 7) / 8; - - converted = (unsigned char*)lodepng_malloc(size); - if(!converted && size) state->error = 83; /*alloc fail*/ - if(!state->error) - { - state->error = lodepng_convert(converted, image, &info.color, &state->info_raw, w, h); - } - if(!state->error) preProcessScanlines(&data, &datasize, converted, w, h, &info, &state->encoder); - lodepng_free(converted); - } - else preProcessScanlines(&data, &datasize, image, w, h, &info, &state->encoder); - - ucvector_init(&outv); - while(!state->error) /*while only executed once, to break on error*/ - { -#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS - size_t i; -#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ - /*write signature and chunks*/ - writeSignature(&outv); - /*IHDR*/ - addChunk_IHDR(&outv, 
w, h, info.color.colortype, info.color.bitdepth, info.interlace_method); -#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS - /*unknown chunks between IHDR and PLTE*/ - if(info.unknown_chunks_data[0]) - { - state->error = addUnknownChunks(&outv, info.unknown_chunks_data[0], info.unknown_chunks_size[0]); - if(state->error) break; - } -#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ - /*PLTE*/ - if(info.color.colortype == LCT_PALETTE) - { - addChunk_PLTE(&outv, &info.color); - } - if(state->encoder.force_palette && (info.color.colortype == LCT_RGB || info.color.colortype == LCT_RGBA)) - { - addChunk_PLTE(&outv, &info.color); - } - /*tRNS*/ - if(info.color.colortype == LCT_PALETTE && getPaletteTranslucency(info.color.palette, info.color.palettesize) != 0) - { - addChunk_tRNS(&outv, &info.color); - } - if((info.color.colortype == LCT_GREY || info.color.colortype == LCT_RGB) && info.color.key_defined) - { - addChunk_tRNS(&outv, &info.color); - } -#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS - /*bKGD (must come between PLTE and the IDAt chunks*/ - if(info.background_defined) addChunk_bKGD(&outv, &info); - /*pHYs (must come before the IDAT chunks)*/ - if(info.phys_defined) addChunk_pHYs(&outv, &info); - - /*unknown chunks between PLTE and IDAT*/ - if(info.unknown_chunks_data[1]) - { - state->error = addUnknownChunks(&outv, info.unknown_chunks_data[1], info.unknown_chunks_size[1]); - if(state->error) break; - } -#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ - /*IDAT (multiple IDAT chunks must be consecutive)*/ - state->error = addChunk_IDAT(&outv, data, datasize, &state->encoder.zlibsettings); - if(state->error) break; -#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS - /*tIME*/ - if(info.time_defined) addChunk_tIME(&outv, &info.time); - /*tEXt and/or zTXt*/ - for(i = 0; i != info.text_num; ++i) - { - if(strlen(info.text_keys[i]) > 79) - { - state->error = 66; /*text chunk too large*/ - break; - } - if(strlen(info.text_keys[i]) < 1) - { - state->error = 67; /*text chunk too small*/ - break; - } - if(state->encoder.text_compression) - { - addChunk_zTXt(&outv, info.text_keys[i], info.text_strings[i], &state->encoder.zlibsettings); - } - else - { - addChunk_tEXt(&outv, info.text_keys[i], info.text_strings[i]); - } - } - /*LodePNG version id in text chunk*/ - if(state->encoder.add_id) - { - unsigned alread_added_id_text = 0; - for(i = 0; i != info.text_num; ++i) - { - if(!strcmp(info.text_keys[i], "LodePNG")) - { - alread_added_id_text = 1; - break; - } - } - if(alread_added_id_text == 0) - { - addChunk_tEXt(&outv, "LodePNG", LODEPNG_VERSION_STRING); /*it's shorter as tEXt than as zTXt chunk*/ - } - } - /*iTXt*/ - for(i = 0; i != info.itext_num; ++i) - { - if(strlen(info.itext_keys[i]) > 79) - { - state->error = 66; /*text chunk too large*/ - break; - } - if(strlen(info.itext_keys[i]) < 1) - { - state->error = 67; /*text chunk too small*/ - break; - } - addChunk_iTXt(&outv, state->encoder.text_compression, - info.itext_keys[i], info.itext_langtags[i], info.itext_transkeys[i], info.itext_strings[i], - &state->encoder.zlibsettings); - } - - /*unknown chunks between IDAT and IEND*/ - if(info.unknown_chunks_data[2]) - { - state->error = addUnknownChunks(&outv, info.unknown_chunks_data[2], info.unknown_chunks_size[2]); - if(state->error) break; - } -#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ - addChunk_IEND(&outv); - - break; /*this isn't really a while loop; no error happened so break out now!*/ - } - - lodepng_info_cleanup(&info); - lodepng_free(data); - /*instead of cleaning the vector up, give it to the output*/ - *out = 
outv.data; - *outsize = outv.size; - - return state->error; -} - -unsigned lodepng_encode_memory(unsigned char** out, size_t* outsize, const unsigned char* image, - unsigned w, unsigned h, LodePNGColorType colortype, unsigned bitdepth) -{ - unsigned error; - LodePNGState state; - lodepng_state_init(&state); - state.info_raw.colortype = colortype; - state.info_raw.bitdepth = bitdepth; - state.info_png.color.colortype = colortype; - state.info_png.color.bitdepth = bitdepth; - lodepng_encode(out, outsize, image, w, h, &state); - error = state.error; - lodepng_state_cleanup(&state); - return error; -} - -unsigned lodepng_encode32(unsigned char** out, size_t* outsize, const unsigned char* image, unsigned w, unsigned h) -{ - return lodepng_encode_memory(out, outsize, image, w, h, LCT_RGBA, 8); -} - -unsigned lodepng_encode24(unsigned char** out, size_t* outsize, const unsigned char* image, unsigned w, unsigned h) -{ - return lodepng_encode_memory(out, outsize, image, w, h, LCT_RGB, 8); -} - -#ifdef LODEPNG_COMPILE_DISK -unsigned lodepng_encode_file(const char* filename, const unsigned char* image, unsigned w, unsigned h, - LodePNGColorType colortype, unsigned bitdepth) -{ - unsigned char* buffer; - size_t buffersize; - unsigned error = lodepng_encode_memory(&buffer, &buffersize, image, w, h, colortype, bitdepth); - if(!error) error = lodepng_save_file(buffer, buffersize, filename); - lodepng_free(buffer); - return error; -} - -unsigned lodepng_encode32_file(const char* filename, const unsigned char* image, unsigned w, unsigned h) -{ - return lodepng_encode_file(filename, image, w, h, LCT_RGBA, 8); -} - -unsigned lodepng_encode24_file(const char* filename, const unsigned char* image, unsigned w, unsigned h) -{ - return lodepng_encode_file(filename, image, w, h, LCT_RGB, 8); -} -#endif /*LODEPNG_COMPILE_DISK*/ - -void lodepng_encoder_settings_init(LodePNGEncoderSettings* settings) -{ - lodepng_compress_settings_init(&settings->zlibsettings); - settings->filter_palette_zero = 1; - settings->filter_strategy = LFS_MINSUM; - settings->auto_convert = 1; - settings->force_palette = 0; - settings->predefined_filters = 0; -#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS - settings->add_id = 0; - settings->text_compression = 1; -#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ -} - -#endif /*LODEPNG_COMPILE_ENCODER*/ -#endif /*LODEPNG_COMPILE_PNG*/ - -#ifdef LODEPNG_COMPILE_ERROR_TEXT -/* -This returns the description of a numerical error code in English. This is also -the documentation of all the error codes. 
-*/ -const char* lodepng_error_text(unsigned code) -{ - switch(code) - { - case 0: return "no error, everything went ok"; - case 1: return "nothing done yet"; /*the Encoder/Decoder has done nothing yet, error checking makes no sense yet*/ - case 10: return "end of input memory reached without huffman end code"; /*while huffman decoding*/ - case 11: return "error in code tree made it jump outside of huffman tree"; /*while huffman decoding*/ - case 13: return "problem while processing dynamic deflate block"; - case 14: return "problem while processing dynamic deflate block"; - case 15: return "problem while processing dynamic deflate block"; - case 16: return "unexisting code while processing dynamic deflate block"; - case 17: return "end of out buffer memory reached while inflating"; - case 18: return "invalid distance code while inflating"; - case 19: return "end of out buffer memory reached while inflating"; - case 20: return "invalid deflate block BTYPE encountered while decoding"; - case 21: return "NLEN is not ones complement of LEN in a deflate block"; - /*end of out buffer memory reached while inflating: - This can happen if the inflated deflate data is longer than the amount of bytes required to fill up - all the pixels of the image, given the color depth and image dimensions. Something that doesn't - happen in a normal, well encoded, PNG image.*/ - case 22: return "end of out buffer memory reached while inflating"; - case 23: return "end of in buffer memory reached while inflating"; - case 24: return "invalid FCHECK in zlib header"; - case 25: return "invalid compression method in zlib header"; - case 26: return "FDICT encountered in zlib header while it's not used for PNG"; - case 27: return "PNG file is smaller than a PNG header"; - /*Checks the magic file header, the first 8 bytes of the PNG file*/ - case 28: return "incorrect PNG signature, it's no PNG or corrupted"; - case 29: return "first chunk is not the header chunk"; - case 30: return "chunk length too large, chunk broken off at end of file"; - case 31: return "illegal PNG color type or bpp"; - case 32: return "illegal PNG compression method"; - case 33: return "illegal PNG filter method"; - case 34: return "illegal PNG interlace method"; - case 35: return "chunk length of a chunk is too large or the chunk too small"; - case 36: return "illegal PNG filter type encountered"; - case 37: return "illegal bit depth for this color type given"; - case 38: return "the palette is too big"; /*more than 256 colors*/ - case 39: return "more palette alpha values given in tRNS chunk than there are colors in the palette"; - case 40: return "tRNS chunk has wrong size for greyscale image"; - case 41: return "tRNS chunk has wrong size for RGB image"; - case 42: return "tRNS chunk appeared while it was not allowed for this color type"; - case 43: return "bKGD chunk has wrong size for palette image"; - case 44: return "bKGD chunk has wrong size for greyscale image"; - case 45: return "bKGD chunk has wrong size for RGB image"; - case 48: return "empty input buffer given to decoder. 
Maybe caused by non-existing file?"; - case 49: return "jumped past memory while generating dynamic huffman tree"; - case 50: return "jumped past memory while generating dynamic huffman tree"; - case 51: return "jumped past memory while inflating huffman block"; - case 52: return "jumped past memory while inflating"; - case 53: return "size of zlib data too small"; - case 54: return "repeat symbol in tree while there was no value symbol yet"; - /*jumped past tree while generating huffman tree, this could be when the - tree will have more leaves than symbols after generating it out of the - given lenghts. They call this an oversubscribed dynamic bit lengths tree in zlib.*/ - case 55: return "jumped past tree while generating huffman tree"; - case 56: return "given output image colortype or bitdepth not supported for color conversion"; - case 57: return "invalid CRC encountered (checking CRC can be disabled)"; - case 58: return "invalid ADLER32 encountered (checking ADLER32 can be disabled)"; - case 59: return "requested color conversion not supported"; - case 60: return "invalid window size given in the settings of the encoder (must be 0-32768)"; - case 61: return "invalid BTYPE given in the settings of the encoder (only 0, 1 and 2 are allowed)"; - /*LodePNG leaves the choice of RGB to greyscale conversion formula to the user.*/ - case 62: return "conversion from color to greyscale not supported"; - case 63: return "length of a chunk too long, max allowed for PNG is 2147483647 bytes per chunk"; /*(2^31-1)*/ - /*this would result in the inability of a deflated block to ever contain an end code. It must be at least 1.*/ - case 64: return "the length of the END symbol 256 in the Huffman tree is 0"; - case 66: return "the length of a text chunk keyword given to the encoder is longer than the maximum of 79 bytes"; - case 67: return "the length of a text chunk keyword given to the encoder is smaller than the minimum of 1 byte"; - case 68: return "tried to encode a PLTE chunk with a palette that has less than 1 or more than 256 colors"; - case 69: return "unknown chunk type with 'critical' flag encountered by the decoder"; - case 71: return "unexisting interlace mode given to encoder (must be 0 or 1)"; - case 72: return "while decoding, unexisting compression method encountering in zTXt or iTXt chunk (it must be 0)"; - case 73: return "invalid tIME chunk size"; - case 74: return "invalid pHYs chunk size"; - /*length could be wrong, or data chopped off*/ - case 75: return "no null termination char found while decoding text chunk"; - case 76: return "iTXt chunk too short to contain required bytes"; - case 77: return "integer overflow in buffer size"; - case 78: return "failed to open file for reading"; /*file doesn't exist or couldn't be opened for reading*/ - case 79: return "failed to open file for writing"; - case 80: return "tried creating a tree of 0 symbols"; - case 81: return "lazy matching at pos 0 is impossible"; - case 82: return "color conversion to palette requested while a color isn't in palette"; - case 83: return "memory allocation failed"; - case 84: return "given image too small to contain all pixels to be encoded"; - case 86: return "impossible offset in lz77 encoding (internal bug)"; - case 87: return "must provide custom zlib function pointer if LODEPNG_COMPILE_ZLIB is not defined"; - case 88: return "invalid filter strategy given for LodePNGEncoderSettings.filter_strategy"; - case 89: return "text chunk keyword too short or long: must have size 1-79"; - /*the windowsize in the 
LodePNGCompressSettings. Requiring POT(==> & instead of %) makes encoding 12% faster.*/ - case 90: return "windowsize must be a power of two"; - case 91: return "invalid decompressed idat size"; - case 92: return "too many pixels, not supported"; - case 93: return "zero width or height is invalid"; - case 94: return "header chunk must have a size of 13 bytes"; - } - return "unknown error code"; -} -#endif /*LODEPNG_COMPILE_ERROR_TEXT*/ - -/* ////////////////////////////////////////////////////////////////////////// */ -/* ////////////////////////////////////////////////////////////////////////// */ -/* // C++ Wrapper // */ -/* ////////////////////////////////////////////////////////////////////////// */ -/* ////////////////////////////////////////////////////////////////////////// */ - -#ifdef LODEPNG_COMPILE_CPP -namespace lodepng -{ - -#ifdef LODEPNG_COMPILE_DISK -unsigned load_file(std::vector& buffer, const std::string& filename) -{ - std::ifstream file(filename.c_str(), std::ios::in|std::ios::binary|std::ios::ate); - if(!file) return 78; - - /*get filesize*/ - std::streamsize size = 0; - if(file.seekg(0, std::ios::end).good()) size = file.tellg(); - if(file.seekg(0, std::ios::beg).good()) size -= file.tellg(); - - /*read contents of the file into the vector*/ - buffer.resize(size_t(size)); - if(size > 0) file.read((char*)(&buffer[0]), size); - - return 0; /* OK */ -} - -/*write given buffer to the file, overwriting the file, it doesn't append to it.*/ -unsigned save_file(const std::vector& buffer, const std::string& filename) -{ - std::ofstream file(filename.c_str(), std::ios::out|std::ios::binary); - if(!file) return 79; - file.write(buffer.empty() ? 0 : (char*)&buffer[0], std::streamsize(buffer.size())); - return 0; -} -#endif /* LODEPNG_COMPILE_DISK */ - -#ifdef LODEPNG_COMPILE_ZLIB -#ifdef LODEPNG_COMPILE_DECODER -unsigned decompress(std::vector& out, const unsigned char* in, size_t insize, - const LodePNGDecompressSettings& settings) -{ - unsigned char* buffer = 0; - size_t buffersize = 0; - unsigned error = zlib_decompress(&buffer, &buffersize, in, insize, &settings); - if(buffer) - { - out.insert(out.end(), &buffer[0], &buffer[buffersize]); - lodepng_free(buffer); - } - return error; -} - -unsigned decompress(std::vector& out, const std::vector& in, - const LodePNGDecompressSettings& settings) -{ - return decompress(out, in.empty() ? 0 : &in[0], in.size(), settings); -} -#endif /* LODEPNG_COMPILE_DECODER */ - -#ifdef LODEPNG_COMPILE_ENCODER -unsigned compress(std::vector& out, const unsigned char* in, size_t insize, - const LodePNGCompressSettings& settings) -{ - unsigned char* buffer = 0; - size_t buffersize = 0; - unsigned error = zlib_compress(&buffer, &buffersize, in, insize, &settings); - if(buffer) - { - out.insert(out.end(), &buffer[0], &buffer[buffersize]); - lodepng_free(buffer); - } - return error; -} - -unsigned compress(std::vector& out, const std::vector& in, - const LodePNGCompressSettings& settings) -{ - return compress(out, in.empty() ? 
0 : &in[0], in.size(), settings); -} -#endif /* LODEPNG_COMPILE_ENCODER */ -#endif /* LODEPNG_COMPILE_ZLIB */ - - -#ifdef LODEPNG_COMPILE_PNG - -State::State() -{ - lodepng_state_init(this); -} - -State::State(const State& other) -{ - lodepng_state_init(this); - lodepng_state_copy(this, &other); -} - -State::~State() -{ - lodepng_state_cleanup(this); -} - -State& State::operator=(const State& other) -{ - lodepng_state_copy(this, &other); - return *this; -} - -#ifdef LODEPNG_COMPILE_DECODER - -unsigned decode(std::vector& out, unsigned& w, unsigned& h, const unsigned char* in, - size_t insize, LodePNGColorType colortype, unsigned bitdepth) -{ - unsigned char* buffer; - unsigned error = lodepng_decode_memory(&buffer, &w, &h, in, insize, colortype, bitdepth); - if(buffer && !error) - { - State state; - state.info_raw.colortype = colortype; - state.info_raw.bitdepth = bitdepth; - size_t buffersize = lodepng_get_raw_size(w, h, &state.info_raw); - out.insert(out.end(), &buffer[0], &buffer[buffersize]); - lodepng_free(buffer); - } - return error; -} - -unsigned decode(std::vector& out, unsigned& w, unsigned& h, - const std::vector& in, LodePNGColorType colortype, unsigned bitdepth) -{ - return decode(out, w, h, in.empty() ? 0 : &in[0], (unsigned)in.size(), colortype, bitdepth); -} - -unsigned decode(std::vector& out, unsigned& w, unsigned& h, - State& state, - const unsigned char* in, size_t insize) -{ - unsigned char* buffer = NULL; - unsigned error = lodepng_decode(&buffer, &w, &h, &state, in, insize); - if(buffer && !error) - { - size_t buffersize = lodepng_get_raw_size(w, h, &state.info_raw); - out.insert(out.end(), &buffer[0], &buffer[buffersize]); - } - lodepng_free(buffer); - return error; -} - -unsigned decode(std::vector& out, unsigned& w, unsigned& h, - State& state, - const std::vector& in) -{ - return decode(out, w, h, state, in.empty() ? 0 : &in[0], in.size()); -} - -#ifdef LODEPNG_COMPILE_DISK -unsigned decode(std::vector& out, unsigned& w, unsigned& h, const std::string& filename, - LodePNGColorType colortype, unsigned bitdepth) -{ - std::vector buffer; - unsigned error = load_file(buffer, filename); - if(error) return error; - return decode(out, w, h, buffer, colortype, bitdepth); -} -#endif /* LODEPNG_COMPILE_DECODER */ -#endif /* LODEPNG_COMPILE_DISK */ - -#ifdef LODEPNG_COMPILE_ENCODER -unsigned encode(std::vector& out, const unsigned char* in, unsigned w, unsigned h, - LodePNGColorType colortype, unsigned bitdepth) -{ - unsigned char* buffer; - size_t buffersize; - unsigned error = lodepng_encode_memory(&buffer, &buffersize, in, w, h, colortype, bitdepth); - if(buffer) - { - out.insert(out.end(), &buffer[0], &buffer[buffersize]); - lodepng_free(buffer); - } - return error; -} - -unsigned encode(std::vector& out, - const std::vector& in, unsigned w, unsigned h, - LodePNGColorType colortype, unsigned bitdepth) -{ - if(lodepng_get_raw_size_lct(w, h, colortype, bitdepth) > in.size()) return 84; - return encode(out, in.empty() ? 
0 : &in[0], w, h, colortype, bitdepth);
-}
-
-unsigned encode(std::vector<unsigned char>& out,
-                const unsigned char* in, unsigned w, unsigned h,
-                State& state)
-{
-  unsigned char* buffer;
-  size_t buffersize;
-  unsigned error = lodepng_encode(&buffer, &buffersize, in, w, h, &state);
-  if(buffer)
-  {
-    out.insert(out.end(), &buffer[0], &buffer[buffersize]);
-    lodepng_free(buffer);
-  }
-  return error;
-}
-
-unsigned encode(std::vector<unsigned char>& out,
-                const std::vector<unsigned char>& in, unsigned w, unsigned h,
-                State& state)
-{
-  if(lodepng_get_raw_size(w, h, &state.info_raw) > in.size()) return 84;
-  return encode(out, in.empty() ? 0 : &in[0], w, h, state);
-}
-
-#ifdef LODEPNG_COMPILE_DISK
-unsigned encode(const std::string& filename,
-                const unsigned char* in, unsigned w, unsigned h,
-                LodePNGColorType colortype, unsigned bitdepth)
-{
-  std::vector<unsigned char> buffer;
-  unsigned error = encode(buffer, in, w, h, colortype, bitdepth);
-  if(!error) error = save_file(buffer, filename);
-  return error;
-}
-
-unsigned encode(const std::string& filename,
-                const std::vector<unsigned char>& in, unsigned w, unsigned h,
-                LodePNGColorType colortype, unsigned bitdepth)
-{
-  if(lodepng_get_raw_size_lct(w, h, colortype, bitdepth) > in.size()) return 84;
-  return encode(filename, in.empty() ? 0 : &in[0], w, h, colortype, bitdepth);
-}
-#endif /* LODEPNG_COMPILE_DISK */
-#endif /* LODEPNG_COMPILE_ENCODER */
-#endif /* LODEPNG_COMPILE_PNG */
-} /* namespace lodepng */
-#endif /*LODEPNG_COMPILE_CPP*/
+/*
+LodePNG version 20201017
+
+Copyright (c) 2005-2020 Lode Vandevenne
+
+This software is provided 'as-is', without any express or implied
+warranty. In no event will the authors be held liable for any damages
+arising from the use of this software.
+
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+
+    1. The origin of this software must not be misrepresented; you must not
+    claim that you wrote the original software. If you use this software
+    in a product, an acknowledgment in the product documentation would be
+    appreciated but is not required.
+
+    2. Altered source versions must be plainly marked as such, and must not be
+    misrepresented as being the original software.
+
+    3. This notice may not be removed or altered from any source
+    distribution.
+*/
+
+/*
+The manual and changelog are in the header file "lodepng.h"
+Rename this file to lodepng.cpp to use it for C++, or to lodepng.c to use it for C.
+*/
+
+#include "lodepng.h"
+
+#ifdef LODEPNG_COMPILE_DISK
+#include <limits.h> /* LONG_MAX */
+#include <stdio.h> /* file handling */
+#endif /* LODEPNG_COMPILE_DISK */
+
+#ifdef LODEPNG_COMPILE_ALLOCATORS
+#include <stdlib.h> /* allocations */
+#endif /* LODEPNG_COMPILE_ALLOCATORS */
+
+#if defined(_MSC_VER) && (_MSC_VER >= 1310) /*Visual Studio: A few warning types are not desired here.*/
+#pragma warning( disable : 4244 ) /*implicit conversions: not warned by gcc -Wall -Wextra and requires too much casts*/
+#pragma warning( disable : 4996 ) /*VS does not like fopen, but fopen_s is not standard C so unusable here*/
+#endif /*_MSC_VER */
+
+const char* LODEPNG_VERSION_STRING = "20201017";
+
+/*
+This source file is built up in the following large parts. The code sections
+with the "LODEPNG_COMPILE_" #defines divide this up further in an intermixed way.
+-Tools for C and common code for PNG and Zlib
+-C Code for Zlib (huffman, deflate, ...)
+-C Code for PNG (file format chunks, adam7, PNG filters, color conversions, ...)
+-The C++ wrapper around all of the above +*/ + +/* ////////////////////////////////////////////////////////////////////////// */ +/* ////////////////////////////////////////////////////////////////////////// */ +/* // Tools for C, and common code for PNG and Zlib. // */ +/* ////////////////////////////////////////////////////////////////////////// */ +/* ////////////////////////////////////////////////////////////////////////// */ + +/*The malloc, realloc and free functions defined here with "lodepng_" in front +of the name, so that you can easily change them to others related to your +platform if needed. Everything else in the code calls these. Pass +-DLODEPNG_NO_COMPILE_ALLOCATORS to the compiler, or comment out +#define LODEPNG_COMPILE_ALLOCATORS in the header, to disable the ones here and +define them in your own project's source files without needing to change +lodepng source code. Don't forget to remove "static" if you copypaste them +from here.*/ + +#ifdef LODEPNG_COMPILE_ALLOCATORS +static void* lodepng_malloc(size_t size) { +#ifdef LODEPNG_MAX_ALLOC + if(size > LODEPNG_MAX_ALLOC) return 0; +#endif + return malloc(size); +} + +/* NOTE: when realloc returns NULL, it leaves the original memory untouched */ +static void* lodepng_realloc(void* ptr, size_t new_size) { +#ifdef LODEPNG_MAX_ALLOC + if(new_size > LODEPNG_MAX_ALLOC) return 0; +#endif + return realloc(ptr, new_size); +} + +static void lodepng_free(void* ptr) { + free(ptr); +} +#else /*LODEPNG_COMPILE_ALLOCATORS*/ +/* TODO: support giving additional void* payload to the custom allocators */ +void* lodepng_malloc(size_t size); +void* lodepng_realloc(void* ptr, size_t new_size); +void lodepng_free(void* ptr); +#endif /*LODEPNG_COMPILE_ALLOCATORS*/ + +/* convince the compiler to inline a function, for use when this measurably improves performance */ +/* inline is not available in C90, but use it when supported by the compiler */ +#if (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || (defined(__cplusplus) && (__cplusplus >= 199711L)) +#define LODEPNG_INLINE inline +#else +#define LODEPNG_INLINE /* not available */ +#endif + +/* restrict is not available in C90, but use it when supported by the compiler */ +#if (defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))) ||\ + (defined(_MSC_VER) && (_MSC_VER >= 1400)) || \ + (defined(__WATCOMC__) && (__WATCOMC__ >= 1250) && !defined(__cplusplus)) +#define LODEPNG_RESTRICT __restrict +#else +#define LODEPNG_RESTRICT /* not available */ +#endif + + +/* Replacements for C library functions such as memcpy and strlen, to support platforms +where a full C library is not available. The compiler can recognize them and compile +to something as fast. */ + +//static void lodepng_memcpy(void* LODEPNG_RESTRICT dst, +// const void* LODEPNG_RESTRICT src, size_t size) { +// size_t i; +// for(i = 0; i < size; i++) ((char*)dst)[i] = ((const char*)src)[i]; +//} +// +//static void lodepng_memset(void* LODEPNG_RESTRICT dst, +// int value, size_t num) { +// size_t i; +// for(i = 0; i < num; i++) ((char*)dst)[i] = (char)value; +//} +// +///* does not check memory out of bounds, do not use on untrusted data */ +//static size_t lodepng_strlen(const char* a) { +// const char* orig = a; +// /* avoid warning about unused function in case of disabled COMPILE... 
macros */ +// (void)(&lodepng_strlen); +// while(*a) a++; +// return (size_t)(a - orig); +//} + +#define lodepng_memcpy(dst, src, size) memcpy(dst, src, size) +#define lodepng_memset(dst, value, size) memset(dst, value, size) +#define lodepng_strlen(str) strlen(str) + + +#define LODEPNG_MAX(a, b) (((a) > (b)) ? (a) : (b)) +#define LODEPNG_MIN(a, b) (((a) < (b)) ? (a) : (b)) +#define LODEPNG_ABS(x) ((x) < 0 ? -(x) : (x)) + +#if defined(LODEPNG_COMPILE_PNG) || defined(LODEPNG_COMPILE_DECODER) +/* Safely check if adding two integers will overflow (no undefined +behavior, compiler removing the code, etc...) and output result. */ +static int lodepng_addofl(size_t a, size_t b, size_t* result) { + *result = a + b; /* Unsigned addition is well defined and safe in C90 */ + return *result < a; +} +#endif /*defined(LODEPNG_COMPILE_PNG) || defined(LODEPNG_COMPILE_DECODER)*/ + +#ifdef LODEPNG_COMPILE_DECODER +/* Safely check if multiplying two integers will overflow (no undefined +behavior, compiler removing the code, etc...) and output result. */ +static int lodepng_mulofl(size_t a, size_t b, size_t* result) { + *result = a * b; /* Unsigned multiplication is well defined and safe in C90 */ + return (a != 0 && *result / a != b); +} + +#ifdef LODEPNG_COMPILE_ZLIB +/* Safely check if a + b > c, even if overflow could happen. */ +static int lodepng_gtofl(size_t a, size_t b, size_t c) { + size_t d; + if(lodepng_addofl(a, b, &d)) return 1; + return d > c; +} +#endif /*LODEPNG_COMPILE_ZLIB*/ +#endif /*LODEPNG_COMPILE_DECODER*/ + + +/* +Often in case of an error a value is assigned to a variable and then it breaks +out of a loop (to go to the cleanup phase of a function). This macro does that. +It makes the error handling code shorter and more readable. + +Example: if(!uivector_resize(&lz77_encoded, datasize)) ERROR_BREAK(83); +*/ +#define CERROR_BREAK(errorvar, code){\ + errorvar = code;\ + break;\ +} + +/*version of CERROR_BREAK that assumes the common case where the error variable is named "error"*/ +#define ERROR_BREAK(code) CERROR_BREAK(error, code) + +/*Set error var to the error code, and return it.*/ +#define CERROR_RETURN_ERROR(errorvar, code){\ + errorvar = code;\ + return code;\ +} + +/*Try the code, if it returns error, also return the error.*/ +#define CERROR_TRY_RETURN(call){\ + unsigned error = call;\ + if(error) return error;\ +} + +/*Set error var to the error code, and return from the void function.*/ +#define CERROR_RETURN(errorvar, code){\ + errorvar = code;\ + return;\ +} + +/* +About uivector, ucvector and string: +-All of them wrap dynamic arrays or text strings in a similar way. +-LodePNG was originally written in C++. The vectors replace the std::vectors that were used in the C++ version. +-The string tools are made to avoid problems with compilers that declare things like strncat as deprecated. +-They're not used in the interface, only internally in this file as static functions. +-As with many other structs in this file, the init and cleanup functions serve as ctor and dtor. 
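As an illustration of the pattern described here, a minimal sketch using only the ucvector helpers defined a few lines below (buffer and size stand for caller-provided storage, and 83 is the alloc-fail error code used throughout this file):

    ucvector v = ucvector_init(buffer, size);        wraps existing storage, no copy is made
    if(!ucvector_resize(&v, v.size + 1)) return 83;  on growth, over-allocates by half the old capacity
    v.data[v.size - 1] = 0;                          write the newly appended byte

This is the same append-one-byte idiom the bit writer uses further down in this hunk.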
+*/ + +#ifdef LODEPNG_COMPILE_ZLIB +#ifdef LODEPNG_COMPILE_ENCODER +/*dynamic vector of unsigned ints*/ +typedef struct uivector { + unsigned* data; + size_t size; /*size in number of unsigned longs*/ + size_t allocsize; /*allocated size in bytes*/ +} uivector; + +static void uivector_cleanup(void* p) { + ((uivector*)p)->size = ((uivector*)p)->allocsize = 0; + lodepng_free(((uivector*)p)->data); + ((uivector*)p)->data = NULL; +} + +/*returns 1 if success, 0 if failure ==> nothing done*/ +static unsigned uivector_resize(uivector* p, size_t size) { + size_t allocsize = size * sizeof(unsigned); + if(allocsize > p->allocsize) { + size_t newsize = allocsize + (p->allocsize >> 1u); + void* data = lodepng_realloc(p->data, newsize); + if(data) { + p->allocsize = newsize; + p->data = (unsigned*)data; + } + else return 0; /*error: not enough memory*/ + } + p->size = size; + return 1; /*success*/ +} + +static void uivector_init(uivector* p) { + p->data = NULL; + p->size = p->allocsize = 0; +} + +/*returns 1 if success, 0 if failure ==> nothing done*/ +static unsigned uivector_push_back(uivector* p, unsigned c) { + if(!uivector_resize(p, p->size + 1)) return 0; + p->data[p->size - 1] = c; + return 1; +} +#endif /*LODEPNG_COMPILE_ENCODER*/ +#endif /*LODEPNG_COMPILE_ZLIB*/ + +/* /////////////////////////////////////////////////////////////////////////// */ + +/*dynamic vector of unsigned chars*/ +typedef struct ucvector { + unsigned char* data; + size_t size; /*used size*/ + size_t allocsize; /*allocated size*/ +} ucvector; + +/*returns 1 if success, 0 if failure ==> nothing done*/ +static unsigned ucvector_resize(ucvector* p, size_t size) { + if(size > p->allocsize) { + size_t newsize = size + (p->allocsize >> 1u); + void* data = lodepng_realloc(p->data, newsize); + if(data) { + p->allocsize = newsize; + p->data = (unsigned char*)data; + } + else return 0; /*error: not enough memory*/ + } + p->size = size; + return 1; /*success*/ +} + +static ucvector ucvector_init(unsigned char* buffer, size_t size) { + ucvector v; + v.data = buffer; + v.allocsize = v.size = size; + return v; +} + +/* ////////////////////////////////////////////////////////////////////////// */ + +#ifdef LODEPNG_COMPILE_PNG +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS + +/*free string pointer and set it to NULL*/ +static void string_cleanup(char** out) { + lodepng_free(*out); + *out = NULL; +} + +/*also appends null termination character*/ +static char* alloc_string_sized(const char* in, size_t insize) { + char* out = (char*)lodepng_malloc(insize + 1); + if(out) { + lodepng_memcpy(out, in, insize); + out[insize] = 0; + } + return out; +} + +/* dynamically allocates a new string with a copy of the null terminated input text */ +static char* alloc_string(const char* in) { + return alloc_string_sized(in, lodepng_strlen(in)); +} +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ +#endif /*LODEPNG_COMPILE_PNG*/ + +/* ////////////////////////////////////////////////////////////////////////// */ + +#if defined(LODEPNG_COMPILE_DECODER) || defined(LODEPNG_COMPILE_PNG) +static unsigned lodepng_read32bitInt(const unsigned char* buffer) { + return (((unsigned)buffer[0] << 24u) | ((unsigned)buffer[1] << 16u) | + ((unsigned)buffer[2] << 8u) | (unsigned)buffer[3]); +} +#endif /*defined(LODEPNG_COMPILE_DECODER) || defined(LODEPNG_COMPILE_PNG)*/ + +#if defined(LODEPNG_COMPILE_PNG) || defined(LODEPNG_COMPILE_ENCODER) +/*buffer must have at least 4 allocated bytes available*/ +static void lodepng_set32bitInt(unsigned char* buffer, unsigned value) { + buffer[0] = 
(unsigned char)((value >> 24) & 0xff); + buffer[1] = (unsigned char)((value >> 16) & 0xff); + buffer[2] = (unsigned char)((value >> 8) & 0xff); + buffer[3] = (unsigned char)((value ) & 0xff); +} +#endif /*defined(LODEPNG_COMPILE_PNG) || defined(LODEPNG_COMPILE_ENCODER)*/ + +/* ////////////////////////////////////////////////////////////////////////// */ +/* / File IO / */ +/* ////////////////////////////////////////////////////////////////////////// */ + +#ifdef LODEPNG_COMPILE_DISK + +/* returns negative value on error. This should be pure C compatible, so no fstat. */ +static long lodepng_filesize(const char* filename) { + FILE* file; + long size; + file = fopen(filename, "rb"); + if(!file) return -1; + + if(fseek(file, 0, SEEK_END) != 0) { + fclose(file); + return -1; + } + + size = ftell(file); + /* It may give LONG_MAX as directory size, this is invalid for us. */ + if(size == LONG_MAX) size = -1; + + fclose(file); + return size; +} + +/* load file into buffer that already has the correct allocated size. Returns error code.*/ +static unsigned lodepng_buffer_file(unsigned char* out, size_t size, const char* filename) { + FILE* file; + size_t readsize; + file = fopen(filename, "rb"); + if(!file) return 78; + + readsize = fread(out, 1, size, file); + fclose(file); + + if(readsize != size) return 78; + return 0; +} + +unsigned lodepng_load_file(unsigned char** out, size_t* outsize, const char* filename) { + long size = lodepng_filesize(filename); + if(size < 0) return 78; + *outsize = (size_t)size; + + *out = (unsigned char*)lodepng_malloc((size_t)size); + if(!(*out) && size > 0) return 83; /*the above malloc failed*/ + + return lodepng_buffer_file(*out, (size_t)size, filename); +} + +/*write given buffer to the file, overwriting the file, it doesn't append to it.*/ +unsigned lodepng_save_file(const unsigned char* buffer, size_t buffersize, const char* filename) { + FILE* file; + file = fopen(filename, "wb" ); + if(!file) return 79; + fwrite(buffer, 1, buffersize, file); + fclose(file); + return 0; +} + +#endif /*LODEPNG_COMPILE_DISK*/ + +/* ////////////////////////////////////////////////////////////////////////// */ +/* ////////////////////////////////////////////////////////////////////////// */ +/* // End of common code and tools. Begin of Zlib related code. 
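As a usage illustration for the file helpers above (a sketch only: "in.png" and "out.png" are placeholder names, and the loaded buffer is released with plain free() since the default lodepng_malloc is malloc):

    unsigned char* buf = NULL;
    size_t n = 0;
    unsigned err = lodepng_load_file(&buf, &n, "in.png");    78 = file error, 83 = alloc fail
    if(!err) err = lodepng_save_file(buf, n, "out.png");     79 = could not open for writing
    free(buf);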
// */ +/* ////////////////////////////////////////////////////////////////////////// */ +/* ////////////////////////////////////////////////////////////////////////// */ + +#ifdef LODEPNG_COMPILE_ZLIB +#ifdef LODEPNG_COMPILE_ENCODER + +typedef struct { + ucvector* data; + unsigned char bp; /*ok to overflow, indicates bit pos inside byte*/ +} LodePNGBitWriter; + +static void LodePNGBitWriter_init(LodePNGBitWriter* writer, ucvector* data) { + writer->data = data; + writer->bp = 0; +} + +/*TODO: this ignores potential out of memory errors*/ +#define WRITEBIT(writer, bit){\ + /* append new byte */\ + if(((writer->bp) & 7u) == 0) {\ + if(!ucvector_resize(writer->data, writer->data->size + 1)) return;\ + writer->data->data[writer->data->size - 1] = 0;\ + }\ + (writer->data->data[writer->data->size - 1]) |= (bit << ((writer->bp) & 7u));\ + ++writer->bp;\ +} + +/* LSB of value is written first, and LSB of bytes is used first */ +static void writeBits(LodePNGBitWriter* writer, unsigned value, size_t nbits) { + if(nbits == 1) { /* compiler should statically compile this case if nbits == 1 */ + WRITEBIT(writer, value); + } else { + /* TODO: increase output size only once here rather than in each WRITEBIT */ + size_t i; + for(i = 0; i != nbits; ++i) { + WRITEBIT(writer, (unsigned char)((value >> i) & 1)); + } + } +} + +/* This one is to use for adding huffman symbol, the value bits are written MSB first */ +static void writeBitsReversed(LodePNGBitWriter* writer, unsigned value, size_t nbits) { + size_t i; + for(i = 0; i != nbits; ++i) { + /* TODO: increase output size only once here rather than in each WRITEBIT */ + WRITEBIT(writer, (unsigned char)((value >> (nbits - 1u - i)) & 1u)); + } +} +#endif /*LODEPNG_COMPILE_ENCODER*/ + +#ifdef LODEPNG_COMPILE_DECODER + +typedef struct { + const unsigned char* data; + size_t size; /*size of data in bytes*/ + size_t bitsize; /*size of data in bits, end of valid bp values, should be 8*size*/ + size_t bp; + unsigned buffer; /*buffer for reading bits. NOTE: 'unsigned' must support at least 32 bits*/ +} LodePNGBitReader; + +/* data size argument is in bytes. Returns error if size too large causing overflow */ +static unsigned LodePNGBitReader_init(LodePNGBitReader* reader, const unsigned char* data, size_t size) { + size_t temp; + reader->data = data; + reader->size = size; + /* size in bits, return error if overflow (if size_t is 32 bit this supports up to 500MB) */ + if(lodepng_mulofl(size, 8u, &reader->bitsize)) return 105; + /*ensure incremented bp can be compared to bitsize without overflow even when it would be incremented 32 too much and + trying to ensure 32 more bits*/ + if(lodepng_addofl(reader->bitsize, 64u, &temp)) return 105; + reader->bp = 0; + reader->buffer = 0; + return 0; /*ok*/ +} + +/* +ensureBits functions: +Ensures the reader can at least read nbits bits in one or more readBits calls, +safely even if not enough bits are available. +Returns 1 if there are enough bits available, 0 if not. +*/ + +/*See ensureBits documentation above. This one ensures exactly 1 bit */ +/*static unsigned ensureBits1(LodePNGBitReader* reader) { + if(reader->bp >= reader->bitsize) return 0; + reader->buffer = (unsigned)reader->data[reader->bp >> 3u] >> (reader->bp & 7u); + return 1; +}*/ + +/*See ensureBits documentation above. 
This one ensures up to 9 bits */ +static unsigned ensureBits9(LodePNGBitReader* reader, size_t nbits) { + size_t start = reader->bp >> 3u; + size_t size = reader->size; + if(start + 1u < size) { + reader->buffer = (unsigned)reader->data[start + 0] | ((unsigned)reader->data[start + 1] << 8u); + reader->buffer >>= (reader->bp & 7u); + return 1; + } else { + reader->buffer = 0; + if(start + 0u < size) reader->buffer |= reader->data[start + 0]; + reader->buffer >>= (reader->bp & 7u); + return reader->bp + nbits <= reader->bitsize; + } +} + +/*See ensureBits documentation above. This one ensures up to 17 bits */ +static unsigned ensureBits17(LodePNGBitReader* reader, size_t nbits) { + size_t start = reader->bp >> 3u; + size_t size = reader->size; + if(start + 2u < size) { + reader->buffer = (unsigned)reader->data[start + 0] | ((unsigned)reader->data[start + 1] << 8u) | + ((unsigned)reader->data[start + 2] << 16u); + reader->buffer >>= (reader->bp & 7u); + return 1; + } else { + reader->buffer = 0; + if(start + 0u < size) reader->buffer |= reader->data[start + 0]; + if(start + 1u < size) reader->buffer |= ((unsigned)reader->data[start + 1] << 8u); + reader->buffer >>= (reader->bp & 7u); + return reader->bp + nbits <= reader->bitsize; + } +} + +/*See ensureBits documentation above. This one ensures up to 25 bits */ +static LODEPNG_INLINE unsigned ensureBits25(LodePNGBitReader* reader, size_t nbits) { + size_t start = reader->bp >> 3u; + size_t size = reader->size; + if(start + 3u < size) { + reader->buffer = (unsigned)reader->data[start + 0] | ((unsigned)reader->data[start + 1] << 8u) | + ((unsigned)reader->data[start + 2] << 16u) | ((unsigned)reader->data[start + 3] << 24u); + reader->buffer >>= (reader->bp & 7u); + return 1; + } else { + reader->buffer = 0; + if(start + 0u < size) reader->buffer |= reader->data[start + 0]; + if(start + 1u < size) reader->buffer |= ((unsigned)reader->data[start + 1] << 8u); + if(start + 2u < size) reader->buffer |= ((unsigned)reader->data[start + 2] << 16u); + reader->buffer >>= (reader->bp & 7u); + return reader->bp + nbits <= reader->bitsize; + } +} + +/*See ensureBits documentation above. This one ensures up to 32 bits */ +static LODEPNG_INLINE unsigned ensureBits32(LodePNGBitReader* reader, size_t nbits) { + size_t start = reader->bp >> 3u; + size_t size = reader->size; + if(start + 4u < size) { + reader->buffer = (unsigned)reader->data[start + 0] | ((unsigned)reader->data[start + 1] << 8u) | + ((unsigned)reader->data[start + 2] << 16u) | ((unsigned)reader->data[start + 3] << 24u); + reader->buffer >>= (reader->bp & 7u); + reader->buffer |= (((unsigned)reader->data[start + 4] << 24u) << (8u - (reader->bp & 7u))); + return 1; + } else { + reader->buffer = 0; + if(start + 0u < size) reader->buffer |= reader->data[start + 0]; + if(start + 1u < size) reader->buffer |= ((unsigned)reader->data[start + 1] << 8u); + if(start + 2u < size) reader->buffer |= ((unsigned)reader->data[start + 2] << 16u); + if(start + 3u < size) reader->buffer |= ((unsigned)reader->data[start + 3] << 24u); + reader->buffer >>= (reader->bp & 7u); + return reader->bp + nbits <= reader->bitsize; + } +} + +/* Get bits without advancing the bit pointer. Must have enough bits available with ensureBits. Max nbits is 31. */ +static unsigned peekBits(LodePNGBitReader* reader, size_t nbits) { + /* The shift allows nbits to be only up to 31. 
*/ + return reader->buffer & ((1u << nbits) - 1u); +} + +/* Must have enough bits available with ensureBits */ +static void advanceBits(LodePNGBitReader* reader, size_t nbits) { + reader->buffer >>= nbits; + reader->bp += nbits; +} + +/* Must have enough bits available with ensureBits */ +static unsigned readBits(LodePNGBitReader* reader, size_t nbits) { + unsigned result = peekBits(reader, nbits); + advanceBits(reader, nbits); + return result; +} + +/* Public for testing only. steps and result must have numsteps values. */ +unsigned lode_png_test_bitreader(const unsigned char* data, size_t size, + size_t numsteps, const size_t* steps, unsigned* result) { + size_t i; + LodePNGBitReader reader; + unsigned error = LodePNGBitReader_init(&reader, data, size); + if(error) return 0; + for(i = 0; i < numsteps; i++) { + size_t step = steps[i]; + unsigned ok; + if(step > 25) ok = ensureBits32(&reader, step); + else if(step > 17) ok = ensureBits25(&reader, step); + else if(step > 9) ok = ensureBits17(&reader, step); + else ok = ensureBits9(&reader, step); + if(!ok) return 0; + result[i] = readBits(&reader, step); + } + return 1; +} +#endif /*LODEPNG_COMPILE_DECODER*/ + +static unsigned reverseBits(unsigned bits, unsigned num) { + /*TODO: implement faster lookup table based version when needed*/ + unsigned i, result = 0; + for(i = 0; i < num; i++) result |= ((bits >> (num - i - 1u)) & 1u) << i; + return result; +} + +/* ////////////////////////////////////////////////////////////////////////// */ +/* / Deflate - Huffman / */ +/* ////////////////////////////////////////////////////////////////////////// */ + +#define FIRST_LENGTH_CODE_INDEX 257 +#define LAST_LENGTH_CODE_INDEX 285 +/*256 literals, the end code, some length codes, and 2 unused codes*/ +#define NUM_DEFLATE_CODE_SYMBOLS 288 +/*the distance codes have their own symbols, 30 used, 2 unused*/ +#define NUM_DISTANCE_SYMBOLS 32 +/*the code length codes. 
0-15: code lengths, 16: copy previous 3-6 times, 17: 3-10 zeros, 18: 11-138 zeros*/ +#define NUM_CODE_LENGTH_CODES 19 + +/*the base lengths represented by codes 257-285*/ +static const unsigned LENGTHBASE[29] + = {3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 35, 43, 51, 59, + 67, 83, 99, 115, 131, 163, 195, 227, 258}; + +/*the extra bits used by codes 257-285 (added to base length)*/ +static const unsigned LENGTHEXTRA[29] + = {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, + 4, 4, 4, 4, 5, 5, 5, 5, 0}; + +/*the base backwards distances (the bits of distance codes appear after length codes and use their own huffman tree)*/ +static const unsigned DISTANCEBASE[30] + = {1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513, + 769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577}; + +/*the extra bits of backwards distances (added to base)*/ +static const unsigned DISTANCEEXTRA[30] + = {0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, + 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13}; + +/*the order in which "code length alphabet code lengths" are stored as specified by deflate, out of this the huffman +tree of the dynamic huffman tree lengths is generated*/ +static const unsigned CLCL_ORDER[NUM_CODE_LENGTH_CODES] + = {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; + +/* ////////////////////////////////////////////////////////////////////////// */ + +/* +Huffman tree struct, containing multiple representations of the tree +*/ +typedef struct HuffmanTree { + unsigned* codes; /*the huffman codes (bit patterns representing the symbols)*/ + unsigned* lengths; /*the lengths of the huffman codes*/ + unsigned maxbitlen; /*maximum number of bits a single code can get*/ + unsigned numcodes; /*number of symbols in the alphabet = number of codes*/ + /* for reading only */ + unsigned char* table_len; /*length of symbol from lookup table, or max length if secondary lookup needed*/ + unsigned short* table_value; /*value of symbol from lookup table, or pointer to secondary table if needed*/ +} HuffmanTree; + +static void HuffmanTree_init(HuffmanTree* tree) { + tree->codes = 0; + tree->lengths = 0; + tree->table_len = 0; + tree->table_value = 0; +} + +static void HuffmanTree_cleanup(HuffmanTree* tree) { + lodepng_free(tree->codes); + lodepng_free(tree->lengths); + lodepng_free(tree->table_len); + lodepng_free(tree->table_value); +} + +/* amount of bits for first huffman table lookup (aka root bits), see HuffmanTree_makeTable and huffmanDecodeSymbol.*/ +/* values 8u and 9u work the fastest */ +#define FIRSTBITS 9u + +/* a symbol value too big to represent any valid symbol, to indicate reading disallowed huffman bits combination, +which is possible in case of only 0 or 1 present symbols. 
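For orientation, a sketch of how a decoder can consume the two-level table built by HuffmanTree_makeTable below, using the peekBits/advanceBits helpers defined earlier in this hunk. This is illustrative only, not the actual lookup routine; reader and tree are assumed to be set up, and enough bits are assumed available via one of the ensureBits functions:

    unsigned code = peekBits(reader, FIRSTBITS);
    unsigned l = tree->table_len[code];
    unsigned symbol;
    if(l <= FIRSTBITS) {
      advanceBits(reader, l);            short code: resolved directly by the head table
      symbol = tree->table_value[code];
    } else {
      unsigned index2;
      advanceBits(reader, FIRSTBITS);    long code: head entry points at a secondary table
      index2 = tree->table_value[code] + peekBits(reader, l - FIRSTBITS);
      advanceBits(reader, tree->table_len[index2] - FIRSTBITS);
      symbol = tree->table_value[index2];
    }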
*/ +#define INVALIDSYMBOL 65535u + +/* make table for huffman decoding */ +static unsigned HuffmanTree_makeTable(HuffmanTree* tree) { + static const unsigned headsize = 1u << FIRSTBITS; /*size of the first table*/ + static const unsigned mask = (1u << FIRSTBITS) /*headsize*/ - 1u; + size_t i, numpresent, pointer, size; /*total table size*/ + unsigned* maxlens = (unsigned*)lodepng_malloc(headsize * sizeof(unsigned)); + if(!maxlens) return 83; /*alloc fail*/ + + /* compute maxlens: max total bit length of symbols sharing prefix in the first table*/ + lodepng_memset(maxlens, 0, headsize * sizeof(*maxlens)); + for(i = 0; i < tree->numcodes; i++) { + unsigned symbol = tree->codes[i]; + unsigned l = tree->lengths[i]; + unsigned index; + if(l <= FIRSTBITS) continue; /*symbols that fit in first table don't increase secondary table size*/ + /*get the FIRSTBITS MSBs, the MSBs of the symbol are encoded first. See later comment about the reversing*/ + index = reverseBits(symbol >> (l - FIRSTBITS), FIRSTBITS); + maxlens[index] = LODEPNG_MAX(maxlens[index], l); + } + /* compute total table size: size of first table plus all secondary tables for symbols longer than FIRSTBITS */ + size = headsize; + for(i = 0; i < headsize; ++i) { + unsigned l = maxlens[i]; + if(l > FIRSTBITS) size += (1u << (l - FIRSTBITS)); + } + tree->table_len = (unsigned char*)lodepng_malloc(size * sizeof(*tree->table_len)); + tree->table_value = (unsigned short*)lodepng_malloc(size * sizeof(*tree->table_value)); + if(!tree->table_len || !tree->table_value) { + lodepng_free(maxlens); + /* freeing tree->table values is done at a higher scope */ + return 83; /*alloc fail*/ + } + /*initialize with an invalid length to indicate unused entries*/ + for(i = 0; i < size; ++i) tree->table_len[i] = 16; + + /*fill in the first table for long symbols: max prefix size and pointer to secondary tables*/ + pointer = headsize; + for(i = 0; i < headsize; ++i) { + unsigned l = maxlens[i]; + if(l <= FIRSTBITS) continue; + tree->table_len[i] = l; + tree->table_value[i] = pointer; + pointer += (1u << (l - FIRSTBITS)); + } + lodepng_free(maxlens); + + /*fill in the first table for short symbols, or secondary table for long symbols*/ + numpresent = 0; + for(i = 0; i < tree->numcodes; ++i) { + unsigned l = tree->lengths[i]; + unsigned symbol = tree->codes[i]; /*the huffman bit pattern. 
i itself is the value.*/ + /*reverse bits, because the huffman bits are given in MSB first order but the bit reader reads LSB first*/ + unsigned reverse = reverseBits(symbol, l); + if(l == 0) continue; + numpresent++; + + if(l <= FIRSTBITS) { + /*short symbol, fully in first table, replicated num times if l < FIRSTBITS*/ + unsigned num = 1u << (FIRSTBITS - l); + unsigned j; + for(j = 0; j < num; ++j) { + /*bit reader will read the l bits of symbol first, the remaining FIRSTBITS - l bits go to the MSB's*/ + unsigned index = reverse | (j << l); + if(tree->table_len[index] != 16) return 55; /*invalid tree: long symbol shares prefix with short symbol*/ + tree->table_len[index] = l; + tree->table_value[index] = i; + } + } else { + /*long symbol, shares prefix with other long symbols in first lookup table, needs second lookup*/ + /*the FIRSTBITS MSBs of the symbol are the first table index*/ + unsigned index = reverse & mask; + unsigned maxlen = tree->table_len[index]; + /*log2 of secondary table length, should be >= l - FIRSTBITS*/ + unsigned tablelen = maxlen - FIRSTBITS; + unsigned start = tree->table_value[index]; /*starting index in secondary table*/ + unsigned num = 1u << (tablelen - (l - FIRSTBITS)); /*amount of entries of this symbol in secondary table*/ + unsigned j; + if(maxlen < l) return 55; /*invalid tree: long symbol shares prefix with short symbol*/ + for(j = 0; j < num; ++j) { + unsigned reverse2 = reverse >> FIRSTBITS; /* l - FIRSTBITS bits */ + unsigned index2 = start + (reverse2 | (j << (l - FIRSTBITS))); + tree->table_len[index2] = l; + tree->table_value[index2] = i; + } + } + } + + if(numpresent < 2) { + /* In case of exactly 1 symbol, in theory the huffman symbol needs 0 bits, + but deflate uses 1 bit instead. In case of 0 symbols, no symbols can + appear at all, but such huffman tree could still exist (e.g. if distance + codes are never used). In both cases, not all symbols of the table will be + filled in. Fill them in with an invalid symbol value so returning them from + huffmanDecodeSymbol will cause error. */ + for(i = 0; i < size; ++i) { + if(tree->table_len[i] == 16) { + /* As length, use a value smaller than FIRSTBITS for the head table, + and a value larger than FIRSTBITS for the secondary table, to ensure + valid behavior for advanceBits when reading this symbol. */ + tree->table_len[i] = (i < headsize) ? 1 : (FIRSTBITS + 1); + tree->table_value[i] = INVALIDSYMBOL; + } + } + } else { + /* A good huffman tree has N * 2 - 1 nodes, of which N - 1 are internal nodes. + If that is not the case (due to too long length codes), the table will not + have been fully used, and this is an error (not all bit combinations can be + decoded): an oversubscribed huffman tree, indicated by error 55. */ + for(i = 0; i < size; ++i) { + if(tree->table_len[i] == 16) return 55; + } + } + + return 0; +} + +/* +Second step for the ...makeFromLengths and ...makeFromFrequencies functions. +numcodes, lengths and maxbitlen must already be filled in correctly. return +value is error. 
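A small worked example of the three steps in the function below: for code lengths {2, 1, 3, 3}, step 1 counts blcount = {0, 1, 1, 2}, step 2 produces nextcode = {0, 0, 2, 6}, and step 3 assigns the codes 10, 0, 110 and 111 (in binary), i.e. the canonical prefix-free code that RFC 1951 defines for those lengths.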
+*/ +static unsigned HuffmanTree_makeFromLengths2(HuffmanTree* tree) { + unsigned* blcount; + unsigned* nextcode; + unsigned error = 0; + unsigned bits, n; + + tree->codes = (unsigned*)lodepng_malloc(tree->numcodes * sizeof(unsigned)); + blcount = (unsigned*)lodepng_malloc((tree->maxbitlen + 1) * sizeof(unsigned)); + nextcode = (unsigned*)lodepng_malloc((tree->maxbitlen + 1) * sizeof(unsigned)); + if(!tree->codes || !blcount || !nextcode) error = 83; /*alloc fail*/ + + if(!error) { + for(n = 0; n != tree->maxbitlen + 1; n++) blcount[n] = nextcode[n] = 0; + /*step 1: count number of instances of each code length*/ + for(bits = 0; bits != tree->numcodes; ++bits) ++blcount[tree->lengths[bits]]; + /*step 2: generate the nextcode values*/ + for(bits = 1; bits <= tree->maxbitlen; ++bits) { + nextcode[bits] = (nextcode[bits - 1] + blcount[bits - 1]) << 1u; + } + /*step 3: generate all the codes*/ + for(n = 0; n != tree->numcodes; ++n) { + if(tree->lengths[n] != 0) { + tree->codes[n] = nextcode[tree->lengths[n]]++; + /*remove superfluous bits from the code*/ + tree->codes[n] &= ((1u << tree->lengths[n]) - 1u); + } + } + } + + lodepng_free(blcount); + lodepng_free(nextcode); + + if(!error) error = HuffmanTree_makeTable(tree); + return error; +} + +/* +given the code lengths (as stored in the PNG file), generate the tree as defined +by Deflate. maxbitlen is the maximum bits that a code in the tree can have. +return value is error. +*/ +static unsigned HuffmanTree_makeFromLengths(HuffmanTree* tree, const unsigned* bitlen, + size_t numcodes, unsigned maxbitlen) { + unsigned i; + tree->lengths = (unsigned*)lodepng_malloc(numcodes * sizeof(unsigned)); + if(!tree->lengths) return 83; /*alloc fail*/ + for(i = 0; i != numcodes; ++i) tree->lengths[i] = bitlen[i]; + tree->numcodes = (unsigned)numcodes; /*number of symbols*/ + tree->maxbitlen = maxbitlen; + return HuffmanTree_makeFromLengths2(tree); +} + +#ifdef LODEPNG_COMPILE_ENCODER + +/*BPM: Boundary Package Merge, see "A Fast and Space-Economical Algorithm for Length-Limited Coding", +Jyrki Katajainen, Alistair Moffat, Andrew Turpin, 1995.*/ + +/*chain node for boundary package merge*/ +typedef struct BPMNode { + int weight; /*the sum of all weights in this chain*/ + unsigned index; /*index of this leaf node (called "count" in the paper)*/ + struct BPMNode* tail; /*the next nodes in this chain (null if last)*/ + int in_use; +} BPMNode; + +/*lists of chains*/ +typedef struct BPMLists { + /*memory pool*/ + unsigned memsize; + BPMNode* memory; + unsigned numfree; + unsigned nextfree; + BPMNode** freelist; + /*two heads of lookahead chains per list*/ + unsigned listsize; + BPMNode** chains0; + BPMNode** chains1; +} BPMLists; + +/*creates a new chain node with the given parameters, from the memory in the lists */ +static BPMNode* bpmnode_create(BPMLists* lists, int weight, unsigned index, BPMNode* tail) { + unsigned i; + BPMNode* result; + + /*memory full, so garbage collect*/ + if(lists->nextfree >= lists->numfree) { + /*mark only those that are in use*/ + for(i = 0; i != lists->memsize; ++i) lists->memory[i].in_use = 0; + for(i = 0; i != lists->listsize; ++i) { + BPMNode* node; + for(node = lists->chains0[i]; node != 0; node = node->tail) node->in_use = 1; + for(node = lists->chains1[i]; node != 0; node = node->tail) node->in_use = 1; + } + /*collect those that are free*/ + lists->numfree = 0; + for(i = 0; i != lists->memsize; ++i) { + if(!lists->memory[i].in_use) lists->freelist[lists->numfree++] = &lists->memory[i]; + } + lists->nextfree = 0; + } + + 
result = lists->freelist[lists->nextfree++]; + result->weight = weight; + result->index = index; + result->tail = tail; + return result; +} + +/*sort the leaves with stable mergesort*/ +static void bpmnode_sort(BPMNode* leaves, size_t num) { + BPMNode* mem = (BPMNode*)lodepng_malloc(sizeof(*leaves) * num); + size_t width, counter = 0; + for(width = 1; width < num; width *= 2) { + BPMNode* a = (counter & 1) ? mem : leaves; + BPMNode* b = (counter & 1) ? leaves : mem; + size_t p; + for(p = 0; p < num; p += 2 * width) { + size_t q = (p + width > num) ? num : (p + width); + size_t r = (p + 2 * width > num) ? num : (p + 2 * width); + size_t i = p, j = q, k; + for(k = p; k < r; k++) { + if(i < q && (j >= r || a[i].weight <= a[j].weight)) b[k] = a[i++]; + else b[k] = a[j++]; + } + } + counter++; + } + if(counter & 1) lodepng_memcpy(leaves, mem, sizeof(*leaves) * num); + lodepng_free(mem); +} + +/*Boundary Package Merge step, numpresent is the amount of leaves, and c is the current chain.*/ +static void boundaryPM(BPMLists* lists, BPMNode* leaves, size_t numpresent, int c, int num) { + unsigned lastindex = lists->chains1[c]->index; + + if(c == 0) { + if(lastindex >= numpresent) return; + lists->chains0[c] = lists->chains1[c]; + lists->chains1[c] = bpmnode_create(lists, leaves[lastindex].weight, lastindex + 1, 0); + } else { + /*sum of the weights of the head nodes of the previous lookahead chains.*/ + int sum = lists->chains0[c - 1]->weight + lists->chains1[c - 1]->weight; + lists->chains0[c] = lists->chains1[c]; + if(lastindex < numpresent && sum > leaves[lastindex].weight) { + lists->chains1[c] = bpmnode_create(lists, leaves[lastindex].weight, lastindex + 1, lists->chains1[c]->tail); + return; + } + lists->chains1[c] = bpmnode_create(lists, sum, lastindex, lists->chains1[c - 1]); + /*in the end we are only interested in the chain of the last list, so no + need to recurse if we're at the last one (this gives measurable speedup)*/ + if(num + 1 < (int)(2 * numpresent - 2)) { + boundaryPM(lists, leaves, numpresent, c - 1, num); + boundaryPM(lists, leaves, numpresent, c - 1, num); + } + } +} + +unsigned lodepng_huffman_code_lengths(unsigned* lengths, const unsigned* frequencies, + size_t numcodes, unsigned maxbitlen) { + unsigned error = 0; + unsigned i; + size_t numpresent = 0; /*number of symbols with non-zero frequency*/ + BPMNode* leaves; /*the symbols, only those with > 0 frequency*/ + + if(numcodes == 0) return 80; /*error: a tree of 0 symbols is not supposed to be made*/ + if((1u << maxbitlen) < (unsigned)numcodes) return 80; /*error: represent all symbols*/ + + leaves = (BPMNode*)lodepng_malloc(numcodes * sizeof(*leaves)); + if(!leaves) return 83; /*alloc fail*/ + + for(i = 0; i != numcodes; ++i) { + if(frequencies[i] > 0) { + leaves[numpresent].weight = (int)frequencies[i]; + leaves[numpresent].index = i; + ++numpresent; + } + } + + lodepng_memset(lengths, 0, numcodes * sizeof(*lengths)); + + /*ensure at least two present symbols. There should be at least one symbol + according to RFC 1951 section 3.2.7. Some decoders incorrectly require two. To + make these work as well ensure there are at least two symbols. The + Package-Merge code below also doesn't work correctly if there's only one + symbol, it'd give it the theoretical 0 bits but in practice zlib wants 1 bit*/ + if(numpresent == 0) { + lengths[0] = lengths[1] = 1; /*note that for RFC 1951 section 3.2.7, only lengths[0] = 1 is needed*/ + } else if(numpresent == 1) { + lengths[leaves[0].index] = 1; + lengths[leaves[0].index == 0 ? 
1 : 0] = 1; + } else { + BPMLists lists; + BPMNode* node; + + bpmnode_sort(leaves, numpresent); + + lists.listsize = maxbitlen; + lists.memsize = 2 * maxbitlen * (maxbitlen + 1); + lists.nextfree = 0; + lists.numfree = lists.memsize; + lists.memory = (BPMNode*)lodepng_malloc(lists.memsize * sizeof(*lists.memory)); + lists.freelist = (BPMNode**)lodepng_malloc(lists.memsize * sizeof(BPMNode*)); + lists.chains0 = (BPMNode**)lodepng_malloc(lists.listsize * sizeof(BPMNode*)); + lists.chains1 = (BPMNode**)lodepng_malloc(lists.listsize * sizeof(BPMNode*)); + if(!lists.memory || !lists.freelist || !lists.chains0 || !lists.chains1) error = 83; /*alloc fail*/ + + if(!error) { + for(i = 0; i != lists.memsize; ++i) lists.freelist[i] = &lists.memory[i]; + + bpmnode_create(&lists, leaves[0].weight, 1, 0); + bpmnode_create(&lists, leaves[1].weight, 2, 0); + + for(i = 0; i != lists.listsize; ++i) { + lists.chains0[i] = &lists.memory[0]; + lists.chains1[i] = &lists.memory[1]; + } + + /*each boundaryPM call adds one chain to the last list, and we need 2 * numpresent - 2 chains.*/ + for(i = 2; i != 2 * numpresent - 2; ++i) boundaryPM(&lists, leaves, numpresent, (int)maxbitlen - 1, (int)i); + + for(node = lists.chains1[maxbitlen - 1]; node; node = node->tail) { + for(i = 0; i != node->index; ++i) ++lengths[leaves[i].index]; + } + } + + lodepng_free(lists.memory); + lodepng_free(lists.freelist); + lodepng_free(lists.chains0); + lodepng_free(lists.chains1); + } + + lodepng_free(leaves); + return error; +} + +/*Create the Huffman tree given the symbol frequencies*/ +static unsigned HuffmanTree_makeFromFrequencies(HuffmanTree* tree, const unsigned* frequencies, + size_t mincodes, size_t numcodes, unsigned maxbitlen) { + unsigned error = 0; + while(!frequencies[numcodes - 1] && numcodes > mincodes) --numcodes; /*trim zeroes*/ + tree->lengths = (unsigned*)lodepng_malloc(numcodes * sizeof(unsigned)); + if(!tree->lengths) return 83; /*alloc fail*/ + tree->maxbitlen = maxbitlen; + tree->numcodes = (unsigned)numcodes; /*number of symbols*/ + + error = lodepng_huffman_code_lengths(tree->lengths, frequencies, numcodes, maxbitlen); + if(!error) error = HuffmanTree_makeFromLengths2(tree); + return error; +} +#endif /*LODEPNG_COMPILE_ENCODER*/ + +/*get the literal and length code tree of a deflated block with fixed tree, as per the deflate specification*/ +static unsigned generateFixedLitLenTree(HuffmanTree* tree) { + unsigned i, error = 0; + unsigned* bitlen = (unsigned*)lodepng_malloc(NUM_DEFLATE_CODE_SYMBOLS * sizeof(unsigned)); + if(!bitlen) return 83; /*alloc fail*/ + + /*288 possible codes: 0-255=literals, 256=endcode, 257-285=lengthcodes, 286-287=unused*/ + for(i = 0; i <= 143; ++i) bitlen[i] = 8; + for(i = 144; i <= 255; ++i) bitlen[i] = 9; + for(i = 256; i <= 279; ++i) bitlen[i] = 7; + for(i = 280; i <= 287; ++i) bitlen[i] = 8; + + error = HuffmanTree_makeFromLengths(tree, bitlen, NUM_DEFLATE_CODE_SYMBOLS, 15); + + lodepng_free(bitlen); + return error; +} + +/*get the distance code tree of a deflated block with fixed tree, as specified in the deflate specification*/ +static unsigned generateFixedDistanceTree(HuffmanTree* tree) { + unsigned i, error = 0; + unsigned* bitlen = (unsigned*)lodepng_malloc(NUM_DISTANCE_SYMBOLS * sizeof(unsigned)); + if(!bitlen) return 83; /*alloc fail*/ + + /*there are 32 distance codes, but 30-31 are unused*/ + for(i = 0; i != NUM_DISTANCE_SYMBOLS; ++i) bitlen[i] = 5; + error = HuffmanTree_makeFromLengths(tree, bitlen, NUM_DISTANCE_SYMBOLS, 15); + + lodepng_free(bitlen); + return error; 
+} + +#ifdef LODEPNG_COMPILE_DECODER + +/* +returns the code. The bit reader must already have been ensured at least 15 bits +*/ +static unsigned huffmanDecodeSymbol(LodePNGBitReader* reader, const HuffmanTree* codetree) { + unsigned short code = peekBits(reader, FIRSTBITS); + unsigned short l = codetree->table_len[code]; + unsigned short value = codetree->table_value[code]; + if(l <= FIRSTBITS) { + advanceBits(reader, l); + return value; + } else { + unsigned index2; + advanceBits(reader, FIRSTBITS); + index2 = value + peekBits(reader, l - FIRSTBITS); + advanceBits(reader, codetree->table_len[index2] - FIRSTBITS); + return codetree->table_value[index2]; + } +} +#endif /*LODEPNG_COMPILE_DECODER*/ + +#ifdef LODEPNG_COMPILE_DECODER + +/* ////////////////////////////////////////////////////////////////////////// */ +/* / Inflator (Decompressor) / */ +/* ////////////////////////////////////////////////////////////////////////// */ + +/*get the tree of a deflated block with fixed tree, as specified in the deflate specification +Returns error code.*/ +static unsigned getTreeInflateFixed(HuffmanTree* tree_ll, HuffmanTree* tree_d) { + unsigned error = generateFixedLitLenTree(tree_ll); + if(error) return error; + return generateFixedDistanceTree(tree_d); +} + +/*get the tree of a deflated block with dynamic tree, the tree itself is also Huffman compressed with a known tree*/ +static unsigned getTreeInflateDynamic(HuffmanTree* tree_ll, HuffmanTree* tree_d, + LodePNGBitReader* reader) { + /*make sure that length values that aren't filled in will be 0, or a wrong tree will be generated*/ + unsigned error = 0; + unsigned n, HLIT, HDIST, HCLEN, i; + + /*see comments in deflateDynamic for explanation of the context and these variables, it is analogous*/ + unsigned* bitlen_ll = 0; /*lit,len code lengths*/ + unsigned* bitlen_d = 0; /*dist code lengths*/ + /*code length code lengths ("clcl"), the bit lengths of the huffman tree used to compress bitlen_ll and bitlen_d*/ + unsigned* bitlen_cl = 0; + HuffmanTree tree_cl; /*the code tree for code length codes (the huffman tree for compressed huffman trees)*/ + + if(!ensureBits17(reader, 14)) return 49; /*error: the bit pointer is or will go past the memory*/ + + /*number of literal/length codes + 257. Unlike the spec, the value 257 is added to it here already*/ + HLIT = readBits(reader, 5) + 257; + /*number of distance codes. Unlike the spec, the value 1 is added to it here already*/ + HDIST = readBits(reader, 5) + 1; + /*number of code length codes. 
Unlike the spec, the value 4 is added to it here already*/ + HCLEN = readBits(reader, 4) + 4; + + bitlen_cl = (unsigned*)lodepng_malloc(NUM_CODE_LENGTH_CODES * sizeof(unsigned)); + if(!bitlen_cl) return 83 /*alloc fail*/; + + HuffmanTree_init(&tree_cl); + + while(!error) { + /*read the code length codes out of 3 * (amount of code length codes) bits*/ + if(lodepng_gtofl(reader->bp, HCLEN * 3, reader->bitsize)) { + ERROR_BREAK(50); /*error: the bit pointer is or will go past the memory*/ + } + for(i = 0; i != HCLEN; ++i) { + ensureBits9(reader, 3); /*out of bounds already checked above */ + bitlen_cl[CLCL_ORDER[i]] = readBits(reader, 3); + } + for(i = HCLEN; i != NUM_CODE_LENGTH_CODES; ++i) { + bitlen_cl[CLCL_ORDER[i]] = 0; + } + + error = HuffmanTree_makeFromLengths(&tree_cl, bitlen_cl, NUM_CODE_LENGTH_CODES, 7); + if(error) break; + + /*now we can use this tree to read the lengths for the tree that this function will return*/ + bitlen_ll = (unsigned*)lodepng_malloc(NUM_DEFLATE_CODE_SYMBOLS * sizeof(unsigned)); + bitlen_d = (unsigned*)lodepng_malloc(NUM_DISTANCE_SYMBOLS * sizeof(unsigned)); + if(!bitlen_ll || !bitlen_d) ERROR_BREAK(83 /*alloc fail*/); + lodepng_memset(bitlen_ll, 0, NUM_DEFLATE_CODE_SYMBOLS * sizeof(*bitlen_ll)); + lodepng_memset(bitlen_d, 0, NUM_DISTANCE_SYMBOLS * sizeof(*bitlen_d)); + + /*i is the current symbol we're reading in the part that contains the code lengths of lit/len and dist codes*/ + i = 0; + while(i < HLIT + HDIST) { + unsigned code; + ensureBits25(reader, 22); /* up to 15 bits for huffman code, up to 7 extra bits below*/ + code = huffmanDecodeSymbol(reader, &tree_cl); + if(code <= 15) /*a length code*/ { + if(i < HLIT) bitlen_ll[i] = code; + else bitlen_d[i - HLIT] = code; + ++i; + } else if(code == 16) /*repeat previous*/ { + unsigned replength = 3; /*read in the 2 bits that indicate repeat length (3-6)*/ + unsigned value; /*set value to the previous code*/ + + if(i == 0) ERROR_BREAK(54); /*can't repeat previous if i is 0*/ + + replength += readBits(reader, 2); + + if(i < HLIT + 1) value = bitlen_ll[i - 1]; + else value = bitlen_d[i - HLIT - 1]; + /*repeat this value in the next lengths*/ + for(n = 0; n < replength; ++n) { + if(i >= HLIT + HDIST) ERROR_BREAK(13); /*error: i is larger than the amount of codes*/ + if(i < HLIT) bitlen_ll[i] = value; + else bitlen_d[i - HLIT] = value; + ++i; + } + } else if(code == 17) /*repeat "0" 3-10 times*/ { + unsigned replength = 3; /*read in the bits that indicate repeat length*/ + replength += readBits(reader, 3); + + /*repeat this value in the next lengths*/ + for(n = 0; n < replength; ++n) { + if(i >= HLIT + HDIST) ERROR_BREAK(14); /*error: i is larger than the amount of codes*/ + + if(i < HLIT) bitlen_ll[i] = 0; + else bitlen_d[i - HLIT] = 0; + ++i; + } + } else if(code == 18) /*repeat "0" 11-138 times*/ { + unsigned replength = 11; /*read in the bits that indicate repeat length*/ + replength += readBits(reader, 7); + + /*repeat this value in the next lengths*/ + for(n = 0; n < replength; ++n) { + if(i >= HLIT + HDIST) ERROR_BREAK(15); /*error: i is larger than the amount of codes*/ + + if(i < HLIT) bitlen_ll[i] = 0; + else bitlen_d[i - HLIT] = 0; + ++i; + } + } else /*if(code == INVALIDSYMBOL)*/ { + ERROR_BREAK(16); /*error: tried to read disallowed huffman symbol*/ + } + /*check if any of the ensureBits above went out of bounds*/ + if(reader->bp > reader->bitsize) { + /*return error code 10 or 11 depending on the situation that happened in huffmanDecodeSymbol + (10=no endcode, 11=wrong jump outside of tree)*/ + /* 
TODO: revise error codes 10,11,50: the above comment is no longer valid */ + ERROR_BREAK(50); /*error, bit pointer jumps past memory*/ + } + } + if(error) break; + + if(bitlen_ll[256] == 0) ERROR_BREAK(64); /*the length of the end code 256 must be larger than 0*/ + + /*now we've finally got HLIT and HDIST, so generate the code trees, and the function is done*/ + error = HuffmanTree_makeFromLengths(tree_ll, bitlen_ll, NUM_DEFLATE_CODE_SYMBOLS, 15); + if(error) break; + error = HuffmanTree_makeFromLengths(tree_d, bitlen_d, NUM_DISTANCE_SYMBOLS, 15); + + break; /*end of error-while*/ + } + + lodepng_free(bitlen_cl); + lodepng_free(bitlen_ll); + lodepng_free(bitlen_d); + HuffmanTree_cleanup(&tree_cl); + + return error; +} + +/*inflate a block with dynamic of fixed Huffman tree. btype must be 1 or 2.*/ +static unsigned inflateHuffmanBlock(ucvector* out, LodePNGBitReader* reader, + unsigned btype, size_t max_output_size) { + unsigned error = 0; + HuffmanTree tree_ll; /*the huffman tree for literal and length codes*/ + HuffmanTree tree_d; /*the huffman tree for distance codes*/ + + HuffmanTree_init(&tree_ll); + HuffmanTree_init(&tree_d); + + if(btype == 1) error = getTreeInflateFixed(&tree_ll, &tree_d); + else /*if(btype == 2)*/ error = getTreeInflateDynamic(&tree_ll, &tree_d, reader); + + while(!error) /*decode all symbols until end reached, breaks at end code*/ { + /*code_ll is literal, length or end code*/ + unsigned code_ll; + ensureBits25(reader, 20); /* up to 15 for the huffman symbol, up to 5 for the length extra bits */ + code_ll = huffmanDecodeSymbol(reader, &tree_ll); + if(code_ll <= 255) /*literal symbol*/ { + if(!ucvector_resize(out, out->size + 1)) ERROR_BREAK(83 /*alloc fail*/); + out->data[out->size - 1] = (unsigned char)code_ll; + } else if(code_ll >= FIRST_LENGTH_CODE_INDEX && code_ll <= LAST_LENGTH_CODE_INDEX) /*length code*/ { + unsigned code_d, distance; + unsigned numextrabits_l, numextrabits_d; /*extra bits for length and distance*/ + size_t start, backward, length; + + /*part 1: get length base*/ + length = LENGTHBASE[code_ll - FIRST_LENGTH_CODE_INDEX]; + + /*part 2: get extra bits and add the value of that to length*/ + numextrabits_l = LENGTHEXTRA[code_ll - FIRST_LENGTH_CODE_INDEX]; + if(numextrabits_l != 0) { + /* bits already ensured above */ + length += readBits(reader, numextrabits_l); + } + + /*part 3: get distance code*/ + ensureBits32(reader, 28); /* up to 15 for the huffman symbol, up to 13 for the extra bits */ + code_d = huffmanDecodeSymbol(reader, &tree_d); + if(code_d > 29) { + if(code_d <= 31) { + ERROR_BREAK(18); /*error: invalid distance code (30-31 are never used)*/ + } else /* if(code_d == INVALIDSYMBOL) */{ + ERROR_BREAK(16); /*error: tried to read disallowed huffman symbol*/ + } + } + distance = DISTANCEBASE[code_d]; + + /*part 4: get extra bits from distance*/ + numextrabits_d = DISTANCEEXTRA[code_d]; + if(numextrabits_d != 0) { + /* bits already ensured above */ + distance += readBits(reader, numextrabits_d); + } + + /*part 5: fill in all the out[n] values based on the length and dist*/ + start = out->size; + if(distance > start) ERROR_BREAK(52); /*too long backward distance*/ + backward = start - distance; + + if(!ucvector_resize(out, out->size + length)) ERROR_BREAK(83 /*alloc fail*/); + if(distance < length) { + size_t forward; + lodepng_memcpy(out->data + start, out->data + backward, distance); + start += distance; + for(forward = distance; forward < length; ++forward) { + out->data[start++] = out->data[backward++]; + } + } else { + 
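+        /*here distance >= length, so the source bytes cannot overlap the bytes
+          being written and a single memcpy is safe; the branch above handles the
+          overlapping case (distance < length), where already-copied output is
+          reused, e.g. distance 1 with length 5 repeats the previous byte 5 times*/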
lodepng_memcpy(out->data + start, out->data + backward, length); + } + } else if(code_ll == 256) { + break; /*end code, break the loop*/ + } else /*if(code_ll == INVALIDSYMBOL)*/ { + ERROR_BREAK(16); /*error: tried to read disallowed huffman symbol*/ + } + /*check if any of the ensureBits above went out of bounds*/ + if(reader->bp > reader->bitsize) { + /*return error code 10 or 11 depending on the situation that happened in huffmanDecodeSymbol + (10=no endcode, 11=wrong jump outside of tree)*/ + /* TODO: revise error codes 10,11,50: the above comment is no longer valid */ + ERROR_BREAK(51); /*error, bit pointer jumps past memory*/ + } + if(max_output_size && out->size > max_output_size) { + ERROR_BREAK(109); /*error, larger than max size*/ + } + } + + HuffmanTree_cleanup(&tree_ll); + HuffmanTree_cleanup(&tree_d); + + return error; +} + +static unsigned inflateNoCompression(ucvector* out, LodePNGBitReader* reader, + const LodePNGDecompressSettings* settings) { + size_t bytepos; + size_t size = reader->size; + unsigned LEN, NLEN, error = 0; + + /*go to first boundary of byte*/ + bytepos = (reader->bp + 7u) >> 3u; + + /*read LEN (2 bytes) and NLEN (2 bytes)*/ + if(bytepos + 4 >= size) return 52; /*error, bit pointer will jump past memory*/ + LEN = (unsigned)reader->data[bytepos] + ((unsigned)reader->data[bytepos + 1] << 8u); bytepos += 2; + NLEN = (unsigned)reader->data[bytepos] + ((unsigned)reader->data[bytepos + 1] << 8u); bytepos += 2; + + /*check if 16-bit NLEN is really the one's complement of LEN*/ + if(!settings->ignore_nlen && LEN + NLEN != 65535) { + return 21; /*error: NLEN is not one's complement of LEN*/ + } + + if(!ucvector_resize(out, out->size + LEN)) return 83; /*alloc fail*/ + + /*read the literal data: LEN bytes are now stored in the out buffer*/ + if(bytepos + LEN > size) return 23; /*error: reading outside of in buffer*/ + + lodepng_memcpy(out->data + out->size - LEN, reader->data + bytepos, LEN); + bytepos += LEN; + + reader->bp = bytepos << 3u; + + return error; +} + +static unsigned lodepng_inflatev(ucvector* out, + const unsigned char* in, size_t insize, + const LodePNGDecompressSettings* settings) { + unsigned BFINAL = 0; + LodePNGBitReader reader; + unsigned error = LodePNGBitReader_init(&reader, in, insize); + + if(error) return error; + + while(!BFINAL) { + unsigned BTYPE; + if(!ensureBits9(&reader, 3)) return 52; /*error, bit pointer will jump past memory*/ + BFINAL = readBits(&reader, 1); + BTYPE = readBits(&reader, 2); + + if(BTYPE == 3) return 20; /*error: invalid BTYPE*/ + else if(BTYPE == 0) error = inflateNoCompression(out, &reader, settings); /*no compression*/ + else error = inflateHuffmanBlock(out, &reader, BTYPE, settings->max_output_size); /*compression, BTYPE 01 or 10*/ + if(!error && settings->max_output_size && out->size > settings->max_output_size) error = 109; + if(error) break; + } + + return error; +} + +unsigned lodepng_inflate(unsigned char** out, size_t* outsize, + const unsigned char* in, size_t insize, + const LodePNGDecompressSettings* settings) { + ucvector v = ucvector_init(*out, *outsize); + unsigned error = lodepng_inflatev(&v, in, insize, settings); + *out = v.data; + *outsize = v.size; + return error; +} + +static unsigned inflatev(ucvector* out, const unsigned char* in, size_t insize, + const LodePNGDecompressSettings* settings) { + if(settings->custom_inflate) { + unsigned error = settings->custom_inflate(&out->data, &out->size, in, insize, settings); + out->allocsize = out->size; + if(error) { + /*the custom inflate is allowed to 
have its own error codes, however, we translate it to code 110*/ + error = 110; + /*if there's a max output size, and the custom zlib returned error, then indicate that error instead*/ + if(settings->max_output_size && out->size > settings->max_output_size) error = 109; + } + return error; + } else { + return lodepng_inflatev(out, in, insize, settings); + } +} + +#endif /*LODEPNG_COMPILE_DECODER*/ + +#ifdef LODEPNG_COMPILE_ENCODER + +/* ////////////////////////////////////////////////////////////////////////// */ +/* / Deflator (Compressor) / */ +/* ////////////////////////////////////////////////////////////////////////// */ + +static const size_t MAX_SUPPORTED_DEFLATE_LENGTH = 258; + +/*search the index in the array, that has the largest value smaller than or equal to the given value, +given array must be sorted (if no value is smaller, it returns the size of the given array)*/ +static size_t searchCodeIndex(const unsigned* array, size_t array_size, size_t value) { + /*binary search (only small gain over linear). TODO: use CPU log2 instruction for getting symbols instead*/ + size_t left = 1; + size_t right = array_size - 1; + + while(left <= right) { + size_t mid = (left + right) >> 1; + if(array[mid] >= value) right = mid - 1; + else left = mid + 1; + } + if(left >= array_size || array[left] > value) left--; + return left; +} + +static void addLengthDistance(uivector* values, size_t length, size_t distance) { + /*values in encoded vector are those used by deflate: + 0-255: literal bytes + 256: end + 257-285: length/distance pair (length code, followed by extra length bits, distance code, extra distance bits) + 286-287: invalid*/ + + unsigned length_code = (unsigned)searchCodeIndex(LENGTHBASE, 29, length); + unsigned extra_length = (unsigned)(length - LENGTHBASE[length_code]); + unsigned dist_code = (unsigned)searchCodeIndex(DISTANCEBASE, 30, distance); + unsigned extra_distance = (unsigned)(distance - DISTANCEBASE[dist_code]); + + size_t pos = values->size; + /*TODO: return error when this fails (out of memory)*/ + unsigned ok = uivector_resize(values, values->size + 4); + if(ok) { + values->data[pos + 0] = length_code + FIRST_LENGTH_CODE_INDEX; + values->data[pos + 1] = extra_length; + values->data[pos + 2] = dist_code; + values->data[pos + 3] = extra_distance; + } +} + +/*3 bytes of data get encoded into two bytes. The hash cannot use more than 3 +bytes as input because 3 is the minimum match length for deflate*/ +static const unsigned HASH_NUM_VALUES = 65536; +static const unsigned HASH_BIT_MASK = 65535; /*HASH_NUM_VALUES - 1, but C90 does not like that as initializer*/ + +typedef struct Hash { + int* head; /*hash value to head circular pos - can be outdated if went around window*/ + /*circular pos to prev circular pos*/ + unsigned short* chain; + int* val; /*circular pos to hash value*/ + + /*TODO: do this not only for zeros but for any repeated byte. 
However for PNG + it's always going to be the zeros that dominate, so not important for PNG*/ + int* headz; /*similar to head, but for chainz*/ + unsigned short* chainz; /*those with same amount of zeros*/ + unsigned short* zeros; /*length of zeros streak, used as a second hash chain*/ +} Hash; + +static unsigned hash_init(Hash* hash, unsigned windowsize) { + unsigned i; + hash->head = (int*)lodepng_malloc(sizeof(int) * HASH_NUM_VALUES); + hash->val = (int*)lodepng_malloc(sizeof(int) * windowsize); + hash->chain = (unsigned short*)lodepng_malloc(sizeof(unsigned short) * windowsize); + + hash->zeros = (unsigned short*)lodepng_malloc(sizeof(unsigned short) * windowsize); + hash->headz = (int*)lodepng_malloc(sizeof(int) * (MAX_SUPPORTED_DEFLATE_LENGTH + 1)); + hash->chainz = (unsigned short*)lodepng_malloc(sizeof(unsigned short) * windowsize); + + if(!hash->head || !hash->chain || !hash->val || !hash->headz|| !hash->chainz || !hash->zeros) { + return 83; /*alloc fail*/ + } + + /*initialize hash table*/ + for(i = 0; i != HASH_NUM_VALUES; ++i) hash->head[i] = -1; + for(i = 0; i != windowsize; ++i) hash->val[i] = -1; + for(i = 0; i != windowsize; ++i) hash->chain[i] = i; /*same value as index indicates uninitialized*/ + + for(i = 0; i <= MAX_SUPPORTED_DEFLATE_LENGTH; ++i) hash->headz[i] = -1; + for(i = 0; i != windowsize; ++i) hash->chainz[i] = i; /*same value as index indicates uninitialized*/ + + return 0; +} + +static void hash_cleanup(Hash* hash) { + lodepng_free(hash->head); + lodepng_free(hash->val); + lodepng_free(hash->chain); + + lodepng_free(hash->zeros); + lodepng_free(hash->headz); + lodepng_free(hash->chainz); +} + + + +static unsigned getHash(const unsigned char* data, size_t size, size_t pos) { + unsigned result = 0; + if(pos + 2 < size) { + /*A simple shift and xor hash is used. Since the data of PNGs is dominated + by zeroes due to the filters, a better hash does not have a significant + effect on speed in traversing the chain, and causes more time spend on + calculating the hash.*/ + result ^= ((unsigned)data[pos + 0] << 0u); + result ^= ((unsigned)data[pos + 1] << 4u); + result ^= ((unsigned)data[pos + 2] << 8u); + } else { + size_t amount, i; + if(pos >= size) return 0; + amount = size - pos; + for(i = 0; i != amount; ++i) result ^= ((unsigned)data[pos + i] << (i * 8u)); + } + return result & HASH_BIT_MASK; +} + +static unsigned countZeros(const unsigned char* data, size_t size, size_t pos) { + const unsigned char* start = data + pos; + const unsigned char* end = start + MAX_SUPPORTED_DEFLATE_LENGTH; + if(end > data + size) end = data + size; + data = start; + while(data != end && *data == 0) ++data; + /*subtracting two addresses returned as 32-bit number (max value is MAX_SUPPORTED_DEFLATE_LENGTH)*/ + return (unsigned)(data - start); +} + +/*wpos = pos & (windowsize - 1)*/ +static void updateHashChain(Hash* hash, size_t wpos, unsigned hashval, unsigned short numzeros) { + hash->val[wpos] = (int)hashval; + if(hash->head[hashval] != -1) hash->chain[wpos] = hash->head[hashval]; + hash->head[hashval] = (int)wpos; + + hash->zeros[wpos] = numzeros; + if(hash->headz[numzeros] != -1) hash->chainz[wpos] = hash->headz[numzeros]; + hash->headz[numzeros] = (int)wpos; +} + +/* +LZ77-encode the data. Return value is error code. The input are raw bytes, the output +is in the form of unsigned integers with codes representing for example literal bytes, or +length/distance pairs. +It uses a hash table technique to let it encode faster. 
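+Each entry of the output vector out is either a literal byte value (0-255) or the
+first of a group of four entries appended by addLengthDistance above: the length
+code (257-285), the extra length bits, the distance code (0-29) and the extra
+distance bits; writeLZ77data later serializes exactly this layout to the bit stream.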
When doing LZ77 encoding, a +sliding window (of windowsize) is used, and all past bytes in that window can be used as +the "dictionary". A brute force search through all possible distances would be slow, and +this hash technique is one out of several ways to speed this up. +*/ +static unsigned encodeLZ77(uivector* out, Hash* hash, + const unsigned char* in, size_t inpos, size_t insize, unsigned windowsize, + unsigned minmatch, unsigned nicematch, unsigned lazymatching) { + size_t pos; + unsigned i, error = 0; + /*for large window lengths, assume the user wants no compression loss. Otherwise, max hash chain length speedup.*/ + unsigned maxchainlength = windowsize >= 8192 ? windowsize : windowsize / 8u; + unsigned maxlazymatch = windowsize >= 8192 ? MAX_SUPPORTED_DEFLATE_LENGTH : 64; + + unsigned usezeros = 1; /*not sure if setting it to false for windowsize < 8192 is better or worse*/ + unsigned numzeros = 0; + + unsigned offset; /*the offset represents the distance in LZ77 terminology*/ + unsigned length; + unsigned lazy = 0; + unsigned lazylength = 0, lazyoffset = 0; + unsigned hashval; + unsigned current_offset, current_length; + unsigned prev_offset; + const unsigned char *lastptr, *foreptr, *backptr; + unsigned hashpos; + + if(windowsize == 0 || windowsize > 32768) return 60; /*error: windowsize smaller/larger than allowed*/ + if((windowsize & (windowsize - 1)) != 0) return 90; /*error: must be power of two*/ + + if(nicematch > MAX_SUPPORTED_DEFLATE_LENGTH) nicematch = MAX_SUPPORTED_DEFLATE_LENGTH; + + for(pos = inpos; pos < insize; ++pos) { + size_t wpos = pos & (windowsize - 1); /*position for in 'circular' hash buffers*/ + unsigned chainlength = 0; + + hashval = getHash(in, insize, pos); + + if(usezeros && hashval == 0) { + if(numzeros == 0) numzeros = countZeros(in, insize, pos); + else if(pos + numzeros > insize || in[pos + numzeros - 1] != 0) --numzeros; + } else { + numzeros = 0; + } + + updateHashChain(hash, wpos, hashval, numzeros); + + /*the length and offset found for the current position*/ + length = 0; + offset = 0; + + hashpos = hash->chain[wpos]; + + lastptr = &in[insize < pos + MAX_SUPPORTED_DEFLATE_LENGTH ? insize : pos + MAX_SUPPORTED_DEFLATE_LENGTH]; + + /*search for the longest string*/ + prev_offset = 0; + for(;;) { + if(chainlength++ >= maxchainlength) break; + current_offset = (unsigned)(hashpos <= wpos ? wpos - hashpos : wpos - hashpos + windowsize); + + if(current_offset < prev_offset) break; /*stop when went completely around the circular buffer*/ + prev_offset = current_offset; + if(current_offset > 0) { + /*test the next characters*/ + foreptr = &in[pos]; + backptr = &in[pos - current_offset]; + + /*common case in PNGs is lots of zeros. Quickly skip over them as a speedup*/ + if(numzeros >= 3) { + unsigned skip = hash->zeros[hashpos]; + if(skip > numzeros) skip = numzeros; + backptr += skip; + foreptr += skip; + } + + while(foreptr != lastptr && *backptr == *foreptr) /*maximum supported length by deflate is max length*/ { + ++backptr; + ++foreptr; + } + current_length = (unsigned)(foreptr - &in[pos]); + + if(current_length > length) { + length = current_length; /*the longest length*/ + offset = current_offset; /*the offset that is related to this longest length*/ + /*jump out once a length of max length is found (speed gain). 
This also jumps + out if length is MAX_SUPPORTED_DEFLATE_LENGTH*/ + if(current_length >= nicematch) break; + } + } + + if(hashpos == hash->chain[hashpos]) break; + + if(numzeros >= 3 && length > numzeros) { + hashpos = hash->chainz[hashpos]; + if(hash->zeros[hashpos] != numzeros) break; + } else { + hashpos = hash->chain[hashpos]; + /*outdated hash value, happens if particular value was not encountered in whole last window*/ + if(hash->val[hashpos] != (int)hashval) break; + } + } + + if(lazymatching) { + if(!lazy && length >= 3 && length <= maxlazymatch && length < MAX_SUPPORTED_DEFLATE_LENGTH) { + lazy = 1; + lazylength = length; + lazyoffset = offset; + continue; /*try the next byte*/ + } + if(lazy) { + lazy = 0; + if(pos == 0) ERROR_BREAK(81); + if(length > lazylength + 1) { + /*push the previous character as literal*/ + if(!uivector_push_back(out, in[pos - 1])) ERROR_BREAK(83 /*alloc fail*/); + } else { + length = lazylength; + offset = lazyoffset; + hash->head[hashval] = -1; /*the same hashchain update will be done, this ensures no wrong alteration*/ + hash->headz[numzeros] = -1; /*idem*/ + --pos; + } + } + } + if(length >= 3 && offset > windowsize) ERROR_BREAK(86 /*too big (or overflown negative) offset*/); + + /*encode it as length/distance pair or literal value*/ + if(length < 3) /*only lengths of 3 or higher are supported as length/distance pair*/ { + if(!uivector_push_back(out, in[pos])) ERROR_BREAK(83 /*alloc fail*/); + } else if(length < minmatch || (length == 3 && offset > 4096)) { + /*compensate for the fact that longer offsets have more extra bits, a + length of only 3 may be not worth it then*/ + if(!uivector_push_back(out, in[pos])) ERROR_BREAK(83 /*alloc fail*/); + } else { + addLengthDistance(out, length, offset); + for(i = 1; i < length; ++i) { + ++pos; + wpos = pos & (windowsize - 1); + hashval = getHash(in, insize, pos); + if(usezeros && hashval == 0) { + if(numzeros == 0) numzeros = countZeros(in, insize, pos); + else if(pos + numzeros > insize || in[pos + numzeros - 1] != 0) --numzeros; + } else { + numzeros = 0; + } + updateHashChain(hash, wpos, hashval, numzeros); + } + } + } /*end of the loop through each character of input*/ + + return error; +} + +/* /////////////////////////////////////////////////////////////////////////// */ + +static unsigned deflateNoCompression(ucvector* out, const unsigned char* data, size_t datasize) { + /*non compressed deflate block data: 1 bit BFINAL,2 bits BTYPE,(5 bits): it jumps to start of next byte, + 2 bytes LEN, 2 bytes NLEN, LEN bytes literal DATA*/ + + size_t i, numdeflateblocks = (datasize + 65534u) / 65535u; + unsigned datapos = 0; + for(i = 0; i != numdeflateblocks; ++i) { + unsigned BFINAL, BTYPE, LEN, NLEN; + unsigned char firstbyte; + size_t pos = out->size; + + BFINAL = (i == numdeflateblocks - 1); + BTYPE = 0; + + LEN = 65535; + if(datasize - datapos < 65535u) LEN = (unsigned)datasize - datapos; + NLEN = 65535 - LEN; + + if(!ucvector_resize(out, out->size + LEN + 5)) return 83; /*alloc fail*/ + + firstbyte = (unsigned char)(BFINAL + ((BTYPE & 1u) << 1u) + ((BTYPE & 2u) << 1u)); + out->data[pos + 0] = firstbyte; + out->data[pos + 1] = (unsigned char)(LEN & 255); + out->data[pos + 2] = (unsigned char)(LEN >> 8u); + out->data[pos + 3] = (unsigned char)(NLEN & 255); + out->data[pos + 4] = (unsigned char)(NLEN >> 8u); + lodepng_memcpy(out->data + pos + 5, data + datapos, LEN); + datapos += LEN; + } + + return 0; +} + +/* +write the lz77-encoded data, which has lit, len and dist codes, to compressed stream using huffman 
trees. +tree_ll: the tree for lit and len codes. +tree_d: the tree for distance codes. +*/ +static void writeLZ77data(LodePNGBitWriter* writer, const uivector* lz77_encoded, + const HuffmanTree* tree_ll, const HuffmanTree* tree_d) { + size_t i = 0; + for(i = 0; i != lz77_encoded->size; ++i) { + unsigned val = lz77_encoded->data[i]; + writeBitsReversed(writer, tree_ll->codes[val], tree_ll->lengths[val]); + if(val > 256) /*for a length code, 3 more things have to be added*/ { + unsigned length_index = val - FIRST_LENGTH_CODE_INDEX; + unsigned n_length_extra_bits = LENGTHEXTRA[length_index]; + unsigned length_extra_bits = lz77_encoded->data[++i]; + + unsigned distance_code = lz77_encoded->data[++i]; + + unsigned distance_index = distance_code; + unsigned n_distance_extra_bits = DISTANCEEXTRA[distance_index]; + unsigned distance_extra_bits = lz77_encoded->data[++i]; + + writeBits(writer, length_extra_bits, n_length_extra_bits); + writeBitsReversed(writer, tree_d->codes[distance_code], tree_d->lengths[distance_code]); + writeBits(writer, distance_extra_bits, n_distance_extra_bits); + } + } +} + +/*Deflate for a block of type "dynamic", that is, with freely, optimally, created huffman trees*/ +static unsigned deflateDynamic(LodePNGBitWriter* writer, Hash* hash, + const unsigned char* data, size_t datapos, size_t dataend, + const LodePNGCompressSettings* settings, unsigned final) { + unsigned error = 0; + + /* + A block is compressed as follows: The PNG data is lz77 encoded, resulting in + literal bytes and length/distance pairs. This is then huffman compressed with + two huffman trees. One huffman tree is used for the lit and len values ("ll"), + another huffman tree is used for the dist values ("d"). These two trees are + stored using their code lengths, and to compress even more these code lengths + are also run-length encoded and huffman compressed. This gives a huffman tree + of code lengths "cl". The code lengths used to describe this third tree are + the code length code lengths ("clcl"). + */ + + /*The lz77 encoded data, represented with integers since there will also be length and distance codes in it*/ + uivector lz77_encoded; + HuffmanTree tree_ll; /*tree for lit,len values*/ + HuffmanTree tree_d; /*tree for distance codes*/ + HuffmanTree tree_cl; /*tree for encoding the code lengths representing tree_ll and tree_d*/ + unsigned* frequencies_ll = 0; /*frequency of lit,len codes*/ + unsigned* frequencies_d = 0; /*frequency of dist codes*/ + unsigned* frequencies_cl = 0; /*frequency of code length codes*/ + unsigned* bitlen_lld = 0; /*lit,len,dist code lengths (int bits), literally (without repeat codes).*/ + unsigned* bitlen_lld_e = 0; /*bitlen_lld encoded with repeat codes (this is a rudimentary run length compression)*/ + size_t datasize = dataend - datapos; + + /* + If we could call "bitlen_cl" the the code length code lengths ("clcl"), that is the bit lengths of codes to represent + tree_cl in CLCL_ORDER, then due to the huffman compression of huffman tree representations ("two levels"), there are + some analogies: + bitlen_lld is to tree_cl what data is to tree_ll and tree_d. + bitlen_lld_e is to bitlen_lld what lz77_encoded is to data. + bitlen_cl is to bitlen_lld_e what bitlen_lld is to lz77_encoded. 
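+  As an illustration of that run-length layer: a bitlen_lld run of
+  8 8 8 8 8 0 0 0 0 0 0 0 0 (five 8's followed by eight 0's) becomes
+  bitlen_lld_e = 8, 16, 1, 17, 5: a literal 8, repeat code 16 with extra value 1
+  ("repeat the previous length 3 + 1 more times"), then repeat code 17 with extra
+  value 5 (3 + 5 = 8 zeroes). The extra values are later written with 2, 3 or 7
+  bits for codes 16, 17 and 18 respectively.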
+ */ + + unsigned BFINAL = final; + size_t i; + size_t numcodes_ll, numcodes_d, numcodes_lld, numcodes_lld_e, numcodes_cl; + unsigned HLIT, HDIST, HCLEN; + + uivector_init(&lz77_encoded); + HuffmanTree_init(&tree_ll); + HuffmanTree_init(&tree_d); + HuffmanTree_init(&tree_cl); + /* could fit on stack, but >1KB is on the larger side so allocate instead */ + frequencies_ll = (unsigned*)lodepng_malloc(286 * sizeof(*frequencies_ll)); + frequencies_d = (unsigned*)lodepng_malloc(30 * sizeof(*frequencies_d)); + frequencies_cl = (unsigned*)lodepng_malloc(NUM_CODE_LENGTH_CODES * sizeof(*frequencies_cl)); + + if(!frequencies_ll || !frequencies_d || !frequencies_cl) error = 83; /*alloc fail*/ + + /*This while loop never loops due to a break at the end, it is here to + allow breaking out of it to the cleanup phase on error conditions.*/ + while(!error) { + lodepng_memset(frequencies_ll, 0, 286 * sizeof(*frequencies_ll)); + lodepng_memset(frequencies_d, 0, 30 * sizeof(*frequencies_d)); + lodepng_memset(frequencies_cl, 0, NUM_CODE_LENGTH_CODES * sizeof(*frequencies_cl)); + + if(settings->use_lz77) { + error = encodeLZ77(&lz77_encoded, hash, data, datapos, dataend, settings->windowsize, + settings->minmatch, settings->nicematch, settings->lazymatching); + if(error) break; + } else { + if(!uivector_resize(&lz77_encoded, datasize)) ERROR_BREAK(83 /*alloc fail*/); + for(i = datapos; i < dataend; ++i) lz77_encoded.data[i - datapos] = data[i]; /*no LZ77, but still will be Huffman compressed*/ + } + + /*Count the frequencies of lit, len and dist codes*/ + for(i = 0; i != lz77_encoded.size; ++i) { + unsigned symbol = lz77_encoded.data[i]; + ++frequencies_ll[symbol]; + if(symbol > 256) { + unsigned dist = lz77_encoded.data[i + 2]; + ++frequencies_d[dist]; + i += 3; + } + } + frequencies_ll[256] = 1; /*there will be exactly 1 end code, at the end of the block*/ + + /*Make both huffman trees, one for the lit and len codes, one for the dist codes*/ + error = HuffmanTree_makeFromFrequencies(&tree_ll, frequencies_ll, 257, 286, 15); + if(error) break; + /*2, not 1, is chosen for mincodes: some buggy PNG decoders require at least 2 symbols in the dist tree*/ + error = HuffmanTree_makeFromFrequencies(&tree_d, frequencies_d, 2, 30, 15); + if(error) break; + + numcodes_ll = LODEPNG_MIN(tree_ll.numcodes, 286); + numcodes_d = LODEPNG_MIN(tree_d.numcodes, 30); + /*store the code lengths of both generated trees in bitlen_lld*/ + numcodes_lld = numcodes_ll + numcodes_d; + bitlen_lld = (unsigned*)lodepng_malloc(numcodes_lld * sizeof(*bitlen_lld)); + /*numcodes_lld_e never needs more size than bitlen_lld*/ + bitlen_lld_e = (unsigned*)lodepng_malloc(numcodes_lld * sizeof(*bitlen_lld_e)); + if(!bitlen_lld || !bitlen_lld_e) ERROR_BREAK(83); /*alloc fail*/ + numcodes_lld_e = 0; + + for(i = 0; i != numcodes_ll; ++i) bitlen_lld[i] = tree_ll.lengths[i]; + for(i = 0; i != numcodes_d; ++i) bitlen_lld[numcodes_ll + i] = tree_d.lengths[i]; + + /*run-length compress bitlen_ldd into bitlen_lld_e by using repeat codes 16 (copy length 3-6 times), + 17 (3-10 zeroes), 18 (11-138 zeroes)*/ + for(i = 0; i != numcodes_lld; ++i) { + unsigned j = 0; /*amount of repetitions*/ + while(i + j + 1 < numcodes_lld && bitlen_lld[i + j + 1] == bitlen_lld[i]) ++j; + + if(bitlen_lld[i] == 0 && j >= 2) /*repeat code for zeroes*/ { + ++j; /*include the first zero*/ + if(j <= 10) /*repeat code 17 supports max 10 zeroes*/ { + bitlen_lld_e[numcodes_lld_e++] = 17; + bitlen_lld_e[numcodes_lld_e++] = j - 3; + } else /*repeat code 18 supports max 138 zeroes*/ { + if(j > 
138) j = 138; + bitlen_lld_e[numcodes_lld_e++] = 18; + bitlen_lld_e[numcodes_lld_e++] = j - 11; + } + i += (j - 1); + } else if(j >= 3) /*repeat code for value other than zero*/ { + size_t k; + unsigned num = j / 6u, rest = j % 6u; + bitlen_lld_e[numcodes_lld_e++] = bitlen_lld[i]; + for(k = 0; k < num; ++k) { + bitlen_lld_e[numcodes_lld_e++] = 16; + bitlen_lld_e[numcodes_lld_e++] = 6 - 3; + } + if(rest >= 3) { + bitlen_lld_e[numcodes_lld_e++] = 16; + bitlen_lld_e[numcodes_lld_e++] = rest - 3; + } + else j -= rest; + i += j; + } else /*too short to benefit from repeat code*/ { + bitlen_lld_e[numcodes_lld_e++] = bitlen_lld[i]; + } + } + + /*generate tree_cl, the huffmantree of huffmantrees*/ + for(i = 0; i != numcodes_lld_e; ++i) { + ++frequencies_cl[bitlen_lld_e[i]]; + /*after a repeat code come the bits that specify the number of repetitions, + those don't need to be in the frequencies_cl calculation*/ + if(bitlen_lld_e[i] >= 16) ++i; + } + + error = HuffmanTree_makeFromFrequencies(&tree_cl, frequencies_cl, + NUM_CODE_LENGTH_CODES, NUM_CODE_LENGTH_CODES, 7); + if(error) break; + + /*compute amount of code-length-code-lengths to output*/ + numcodes_cl = NUM_CODE_LENGTH_CODES; + /*trim zeros at the end (using CLCL_ORDER), but minimum size must be 4 (see HCLEN below)*/ + while(numcodes_cl > 4u && tree_cl.lengths[CLCL_ORDER[numcodes_cl - 1u]] == 0) { + numcodes_cl--; + } + + /* + Write everything into the output + + After the BFINAL and BTYPE, the dynamic block consists out of the following: + - 5 bits HLIT, 5 bits HDIST, 4 bits HCLEN + - (HCLEN+4)*3 bits code lengths of code length alphabet + - HLIT + 257 code lengths of lit/length alphabet (encoded using the code length + alphabet, + possible repetition codes 16, 17, 18) + - HDIST + 1 code lengths of distance alphabet (encoded using the code length + alphabet, + possible repetition codes 16, 17, 18) + - compressed data + - 256 (end code) + */ + + /*Write block type*/ + writeBits(writer, BFINAL, 1); + writeBits(writer, 0, 1); /*first bit of BTYPE "dynamic"*/ + writeBits(writer, 1, 1); /*second bit of BTYPE "dynamic"*/ + + /*write the HLIT, HDIST and HCLEN values*/ + /*all three sizes take trimmed ending zeroes into account, done either by HuffmanTree_makeFromFrequencies + or in the loop for numcodes_cl above, which saves space. 
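+    For example, with 260 lit/len codes, 5 distance codes and 15 code length
+    codes remaining after trimming, the values written below are HLIT = 3,
+    HDIST = 4 and HCLEN = 11; getTreeInflateDynamic adds the 257, 1 and 4 back
+    when decoding.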
*/ + HLIT = (unsigned)(numcodes_ll - 257); + HDIST = (unsigned)(numcodes_d - 1); + HCLEN = (unsigned)(numcodes_cl - 4); + writeBits(writer, HLIT, 5); + writeBits(writer, HDIST, 5); + writeBits(writer, HCLEN, 4); + + /*write the code lengths of the code length alphabet ("bitlen_cl")*/ + for(i = 0; i != numcodes_cl; ++i) writeBits(writer, tree_cl.lengths[CLCL_ORDER[i]], 3); + + /*write the lengths of the lit/len AND the dist alphabet*/ + for(i = 0; i != numcodes_lld_e; ++i) { + writeBitsReversed(writer, tree_cl.codes[bitlen_lld_e[i]], tree_cl.lengths[bitlen_lld_e[i]]); + /*extra bits of repeat codes*/ + if(bitlen_lld_e[i] == 16) writeBits(writer, bitlen_lld_e[++i], 2); + else if(bitlen_lld_e[i] == 17) writeBits(writer, bitlen_lld_e[++i], 3); + else if(bitlen_lld_e[i] == 18) writeBits(writer, bitlen_lld_e[++i], 7); + } + + /*write the compressed data symbols*/ + writeLZ77data(writer, &lz77_encoded, &tree_ll, &tree_d); + /*error: the length of the end code 256 must be larger than 0*/ + if(tree_ll.lengths[256] == 0) ERROR_BREAK(64); + + /*write the end code*/ + writeBitsReversed(writer, tree_ll.codes[256], tree_ll.lengths[256]); + + break; /*end of error-while*/ + } + + /*cleanup*/ + uivector_cleanup(&lz77_encoded); + HuffmanTree_cleanup(&tree_ll); + HuffmanTree_cleanup(&tree_d); + HuffmanTree_cleanup(&tree_cl); + lodepng_free(frequencies_ll); + lodepng_free(frequencies_d); + lodepng_free(frequencies_cl); + lodepng_free(bitlen_lld); + lodepng_free(bitlen_lld_e); + + return error; +} + +static unsigned deflateFixed(LodePNGBitWriter* writer, Hash* hash, + const unsigned char* data, + size_t datapos, size_t dataend, + const LodePNGCompressSettings* settings, unsigned final) { + HuffmanTree tree_ll; /*tree for literal values and length codes*/ + HuffmanTree tree_d; /*tree for distance codes*/ + + unsigned BFINAL = final; + unsigned error = 0; + size_t i; + + HuffmanTree_init(&tree_ll); + HuffmanTree_init(&tree_d); + + error = generateFixedLitLenTree(&tree_ll); + if(!error) error = generateFixedDistanceTree(&tree_d); + + if(!error) { + writeBits(writer, BFINAL, 1); + writeBits(writer, 1, 1); /*first bit of BTYPE*/ + writeBits(writer, 0, 1); /*second bit of BTYPE*/ + + if(settings->use_lz77) /*LZ77 encoded*/ { + uivector lz77_encoded; + uivector_init(&lz77_encoded); + error = encodeLZ77(&lz77_encoded, hash, data, datapos, dataend, settings->windowsize, + settings->minmatch, settings->nicematch, settings->lazymatching); + if(!error) writeLZ77data(writer, &lz77_encoded, &tree_ll, &tree_d); + uivector_cleanup(&lz77_encoded); + } else /*no LZ77, but still will be Huffman compressed*/ { + for(i = datapos; i < dataend; ++i) { + writeBitsReversed(writer, tree_ll.codes[data[i]], tree_ll.lengths[data[i]]); + } + } + /*add END code*/ + if(!error) writeBitsReversed(writer,tree_ll.codes[256], tree_ll.lengths[256]); + } + + /*cleanup*/ + HuffmanTree_cleanup(&tree_ll); + HuffmanTree_cleanup(&tree_d); + + return error; +} + +static unsigned lodepng_deflatev(ucvector* out, const unsigned char* in, size_t insize, + const LodePNGCompressSettings* settings) { + unsigned error = 0; + size_t i, blocksize, numdeflateblocks; + Hash hash; + LodePNGBitWriter writer; + + LodePNGBitWriter_init(&writer, out); + + if(settings->btype > 2) return 61; + else if(settings->btype == 0) return deflateNoCompression(out, in, insize); + else if(settings->btype == 1) blocksize = insize; + else /*if(settings->btype == 2)*/ { + /*on PNGs, deflate blocks of 65-262k seem to give most dense encoding*/ + blocksize = insize / 8u + 8; + 
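+    /*i.e. aim for roughly 8 blocks; the clamps below keep each block in the
+      64KiB - 256KiB range mentioned above*/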
if(blocksize < 65536) blocksize = 65536; + if(blocksize > 262144) blocksize = 262144; + } + + numdeflateblocks = (insize + blocksize - 1) / blocksize; + if(numdeflateblocks == 0) numdeflateblocks = 1; + + error = hash_init(&hash, settings->windowsize); + + if(!error) { + for(i = 0; i != numdeflateblocks && !error; ++i) { + unsigned final = (i == numdeflateblocks - 1); + size_t start = i * blocksize; + size_t end = start + blocksize; + if(end > insize) end = insize; + + if(settings->btype == 1) error = deflateFixed(&writer, &hash, in, start, end, settings, final); + else if(settings->btype == 2) error = deflateDynamic(&writer, &hash, in, start, end, settings, final); + } + } + + hash_cleanup(&hash); + + return error; +} + +unsigned lodepng_deflate(unsigned char** out, size_t* outsize, + const unsigned char* in, size_t insize, + const LodePNGCompressSettings* settings) { + ucvector v = ucvector_init(*out, *outsize); + unsigned error = lodepng_deflatev(&v, in, insize, settings); + *out = v.data; + *outsize = v.size; + return error; +} + +static unsigned deflate(unsigned char** out, size_t* outsize, + const unsigned char* in, size_t insize, + const LodePNGCompressSettings* settings) { + if(settings->custom_deflate) { + unsigned error = settings->custom_deflate(out, outsize, in, insize, settings); + /*the custom deflate is allowed to have its own error codes, however, we translate it to code 111*/ + return error ? 111 : 0; + } else { + return lodepng_deflate(out, outsize, in, insize, settings); + } +} + +#endif /*LODEPNG_COMPILE_DECODER*/ + +/* ////////////////////////////////////////////////////////////////////////// */ +/* / Adler32 / */ +/* ////////////////////////////////////////////////////////////////////////// */ + +static unsigned update_adler32(unsigned adler, const unsigned char* data, unsigned len) { + unsigned s1 = adler & 0xffffu; + unsigned s2 = (adler >> 16u) & 0xffffu; + + while(len != 0u) { + unsigned i; + /*at least 5552 sums can be done before the sums overflow, saving a lot of module divisions*/ + unsigned amount = len > 5552u ? 
5552u : len; + len -= amount; + for(i = 0; i != amount; ++i) { + s1 += (*data++); + s2 += s1; + } + s1 %= 65521u; + s2 %= 65521u; + } + + return (s2 << 16u) | s1; +} + +/*Return the adler32 of the bytes data[0..len-1]*/ +static unsigned adler32(const unsigned char* data, unsigned len) { + return update_adler32(1u, data, len); +} + +/* ////////////////////////////////////////////////////////////////////////// */ +/* / Zlib / */ +/* ////////////////////////////////////////////////////////////////////////// */ + +#ifdef LODEPNG_COMPILE_DECODER + +static unsigned lodepng_zlib_decompressv(ucvector* out, + const unsigned char* in, size_t insize, + const LodePNGDecompressSettings* settings) { + unsigned error = 0; + unsigned CM, CINFO, FDICT; + + if(insize < 2) return 53; /*error, size of zlib data too small*/ + /*read information from zlib header*/ + if((in[0] * 256 + in[1]) % 31 != 0) { + /*error: 256 * in[0] + in[1] must be a multiple of 31, the FCHECK value is supposed to be made that way*/ + return 24; + } + + CM = in[0] & 15; + CINFO = (in[0] >> 4) & 15; + /*FCHECK = in[1] & 31;*/ /*FCHECK is already tested above*/ + FDICT = (in[1] >> 5) & 1; + /*FLEVEL = (in[1] >> 6) & 3;*/ /*FLEVEL is not used here*/ + + if(CM != 8 || CINFO > 7) { + /*error: only compression method 8: inflate with sliding window of 32k is supported by the PNG spec*/ + return 25; + } + if(FDICT != 0) { + /*error: the specification of PNG says about the zlib stream: + "The additional flags shall not specify a preset dictionary."*/ + return 26; + } + + error = inflatev(out, in + 2, insize - 2, settings); + if(error) return error; + + if(!settings->ignore_adler32) { + unsigned ADLER32 = lodepng_read32bitInt(&in[insize - 4]); + unsigned checksum = adler32(out->data, (unsigned)(out->size)); + if(checksum != ADLER32) return 58; /*error, adler checksum not correct, data must be corrupted*/ + } + + return 0; /*no error*/ +} + + +unsigned lodepng_zlib_decompress(unsigned char** out, size_t* outsize, const unsigned char* in, + size_t insize, const LodePNGDecompressSettings* settings) { + ucvector v = ucvector_init(*out, *outsize); + unsigned error = lodepng_zlib_decompressv(&v, in, insize, settings); + *out = v.data; + *outsize = v.size; + return error; +} + +/*expected_size is expected output size, to avoid intermediate allocations. Set to 0 if not known. 
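+For example, a caller that knows the output will be about 100000 bytes can pass
+expected_size = 100000 so that the single reserve below avoids intermediate
+reallocations; passing 0 keeps the plain grow-as-needed behavior.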
*/ +static unsigned zlib_decompress(unsigned char** out, size_t* outsize, size_t expected_size, + const unsigned char* in, size_t insize, const LodePNGDecompressSettings* settings) { + unsigned error; + if(settings->custom_zlib) { + error = settings->custom_zlib(out, outsize, in, insize, settings); + if(error) { + /*the custom zlib is allowed to have its own error codes, however, we translate it to code 110*/ + error = 110; + /*if there's a max output size, and the custom zlib returned error, then indicate that error instead*/ + if(settings->max_output_size && *outsize > settings->max_output_size) error = 109; + } + } else { + ucvector v = ucvector_init(*out, *outsize); + if(expected_size) { + /*reserve the memory to avoid intermediate reallocations*/ + ucvector_resize(&v, *outsize + expected_size); + v.size = *outsize; + } + error = lodepng_zlib_decompressv(&v, in, insize, settings); + *out = v.data; + *outsize = v.size; + } + return error; +} + +#endif /*LODEPNG_COMPILE_DECODER*/ + +#ifdef LODEPNG_COMPILE_ENCODER + +unsigned lodepng_zlib_compress(unsigned char** out, size_t* outsize, const unsigned char* in, + size_t insize, const LodePNGCompressSettings* settings) { + size_t i; + unsigned error; + unsigned char* deflatedata = 0; + size_t deflatesize = 0; + + error = deflate(&deflatedata, &deflatesize, in, insize, settings); + + *out = NULL; + *outsize = 0; + if(!error) { + *outsize = deflatesize + 6; + *out = (unsigned char*)lodepng_malloc(*outsize); + if(!*out) error = 83; /*alloc fail*/ + } + + if(!error) { + unsigned ADLER32 = adler32(in, (unsigned)insize); + /*zlib data: 1 byte CMF (CM+CINFO), 1 byte FLG, deflate data, 4 byte ADLER32 checksum of the Decompressed data*/ + unsigned CMF = 120; /*0b01111000: CM 8, CINFO 7. With CINFO 7, any window size up to 32768 can be used.*/ + unsigned FLEVEL = 0; + unsigned FDICT = 0; + unsigned CMFFLG = 256 * CMF + FDICT * 32 + FLEVEL * 64; + unsigned FCHECK = 31 - CMFFLG % 31; + CMFFLG += FCHECK; + + (*out)[0] = (unsigned char)(CMFFLG >> 8); + (*out)[1] = (unsigned char)(CMFFLG & 255); + for(i = 0; i != deflatesize; ++i) (*out)[i + 2] = deflatedata[i]; + lodepng_set32bitInt(&(*out)[*outsize - 4], ADLER32); + } + + lodepng_free(deflatedata); + return error; +} + +/* compress using the default or custom zlib function */ +static unsigned zlib_compress(unsigned char** out, size_t* outsize, const unsigned char* in, + size_t insize, const LodePNGCompressSettings* settings) { + if(settings->custom_zlib) { + unsigned error = settings->custom_zlib(out, outsize, in, insize, settings); + /*the custom zlib is allowed to have its own error codes, however, we translate it to code 111*/ + return error ? 
111 : 0; + } else { + return lodepng_zlib_compress(out, outsize, in, insize, settings); + } +} + +#endif /*LODEPNG_COMPILE_ENCODER*/ + +#else /*no LODEPNG_COMPILE_ZLIB*/ + +#ifdef LODEPNG_COMPILE_DECODER +static unsigned zlib_decompress(unsigned char** out, size_t* outsize, size_t expected_size, + const unsigned char* in, size_t insize, const LodePNGDecompressSettings* settings) { + if(!settings->custom_zlib) return 87; /*no custom zlib function provided */ + (void)expected_size; + return settings->custom_zlib(out, outsize, in, insize, settings); +} +#endif /*LODEPNG_COMPILE_DECODER*/ +#ifdef LODEPNG_COMPILE_ENCODER +static unsigned zlib_compress(unsigned char** out, size_t* outsize, const unsigned char* in, + size_t insize, const LodePNGCompressSettings* settings) { + if(!settings->custom_zlib) return 87; /*no custom zlib function provided */ + return settings->custom_zlib(out, outsize, in, insize, settings); +} +#endif /*LODEPNG_COMPILE_ENCODER*/ + +#endif /*LODEPNG_COMPILE_ZLIB*/ + +/* ////////////////////////////////////////////////////////////////////////// */ + +#ifdef LODEPNG_COMPILE_ENCODER + +/*this is a good tradeoff between speed and compression ratio*/ +#define DEFAULT_WINDOWSIZE 2048 + +void lodepng_compress_settings_init(LodePNGCompressSettings* settings) { + /*compress with dynamic huffman tree (not in the mathematical sense, just not the predefined one)*/ + settings->btype = 2; + settings->use_lz77 = 1; + settings->windowsize = DEFAULT_WINDOWSIZE; + settings->minmatch = 3; + settings->nicematch = 128; + settings->lazymatching = 1; + + settings->custom_zlib = 0; + settings->custom_deflate = 0; + settings->custom_context = 0; +} + +const LodePNGCompressSettings lodepng_default_compress_settings = {2, 1, DEFAULT_WINDOWSIZE, 3, 128, 1, 0, 0, 0}; + + +#endif /*LODEPNG_COMPILE_ENCODER*/ + +#ifdef LODEPNG_COMPILE_DECODER + +void lodepng_decompress_settings_init(LodePNGDecompressSettings* settings) { + settings->ignore_adler32 = 0; + settings->ignore_nlen = 0; + settings->max_output_size = 0; + + settings->custom_zlib = 0; + settings->custom_inflate = 0; + settings->custom_context = 0; +} + +const LodePNGDecompressSettings lodepng_default_decompress_settings = {0, 0, 0, 0, 0, 0}; + +#endif /*LODEPNG_COMPILE_DECODER*/ + +/* ////////////////////////////////////////////////////////////////////////// */ +/* ////////////////////////////////////////////////////////////////////////// */ +/* // End of Zlib related code. Begin of PNG related code. 
// */ +/* ////////////////////////////////////////////////////////////////////////// */ +/* ////////////////////////////////////////////////////////////////////////// */ + +#ifdef LODEPNG_COMPILE_PNG + +/* ////////////////////////////////////////////////////////////////////////// */ +/* / CRC32 / */ +/* ////////////////////////////////////////////////////////////////////////// */ + + +#ifndef LODEPNG_NO_COMPILE_CRC +/* CRC polynomial: 0xedb88320 */ +static unsigned lodepng_crc32_table[256] = { + 0u, 1996959894u, 3993919788u, 2567524794u, 124634137u, 1886057615u, 3915621685u, 2657392035u, + 249268274u, 2044508324u, 3772115230u, 2547177864u, 162941995u, 2125561021u, 3887607047u, 2428444049u, + 498536548u, 1789927666u, 4089016648u, 2227061214u, 450548861u, 1843258603u, 4107580753u, 2211677639u, + 325883990u, 1684777152u, 4251122042u, 2321926636u, 335633487u, 1661365465u, 4195302755u, 2366115317u, + 997073096u, 1281953886u, 3579855332u, 2724688242u, 1006888145u, 1258607687u, 3524101629u, 2768942443u, + 901097722u, 1119000684u, 3686517206u, 2898065728u, 853044451u, 1172266101u, 3705015759u, 2882616665u, + 651767980u, 1373503546u, 3369554304u, 3218104598u, 565507253u, 1454621731u, 3485111705u, 3099436303u, + 671266974u, 1594198024u, 3322730930u, 2970347812u, 795835527u, 1483230225u, 3244367275u, 3060149565u, + 1994146192u, 31158534u, 2563907772u, 4023717930u, 1907459465u, 112637215u, 2680153253u, 3904427059u, + 2013776290u, 251722036u, 2517215374u, 3775830040u, 2137656763u, 141376813u, 2439277719u, 3865271297u, + 1802195444u, 476864866u, 2238001368u, 4066508878u, 1812370925u, 453092731u, 2181625025u, 4111451223u, + 1706088902u, 314042704u, 2344532202u, 4240017532u, 1658658271u, 366619977u, 2362670323u, 4224994405u, + 1303535960u, 984961486u, 2747007092u, 3569037538u, 1256170817u, 1037604311u, 2765210733u, 3554079995u, + 1131014506u, 879679996u, 2909243462u, 3663771856u, 1141124467u, 855842277u, 2852801631u, 3708648649u, + 1342533948u, 654459306u, 3188396048u, 3373015174u, 1466479909u, 544179635u, 3110523913u, 3462522015u, + 1591671054u, 702138776u, 2966460450u, 3352799412u, 1504918807u, 783551873u, 3082640443u, 3233442989u, + 3988292384u, 2596254646u, 62317068u, 1957810842u, 3939845945u, 2647816111u, 81470997u, 1943803523u, + 3814918930u, 2489596804u, 225274430u, 2053790376u, 3826175755u, 2466906013u, 167816743u, 2097651377u, + 4027552580u, 2265490386u, 503444072u, 1762050814u, 4150417245u, 2154129355u, 426522225u, 1852507879u, + 4275313526u, 2312317920u, 282753626u, 1742555852u, 4189708143u, 2394877945u, 397917763u, 1622183637u, + 3604390888u, 2714866558u, 953729732u, 1340076626u, 3518719985u, 2797360999u, 1068828381u, 1219638859u, + 3624741850u, 2936675148u, 906185462u, 1090812512u, 3747672003u, 2825379669u, 829329135u, 1181335161u, + 3412177804u, 3160834842u, 628085408u, 1382605366u, 3423369109u, 3138078467u, 570562233u, 1426400815u, + 3317316542u, 2998733608u, 733239954u, 1555261956u, 3268935591u, 3050360625u, 752459403u, 1541320221u, + 2607071920u, 3965973030u, 1969922972u, 40735498u, 2617837225u, 3943577151u, 1913087877u, 83908371u, + 2512341634u, 3803740692u, 2075208622u, 213261112u, 2463272603u, 3855990285u, 2094854071u, 198958881u, + 2262029012u, 4057260610u, 1759359992u, 534414190u, 2176718541u, 4139329115u, 1873836001u, 414664567u, + 2282248934u, 4279200368u, 1711684554u, 285281116u, 2405801727u, 4167216745u, 1634467795u, 376229701u, + 2685067896u, 3608007406u, 1308918612u, 956543938u, 2808555105u, 3495958263u, 1231636301u, 1047427035u, + 2932959818u, 3654703836u, 1088359270u, 
936918000u, 2847714899u, 3736837829u, 1202900863u, 817233897u, + 3183342108u, 3401237130u, 1404277552u, 615818150u, 3134207493u, 3453421203u, 1423857449u, 601450431u, + 3009837614u, 3294710456u, 1567103746u, 711928724u, 3020668471u, 3272380065u, 1510334235u, 755167117u +}; + +/*Return the CRC of the bytes buf[0..len-1].*/ +unsigned lodepng_crc32(const unsigned char* data, size_t length) { + unsigned r = 0xffffffffu; + size_t i; + for(i = 0; i < length; ++i) { + r = lodepng_crc32_table[(r ^ data[i]) & 0xffu] ^ (r >> 8u); + } + return r ^ 0xffffffffu; +} +#else /* !LODEPNG_NO_COMPILE_CRC */ +unsigned lodepng_crc32(const unsigned char* data, size_t length); +#endif /* !LODEPNG_NO_COMPILE_CRC */ + +/* ////////////////////////////////////////////////////////////////////////// */ +/* / Reading and writing PNG color channel bits / */ +/* ////////////////////////////////////////////////////////////////////////// */ + +/* The color channel bits of less-than-8-bit pixels are read with the MSB of bytes first, +so LodePNGBitWriter and LodePNGBitReader can't be used for those. */ + +static unsigned char readBitFromReversedStream(size_t* bitpointer, const unsigned char* bitstream) { + unsigned char result = (unsigned char)((bitstream[(*bitpointer) >> 3] >> (7 - ((*bitpointer) & 0x7))) & 1); + ++(*bitpointer); + return result; +} + +/* TODO: make this faster */ +static unsigned readBitsFromReversedStream(size_t* bitpointer, const unsigned char* bitstream, size_t nbits) { + unsigned result = 0; + size_t i; + for(i = 0 ; i < nbits; ++i) { + result <<= 1u; + result |= (unsigned)readBitFromReversedStream(bitpointer, bitstream); + } + return result; +} + +static void setBitOfReversedStream(size_t* bitpointer, unsigned char* bitstream, unsigned char bit) { + /*the current bit in bitstream may be 0 or 1 for this to work*/ + if(bit == 0) bitstream[(*bitpointer) >> 3u] &= (unsigned char)(~(1u << (7u - ((*bitpointer) & 7u)))); + else bitstream[(*bitpointer) >> 3u] |= (1u << (7u - ((*bitpointer) & 7u))); + ++(*bitpointer); +} + +/* ////////////////////////////////////////////////////////////////////////// */ +/* / PNG chunks / */ +/* ////////////////////////////////////////////////////////////////////////// */ + +unsigned lodepng_chunk_length(const unsigned char* chunk) { + return lodepng_read32bitInt(&chunk[0]); +} + +void lodepng_chunk_type(char type[5], const unsigned char* chunk) { + unsigned i; + for(i = 0; i != 4; ++i) type[i] = (char)chunk[4 + i]; + type[4] = 0; /*null termination char*/ +} + +unsigned char lodepng_chunk_type_equals(const unsigned char* chunk, const char* type) { + if(lodepng_strlen(type) != 4) return 0; + return (chunk[4] == type[0] && chunk[5] == type[1] && chunk[6] == type[2] && chunk[7] == type[3]); +} + +unsigned char lodepng_chunk_ancillary(const unsigned char* chunk) { + return((chunk[4] & 32) != 0); +} + +unsigned char lodepng_chunk_private(const unsigned char* chunk) { + return((chunk[6] & 32) != 0); +} + +unsigned char lodepng_chunk_safetocopy(const unsigned char* chunk) { + return((chunk[7] & 32) != 0); +} + +unsigned char* lodepng_chunk_data(unsigned char* chunk) { + return &chunk[8]; +} + +const unsigned char* lodepng_chunk_data_const(const unsigned char* chunk) { + return &chunk[8]; +} + +unsigned lodepng_chunk_check_crc(const unsigned char* chunk) { + unsigned length = lodepng_chunk_length(chunk); + unsigned CRC = lodepng_read32bitInt(&chunk[length + 8]); + /*the CRC is taken of the data and the 4 chunk type letters, not the length*/ + unsigned checksum = 
lodepng_crc32(&chunk[4], length + 4); + if(CRC != checksum) return 1; + else return 0; +} + +void lodepng_chunk_generate_crc(unsigned char* chunk) { + unsigned length = lodepng_chunk_length(chunk); + unsigned CRC = lodepng_crc32(&chunk[4], length + 4); + lodepng_set32bitInt(chunk + 8 + length, CRC); +} + +unsigned char* lodepng_chunk_next(unsigned char* chunk, unsigned char* end) { + if(chunk >= end || end - chunk < 12) return end; /*too small to contain a chunk*/ + if(chunk[0] == 0x89 && chunk[1] == 0x50 && chunk[2] == 0x4e && chunk[3] == 0x47 + && chunk[4] == 0x0d && chunk[5] == 0x0a && chunk[6] == 0x1a && chunk[7] == 0x0a) { + /* Is PNG magic header at start of PNG file. Jump to first actual chunk. */ + return chunk + 8; + } else { + size_t total_chunk_length; + unsigned char* result; + if(lodepng_addofl(lodepng_chunk_length(chunk), 12, &total_chunk_length)) return end; + result = chunk + total_chunk_length; + if(result < chunk) return end; /*pointer overflow*/ + return result; + } +} + +const unsigned char* lodepng_chunk_next_const(const unsigned char* chunk, const unsigned char* end) { + if(chunk >= end || end - chunk < 12) return end; /*too small to contain a chunk*/ + if(chunk[0] == 0x89 && chunk[1] == 0x50 && chunk[2] == 0x4e && chunk[3] == 0x47 + && chunk[4] == 0x0d && chunk[5] == 0x0a && chunk[6] == 0x1a && chunk[7] == 0x0a) { + /* Is PNG magic header at start of PNG file. Jump to first actual chunk. */ + return chunk + 8; + } else { + size_t total_chunk_length; + const unsigned char* result; + if(lodepng_addofl(lodepng_chunk_length(chunk), 12, &total_chunk_length)) return end; + result = chunk + total_chunk_length; + if(result < chunk) return end; /*pointer overflow*/ + return result; + } +} + +unsigned char* lodepng_chunk_find(unsigned char* chunk, unsigned char* end, const char type[5]) { + for(;;) { + if(chunk >= end || end - chunk < 12) return 0; /* past file end: chunk + 12 > end */ + if(lodepng_chunk_type_equals(chunk, type)) return chunk; + chunk = lodepng_chunk_next(chunk, end); + } +} + +const unsigned char* lodepng_chunk_find_const(const unsigned char* chunk, const unsigned char* end, const char type[5]) { + for(;;) { + if(chunk >= end || end - chunk < 12) return 0; /* past file end: chunk + 12 > end */ + if(lodepng_chunk_type_equals(chunk, type)) return chunk; + chunk = lodepng_chunk_next_const(chunk, end); + } +} + +unsigned lodepng_chunk_append(unsigned char** out, size_t* outsize, const unsigned char* chunk) { + unsigned i; + size_t total_chunk_length, new_length; + unsigned char *chunk_start, *new_buffer; + + if(lodepng_addofl(lodepng_chunk_length(chunk), 12, &total_chunk_length)) return 77; + if(lodepng_addofl(*outsize, total_chunk_length, &new_length)) return 77; + + new_buffer = (unsigned char*)lodepng_realloc(*out, new_length); + if(!new_buffer) return 83; /*alloc fail*/ + (*out) = new_buffer; + (*outsize) = new_length; + chunk_start = &(*out)[new_length - total_chunk_length]; + + for(i = 0; i != total_chunk_length; ++i) chunk_start[i] = chunk[i]; + + return 0; +} + +/*Sets length and name and allocates the space for data and crc but does not +set data or crc yet. Returns the start of the chunk in chunk. The start of +the data is at chunk + 8. 
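+For orientation, an illustrative sketch of the chunk layout these helpers assume,
+with offsets relative to the returned chunk pointer:
+  chunk[0..3]            big-endian length of the data field
+  chunk[4..7]            the four chunk type letters
+  chunk[8..8+len-1]      the data
+  chunk[8+len..11+len]   CRC32 over the type letters and the data, never the length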
To finalize chunk, add the data, then use +lodepng_chunk_generate_crc */ +static unsigned lodepng_chunk_init(unsigned char** chunk, + ucvector* out, + unsigned length, const char* type) { + size_t new_length = out->size; + if(lodepng_addofl(new_length, length, &new_length)) return 77; + if(lodepng_addofl(new_length, 12, &new_length)) return 77; + if(!ucvector_resize(out, new_length)) return 83; /*alloc fail*/ + *chunk = out->data + new_length - length - 12u; + + /*1: length*/ + lodepng_set32bitInt(*chunk, length); + + /*2: chunk name (4 letters)*/ + lodepng_memcpy(*chunk + 4, type, 4); + + return 0; +} + +/* like lodepng_chunk_create but with custom allocsize */ +static unsigned lodepng_chunk_createv(ucvector* out, + unsigned length, const char* type, const unsigned char* data) { + unsigned char* chunk; + CERROR_TRY_RETURN(lodepng_chunk_init(&chunk, out, length, type)); + + /*3: the data*/ + lodepng_memcpy(chunk + 8, data, length); + + /*4: CRC (of the chunkname characters and the data)*/ + lodepng_chunk_generate_crc(chunk); + + return 0; +} + +unsigned lodepng_chunk_create(unsigned char** out, size_t* outsize, + unsigned length, const char* type, const unsigned char* data) { + ucvector v = ucvector_init(*out, *outsize); + unsigned error = lodepng_chunk_createv(&v, length, type, data); + *out = v.data; + *outsize = v.size; + return error; +} + +/* ////////////////////////////////////////////////////////////////////////// */ +/* / Color types, channels, bits / */ +/* ////////////////////////////////////////////////////////////////////////// */ + +/*checks if the colortype is valid and the bitdepth bd is allowed for this colortype. +Return value is a LodePNG error code.*/ +static unsigned checkColorValidity(LodePNGColorType colortype, unsigned bd) { + switch(colortype) { + case LCT_GREY: if(!(bd == 1 || bd == 2 || bd == 4 || bd == 8 || bd == 16)) return 37; break; + case LCT_RGB: if(!( bd == 8 || bd == 16)) return 37; break; + case LCT_PALETTE: if(!(bd == 1 || bd == 2 || bd == 4 || bd == 8 )) return 37; break; + case LCT_GREY_ALPHA: if(!( bd == 8 || bd == 16)) return 37; break; + case LCT_RGBA: if(!( bd == 8 || bd == 16)) return 37; break; + case LCT_MAX_OCTET_VALUE: return 31; /* invalid color type */ + default: return 31; /* invalid color type */ + } + return 0; /*allowed color type / bits combination*/ +} + +static unsigned getNumColorChannels(LodePNGColorType colortype) { + switch(colortype) { + case LCT_GREY: return 1; + case LCT_RGB: return 3; + case LCT_PALETTE: return 1; + case LCT_GREY_ALPHA: return 2; + case LCT_RGBA: return 4; + case LCT_MAX_OCTET_VALUE: return 0; /* invalid color type */ + default: return 0; /*invalid color type*/ + } +} + +static unsigned lodepng_get_bpp_lct(LodePNGColorType colortype, unsigned bitdepth) { + /*bits per pixel is amount of channels * bits per channel*/ + return getNumColorChannels(colortype) * bitdepth; +} + +/* ////////////////////////////////////////////////////////////////////////// */ + +void lodepng_color_mode_init(LodePNGColorMode* info) { + info->key_defined = 0; + info->key_r = info->key_g = info->key_b = 0; + info->colortype = LCT_RGBA; + info->bitdepth = 8; + info->palette = 0; + info->palettesize = 0; +} + +/*allocates palette memory if needed, and initializes all colors to black*/ +static void lodepng_color_mode_alloc_palette(LodePNGColorMode* info) { + size_t i; + /*if the palette is already allocated, it will have size 1024 so no reallocation needed in that case*/ + /*the palette must have room for up to 256 colors with 4 bytes 
each.*/ + if(!info->palette) info->palette = (unsigned char*)lodepng_malloc(1024); + if(!info->palette) return; /*alloc fail*/ + for(i = 0; i != 256; ++i) { + /*Initialize all unused colors with black, the value used for invalid palette indices. + This is an error according to the PNG spec, but common PNG decoders make it black instead. + That makes color conversion slightly faster due to no error handling needed.*/ + info->palette[i * 4 + 0] = 0; + info->palette[i * 4 + 1] = 0; + info->palette[i * 4 + 2] = 0; + info->palette[i * 4 + 3] = 255; + } +} + +void lodepng_color_mode_cleanup(LodePNGColorMode* info) { + lodepng_palette_clear(info); +} + +unsigned lodepng_color_mode_copy(LodePNGColorMode* dest, const LodePNGColorMode* source) { + lodepng_color_mode_cleanup(dest); + lodepng_memcpy(dest, source, sizeof(LodePNGColorMode)); + if(source->palette) { + dest->palette = (unsigned char*)lodepng_malloc(1024); + if(!dest->palette && source->palettesize) return 83; /*alloc fail*/ + lodepng_memcpy(dest->palette, source->palette, source->palettesize * 4); + } + return 0; +} + +LodePNGColorMode lodepng_color_mode_make(LodePNGColorType colortype, unsigned bitdepth) { + LodePNGColorMode result; + lodepng_color_mode_init(&result); + result.colortype = colortype; + result.bitdepth = bitdepth; + return result; +} + +static int lodepng_color_mode_equal(const LodePNGColorMode* a, const LodePNGColorMode* b) { + size_t i; + if(a->colortype != b->colortype) return 0; + if(a->bitdepth != b->bitdepth) return 0; + if(a->key_defined != b->key_defined) return 0; + if(a->key_defined) { + if(a->key_r != b->key_r) return 0; + if(a->key_g != b->key_g) return 0; + if(a->key_b != b->key_b) return 0; + } + if(a->palettesize != b->palettesize) return 0; + for(i = 0; i != a->palettesize * 4; ++i) { + if(a->palette[i] != b->palette[i]) return 0; + } + return 1; +} + +void lodepng_palette_clear(LodePNGColorMode* info) { + if(info->palette) lodepng_free(info->palette); + info->palette = 0; + info->palettesize = 0; +} + +unsigned lodepng_palette_add(LodePNGColorMode* info, + unsigned char r, unsigned char g, unsigned char b, unsigned char a) { + if(!info->palette) /*allocate palette if empty*/ { + lodepng_color_mode_alloc_palette(info); + if(!info->palette) return 83; /*alloc fail*/ + } + if(info->palettesize >= 256) { + return 108; /*too many palette values*/ + } + info->palette[4 * info->palettesize + 0] = r; + info->palette[4 * info->palettesize + 1] = g; + info->palette[4 * info->palettesize + 2] = b; + info->palette[4 * info->palettesize + 3] = a; + ++info->palettesize; + return 0; +} + +/*calculate bits per pixel out of colortype and bitdepth*/ +unsigned lodepng_get_bpp(const LodePNGColorMode* info) { + return lodepng_get_bpp_lct(info->colortype, info->bitdepth); +} + +unsigned lodepng_get_channels(const LodePNGColorMode* info) { + return getNumColorChannels(info->colortype); +} + +unsigned lodepng_is_greyscale_type(const LodePNGColorMode* info) { + return info->colortype == LCT_GREY || info->colortype == LCT_GREY_ALPHA; +} + +unsigned lodepng_is_alpha_type(const LodePNGColorMode* info) { + return (info->colortype & 4) != 0; /*4 or 6*/ +} + +unsigned lodepng_is_palette_type(const LodePNGColorMode* info) { + return info->colortype == LCT_PALETTE; +} + +unsigned lodepng_has_palette_alpha(const LodePNGColorMode* info) { + size_t i; + for(i = 0; i != info->palettesize; ++i) { + if(info->palette[i * 4 + 3] < 255) return 1; + } + return 0; +} + +unsigned lodepng_can_have_alpha(const LodePNGColorMode* info) { + return 
info->key_defined + || lodepng_is_alpha_type(info) + || lodepng_has_palette_alpha(info); +} + +static size_t lodepng_get_raw_size_lct(unsigned w, unsigned h, LodePNGColorType colortype, unsigned bitdepth) { + size_t bpp = lodepng_get_bpp_lct(colortype, bitdepth); + size_t n = (size_t)w * (size_t)h; + return ((n / 8u) * bpp) + ((n & 7u) * bpp + 7u) / 8u; +} + +size_t lodepng_get_raw_size(unsigned w, unsigned h, const LodePNGColorMode* color) { + return lodepng_get_raw_size_lct(w, h, color->colortype, color->bitdepth); +} + + +#ifdef LODEPNG_COMPILE_PNG + +/*in an idat chunk, each scanline is a multiple of 8 bits, unlike the lodepng output buffer, +and in addition has one extra byte per line: the filter byte. So this gives a larger +result than lodepng_get_raw_size. Set h to 1 to get the size of 1 row including filter byte. */ +static size_t lodepng_get_raw_size_idat(unsigned w, unsigned h, unsigned bpp) { + /* + 1 for the filter byte, and possibly plus padding bits per line. */ + /* Ignoring casts, the expression is equal to (w * bpp + 7) / 8 + 1, but avoids overflow of w * bpp */ + size_t line = ((size_t)(w / 8u) * bpp) + 1u + ((w & 7u) * bpp + 7u) / 8u; + return (size_t)h * line; +} + +#ifdef LODEPNG_COMPILE_DECODER +/*Safely checks whether size_t overflow can be caused due to amount of pixels. +This check is overcautious rather than precise. If this check indicates no overflow, +you can safely compute in a size_t (but not an unsigned): +-(size_t)w * (size_t)h * 8 +-amount of bytes in IDAT (including filter, padding and Adam7 bytes) +-amount of bytes in raw color model +Returns 1 if overflow possible, 0 if not. +*/ +static int lodepng_pixel_overflow(unsigned w, unsigned h, + const LodePNGColorMode* pngcolor, const LodePNGColorMode* rawcolor) { + size_t bpp = LODEPNG_MAX(lodepng_get_bpp(pngcolor), lodepng_get_bpp(rawcolor)); + size_t numpixels, total; + size_t line; /* bytes per line in worst case */ + + if(lodepng_mulofl((size_t)w, (size_t)h, &numpixels)) return 1; + if(lodepng_mulofl(numpixels, 8, &total)) return 1; /* bit pointer with 8-bit color, or 8 bytes per channel color */ + + /* Bytes per scanline with the expression "(w / 8u) * bpp) + ((w & 7u) * bpp + 7u) / 8u" */ + if(lodepng_mulofl((size_t)(w / 8u), bpp, &line)) return 1; + if(lodepng_addofl(line, ((w & 7u) * bpp + 7u) / 8u, &line)) return 1; + + if(lodepng_addofl(line, 5, &line)) return 1; /* 5 bytes overhead per line: 1 filterbyte, 4 for Adam7 worst case */ + if(lodepng_mulofl(line, h, &total)) return 1; /* Total bytes in worst case */ + + return 0; /* no overflow */ +} +#endif /*LODEPNG_COMPILE_DECODER*/ +#endif /*LODEPNG_COMPILE_PNG*/ + +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS + +static void LodePNGUnknownChunks_init(LodePNGInfo* info) { + unsigned i; + for(i = 0; i != 3; ++i) info->unknown_chunks_data[i] = 0; + for(i = 0; i != 3; ++i) info->unknown_chunks_size[i] = 0; +} + +static void LodePNGUnknownChunks_cleanup(LodePNGInfo* info) { + unsigned i; + for(i = 0; i != 3; ++i) lodepng_free(info->unknown_chunks_data[i]); +} + +static unsigned LodePNGUnknownChunks_copy(LodePNGInfo* dest, const LodePNGInfo* src) { + unsigned i; + + LodePNGUnknownChunks_cleanup(dest); + + for(i = 0; i != 3; ++i) { + size_t j; + dest->unknown_chunks_size[i] = src->unknown_chunks_size[i]; + dest->unknown_chunks_data[i] = (unsigned char*)lodepng_malloc(src->unknown_chunks_size[i]); + if(!dest->unknown_chunks_data[i] && dest->unknown_chunks_size[i]) return 83; /*alloc fail*/ + for(j = 0; j < src->unknown_chunks_size[i]; ++j) { + 
dest->unknown_chunks_data[i][j] = src->unknown_chunks_data[i][j]; + } + } + + return 0; +} + +/******************************************************************************/ + +static void LodePNGText_init(LodePNGInfo* info) { + info->text_num = 0; + info->text_keys = NULL; + info->text_strings = NULL; +} + +static void LodePNGText_cleanup(LodePNGInfo* info) { + size_t i; + for(i = 0; i != info->text_num; ++i) { + string_cleanup(&info->text_keys[i]); + string_cleanup(&info->text_strings[i]); + } + lodepng_free(info->text_keys); + lodepng_free(info->text_strings); +} + +static unsigned LodePNGText_copy(LodePNGInfo* dest, const LodePNGInfo* source) { + size_t i = 0; + dest->text_keys = NULL; + dest->text_strings = NULL; + dest->text_num = 0; + for(i = 0; i != source->text_num; ++i) { + CERROR_TRY_RETURN(lodepng_add_text(dest, source->text_keys[i], source->text_strings[i])); + } + return 0; +} + +static unsigned lodepng_add_text_sized(LodePNGInfo* info, const char* key, const char* str, size_t size) { + char** new_keys = (char**)(lodepng_realloc(info->text_keys, sizeof(char*) * (info->text_num + 1))); + char** new_strings = (char**)(lodepng_realloc(info->text_strings, sizeof(char*) * (info->text_num + 1))); + + if(new_keys) info->text_keys = new_keys; + if(new_strings) info->text_strings = new_strings; + + if(!new_keys || !new_strings) return 83; /*alloc fail*/ + + ++info->text_num; + info->text_keys[info->text_num - 1] = alloc_string(key); + info->text_strings[info->text_num - 1] = alloc_string_sized(str, size); + if(!info->text_keys[info->text_num - 1] || !info->text_strings[info->text_num - 1]) return 83; /*alloc fail*/ + + return 0; +} + +unsigned lodepng_add_text(LodePNGInfo* info, const char* key, const char* str) { + return lodepng_add_text_sized(info, key, str, lodepng_strlen(str)); +} + +void lodepng_clear_text(LodePNGInfo* info) { + LodePNGText_cleanup(info); +} + +/******************************************************************************/ + +static void LodePNGIText_init(LodePNGInfo* info) { + info->itext_num = 0; + info->itext_keys = NULL; + info->itext_langtags = NULL; + info->itext_transkeys = NULL; + info->itext_strings = NULL; +} + +static void LodePNGIText_cleanup(LodePNGInfo* info) { + size_t i; + for(i = 0; i != info->itext_num; ++i) { + string_cleanup(&info->itext_keys[i]); + string_cleanup(&info->itext_langtags[i]); + string_cleanup(&info->itext_transkeys[i]); + string_cleanup(&info->itext_strings[i]); + } + lodepng_free(info->itext_keys); + lodepng_free(info->itext_langtags); + lodepng_free(info->itext_transkeys); + lodepng_free(info->itext_strings); +} + +static unsigned LodePNGIText_copy(LodePNGInfo* dest, const LodePNGInfo* source) { + size_t i = 0; + dest->itext_keys = NULL; + dest->itext_langtags = NULL; + dest->itext_transkeys = NULL; + dest->itext_strings = NULL; + dest->itext_num = 0; + for(i = 0; i != source->itext_num; ++i) { + CERROR_TRY_RETURN(lodepng_add_itext(dest, source->itext_keys[i], source->itext_langtags[i], + source->itext_transkeys[i], source->itext_strings[i])); + } + return 0; +} + +void lodepng_clear_itext(LodePNGInfo* info) { + LodePNGIText_cleanup(info); +} + +static unsigned lodepng_add_itext_sized(LodePNGInfo* info, const char* key, const char* langtag, + const char* transkey, const char* str, size_t size) { + char** new_keys = (char**)(lodepng_realloc(info->itext_keys, sizeof(char*) * (info->itext_num + 1))); + char** new_langtags = (char**)(lodepng_realloc(info->itext_langtags, sizeof(char*) * (info->itext_num + 1))); + char** 
new_transkeys = (char**)(lodepng_realloc(info->itext_transkeys, sizeof(char*) * (info->itext_num + 1))); + char** new_strings = (char**)(lodepng_realloc(info->itext_strings, sizeof(char*) * (info->itext_num + 1))); + + if(new_keys) info->itext_keys = new_keys; + if(new_langtags) info->itext_langtags = new_langtags; + if(new_transkeys) info->itext_transkeys = new_transkeys; + if(new_strings) info->itext_strings = new_strings; + + if(!new_keys || !new_langtags || !new_transkeys || !new_strings) return 83; /*alloc fail*/ + + ++info->itext_num; + + info->itext_keys[info->itext_num - 1] = alloc_string(key); + info->itext_langtags[info->itext_num - 1] = alloc_string(langtag); + info->itext_transkeys[info->itext_num - 1] = alloc_string(transkey); + info->itext_strings[info->itext_num - 1] = alloc_string_sized(str, size); + + return 0; +} + +unsigned lodepng_add_itext(LodePNGInfo* info, const char* key, const char* langtag, + const char* transkey, const char* str) { + return lodepng_add_itext_sized(info, key, langtag, transkey, str, lodepng_strlen(str)); +} + +/* same as set but does not delete */ +static unsigned lodepng_assign_icc(LodePNGInfo* info, const char* name, const unsigned char* profile, unsigned profile_size) { + if(profile_size == 0) return 100; /*invalid ICC profile size*/ + + info->iccp_name = alloc_string(name); + info->iccp_profile = (unsigned char*)lodepng_malloc(profile_size); + + if(!info->iccp_name || !info->iccp_profile) return 83; /*alloc fail*/ + + lodepng_memcpy(info->iccp_profile, profile, profile_size); + info->iccp_profile_size = profile_size; + + return 0; /*ok*/ +} + +unsigned lodepng_set_icc(LodePNGInfo* info, const char* name, const unsigned char* profile, unsigned profile_size) { + if(info->iccp_name) lodepng_clear_icc(info); + info->iccp_defined = 1; + + return lodepng_assign_icc(info, name, profile, profile_size); +} + +void lodepng_clear_icc(LodePNGInfo* info) { + string_cleanup(&info->iccp_name); + lodepng_free(info->iccp_profile); + info->iccp_profile = NULL; + info->iccp_profile_size = 0; + info->iccp_defined = 0; +} +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ + +void lodepng_info_init(LodePNGInfo* info) { + lodepng_color_mode_init(&info->color); + info->interlace_method = 0; + info->compression_method = 0; + info->filter_method = 0; +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS + info->background_defined = 0; + info->background_r = info->background_g = info->background_b = 0; + + LodePNGText_init(info); + LodePNGIText_init(info); + + info->time_defined = 0; + info->phys_defined = 0; + + info->gama_defined = 0; + info->chrm_defined = 0; + info->srgb_defined = 0; + info->iccp_defined = 0; + info->iccp_name = NULL; + info->iccp_profile = NULL; + + LodePNGUnknownChunks_init(info); +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ +} + +void lodepng_info_cleanup(LodePNGInfo* info) { + lodepng_color_mode_cleanup(&info->color); +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS + LodePNGText_cleanup(info); + LodePNGIText_cleanup(info); + + lodepng_clear_icc(info); + + LodePNGUnknownChunks_cleanup(info); +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ +} + +unsigned lodepng_info_copy(LodePNGInfo* dest, const LodePNGInfo* source) { + lodepng_info_cleanup(dest); + lodepng_memcpy(dest, source, sizeof(LodePNGInfo)); + lodepng_color_mode_init(&dest->color); + CERROR_TRY_RETURN(lodepng_color_mode_copy(&dest->color, &source->color)); + +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS + CERROR_TRY_RETURN(LodePNGText_copy(dest, source)); + CERROR_TRY_RETURN(LodePNGIText_copy(dest, source)); + 
if(source->iccp_defined) { + CERROR_TRY_RETURN(lodepng_assign_icc(dest, source->iccp_name, source->iccp_profile, source->iccp_profile_size)); + } + + LodePNGUnknownChunks_init(dest); + CERROR_TRY_RETURN(LodePNGUnknownChunks_copy(dest, source)); +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ + return 0; +} + +/* ////////////////////////////////////////////////////////////////////////// */ + +/*index: bitgroup index, bits: bitgroup size(1, 2 or 4), in: bitgroup value, out: octet array to add bits to*/ +static void addColorBits(unsigned char* out, size_t index, unsigned bits, unsigned in) { + unsigned m = bits == 1 ? 7 : bits == 2 ? 3 : 1; /*8 / bits - 1*/ + /*p = the partial index in the byte, e.g. with 4 palettebits it is 0 for first half or 1 for second half*/ + unsigned p = index & m; + in &= (1u << bits) - 1u; /*filter out any other bits of the input value*/ + in = in << (bits * (m - p)); + if(p == 0) out[index * bits / 8u] = in; + else out[index * bits / 8u] |= in; +} + +typedef struct ColorTree ColorTree; + +/* +One node of a color tree +This is the data structure used to count the number of unique colors and to get a palette +index for a color. It's like an octree, but because the alpha channel is used too, each +node has 16 instead of 8 children. +*/ +struct ColorTree { + ColorTree* children[16]; /*up to 16 pointers to ColorTree of next level*/ + int index; /*the payload. Only has a meaningful value if this is in the last level*/ +}; + +static void color_tree_init(ColorTree* tree) { + lodepng_memset(tree->children, 0, 16 * sizeof(*tree->children)); + tree->index = -1; +} + +static void color_tree_cleanup(ColorTree* tree) { + int i; + for(i = 0; i != 16; ++i) { + if(tree->children[i]) { + color_tree_cleanup(tree->children[i]); + lodepng_free(tree->children[i]); + } + } +} + +/*returns -1 if color not present, its index otherwise*/ +static int color_tree_get(ColorTree* tree, unsigned char r, unsigned char g, unsigned char b, unsigned char a) { + int bit = 0; + for(bit = 0; bit < 8; ++bit) { + int i = 8 * ((r >> bit) & 1) + 4 * ((g >> bit) & 1) + 2 * ((b >> bit) & 1) + 1 * ((a >> bit) & 1); + if(!tree->children[i]) return -1; + else tree = tree->children[i]; + } + return tree ? tree->index : -1; +} + +#ifdef LODEPNG_COMPILE_ENCODER +static int color_tree_has(ColorTree* tree, unsigned char r, unsigned char g, unsigned char b, unsigned char a) { + return color_tree_get(tree, r, g, b, a) >= 0; +} +#endif /*LODEPNG_COMPILE_ENCODER*/ + +/*color is not allowed to already exist. 
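+As an illustration of the descent performed by color_tree_get and color_tree_add below:
+at each of the 8 levels one bit of every channel picks a child via
+  i = 8 * r_bit + 4 * g_bit + 2 * b_bit + 1 * a_bit
+so the color r=g=b=a=255 follows child 15 on every level, while r=255, g=b=0, a=255
+follows child 9 on every level.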
+Index should be >= 0 (it's signed to be compatible with using -1 for "doesn't exist") +Returns error code, or 0 if ok*/ +static unsigned color_tree_add(ColorTree* tree, + unsigned char r, unsigned char g, unsigned char b, unsigned char a, unsigned index) { + int bit; + for(bit = 0; bit < 8; ++bit) { + int i = 8 * ((r >> bit) & 1) + 4 * ((g >> bit) & 1) + 2 * ((b >> bit) & 1) + 1 * ((a >> bit) & 1); + if(!tree->children[i]) { + tree->children[i] = (ColorTree*)lodepng_malloc(sizeof(ColorTree)); + if(!tree->children[i]) return 83; /*alloc fail*/ + color_tree_init(tree->children[i]); + } + tree = tree->children[i]; + } + tree->index = (int)index; + return 0; +} + +/*put a pixel, given its RGBA color, into image of any color type*/ +static unsigned rgba8ToPixel(unsigned char* out, size_t i, + const LodePNGColorMode* mode, ColorTree* tree /*for palette*/, + unsigned char r, unsigned char g, unsigned char b, unsigned char a) { + if(mode->colortype == LCT_GREY) { + unsigned char gray = r; /*((unsigned short)r + g + b) / 3u;*/ + if(mode->bitdepth == 8) out[i] = gray; + else if(mode->bitdepth == 16) out[i * 2 + 0] = out[i * 2 + 1] = gray; + else { + /*take the most significant bits of gray*/ + gray = ((unsigned)gray >> (8u - mode->bitdepth)) & ((1u << mode->bitdepth) - 1u); + addColorBits(out, i, mode->bitdepth, gray); + } + } else if(mode->colortype == LCT_RGB) { + if(mode->bitdepth == 8) { + out[i * 3 + 0] = r; + out[i * 3 + 1] = g; + out[i * 3 + 2] = b; + } else { + out[i * 6 + 0] = out[i * 6 + 1] = r; + out[i * 6 + 2] = out[i * 6 + 3] = g; + out[i * 6 + 4] = out[i * 6 + 5] = b; + } + } else if(mode->colortype == LCT_PALETTE) { + int index = color_tree_get(tree, r, g, b, a); + if(index < 0) return 82; /*color not in palette*/ + if(mode->bitdepth == 8) out[i] = index; + else addColorBits(out, i, mode->bitdepth, (unsigned)index); + } else if(mode->colortype == LCT_GREY_ALPHA) { + unsigned char gray = r; /*((unsigned short)r + g + b) / 3u;*/ + if(mode->bitdepth == 8) { + out[i * 2 + 0] = gray; + out[i * 2 + 1] = a; + } else if(mode->bitdepth == 16) { + out[i * 4 + 0] = out[i * 4 + 1] = gray; + out[i * 4 + 2] = out[i * 4 + 3] = a; + } + } else if(mode->colortype == LCT_RGBA) { + if(mode->bitdepth == 8) { + out[i * 4 + 0] = r; + out[i * 4 + 1] = g; + out[i * 4 + 2] = b; + out[i * 4 + 3] = a; + } else { + out[i * 8 + 0] = out[i * 8 + 1] = r; + out[i * 8 + 2] = out[i * 8 + 3] = g; + out[i * 8 + 4] = out[i * 8 + 5] = b; + out[i * 8 + 6] = out[i * 8 + 7] = a; + } + } + + return 0; /*no error*/ +} + +/*put a pixel, given its RGBA16 color, into image of any color 16-bitdepth type*/ +static void rgba16ToPixel(unsigned char* out, size_t i, + const LodePNGColorMode* mode, + unsigned short r, unsigned short g, unsigned short b, unsigned short a) { + if(mode->colortype == LCT_GREY) { + unsigned short gray = r; /*((unsigned)r + g + b) / 3u;*/ + out[i * 2 + 0] = (gray >> 8) & 255; + out[i * 2 + 1] = gray & 255; + } else if(mode->colortype == LCT_RGB) { + out[i * 6 + 0] = (r >> 8) & 255; + out[i * 6 + 1] = r & 255; + out[i * 6 + 2] = (g >> 8) & 255; + out[i * 6 + 3] = g & 255; + out[i * 6 + 4] = (b >> 8) & 255; + out[i * 6 + 5] = b & 255; + } else if(mode->colortype == LCT_GREY_ALPHA) { + unsigned short gray = r; /*((unsigned)r + g + b) / 3u;*/ + out[i * 4 + 0] = (gray >> 8) & 255; + out[i * 4 + 1] = gray & 255; + out[i * 4 + 2] = (a >> 8) & 255; + out[i * 4 + 3] = a & 255; + } else if(mode->colortype == LCT_RGBA) { + out[i * 8 + 0] = (r >> 8) & 255; + out[i * 8 + 1] = r & 255; + out[i * 8 + 2] = (g >> 8) & 255; + 
out[i * 8 + 3] = g & 255; + out[i * 8 + 4] = (b >> 8) & 255; + out[i * 8 + 5] = b & 255; + out[i * 8 + 6] = (a >> 8) & 255; + out[i * 8 + 7] = a & 255; + } +} + +/*Get RGBA8 color of pixel with index i (y * width + x) from the raw image with given color type.*/ +static void getPixelColorRGBA8(unsigned char* r, unsigned char* g, + unsigned char* b, unsigned char* a, + const unsigned char* in, size_t i, + const LodePNGColorMode* mode) { + if(mode->colortype == LCT_GREY) { + if(mode->bitdepth == 8) { + *r = *g = *b = in[i]; + if(mode->key_defined && *r == mode->key_r) *a = 0; + else *a = 255; + } else if(mode->bitdepth == 16) { + *r = *g = *b = in[i * 2 + 0]; + if(mode->key_defined && 256U * in[i * 2 + 0] + in[i * 2 + 1] == mode->key_r) *a = 0; + else *a = 255; + } else { + unsigned highest = ((1U << mode->bitdepth) - 1U); /*highest possible value for this bit depth*/ + size_t j = i * mode->bitdepth; + unsigned value = readBitsFromReversedStream(&j, in, mode->bitdepth); + *r = *g = *b = (value * 255) / highest; + if(mode->key_defined && value == mode->key_r) *a = 0; + else *a = 255; + } + } else if(mode->colortype == LCT_RGB) { + if(mode->bitdepth == 8) { + *r = in[i * 3 + 0]; *g = in[i * 3 + 1]; *b = in[i * 3 + 2]; + if(mode->key_defined && *r == mode->key_r && *g == mode->key_g && *b == mode->key_b) *a = 0; + else *a = 255; + } else { + *r = in[i * 6 + 0]; + *g = in[i * 6 + 2]; + *b = in[i * 6 + 4]; + if(mode->key_defined && 256U * in[i * 6 + 0] + in[i * 6 + 1] == mode->key_r + && 256U * in[i * 6 + 2] + in[i * 6 + 3] == mode->key_g + && 256U * in[i * 6 + 4] + in[i * 6 + 5] == mode->key_b) *a = 0; + else *a = 255; + } + } else if(mode->colortype == LCT_PALETTE) { + unsigned index; + if(mode->bitdepth == 8) index = in[i]; + else { + size_t j = i * mode->bitdepth; + index = readBitsFromReversedStream(&j, in, mode->bitdepth); + } + /*out of bounds of palette not checked: see lodepng_color_mode_alloc_palette.*/ + *r = mode->palette[index * 4 + 0]; + *g = mode->palette[index * 4 + 1]; + *b = mode->palette[index * 4 + 2]; + *a = mode->palette[index * 4 + 3]; + } else if(mode->colortype == LCT_GREY_ALPHA) { + if(mode->bitdepth == 8) { + *r = *g = *b = in[i * 2 + 0]; + *a = in[i * 2 + 1]; + } else { + *r = *g = *b = in[i * 4 + 0]; + *a = in[i * 4 + 2]; + } + } else if(mode->colortype == LCT_RGBA) { + if(mode->bitdepth == 8) { + *r = in[i * 4 + 0]; + *g = in[i * 4 + 1]; + *b = in[i * 4 + 2]; + *a = in[i * 4 + 3]; + } else { + *r = in[i * 8 + 0]; + *g = in[i * 8 + 2]; + *b = in[i * 8 + 4]; + *a = in[i * 8 + 6]; + } + } +} + +/*Similar to getPixelColorRGBA8, but with all the for loops inside of the color +mode test cases, optimized to convert the colors much faster, when converting +to the common case of RGBA with 8 bit per channel. 
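+As a rough sizing example: converting a w x h image needs numpixels = (size_t)w * (size_t)h
+and a buffer of numpixels * 4 bytes, which is what lodepng_get_raw_size(w, h, mode_out)
+evaluates to for 8-bit RGBA.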
buffer must be RGBA with +enough memory.*/ +static void getPixelColorsRGBA8(unsigned char* LODEPNG_RESTRICT buffer, size_t numpixels, + const unsigned char* LODEPNG_RESTRICT in, + const LodePNGColorMode* mode) { + unsigned num_channels = 4; + size_t i; + if(mode->colortype == LCT_GREY) { + if(mode->bitdepth == 8) { + for(i = 0; i != numpixels; ++i, buffer += num_channels) { + buffer[0] = buffer[1] = buffer[2] = in[i]; + buffer[3] = 255; + } + if(mode->key_defined) { + buffer -= numpixels * num_channels; + for(i = 0; i != numpixels; ++i, buffer += num_channels) { + if(buffer[0] == mode->key_r) buffer[3] = 0; + } + } + } else if(mode->bitdepth == 16) { + for(i = 0; i != numpixels; ++i, buffer += num_channels) { + buffer[0] = buffer[1] = buffer[2] = in[i * 2]; + buffer[3] = mode->key_defined && 256U * in[i * 2 + 0] + in[i * 2 + 1] == mode->key_r ? 0 : 255; + } + } else { + unsigned highest = ((1U << mode->bitdepth) - 1U); /*highest possible value for this bit depth*/ + size_t j = 0; + for(i = 0; i != numpixels; ++i, buffer += num_channels) { + unsigned value = readBitsFromReversedStream(&j, in, mode->bitdepth); + buffer[0] = buffer[1] = buffer[2] = (value * 255) / highest; + buffer[3] = mode->key_defined && value == mode->key_r ? 0 : 255; + } + } + } else if(mode->colortype == LCT_RGB) { + if(mode->bitdepth == 8) { + for(i = 0; i != numpixels; ++i, buffer += num_channels) { + lodepng_memcpy(buffer, &in[i * 3], 3); + buffer[3] = 255; + } + if(mode->key_defined) { + buffer -= numpixels * num_channels; + for(i = 0; i != numpixels; ++i, buffer += num_channels) { + if(buffer[0] == mode->key_r && buffer[1]== mode->key_g && buffer[2] == mode->key_b) buffer[3] = 0; + } + } + } else { + for(i = 0; i != numpixels; ++i, buffer += num_channels) { + buffer[0] = in[i * 6 + 0]; + buffer[1] = in[i * 6 + 2]; + buffer[2] = in[i * 6 + 4]; + buffer[3] = mode->key_defined + && 256U * in[i * 6 + 0] + in[i * 6 + 1] == mode->key_r + && 256U * in[i * 6 + 2] + in[i * 6 + 3] == mode->key_g + && 256U * in[i * 6 + 4] + in[i * 6 + 5] == mode->key_b ? 
0 : 255; + } + } + } else if(mode->colortype == LCT_PALETTE) { + if(mode->bitdepth == 8) { + for(i = 0; i != numpixels; ++i, buffer += num_channels) { + unsigned index = in[i]; + /*out of bounds of palette not checked: see lodepng_color_mode_alloc_palette.*/ + lodepng_memcpy(buffer, &mode->palette[index * 4], 4); + } + } else { + size_t j = 0; + for(i = 0; i != numpixels; ++i, buffer += num_channels) { + unsigned index = readBitsFromReversedStream(&j, in, mode->bitdepth); + /*out of bounds of palette not checked: see lodepng_color_mode_alloc_palette.*/ + lodepng_memcpy(buffer, &mode->palette[index * 4], 4); + } + } + } else if(mode->colortype == LCT_GREY_ALPHA) { + if(mode->bitdepth == 8) { + for(i = 0; i != numpixels; ++i, buffer += num_channels) { + buffer[0] = buffer[1] = buffer[2] = in[i * 2 + 0]; + buffer[3] = in[i * 2 + 1]; + } + } else { + for(i = 0; i != numpixels; ++i, buffer += num_channels) { + buffer[0] = buffer[1] = buffer[2] = in[i * 4 + 0]; + buffer[3] = in[i * 4 + 2]; + } + } + } else if(mode->colortype == LCT_RGBA) { + if(mode->bitdepth == 8) { + lodepng_memcpy(buffer, in, numpixels * 4); + } else { + for(i = 0; i != numpixels; ++i, buffer += num_channels) { + buffer[0] = in[i * 8 + 0]; + buffer[1] = in[i * 8 + 2]; + buffer[2] = in[i * 8 + 4]; + buffer[3] = in[i * 8 + 6]; + } + } + } +} + +/*Similar to getPixelColorsRGBA8, but with 3-channel RGB output.*/ +static void getPixelColorsRGB8(unsigned char* LODEPNG_RESTRICT buffer, size_t numpixels, + const unsigned char* LODEPNG_RESTRICT in, + const LodePNGColorMode* mode) { + const unsigned num_channels = 3; + size_t i; + if(mode->colortype == LCT_GREY) { + if(mode->bitdepth == 8) { + for(i = 0; i != numpixels; ++i, buffer += num_channels) { + buffer[0] = buffer[1] = buffer[2] = in[i]; + } + } else if(mode->bitdepth == 16) { + for(i = 0; i != numpixels; ++i, buffer += num_channels) { + buffer[0] = buffer[1] = buffer[2] = in[i * 2]; + } + } else { + unsigned highest = ((1U << mode->bitdepth) - 1U); /*highest possible value for this bit depth*/ + size_t j = 0; + for(i = 0; i != numpixels; ++i, buffer += num_channels) { + unsigned value = readBitsFromReversedStream(&j, in, mode->bitdepth); + buffer[0] = buffer[1] = buffer[2] = (value * 255) / highest; + } + } + } else if(mode->colortype == LCT_RGB) { + if(mode->bitdepth == 8) { + lodepng_memcpy(buffer, in, numpixels * 3); + } else { + for(i = 0; i != numpixels; ++i, buffer += num_channels) { + buffer[0] = in[i * 6 + 0]; + buffer[1] = in[i * 6 + 2]; + buffer[2] = in[i * 6 + 4]; + } + } + } else if(mode->colortype == LCT_PALETTE) { + if(mode->bitdepth == 8) { + for(i = 0; i != numpixels; ++i, buffer += num_channels) { + unsigned index = in[i]; + /*out of bounds of palette not checked: see lodepng_color_mode_alloc_palette.*/ + lodepng_memcpy(buffer, &mode->palette[index * 4], 3); + } + } else { + size_t j = 0; + for(i = 0; i != numpixels; ++i, buffer += num_channels) { + unsigned index = readBitsFromReversedStream(&j, in, mode->bitdepth); + /*out of bounds of palette not checked: see lodepng_color_mode_alloc_palette.*/ + lodepng_memcpy(buffer, &mode->palette[index * 4], 3); + } + } + } else if(mode->colortype == LCT_GREY_ALPHA) { + if(mode->bitdepth == 8) { + for(i = 0; i != numpixels; ++i, buffer += num_channels) { + buffer[0] = buffer[1] = buffer[2] = in[i * 2 + 0]; + } + } else { + for(i = 0; i != numpixels; ++i, buffer += num_channels) { + buffer[0] = buffer[1] = buffer[2] = in[i * 4 + 0]; + } + } + } else if(mode->colortype == LCT_RGBA) { + if(mode->bitdepth == 8) { + for(i = 
0; i != numpixels; ++i, buffer += num_channels) { + //lodepng_memcpy(buffer, &in[i * 4], 3); // rgb -> rgba, don't call func, this is hot spot + buffer[0] = in[i * 4 + 0]; + buffer[1] = in[i * 4 + 1]; + buffer[2] = in[i * 4 + 2]; + } + } else { + for(i = 0; i != numpixels; ++i, buffer += num_channels) { + buffer[0] = in[i * 8 + 0]; + buffer[1] = in[i * 8 + 2]; + buffer[2] = in[i * 8 + 4]; + } + } + } +} + +/*Get RGBA16 color of pixel with index i (y * width + x) from the raw image with +given color type, but the given color type must be 16-bit itself.*/ +static void getPixelColorRGBA16(unsigned short* r, unsigned short* g, unsigned short* b, unsigned short* a, + const unsigned char* in, size_t i, const LodePNGColorMode* mode) { + if(mode->colortype == LCT_GREY) { + *r = *g = *b = 256 * in[i * 2 + 0] + in[i * 2 + 1]; + if(mode->key_defined && 256U * in[i * 2 + 0] + in[i * 2 + 1] == mode->key_r) *a = 0; + else *a = 65535; + } else if(mode->colortype == LCT_RGB) { + *r = 256u * in[i * 6 + 0] + in[i * 6 + 1]; + *g = 256u * in[i * 6 + 2] + in[i * 6 + 3]; + *b = 256u * in[i * 6 + 4] + in[i * 6 + 5]; + if(mode->key_defined + && 256u * in[i * 6 + 0] + in[i * 6 + 1] == mode->key_r + && 256u * in[i * 6 + 2] + in[i * 6 + 3] == mode->key_g + && 256u * in[i * 6 + 4] + in[i * 6 + 5] == mode->key_b) *a = 0; + else *a = 65535; + } else if(mode->colortype == LCT_GREY_ALPHA) { + *r = *g = *b = 256u * in[i * 4 + 0] + in[i * 4 + 1]; + *a = 256u * in[i * 4 + 2] + in[i * 4 + 3]; + } else if(mode->colortype == LCT_RGBA) { + *r = 256u * in[i * 8 + 0] + in[i * 8 + 1]; + *g = 256u * in[i * 8 + 2] + in[i * 8 + 3]; + *b = 256u * in[i * 8 + 4] + in[i * 8 + 5]; + *a = 256u * in[i * 8 + 6] + in[i * 8 + 7]; + } +} + +unsigned lodepng_convert(unsigned char* out, const unsigned char* in, + const LodePNGColorMode* mode_out, const LodePNGColorMode* mode_in, + unsigned w, unsigned h) { + size_t i; + ColorTree tree; + size_t numpixels = (size_t)w * (size_t)h; + unsigned error = 0; + + if(mode_in->colortype == LCT_PALETTE && !mode_in->palette) { + return 107; /* error: must provide palette if input mode is palette */ + } + + if(lodepng_color_mode_equal(mode_out, mode_in)) { + size_t numbytes = lodepng_get_raw_size(w, h, mode_in); + lodepng_memcpy(out, in, numbytes); + return 0; + } + + if(mode_out->colortype == LCT_PALETTE) { + size_t palettesize = mode_out->palettesize; + const unsigned char* palette = mode_out->palette; + size_t palsize = (size_t)1u << mode_out->bitdepth; + /*if the user specified output palette but did not give the values, assume + they want the values of the input color type (assuming that one is palette). + Note that we never create a new palette ourselves.*/ + if(palettesize == 0) { + palettesize = mode_in->palettesize; + palette = mode_in->palette; + /*if the input was also palette with same bitdepth, then the color types are also + equal, so copy literally. 
This to preserve the exact indices that were in the PNG + even in case there are duplicate colors in the palette.*/ + if(mode_in->colortype == LCT_PALETTE && mode_in->bitdepth == mode_out->bitdepth) { + size_t numbytes = lodepng_get_raw_size(w, h, mode_in); + lodepng_memcpy(out, in, numbytes); + return 0; + } + } + if(palettesize < palsize) palsize = palettesize; + color_tree_init(&tree); + for(i = 0; i != palsize; ++i) { + const unsigned char* p = &palette[i * 4]; + error = color_tree_add(&tree, p[0], p[1], p[2], p[3], (unsigned)i); + if(error) break; + } + } + + if(!error) { + if(mode_in->bitdepth == 16 && mode_out->bitdepth == 16) { + for(i = 0; i != numpixels; ++i) { + unsigned short r = 0, g = 0, b = 0, a = 0; + getPixelColorRGBA16(&r, &g, &b, &a, in, i, mode_in); + rgba16ToPixel(out, i, mode_out, r, g, b, a); + } + } else if(mode_out->bitdepth == 8 && mode_out->colortype == LCT_RGBA) { + getPixelColorsRGBA8(out, numpixels, in, mode_in); + } else if(mode_out->bitdepth == 8 && mode_out->colortype == LCT_RGB) { + getPixelColorsRGB8(out, numpixels, in, mode_in); + } else { + unsigned char r = 0, g = 0, b = 0, a = 0; + for(i = 0; i != numpixels; ++i) { + getPixelColorRGBA8(&r, &g, &b, &a, in, i, mode_in); + error = rgba8ToPixel(out, i, mode_out, &tree, r, g, b, a); + if(error) break; + } + } + } + + if(mode_out->colortype == LCT_PALETTE) { + color_tree_cleanup(&tree); + } + + return error; +} + + +/* Converts a single rgb color without alpha from one type to another, color bits truncated to +their bitdepth. In case of single channel (gray or palette), only the r channel is used. Slow +function, do not use to process all pixels of an image. Alpha channel not supported on purpose: +this is for bKGD, supporting alpha may prevent it from finding a color in the palette, from the +specification it looks like bKGD should ignore the alpha values of the palette since it can use +any palette index but doesn't have an alpha channel. Idem with ignoring color key. 
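+A small worked example of the scaling used below: an 8-bit gray input of 128 is first
+widened to 16 bits as 128 * 257 = 32896; for an 8-bit output mode the shift is 16 - 8 = 8,
+so 32896 >> 8 yields 128 again.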
*/ +unsigned lodepng_convert_rgb( + unsigned* r_out, unsigned* g_out, unsigned* b_out, + unsigned r_in, unsigned g_in, unsigned b_in, + const LodePNGColorMode* mode_out, const LodePNGColorMode* mode_in) { + unsigned r = 0, g = 0, b = 0; + unsigned mul = 65535 / ((1u << mode_in->bitdepth) - 1u); /*65535, 21845, 4369, 257, 1*/ + unsigned shift = 16 - mode_out->bitdepth; + + if(mode_in->colortype == LCT_GREY || mode_in->colortype == LCT_GREY_ALPHA) { + r = g = b = r_in * mul; + } else if(mode_in->colortype == LCT_RGB || mode_in->colortype == LCT_RGBA) { + r = r_in * mul; + g = g_in * mul; + b = b_in * mul; + } else if(mode_in->colortype == LCT_PALETTE) { + if(r_in >= mode_in->palettesize) return 82; + r = mode_in->palette[r_in * 4 + 0] * 257u; + g = mode_in->palette[r_in * 4 + 1] * 257u; + b = mode_in->palette[r_in * 4 + 2] * 257u; + } else { + return 31; + } + + /* now convert to output format */ + if(mode_out->colortype == LCT_GREY || mode_out->colortype == LCT_GREY_ALPHA) { + *r_out = r >> shift ; + } else if(mode_out->colortype == LCT_RGB || mode_out->colortype == LCT_RGBA) { + *r_out = r >> shift ; + *g_out = g >> shift ; + *b_out = b >> shift ; + } else if(mode_out->colortype == LCT_PALETTE) { + unsigned i; + /* a 16-bit color cannot be in the palette */ + if((r >> 8) != (r & 255) || (g >> 8) != (g & 255) || (b >> 8) != (b & 255)) return 82; + for(i = 0; i < mode_out->palettesize; i++) { + unsigned j = i * 4; + if((r >> 8) == mode_out->palette[j + 0] && (g >> 8) == mode_out->palette[j + 1] && + (b >> 8) == mode_out->palette[j + 2]) { + *r_out = i; + return 0; + } + } + return 82; + } else { + return 31; + } + + return 0; +} + +#ifdef LODEPNG_COMPILE_ENCODER + +void lodepng_color_stats_init(LodePNGColorStats* stats) { + /*stats*/ + stats->colored = 0; + stats->key = 0; + stats->key_r = stats->key_g = stats->key_b = 0; + stats->alpha = 0; + stats->numcolors = 0; + stats->bits = 1; + stats->numpixels = 0; + /*settings*/ + stats->allow_palette = 1; + stats->allow_greyscale = 1; +} + +/*function used for debug purposes with C++*/ +/*void printColorStats(LodePNGColorStats* p) { + std::cout << "colored: " << (int)p->colored << ", "; + std::cout << "key: " << (int)p->key << ", "; + std::cout << "key_r: " << (int)p->key_r << ", "; + std::cout << "key_g: " << (int)p->key_g << ", "; + std::cout << "key_b: " << (int)p->key_b << ", "; + std::cout << "alpha: " << (int)p->alpha << ", "; + std::cout << "numcolors: " << (int)p->numcolors << ", "; + std::cout << "bits: " << (int)p->bits << std::endl; +}*/ + +/*Returns how many bits needed to represent given value (max 8 bit)*/ +static unsigned getValueRequiredBits(unsigned char value) { + if(value == 0 || value == 255) return 1; + /*The scaling of 2-bit and 4-bit values uses multiples of 85 and 17*/ + if(value % 17 == 0) return value % 85 == 0 ? 2 : 4; + return 8; +} + +/*stats must already have been inited. */ +unsigned lodepng_compute_color_stats(LodePNGColorStats* stats, + const unsigned char* in, unsigned w, unsigned h, + const LodePNGColorMode* mode_in) { + size_t i; + ColorTree tree; + size_t numpixels = (size_t)w * (size_t)h; + unsigned error = 0; + + /* mark things as done already if it would be impossible to have a more expensive case */ + unsigned colored_done = lodepng_is_greyscale_type(mode_in) ? 1 : 0; + unsigned alpha_done = lodepng_can_have_alpha(mode_in) ? 0 : 1; + unsigned numcolors_done = 0; + unsigned bpp = lodepng_get_bpp(mode_in); + unsigned bits_done = (stats->bits == 1 && bpp == 1) ? 
1 : 0; + unsigned sixteen = 0; /* whether the input image is 16 bit */ + unsigned maxnumcolors = 257; + if(bpp <= 8) maxnumcolors = LODEPNG_MIN(257, stats->numcolors + (1u << bpp)); + + stats->numpixels += numpixels; + + /*if palette not allowed, no need to compute numcolors*/ + if(!stats->allow_palette) numcolors_done = 1; + + color_tree_init(&tree); + + /*If the stats was already filled in from previous data, fill its palette in tree + and mark things as done already if we know they are the most expensive case already*/ + if(stats->alpha) alpha_done = 1; + if(stats->colored) colored_done = 1; + if(stats->bits == 16) numcolors_done = 1; + if(stats->bits >= bpp) bits_done = 1; + if(stats->numcolors >= maxnumcolors) numcolors_done = 1; + + if(!numcolors_done) { + for(i = 0; i < stats->numcolors; i++) { + const unsigned char* color = &stats->palette[i * 4]; + error = color_tree_add(&tree, color[0], color[1], color[2], color[3], i); + if(error) goto cleanup; + } + } + + /*Check if the 16-bit input is truly 16-bit*/ + if(mode_in->bitdepth == 16 && !sixteen) { + unsigned short r = 0, g = 0, b = 0, a = 0; + for(i = 0; i != numpixels; ++i) { + getPixelColorRGBA16(&r, &g, &b, &a, in, i, mode_in); + if((r & 255) != ((r >> 8) & 255) || (g & 255) != ((g >> 8) & 255) || + (b & 255) != ((b >> 8) & 255) || (a & 255) != ((a >> 8) & 255)) /*first and second byte differ*/ { + stats->bits = 16; + sixteen = 1; + bits_done = 1; + numcolors_done = 1; /*counting colors no longer useful, palette doesn't support 16-bit*/ + break; + } + } + } + + if(sixteen) { + unsigned short r = 0, g = 0, b = 0, a = 0; + + for(i = 0; i != numpixels; ++i) { + getPixelColorRGBA16(&r, &g, &b, &a, in, i, mode_in); + + if(!colored_done && (r != g || r != b)) { + stats->colored = 1; + colored_done = 1; + } + + if(!alpha_done) { + unsigned matchkey = (r == stats->key_r && g == stats->key_g && b == stats->key_b); + if(a != 65535 && (a != 0 || (stats->key && !matchkey))) { + stats->alpha = 1; + stats->key = 0; + alpha_done = 1; + } else if(a == 0 && !stats->alpha && !stats->key) { + stats->key = 1; + stats->key_r = r; + stats->key_g = g; + stats->key_b = b; + } else if(a == 65535 && stats->key && matchkey) { + /* Color key cannot be used if an opaque pixel also has that RGB color. */ + stats->alpha = 1; + stats->key = 0; + alpha_done = 1; + } + } + if(alpha_done && numcolors_done && colored_done && bits_done) break; + } + + if(stats->key && !stats->alpha) { + for(i = 0; i != numpixels; ++i) { + getPixelColorRGBA16(&r, &g, &b, &a, in, i, mode_in); + if(a != 0 && r == stats->key_r && g == stats->key_g && b == stats->key_b) { + /* Color key cannot be used if an opaque pixel also has that RGB color. 
*/ + stats->alpha = 1; + stats->key = 0; + alpha_done = 1; + } + } + } + } else /* < 16-bit */ { + unsigned char r = 0, g = 0, b = 0, a = 0; + for(i = 0; i != numpixels; ++i) { + getPixelColorRGBA8(&r, &g, &b, &a, in, i, mode_in); + + if(!bits_done && stats->bits < 8) { + /*only r is checked, < 8 bits is only relevant for grayscale*/ + unsigned bits = getValueRequiredBits(r); + if(bits > stats->bits) stats->bits = bits; + } + bits_done = (stats->bits >= bpp); + + if(!colored_done && (r != g || r != b)) { + stats->colored = 1; + colored_done = 1; + if(stats->bits < 8) stats->bits = 8; /*PNG has no colored modes with less than 8-bit per channel*/ + } + + if(!alpha_done) { + unsigned matchkey = (r == stats->key_r && g == stats->key_g && b == stats->key_b); + if(a != 255 && (a != 0 || (stats->key && !matchkey))) { + stats->alpha = 1; + stats->key = 0; + alpha_done = 1; + if(stats->bits < 8) stats->bits = 8; /*PNG has no alphachannel modes with less than 8-bit per channel*/ + } else if(a == 0 && !stats->alpha && !stats->key) { + stats->key = 1; + stats->key_r = r; + stats->key_g = g; + stats->key_b = b; + } else if(a == 255 && stats->key && matchkey) { + /* Color key cannot be used if an opaque pixel also has that RGB color. */ + stats->alpha = 1; + stats->key = 0; + alpha_done = 1; + if(stats->bits < 8) stats->bits = 8; /*PNG has no alphachannel modes with less than 8-bit per channel*/ + } + } + + if(!numcolors_done) { + if(!color_tree_has(&tree, r, g, b, a)) { + error = color_tree_add(&tree, r, g, b, a, stats->numcolors); + if(error) goto cleanup; + if(stats->numcolors < 256) { + unsigned char* p = stats->palette; + unsigned n = stats->numcolors; + p[n * 4 + 0] = r; + p[n * 4 + 1] = g; + p[n * 4 + 2] = b; + p[n * 4 + 3] = a; + } + ++stats->numcolors; + numcolors_done = stats->numcolors >= maxnumcolors; + } + } + + if(alpha_done && numcolors_done && colored_done && bits_done) break; + } + + if(stats->key && !stats->alpha) { + for(i = 0; i != numpixels; ++i) { + getPixelColorRGBA8(&r, &g, &b, &a, in, i, mode_in); + if(a != 0 && r == stats->key_r && g == stats->key_g && b == stats->key_b) { + /* Color key cannot be used if an opaque pixel also has that RGB color. */ + stats->alpha = 1; + stats->key = 0; + alpha_done = 1; + if(stats->bits < 8) stats->bits = 8; /*PNG has no alphachannel modes with less than 8-bit per channel*/ + } + } + } + + /*make the stats's key always 16-bit for consistency - repeat each byte twice*/ + stats->key_r += (stats->key_r << 8); + stats->key_g += (stats->key_g << 8); + stats->key_b += (stats->key_b << 8); + } + +cleanup: + color_tree_cleanup(&tree); + return error; +} + +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS +/*Adds a single color to the color stats. The stats must already have been inited. The color must be given as 16-bit +(with 2 bytes repeating for 8-bit and 65535 for opaque alpha channel). This function is expensive, do not call it for +all pixels of an image but only for a few additional values. 
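+For instance, registering an opaque 8-bit red as an extra color would pass the value with
+both bytes repeated, roughly lodepng_color_stats_add(&stats, 0xFFFF, 0, 0, 0xFFFF), where
+stats is a hypothetical LodePNGColorStats that was already inited.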
*/ +static unsigned lodepng_color_stats_add(LodePNGColorStats* stats, + unsigned r, unsigned g, unsigned b, unsigned a) { + unsigned error = 0; + unsigned char image[8]; + LodePNGColorMode mode; + lodepng_color_mode_init(&mode); + image[0] = r >> 8; image[1] = r; image[2] = g >> 8; image[3] = g; + image[4] = b >> 8; image[5] = b; image[6] = a >> 8; image[7] = a; + mode.bitdepth = 16; + mode.colortype = LCT_RGBA; + error = lodepng_compute_color_stats(stats, image, 1, 1, &mode); + lodepng_color_mode_cleanup(&mode); + return error; +} +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ + +/*Computes a minimal PNG color model that can contain all colors as indicated by the stats. +The stats should be computed with lodepng_compute_color_stats. +mode_in is raw color profile of the image the stats were computed on, to copy palette order from when relevant. +Minimal PNG color model means the color type and bit depth that gives smallest amount of bits in the output image, +e.g. gray if only grayscale pixels, palette if less than 256 colors, color key if only single transparent color, ... +This is used if auto_convert is enabled (it is by default). +*/ +static unsigned auto_choose_color(LodePNGColorMode* mode_out, + const LodePNGColorMode* mode_in, + const LodePNGColorStats* stats) { + unsigned error = 0; + unsigned palettebits; + size_t i, n; + size_t numpixels = stats->numpixels; + unsigned palette_ok, gray_ok; + + unsigned alpha = stats->alpha; + unsigned key = stats->key; + unsigned bits = stats->bits; + + mode_out->key_defined = 0; + + if(key && numpixels <= 16) { + alpha = 1; /*too few pixels to justify tRNS chunk overhead*/ + key = 0; + if(bits < 8) bits = 8; /*PNG has no alphachannel modes with less than 8-bit per channel*/ + } + + gray_ok = !stats->colored; + if(!stats->allow_greyscale) gray_ok = 0; + if(!gray_ok && bits < 8) bits = 8; + + n = stats->numcolors; + palettebits = n <= 2 ? 1 : (n <= 4 ? 2 : (n <= 16 ? 4 : 8)); + palette_ok = n <= 256 && bits <= 8 && n != 0; /*n==0 means likely numcolors wasn't computed*/ + if(numpixels < n * 2) palette_ok = 0; /*don't add palette overhead if image has only a few pixels*/ + if(gray_ok && !alpha && bits <= palettebits) palette_ok = 0; /*gray is less overhead*/ + if(!stats->allow_palette) palette_ok = 0; + + if(palette_ok) { + const unsigned char* p = stats->palette; + lodepng_palette_clear(mode_out); /*remove potential earlier palette*/ + for(i = 0; i != stats->numcolors; ++i) { + error = lodepng_palette_add(mode_out, p[i * 4 + 0], p[i * 4 + 1], p[i * 4 + 2], p[i * 4 + 3]); + if(error) break; + } + + mode_out->colortype = LCT_PALETTE; + mode_out->bitdepth = palettebits; + + if(mode_in->colortype == LCT_PALETTE && mode_in->palettesize >= mode_out->palettesize + && mode_in->bitdepth == mode_out->bitdepth) { + /*If input should have same palette colors, keep original to preserve its order and prevent conversion*/ + lodepng_color_mode_cleanup(mode_out); + lodepng_color_mode_copy(mode_out, mode_in); + } + } else /*8-bit or 16-bit per channel*/ { + mode_out->bitdepth = bits; + mode_out->colortype = alpha ? (gray_ok ? LCT_GREY_ALPHA : LCT_RGBA) + : (gray_ok ? 
LCT_GREY : LCT_RGB); + if(key) { + unsigned mask = (1u << mode_out->bitdepth) - 1u; /*stats always uses 16-bit, mask converts it*/ + mode_out->key_r = stats->key_r & mask; + mode_out->key_g = stats->key_g & mask; + mode_out->key_b = stats->key_b & mask; + mode_out->key_defined = 1; + } + } + + return error; +} + +#endif /* #ifdef LODEPNG_COMPILE_ENCODER */ + +/* +Paeth predictor, used by PNG filter type 4 +The parameters are of type short, but should come from unsigned chars, the shorts +are only needed to make the paeth calculation correct. +*/ +static unsigned char paethPredictor(short a, short b, short c) { + short pa = LODEPNG_ABS(b - c); + short pb = LODEPNG_ABS(a - c); + short pc = LODEPNG_ABS(a + b - c - c); + /* return input value associated with smallest of pa, pb, pc (with certain priority if equal) */ + if(pb < pa) { a = b; pa = pb; } + return (pc < pa) ? c : a; +} + +/*shared values used by multiple Adam7 related functions*/ + +static const unsigned ADAM7_IX[7] = { 0, 4, 0, 2, 0, 1, 0 }; /*x start values*/ +static const unsigned ADAM7_IY[7] = { 0, 0, 4, 0, 2, 0, 1 }; /*y start values*/ +static const unsigned ADAM7_DX[7] = { 8, 8, 4, 4, 2, 2, 1 }; /*x delta values*/ +static const unsigned ADAM7_DY[7] = { 8, 8, 8, 4, 4, 2, 2 }; /*y delta values*/ + +/* +Outputs various dimensions and positions in the image related to the Adam7 reduced images. +passw: output containing the width of the 7 passes +passh: output containing the height of the 7 passes +filter_passstart: output containing the index of the start and end of each + reduced image with filter bytes +padded_passstart output containing the index of the start and end of each + reduced image when without filter bytes but with padded scanlines +passstart: output containing the index of the start and end of each reduced + image without padding between scanlines, but still padding between the images +w, h: width and height of non-interlaced image +bpp: bits per pixel +"padded" is only relevant if bpp is less than 8 and a scanline or image does not + end at a full byte +*/ +static void Adam7_getpassvalues(unsigned passw[7], unsigned passh[7], size_t filter_passstart[8], + size_t padded_passstart[8], size_t passstart[8], unsigned w, unsigned h, unsigned bpp) { + /*the passstart values have 8 values: the 8th one indicates the byte after the end of the 7th (= last) pass*/ + unsigned i; + + /*calculate width and height in pixels of each pass*/ + for(i = 0; i != 7; ++i) { + passw[i] = (w + ADAM7_DX[i] - ADAM7_IX[i] - 1) / ADAM7_DX[i]; + passh[i] = (h + ADAM7_DY[i] - ADAM7_IY[i] - 1) / ADAM7_DY[i]; + if(passw[i] == 0) passh[i] = 0; + if(passh[i] == 0) passw[i] = 0; + } + + filter_passstart[0] = padded_passstart[0] = passstart[0] = 0; + for(i = 0; i != 7; ++i) { + /*if passw[i] is 0, it's 0 bytes, not 1 (no filtertype-byte)*/ + filter_passstart[i + 1] = filter_passstart[i] + + ((passw[i] && passh[i]) ? passh[i] * (1u + (passw[i] * bpp + 7u) / 8u) : 0); + /*bits padded if needed to fill full byte at end of each scanline*/ + padded_passstart[i + 1] = padded_passstart[i] + passh[i] * ((passw[i] * bpp + 7u) / 8u); + /*only padded at end of reduced image*/ + passstart[i + 1] = passstart[i] + (passh[i] * passw[i] * bpp + 7u) / 8u; + } +} + +#ifdef LODEPNG_COMPILE_DECODER + +/* ////////////////////////////////////////////////////////////////////////// */ +/* / PNG Decoder / */ +/* ////////////////////////////////////////////////////////////////////////// */ + +/*read the information from the header and store it in the LodePNGInfo. 
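+Concretely, the fields read below sit at fixed offsets in a well-formed file: the signature
+in bytes 0..7, the IHDR length at 8..11 (it must be 13) and type at 12..15, then width at
+16..19, height at 20..23, bit depth at 24, color type at 25, compression at 26, filter at 27,
+interlace at 28, and the IHDR CRC at 29..32, computed over the 17 bytes starting at offset 12.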
return value is error*/ +unsigned lodepng_inspect(unsigned* w, unsigned* h, LodePNGState* state, + const unsigned char* in, size_t insize) { + unsigned width, height; + LodePNGInfo* info = &state->info_png; + if(insize == 0 || in == 0) { + CERROR_RETURN_ERROR(state->error, 48); /*error: the given data is empty*/ + } + if(insize < 33) { + CERROR_RETURN_ERROR(state->error, 27); /*error: the data length is smaller than the length of a PNG header*/ + } + + /*when decoding a new PNG image, make sure all parameters created after previous decoding are reset*/ + /* TODO: remove this. One should use a new LodePNGState for new sessions */ + lodepng_info_cleanup(info); + lodepng_info_init(info); + + if(in[0] != 137 || in[1] != 80 || in[2] != 78 || in[3] != 71 + || in[4] != 13 || in[5] != 10 || in[6] != 26 || in[7] != 10) { + CERROR_RETURN_ERROR(state->error, 28); /*error: the first 8 bytes are not the correct PNG signature*/ + } + if(lodepng_chunk_length(in + 8) != 13) { + CERROR_RETURN_ERROR(state->error, 94); /*error: header size must be 13 bytes*/ + } + if(!lodepng_chunk_type_equals(in + 8, "IHDR")) { + CERROR_RETURN_ERROR(state->error, 29); /*error: it doesn't start with a IHDR chunk!*/ + } + + /*read the values given in the header*/ + width = lodepng_read32bitInt(&in[16]); + height = lodepng_read32bitInt(&in[20]); + /*TODO: remove the undocumented feature that allows to give null pointers to width or height*/ + if(w) *w = width; + if(h) *h = height; + info->color.bitdepth = in[24]; + info->color.colortype = (LodePNGColorType)in[25]; + info->compression_method = in[26]; + info->filter_method = in[27]; + info->interlace_method = in[28]; + + /*errors returned only after the parsing so other values are still output*/ + + /*error: invalid image size*/ + if(width == 0 || height == 0) CERROR_RETURN_ERROR(state->error, 93); + /*error: invalid colortype or bitdepth combination*/ + state->error = checkColorValidity(info->color.colortype, info->color.bitdepth); + if(state->error) return state->error; + /*error: only compression method 0 is allowed in the specification*/ + if(info->compression_method != 0) CERROR_RETURN_ERROR(state->error, 32); + /*error: only filter method 0 is allowed in the specification*/ + if(info->filter_method != 0) CERROR_RETURN_ERROR(state->error, 33); + /*error: only interlace methods 0 and 1 exist in the specification*/ + if(info->interlace_method > 1) CERROR_RETURN_ERROR(state->error, 34); + + if(!state->decoder.ignore_crc) { + unsigned CRC = lodepng_read32bitInt(&in[29]); + unsigned checksum = lodepng_crc32(&in[12], 17); + if(CRC != checksum) { + CERROR_RETURN_ERROR(state->error, 57); /*invalid CRC*/ + } + } + + return state->error; +} + +static unsigned unfilterScanline(unsigned char* recon, const unsigned char* scanline, const unsigned char* precon, + size_t bytewidth, unsigned char filterType, size_t length) { + /* + For PNG filter method 0 + unfilter a PNG image scanline by scanline. when the pixels are smaller than 1 byte, + the filter works byte per byte (bytewidth = 1) + precon is the previous unfiltered scanline, recon the result, scanline the current one + the incoming scanlines do NOT include the filtertype byte, that one is given in the parameter filterType instead + recon and scanline MAY be the same memory address! precon must be disjoint. 
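+  A small worked example (illustrative only): with bytewidth 1, filterType 2 (Up),
+  precon = {10, 20} and scanline = {5, 7}, the reconstruction below yields
+  recon = {15, 27}, since each output byte is scanline[i] + precon[i] modulo 256.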
+ */ + + size_t i; + switch(filterType) { + case 0: + for(i = 0; i != length; ++i) recon[i] = scanline[i]; + break; + case 1: + for(i = 0; i != bytewidth; ++i) recon[i] = scanline[i]; + for(i = bytewidth; i < length; ++i) recon[i] = scanline[i] + recon[i - bytewidth]; + break; + case 2: + if(precon) { + for(i = 0; i != length; ++i) recon[i] = scanline[i] + precon[i]; + } else { + for(i = 0; i != length; ++i) recon[i] = scanline[i]; + } + break; + case 3: + if(precon) { + for(i = 0; i != bytewidth; ++i) recon[i] = scanline[i] + (precon[i] >> 1u); + for(i = bytewidth; i < length; ++i) recon[i] = scanline[i] + ((recon[i - bytewidth] + precon[i]) >> 1u); + } else { + for(i = 0; i != bytewidth; ++i) recon[i] = scanline[i]; + for(i = bytewidth; i < length; ++i) recon[i] = scanline[i] + (recon[i - bytewidth] >> 1u); + } + break; + case 4: + if(precon) { + for(i = 0; i != bytewidth; ++i) { + recon[i] = (scanline[i] + precon[i]); /*paethPredictor(0, precon[i], 0) is always precon[i]*/ + } + + /* Unroll independent paths of the paeth predictor. A 6x and 8x version would also be possible but that + adds too much code. Whether this actually speeds anything up at all depends on compiler and settings. */ + if(bytewidth >= 4) { + for(; i + 3 < length; i += 4) { + size_t j = i - bytewidth; + unsigned char s0 = scanline[i + 0], s1 = scanline[i + 1], s2 = scanline[i + 2], s3 = scanline[i + 3]; + unsigned char r0 = recon[j + 0], r1 = recon[j + 1], r2 = recon[j + 2], r3 = recon[j + 3]; + unsigned char p0 = precon[i + 0], p1 = precon[i + 1], p2 = precon[i + 2], p3 = precon[i + 3]; + unsigned char q0 = precon[j + 0], q1 = precon[j + 1], q2 = precon[j + 2], q3 = precon[j + 3]; + recon[i + 0] = s0 + paethPredictor(r0, p0, q0); + recon[i + 1] = s1 + paethPredictor(r1, p1, q1); + recon[i + 2] = s2 + paethPredictor(r2, p2, q2); + recon[i + 3] = s3 + paethPredictor(r3, p3, q3); + } + } else if(bytewidth >= 3) { + for(; i + 2 < length; i += 3) { + size_t j = i - bytewidth; + unsigned char s0 = scanline[i + 0], s1 = scanline[i + 1], s2 = scanline[i + 2]; + unsigned char r0 = recon[j + 0], r1 = recon[j + 1], r2 = recon[j + 2]; + unsigned char p0 = precon[i + 0], p1 = precon[i + 1], p2 = precon[i + 2]; + unsigned char q0 = precon[j + 0], q1 = precon[j + 1], q2 = precon[j + 2]; + recon[i + 0] = s0 + paethPredictor(r0, p0, q0); + recon[i + 1] = s1 + paethPredictor(r1, p1, q1); + recon[i + 2] = s2 + paethPredictor(r2, p2, q2); + } + } else if(bytewidth >= 2) { + for(; i + 1 < length; i += 2) { + size_t j = i - bytewidth; + unsigned char s0 = scanline[i + 0], s1 = scanline[i + 1]; + unsigned char r0 = recon[j + 0], r1 = recon[j + 1]; + unsigned char p0 = precon[i + 0], p1 = precon[i + 1]; + unsigned char q0 = precon[j + 0], q1 = precon[j + 1]; + recon[i + 0] = s0 + paethPredictor(r0, p0, q0); + recon[i + 1] = s1 + paethPredictor(r1, p1, q1); + } + } + + for(; i != length; ++i) { + recon[i] = (scanline[i] + paethPredictor(recon[i - bytewidth], precon[i], precon[i - bytewidth])); + } + } else { + for(i = 0; i != bytewidth; ++i) { + recon[i] = scanline[i]; + } + for(i = bytewidth; i < length; ++i) { + /*paethPredictor(recon[i - bytewidth], 0, 0) is always recon[i - bytewidth]*/ + recon[i] = (scanline[i] + recon[i - bytewidth]); + } + } + break; + default: return 36; /*error: invalid filter type given*/ + } + return 0; +} + +static unsigned unfilter(unsigned char* out, const unsigned char* in, unsigned w, unsigned h, unsigned bpp) { + /* + For PNG filter method 0 + this function unfilters a single image (e.g. 
without interlacing this is called once, with Adam7 seven times) + out must have enough bytes allocated already, in must have the scanlines + 1 filtertype byte per scanline + w and h are image dimensions or dimensions of reduced image, bpp is bits per pixel + in and out are allowed to be the same memory address (but aren't the same size since in has the extra filter bytes) + */ + + unsigned y; + unsigned char* prevline = 0; + + /*bytewidth is used for filtering, is 1 when bpp < 8, number of bytes per pixel otherwise*/ + size_t bytewidth = (bpp + 7u) / 8u; + /*the width of a scanline in bytes, not including the filter type*/ + size_t linebytes = lodepng_get_raw_size_idat(w, 1, bpp) - 1u; + + for(y = 0; y < h; ++y) { + size_t outindex = linebytes * y; + size_t inindex = (1 + linebytes) * y; /*the extra filterbyte added to each row*/ + unsigned char filterType = in[inindex]; + + CERROR_TRY_RETURN(unfilterScanline(&out[outindex], &in[inindex + 1], prevline, bytewidth, filterType, linebytes)); + + prevline = &out[outindex]; + } + + return 0; +} + +/* +in: Adam7 interlaced image, with no padding bits between scanlines, but between + reduced images so that each reduced image starts at a byte. +out: the same pixels, but re-ordered so that they're now a non-interlaced image with size w*h +bpp: bits per pixel +out has the following size in bits: w * h * bpp. +in is possibly bigger due to padding bits between reduced images. +out must be big enough AND must be 0 everywhere if bpp < 8 in the current implementation +(because that's likely a little bit faster) +NOTE: comments about padding bits are only relevant if bpp < 8 +*/ +static void Adam7_deinterlace(unsigned char* out, const unsigned char* in, unsigned w, unsigned h, unsigned bpp) { + unsigned passw[7], passh[7]; + size_t filter_passstart[8], padded_passstart[8], passstart[8]; + unsigned i; + + Adam7_getpassvalues(passw, passh, filter_passstart, padded_passstart, passstart, w, h, bpp); + + if(bpp >= 8) { + for(i = 0; i != 7; ++i) { + unsigned x, y, b; + size_t bytewidth = bpp / 8u; + for(y = 0; y < passh[i]; ++y) + for(x = 0; x < passw[i]; ++x) { + size_t pixelinstart = passstart[i] + (y * passw[i] + x) * bytewidth; + size_t pixeloutstart = ((ADAM7_IY[i] + (size_t)y * ADAM7_DY[i]) * (size_t)w + + ADAM7_IX[i] + (size_t)x * ADAM7_DX[i]) * bytewidth; + for(b = 0; b < bytewidth; ++b) { + out[pixeloutstart + b] = in[pixelinstart + b]; + } + } + } + } else /*bpp < 8: Adam7 with pixels < 8 bit is a bit trickier: with bit pointers*/ { + for(i = 0; i != 7; ++i) { + unsigned x, y, b; + unsigned ilinebits = bpp * passw[i]; + unsigned olinebits = bpp * w; + size_t obp, ibp; /*bit pointers (for out and in buffer)*/ + for(y = 0; y < passh[i]; ++y) + for(x = 0; x < passw[i]; ++x) { + ibp = (8 * passstart[i]) + (y * ilinebits + x * bpp); + obp = (ADAM7_IY[i] + (size_t)y * ADAM7_DY[i]) * olinebits + (ADAM7_IX[i] + (size_t)x * ADAM7_DX[i]) * bpp; + for(b = 0; b < bpp; ++b) { + unsigned char bit = readBitFromReversedStream(&ibp, in); + setBitOfReversedStream(&obp, out, bit); + } + } + } + } +} + +static void removePaddingBits(unsigned char* out, const unsigned char* in, + size_t olinebits, size_t ilinebits, unsigned h) { + /* + After filtering there are still padding bits if scanlines have non multiple of 8 bit amounts. They need + to be removed (except at last scanline of (Adam7-reduced) image) before working with pure image buffers + for the Adam7 code, the color convert code and the output to the user. 
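+  For example (illustrative): in a 1 bit-per-pixel image that is 10 pixels wide, each
+  unfiltered scanline still occupies 16 bits (2 whole bytes) but only 10 of those bits are
+  pixel data, so this is called with ilinebits = 16 and olinebits = 10 to repack the rows
+  back to back.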
+ in and out are allowed to be the same buffer, in may also be higher but still overlapping; in must + have >= ilinebits*h bits, out must have >= olinebits*h bits, olinebits must be <= ilinebits + also used to move bits after earlier such operations happened, e.g. in a sequence of reduced images from Adam7 + only useful if (ilinebits - olinebits) is a value in the range 1..7 + */ + unsigned y; + size_t diff = ilinebits - olinebits; + size_t ibp = 0, obp = 0; /*input and output bit pointers*/ + for(y = 0; y < h; ++y) { + size_t x; + for(x = 0; x < olinebits; ++x) { + unsigned char bit = readBitFromReversedStream(&ibp, in); + setBitOfReversedStream(&obp, out, bit); + } + ibp += diff; + } +} + +/*out must be buffer big enough to contain full image, and in must contain the full decompressed data from +the IDAT chunks (with filter index bytes and possible padding bits) +return value is error*/ +static unsigned postProcessScanlines(unsigned char* out, unsigned char* in, + unsigned w, unsigned h, const LodePNGInfo* info_png) { + /* + This function converts the filtered-padded-interlaced data into pure 2D image buffer with the PNG's colortype. + Steps: + *) if no Adam7: 1) unfilter 2) remove padding bits (= possible extra bits per scanline if bpp < 8) + *) if adam7: 1) 7x unfilter 2) 7x remove padding bits 3) Adam7_deinterlace + NOTE: the in buffer will be overwritten with intermediate data! + */ + unsigned bpp = lodepng_get_bpp(&info_png->color); + if(bpp == 0) return 31; /*error: invalid colortype*/ + + if(info_png->interlace_method == 0) { + if(bpp < 8 && w * bpp != ((w * bpp + 7u) / 8u) * 8u) { + CERROR_TRY_RETURN(unfilter(in, in, w, h, bpp)); + removePaddingBits(out, in, w * bpp, ((w * bpp + 7u) / 8u) * 8u, h); + } + /*we can immediately filter into the out buffer, no other steps needed*/ + else CERROR_TRY_RETURN(unfilter(out, in, w, h, bpp)); + } else /*interlace_method is 1 (Adam7)*/ { + unsigned passw[7], passh[7]; size_t filter_passstart[8], padded_passstart[8], passstart[8]; + unsigned i; + + Adam7_getpassvalues(passw, passh, filter_passstart, padded_passstart, passstart, w, h, bpp); + + for(i = 0; i != 7; ++i) { + CERROR_TRY_RETURN(unfilter(&in[padded_passstart[i]], &in[filter_passstart[i]], passw[i], passh[i], bpp)); + /*TODO: possible efficiency improvement: if in this reduced image the bits fit nicely in 1 scanline, + move bytes instead of bits or move not at all*/ + if(bpp < 8) { + /*remove padding bits in scanlines; after this there still may be padding + bits between the different reduced images: each reduced image still starts nicely at a byte*/ + removePaddingBits(&in[passstart[i]], &in[padded_passstart[i]], passw[i] * bpp, + ((passw[i] * bpp + 7u) / 8u) * 8u, passh[i]); + } + } + + Adam7_deinterlace(out, in, w, h, bpp); + } + + return 0; +} + +static unsigned readChunk_PLTE(LodePNGColorMode* color, const unsigned char* data, size_t chunkLength) { + unsigned pos = 0, i; + color->palettesize = chunkLength / 3u; + if(color->palettesize == 0 || color->palettesize > 256) return 38; /*error: palette too small or big*/ + lodepng_color_mode_alloc_palette(color); + if(!color->palette && color->palettesize) { + color->palettesize = 0; + return 83; /*alloc fail*/ + } + + for(i = 0; i != color->palettesize; ++i) { + color->palette[4 * i + 0] = data[pos++]; /*R*/ + color->palette[4 * i + 1] = data[pos++]; /*G*/ + color->palette[4 * i + 2] = data[pos++]; /*B*/ + color->palette[4 * i + 3] = 255; /*alpha*/ + } + + return 0; /* OK */ +} + +static unsigned readChunk_tRNS(LodePNGColorMode* color, 
const unsigned char* data, size_t chunkLength) { + unsigned i; + if(color->colortype == LCT_PALETTE) { + /*error: more alpha values given than there are palette entries*/ + if(chunkLength > color->palettesize) return 39; + + for(i = 0; i != chunkLength; ++i) color->palette[4 * i + 3] = data[i]; + } else if(color->colortype == LCT_GREY) { + /*error: this chunk must be 2 bytes for grayscale image*/ + if(chunkLength != 2) return 30; + + color->key_defined = 1; + color->key_r = color->key_g = color->key_b = 256u * data[0] + data[1]; + } else if(color->colortype == LCT_RGB) { + /*error: this chunk must be 6 bytes for RGB image*/ + if(chunkLength != 6) return 41; + + color->key_defined = 1; + color->key_r = 256u * data[0] + data[1]; + color->key_g = 256u * data[2] + data[3]; + color->key_b = 256u * data[4] + data[5]; + } + else return 42; /*error: tRNS chunk not allowed for other color models*/ + + return 0; /* OK */ +} + + +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS +/*background color chunk (bKGD)*/ +static unsigned readChunk_bKGD(LodePNGInfo* info, const unsigned char* data, size_t chunkLength) { + if(info->color.colortype == LCT_PALETTE) { + /*error: this chunk must be 1 byte for indexed color image*/ + if(chunkLength != 1) return 43; + + /*error: invalid palette index, or maybe this chunk appeared before PLTE*/ + if(data[0] >= info->color.palettesize) return 103; + + info->background_defined = 1; + info->background_r = info->background_g = info->background_b = data[0]; + } else if(info->color.colortype == LCT_GREY || info->color.colortype == LCT_GREY_ALPHA) { + /*error: this chunk must be 2 bytes for grayscale image*/ + if(chunkLength != 2) return 44; + + /*the values are truncated to bitdepth in the PNG file*/ + info->background_defined = 1; + info->background_r = info->background_g = info->background_b = 256u * data[0] + data[1]; + } else if(info->color.colortype == LCT_RGB || info->color.colortype == LCT_RGBA) { + /*error: this chunk must be 6 bytes for grayscale image*/ + if(chunkLength != 6) return 45; + + /*the values are truncated to bitdepth in the PNG file*/ + info->background_defined = 1; + info->background_r = 256u * data[0] + data[1]; + info->background_g = 256u * data[2] + data[3]; + info->background_b = 256u * data[4] + data[5]; + } + + return 0; /* OK */ +} + +/*text chunk (tEXt)*/ +static unsigned readChunk_tEXt(LodePNGInfo* info, const unsigned char* data, size_t chunkLength) { + unsigned error = 0; + char *key = 0, *str = 0; + + while(!error) /*not really a while loop, only used to break on error*/ { + unsigned length, string2_begin; + + length = 0; + while(length < chunkLength && data[length] != 0) ++length; + /*even though it's not allowed by the standard, no error is thrown if + there's no null termination char, if the text is empty*/ + if(length < 1 || length > 79) CERROR_BREAK(error, 89); /*keyword too short or long*/ + + key = (char*)lodepng_malloc(length + 1); + if(!key) CERROR_BREAK(error, 83); /*alloc fail*/ + + lodepng_memcpy(key, data, length); + key[length] = 0; + + string2_begin = length + 1; /*skip keyword null terminator*/ + + length = (unsigned)(chunkLength < string2_begin ? 
0 : chunkLength - string2_begin); + str = (char*)lodepng_malloc(length + 1); + if(!str) CERROR_BREAK(error, 83); /*alloc fail*/ + + lodepng_memcpy(str, data + string2_begin, length); + str[length] = 0; + + error = lodepng_add_text(info, key, str); + + break; + } + + lodepng_free(key); + lodepng_free(str); + + return error; +} + +/*compressed text chunk (zTXt)*/ +static unsigned readChunk_zTXt(LodePNGInfo* info, const LodePNGDecoderSettings* decoder, + const unsigned char* data, size_t chunkLength) { + unsigned error = 0; + + /*copy the object to change parameters in it*/ + LodePNGDecompressSettings zlibsettings = decoder->zlibsettings; + + unsigned length, string2_begin; + char *key = 0; + unsigned char* str = 0; + size_t size = 0; + + while(!error) /*not really a while loop, only used to break on error*/ { + for(length = 0; length < chunkLength && data[length] != 0; ++length) ; + if(length + 2 >= chunkLength) CERROR_BREAK(error, 75); /*no null termination, corrupt?*/ + if(length < 1 || length > 79) CERROR_BREAK(error, 89); /*keyword too short or long*/ + + key = (char*)lodepng_malloc(length + 1); + if(!key) CERROR_BREAK(error, 83); /*alloc fail*/ + + lodepng_memcpy(key, data, length); + key[length] = 0; + + if(data[length + 1] != 0) CERROR_BREAK(error, 72); /*the 0 byte indicating compression must be 0*/ + + string2_begin = length + 2; + if(string2_begin > chunkLength) CERROR_BREAK(error, 75); /*no null termination, corrupt?*/ + + length = (unsigned)chunkLength - string2_begin; + zlibsettings.max_output_size = decoder->max_text_size; + /*will fail if zlib error, e.g. if length is too small*/ + error = zlib_decompress(&str, &size, 0, &data[string2_begin], + length, &zlibsettings); + /*error: compressed text larger than decoder->max_text_size*/ + if(error && size > zlibsettings.max_output_size) error = 112; + if(error) break; + error = lodepng_add_text_sized(info, key, (char*)str, size); + break; + } + + lodepng_free(key); + lodepng_free(str); + + return error; +} + +/*international text chunk (iTXt)*/ +static unsigned readChunk_iTXt(LodePNGInfo* info, const LodePNGDecoderSettings* decoder, + const unsigned char* data, size_t chunkLength) { + unsigned error = 0; + unsigned i; + + /*copy the object to change parameters in it*/ + LodePNGDecompressSettings zlibsettings = decoder->zlibsettings; + + unsigned length, begin, compressed; + char *key = 0, *langtag = 0, *transkey = 0; + + while(!error) /*not really a while loop, only used to break on error*/ { + /*Quick check if the chunk length isn't too small. Even without check + it'd still fail with other error checks below if it's too short. 
This just gives a different error code.*/ + if(chunkLength < 5) CERROR_BREAK(error, 30); /*iTXt chunk too short*/ + + /*read the key*/ + for(length = 0; length < chunkLength && data[length] != 0; ++length) ; + if(length + 3 >= chunkLength) CERROR_BREAK(error, 75); /*no null termination char, corrupt?*/ + if(length < 1 || length > 79) CERROR_BREAK(error, 89); /*keyword too short or long*/ + + key = (char*)lodepng_malloc(length + 1); + if(!key) CERROR_BREAK(error, 83); /*alloc fail*/ + + lodepng_memcpy(key, data, length); + key[length] = 0; + + /*read the compression method*/ + compressed = data[length + 1]; + if(data[length + 2] != 0) CERROR_BREAK(error, 72); /*the 0 byte indicating compression must be 0*/ + + /*even though it's not allowed by the standard, no error is thrown if + there's no null termination char, if the text is empty for the next 3 texts*/ + + /*read the langtag*/ + begin = length + 3; + length = 0; + for(i = begin; i < chunkLength && data[i] != 0; ++i) ++length; + + langtag = (char*)lodepng_malloc(length + 1); + if(!langtag) CERROR_BREAK(error, 83); /*alloc fail*/ + + lodepng_memcpy(langtag, data + begin, length); + langtag[length] = 0; + + /*read the transkey*/ + begin += length + 1; + length = 0; + for(i = begin; i < chunkLength && data[i] != 0; ++i) ++length; + + transkey = (char*)lodepng_malloc(length + 1); + if(!transkey) CERROR_BREAK(error, 83); /*alloc fail*/ + + lodepng_memcpy(transkey, data + begin, length); + transkey[length] = 0; + + /*read the actual text*/ + begin += length + 1; + + length = (unsigned)chunkLength < begin ? 0 : (unsigned)chunkLength - begin; + + if(compressed) { + unsigned char* str = 0; + size_t size = 0; + zlibsettings.max_output_size = decoder->max_text_size; + /*will fail if zlib error, e.g. if length is too small*/ + error = zlib_decompress(&str, &size, 0, &data[begin], + length, &zlibsettings); + /*error: compressed text larger than decoder->max_text_size*/ + if(error && size > zlibsettings.max_output_size) error = 112; + if(!error) error = lodepng_add_itext_sized(info, key, langtag, transkey, (char*)str, size); + lodepng_free(str); + } else { + error = lodepng_add_itext_sized(info, key, langtag, transkey, (char*)(data + begin), length); + } + + break; + } + + lodepng_free(key); + lodepng_free(langtag); + lodepng_free(transkey); + + return error; +} + +static unsigned readChunk_tIME(LodePNGInfo* info, const unsigned char* data, size_t chunkLength) { + if(chunkLength != 7) return 73; /*invalid tIME chunk size*/ + + info->time_defined = 1; + info->time.year = 256u * data[0] + data[1]; + info->time.month = data[2]; + info->time.day = data[3]; + info->time.hour = data[4]; + info->time.minute = data[5]; + info->time.second = data[6]; + + return 0; /* OK */ +} + +static unsigned readChunk_pHYs(LodePNGInfo* info, const unsigned char* data, size_t chunkLength) { + if(chunkLength != 9) return 74; /*invalid pHYs chunk size*/ + + info->phys_defined = 1; + info->phys_x = 16777216u * data[0] + 65536u * data[1] + 256u * data[2] + data[3]; + info->phys_y = 16777216u * data[4] + 65536u * data[5] + 256u * data[6] + data[7]; + info->phys_unit = data[8]; + + return 0; /* OK */ +} + +static unsigned readChunk_gAMA(LodePNGInfo* info, const unsigned char* data, size_t chunkLength) { + if(chunkLength != 4) return 96; /*invalid gAMA chunk size*/ + + info->gama_defined = 1; + info->gama_gamma = 16777216u * data[0] + 65536u * data[1] + 256u * data[2] + data[3]; + + return 0; /* OK */ +} + +static unsigned readChunk_cHRM(LodePNGInfo* info, const unsigned char* 
data, size_t chunkLength) { + if(chunkLength != 32) return 97; /*invalid cHRM chunk size*/ + + info->chrm_defined = 1; + info->chrm_white_x = 16777216u * data[ 0] + 65536u * data[ 1] + 256u * data[ 2] + data[ 3]; + info->chrm_white_y = 16777216u * data[ 4] + 65536u * data[ 5] + 256u * data[ 6] + data[ 7]; + info->chrm_red_x = 16777216u * data[ 8] + 65536u * data[ 9] + 256u * data[10] + data[11]; + info->chrm_red_y = 16777216u * data[12] + 65536u * data[13] + 256u * data[14] + data[15]; + info->chrm_green_x = 16777216u * data[16] + 65536u * data[17] + 256u * data[18] + data[19]; + info->chrm_green_y = 16777216u * data[20] + 65536u * data[21] + 256u * data[22] + data[23]; + info->chrm_blue_x = 16777216u * data[24] + 65536u * data[25] + 256u * data[26] + data[27]; + info->chrm_blue_y = 16777216u * data[28] + 65536u * data[29] + 256u * data[30] + data[31]; + + return 0; /* OK */ +} + +static unsigned readChunk_sRGB(LodePNGInfo* info, const unsigned char* data, size_t chunkLength) { + if(chunkLength != 1) return 98; /*invalid sRGB chunk size (this one is never ignored)*/ + + info->srgb_defined = 1; + info->srgb_intent = data[0]; + + return 0; /* OK */ +} + +static unsigned readChunk_iCCP(LodePNGInfo* info, const LodePNGDecoderSettings* decoder, + const unsigned char* data, size_t chunkLength) { + unsigned error = 0; + unsigned i; + size_t size = 0; + /*copy the object to change parameters in it*/ + LodePNGDecompressSettings zlibsettings = decoder->zlibsettings; + + unsigned length, string2_begin; + + info->iccp_defined = 1; + if(info->iccp_name) lodepng_clear_icc(info); + + for(length = 0; length < chunkLength && data[length] != 0; ++length) ; + if(length + 2 >= chunkLength) return 75; /*no null termination, corrupt?*/ + if(length < 1 || length > 79) return 89; /*keyword too short or long*/ + + info->iccp_name = (char*)lodepng_malloc(length + 1); + if(!info->iccp_name) return 83; /*alloc fail*/ + + info->iccp_name[length] = 0; + for(i = 0; i != length; ++i) info->iccp_name[i] = (char)data[i]; + + if(data[length + 1] != 0) return 72; /*the 0 byte indicating compression must be 0*/ + + string2_begin = length + 2; + if(string2_begin > chunkLength) return 75; /*no null termination, corrupt?*/ + + length = (unsigned)chunkLength - string2_begin; + zlibsettings.max_output_size = decoder->max_icc_size; + error = zlib_decompress(&info->iccp_profile, &size, 0, + &data[string2_begin], + length, &zlibsettings); + /*error: ICC profile larger than decoder->max_icc_size*/ + if(error && size > zlibsettings.max_output_size) error = 113; + info->iccp_profile_size = size; + if(!error && !info->iccp_profile_size) error = 100; /*invalid ICC profile size*/ + return error; +} +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ + +unsigned lodepng_inspect_chunk(LodePNGState* state, size_t pos, + const unsigned char* in, size_t insize) { + const unsigned char* chunk = in + pos; + unsigned chunkLength; + const unsigned char* data; + unsigned unhandled = 0; + unsigned error = 0; + + if(pos + 4 > insize) return 30; + chunkLength = lodepng_chunk_length(chunk); + if(chunkLength > 2147483647) return 63; + data = lodepng_chunk_data_const(chunk); + if(data + chunkLength + 4 > in + insize) return 30; + + if(lodepng_chunk_type_equals(chunk, "PLTE")) { + error = readChunk_PLTE(&state->info_png.color, data, chunkLength); + } else if(lodepng_chunk_type_equals(chunk, "tRNS")) { + error = readChunk_tRNS(&state->info_png.color, data, chunkLength); +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS + } else if(lodepng_chunk_type_equals(chunk, "bKGD")) 
{ + error = readChunk_bKGD(&state->info_png, data, chunkLength); + } else if(lodepng_chunk_type_equals(chunk, "tEXt")) { + error = readChunk_tEXt(&state->info_png, data, chunkLength); + } else if(lodepng_chunk_type_equals(chunk, "zTXt")) { + error = readChunk_zTXt(&state->info_png, &state->decoder, data, chunkLength); + } else if(lodepng_chunk_type_equals(chunk, "iTXt")) { + error = readChunk_iTXt(&state->info_png, &state->decoder, data, chunkLength); + } else if(lodepng_chunk_type_equals(chunk, "tIME")) { + error = readChunk_tIME(&state->info_png, data, chunkLength); + } else if(lodepng_chunk_type_equals(chunk, "pHYs")) { + error = readChunk_pHYs(&state->info_png, data, chunkLength); + } else if(lodepng_chunk_type_equals(chunk, "gAMA")) { + error = readChunk_gAMA(&state->info_png, data, chunkLength); + } else if(lodepng_chunk_type_equals(chunk, "cHRM")) { + error = readChunk_cHRM(&state->info_png, data, chunkLength); + } else if(lodepng_chunk_type_equals(chunk, "sRGB")) { + error = readChunk_sRGB(&state->info_png, data, chunkLength); + } else if(lodepng_chunk_type_equals(chunk, "iCCP")) { + error = readChunk_iCCP(&state->info_png, &state->decoder, data, chunkLength); +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ + } else { + /* unhandled chunk is ok (is not an error) */ + unhandled = 1; + } + + if(!error && !unhandled && !state->decoder.ignore_crc) { + if(lodepng_chunk_check_crc(chunk)) return 57; /*invalid CRC*/ + } + + return error; +} + +/*read a PNG, the result will be in the same color type as the PNG (hence "generic")*/ +static void decodeGeneric(unsigned char** out, unsigned* w, unsigned* h, + LodePNGState* state, + const unsigned char* in, size_t insize) { + unsigned char IEND = 0; + const unsigned char* chunk; + unsigned char* idat; /*the data from idat chunks, zlib compressed*/ + size_t idatsize = 0; + unsigned char* scanlines = 0; + size_t scanlines_size = 0, expected_size = 0; + size_t outsize = 0; + + /*for unknown chunk order*/ + unsigned unknown = 0; +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS + unsigned critical_pos = 1; /*1 = after IHDR, 2 = after PLTE, 3 = after IDAT*/ +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ + + + /* safe output values in case error happens */ + *out = 0; + *w = *h = 0; + + state->error = lodepng_inspect(w, h, state, in, insize); /*reads header and resets other parameters in state->info_png*/ + if(state->error) return; + + if(lodepng_pixel_overflow(*w, *h, &state->info_png.color, &state->info_raw)) { + CERROR_RETURN(state->error, 92); /*overflow possible due to amount of pixels*/ + } + + /*the input filesize is a safe upper bound for the sum of idat chunks size*/ + idat = (unsigned char*)lodepng_malloc(insize); + if(!idat) CERROR_RETURN(state->error, 83); /*alloc fail*/ + + chunk = &in[33]; /*first byte of the first chunk after the header*/ + + /*loop through the chunks, ignoring unknown chunks and stopping at IEND chunk. 
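+  Each chunk on disk is 4 length bytes, a 4-byte type, the data itself, and a 4-byte CRC,
+  which is why the bounds checks below require chunkLength + 12 bytes to remain in the buffer.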
+ IDAT data is put at the start of the in buffer*/ + while(!IEND && !state->error) { + unsigned chunkLength; + const unsigned char* data; /*the data in the chunk*/ + + /*error: size of the in buffer too small to contain next chunk*/ + if((size_t)((chunk - in) + 12) > insize || chunk < in) { + if(state->decoder.ignore_end) break; /*other errors may still happen though*/ + CERROR_BREAK(state->error, 30); + } + + /*length of the data of the chunk, excluding the length bytes, chunk type and CRC bytes*/ + chunkLength = lodepng_chunk_length(chunk); + /*error: chunk length larger than the max PNG chunk size*/ + if(chunkLength > 2147483647) { + if(state->decoder.ignore_end) break; /*other errors may still happen though*/ + CERROR_BREAK(state->error, 63); + } + + if((size_t)((chunk - in) + chunkLength + 12) > insize || (chunk + chunkLength + 12) < in) { + CERROR_BREAK(state->error, 64); /*error: size of the in buffer too small to contain next chunk*/ + } + + data = lodepng_chunk_data_const(chunk); + + unknown = 0; + + /*IDAT chunk, containing compressed image data*/ + if(lodepng_chunk_type_equals(chunk, "IDAT")) { + size_t newsize; + if(lodepng_addofl(idatsize, chunkLength, &newsize)) CERROR_BREAK(state->error, 95); + if(newsize > insize) CERROR_BREAK(state->error, 95); + lodepng_memcpy(idat + idatsize, data, chunkLength); + idatsize += chunkLength; +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS + critical_pos = 3; +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ + } else if(lodepng_chunk_type_equals(chunk, "IEND")) { + /*IEND chunk*/ + IEND = 1; + } else if(lodepng_chunk_type_equals(chunk, "PLTE")) { + /*palette chunk (PLTE)*/ + state->error = readChunk_PLTE(&state->info_png.color, data, chunkLength); + if(state->error) break; +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS + critical_pos = 2; +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ + } else if(lodepng_chunk_type_equals(chunk, "tRNS")) { + /*palette transparency chunk (tRNS). Even though this one is an ancillary chunk , it is still compiled + in without 'LODEPNG_COMPILE_ANCILLARY_CHUNKS' because it contains essential color information that + affects the alpha channel of pixels. 
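+      For an RGB image, for example, tRNS holds one 16-bit sample per channel naming the single
+      color that is to be treated as fully transparent, which readChunk_tRNS above parses into
+      key_r, key_g and key_b.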
*/ + state->error = readChunk_tRNS(&state->info_png.color, data, chunkLength); + if(state->error) break; +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS + /*background color chunk (bKGD)*/ + } else if(lodepng_chunk_type_equals(chunk, "bKGD")) { + state->error = readChunk_bKGD(&state->info_png, data, chunkLength); + if(state->error) break; + } else if(lodepng_chunk_type_equals(chunk, "tEXt")) { + /*text chunk (tEXt)*/ + if(state->decoder.read_text_chunks) { + state->error = readChunk_tEXt(&state->info_png, data, chunkLength); + if(state->error) break; + } + } else if(lodepng_chunk_type_equals(chunk, "zTXt")) { + /*compressed text chunk (zTXt)*/ + if(state->decoder.read_text_chunks) { + state->error = readChunk_zTXt(&state->info_png, &state->decoder, data, chunkLength); + if(state->error) break; + } + } else if(lodepng_chunk_type_equals(chunk, "iTXt")) { + /*international text chunk (iTXt)*/ + if(state->decoder.read_text_chunks) { + state->error = readChunk_iTXt(&state->info_png, &state->decoder, data, chunkLength); + if(state->error) break; + } + } else if(lodepng_chunk_type_equals(chunk, "tIME")) { + state->error = readChunk_tIME(&state->info_png, data, chunkLength); + if(state->error) break; + } else if(lodepng_chunk_type_equals(chunk, "pHYs")) { + state->error = readChunk_pHYs(&state->info_png, data, chunkLength); + if(state->error) break; + } else if(lodepng_chunk_type_equals(chunk, "gAMA")) { + state->error = readChunk_gAMA(&state->info_png, data, chunkLength); + if(state->error) break; + } else if(lodepng_chunk_type_equals(chunk, "cHRM")) { + state->error = readChunk_cHRM(&state->info_png, data, chunkLength); + if(state->error) break; + } else if(lodepng_chunk_type_equals(chunk, "sRGB")) { + state->error = readChunk_sRGB(&state->info_png, data, chunkLength); + if(state->error) break; + } else if(lodepng_chunk_type_equals(chunk, "iCCP")) { + state->error = readChunk_iCCP(&state->info_png, &state->decoder, data, chunkLength); + if(state->error) break; +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ + } else /*it's not an implemented chunk type, so ignore it: skip over the data*/ { + /*error: unknown critical chunk (5th bit of first byte of chunk type is 0)*/ + if(!state->decoder.ignore_critical && !lodepng_chunk_ancillary(chunk)) { + CERROR_BREAK(state->error, 69); + } + + unknown = 1; +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS + if(state->decoder.remember_unknown_chunks) { + state->error = lodepng_chunk_append(&state->info_png.unknown_chunks_data[critical_pos - 1], + &state->info_png.unknown_chunks_size[critical_pos - 1], chunk); + if(state->error) break; + } +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ + } + + if(!state->decoder.ignore_crc && !unknown) /*check CRC if wanted, only on known chunk types*/ { + if(lodepng_chunk_check_crc(chunk)) CERROR_BREAK(state->error, 57); /*invalid CRC*/ + } + + if(!IEND) chunk = lodepng_chunk_next_const(chunk, in + insize); + } + + if(!state->error && state->info_png.color.colortype == LCT_PALETTE && !state->info_png.color.palette) { + state->error = 106; /* error: PNG file must have PLTE chunk if color type is palette */ + } + + if(!state->error) { + /*predict output size, to allocate exact size for output buffer to avoid more dynamic allocation. 
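+    For a non-interlaced image the prediction is simply h scanlines of ceil(w * bpp / 8) bytes
+    plus one filter-type byte each; e.g. (illustrative) a 32x32 RGBA 8-bit image (bpp 32) is
+    expected to inflate to 32 * (128 + 1) = 4128 bytes.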
+ If the decompressed size does not match the prediction, the image must be corrupt.*/ + if(state->info_png.interlace_method == 0) { + size_t bpp = lodepng_get_bpp(&state->info_png.color); + expected_size = lodepng_get_raw_size_idat(*w, *h, bpp); + } else { + size_t bpp = lodepng_get_bpp(&state->info_png.color); + /*Adam-7 interlaced: expected size is the sum of the 7 sub-images sizes*/ + expected_size = 0; + expected_size += lodepng_get_raw_size_idat((*w + 7) >> 3, (*h + 7) >> 3, bpp); + if(*w > 4) expected_size += lodepng_get_raw_size_idat((*w + 3) >> 3, (*h + 7) >> 3, bpp); + expected_size += lodepng_get_raw_size_idat((*w + 3) >> 2, (*h + 3) >> 3, bpp); + if(*w > 2) expected_size += lodepng_get_raw_size_idat((*w + 1) >> 2, (*h + 3) >> 2, bpp); + expected_size += lodepng_get_raw_size_idat((*w + 1) >> 1, (*h + 1) >> 2, bpp); + if(*w > 1) expected_size += lodepng_get_raw_size_idat((*w + 0) >> 1, (*h + 1) >> 1, bpp); + expected_size += lodepng_get_raw_size_idat((*w + 0), (*h + 0) >> 1, bpp); + } + + state->error = zlib_decompress(&scanlines, &scanlines_size, expected_size, idat, idatsize, &state->decoder.zlibsettings); + } + if(!state->error && scanlines_size != expected_size) state->error = 91; /*decompressed size doesn't match prediction*/ + lodepng_free(idat); + + if(!state->error) { + outsize = lodepng_get_raw_size(*w, *h, &state->info_png.color); + *out = (unsigned char*)lodepng_malloc(outsize); + if(!*out) state->error = 83; /*alloc fail*/ + } + if(!state->error) { + lodepng_memset(*out, 0, outsize); + state->error = postProcessScanlines(*out, scanlines, *w, *h, &state->info_png); + } + lodepng_free(scanlines); +} + +unsigned lodepng_decode(unsigned char** out, unsigned* w, unsigned* h, + LodePNGState* state, + const unsigned char* in, size_t insize) { + *out = 0; + decodeGeneric(out, w, h, state, in, insize); + if(state->error) return state->error; + if(!state->decoder.color_convert || lodepng_color_mode_equal(&state->info_raw, &state->info_png.color)) { + /*same color type, no copying or converting of data needed*/ + /*store the info_png color settings on the info_raw so that the info_raw still reflects what colortype + the raw image has to the end user*/ + if(!state->decoder.color_convert) { + state->error = lodepng_color_mode_copy(&state->info_raw, &state->info_png.color); + if(state->error) return state->error; + } + } else { /*color conversion needed*/ + unsigned char* data = *out; + size_t outsize; + + /*TODO: check if this works according to the statement in the documentation: "The converter can convert + from grayscale input color type, to 8-bit grayscale or grayscale with alpha"*/ + if(!(state->info_raw.colortype == LCT_RGB || state->info_raw.colortype == LCT_RGBA) + && !(state->info_raw.bitdepth == 8)) { + return 56; /*unsupported color mode conversion*/ + } + + outsize = lodepng_get_raw_size(*w, *h, &state->info_raw); + *out = (unsigned char*)lodepng_malloc(outsize); + if(!(*out)) { + state->error = 83; /*alloc fail*/ + } + else state->error = lodepng_convert(*out, data, &state->info_raw, + &state->info_png.color, *w, *h); + lodepng_free(data); + } + return state->error; +} + +unsigned lodepng_decode_memory(unsigned char** out, unsigned* w, unsigned* h, const unsigned char* in, + size_t insize, LodePNGColorType colortype, unsigned bitdepth) { + unsigned error; + LodePNGState state; + lodepng_state_init(&state); + state.info_raw.colortype = colortype; + state.info_raw.bitdepth = bitdepth; +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS + /*disable reading things that this function 
doesn't output*/ + state.decoder.read_text_chunks = 0; + state.decoder.remember_unknown_chunks = 0; +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ + error = lodepng_decode(out, w, h, &state, in, insize); + lodepng_state_cleanup(&state); + return error; +} + +unsigned lodepng_decode32(unsigned char** out, unsigned* w, unsigned* h, const unsigned char* in, size_t insize) { + return lodepng_decode_memory(out, w, h, in, insize, LCT_RGBA, 8); +} + +unsigned lodepng_decode24(unsigned char** out, unsigned* w, unsigned* h, const unsigned char* in, size_t insize) { + return lodepng_decode_memory(out, w, h, in, insize, LCT_RGB, 8); +} + +#ifdef LODEPNG_COMPILE_DISK +unsigned lodepng_decode_file(unsigned char** out, unsigned* w, unsigned* h, const char* filename, + LodePNGColorType colortype, unsigned bitdepth) { + unsigned char* buffer = 0; + size_t buffersize; + unsigned error; + /* safe output values in case error happens */ + *out = 0; + *w = *h = 0; + error = lodepng_load_file(&buffer, &buffersize, filename); + if(!error) error = lodepng_decode_memory(out, w, h, buffer, buffersize, colortype, bitdepth); + lodepng_free(buffer); + return error; +} + +unsigned lodepng_decode32_file(unsigned char** out, unsigned* w, unsigned* h, const char* filename) { + return lodepng_decode_file(out, w, h, filename, LCT_RGBA, 8); +} + +unsigned lodepng_decode24_file(unsigned char** out, unsigned* w, unsigned* h, const char* filename) { + return lodepng_decode_file(out, w, h, filename, LCT_RGB, 8); +} +#endif /*LODEPNG_COMPILE_DISK*/ + +void lodepng_decoder_settings_init(LodePNGDecoderSettings* settings) { + settings->color_convert = 1; +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS + settings->read_text_chunks = 1; + settings->remember_unknown_chunks = 0; + settings->max_text_size = 16777216; + settings->max_icc_size = 16777216; /* 16MB is much more than enough for any reasonable ICC profile */ +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ + settings->ignore_crc = 0; + settings->ignore_critical = 0; + settings->ignore_end = 0; + lodepng_decompress_settings_init(&settings->zlibsettings); +} + +#endif /*LODEPNG_COMPILE_DECODER*/ + +#if defined(LODEPNG_COMPILE_DECODER) || defined(LODEPNG_COMPILE_ENCODER) + +void lodepng_state_init(LodePNGState* state) { +#ifdef LODEPNG_COMPILE_DECODER + lodepng_decoder_settings_init(&state->decoder); +#endif /*LODEPNG_COMPILE_DECODER*/ +#ifdef LODEPNG_COMPILE_ENCODER + lodepng_encoder_settings_init(&state->encoder); +#endif /*LODEPNG_COMPILE_ENCODER*/ + lodepng_color_mode_init(&state->info_raw); + lodepng_info_init(&state->info_png); + state->error = 1; +} + +void lodepng_state_cleanup(LodePNGState* state) { + lodepng_color_mode_cleanup(&state->info_raw); + lodepng_info_cleanup(&state->info_png); +} + +void lodepng_state_copy(LodePNGState* dest, const LodePNGState* source) { + lodepng_state_cleanup(dest); + *dest = *source; + lodepng_color_mode_init(&dest->info_raw); + lodepng_info_init(&dest->info_png); + dest->error = lodepng_color_mode_copy(&dest->info_raw, &source->info_raw); if(dest->error) return; + dest->error = lodepng_info_copy(&dest->info_png, &source->info_png); if(dest->error) return; +} + +#endif /* defined(LODEPNG_COMPILE_DECODER) || defined(LODEPNG_COMPILE_ENCODER) */ + +#ifdef LODEPNG_COMPILE_ENCODER + +/* ////////////////////////////////////////////////////////////////////////// */ +/* / PNG Encoder / */ +/* ////////////////////////////////////////////////////////////////////////// */ + + +static unsigned writeSignature(ucvector* out) { + size_t pos = out->size; + const 
unsigned char signature[] = {137, 80, 78, 71, 13, 10, 26, 10}; + /*8 bytes PNG signature, aka the magic bytes*/ + if(!ucvector_resize(out, out->size + 8)) return 83; /*alloc fail*/ + lodepng_memcpy(out->data + pos, signature, 8); + return 0; +} + +static unsigned addChunk_IHDR(ucvector* out, unsigned w, unsigned h, + LodePNGColorType colortype, unsigned bitdepth, unsigned interlace_method) { + unsigned char *chunk, *data; + CERROR_TRY_RETURN(lodepng_chunk_init(&chunk, out, 13, "IHDR")); + data = chunk + 8; + + lodepng_set32bitInt(data + 0, w); /*width*/ + lodepng_set32bitInt(data + 4, h); /*height*/ + data[8] = (unsigned char)bitdepth; /*bit depth*/ + data[9] = (unsigned char)colortype; /*color type*/ + data[10] = 0; /*compression method*/ + data[11] = 0; /*filter method*/ + data[12] = interlace_method; /*interlace method*/ + + lodepng_chunk_generate_crc(chunk); + return 0; +} + +/* only adds the chunk if needed (there is a key or palette with alpha) */ +static unsigned addChunk_PLTE(ucvector* out, const LodePNGColorMode* info) { + unsigned char* chunk; + size_t i, j = 8; + + CERROR_TRY_RETURN(lodepng_chunk_init(&chunk, out, info->palettesize * 3, "PLTE")); + + for(i = 0; i != info->palettesize; ++i) { + /*add all channels except alpha channel*/ + chunk[j++] = info->palette[i * 4 + 0]; + chunk[j++] = info->palette[i * 4 + 1]; + chunk[j++] = info->palette[i * 4 + 2]; + } + + lodepng_chunk_generate_crc(chunk); + return 0; +} + +static unsigned addChunk_tRNS(ucvector* out, const LodePNGColorMode* info) { + unsigned char* chunk = 0; + + if(info->colortype == LCT_PALETTE) { + size_t i, amount = info->palettesize; + /*the tail of palette values that all have 255 as alpha, does not have to be encoded*/ + for(i = info->palettesize; i != 0; --i) { + if(info->palette[4 * (i - 1) + 3] != 255) break; + --amount; + } + if(amount) { + CERROR_TRY_RETURN(lodepng_chunk_init(&chunk, out, amount, "tRNS")); + /*add the alpha channel values from the palette*/ + for(i = 0; i != amount; ++i) chunk[8 + i] = info->palette[4 * i + 3]; + } + } else if(info->colortype == LCT_GREY) { + if(info->key_defined) { + CERROR_TRY_RETURN(lodepng_chunk_init(&chunk, out, 2, "tRNS")); + chunk[8] = (unsigned char)(info->key_r >> 8); + chunk[9] = (unsigned char)(info->key_r & 255); + } + } else if(info->colortype == LCT_RGB) { + if(info->key_defined) { + CERROR_TRY_RETURN(lodepng_chunk_init(&chunk, out, 6, "tRNS")); + chunk[8] = (unsigned char)(info->key_r >> 8); + chunk[9] = (unsigned char)(info->key_r & 255); + chunk[10] = (unsigned char)(info->key_g >> 8); + chunk[11] = (unsigned char)(info->key_g & 255); + chunk[12] = (unsigned char)(info->key_b >> 8); + chunk[13] = (unsigned char)(info->key_b & 255); + } + } + + if(chunk) lodepng_chunk_generate_crc(chunk); + return 0; +} + +static unsigned addChunk_IDAT(ucvector* out, const unsigned char* data, size_t datasize, + LodePNGCompressSettings* zlibsettings) { + unsigned error = 0; + unsigned char* zlib = 0; + size_t zlibsize = 0; + + error = zlib_compress(&zlib, &zlibsize, data, datasize, zlibsettings); + if(!error) { + error = lodepng_chunk_createv(out, zlibsize, "IDAT", zlib); + } + lodepng_free(zlib); + return error; +} + +static unsigned addChunk_IEND(ucvector* out) { + return lodepng_chunk_createv(out, 0, "IEND", 0); +} + +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS + +static unsigned addChunk_tEXt(ucvector* out, const char* keyword, const char* textstring) { + unsigned char* chunk = 0; + size_t keysize = lodepng_strlen(keyword), textsize = lodepng_strlen(textstring); + size_t size 
= keysize + 1 + textsize; + if(keysize < 1 || keysize > 79) return 89; /*error: invalid keyword size*/ + CERROR_TRY_RETURN(lodepng_chunk_init(&chunk, out, size, "tEXt")); + lodepng_memcpy(chunk + 8, keyword, keysize); + chunk[8 + keysize] = 0; /*null termination char*/ + lodepng_memcpy(chunk + 9 + keysize, textstring, textsize); + lodepng_chunk_generate_crc(chunk); + return 0; +} + +static unsigned addChunk_zTXt(ucvector* out, const char* keyword, const char* textstring, + LodePNGCompressSettings* zlibsettings) { + unsigned error = 0; + unsigned char* chunk = 0; + unsigned char* compressed = 0; + size_t compressedsize = 0; + size_t textsize = lodepng_strlen(textstring); + size_t keysize = lodepng_strlen(keyword); + if(keysize < 1 || keysize > 79) return 89; /*error: invalid keyword size*/ + + error = zlib_compress(&compressed, &compressedsize, + (const unsigned char*)textstring, textsize, zlibsettings); + if(!error) { + size_t size = keysize + 2 + compressedsize; + error = lodepng_chunk_init(&chunk, out, size, "zTXt"); + } + if(!error) { + lodepng_memcpy(chunk + 8, keyword, keysize); + chunk[8 + keysize] = 0; /*null termination char*/ + chunk[9 + keysize] = 0; /*compression method: 0*/ + lodepng_memcpy(chunk + 10 + keysize, compressed, compressedsize); + lodepng_chunk_generate_crc(chunk); + } + + lodepng_free(compressed); + return error; +} + +static unsigned addChunk_iTXt(ucvector* out, unsigned compress, const char* keyword, const char* langtag, + const char* transkey, const char* textstring, LodePNGCompressSettings* zlibsettings) { + unsigned error = 0; + unsigned char* chunk = 0; + unsigned char* compressed = 0; + size_t compressedsize = 0; + size_t textsize = lodepng_strlen(textstring); + size_t keysize = lodepng_strlen(keyword), langsize = lodepng_strlen(langtag), transsize = lodepng_strlen(transkey); + + if(keysize < 1 || keysize > 79) return 89; /*error: invalid keyword size*/ + + if(compress) { + error = zlib_compress(&compressed, &compressedsize, + (const unsigned char*)textstring, textsize, zlibsettings); + } + if(!error) { + size_t size = keysize + 3 + langsize + 1 + transsize + 1 + (compress ? compressedsize : textsize); + error = lodepng_chunk_init(&chunk, out, size, "iTXt"); + } + if(!error) { + size_t pos = 8; + lodepng_memcpy(chunk + pos, keyword, keysize); + pos += keysize; + chunk[pos++] = 0; /*null termination char*/ + chunk[pos++] = (compress ? 
1 : 0); /*compression flag*/ + chunk[pos++] = 0; /*compression method: 0*/ + lodepng_memcpy(chunk + pos, langtag, langsize); + pos += langsize; + chunk[pos++] = 0; /*null termination char*/ + lodepng_memcpy(chunk + pos, transkey, transsize); + pos += transsize; + chunk[pos++] = 0; /*null termination char*/ + if(compress) { + lodepng_memcpy(chunk + pos, compressed, compressedsize); + } else { + lodepng_memcpy(chunk + pos, textstring, textsize); + } + lodepng_chunk_generate_crc(chunk); + } + + lodepng_free(compressed); + return error; +} + +static unsigned addChunk_bKGD(ucvector* out, const LodePNGInfo* info) { + unsigned char* chunk = 0; + if(info->color.colortype == LCT_GREY || info->color.colortype == LCT_GREY_ALPHA) { + CERROR_TRY_RETURN(lodepng_chunk_init(&chunk, out, 2, "bKGD")); + chunk[8] = (unsigned char)(info->background_r >> 8); + chunk[9] = (unsigned char)(info->background_r & 255); + } else if(info->color.colortype == LCT_RGB || info->color.colortype == LCT_RGBA) { + CERROR_TRY_RETURN(lodepng_chunk_init(&chunk, out, 6, "bKGD")); + chunk[8] = (unsigned char)(info->background_r >> 8); + chunk[9] = (unsigned char)(info->background_r & 255); + chunk[10] = (unsigned char)(info->background_g >> 8); + chunk[11] = (unsigned char)(info->background_g & 255); + chunk[12] = (unsigned char)(info->background_b >> 8); + chunk[13] = (unsigned char)(info->background_b & 255); + } else if(info->color.colortype == LCT_PALETTE) { + CERROR_TRY_RETURN(lodepng_chunk_init(&chunk, out, 1, "bKGD")); + chunk[8] = (unsigned char)(info->background_r & 255); /*palette index*/ + } + if(chunk) lodepng_chunk_generate_crc(chunk); + return 0; +} + +static unsigned addChunk_tIME(ucvector* out, const LodePNGTime* time) { + unsigned char* chunk; + CERROR_TRY_RETURN(lodepng_chunk_init(&chunk, out, 7, "tIME")); + chunk[8] = (unsigned char)(time->year >> 8); + chunk[9] = (unsigned char)(time->year & 255); + chunk[10] = (unsigned char)time->month; + chunk[11] = (unsigned char)time->day; + chunk[12] = (unsigned char)time->hour; + chunk[13] = (unsigned char)time->minute; + chunk[14] = (unsigned char)time->second; + lodepng_chunk_generate_crc(chunk); + return 0; +} + +static unsigned addChunk_pHYs(ucvector* out, const LodePNGInfo* info) { + unsigned char* chunk; + CERROR_TRY_RETURN(lodepng_chunk_init(&chunk, out, 9, "pHYs")); + lodepng_set32bitInt(chunk + 8, info->phys_x); + lodepng_set32bitInt(chunk + 12, info->phys_y); + chunk[16] = info->phys_unit; + lodepng_chunk_generate_crc(chunk); + return 0; +} + +static unsigned addChunk_gAMA(ucvector* out, const LodePNGInfo* info) { + unsigned char* chunk; + CERROR_TRY_RETURN(lodepng_chunk_init(&chunk, out, 4, "gAMA")); + lodepng_set32bitInt(chunk + 8, info->gama_gamma); + lodepng_chunk_generate_crc(chunk); + return 0; +} + +static unsigned addChunk_cHRM(ucvector* out, const LodePNGInfo* info) { + unsigned char* chunk; + CERROR_TRY_RETURN(lodepng_chunk_init(&chunk, out, 32, "cHRM")); + lodepng_set32bitInt(chunk + 8, info->chrm_white_x); + lodepng_set32bitInt(chunk + 12, info->chrm_white_y); + lodepng_set32bitInt(chunk + 16, info->chrm_red_x); + lodepng_set32bitInt(chunk + 20, info->chrm_red_y); + lodepng_set32bitInt(chunk + 24, info->chrm_green_x); + lodepng_set32bitInt(chunk + 28, info->chrm_green_y); + lodepng_set32bitInt(chunk + 32, info->chrm_blue_x); + lodepng_set32bitInt(chunk + 36, info->chrm_blue_y); + lodepng_chunk_generate_crc(chunk); + return 0; +} + +static unsigned addChunk_sRGB(ucvector* out, const LodePNGInfo* info) { + unsigned char data = info->srgb_intent; + 
return lodepng_chunk_createv(out, 1, "sRGB", &data); +} + +static unsigned addChunk_iCCP(ucvector* out, const LodePNGInfo* info, LodePNGCompressSettings* zlibsettings) { + unsigned error = 0; + unsigned char* chunk = 0; + unsigned char* compressed = 0; + size_t compressedsize = 0; + size_t keysize = lodepng_strlen(info->iccp_name); + + if(keysize < 1 || keysize > 79) return 89; /*error: invalid keyword size*/ + error = zlib_compress(&compressed, &compressedsize, + info->iccp_profile, info->iccp_profile_size, zlibsettings); + if(!error) { + size_t size = keysize + 2 + compressedsize; + error = lodepng_chunk_init(&chunk, out, size, "iCCP"); + } + if(!error) { + lodepng_memcpy(chunk + 8, info->iccp_name, keysize); + chunk[8 + keysize] = 0; /*null termination char*/ + chunk[9 + keysize] = 0; /*compression method: 0*/ + lodepng_memcpy(chunk + 10 + keysize, compressed, compressedsize); + lodepng_chunk_generate_crc(chunk); + } + + lodepng_free(compressed); + return error; +} + +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ + +static void filterScanline(unsigned char* out, const unsigned char* scanline, const unsigned char* prevline, + size_t length, size_t bytewidth, unsigned char filterType) { + size_t i; + switch(filterType) { + case 0: /*None*/ + for(i = 0; i != length; ++i) out[i] = scanline[i]; + break; + case 1: /*Sub*/ + for(i = 0; i != bytewidth; ++i) out[i] = scanline[i]; + for(i = bytewidth; i < length; ++i) out[i] = scanline[i] - scanline[i - bytewidth]; + break; + case 2: /*Up*/ + if(prevline) { + for(i = 0; i != length; ++i) out[i] = scanline[i] - prevline[i]; + } else { + for(i = 0; i != length; ++i) out[i] = scanline[i]; + } + break; + case 3: /*Average*/ + if(prevline) { + for(i = 0; i != bytewidth; ++i) out[i] = scanline[i] - (prevline[i] >> 1); + for(i = bytewidth; i < length; ++i) out[i] = scanline[i] - ((scanline[i - bytewidth] + prevline[i]) >> 1); + } else { + for(i = 0; i != bytewidth; ++i) out[i] = scanline[i]; + for(i = bytewidth; i < length; ++i) out[i] = scanline[i] - (scanline[i - bytewidth] >> 1); + } + break; + case 4: /*Paeth*/ + if(prevline) { + /*paethPredictor(0, prevline[i], 0) is always prevline[i]*/ + for(i = 0; i != bytewidth; ++i) out[i] = (scanline[i] - prevline[i]); + for(i = bytewidth; i < length; ++i) { + out[i] = (scanline[i] - paethPredictor(scanline[i - bytewidth], prevline[i], prevline[i - bytewidth])); + } + } else { + for(i = 0; i != bytewidth; ++i) out[i] = scanline[i]; + /*paethPredictor(scanline[i - bytewidth], 0, 0) is always scanline[i - bytewidth]*/ + for(i = bytewidth; i < length; ++i) out[i] = (scanline[i] - scanline[i - bytewidth]); + } + break; + default: return; /*invalid filter type given*/ + } +} + +/* integer binary logarithm, max return value is 31 */ +static size_t ilog2(size_t i) { + size_t result = 0; + if(i >= 65536) { result += 16; i >>= 16; } + if(i >= 256) { result += 8; i >>= 8; } + if(i >= 16) { result += 4; i >>= 4; } + if(i >= 4) { result += 2; i >>= 2; } + if(i >= 2) { result += 1; /*i >>= 1;*/ } + return result; +} + +/* integer approximation for i * log2(i), helper function for LFS_ENTROPY */ +static size_t ilog2i(size_t i) { + size_t l; + if(i == 0) return 0; + l = ilog2(i); + /* approximate i*log2(i): l is integer logarithm, ((i - (1u << l)) << 1u) + linearly approximates the missing fractional part multiplied by i */ + return i * l + ((i - (1u << l)) << 1u); +} + +static unsigned filter(unsigned char* out, const unsigned char* in, unsigned w, unsigned h, + const LodePNGColorMode* color, const LodePNGEncoderSettings* 
settings) { + /* + For PNG filter method 0 + out must be a buffer with as size: h + (w * h * bpp + 7u) / 8u, because there are + the scanlines with 1 extra byte per scanline + */ + + unsigned bpp = lodepng_get_bpp(color); + /*the width of a scanline in bytes, not including the filter type*/ + size_t linebytes = lodepng_get_raw_size_idat(w, 1, bpp) - 1u; + + /*bytewidth is used for filtering, is 1 when bpp < 8, number of bytes per pixel otherwise*/ + size_t bytewidth = (bpp + 7u) / 8u; + const unsigned char* prevline = 0; + unsigned x, y; + unsigned error = 0; + LodePNGFilterStrategy strategy = settings->filter_strategy; + + /* + There is a heuristic called the minimum sum of absolute differences heuristic, suggested by the PNG standard: + * If the image type is Palette, or the bit depth is smaller than 8, then do not filter the image (i.e. + use fixed filtering, with the filter None). + * (The other case) If the image type is Grayscale or RGB (with or without Alpha), and the bit depth is + not smaller than 8, then use adaptive filtering heuristic as follows: independently for each row, apply + all five filters and select the filter that produces the smallest sum of absolute values per row. + This heuristic is used if filter strategy is LFS_MINSUM and filter_palette_zero is true. + + If filter_palette_zero is true and filter_strategy is not LFS_MINSUM, the above heuristic is followed, + but for "the other case", whatever strategy filter_strategy is set to instead of the minimum sum + heuristic is used. + */ + if(settings->filter_palette_zero && + (color->colortype == LCT_PALETTE || color->bitdepth < 8)) strategy = LFS_ZERO; + + if(bpp == 0) return 31; /*error: invalid color type*/ + + if(strategy >= LFS_ZERO && strategy <= LFS_FOUR) { + unsigned char type = (unsigned char)strategy; + for(y = 0; y != h; ++y) { + size_t outindex = (1 + linebytes) * y; /*the extra filterbyte added to each row*/ + size_t inindex = linebytes * y; + out[outindex] = type; /*filter type byte*/ + filterScanline(&out[outindex + 1], &in[inindex], prevline, linebytes, bytewidth, type); + prevline = &in[inindex]; + } + } else if(strategy == LFS_MINSUM) { + /*adaptive filtering*/ + unsigned char* attempt[5]; /*five filtering attempts, one for each filter type*/ + size_t smallest = 0; + unsigned char type, bestType = 0; + + for(type = 0; type != 5; ++type) { + attempt[type] = (unsigned char*)lodepng_malloc(linebytes); + if(!attempt[type]) error = 83; /*alloc fail*/ + } + + if(!error) { + for(y = 0; y != h; ++y) { + /*try the 5 filter types*/ + for(type = 0; type != 5; ++type) { + size_t sum = 0; + filterScanline(attempt[type], &in[y * linebytes], prevline, linebytes, bytewidth, type); + + /*calculate the sum of the result*/ + if(type == 0) { + for(x = 0; x != linebytes; ++x) sum += (unsigned char)(attempt[type][x]); + } else { + for(x = 0; x != linebytes; ++x) { + /*For differences, each byte should be treated as signed, values above 127 are negative + (converted to signed char). Filtertype 0 isn't a difference though, so use unsigned there. + This means filtertype 0 is almost never chosen, but that is justified.*/ + unsigned char s = attempt[type][x]; + sum += s < 128 ? 
s : (255U - s); + } + } + + /*check if this is smallest sum (or if type == 0 it's the first case so always store the values)*/ + if(type == 0 || sum < smallest) { + bestType = type; + smallest = sum; + } + } + + prevline = &in[y * linebytes]; + + /*now fill the out values*/ + out[y * (linebytes + 1)] = bestType; /*the first byte of a scanline will be the filter type*/ + for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[bestType][x]; + } + } + + for(type = 0; type != 5; ++type) lodepng_free(attempt[type]); + } else if(strategy == LFS_ENTROPY) { + unsigned char* attempt[5]; /*five filtering attempts, one for each filter type*/ + size_t bestSum = 0; + unsigned type, bestType = 0; + unsigned count[256]; + + for(type = 0; type != 5; ++type) { + attempt[type] = (unsigned char*)lodepng_malloc(linebytes); + if(!attempt[type]) error = 83; /*alloc fail*/ + } + + if(!error) { + for(y = 0; y != h; ++y) { + /*try the 5 filter types*/ + for(type = 0; type != 5; ++type) { + size_t sum = 0; + filterScanline(attempt[type], &in[y * linebytes], prevline, linebytes, bytewidth, type); + lodepng_memset(count, 0, 256 * sizeof(*count)); + for(x = 0; x != linebytes; ++x) ++count[attempt[type][x]]; + ++count[type]; /*the filter type itself is part of the scanline*/ + for(x = 0; x != 256; ++x) { + sum += ilog2i(count[x]); + } + /*check if this is smallest sum (or if type == 0 it's the first case so always store the values)*/ + if(type == 0 || sum > bestSum) { + bestType = type; + bestSum = sum; + } + } + + prevline = &in[y * linebytes]; + + /*now fill the out values*/ + out[y * (linebytes + 1)] = bestType; /*the first byte of a scanline will be the filter type*/ + for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[bestType][x]; + } + } + + for(type = 0; type != 5; ++type) lodepng_free(attempt[type]); + } else if(strategy == LFS_PREDEFINED) { + for(y = 0; y != h; ++y) { + size_t outindex = (1 + linebytes) * y; /*the extra filterbyte added to each row*/ + size_t inindex = linebytes * y; + unsigned char type = settings->predefined_filters[y]; + out[outindex] = type; /*filter type byte*/ + filterScanline(&out[outindex + 1], &in[inindex], prevline, linebytes, bytewidth, type); + prevline = &in[inindex]; + } + } else if(strategy == LFS_BRUTE_FORCE) { + /*brute force filter chooser. + deflate the scanline after every filter attempt to see which one deflates best. + This is very slow and gives only slightly smaller, sometimes even larger, result*/ + size_t size[5]; + unsigned char* attempt[5]; /*five filtering attempts, one for each filter type*/ + size_t smallest = 0; + unsigned type = 0, bestType = 0; + unsigned char* dummy; + LodePNGCompressSettings zlibsettings; + lodepng_memcpy(&zlibsettings, &settings->zlibsettings, sizeof(LodePNGCompressSettings)); + /*use fixed tree on the attempts so that the tree is not adapted to the filtertype on purpose, + to simulate the true case where the tree is the same for the whole image. Sometimes it gives + better result with dynamic tree anyway. Using the fixed tree sometimes gives worse, but in rare + cases better compression. 
It does make this a bit less slow, so it's worth doing this.*/ + zlibsettings.btype = 1; + /*a custom encoder likely doesn't read the btype setting and is optimized for complete PNG + images only, so disable it*/ + zlibsettings.custom_zlib = 0; + zlibsettings.custom_deflate = 0; + for(type = 0; type != 5; ++type) { + attempt[type] = (unsigned char*)lodepng_malloc(linebytes); + if(!attempt[type]) error = 83; /*alloc fail*/ + } + if(!error) { + for(y = 0; y != h; ++y) /*try the 5 filter types*/ { + for(type = 0; type != 5; ++type) { + unsigned testsize = (unsigned)linebytes; + /*if(testsize > 8) testsize /= 8;*/ /*it already works good enough by testing a part of the row*/ + + filterScanline(attempt[type], &in[y * linebytes], prevline, linebytes, bytewidth, type); + size[type] = 0; + dummy = 0; + zlib_compress(&dummy, &size[type], attempt[type], testsize, &zlibsettings); + lodepng_free(dummy); + /*check if this is smallest size (or if type == 0 it's the first case so always store the values)*/ + if(type == 0 || size[type] < smallest) { + bestType = type; + smallest = size[type]; + } + } + prevline = &in[y * linebytes]; + out[y * (linebytes + 1)] = bestType; /*the first byte of a scanline will be the filter type*/ + for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[bestType][x]; + } + } + for(type = 0; type != 5; ++type) lodepng_free(attempt[type]); + } + else return 88; /* unknown filter strategy */ + + return error; +} + +static void addPaddingBits(unsigned char* out, const unsigned char* in, + size_t olinebits, size_t ilinebits, unsigned h) { + /*The opposite of the removePaddingBits function + olinebits must be >= ilinebits*/ + unsigned y; + size_t diff = olinebits - ilinebits; + size_t obp = 0, ibp = 0; /*bit pointers*/ + for(y = 0; y != h; ++y) { + size_t x; + for(x = 0; x < ilinebits; ++x) { + unsigned char bit = readBitFromReversedStream(&ibp, in); + setBitOfReversedStream(&obp, out, bit); + } + /*obp += diff; --> no, fill in some value in the padding bits too, to avoid + "Use of uninitialised value of size ###" warning from valgrind*/ + for(x = 0; x != diff; ++x) setBitOfReversedStream(&obp, out, 0); + } +} + +/* +in: non-interlaced image with size w*h +out: the same pixels, but re-ordered according to PNG's Adam7 interlacing, with + no padding bits between scanlines, but between reduced images so that each + reduced image starts at a byte. +bpp: bits per pixel +there are no padding bits, not between scanlines, not between reduced images +in has the following size in bits: w * h * bpp. 
+out is possibly bigger due to padding bits between reduced images +NOTE: comments about padding bits are only relevant if bpp < 8 +*/ +static void Adam7_interlace(unsigned char* out, const unsigned char* in, unsigned w, unsigned h, unsigned bpp) { + unsigned passw[7], passh[7]; + size_t filter_passstart[8], padded_passstart[8], passstart[8]; + unsigned i; + + Adam7_getpassvalues(passw, passh, filter_passstart, padded_passstart, passstart, w, h, bpp); + + if(bpp >= 8) { + for(i = 0; i != 7; ++i) { + unsigned x, y, b; + size_t bytewidth = bpp / 8u; + for(y = 0; y < passh[i]; ++y) + for(x = 0; x < passw[i]; ++x) { + size_t pixelinstart = ((ADAM7_IY[i] + y * ADAM7_DY[i]) * w + ADAM7_IX[i] + x * ADAM7_DX[i]) * bytewidth; + size_t pixeloutstart = passstart[i] + (y * passw[i] + x) * bytewidth; + for(b = 0; b < bytewidth; ++b) { + out[pixeloutstart + b] = in[pixelinstart + b]; + } + } + } + } else /*bpp < 8: Adam7 with pixels < 8 bit is a bit trickier: with bit pointers*/ { + for(i = 0; i != 7; ++i) { + unsigned x, y, b; + unsigned ilinebits = bpp * passw[i]; + unsigned olinebits = bpp * w; + size_t obp, ibp; /*bit pointers (for out and in buffer)*/ + for(y = 0; y < passh[i]; ++y) + for(x = 0; x < passw[i]; ++x) { + ibp = (ADAM7_IY[i] + y * ADAM7_DY[i]) * olinebits + (ADAM7_IX[i] + x * ADAM7_DX[i]) * bpp; + obp = (8 * passstart[i]) + (y * ilinebits + x * bpp); + for(b = 0; b < bpp; ++b) { + unsigned char bit = readBitFromReversedStream(&ibp, in); + setBitOfReversedStream(&obp, out, bit); + } + } + } + } +} + +/*out must be buffer big enough to contain uncompressed IDAT chunk data, and in must contain the full image. +return value is error**/ +static unsigned preProcessScanlines(unsigned char** out, size_t* outsize, const unsigned char* in, + unsigned w, unsigned h, + const LodePNGInfo* info_png, const LodePNGEncoderSettings* settings) { + /* + This function converts the pure 2D image with the PNG's colortype, into filtered-padded-interlaced data. 
Steps: + *) if no Adam7: 1) add padding bits (= possible extra bits per scanline if bpp < 8) 2) filter + *) if adam7: 1) Adam7_interlace 2) 7x add padding bits 3) 7x filter + */ + unsigned bpp = lodepng_get_bpp(&info_png->color); + unsigned error = 0; + + if(info_png->interlace_method == 0) { + *outsize = h + (h * ((w * bpp + 7u) / 8u)); /*image size plus an extra byte per scanline + possible padding bits*/ + *out = (unsigned char*)lodepng_malloc(*outsize); + if(!(*out) && (*outsize)) error = 83; /*alloc fail*/ + + if(!error) { + /*non multiple of 8 bits per scanline, padding bits needed per scanline*/ + if(bpp < 8 && w * bpp != ((w * bpp + 7u) / 8u) * 8u) { + unsigned char* padded = (unsigned char*)lodepng_malloc(h * ((w * bpp + 7u) / 8u)); + if(!padded) error = 83; /*alloc fail*/ + if(!error) { + addPaddingBits(padded, in, ((w * bpp + 7u) / 8u) * 8u, w * bpp, h); + error = filter(*out, padded, w, h, &info_png->color, settings); + } + lodepng_free(padded); + } else { + /*we can immediately filter into the out buffer, no other steps needed*/ + error = filter(*out, in, w, h, &info_png->color, settings); + } + } + } else /*interlace_method is 1 (Adam7)*/ { + unsigned passw[7], passh[7]; + size_t filter_passstart[8], padded_passstart[8], passstart[8]; + unsigned char* adam7; + + Adam7_getpassvalues(passw, passh, filter_passstart, padded_passstart, passstart, w, h, bpp); + + *outsize = filter_passstart[7]; /*image size plus an extra byte per scanline + possible padding bits*/ + *out = (unsigned char*)lodepng_malloc(*outsize); + if(!(*out)) error = 83; /*alloc fail*/ + + adam7 = (unsigned char*)lodepng_malloc(passstart[7]); + if(!adam7 && passstart[7]) error = 83; /*alloc fail*/ + + if(!error) { + unsigned i; + + Adam7_interlace(adam7, in, w, h, bpp); + for(i = 0; i != 7; ++i) { + if(bpp < 8) { + unsigned char* padded = (unsigned char*)lodepng_malloc(padded_passstart[i + 1] - padded_passstart[i]); + if(!padded) ERROR_BREAK(83); /*alloc fail*/ + addPaddingBits(padded, &adam7[passstart[i]], + ((passw[i] * bpp + 7u) / 8u) * 8u, passw[i] * bpp, passh[i]); + error = filter(&(*out)[filter_passstart[i]], padded, + passw[i], passh[i], &info_png->color, settings); + lodepng_free(padded); + } else { + error = filter(&(*out)[filter_passstart[i]], &adam7[padded_passstart[i]], + passw[i], passh[i], &info_png->color, settings); + } + + if(error) break; + } + } + + lodepng_free(adam7); + } + + return error; +} + +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS +static unsigned addUnknownChunks(ucvector* out, unsigned char* data, size_t datasize) { + unsigned char* inchunk = data; + while((size_t)(inchunk - data) < datasize) { + CERROR_TRY_RETURN(lodepng_chunk_append(&out->data, &out->size, inchunk)); + out->allocsize = out->size; /*fix the allocsize again*/ + inchunk = lodepng_chunk_next(inchunk, data + datasize); + } + return 0; +} + +static unsigned isGrayICCProfile(const unsigned char* profile, unsigned size) { + /* + It is a gray profile if bytes 16-19 are "GRAY", rgb profile if bytes 16-19 + are "RGB ". We do not perform any full parsing of the ICC profile here, other + than check those 4 bytes to grayscale profile. Other than that, validity of + the profile is not checked. This is needed only because the PNG specification + requires using a non-gray color model if there is an ICC profile with "RGB " + (sadly limiting compression opportunities if the input data is grayscale RGB + data), and requires using a gray color model if it is "GRAY". 
+ */ + if(size < 20) return 0; + return profile[16] == 'G' && profile[17] == 'R' && profile[18] == 'A' && profile[19] == 'Y'; +} + +static unsigned isRGBICCProfile(const unsigned char* profile, unsigned size) { + /* See comment in isGrayICCProfile*/ + if(size < 20) return 0; + return profile[16] == 'R' && profile[17] == 'G' && profile[18] == 'B' && profile[19] == ' '; +} +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ + +unsigned lodepng_encode(unsigned char** out, size_t* outsize, + const unsigned char* image, unsigned w, unsigned h, + LodePNGState* state) { + unsigned char* data = 0; /*uncompressed version of the IDAT chunk data*/ + size_t datasize = 0; + ucvector outv = ucvector_init(NULL, 0); + LodePNGInfo info; + const LodePNGInfo* info_png = &state->info_png; + + lodepng_info_init(&info); + + /*provide some proper output values if error will happen*/ + *out = 0; + *outsize = 0; + state->error = 0; + + /*check input values validity*/ + if((info_png->color.colortype == LCT_PALETTE || state->encoder.force_palette) + && (info_png->color.palettesize == 0 || info_png->color.palettesize > 256)) { + state->error = 68; /*invalid palette size, it is only allowed to be 1-256*/ + goto cleanup; + } + if(state->encoder.zlibsettings.btype > 2) { + state->error = 61; /*error: invalid btype*/ + goto cleanup; + } + if(info_png->interlace_method > 1) { + state->error = 71; /*error: invalid interlace mode*/ + goto cleanup; + } + state->error = checkColorValidity(info_png->color.colortype, info_png->color.bitdepth); + if(state->error) goto cleanup; /*error: invalid color type given*/ + state->error = checkColorValidity(state->info_raw.colortype, state->info_raw.bitdepth); + if(state->error) goto cleanup; /*error: invalid color type given*/ + + /* color convert and compute scanline filter types */ + lodepng_info_copy(&info, &state->info_png); + if(state->encoder.auto_convert) { + LodePNGColorStats stats; + lodepng_color_stats_init(&stats); +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS + if(info_png->iccp_defined && + isGrayICCProfile(info_png->iccp_profile, info_png->iccp_profile_size)) { + /*the PNG specification does not allow to use palette with a GRAY ICC profile, even + if the palette has only gray colors, so disallow it.*/ + stats.allow_palette = 0; + } + if(info_png->iccp_defined && + isRGBICCProfile(info_png->iccp_profile, info_png->iccp_profile_size)) { + /*the PNG specification does not allow to use grayscale color with RGB ICC profile, so disallow gray.*/ + stats.allow_greyscale = 0; + } +#endif /* LODEPNG_COMPILE_ANCILLARY_CHUNKS */ + state->error = lodepng_compute_color_stats(&stats, image, w, h, &state->info_raw); + if(state->error) goto cleanup; +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS + if(info_png->background_defined) { + /*the background chunk's color must be taken into account as well*/ + unsigned r = 0, g = 0, b = 0; + LodePNGColorMode mode16 = lodepng_color_mode_make(LCT_RGB, 16); + lodepng_convert_rgb(&r, &g, &b, info_png->background_r, info_png->background_g, info_png->background_b, &mode16, &info_png->color); + state->error = lodepng_color_stats_add(&stats, r, g, b, 65535); + if(state->error) goto cleanup; + } +#endif /* LODEPNG_COMPILE_ANCILLARY_CHUNKS */ + state->error = auto_choose_color(&info.color, &state->info_raw, &stats); + if(state->error) goto cleanup; +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS + /*also convert the background chunk*/ + if(info_png->background_defined) { + if(lodepng_convert_rgb(&info.background_r, &info.background_g, &info.background_b, + info_png->background_r, 
info_png->background_g, info_png->background_b, &info.color, &info_png->color)) { + state->error = 104; + goto cleanup; + } + } +#endif /* LODEPNG_COMPILE_ANCILLARY_CHUNKS */ + } +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS + if(info_png->iccp_defined) { + unsigned gray_icc = isGrayICCProfile(info_png->iccp_profile, info_png->iccp_profile_size); + unsigned rgb_icc = isRGBICCProfile(info_png->iccp_profile, info_png->iccp_profile_size); + unsigned gray_png = info.color.colortype == LCT_GREY || info.color.colortype == LCT_GREY_ALPHA; + if(!gray_icc && !rgb_icc) { + state->error = 100; /* Disallowed profile color type for PNG */ + goto cleanup; + } + if(gray_icc != gray_png) { + /*Not allowed to use RGB/RGBA/palette with GRAY ICC profile or vice versa, + or in case of auto_convert, it wasn't possible to find appropriate model*/ + state->error = state->encoder.auto_convert ? 102 : 101; + goto cleanup; + } + } +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ + if(!lodepng_color_mode_equal(&state->info_raw, &info.color)) { + unsigned char* converted; + size_t size = ((size_t)w * (size_t)h * (size_t)lodepng_get_bpp(&info.color) + 7u) / 8u; + + converted = (unsigned char*)lodepng_malloc(size); + if(!converted && size) state->error = 83; /*alloc fail*/ + if(!state->error) { + state->error = lodepng_convert(converted, image, &info.color, &state->info_raw, w, h); + } + if(!state->error) { + state->error = preProcessScanlines(&data, &datasize, converted, w, h, &info, &state->encoder); + } + lodepng_free(converted); + if(state->error) goto cleanup; + } else { + state->error = preProcessScanlines(&data, &datasize, image, w, h, &info, &state->encoder); + if(state->error) goto cleanup; + } + + /* output all PNG chunks */ { +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS + size_t i; +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ + /*write signature and chunks*/ + state->error = writeSignature(&outv); + if(state->error) goto cleanup; + /*IHDR*/ + state->error = addChunk_IHDR(&outv, w, h, info.color.colortype, info.color.bitdepth, info.interlace_method); + if(state->error) goto cleanup; +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS + /*unknown chunks between IHDR and PLTE*/ + if(info.unknown_chunks_data[0]) { + state->error = addUnknownChunks(&outv, info.unknown_chunks_data[0], info.unknown_chunks_size[0]); + if(state->error) goto cleanup; + } + /*color profile chunks must come before PLTE */ + if(info.iccp_defined) { + state->error = addChunk_iCCP(&outv, &info, &state->encoder.zlibsettings); + if(state->error) goto cleanup; + } + if(info.srgb_defined) { + state->error = addChunk_sRGB(&outv, &info); + if(state->error) goto cleanup; + } + if(info.gama_defined) { + state->error = addChunk_gAMA(&outv, &info); + if(state->error) goto cleanup; + } + if(info.chrm_defined) { + state->error = addChunk_cHRM(&outv, &info); + if(state->error) goto cleanup; + } +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ + /*PLTE*/ + if(info.color.colortype == LCT_PALETTE) { + state->error = addChunk_PLTE(&outv, &info.color); + if(state->error) goto cleanup; + } + if(state->encoder.force_palette && (info.color.colortype == LCT_RGB || info.color.colortype == LCT_RGBA)) { + /*force_palette means: write suggested palette for truecolor in PLTE chunk*/ + state->error = addChunk_PLTE(&outv, &info.color); + if(state->error) goto cleanup; + } + /*tRNS (this will only add if when necessary) */ + state->error = addChunk_tRNS(&outv, &info.color); + if(state->error) goto cleanup; +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS + /*bKGD (must come between PLTE and the IDAt 
chunks*/ + if(info.background_defined) { + state->error = addChunk_bKGD(&outv, &info); + if(state->error) goto cleanup; + } + /*pHYs (must come before the IDAT chunks)*/ + if(info.phys_defined) { + state->error = addChunk_pHYs(&outv, &info); + if(state->error) goto cleanup; + } + + /*unknown chunks between PLTE and IDAT*/ + if(info.unknown_chunks_data[1]) { + state->error = addUnknownChunks(&outv, info.unknown_chunks_data[1], info.unknown_chunks_size[1]); + if(state->error) goto cleanup; + } +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ + /*IDAT (multiple IDAT chunks must be consecutive)*/ + state->error = addChunk_IDAT(&outv, data, datasize, &state->encoder.zlibsettings); + if(state->error) goto cleanup; +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS + /*tIME*/ + if(info.time_defined) { + state->error = addChunk_tIME(&outv, &info.time); + if(state->error) goto cleanup; + } + /*tEXt and/or zTXt*/ + for(i = 0; i != info.text_num; ++i) { + if(lodepng_strlen(info.text_keys[i]) > 79) { + state->error = 66; /*text chunk too large*/ + goto cleanup; + } + if(lodepng_strlen(info.text_keys[i]) < 1) { + state->error = 67; /*text chunk too small*/ + goto cleanup; + } + if(state->encoder.text_compression) { + state->error = addChunk_zTXt(&outv, info.text_keys[i], info.text_strings[i], &state->encoder.zlibsettings); + if(state->error) goto cleanup; + } else { + state->error = addChunk_tEXt(&outv, info.text_keys[i], info.text_strings[i]); + if(state->error) goto cleanup; + } + } + /*LodePNG version id in text chunk*/ + if(state->encoder.add_id) { + unsigned already_added_id_text = 0; + for(i = 0; i != info.text_num; ++i) { + const char* k = info.text_keys[i]; + /* Could use strcmp, but we're not calling or reimplementing this C library function for this use only */ + if(k[0] == 'L' && k[1] == 'o' && k[2] == 'd' && k[3] == 'e' && + k[4] == 'P' && k[5] == 'N' && k[6] == 'G' && k[7] == '\0') { + already_added_id_text = 1; + break; + } + } + if(already_added_id_text == 0) { + state->error = addChunk_tEXt(&outv, "LodePNG", LODEPNG_VERSION_STRING); /*it's shorter as tEXt than as zTXt chunk*/ + if(state->error) goto cleanup; + } + } + /*iTXt*/ + for(i = 0; i != info.itext_num; ++i) { + if(lodepng_strlen(info.itext_keys[i]) > 79) { + state->error = 66; /*text chunk too large*/ + goto cleanup; + } + if(lodepng_strlen(info.itext_keys[i]) < 1) { + state->error = 67; /*text chunk too small*/ + goto cleanup; + } + state->error = addChunk_iTXt( + &outv, state->encoder.text_compression, + info.itext_keys[i], info.itext_langtags[i], info.itext_transkeys[i], info.itext_strings[i], + &state->encoder.zlibsettings); + if(state->error) goto cleanup; + } + + /*unknown chunks between IDAT and IEND*/ + if(info.unknown_chunks_data[2]) { + state->error = addUnknownChunks(&outv, info.unknown_chunks_data[2], info.unknown_chunks_size[2]); + if(state->error) goto cleanup; + } +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ + state->error = addChunk_IEND(&outv); + if(state->error) goto cleanup; + } + +cleanup: + lodepng_info_cleanup(&info); + lodepng_free(data); + + /*instead of cleaning the vector up, give it to the output*/ + *out = outv.data; + *outsize = outv.size; + + return state->error; +} + +unsigned lodepng_encode_memory(unsigned char** out, size_t* outsize, const unsigned char* image, + unsigned w, unsigned h, LodePNGColorType colortype, unsigned bitdepth) { + unsigned error; + LodePNGState state; + lodepng_state_init(&state); + state.info_raw.colortype = colortype; + state.info_raw.bitdepth = bitdepth; + 
state.info_png.color.colortype = colortype; + state.info_png.color.bitdepth = bitdepth; + lodepng_encode(out, outsize, image, w, h, &state); + error = state.error; + lodepng_state_cleanup(&state); + return error; +} + +unsigned lodepng_encode32(unsigned char** out, size_t* outsize, const unsigned char* image, unsigned w, unsigned h) { + return lodepng_encode_memory(out, outsize, image, w, h, LCT_RGBA, 8); +} + +unsigned lodepng_encode24(unsigned char** out, size_t* outsize, const unsigned char* image, unsigned w, unsigned h) { + return lodepng_encode_memory(out, outsize, image, w, h, LCT_RGB, 8); +} + +#ifdef LODEPNG_COMPILE_DISK +unsigned lodepng_encode_file(const char* filename, const unsigned char* image, unsigned w, unsigned h, + LodePNGColorType colortype, unsigned bitdepth) { + unsigned char* buffer; + size_t buffersize; + unsigned error = lodepng_encode_memory(&buffer, &buffersize, image, w, h, colortype, bitdepth); + if(!error) error = lodepng_save_file(buffer, buffersize, filename); + lodepng_free(buffer); + return error; +} + +unsigned lodepng_encode32_file(const char* filename, const unsigned char* image, unsigned w, unsigned h) { + return lodepng_encode_file(filename, image, w, h, LCT_RGBA, 8); +} + +unsigned lodepng_encode24_file(const char* filename, const unsigned char* image, unsigned w, unsigned h) { + return lodepng_encode_file(filename, image, w, h, LCT_RGB, 8); +} +#endif /*LODEPNG_COMPILE_DISK*/ + +void lodepng_encoder_settings_init(LodePNGEncoderSettings* settings) { + lodepng_compress_settings_init(&settings->zlibsettings); + settings->filter_palette_zero = 1; + settings->filter_strategy = LFS_MINSUM; + settings->auto_convert = 1; + settings->force_palette = 0; + settings->predefined_filters = 0; +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS + settings->add_id = 0; + settings->text_compression = 1; +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ +} + +#endif /*LODEPNG_COMPILE_ENCODER*/ +#endif /*LODEPNG_COMPILE_PNG*/ + +#ifdef LODEPNG_COMPILE_ERROR_TEXT +/* +This returns the description of a numerical error code in English. This is also +the documentation of all the error codes. +*/ +const char* lodepng_error_text(unsigned code) { + switch(code) { + case 0: return "no error, everything went ok"; + case 1: return "nothing done yet"; /*the Encoder/Decoder has done nothing yet, error checking makes no sense yet*/ + case 10: return "end of input memory reached without huffman end code"; /*while huffman decoding*/ + case 11: return "error in code tree made it jump outside of huffman tree"; /*while huffman decoding*/ + case 13: return "problem while processing dynamic deflate block"; + case 14: return "problem while processing dynamic deflate block"; + case 15: return "problem while processing dynamic deflate block"; + /*this error could happen if there are only 0 or 1 symbols present in the huffman code:*/ + case 16: return "invalid code while processing dynamic deflate block"; + case 17: return "end of out buffer memory reached while inflating"; + case 18: return "invalid distance code while inflating"; + case 19: return "end of out buffer memory reached while inflating"; + case 20: return "invalid deflate block BTYPE encountered while decoding"; + case 21: return "NLEN is not ones complement of LEN in a deflate block"; + + /*end of out buffer memory reached while inflating: + This can happen if the inflated deflate data is longer than the amount of bytes required to fill up + all the pixels of the image, given the color depth and image dimensions. 
Something that doesn't + happen in a normal, well encoded, PNG image.*/ + case 22: return "end of out buffer memory reached while inflating"; + case 23: return "end of in buffer memory reached while inflating"; + case 24: return "invalid FCHECK in zlib header"; + case 25: return "invalid compression method in zlib header"; + case 26: return "FDICT encountered in zlib header while it's not used for PNG"; + case 27: return "PNG file is smaller than a PNG header"; + /*Checks the magic file header, the first 8 bytes of the PNG file*/ + case 28: return "incorrect PNG signature, it's no PNG or corrupted"; + case 29: return "first chunk is not the header chunk"; + case 30: return "chunk length too large, chunk broken off at end of file"; + case 31: return "illegal PNG color type or bpp"; + case 32: return "illegal PNG compression method"; + case 33: return "illegal PNG filter method"; + case 34: return "illegal PNG interlace method"; + case 35: return "chunk length of a chunk is too large or the chunk too small"; + case 36: return "illegal PNG filter type encountered"; + case 37: return "illegal bit depth for this color type given"; + case 38: return "the palette is too small or too big"; /*0, or more than 256 colors*/ + case 39: return "tRNS chunk before PLTE or has more entries than palette size"; + case 40: return "tRNS chunk has wrong size for grayscale image"; + case 41: return "tRNS chunk has wrong size for RGB image"; + case 42: return "tRNS chunk appeared while it was not allowed for this color type"; + case 43: return "bKGD chunk has wrong size for palette image"; + case 44: return "bKGD chunk has wrong size for grayscale image"; + case 45: return "bKGD chunk has wrong size for RGB image"; + case 48: return "empty input buffer given to decoder. Maybe caused by non-existing file?"; + case 49: return "jumped past memory while generating dynamic huffman tree"; + case 50: return "jumped past memory while generating dynamic huffman tree"; + case 51: return "jumped past memory while inflating huffman block"; + case 52: return "jumped past memory while inflating"; + case 53: return "size of zlib data too small"; + case 54: return "repeat symbol in tree while there was no value symbol yet"; + /*jumped past tree while generating huffman tree, this could be when the + tree will have more leaves than symbols after generating it out of the + given lengths. They call this an oversubscribed dynamic bit lengths tree in zlib.*/ + case 55: return "jumped past tree while generating huffman tree"; + case 56: return "given output image colortype or bitdepth not supported for color conversion"; + case 57: return "invalid CRC encountered (checking CRC can be disabled)"; + case 58: return "invalid ADLER32 encountered (checking ADLER32 can be disabled)"; + case 59: return "requested color conversion not supported"; + case 60: return "invalid window size given in the settings of the encoder (must be 0-32768)"; + case 61: return "invalid BTYPE given in the settings of the encoder (only 0, 1 and 2 are allowed)"; + /*LodePNG leaves the choice of RGB to grayscale conversion formula to the user.*/ + case 62: return "conversion from color to grayscale not supported"; + /*(2^31-1)*/ + case 63: return "length of a chunk too long, max allowed for PNG is 2147483647 bytes per chunk"; + /*this would result in the inability of a deflated block to ever contain an end code. 
It must be at least 1.*/ + case 64: return "the length of the END symbol 256 in the Huffman tree is 0"; + case 66: return "the length of a text chunk keyword given to the encoder is longer than the maximum of 79 bytes"; + case 67: return "the length of a text chunk keyword given to the encoder is smaller than the minimum of 1 byte"; + case 68: return "tried to encode a PLTE chunk with a palette that has less than 1 or more than 256 colors"; + case 69: return "unknown chunk type with 'critical' flag encountered by the decoder"; + case 71: return "invalid interlace mode given to encoder (must be 0 or 1)"; + case 72: return "while decoding, invalid compression method encountering in zTXt or iTXt chunk (it must be 0)"; + case 73: return "invalid tIME chunk size"; + case 74: return "invalid pHYs chunk size"; + /*length could be wrong, or data chopped off*/ + case 75: return "no null termination char found while decoding text chunk"; + case 76: return "iTXt chunk too short to contain required bytes"; + case 77: return "integer overflow in buffer size"; + case 78: return "failed to open file for reading"; /*file doesn't exist or couldn't be opened for reading*/ + case 79: return "failed to open file for writing"; + case 80: return "tried creating a tree of 0 symbols"; + case 81: return "lazy matching at pos 0 is impossible"; + case 82: return "color conversion to palette requested while a color isn't in palette, or index out of bounds"; + case 83: return "memory allocation failed"; + case 84: return "given image too small to contain all pixels to be encoded"; + case 86: return "impossible offset in lz77 encoding (internal bug)"; + case 87: return "must provide custom zlib function pointer if LODEPNG_COMPILE_ZLIB is not defined"; + case 88: return "invalid filter strategy given for LodePNGEncoderSettings.filter_strategy"; + case 89: return "text chunk keyword too short or long: must have size 1-79"; + /*the windowsize in the LodePNGCompressSettings. Requiring POT(==> & instead of %) makes encoding 12% faster.*/ + case 90: return "windowsize must be a power of two"; + case 91: return "invalid decompressed idat size"; + case 92: return "integer overflow due to too many pixels"; + case 93: return "zero width or height is invalid"; + case 94: return "header chunk must have a size of 13 bytes"; + case 95: return "integer overflow with combined idat chunk size"; + case 96: return "invalid gAMA chunk size"; + case 97: return "invalid cHRM chunk size"; + case 98: return "invalid sRGB chunk size"; + case 99: return "invalid sRGB rendering intent"; + case 100: return "invalid ICC profile color type, the PNG specification only allows RGB or GRAY"; + case 101: return "PNG specification does not allow RGB ICC profile on gray color types and vice versa"; + case 102: return "not allowed to set grayscale ICC profile with colored pixels by PNG specification"; + case 103: return "invalid palette index in bKGD chunk. Maybe it came before PLTE chunk?"; + case 104: return "invalid bKGD color while encoding (e.g. 
palette index out of range)"; + case 105: return "integer overflow of bitsize"; + case 106: return "PNG file must have PLTE chunk if color type is palette"; + case 107: return "color convert from palette mode requested without setting the palette data in it"; + case 108: return "tried to add more than 256 values to a palette"; + /*this limit can be configured in LodePNGDecompressSettings*/ + case 109: return "tried to decompress zlib or deflate data larger than desired max_output_size"; + case 110: return "custom zlib or inflate decompression failed"; + case 111: return "custom zlib or deflate compression failed"; + /*max text size limit can be configured in LodePNGDecoderSettings. This error prevents + unreasonable memory consumption when decoding due to impossibly large text sizes.*/ + case 112: return "compressed text unreasonably large"; + /*max ICC size limit can be configured in LodePNGDecoderSettings. This error prevents + unreasonable memory consumption when decoding due to impossibly large ICC profile*/ + case 113: return "ICC profile unreasonably large"; + } + return "unknown error code"; +} +#endif /*LODEPNG_COMPILE_ERROR_TEXT*/ + +/* ////////////////////////////////////////////////////////////////////////// */ +/* ////////////////////////////////////////////////////////////////////////// */ +/* // C++ Wrapper // */ +/* ////////////////////////////////////////////////////////////////////////// */ +/* ////////////////////////////////////////////////////////////////////////// */ + +#ifdef LODEPNG_COMPILE_CPP +namespace lodepng { + +#ifdef LODEPNG_COMPILE_DISK +unsigned load_file(std::vector& buffer, const std::string& filename) { + long size = lodepng_filesize(filename.c_str()); + if(size < 0) return 78; + buffer.resize((size_t)size); + return size == 0 ? 0 : lodepng_buffer_file(&buffer[0], (size_t)size, filename.c_str()); +} + +/*write given buffer to the file, overwriting the file, it doesn't append to it.*/ +unsigned save_file(const std::vector& buffer, const std::string& filename) { + return lodepng_save_file(buffer.empty() ? 0 : &buffer[0], buffer.size(), filename.c_str()); +} +#endif /* LODEPNG_COMPILE_DISK */ + +#ifdef LODEPNG_COMPILE_ZLIB +#ifdef LODEPNG_COMPILE_DECODER +unsigned decompress(std::vector& out, const unsigned char* in, size_t insize, + const LodePNGDecompressSettings& settings) { + unsigned char* buffer = 0; + size_t buffersize = 0; + unsigned error = zlib_decompress(&buffer, &buffersize, 0, in, insize, &settings); + if(buffer) { + out.insert(out.end(), &buffer[0], &buffer[buffersize]); + lodepng_free(buffer); + } + return error; +} + +unsigned decompress(std::vector& out, const std::vector& in, + const LodePNGDecompressSettings& settings) { + return decompress(out, in.empty() ? 0 : &in[0], in.size(), settings); +} +#endif /* LODEPNG_COMPILE_DECODER */ + +#ifdef LODEPNG_COMPILE_ENCODER +unsigned compress(std::vector& out, const unsigned char* in, size_t insize, + const LodePNGCompressSettings& settings) { + unsigned char* buffer = 0; + size_t buffersize = 0; + unsigned error = zlib_compress(&buffer, &buffersize, in, insize, &settings); + if(buffer) { + out.insert(out.end(), &buffer[0], &buffer[buffersize]); + lodepng_free(buffer); + } + return error; +} + +unsigned compress(std::vector& out, const std::vector& in, + const LodePNGCompressSettings& settings) { + return compress(out, in.empty() ? 
0 : &in[0], in.size(), settings); +} +#endif /* LODEPNG_COMPILE_ENCODER */ +#endif /* LODEPNG_COMPILE_ZLIB */ + + +#ifdef LODEPNG_COMPILE_PNG + +State::State() { + lodepng_state_init(this); +} + +State::State(const State& other) { + lodepng_state_init(this); + lodepng_state_copy(this, &other); +} + +State::~State() { + lodepng_state_cleanup(this); +} + +State& State::operator=(const State& other) { + lodepng_state_copy(this, &other); + return *this; +} + +#ifdef LODEPNG_COMPILE_DECODER + +unsigned decode(std::vector& out, unsigned& w, unsigned& h, const unsigned char* in, + size_t insize, LodePNGColorType colortype, unsigned bitdepth) { + unsigned char* buffer = 0; + unsigned error = lodepng_decode_memory(&buffer, &w, &h, in, insize, colortype, bitdepth); + if(buffer && !error) { + State state; + state.info_raw.colortype = colortype; + state.info_raw.bitdepth = bitdepth; + size_t buffersize = lodepng_get_raw_size(w, h, &state.info_raw); + out.insert(out.end(), &buffer[0], &buffer[buffersize]); + } + lodepng_free(buffer); + return error; +} + +unsigned decode(std::vector& out, unsigned& w, unsigned& h, + const std::vector& in, LodePNGColorType colortype, unsigned bitdepth) { + return decode(out, w, h, in.empty() ? 0 : &in[0], (unsigned)in.size(), colortype, bitdepth); +} + +unsigned decode(std::vector& out, unsigned& w, unsigned& h, + State& state, + const unsigned char* in, size_t insize) { + unsigned char* buffer = NULL; + unsigned error = lodepng_decode(&buffer, &w, &h, &state, in, insize); + if(buffer && !error) { + size_t buffersize = lodepng_get_raw_size(w, h, &state.info_raw); + out.insert(out.end(), &buffer[0], &buffer[buffersize]); + } + lodepng_free(buffer); + return error; +} + +unsigned decode(std::vector& out, unsigned& w, unsigned& h, + State& state, + const std::vector& in) { + return decode(out, w, h, state, in.empty() ? 0 : &in[0], in.size()); +} + +#ifdef LODEPNG_COMPILE_DISK +unsigned decode(std::vector& out, unsigned& w, unsigned& h, const std::string& filename, + LodePNGColorType colortype, unsigned bitdepth) { + std::vector buffer; + /* safe output values in case error happens */ + w = h = 0; + unsigned error = load_file(buffer, filename); + if(error) return error; + return decode(out, w, h, buffer, colortype, bitdepth); +} +#endif /* LODEPNG_COMPILE_DECODER */ +#endif /* LODEPNG_COMPILE_DISK */ + +#ifdef LODEPNG_COMPILE_ENCODER +unsigned encode(std::vector& out, const unsigned char* in, unsigned w, unsigned h, + LodePNGColorType colortype, unsigned bitdepth) { + unsigned char* buffer; + size_t buffersize; + unsigned error = lodepng_encode_memory(&buffer, &buffersize, in, w, h, colortype, bitdepth); + if(buffer) { + out.insert(out.end(), &buffer[0], &buffer[buffersize]); + lodepng_free(buffer); + } + return error; +} + +unsigned encode(std::vector& out, + const std::vector& in, unsigned w, unsigned h, + LodePNGColorType colortype, unsigned bitdepth) { + if(lodepng_get_raw_size_lct(w, h, colortype, bitdepth) > in.size()) return 84; + return encode(out, in.empty() ? 
0 : &in[0], w, h, colortype, bitdepth); +} + +unsigned encode(std::vector& out, + const unsigned char* in, unsigned w, unsigned h, + State& state) { + unsigned char* buffer; + size_t buffersize; + unsigned error = lodepng_encode(&buffer, &buffersize, in, w, h, &state); + if(buffer) { + out.insert(out.end(), &buffer[0], &buffer[buffersize]); + lodepng_free(buffer); + } + return error; +} + +unsigned encode(std::vector& out, + const std::vector& in, unsigned w, unsigned h, + State& state) { + if(lodepng_get_raw_size(w, h, &state.info_raw) > in.size()) return 84; + return encode(out, in.empty() ? 0 : &in[0], w, h, state); +} + +#ifdef LODEPNG_COMPILE_DISK +unsigned encode(const std::string& filename, + const unsigned char* in, unsigned w, unsigned h, + LodePNGColorType colortype, unsigned bitdepth) { + std::vector buffer; + unsigned error = encode(buffer, in, w, h, colortype, bitdepth); + if(!error) error = save_file(buffer, filename); + return error; +} + +unsigned encode(const std::string& filename, + const std::vector& in, unsigned w, unsigned h, + LodePNGColorType colortype, unsigned bitdepth) { + if(lodepng_get_raw_size_lct(w, h, colortype, bitdepth) > in.size()) return 84; + return encode(filename, in.empty() ? 0 : &in[0], w, h, colortype, bitdepth); +} +#endif /* LODEPNG_COMPILE_DISK */ +#endif /* LODEPNG_COMPILE_ENCODER */ +#endif /* LODEPNG_COMPILE_PNG */ +} /* namespace lodepng */ +#endif /*LODEPNG_COMPILE_CPP*/ diff --git a/libkram/lodepng/lodepng.h b/libkram/lodepng/lodepng.h index 1f0bdfd8..6801cb78 100644 --- a/libkram/lodepng/lodepng.h +++ b/libkram/lodepng/lodepng.h @@ -1,1761 +1,1977 @@ -/* -LodePNG version 20160124 - -Copyright (c) 2005-2016 Lode Vandevenne - -This software is provided 'as-is', without any express or implied -warranty. In no event will the authors be held liable for any damages -arising from the use of this software. - -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it -freely, subject to the following restrictions: - - 1. The origin of this software must not be misrepresented; you must not - claim that you wrote the original software. If you use this software - in a product, an acknowledgment in the product documentation would be - appreciated but is not required. - - 2. Altered source versions must be plainly marked as such, and must not be - misrepresented as being the original software. - - 3. This notice may not be removed or altered from any source - distribution. -*/ - -#ifndef LODEPNG_H -#define LODEPNG_H - -#include /*for size_t*/ - -extern const char* LODEPNG_VERSION_STRING; - -// TODO: move to Cmake -#define LODEPNG_NO_COMPILE_ENCODER -#define LODEPNG_NO_COMPILE_DISK -#define LODEPNG_NO_COMPILE_ANCILLARY_CHUNKS - -/* -The following #defines are used to create code sections. They can be disabled -to disable code sections, which can give faster compile time and smaller binary. -The "NO_COMPILE" defines are designed to be used to pass as defines to the -compiler command to disable them without modifying this header, e.g. --DLODEPNG_NO_COMPILE_ZLIB for gcc. -In addition to those below, you can also define LODEPNG_NO_COMPILE_CRC to -allow implementing a custom lodepng_crc32. -*/ -/*deflate & zlib. 
If disabled, you must specify alternative zlib functions in -the custom_zlib field of the compress and decompress settings*/ -#ifndef LODEPNG_NO_COMPILE_ZLIB -#define LODEPNG_COMPILE_ZLIB -#endif -/*png encoder and png decoder*/ -#ifndef LODEPNG_NO_COMPILE_PNG -#define LODEPNG_COMPILE_PNG -#endif -/*deflate&zlib decoder and png decoder*/ -#ifndef LODEPNG_NO_COMPILE_DECODER -#define LODEPNG_COMPILE_DECODER -#endif -/*deflate&zlib encoder and png encoder*/ -#ifndef LODEPNG_NO_COMPILE_ENCODER -#define LODEPNG_COMPILE_ENCODER -#endif -/*the optional built in harddisk file loading and saving functions*/ -#ifndef LODEPNG_NO_COMPILE_DISK -#define LODEPNG_COMPILE_DISK -#endif -/*support for chunks other than IHDR, IDAT, PLTE, tRNS, IEND: ancillary and unknown chunks*/ -#ifndef LODEPNG_NO_COMPILE_ANCILLARY_CHUNKS -#define LODEPNG_COMPILE_ANCILLARY_CHUNKS -#endif -/*ability to convert error numerical codes to English text string*/ -#ifndef LODEPNG_NO_COMPILE_ERROR_TEXT -#define LODEPNG_COMPILE_ERROR_TEXT -#endif -/*Compile the default allocators (C's free, malloc and realloc). If you disable this, -you can define the functions lodepng_free, lodepng_malloc and lodepng_realloc in your -source files with custom allocators.*/ -#ifndef LODEPNG_NO_COMPILE_ALLOCATORS -#define LODEPNG_COMPILE_ALLOCATORS -#endif -/*compile the C++ version (you can disable the C++ wrapper here even when compiling for C++)*/ -#ifdef __cplusplus -#ifndef LODEPNG_NO_COMPILE_CPP -#define LODEPNG_COMPILE_CPP -#endif -#endif - -#ifdef LODEPNG_COMPILE_CPP -#include -#include -#endif /*LODEPNG_COMPILE_CPP*/ - -#ifdef LODEPNG_COMPILE_PNG -/*The PNG color types (also used for raw).*/ -typedef enum LodePNGColorType -{ - LCT_GREY = 0, /*greyscale: 1,2,4,8,16 bit*/ - LCT_RGB = 2, /*RGB: 8,16 bit*/ - LCT_PALETTE = 3, /*palette: 1,2,4,8 bit*/ - LCT_GREY_ALPHA = 4, /*greyscale with alpha: 8,16 bit*/ - LCT_RGBA = 6 /*RGB with alpha: 8,16 bit*/ -} LodePNGColorType; - -#ifdef LODEPNG_COMPILE_DECODER -/* -Converts PNG data in memory to raw pixel data. -out: Output parameter. Pointer to buffer that will contain the raw pixel data. - After decoding, its size is w * h * (bytes per pixel) bytes larger than - initially. Bytes per pixel depends on colortype and bitdepth. - Must be freed after usage with free(*out). - Note: for 16-bit per channel colors, uses big endian format like PNG does. -w: Output parameter. Pointer to width of pixel data. -h: Output parameter. Pointer to height of pixel data. -in: Memory buffer with the PNG file. -insize: size of the in buffer. -colortype: the desired color type for the raw output image. See explanation on PNG color types. -bitdepth: the desired bit depth for the raw output image. See explanation on PNG color types. -Return value: LodePNG error code (0 means no error). -*/ -unsigned lodepng_decode_memory(unsigned char** out, unsigned* w, unsigned* h, - const unsigned char* in, size_t insize, - LodePNGColorType colortype, unsigned bitdepth); - -/*Same as lodepng_decode_memory, but always decodes to 32-bit RGBA raw image*/ -unsigned lodepng_decode32(unsigned char** out, unsigned* w, unsigned* h, - const unsigned char* in, size_t insize); - -/*Same as lodepng_decode_memory, but always decodes to 24-bit RGB raw image*/ -unsigned lodepng_decode24(unsigned char** out, unsigned* w, unsigned* h, - const unsigned char* in, size_t insize); - -#ifdef LODEPNG_COMPILE_DISK -/* -Load PNG from disk, from file with given name. -Same as the other decode functions, but instead takes a filename as input. 
-*/ -unsigned lodepng_decode_file(unsigned char** out, unsigned* w, unsigned* h, - const char* filename, - LodePNGColorType colortype, unsigned bitdepth); - -/*Same as lodepng_decode_file, but always decodes to 32-bit RGBA raw image.*/ -unsigned lodepng_decode32_file(unsigned char** out, unsigned* w, unsigned* h, - const char* filename); - -/*Same as lodepng_decode_file, but always decodes to 24-bit RGB raw image.*/ -unsigned lodepng_decode24_file(unsigned char** out, unsigned* w, unsigned* h, - const char* filename); -#endif /*LODEPNG_COMPILE_DISK*/ -#endif /*LODEPNG_COMPILE_DECODER*/ - - -#ifdef LODEPNG_COMPILE_ENCODER -/* -Converts raw pixel data into a PNG image in memory. The colortype and bitdepth - of the output PNG image cannot be chosen, they are automatically determined - by the colortype, bitdepth and content of the input pixel data. - Note: for 16-bit per channel colors, needs big endian format like PNG does. -out: Output parameter. Pointer to buffer that will contain the PNG image data. - Must be freed after usage with free(*out). -outsize: Output parameter. Pointer to the size in bytes of the out buffer. -image: The raw pixel data to encode. The size of this buffer should be - w * h * (bytes per pixel), bytes per pixel depends on colortype and bitdepth. -w: width of the raw pixel data in pixels. -h: height of the raw pixel data in pixels. -colortype: the color type of the raw input image. See explanation on PNG color types. -bitdepth: the bit depth of the raw input image. See explanation on PNG color types. -Return value: LodePNG error code (0 means no error). -*/ -unsigned lodepng_encode_memory(unsigned char** out, size_t* outsize, - const unsigned char* image, unsigned w, unsigned h, - LodePNGColorType colortype, unsigned bitdepth); - -/*Same as lodepng_encode_memory, but always encodes from 32-bit RGBA raw image.*/ -unsigned lodepng_encode32(unsigned char** out, size_t* outsize, - const unsigned char* image, unsigned w, unsigned h); - -/*Same as lodepng_encode_memory, but always encodes from 24-bit RGB raw image.*/ -unsigned lodepng_encode24(unsigned char** out, size_t* outsize, - const unsigned char* image, unsigned w, unsigned h); - -#ifdef LODEPNG_COMPILE_DISK -/* -Converts raw pixel data into a PNG file on disk. -Same as the other encode functions, but instead takes a filename as output. -NOTE: This overwrites existing files without warning! -*/ -unsigned lodepng_encode_file(const char* filename, - const unsigned char* image, unsigned w, unsigned h, - LodePNGColorType colortype, unsigned bitdepth); - -/*Same as lodepng_encode_file, but always encodes from 32-bit RGBA raw image.*/ -unsigned lodepng_encode32_file(const char* filename, - const unsigned char* image, unsigned w, unsigned h); - -/*Same as lodepng_encode_file, but always encodes from 24-bit RGB raw image.*/ -unsigned lodepng_encode24_file(const char* filename, - const unsigned char* image, unsigned w, unsigned h); -#endif /*LODEPNG_COMPILE_DISK*/ -#endif /*LODEPNG_COMPILE_ENCODER*/ - - -#ifdef LODEPNG_COMPILE_CPP -namespace lodepng -{ -#ifdef LODEPNG_COMPILE_DECODER -/*Same as lodepng_decode_memory, but decodes to an std::vector. The colortype -is the format to output the pixels to. 
Default is RGBA 8-bit per channel.*/ -unsigned decode(std::vector& out, unsigned& w, unsigned& h, - const unsigned char* in, size_t insize, - LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8); -unsigned decode(std::vector& out, unsigned& w, unsigned& h, - const std::vector& in, - LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8); -#ifdef LODEPNG_COMPILE_DISK -/* -Converts PNG file from disk to raw pixel data in memory. -Same as the other decode functions, but instead takes a filename as input. -*/ -unsigned decode(std::vector& out, unsigned& w, unsigned& h, - const std::string& filename, - LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8); -#endif /* LODEPNG_COMPILE_DISK */ -#endif /* LODEPNG_COMPILE_DECODER */ - -#ifdef LODEPNG_COMPILE_ENCODER -/*Same as lodepng_encode_memory, but encodes to an std::vector. colortype -is that of the raw input data. The output PNG color type will be auto chosen.*/ -unsigned encode(std::vector& out, - const unsigned char* in, unsigned w, unsigned h, - LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8); -unsigned encode(std::vector& out, - const std::vector& in, unsigned w, unsigned h, - LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8); -#ifdef LODEPNG_COMPILE_DISK -/* -Converts 32-bit RGBA raw pixel data into a PNG file on disk. -Same as the other encode functions, but instead takes a filename as output. -NOTE: This overwrites existing files without warning! -*/ -unsigned encode(const std::string& filename, - const unsigned char* in, unsigned w, unsigned h, - LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8); -unsigned encode(const std::string& filename, - const std::vector& in, unsigned w, unsigned h, - LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8); -#endif /* LODEPNG_COMPILE_DISK */ -#endif /* LODEPNG_COMPILE_ENCODER */ -} /* namespace lodepng */ -#endif /*LODEPNG_COMPILE_CPP*/ -#endif /*LODEPNG_COMPILE_PNG*/ - -#ifdef LODEPNG_COMPILE_ERROR_TEXT -/*Returns an English description of the numerical error code.*/ -const char* lodepng_error_text(unsigned code); -#endif /*LODEPNG_COMPILE_ERROR_TEXT*/ - -#ifdef LODEPNG_COMPILE_DECODER -/*Settings for zlib decompression*/ -typedef struct LodePNGDecompressSettings LodePNGDecompressSettings; -struct LodePNGDecompressSettings -{ - unsigned ignore_adler32; /*if 1, continue and don't give an error message if the Adler32 checksum is corrupted*/ - - /*use custom zlib decoder instead of built in one (default: null)*/ - unsigned (*custom_zlib)(unsigned char**, size_t*, - const unsigned char*, size_t, - const LodePNGDecompressSettings*); - /*use custom deflate decoder instead of built in one (default: null) - if custom_zlib is used, custom_deflate is ignored since only the built in - zlib function will call custom_deflate*/ - unsigned (*custom_inflate)(unsigned char**, size_t*, - const unsigned char*, size_t, - const LodePNGDecompressSettings*); - - const void* custom_context; /*optional custom settings for custom functions*/ -}; - -extern const LodePNGDecompressSettings lodepng_default_decompress_settings; -void lodepng_decompress_settings_init(LodePNGDecompressSettings* settings); -#endif /*LODEPNG_COMPILE_DECODER*/ - -#ifdef LODEPNG_COMPILE_ENCODER -/* -Settings for zlib compression. Tweaking these settings tweaks the balance -between speed and compression ratio. 
-*/ -typedef struct LodePNGCompressSettings LodePNGCompressSettings; -struct LodePNGCompressSettings /*deflate = compress*/ -{ - /*LZ77 related settings*/ - unsigned btype; /*the block type for LZ (0, 1, 2 or 3, see zlib standard). Should be 2 for proper compression.*/ - unsigned use_lz77; /*whether or not to use LZ77. Should be 1 for proper compression.*/ - unsigned windowsize; /*must be a power of two <= 32768. higher compresses more but is slower. Default value: 2048.*/ - unsigned minmatch; /*mininum lz77 length. 3 is normally best, 6 can be better for some PNGs. Default: 0*/ - unsigned nicematch; /*stop searching if >= this length found. Set to 258 for best compression. Default: 128*/ - unsigned lazymatching; /*use lazy matching: better compression but a bit slower. Default: true*/ - - /*use custom zlib encoder instead of built in one (default: null)*/ - unsigned (*custom_zlib)(unsigned char**, size_t*, - const unsigned char*, size_t, - const LodePNGCompressSettings*); - /*use custom deflate encoder instead of built in one (default: null) - if custom_zlib is used, custom_deflate is ignored since only the built in - zlib function will call custom_deflate*/ - unsigned (*custom_deflate)(unsigned char**, size_t*, - const unsigned char*, size_t, - const LodePNGCompressSettings*); - - const void* custom_context; /*optional custom settings for custom functions*/ -}; - -extern const LodePNGCompressSettings lodepng_default_compress_settings; -void lodepng_compress_settings_init(LodePNGCompressSettings* settings); -#endif /*LODEPNG_COMPILE_ENCODER*/ - -#ifdef LODEPNG_COMPILE_PNG -/* -Color mode of an image. Contains all information required to decode the pixel -bits to RGBA colors. This information is the same as used in the PNG file -format, and is used both for PNG and raw image data in LodePNG. -*/ -typedef struct LodePNGColorMode -{ - /*header (IHDR)*/ - LodePNGColorType colortype; /*color type, see PNG standard or documentation further in this header file*/ - unsigned bitdepth; /*bits per sample, see PNG standard or documentation further in this header file*/ - - /* - palette (PLTE and tRNS) - - Dynamically allocated with the colors of the palette, including alpha. - When encoding a PNG, to store your colors in the palette of the LodePNGColorMode, first use - lodepng_palette_clear, then for each color use lodepng_palette_add. - If you encode an image without alpha with palette, don't forget to put value 255 in each A byte of the palette. - - When decoding, by default you can ignore this palette, since LodePNG already - fills the palette colors in the pixels of the raw RGBA output. - - The palette is only supported for color type 3. - */ - unsigned char* palette; /*palette in RGBARGBA... order. When allocated, must be either 0, or have size 1024*/ - size_t palettesize; /*palette size in number of colors (amount of bytes is 4 * palettesize)*/ - - /* - transparent color key (tRNS) - - This color uses the same bit depth as the bitdepth value in this struct, which can be 1-bit to 16-bit. - For greyscale PNGs, r, g and b will all 3 be set to the same. - - When decoding, by default you can ignore this information, since LodePNG sets - pixels with this key to transparent already in the raw RGBA output. - - The color key is only supported for color types 0 and 2. - */ - unsigned key_defined; /*is a transparent color key given? 
0 = false, 1 = true*/ - unsigned key_r; /*red/greyscale component of color key*/ - unsigned key_g; /*green component of color key*/ - unsigned key_b; /*blue component of color key*/ -} LodePNGColorMode; - -/*init, cleanup and copy functions to use with this struct*/ -void lodepng_color_mode_init(LodePNGColorMode* info); -void lodepng_color_mode_cleanup(LodePNGColorMode* info); -/*return value is error code (0 means no error)*/ -unsigned lodepng_color_mode_copy(LodePNGColorMode* dest, const LodePNGColorMode* source); - -void lodepng_palette_clear(LodePNGColorMode* info); -/*add 1 color to the palette*/ -unsigned lodepng_palette_add(LodePNGColorMode* info, - unsigned char r, unsigned char g, unsigned char b, unsigned char a); - -/*get the total amount of bits per pixel, based on colortype and bitdepth in the struct*/ -unsigned lodepng_get_bpp(const LodePNGColorMode* info); -/*get the amount of color channels used, based on colortype in the struct. -If a palette is used, it counts as 1 channel.*/ -unsigned lodepng_get_channels(const LodePNGColorMode* info); -/*is it a greyscale type? (only colortype 0 or 4)*/ -unsigned lodepng_is_greyscale_type(const LodePNGColorMode* info); -/*has it got an alpha channel? (only colortype 2 or 6)*/ -unsigned lodepng_is_alpha_type(const LodePNGColorMode* info); -/*has it got a palette? (only colortype 3)*/ -unsigned lodepng_is_palette_type(const LodePNGColorMode* info); -/*only returns true if there is a palette and there is a value in the palette with alpha < 255. -Loops through the palette to check this.*/ -unsigned lodepng_has_palette_alpha(const LodePNGColorMode* info); -/* -Check if the given color info indicates the possibility of having non-opaque pixels in the PNG image. -Returns true if the image can have translucent or invisible pixels (it still be opaque if it doesn't use such pixels). -Returns false if the image can only have opaque pixels. -In detail, it returns true only if it's a color type with alpha, or has a palette with non-opaque values, -or if "key_defined" is true. -*/ -unsigned lodepng_can_have_alpha(const LodePNGColorMode* info); -/*Returns the byte size of a raw image buffer with given width, height and color mode*/ -size_t lodepng_get_raw_size(unsigned w, unsigned h, const LodePNGColorMode* color); - -#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS -/*The information of a Time chunk in PNG.*/ -typedef struct LodePNGTime -{ - unsigned year; /*2 bytes used (0-65535)*/ - unsigned month; /*1-12*/ - unsigned day; /*1-31*/ - unsigned hour; /*0-23*/ - unsigned minute; /*0-59*/ - unsigned second; /*0-60 (to allow for leap seconds)*/ -} LodePNGTime; -#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ - -/*Information about the PNG image, except pixels, width and height.*/ -typedef struct LodePNGInfo -{ - /*header (IHDR), palette (PLTE) and transparency (tRNS) chunks*/ - unsigned compression_method;/*compression method of the original file. Always 0.*/ - unsigned filter_method; /*filter method of the original file*/ - unsigned interlace_method; /*interlace method of the original file*/ - LodePNGColorMode color; /*color type and bits, palette and transparency of the PNG file*/ - -#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS - /* - suggested background color chunk (bKGD) - This color uses the same color mode as the PNG (except alpha channel), which can be 1-bit to 16-bit. - - For greyscale PNGs, r, g and b will all 3 be set to the same. When encoding - the encoder writes the red one. 
For palette PNGs: When decoding, the RGB value - will be stored, not a palette index. But when encoding, specify the index of - the palette in background_r, the other two are then ignored. - - The decoder does not use this background color to edit the color of pixels. - */ - unsigned background_defined; /*is a suggested background color given?*/ - unsigned background_r; /*red component of suggested background color*/ - unsigned background_g; /*green component of suggested background color*/ - unsigned background_b; /*blue component of suggested background color*/ - - /* - non-international text chunks (tEXt and zTXt) - - The char** arrays each contain num strings. The actual messages are in - text_strings, while text_keys are keywords that give a short description what - the actual text represents, e.g. Title, Author, Description, or anything else. - - A keyword is minimum 1 character and maximum 79 characters long. It's - discouraged to use a single line length longer than 79 characters for texts. - - Don't allocate these text buffers yourself. Use the init/cleanup functions - correctly and use lodepng_add_text and lodepng_clear_text. - */ - size_t text_num; /*the amount of texts in these char** buffers (there may be more texts in itext)*/ - char** text_keys; /*the keyword of a text chunk (e.g. "Comment")*/ - char** text_strings; /*the actual text*/ - - /* - international text chunks (iTXt) - Similar to the non-international text chunks, but with additional strings - "langtags" and "transkeys". - */ - size_t itext_num; /*the amount of international texts in this PNG*/ - char** itext_keys; /*the English keyword of the text chunk (e.g. "Comment")*/ - char** itext_langtags; /*language tag for this text's language, ISO/IEC 646 string, e.g. ISO 639 language tag*/ - char** itext_transkeys; /*keyword translated to the international language - UTF-8 string*/ - char** itext_strings; /*the actual international text - UTF-8 string*/ - - /*time chunk (tIME)*/ - unsigned time_defined; /*set to 1 to make the encoder generate a tIME chunk*/ - LodePNGTime time; - - /*phys chunk (pHYs)*/ - unsigned phys_defined; /*if 0, there is no pHYs chunk and the values below are undefined, if 1 else there is one*/ - unsigned phys_x; /*pixels per unit in x direction*/ - unsigned phys_y; /*pixels per unit in y direction*/ - unsigned phys_unit; /*may be 0 (unknown unit) or 1 (metre)*/ - - /* - unknown chunks - There are 3 buffers, one for each position in the PNG where unknown chunks can appear - each buffer contains all unknown chunks for that position consecutively - The 3 buffers are the unknown chunks between certain critical chunks: - 0: IHDR-PLTE, 1: PLTE-IDAT, 2: IDAT-IEND - Do not allocate or traverse this data yourself. Use the chunk traversing functions declared - later, such as lodepng_chunk_next and lodepng_chunk_append, to read/write this struct. 
- */ - unsigned char* unknown_chunks_data[3]; - size_t unknown_chunks_size[3]; /*size in bytes of the unknown chunks, given for protection*/ -#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ -} LodePNGInfo; - -/*init, cleanup and copy functions to use with this struct*/ -void lodepng_info_init(LodePNGInfo* info); -void lodepng_info_cleanup(LodePNGInfo* info); -/*return value is error code (0 means no error)*/ -unsigned lodepng_info_copy(LodePNGInfo* dest, const LodePNGInfo* source); - -#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS -void lodepng_clear_text(LodePNGInfo* info); /*use this to clear the texts again after you filled them in*/ -unsigned lodepng_add_text(LodePNGInfo* info, const char* key, const char* str); /*push back both texts at once*/ - -void lodepng_clear_itext(LodePNGInfo* info); /*use this to clear the itexts again after you filled them in*/ -unsigned lodepng_add_itext(LodePNGInfo* info, const char* key, const char* langtag, - const char* transkey, const char* str); /*push back the 4 texts of 1 chunk at once*/ -#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ - -/* -Converts raw buffer from one color type to another color type, based on -LodePNGColorMode structs to describe the input and output color type. -See the reference manual at the end of this header file to see which color conversions are supported. -return value = LodePNG error code (0 if all went ok, an error if the conversion isn't supported) -The out buffer must have size (w * h * bpp + 7) / 8, where bpp is the bits per pixel -of the output color type (lodepng_get_bpp). -For < 8 bpp images, there should not be padding bits at the end of scanlines. -For 16-bit per channel colors, uses big endian format like PNG does. -Return value is LodePNG error code -*/ -unsigned lodepng_convert(unsigned char* out, const unsigned char* in, - const LodePNGColorMode* mode_out, const LodePNGColorMode* mode_in, - unsigned w, unsigned h); - -#ifdef LODEPNG_COMPILE_DECODER -/* -Settings for the decoder. This contains settings for the PNG and the Zlib -decoder, but not the Info settings from the Info structs. -*/ -typedef struct LodePNGDecoderSettings -{ - LodePNGDecompressSettings zlibsettings; /*in here is the setting to ignore Adler32 checksums*/ - - unsigned ignore_crc; /*ignore CRC checksums*/ - - unsigned color_convert; /*whether to convert the PNG to the color type you want. Default: yes*/ - -#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS - unsigned read_text_chunks; /*if false but remember_unknown_chunks is true, they're stored in the unknown chunks*/ - /*store all bytes from unknown chunks in the LodePNGInfo (off by default, useful for a png editor)*/ - unsigned remember_unknown_chunks; -#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ -} LodePNGDecoderSettings; - -void lodepng_decoder_settings_init(LodePNGDecoderSettings* settings); -#endif /*LODEPNG_COMPILE_DECODER*/ - -#ifdef LODEPNG_COMPILE_ENCODER -/*automatically use color type with less bits per pixel if losslessly possible. Default: AUTO*/ -typedef enum LodePNGFilterStrategy -{ - /*every filter at zero*/ - LFS_ZERO, - /*Use filter that gives minimum sum, as described in the official PNG filter heuristic.*/ - LFS_MINSUM, - /*Use the filter type that gives smallest Shannon entropy for this scanline. Depending - on the image, this is better or worse than minsum.*/ - LFS_ENTROPY, - /* - Brute-force-search PNG filters by compressing each filter for each scanline. - Experimental, very slow, and only rarely gives better compression than MINSUM. 
- */ - LFS_BRUTE_FORCE, - /*use predefined_filters buffer: you specify the filter type for each scanline*/ - LFS_PREDEFINED -} LodePNGFilterStrategy; - -/*Gives characteristics about the colors of the image, which helps decide which color model to use for encoding. -Used internally by default if "auto_convert" is enabled. Public because it's useful for custom algorithms.*/ -typedef struct LodePNGColorProfile -{ - unsigned colored; /*not greyscale*/ - unsigned key; /*if true, image is not opaque. Only if true and alpha is false, color key is possible.*/ - unsigned short key_r; /*these values are always in 16-bit bitdepth in the profile*/ - unsigned short key_g; - unsigned short key_b; - unsigned alpha; /*alpha channel or alpha palette required*/ - unsigned numcolors; /*amount of colors, up to 257. Not valid if bits == 16.*/ - unsigned char palette[1024]; /*Remembers up to the first 256 RGBA colors, in no particular order*/ - unsigned bits; /*bits per channel (not for palette). 1,2 or 4 for greyscale only. 16 if 16-bit per channel required.*/ -} LodePNGColorProfile; - -void lodepng_color_profile_init(LodePNGColorProfile* profile); - -/*Get a LodePNGColorProfile of the image.*/ -unsigned lodepng_get_color_profile(LodePNGColorProfile* profile, - const unsigned char* image, unsigned w, unsigned h, - const LodePNGColorMode* mode_in); -/*The function LodePNG uses internally to decide the PNG color with auto_convert. -Chooses an optimal color model, e.g. grey if only grey pixels, palette if < 256 colors, ...*/ -unsigned lodepng_auto_choose_color(LodePNGColorMode* mode_out, - const unsigned char* image, unsigned w, unsigned h, - const LodePNGColorMode* mode_in); - -/*Settings for the encoder.*/ -typedef struct LodePNGEncoderSettings -{ - LodePNGCompressSettings zlibsettings; /*settings for the zlib encoder, such as window size, ...*/ - - unsigned auto_convert; /*automatically choose output PNG color type. Default: true*/ - - /*If true, follows the official PNG heuristic: if the PNG uses a palette or lower than - 8 bit depth, set all filters to zero. Otherwise use the filter_strategy. Note that to - completely follow the official PNG heuristic, filter_palette_zero must be true and - filter_strategy must be LFS_MINSUM*/ - unsigned filter_palette_zero; - /*Which filter strategy to use when not using zeroes due to filter_palette_zero. - Set filter_palette_zero to 0 to ensure always using your chosen strategy. Default: LFS_MINSUM*/ - LodePNGFilterStrategy filter_strategy; - /*used if filter_strategy is LFS_PREDEFINED. In that case, this must point to a buffer with - the same length as the amount of scanlines in the image, and each value must <= 5. You - have to cleanup this buffer, LodePNG will never free it. Don't forget that filter_palette_zero - must be set to 0 to ensure this is also used on palette or low bitdepth images.*/ - const unsigned char* predefined_filters; - - /*force creating a PLTE chunk if colortype is 2 or 6 (= a suggested palette). 
- If colortype is 3, PLTE is _always_ created.*/ - unsigned force_palette; -#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS - /*add LodePNG identifier and version as a text chunk, for debugging*/ - unsigned add_id; - /*encode text chunks as zTXt chunks instead of tEXt chunks, and use compression in iTXt chunks*/ - unsigned text_compression; -#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ -} LodePNGEncoderSettings; - -void lodepng_encoder_settings_init(LodePNGEncoderSettings* settings); -#endif /*LODEPNG_COMPILE_ENCODER*/ - - -#if defined(LODEPNG_COMPILE_DECODER) || defined(LODEPNG_COMPILE_ENCODER) -/*The settings, state and information for extended encoding and decoding.*/ -typedef struct LodePNGState -{ -#ifdef LODEPNG_COMPILE_DECODER - LodePNGDecoderSettings decoder; /*the decoding settings*/ -#endif /*LODEPNG_COMPILE_DECODER*/ -#ifdef LODEPNG_COMPILE_ENCODER - LodePNGEncoderSettings encoder; /*the encoding settings*/ -#endif /*LODEPNG_COMPILE_ENCODER*/ - LodePNGColorMode info_raw; /*specifies the format in which you would like to get the raw pixel buffer*/ - LodePNGInfo info_png; /*info of the PNG image obtained after decoding*/ - unsigned error; -#ifdef LODEPNG_COMPILE_CPP - /* For the lodepng::State subclass. */ - virtual ~LodePNGState(){} -#endif -} LodePNGState; - -/*init, cleanup and copy functions to use with this struct*/ -void lodepng_state_init(LodePNGState* state); -void lodepng_state_cleanup(LodePNGState* state); -void lodepng_state_copy(LodePNGState* dest, const LodePNGState* source); -#endif /* defined(LODEPNG_COMPILE_DECODER) || defined(LODEPNG_COMPILE_ENCODER) */ - -#ifdef LODEPNG_COMPILE_DECODER -/* -Same as lodepng_decode_memory, but uses a LodePNGState to allow custom settings and -getting much more information about the PNG image and color mode. -*/ -unsigned lodepng_decode(unsigned char** out, unsigned* w, unsigned* h, - LodePNGState* state, - const unsigned char* in, size_t insize); - -/* -Read the PNG header, but not the actual data. This returns only the information -that is in the header chunk of the PNG, such as width, height and color type. The -information is placed in the info_png field of the LodePNGState. -*/ -unsigned lodepng_inspect(unsigned* w, unsigned* h, - LodePNGState* state, - const unsigned char* in, size_t insize); -#endif /*LODEPNG_COMPILE_DECODER*/ - - -#ifdef LODEPNG_COMPILE_ENCODER -/*This function allocates the out buffer with standard malloc and stores the size in *outsize.*/ -unsigned lodepng_encode(unsigned char** out, size_t* outsize, - const unsigned char* image, unsigned w, unsigned h, - LodePNGState* state); -#endif /*LODEPNG_COMPILE_ENCODER*/ - -/* -The lodepng_chunk functions are normally not needed, except to traverse the -unknown chunks stored in the LodePNGInfo struct, or add new ones to it. -It also allows traversing the chunks of an encoded PNG file yourself. - -PNG standard chunk naming conventions: -First byte: uppercase = critical, lowercase = ancillary -Second byte: uppercase = public, lowercase = private -Third byte: must be uppercase -Fourth byte: uppercase = unsafe to copy, lowercase = safe to copy -*/ - -/* -Gets the length of the data of the chunk. Total chunk length has 12 bytes more. -There must be at least 4 bytes to read from. If the result value is too large, -it may be corrupt data. 
-*/ -unsigned lodepng_chunk_length(const unsigned char* chunk); - -/*puts the 4-byte type in null terminated string*/ -void lodepng_chunk_type(char type[5], const unsigned char* chunk); - -/*check if the type is the given type*/ -unsigned char lodepng_chunk_type_equals(const unsigned char* chunk, const char* type); - -/*0: it's one of the critical chunk types, 1: it's an ancillary chunk (see PNG standard)*/ -unsigned char lodepng_chunk_ancillary(const unsigned char* chunk); - -/*0: public, 1: private (see PNG standard)*/ -unsigned char lodepng_chunk_private(const unsigned char* chunk); - -/*0: the chunk is unsafe to copy, 1: the chunk is safe to copy (see PNG standard)*/ -unsigned char lodepng_chunk_safetocopy(const unsigned char* chunk); - -/*get pointer to the data of the chunk, where the input points to the header of the chunk*/ -unsigned char* lodepng_chunk_data(unsigned char* chunk); -const unsigned char* lodepng_chunk_data_const(const unsigned char* chunk); - -/*returns 0 if the crc is correct, 1 if it's incorrect (0 for OK as usual!)*/ -unsigned lodepng_chunk_check_crc(const unsigned char* chunk); - -/*generates the correct CRC from the data and puts it in the last 4 bytes of the chunk*/ -void lodepng_chunk_generate_crc(unsigned char* chunk); - -/*iterate to next chunks. don't use on IEND chunk, as there is no next chunk then*/ -unsigned char* lodepng_chunk_next(unsigned char* chunk); -const unsigned char* lodepng_chunk_next_const(const unsigned char* chunk); - -/* -Appends chunk to the data in out. The given chunk should already have its chunk header. -The out variable and outlength are updated to reflect the new reallocated buffer. -Returns error code (0 if it went ok) -*/ -unsigned lodepng_chunk_append(unsigned char** out, size_t* outlength, const unsigned char* chunk); - -/* -Appends new chunk to out. The chunk to append is given by giving its length, type -and data separately. The type is a 4-letter string. -The out variable and outlength are updated to reflect the new reallocated buffer. -Returne error code (0 if it went ok) -*/ -unsigned lodepng_chunk_create(unsigned char** out, size_t* outlength, unsigned length, - const char* type, const unsigned char* data); - - -/*Calculate CRC32 of buffer*/ -unsigned lodepng_crc32(const unsigned char* buf, size_t len); -#endif /*LODEPNG_COMPILE_PNG*/ - - -#ifdef LODEPNG_COMPILE_ZLIB -/* -This zlib part can be used independently to zlib compress and decompress a -buffer. It cannot be used to create gzip files however, and it only supports the -part of zlib that is required for PNG, it does not support dictionaries. -*/ - -#ifdef LODEPNG_COMPILE_DECODER -/*Inflate a buffer. Inflate is the decompression step of deflate. Out buffer must be freed after use.*/ -unsigned lodepng_inflate(unsigned char** out, size_t* outsize, - const unsigned char* in, size_t insize, - const LodePNGDecompressSettings* settings); - -/* -Decompresses Zlib data. Reallocates the out buffer and appends the data. The -data must be according to the zlib specification. -Either, *out must be NULL and *outsize must be 0, or, *out must be a valid -buffer and *outsize its size in bytes. out must be freed by user after usage. -*/ -unsigned lodepng_zlib_decompress(unsigned char** out, size_t* outsize, - const unsigned char* in, size_t insize, - const LodePNGDecompressSettings* settings); -#endif /*LODEPNG_COMPILE_DECODER*/ - -#ifdef LODEPNG_COMPILE_ENCODER -/* -Compresses data with Zlib. Reallocates the out buffer and appends the data. 
-Zlib adds a small header and trailer around the deflate data. -The data is output in the format of the zlib specification. -Either, *out must be NULL and *outsize must be 0, or, *out must be a valid -buffer and *outsize its size in bytes. out must be freed by user after usage. -*/ -unsigned lodepng_zlib_compress(unsigned char** out, size_t* outsize, - const unsigned char* in, size_t insize, - const LodePNGCompressSettings* settings); - -/* -Find length-limited Huffman code for given frequencies. This function is in the -public interface only for tests, it's used internally by lodepng_deflate. -*/ -unsigned lodepng_huffman_code_lengths(unsigned* lengths, const unsigned* frequencies, - size_t numcodes, unsigned maxbitlen); - -/*Compress a buffer with deflate. See RFC 1951. Out buffer must be freed after use.*/ -unsigned lodepng_deflate(unsigned char** out, size_t* outsize, - const unsigned char* in, size_t insize, - const LodePNGCompressSettings* settings); - -#endif /*LODEPNG_COMPILE_ENCODER*/ -#endif /*LODEPNG_COMPILE_ZLIB*/ - -#ifdef LODEPNG_COMPILE_DISK -/* -Load a file from disk into buffer. The function allocates the out buffer, and -after usage you should free it. -out: output parameter, contains pointer to loaded buffer. -outsize: output parameter, size of the allocated out buffer -filename: the path to the file to load -return value: error code (0 means ok) -*/ -unsigned lodepng_load_file(unsigned char** out, size_t* outsize, const char* filename); - -/* -Save a file from buffer to disk. Warning, if it exists, this function overwrites -the file without warning! -buffer: the buffer to write -buffersize: size of the buffer to write -filename: the path to the file to save to -return value: error code (0 means ok) -*/ -unsigned lodepng_save_file(const unsigned char* buffer, size_t buffersize, const char* filename); -#endif /*LODEPNG_COMPILE_DISK*/ - -#ifdef LODEPNG_COMPILE_CPP -/* The LodePNG C++ wrapper uses std::vectors instead of manually allocated memory buffers. */ -namespace lodepng -{ -#ifdef LODEPNG_COMPILE_PNG -class State : public LodePNGState -{ - public: - State(); - State(const State& other); - virtual ~State(); - State& operator=(const State& other); -}; - -#ifdef LODEPNG_COMPILE_DECODER -/* Same as other lodepng::decode, but using a State for more settings and information. */ -unsigned decode(std::vector& out, unsigned& w, unsigned& h, - State& state, - const unsigned char* in, size_t insize); -unsigned decode(std::vector& out, unsigned& w, unsigned& h, - State& state, - const std::vector& in); -#endif /*LODEPNG_COMPILE_DECODER*/ - -#ifdef LODEPNG_COMPILE_ENCODER -/* Same as other lodepng::encode, but using a State for more settings and information. */ -unsigned encode(std::vector& out, - const unsigned char* in, unsigned w, unsigned h, - State& state); -unsigned encode(std::vector& out, - const std::vector& in, unsigned w, unsigned h, - State& state); -#endif /*LODEPNG_COMPILE_ENCODER*/ - -#ifdef LODEPNG_COMPILE_DISK -/* -Load a file from disk into an std::vector. -return value: error code (0 means ok) -*/ -unsigned load_file(std::vector& buffer, const std::string& filename); - -/* -Save the binary data in an std::vector to a file on disk. The file is overwritten -without warning. 
-*/ -unsigned save_file(const std::vector& buffer, const std::string& filename); -#endif /* LODEPNG_COMPILE_DISK */ -#endif /* LODEPNG_COMPILE_PNG */ - -#ifdef LODEPNG_COMPILE_ZLIB -#ifdef LODEPNG_COMPILE_DECODER -/* Zlib-decompress an unsigned char buffer */ -unsigned decompress(std::vector& out, const unsigned char* in, size_t insize, - const LodePNGDecompressSettings& settings = lodepng_default_decompress_settings); - -/* Zlib-decompress an std::vector */ -unsigned decompress(std::vector& out, const std::vector& in, - const LodePNGDecompressSettings& settings = lodepng_default_decompress_settings); -#endif /* LODEPNG_COMPILE_DECODER */ - -#ifdef LODEPNG_COMPILE_ENCODER -/* Zlib-compress an unsigned char buffer */ -unsigned compress(std::vector& out, const unsigned char* in, size_t insize, - const LodePNGCompressSettings& settings = lodepng_default_compress_settings); - -/* Zlib-compress an std::vector */ -unsigned compress(std::vector& out, const std::vector& in, - const LodePNGCompressSettings& settings = lodepng_default_compress_settings); -#endif /* LODEPNG_COMPILE_ENCODER */ -#endif /* LODEPNG_COMPILE_ZLIB */ -} /* namespace lodepng */ -#endif /*LODEPNG_COMPILE_CPP*/ - -/* -TODO: -[.] test if there are no memory leaks or security exploits - done a lot but needs to be checked often -[.] check compatibility with various compilers - done but needs to be redone for every newer version -[X] converting color to 16-bit per channel types -[ ] read all public PNG chunk types (but never let the color profile and gamma ones touch RGB values) -[ ] make sure encoder generates no chunks with size > (2^31)-1 -[ ] partial decoding (stream processing) -[X] let the "isFullyOpaque" function check color keys and transparent palettes too -[X] better name for the variables "codes", "codesD", "codelengthcodes", "clcl" and "lldl" -[ ] don't stop decoding on errors like 69, 57, 58 (make warnings) -[ ] let the C++ wrapper catch exceptions coming from the standard library and return LodePNG error codes -[ ] allow user to provide custom color conversion functions, e.g. for premultiplied alpha, padding bits or not, ... -[ ] allow user to give data (void*) to custom allocator -*/ - -#endif /*LODEPNG_H inclusion guard*/ - -/* -LodePNG Documentation ---------------------- - -0. table of contents --------------------- - - 1. about - 1.1. supported features - 1.2. features not supported - 2. C and C++ version - 3. security - 4. decoding - 5. encoding - 6. color conversions - 6.1. PNG color types - 6.2. color conversions - 6.3. padding bits - 6.4. A note about 16-bits per channel and endianness - 7. error values - 8. chunks and PNG editing - 9. compiler support - 10. examples - 10.1. decoder C++ example - 10.2. decoder C example - 11. state settings reference - 12. changes - 13. contact information - - -1. about --------- - -PNG is a file format to store raster images losslessly with good compression, -supporting different color types and alpha channel. - -LodePNG is a PNG codec according to the Portable Network Graphics (PNG) -Specification (Second Edition) - W3C Recommendation 10 November 2003. 
- -The specifications used are: - -*) Portable Network Graphics (PNG) Specification (Second Edition): - http://www.w3.org/TR/2003/REC-PNG-20031110 -*) RFC 1950 ZLIB Compressed Data Format version 3.3: - http://www.gzip.org/zlib/rfc-zlib.html -*) RFC 1951 DEFLATE Compressed Data Format Specification ver 1.3: - http://www.gzip.org/zlib/rfc-deflate.html - -The most recent version of LodePNG can currently be found at -http://lodev.org/lodepng/ - -LodePNG works both in C (ISO C90) and C++, with a C++ wrapper that adds -extra functionality. - -LodePNG exists out of two files: --lodepng.h: the header file for both C and C++ --lodepng.c(pp): give it the name lodepng.c or lodepng.cpp (or .cc) depending on your usage - -If you want to start using LodePNG right away without reading this doc, get the -examples from the LodePNG website to see how to use it in code, or check the -smaller examples in chapter 13 here. - -LodePNG is simple but only supports the basic requirements. To achieve -simplicity, the following design choices were made: There are no dependencies -on any external library. There are functions to decode and encode a PNG with -a single function call, and extended versions of these functions taking a -LodePNGState struct allowing to specify or get more information. By default -the colors of the raw image are always RGB or RGBA, no matter what color type -the PNG file uses. To read and write files, there are simple functions to -convert the files to/from buffers in memory. - -This all makes LodePNG suitable for loading textures in games, demos and small -programs, ... It's less suitable for full fledged image editors, loading PNGs -over network (it requires all the image data to be available before decoding can -begin), life-critical systems, ... - -1.1. supported features ------------------------ - -The following features are supported by the decoder: - -*) decoding of PNGs with any color type, bit depth and interlace mode, to a 24- or 32-bit color raw image, - or the same color type as the PNG -*) encoding of PNGs, from any raw image to 24- or 32-bit color, or the same color type as the raw image -*) Adam7 interlace and deinterlace for any color type -*) loading the image from harddisk or decoding it from a buffer from other sources than harddisk -*) support for alpha channels, including RGBA color model, translucent palettes and color keying -*) zlib decompression (inflate) -*) zlib compression (deflate) -*) CRC32 and ADLER32 checksums -*) handling of unknown chunks, allowing making a PNG editor that stores custom and unknown chunks. -*) the following chunks are supported (generated/interpreted) by both encoder and decoder: - IHDR: header information - PLTE: color palette - IDAT: pixel data - IEND: the final chunk - tRNS: transparency for palettized images - tEXt: textual information - zTXt: compressed textual information - iTXt: international textual information - bKGD: suggested background color - pHYs: physical dimensions - tIME: modification time - -1.2. features not supported ---------------------------- - -The following features are _not_ supported: - -*) some features needed to make a conformant PNG-Editor might be still missing. -*) partial loading/stream processing. All data must be available and is processed in one call. 
-*) The following public chunks are not supported but treated as unknown chunks by LodePNG - cHRM, gAMA, iCCP, sRGB, sBIT, hIST, sPLT - Some of these are not supported on purpose: LodePNG wants to provide the RGB values - stored in the pixels, not values modified by system dependent gamma or color models. - - -2. C and C++ version --------------------- - -The C version uses buffers allocated with alloc that you need to free() -yourself. You need to use init and cleanup functions for each struct whenever -using a struct from the C version to avoid exploits and memory leaks. - -The C++ version has extra functions with std::vectors in the interface and the -lodepng::State class which is a LodePNGState with constructor and destructor. - -These files work without modification for both C and C++ compilers because all -the additional C++ code is in "#ifdef __cplusplus" blocks that make C-compilers -ignore it, and the C code is made to compile both with strict ISO C90 and C++. - -To use the C++ version, you need to rename the source file to lodepng.cpp -(instead of lodepng.c), and compile it with a C++ compiler. - -To use the C version, you need to rename the source file to lodepng.c (instead -of lodepng.cpp), and compile it with a C compiler. - - -3. Security ------------ - -Even if carefully designed, it's always possible that LodePNG contains possible -exploits. If you discover one, please let me know, and it will be fixed. - -When using LodePNG, care has to be taken with the C version of LodePNG, as well -as the C-style structs when working with C++. The following conventions are used -for all C-style structs: - --if a struct has a corresponding init function, always call the init function when making a new one --if a struct has a corresponding cleanup function, call it before the struct disappears to avoid memory leaks --if a struct has a corresponding copy function, use the copy function instead of "=". - The destination must also be inited already. - - -4. Decoding ------------ - -Decoding converts a PNG compressed image to a raw pixel buffer. - -Most documentation on using the decoder is at its declarations in the header -above. For C, simple decoding can be done with functions such as -lodepng_decode32, and more advanced decoding can be done with the struct -LodePNGState and lodepng_decode. For C++, all decoding can be done with the -various lodepng::decode functions, and lodepng::State can be used for advanced -features. - -When using the LodePNGState, it uses the following fields for decoding: -*) LodePNGInfo info_png: it stores extra information about the PNG (the input) in here -*) LodePNGColorMode info_raw: here you can say what color mode of the raw image (the output) you want to get -*) LodePNGDecoderSettings decoder: you can specify a few extra settings for the decoder to use - -LodePNGInfo info_png --------------------- - -After decoding, this contains extra information of the PNG image, except the actual -pixels, width and height because these are already gotten directly from the decoder -functions. - -It contains for example the original color type of the PNG image, text comments, -suggested background color, etc... More details about the LodePNGInfo struct are -at its declaration documentation. - -LodePNGColorMode info_raw -------------------------- - -When decoding, here you can specify which color type you want -the resulting raw image to be. If this is different from the colortype of the -PNG, then the decoder will automatically convert the result. 
This conversion -always works, except if you want it to convert a color PNG to greyscale or to -a palette with missing colors. - -By default, 32-bit color is used for the result. - -LodePNGDecoderSettings decoder ------------------------------- - -The settings can be used to ignore the errors created by invalid CRC and Adler32 -chunks, and to disable the decoding of tEXt chunks. - -There's also a setting color_convert, true by default. If false, no conversion -is done, the resulting data will be as it was in the PNG (after decompression) -and you'll have to puzzle the colors of the pixels together yourself using the -color type information in the LodePNGInfo. - - -5. Encoding ------------ - -Encoding converts a raw pixel buffer to a PNG compressed image. - -Most documentation on using the encoder is at its declarations in the header -above. For C, simple encoding can be done with functions such as -lodepng_encode32, and more advanced decoding can be done with the struct -LodePNGState and lodepng_encode. For C++, all encoding can be done with the -various lodepng::encode functions, and lodepng::State can be used for advanced -features. - -Like the decoder, the encoder can also give errors. However it gives less errors -since the encoder input is trusted, the decoder input (a PNG image that could -be forged by anyone) is not trusted. - -When using the LodePNGState, it uses the following fields for encoding: -*) LodePNGInfo info_png: here you specify how you want the PNG (the output) to be. -*) LodePNGColorMode info_raw: here you say what color type of the raw image (the input) has -*) LodePNGEncoderSettings encoder: you can specify a few settings for the encoder to use - -LodePNGInfo info_png --------------------- - -When encoding, you use this the opposite way as when decoding: for encoding, -you fill in the values you want the PNG to have before encoding. By default it's -not needed to specify a color type for the PNG since it's automatically chosen, -but it's possible to choose it yourself given the right settings. - -The encoder will not always exactly match the LodePNGInfo struct you give, -it tries as close as possible. Some things are ignored by the encoder. The -encoder uses, for example, the following settings from it when applicable: -colortype and bitdepth, text chunks, time chunk, the color key, the palette, the -background color, the interlace method, unknown chunks, ... - -When encoding to a PNG with colortype 3, the encoder will generate a PLTE chunk. -If the palette contains any colors for which the alpha channel is not 255 (so -there are translucent colors in the palette), it'll add a tRNS chunk. - -LodePNGColorMode info_raw -------------------------- - -You specify the color type of the raw image that you give to the input here, -including a possible transparent color key and palette you happen to be using in -your raw image data. - -By default, 32-bit color is assumed, meaning your input has to be in RGBA -format with 4 bytes (unsigned chars) per pixel. - -LodePNGEncoderSettings encoder ------------------------------- - -The following settings are supported (some are in sub-structs): -*) auto_convert: when this option is enabled, the encoder will -automatically choose the smallest possible color mode (including color key) that -can encode the colors of all pixels without information loss. -*) btype: the block type for LZ77. 0 = uncompressed, 1 = fixed huffman tree, - 2 = dynamic huffman tree (best compression). Should be 2 for proper - compression. 
-*) use_lz77: whether or not to use LZ77 for compressed block types. Should be - true for proper compression. -*) windowsize: the window size used by the LZ77 encoder (1 - 32768). Has value - 2048 by default, but can be set to 32768 for better, but slow, compression. -*) force_palette: if colortype is 2 or 6, you can make the encoder write a PLTE - chunk if force_palette is true. This can used as suggested palette to convert - to by viewers that don't support more than 256 colors (if those still exist) -*) add_id: add text chunk "Encoder: LodePNG " to the image. -*) text_compression: default 1. If 1, it'll store texts as zTXt instead of tEXt chunks. - zTXt chunks use zlib compression on the text. This gives a smaller result on - large texts but a larger result on small texts (such as a single program name). - It's all tEXt or all zTXt though, there's no separate setting per text yet. - - -6. color conversions --------------------- - -An important thing to note about LodePNG, is that the color type of the PNG, and -the color type of the raw image, are completely independent. By default, when -you decode a PNG, you get the result as a raw image in the color type you want, -no matter whether the PNG was encoded with a palette, greyscale or RGBA color. -And if you encode an image, by default LodePNG will automatically choose the PNG -color type that gives good compression based on the values of colors and amount -of colors in the image. It can be configured to let you control it instead as -well, though. - -To be able to do this, LodePNG does conversions from one color mode to another. -It can convert from almost any color type to any other color type, except the -following conversions: RGB to greyscale is not supported, and converting to a -palette when the palette doesn't have a required color is not supported. This is -not supported on purpose: this is information loss which requires a color -reduction algorithm that is beyong the scope of a PNG encoder (yes, RGB to grey -is easy, but there are multiple ways if you want to give some channels more -weight). - -By default, when decoding, you get the raw image in 32-bit RGBA or 24-bit RGB -color, no matter what color type the PNG has. And by default when encoding, -LodePNG automatically picks the best color model for the output PNG, and expects -the input image to be 32-bit RGBA or 24-bit RGB. So, unless you want to control -the color format of the images yourself, you can skip this chapter. - -6.1. PNG color types --------------------- - -A PNG image can have many color types, ranging from 1-bit color to 64-bit color, -as well as palettized color modes. After the zlib decompression and unfiltering -in the PNG image is done, the raw pixel data will have that color type and thus -a certain amount of bits per pixel. If you want the output raw image after -decoding to have another color type, a conversion is done by LodePNG. - -The PNG specification gives the following color types: - -0: greyscale, bit depths 1, 2, 4, 8, 16 -2: RGB, bit depths 8 and 16 -3: palette, bit depths 1, 2, 4 and 8 -4: greyscale with alpha, bit depths 8 and 16 -6: RGBA, bit depths 8 and 16 - -Bit depth is the amount of bits per pixel per color channel. So the total amount -of bits per pixel is: amount of channels * bitdepth. - -6.2. color conversions ----------------------- - -As explained in the sections about the encoder and decoder, you can specify -color types and bit depths in info_png and info_raw to change the default -behaviour. 
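For example, here is a minimal C sketch of the state-based decoding path described next; the file name "test.png" is a placeholder, and it assumes lodepng is built with the disk and error-text sections enabled. It requests 16-bit RGBA raw output (one of the conversions listed as supported below) instead of the default 8-bit RGBA:

#include "lodepng.h"
#include <stdio.h>
#include <stdlib.h>

int main(void) {
  unsigned char* png = 0;
  size_t pngsize = 0;
  unsigned char* image = 0;
  unsigned w = 0, h = 0;
  unsigned error;
  LodePNGState state;

  lodepng_state_init(&state);
  /* ask the decoder for 16-bit RGBA raw output instead of the default 8-bit RGBA */
  state.info_raw.colortype = LCT_RGBA;
  state.info_raw.bitdepth = 16;

  error = lodepng_load_file(&png, &pngsize, "test.png");
  if(!error) error = lodepng_decode(&image, &w, &h, &state, png, pngsize);
  if(error) printf("error %u: %s\n", error, lodepng_error_text(error));

  /* image now holds w * h * 8 bytes: big-endian 16-bit RGBA samples */
  free(png);
  free(image);
  lodepng_state_cleanup(&state);
  return 0;
}

Freeing the decoded buffer with free() follows the allocation convention of the C interface described in chapter 2.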
- -If, when decoding, you want the raw image to be something else than the default, -you need to set the color type and bit depth you want in the LodePNGColorMode, -or the parameters colortype and bitdepth of the simple decoding function. - -If, when encoding, you use another color type than the default in the raw input -image, you need to specify its color type and bit depth in the LodePNGColorMode -of the raw image, or use the parameters colortype and bitdepth of the simple -encoding function. - -If, when encoding, you don't want LodePNG to choose the output PNG color type -but control it yourself, you need to set auto_convert in the encoder settings -to false, and specify the color type you want in the LodePNGInfo of the -encoder (including palette: it can generate a palette if auto_convert is true, -otherwise not). - -If the input and output color type differ (whether user chosen or auto chosen), -LodePNG will do a color conversion, which follows the rules below, and may -sometimes result in an error. - -To avoid some confusion: --the decoder converts from PNG to raw image --the encoder converts from raw image to PNG --the colortype and bitdepth in LodePNGColorMode info_raw, are those of the raw image --the colortype and bitdepth in the color field of LodePNGInfo info_png, are those of the PNG --when encoding, the color type in LodePNGInfo is ignored if auto_convert - is enabled, it is automatically generated instead --when decoding, the color type in LodePNGInfo is set by the decoder to that of the original - PNG image, but it can be ignored since the raw image has the color type you requested instead --if the color type of the LodePNGColorMode and PNG image aren't the same, a conversion - between the color types is done if the color types are supported. If it is not - supported, an error is returned. If the types are the same, no conversion is done. --even though some conversions aren't supported, LodePNG supports loading PNGs from any - colortype and saving PNGs to any colortype, sometimes it just requires preparing - the raw image correctly before encoding. --both encoder and decoder use the same color converter. - -Non supported color conversions: --color to greyscale: no error is thrown, but the result will look ugly because -only the red channel is taken --anything to palette when that palette does not have that color in it: in this -case an error is thrown - -Supported color conversions: --anything to 8-bit RGB, 8-bit RGBA, 16-bit RGB, 16-bit RGBA --any grey or grey+alpha, to grey or grey+alpha --anything to a palette, as long as the palette has the requested colors in it --removing alpha channel --higher to smaller bitdepth, and vice versa - -If you want no color conversion to be done (e.g. for speed or control): --In the encoder, you can make it save a PNG with any color type by giving the -raw color mode and LodePNGInfo the same color mode, and setting auto_convert to -false. --In the decoder, you can make it store the pixel data in the same color type -as the PNG has, by setting the color_convert setting to false. Settings in -info_raw are then ignored. - -The function lodepng_convert does the color conversion. It is available in the -interface but normally isn't needed since the encoder and decoder already call -it. - -6.3. padding bits ------------------ - -In the PNG file format, if a less than 8-bit per pixel color type is used and the scanlines -have a bit amount that isn't a multiple of 8, then padding bits are used so that each -scanline starts at a fresh byte. 
But that is NOT true for the LodePNG raw input and output. -The raw input image you give to the encoder, and the raw output image you get from the decoder -will NOT have these padding bits, e.g. in the case of a 1-bit image with a width -of 7 pixels, the first pixel of the second scanline will the the 8th bit of the first byte, -not the first bit of a new byte. - -6.4. A note about 16-bits per channel and endianness ----------------------------------------------------- - -LodePNG uses unsigned char arrays for 16-bit per channel colors too, just like -for any other color format. The 16-bit values are stored in big endian (most -significant byte first) in these arrays. This is the opposite order of the -little endian used by x86 CPU's. - -LodePNG always uses big endian because the PNG file format does so internally. -Conversions to other formats than PNG uses internally are not supported by -LodePNG on purpose, there are myriads of formats, including endianness of 16-bit -colors, the order in which you store R, G, B and A, and so on. Supporting and -converting to/from all that is outside the scope of LodePNG. - -This may mean that, depending on your use case, you may want to convert the big -endian output of LodePNG to little endian with a for loop. This is certainly not -always needed, many applications and libraries support big endian 16-bit colors -anyway, but it means you cannot simply cast the unsigned char* buffer to an -unsigned short* buffer on x86 CPUs. - - -7. error values ---------------- - -All functions in LodePNG that return an error code, return 0 if everything went -OK, or a non-zero code if there was an error. - -The meaning of the LodePNG error values can be retrieved with the function -lodepng_error_text: given the numerical error code, it returns a description -of the error in English as a string. - -Check the implementation of lodepng_error_text to see the meaning of each code. - - -8. chunks and PNG editing -------------------------- - -If you want to add extra chunks to a PNG you encode, or use LodePNG for a PNG -editor that should follow the rules about handling of unknown chunks, or if your -program is able to read other types of chunks than the ones handled by LodePNG, -then that's possible with the chunk functions of LodePNG. - -A PNG chunk has the following layout: - -4 bytes length -4 bytes type name -length bytes data -4 bytes CRC - -8.1. iterating through chunks ------------------------------ - -If you have a buffer containing the PNG image data, then the first chunk (the -IHDR chunk) starts at byte number 8 of that buffer. The first 8 bytes are the -signature of the PNG and are not part of a chunk. But if you start at byte 8 -then you have a chunk, and can check the following things of it. - -NOTE: none of these functions check for memory buffer boundaries. To avoid -exploits, always make sure the buffer contains all the data of the chunks. -When using lodepng_chunk_next, make sure the returned value is within the -allocated memory. - -unsigned lodepng_chunk_length(const unsigned char* chunk): - -Get the length of the chunk's data. The total chunk length is this length + 12. 
- -void lodepng_chunk_type(char type[5], const unsigned char* chunk): -unsigned char lodepng_chunk_type_equals(const unsigned char* chunk, const char* type): - -Get the type of the chunk or compare if it's a certain type - -unsigned char lodepng_chunk_critical(const unsigned char* chunk): -unsigned char lodepng_chunk_private(const unsigned char* chunk): -unsigned char lodepng_chunk_safetocopy(const unsigned char* chunk): - -Check if the chunk is critical in the PNG standard (only IHDR, PLTE, IDAT and IEND are). -Check if the chunk is private (public chunks are part of the standard, private ones not). -Check if the chunk is safe to copy. If it's not, then, when modifying data in a critical -chunk, unsafe to copy chunks of the old image may NOT be saved in the new one if your -program doesn't handle that type of unknown chunk. - -unsigned char* lodepng_chunk_data(unsigned char* chunk): -const unsigned char* lodepng_chunk_data_const(const unsigned char* chunk): - -Get a pointer to the start of the data of the chunk. - -unsigned lodepng_chunk_check_crc(const unsigned char* chunk): -void lodepng_chunk_generate_crc(unsigned char* chunk): - -Check if the crc is correct or generate a correct one. - -unsigned char* lodepng_chunk_next(unsigned char* chunk): -const unsigned char* lodepng_chunk_next_const(const unsigned char* chunk): - -Iterate to the next chunk. This works if you have a buffer with consecutive chunks. Note that these -functions do no boundary checking of the allocated data whatsoever, so make sure there is enough -data available in the buffer to be able to go to the next chunk. - -unsigned lodepng_chunk_append(unsigned char** out, size_t* outlength, const unsigned char* chunk): -unsigned lodepng_chunk_create(unsigned char** out, size_t* outlength, unsigned length, - const char* type, const unsigned char* data): - -These functions are used to create new chunks that are appended to the data in *out that has -length *outlength. The append function appends an existing chunk to the new data. The create -function creates a new chunk with the given parameters and appends it. Type is the 4-letter -name of the chunk. - -8.2. chunks in info_png ------------------------ - -The LodePNGInfo struct contains fields with the unknown chunk in it. It has 3 -buffers (each with size) to contain 3 types of unknown chunks: -the ones that come before the PLTE chunk, the ones that come between the PLTE -and the IDAT chunks, and the ones that come after the IDAT chunks. -It's necessary to make the distionction between these 3 cases because the PNG -standard forces to keep the ordering of unknown chunks compared to the critical -chunks, but does not force any other ordering rules. - -info_png.unknown_chunks_data[0] is the chunks before PLTE -info_png.unknown_chunks_data[1] is the chunks after PLTE, before IDAT -info_png.unknown_chunks_data[2] is the chunks after IDAT - -The chunks in these 3 buffers can be iterated through and read by using the same -way described in the previous subchapter. - -When using the decoder to decode a PNG, you can make it store all unknown chunks -if you set the option settings.remember_unknown_chunks to 1. By default, this -option is off (0). - -The encoder will always encode unknown chunks that are stored in the info_png. -If you need it to add a particular chunk that isn't known by LodePNG, you can -use lodepng_chunk_append or lodepng_chunk_create to the chunk data in -info_png.unknown_chunks_data[x]. - -Chunks that are known by LodePNG should not be added in that way. E.g. 
to make -LodePNG add a bKGD chunk, set background_defined to true and add the correct -parameters there instead. - - -9. compiler support -------------------- - -No libraries other than the current standard C library are needed to compile -LodePNG. For the C++ version, only the standard C++ library is needed on top. -Add the files lodepng.c(pp) and lodepng.h to your project, include -lodepng.h where needed, and your program can read/write PNG files. - -It is compatible with C90 and up, and C++03 and up. - -If performance is important, use optimization when compiling! For both the -encoder and decoder, this makes a large difference. - -Make sure that LodePNG is compiled with the same compiler of the same version -and with the same settings as the rest of the program, or the interfaces with -std::vectors and std::strings in C++ can be incompatible. - -CHAR_BITS must be 8 or higher, because LodePNG uses unsigned chars for octets. - -*) gcc and g++ - -LodePNG is developed in gcc so this compiler is natively supported. It gives no -warnings with compiler options "-Wall -Wextra -pedantic -ansi", with gcc and g++ -version 4.7.1 on Linux, 32-bit and 64-bit. - -*) Clang - -Fully supported and warning-free. - -*) Mingw - -The Mingw compiler (a port of gcc for Windows) should be fully supported by -LodePNG. - -*) Visual Studio and Visual C++ Express Edition - -LodePNG should be warning-free with warning level W4. Two warnings were disabled -with pragmas though: warning 4244 about implicit conversions, and warning 4996 -where it wants to use a non-standard function fopen_s instead of the standard C -fopen. - -Visual Studio may want "stdafx.h" files to be included in each source file and -give an error "unexpected end of file while looking for precompiled header". -This is not standard C++ and will not be added to the stock LodePNG. You can -disable it for lodepng.cpp only by right clicking it, Properties, C/C++, -Precompiled Headers, and set it to Not Using Precompiled Headers there. - -NOTE: Modern versions of VS should be fully supported, but old versions, e.g. -VS6, are not guaranteed to work. - -*) Compilers on Macintosh - -LodePNG has been reported to work both with gcc and LLVM for Macintosh, both for -C and C++. - -*) Other Compilers - -If you encounter problems on any compilers, feel free to let me know and I may -try to fix it if the compiler is modern and standards complient. - - -10. examples ------------- - -This decoder example shows the most basic usage of LodePNG. More complex -examples can be found on the LodePNG website. - -10.1. decoder C++ example -------------------------- - -#include "lodepng.h" -#include - -int main(int argc, char *argv[]) -{ - const char* filename = argc > 1 ? argv[1] : "test.png"; - - //load and decode - std::vector image; - unsigned width, height; - unsigned error = lodepng::decode(image, width, height, filename); - - //if there's an error, display it - if(error) std::cout << "decoder error " << error << ": " << lodepng_error_text(error) << std::endl; - - //the pixels are now in the vector "image", 4 bytes per pixel, ordered RGBARGBA..., use it as texture, draw it, ... -} - -10.2. decoder C example ------------------------ - -#include "lodepng.h" - -int main(int argc, char *argv[]) -{ - unsigned error; - unsigned char* image; - size_t width, height; - const char* filename = argc > 1 ? 
argv[1] : "test.png"; - - error = lodepng_decode32_file(&image, &width, &height, filename); - - if(error) printf("decoder error %u: %s\n", error, lodepng_error_text(error)); - - / * use image here * / - - free(image); - return 0; -} - -11. state settings reference ----------------------------- - -A quick reference of some settings to set on the LodePNGState - -For decoding: - -state.decoder.zlibsettings.ignore_adler32: ignore ADLER32 checksums -state.decoder.zlibsettings.custom_...: use custom inflate function -state.decoder.ignore_crc: ignore CRC checksums -state.decoder.color_convert: convert internal PNG color to chosen one -state.decoder.read_text_chunks: whether to read in text metadata chunks -state.decoder.remember_unknown_chunks: whether to read in unknown chunks -state.info_raw.colortype: desired color type for decoded image -state.info_raw.bitdepth: desired bit depth for decoded image -state.info_raw....: more color settings, see struct LodePNGColorMode -state.info_png....: no settings for decoder but ouput, see struct LodePNGInfo - -For encoding: - -state.encoder.zlibsettings.btype: disable compression by setting it to 0 -state.encoder.zlibsettings.use_lz77: use LZ77 in compression -state.encoder.zlibsettings.windowsize: tweak LZ77 windowsize -state.encoder.zlibsettings.minmatch: tweak min LZ77 length to match -state.encoder.zlibsettings.nicematch: tweak LZ77 match where to stop searching -state.encoder.zlibsettings.lazymatching: try one more LZ77 matching -state.encoder.zlibsettings.custom_...: use custom deflate function -state.encoder.auto_convert: choose optimal PNG color type, if 0 uses info_png -state.encoder.filter_palette_zero: PNG filter strategy for palette -state.encoder.filter_strategy: PNG filter strategy to encode with -state.encoder.force_palette: add palette even if not encoding to one -state.encoder.add_id: add LodePNG identifier and version as a text chunk -state.encoder.text_compression: use compressed text chunks for metadata -state.info_raw.colortype: color type of raw input image you provide -state.info_raw.bitdepth: bit depth of raw input image you provide -state.info_raw: more color settings, see struct LodePNGColorMode -state.info_png.color.colortype: desired color type if auto_convert is false -state.info_png.color.bitdepth: desired bit depth if auto_convert is false -state.info_png.color....: more color settings, see struct LodePNGColorMode -state.info_png....: more PNG related settings, see struct LodePNGInfo - - -12. changes ------------ - -The version number of LodePNG is the date of the change given in the format -yyyymmdd. - -Some changes aren't backwards compatible. Those are indicated with a (!) -symbol. - -*) 08 dec 2015: Made load_file function return error if file can't be opened. -*) 24 okt 2015: Bugfix with decoding to palette output. -*) 18 apr 2015: Boundary PM instead of just package-merge for faster encoding. -*) 23 aug 2014: Reduced needless memory usage of decoder. -*) 28 jun 2014: Removed fix_png setting, always support palette OOB for - simplicity. Made ColorProfile public. -*) 09 jun 2014: Faster encoder by fixing hash bug and more zeros optimization. -*) 22 dec 2013: Power of two windowsize required for optimization. -*) 15 apr 2013: Fixed bug with LAC_ALPHA and color key. -*) 25 mar 2013: Added an optional feature to ignore some PNG errors (fix_png). -*) 11 mar 2013 (!): Bugfix with custom free. 
Changed from "my" to "lodepng_" - prefix for the custom allocators and made it possible with a new #define to - use custom ones in your project without needing to change lodepng's code. -*) 28 jan 2013: Bugfix with color key. -*) 27 okt 2012: Tweaks in text chunk keyword length error handling. -*) 8 okt 2012 (!): Added new filter strategy (entropy) and new auto color mode. - (no palette). Better deflate tree encoding. New compression tweak settings. - Faster color conversions while decoding. Some internal cleanups. -*) 23 sep 2012: Reduced warnings in Visual Studio a little bit. -*) 1 sep 2012 (!): Removed #define's for giving custom (de)compression functions - and made it work with function pointers instead. -*) 23 jun 2012: Added more filter strategies. Made it easier to use custom alloc - and free functions and toggle #defines from compiler flags. Small fixes. -*) 6 may 2012 (!): Made plugging in custom zlib/deflate functions more flexible. -*) 22 apr 2012 (!): Made interface more consistent, renaming a lot. Removed - redundant C++ codec classes. Reduced amount of structs. Everything changed, - but it is cleaner now imho and functionality remains the same. Also fixed - several bugs and shrunk the implementation code. Made new samples. -*) 6 nov 2011 (!): By default, the encoder now automatically chooses the best - PNG color model and bit depth, based on the amount and type of colors of the - raw image. For this, autoLeaveOutAlphaChannel replaced by auto_choose_color. -*) 9 okt 2011: simpler hash chain implementation for the encoder. -*) 8 sep 2011: lz77 encoder lazy matching instead of greedy matching. -*) 23 aug 2011: tweaked the zlib compression parameters after benchmarking. - A bug with the PNG filtertype heuristic was fixed, so that it chooses much - better ones (it's quite significant). A setting to do an experimental, slow, - brute force search for PNG filter types is added. -*) 17 aug 2011 (!): changed some C zlib related function names. -*) 16 aug 2011: made the code less wide (max 120 characters per line). -*) 17 apr 2011: code cleanup. Bugfixes. Convert low to 16-bit per sample colors. -*) 21 feb 2011: fixed compiling for C90. Fixed compiling with sections disabled. -*) 11 dec 2010: encoding is made faster, based on suggestion by Peter Eastman - to optimize long sequences of zeros. -*) 13 nov 2010: added LodePNG_InfoColor_hasPaletteAlpha and - LodePNG_InfoColor_canHaveAlpha functions for convenience. -*) 7 nov 2010: added LodePNG_error_text function to get error code description. -*) 30 okt 2010: made decoding slightly faster -*) 26 okt 2010: (!) changed some C function and struct names (more consistent). - Reorganized the documentation and the declaration order in the header. -*) 08 aug 2010: only changed some comments and external samples. -*) 05 jul 2010: fixed bug thanks to warnings in the new gcc version. -*) 14 mar 2010: fixed bug where too much memory was allocated for char buffers. -*) 02 sep 2008: fixed bug where it could create empty tree that linux apps could - read by ignoring the problem but windows apps couldn't. -*) 06 jun 2008: added more error checks for out of memory cases. -*) 26 apr 2008: added a few more checks here and there to ensure more safety. -*) 06 mar 2008: crash with encoding of strings fixed -*) 02 feb 2008: support for international text chunks added (iTXt) -*) 23 jan 2008: small cleanups, and #defines to divide code in sections -*) 20 jan 2008: support for unknown chunks allowing using LodePNG for an editor. 
-*) 18 jan 2008: support for tIME and pHYs chunks added to encoder and decoder. -*) 17 jan 2008: ability to encode and decode compressed zTXt chunks added - Also various fixes, such as in the deflate and the padding bits code. -*) 13 jan 2008: Added ability to encode Adam7-interlaced images. Improved - filtering code of encoder. -*) 07 jan 2008: (!) changed LodePNG to use ISO C90 instead of C++. A - C++ wrapper around this provides an interface almost identical to before. - Having LodePNG be pure ISO C90 makes it more portable. The C and C++ code - are together in these files but it works both for C and C++ compilers. -*) 29 dec 2007: (!) changed most integer types to unsigned int + other tweaks -*) 30 aug 2007: bug fixed which makes this Borland C++ compatible -*) 09 aug 2007: some VS2005 warnings removed again -*) 21 jul 2007: deflate code placed in new namespace separate from zlib code -*) 08 jun 2007: fixed bug with 2- and 4-bit color, and small interlaced images -*) 04 jun 2007: improved support for Visual Studio 2005: crash with accessing - invalid std::vector element [0] fixed, and level 3 and 4 warnings removed -*) 02 jun 2007: made the encoder add a tag with version by default -*) 27 may 2007: zlib and png code separated (but still in the same file), - simple encoder/decoder functions added for more simple usage cases -*) 19 may 2007: minor fixes, some code cleaning, new error added (error 69), - moved some examples from here to lodepng_examples.cpp -*) 12 may 2007: palette decoding bug fixed -*) 24 apr 2007: changed the license from BSD to the zlib license -*) 11 mar 2007: very simple addition: ability to encode bKGD chunks. -*) 04 mar 2007: (!) tEXt chunk related fixes, and support for encoding - palettized PNG images. Plus little interface change with palette and texts. -*) 03 mar 2007: Made it encode dynamic Huffman shorter with repeat codes. - Fixed a bug where the end code of a block had length 0 in the Huffman tree. -*) 26 feb 2007: Huffman compression with dynamic trees (BTYPE 2) now implemented - and supported by the encoder, resulting in smaller PNGs at the output. -*) 27 jan 2007: Made the Adler-32 test faster so that a timewaste is gone. -*) 24 jan 2007: gave encoder an error interface. Added color conversion from any - greyscale type to 8-bit greyscale with or without alpha. -*) 21 jan 2007: (!) Totally changed the interface. It allows more color types - to convert to and is more uniform. See the manual for how it works now. -*) 07 jan 2007: Some cleanup & fixes, and a few changes over the last days: - encode/decode custom tEXt chunks, separate classes for zlib & deflate, and - at last made the decoder give errors for incorrect Adler32 or Crc. -*) 01 jan 2007: Fixed bug with encoding PNGs with less than 8 bits per channel. -*) 29 dec 2006: Added support for encoding images without alpha channel, and - cleaned out code as well as making certain parts faster. -*) 28 dec 2006: Added "Settings" to the encoder. -*) 26 dec 2006: The encoder now does LZ77 encoding and produces much smaller files now. - Removed some code duplication in the decoder. Fixed little bug in an example. -*) 09 dec 2006: (!) Placed output parameters of public functions as first parameter. - Fixed a bug of the decoder with 16-bit per color. -*) 15 okt 2006: Changed documentation structure -*) 09 okt 2006: Encoder class added. It encodes a valid PNG image from the - given image buffer, however for now it's not compressed. -*) 08 sep 2006: (!) 
Changed to interface with a Decoder class -*) 30 jul 2006: (!) LodePNG_InfoPng , width and height are now retrieved in different - way. Renamed decodePNG to decodePNGGeneric. -*) 29 jul 2006: (!) Changed the interface: image info is now returned as a - struct of type LodePNG::LodePNG_Info, instead of a vector, which was a bit clumsy. -*) 28 jul 2006: Cleaned the code and added new error checks. - Corrected terminology "deflate" into "inflate". -*) 23 jun 2006: Added SDL example in the documentation in the header, this - example allows easy debugging by displaying the PNG and its transparency. -*) 22 jun 2006: (!) Changed way to obtain error value. Added - loadFile function for convenience. Made decodePNG32 faster. -*) 21 jun 2006: (!) Changed type of info vector to unsigned. - Changed position of palette in info vector. Fixed an important bug that - happened on PNGs with an uncompressed block. -*) 16 jun 2006: Internally changed unsigned into unsigned where - needed, and performed some optimizations. -*) 07 jun 2006: (!) Renamed functions to decodePNG and placed them - in LodePNG namespace. Changed the order of the parameters. Rewrote the - documentation in the header. Renamed files to lodepng.cpp and lodepng.h -*) 22 apr 2006: Optimized and improved some code -*) 07 sep 2005: (!) Changed to std::vector interface -*) 12 aug 2005: Initial release (C++, decoder only) - - -13. contact information ------------------------ - -Feel free to contact me with suggestions, problems, comments, ... concerning -LodePNG. If you encounter a PNG image that doesn't work properly with this -decoder, feel free to send it and I'll use it to find and fix the problem. - -My email address is (puzzle the account and domain together with an @ symbol): -Domain: gmail dot com. -Account: lode dot vandevenne. - - -Copyright (c) 2005-2016 Lode Vandevenne -*/ +/* +LodePNG version 20201017 + +Copyright (c) 2005-2020 Lode Vandevenne + +This software is provided 'as-is', without any express or implied +warranty. In no event will the authors be held liable for any damages +arising from the use of this software. + +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it +freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + + 3. This notice may not be removed or altered from any source + distribution. +*/ + +#ifndef LODEPNG_H +#define LODEPNG_H + +#include /*for size_t*/ + +extern const char* LODEPNG_VERSION_STRING; + +/* +The following #defines are used to create code sections. They can be disabled +to disable code sections, which can give faster compile time and smaller binary. +The "NO_COMPILE" defines are designed to be used to pass as defines to the +compiler command to disable them without modifying this header, e.g. +-DLODEPNG_NO_COMPILE_ZLIB for gcc. +In addition to those below, you can also define LODEPNG_NO_COMPILE_CRC to +allow implementing a custom lodepng_crc32. +*/ +/*deflate & zlib. 
If disabled, you must specify alternative zlib functions in +the custom_zlib field of the compress and decompress settings*/ +#ifndef LODEPNG_NO_COMPILE_ZLIB +#define LODEPNG_COMPILE_ZLIB +#endif + +/*png encoder and png decoder*/ +#ifndef LODEPNG_NO_COMPILE_PNG +#define LODEPNG_COMPILE_PNG +#endif + +/*deflate&zlib decoder and png decoder*/ +#ifndef LODEPNG_NO_COMPILE_DECODER +#define LODEPNG_COMPILE_DECODER +#endif + +/*deflate&zlib encoder and png encoder*/ +#ifndef LODEPNG_NO_COMPILE_ENCODER +#define LODEPNG_COMPILE_ENCODER +#endif + +/*the optional built in harddisk file loading and saving functions*/ +#ifndef LODEPNG_NO_COMPILE_DISK +#define LODEPNG_COMPILE_DISK +#endif + +/*support for chunks other than IHDR, IDAT, PLTE, tRNS, IEND: ancillary and unknown chunks*/ +#ifndef LODEPNG_NO_COMPILE_ANCILLARY_CHUNKS +#define LODEPNG_COMPILE_ANCILLARY_CHUNKS +#endif + +/*ability to convert error numerical codes to English text string*/ +#ifndef LODEPNG_NO_COMPILE_ERROR_TEXT +#define LODEPNG_COMPILE_ERROR_TEXT +#endif + +/*Compile the default allocators (C's free, malloc and realloc). If you disable this, +you can define the functions lodepng_free, lodepng_malloc and lodepng_realloc in your +source files with custom allocators.*/ +#ifndef LODEPNG_NO_COMPILE_ALLOCATORS +#define LODEPNG_COMPILE_ALLOCATORS +#endif + +/*compile the C++ version (you can disable the C++ wrapper here even when compiling for C++)*/ +#ifdef __cplusplus +#ifndef LODEPNG_NO_COMPILE_CPP +#define LODEPNG_COMPILE_CPP +#endif +#endif + +#ifdef LODEPNG_COMPILE_CPP +#include +#include +#endif /*LODEPNG_COMPILE_CPP*/ + +#ifdef LODEPNG_COMPILE_PNG +/*The PNG color types (also used for raw image).*/ +typedef enum LodePNGColorType { + LCT_GREY = 0, /*grayscale: 1,2,4,8,16 bit*/ + LCT_RGB = 2, /*RGB: 8,16 bit*/ + LCT_PALETTE = 3, /*palette: 1,2,4,8 bit*/ + LCT_GREY_ALPHA = 4, /*grayscale with alpha: 8,16 bit*/ + LCT_RGBA = 6, /*RGB with alpha: 8,16 bit*/ + /*LCT_MAX_OCTET_VALUE lets the compiler allow this enum to represent any invalid + byte value from 0 to 255 that could be present in an invalid PNG file header. Do + not use, compare with or set the name LCT_MAX_OCTET_VALUE, instead either use + the valid color type names above, or numeric values like 1 or 7 when checking for + particular disallowed color type byte values, or cast to integer to print it.*/ + LCT_MAX_OCTET_VALUE = 255 +} LodePNGColorType; + +#ifdef LODEPNG_COMPILE_DECODER +/* +Converts PNG data in memory to raw pixel data. +out: Output parameter. Pointer to buffer that will contain the raw pixel data. + After decoding, its size is w * h * (bytes per pixel) bytes larger than + initially. Bytes per pixel depends on colortype and bitdepth. + Must be freed after usage with free(*out). + Note: for 16-bit per channel colors, uses big endian format like PNG does. +w: Output parameter. Pointer to width of pixel data. +h: Output parameter. Pointer to height of pixel data. +in: Memory buffer with the PNG file. +insize: size of the in buffer. +colortype: the desired color type for the raw output image. See explanation on PNG color types. +bitdepth: the desired bit depth for the raw output image. See explanation on PNG color types. +Return value: LodePNG error code (0 means no error). 
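
Purely as an illustrative sketch (editorial note, not part of the patch): the lodepng_decode_memory declaration that immediately follows this comment can be driven as below. png and pngsize are assumed to hold a PNG file already read into memory, and the caller asks for 32-bit RGBA output.

  #include <stdio.h>
  #include <stdlib.h>
  #include "lodepng.h"

  /* Decode an in-memory PNG (png/pngsize assumed to come from elsewhere)
     into a tightly packed RGBA8 buffer that the caller must free(). */
  int decode_rgba(const unsigned char* png, size_t pngsize) {
    unsigned char* pixels = NULL;
    unsigned w = 0, h = 0;
    unsigned error = lodepng_decode_memory(&pixels, &w, &h, png, pngsize, LCT_RGBA, 8);
    if (error) {
      printf("decode error %u\n", error);
      return 1;
    }
    printf("decoded %u x %u, %zu bytes\n", w, h, (size_t)w * h * 4);
    free(pixels); /* buffer was allocated by LodePNG */
    return 0;
  }
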
+*/ +unsigned lodepng_decode_memory(unsigned char** out, unsigned* w, unsigned* h, + const unsigned char* in, size_t insize, + LodePNGColorType colortype, unsigned bitdepth); + +/*Same as lodepng_decode_memory, but always decodes to 32-bit RGBA raw image*/ +unsigned lodepng_decode32(unsigned char** out, unsigned* w, unsigned* h, + const unsigned char* in, size_t insize); + +/*Same as lodepng_decode_memory, but always decodes to 24-bit RGB raw image*/ +unsigned lodepng_decode24(unsigned char** out, unsigned* w, unsigned* h, + const unsigned char* in, size_t insize); + +#ifdef LODEPNG_COMPILE_DISK +/* +Load PNG from disk, from file with given name. +Same as the other decode functions, but instead takes a filename as input. +*/ +unsigned lodepng_decode_file(unsigned char** out, unsigned* w, unsigned* h, + const char* filename, + LodePNGColorType colortype, unsigned bitdepth); + +/*Same as lodepng_decode_file, but always decodes to 32-bit RGBA raw image.*/ +unsigned lodepng_decode32_file(unsigned char** out, unsigned* w, unsigned* h, + const char* filename); + +/*Same as lodepng_decode_file, but always decodes to 24-bit RGB raw image.*/ +unsigned lodepng_decode24_file(unsigned char** out, unsigned* w, unsigned* h, + const char* filename); +#endif /*LODEPNG_COMPILE_DISK*/ +#endif /*LODEPNG_COMPILE_DECODER*/ + + +#ifdef LODEPNG_COMPILE_ENCODER +/* +Converts raw pixel data into a PNG image in memory. The colortype and bitdepth + of the output PNG image cannot be chosen, they are automatically determined + by the colortype, bitdepth and content of the input pixel data. + Note: for 16-bit per channel colors, needs big endian format like PNG does. +out: Output parameter. Pointer to buffer that will contain the PNG image data. + Must be freed after usage with free(*out). +outsize: Output parameter. Pointer to the size in bytes of the out buffer. +image: The raw pixel data to encode. The size of this buffer should be + w * h * (bytes per pixel), bytes per pixel depends on colortype and bitdepth. +w: width of the raw pixel data in pixels. +h: height of the raw pixel data in pixels. +colortype: the color type of the raw input image. See explanation on PNG color types. +bitdepth: the bit depth of the raw input image. See explanation on PNG color types. +Return value: LodePNG error code (0 means no error). +*/ +unsigned lodepng_encode_memory(unsigned char** out, size_t* outsize, + const unsigned char* image, unsigned w, unsigned h, + LodePNGColorType colortype, unsigned bitdepth); + +/*Same as lodepng_encode_memory, but always encodes from 32-bit RGBA raw image.*/ +unsigned lodepng_encode32(unsigned char** out, size_t* outsize, + const unsigned char* image, unsigned w, unsigned h); + +/*Same as lodepng_encode_memory, but always encodes from 24-bit RGB raw image.*/ +unsigned lodepng_encode24(unsigned char** out, size_t* outsize, + const unsigned char* image, unsigned w, unsigned h); + +#ifdef LODEPNG_COMPILE_DISK +/* +Converts raw pixel data into a PNG file on disk. +Same as the other encode functions, but instead takes a filename as output. +NOTE: This overwrites existing files without warning! 
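
For illustration only (editorial note, not part of the patch): a minimal sketch of the encode side using lodepng_encode32, declared above, assuming image points to a valid w*h RGBA8 buffer. lodepng_error_text, declared further down in this header, is used for the message.

  #include <stdio.h>
  #include <stdlib.h>
  #include "lodepng.h"

  /* Encode a raw RGBA8 buffer (image, w, h assumed valid) to a PNG in memory.
     The out buffer is allocated by LodePNG and must be free()d by the caller. */
  int encode_rgba(const unsigned char* image, unsigned w, unsigned h) {
    unsigned char* png = NULL;
    size_t pngsize = 0;
    unsigned error = lodepng_encode32(&png, &pngsize, image, w, h);
    if (error) {
      printf("encode error %u: %s\n", error, lodepng_error_text(error));
      free(png);
      return 1;
    }
    printf("encoded %zu bytes\n", pngsize);
    free(png);
    return 0;
  }
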
+*/ +unsigned lodepng_encode_file(const char* filename, + const unsigned char* image, unsigned w, unsigned h, + LodePNGColorType colortype, unsigned bitdepth); + +/*Same as lodepng_encode_file, but always encodes from 32-bit RGBA raw image.*/ +unsigned lodepng_encode32_file(const char* filename, + const unsigned char* image, unsigned w, unsigned h); + +/*Same as lodepng_encode_file, but always encodes from 24-bit RGB raw image.*/ +unsigned lodepng_encode24_file(const char* filename, + const unsigned char* image, unsigned w, unsigned h); +#endif /*LODEPNG_COMPILE_DISK*/ +#endif /*LODEPNG_COMPILE_ENCODER*/ + + +#ifdef LODEPNG_COMPILE_CPP +namespace lodepng { +#ifdef LODEPNG_COMPILE_DECODER +/*Same as lodepng_decode_memory, but decodes to an std::vector. The colortype +is the format to output the pixels to. Default is RGBA 8-bit per channel.*/ +unsigned decode(std::vector& out, unsigned& w, unsigned& h, + const unsigned char* in, size_t insize, + LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8); +unsigned decode(std::vector& out, unsigned& w, unsigned& h, + const std::vector& in, + LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8); +#ifdef LODEPNG_COMPILE_DISK +/* +Converts PNG file from disk to raw pixel data in memory. +Same as the other decode functions, but instead takes a filename as input. +*/ +unsigned decode(std::vector& out, unsigned& w, unsigned& h, + const std::string& filename, + LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8); +#endif /* LODEPNG_COMPILE_DISK */ +#endif /* LODEPNG_COMPILE_DECODER */ + +#ifdef LODEPNG_COMPILE_ENCODER +/*Same as lodepng_encode_memory, but encodes to an std::vector. colortype +is that of the raw input data. The output PNG color type will be auto chosen.*/ +unsigned encode(std::vector& out, + const unsigned char* in, unsigned w, unsigned h, + LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8); +unsigned encode(std::vector& out, + const std::vector& in, unsigned w, unsigned h, + LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8); +#ifdef LODEPNG_COMPILE_DISK +/* +Converts 32-bit RGBA raw pixel data into a PNG file on disk. +Same as the other encode functions, but instead takes a filename as output. +NOTE: This overwrites existing files without warning! 
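
Illustrative sketch (editorial note, not part of the patch): the C++ wrapper overloads above work on std::vector buffers, so no manual free() is needed. The vector element type was lost in the extracted text above, so the sketch assumes std::vector<unsigned char>; pngfile is assumed to already hold a PNG file's bytes.

  #include <cstdio>
  #include <vector>
  #include "lodepng.h"

  // Decode a PNG already loaded into `pngfile`, then re-encode the pixels.
  int roundtrip(const std::vector<unsigned char>& pngfile) {
    std::vector<unsigned char> pixels;  // RGBA8 output, w * h * 4 bytes
    unsigned w = 0, h = 0;
    unsigned error = lodepng::decode(pixels, w, h, pngfile); // defaults: LCT_RGBA, 8
    if (error) {
      std::printf("decode error %u: %s\n", error, lodepng_error_text(error));
      return 1;
    }

    std::vector<unsigned char> pngout;
    error = lodepng::encode(pngout, pixels, w, h); // PNG color type chosen automatically by default
    if (error) {
      std::printf("encode error %u: %s\n", error, lodepng_error_text(error));
      return 1;
    }
    std::printf("re-encoded %u x %u into %zu bytes\n", w, h, pngout.size());
    return 0;
  }
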
+*/ +unsigned encode(const std::string& filename, + const unsigned char* in, unsigned w, unsigned h, + LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8); +unsigned encode(const std::string& filename, + const std::vector& in, unsigned w, unsigned h, + LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8); +#endif /* LODEPNG_COMPILE_DISK */ +#endif /* LODEPNG_COMPILE_ENCODER */ +} /* namespace lodepng */ +#endif /*LODEPNG_COMPILE_CPP*/ +#endif /*LODEPNG_COMPILE_PNG*/ + +#ifdef LODEPNG_COMPILE_ERROR_TEXT +/*Returns an English description of the numerical error code.*/ +const char* lodepng_error_text(unsigned code); +#endif /*LODEPNG_COMPILE_ERROR_TEXT*/ + +#ifdef LODEPNG_COMPILE_DECODER +/*Settings for zlib decompression*/ +typedef struct LodePNGDecompressSettings LodePNGDecompressSettings; +struct LodePNGDecompressSettings { + /* Check LodePNGDecoderSettings for more ignorable errors such as ignore_crc */ + unsigned ignore_adler32; /*if 1, continue and don't give an error message if the Adler32 checksum is corrupted*/ + unsigned ignore_nlen; /*ignore complement of len checksum in uncompressed blocks*/ + + /*Maximum decompressed size, beyond this the decoder may (and is encouraged to) stop decoding, + return an error, output a data size > max_output_size and all the data up to that point. This is + not hard limit nor a guarantee, but can prevent excessive memory usage. This setting is + ignored by the PNG decoder, but is used by the deflate/zlib decoder and can be used by custom ones. + Set to 0 to impose no limit (the default).*/ + size_t max_output_size; + + /*use custom zlib decoder instead of built in one (default: null). + Should return 0 if success, any non-0 if error (numeric value not exposed).*/ + unsigned (*custom_zlib)(unsigned char**, size_t*, + const unsigned char*, size_t, + const LodePNGDecompressSettings*); + /*use custom deflate decoder instead of built in one (default: null) + if custom_zlib is not null, custom_inflate is ignored (the zlib format uses deflate). + Should return 0 if success, any non-0 if error (numeric value not exposed).*/ + unsigned (*custom_inflate)(unsigned char**, size_t*, + const unsigned char*, size_t, + const LodePNGDecompressSettings*); + + const void* custom_context; /*optional custom settings for custom functions*/ +}; + +extern const LodePNGDecompressSettings lodepng_default_decompress_settings; +void lodepng_decompress_settings_init(LodePNGDecompressSettings* settings); +#endif /*LODEPNG_COMPILE_DECODER*/ + +#ifdef LODEPNG_COMPILE_ENCODER +/* +Settings for zlib compression. Tweaking these settings tweaks the balance +between speed and compression ratio. +*/ +typedef struct LodePNGCompressSettings LodePNGCompressSettings; +struct LodePNGCompressSettings /*deflate = compress*/ { + /*LZ77 related settings*/ + unsigned btype; /*the block type for LZ (0, 1, 2 or 3, see zlib standard). Should be 2 for proper compression.*/ + unsigned use_lz77; /*whether or not to use LZ77. Should be 1 for proper compression.*/ + unsigned windowsize; /*must be a power of two <= 32768. higher compresses more but is slower. Default value: 2048.*/ + unsigned minmatch; /*minimum lz77 length. 3 is normally best, 6 can be better for some PNGs. Default: 0*/ + unsigned nicematch; /*stop searching if >= this length found. Set to 258 for best compression. Default: 128*/ + unsigned lazymatching; /*use lazy matching: better compression but a bit slower. 
Default: true*/ + + /*use custom zlib encoder instead of built in one (default: null)*/ + unsigned (*custom_zlib)(unsigned char**, size_t*, + const unsigned char*, size_t, + const LodePNGCompressSettings*); + /*use custom deflate encoder instead of built in one (default: null) + if custom_zlib is used, custom_deflate is ignored since only the built in + zlib function will call custom_deflate*/ + unsigned (*custom_deflate)(unsigned char**, size_t*, + const unsigned char*, size_t, + const LodePNGCompressSettings*); + + const void* custom_context; /*optional custom settings for custom functions*/ +}; + +extern const LodePNGCompressSettings lodepng_default_compress_settings; +void lodepng_compress_settings_init(LodePNGCompressSettings* settings); +#endif /*LODEPNG_COMPILE_ENCODER*/ + +#ifdef LODEPNG_COMPILE_PNG +/* +Color mode of an image. Contains all information required to decode the pixel +bits to RGBA colors. This information is the same as used in the PNG file +format, and is used both for PNG and raw image data in LodePNG. +*/ +typedef struct LodePNGColorMode { + /*header (IHDR)*/ + LodePNGColorType colortype; /*color type, see PNG standard or documentation further in this header file*/ + unsigned bitdepth; /*bits per sample, see PNG standard or documentation further in this header file*/ + + /* + palette (PLTE and tRNS) + + Dynamically allocated with the colors of the palette, including alpha. + This field may not be allocated directly, use lodepng_color_mode_init first, + then lodepng_palette_add per color to correctly initialize it (to ensure size + of exactly 1024 bytes). + + The alpha channels must be set as well, set them to 255 for opaque images. + + When decoding, by default you can ignore this palette, since LodePNG already + fills the palette colors in the pixels of the raw RGBA output. + + The palette is only supported for color type 3. + */ + unsigned char* palette; /*palette in RGBARGBA... order. Must be either 0, or when allocated must have 1024 bytes*/ + size_t palettesize; /*palette size in number of colors (amount of used bytes is 4 * palettesize)*/ + + /* + transparent color key (tRNS) + + This color uses the same bit depth as the bitdepth value in this struct, which can be 1-bit to 16-bit. + For grayscale PNGs, r, g and b will all 3 be set to the same. + + When decoding, by default you can ignore this information, since LodePNG sets + pixels with this key to transparent already in the raw RGBA output. + + The color key is only supported for color types 0 and 2. + */ + unsigned key_defined; /*is a transparent color key given? 
0 = false, 1 = true*/ + unsigned key_r; /*red/grayscale component of color key*/ + unsigned key_g; /*green component of color key*/ + unsigned key_b; /*blue component of color key*/ +} LodePNGColorMode; + +/*init, cleanup and copy functions to use with this struct*/ +void lodepng_color_mode_init(LodePNGColorMode* info); +void lodepng_color_mode_cleanup(LodePNGColorMode* info); +/*return value is error code (0 means no error)*/ +unsigned lodepng_color_mode_copy(LodePNGColorMode* dest, const LodePNGColorMode* source); +/* Makes a temporary LodePNGColorMode that does not need cleanup (no palette) */ +LodePNGColorMode lodepng_color_mode_make(LodePNGColorType colortype, unsigned bitdepth); + +void lodepng_palette_clear(LodePNGColorMode* info); +/*add 1 color to the palette*/ +unsigned lodepng_palette_add(LodePNGColorMode* info, + unsigned char r, unsigned char g, unsigned char b, unsigned char a); + +/*get the total amount of bits per pixel, based on colortype and bitdepth in the struct*/ +unsigned lodepng_get_bpp(const LodePNGColorMode* info); +/*get the amount of color channels used, based on colortype in the struct. +If a palette is used, it counts as 1 channel.*/ +unsigned lodepng_get_channels(const LodePNGColorMode* info); +/*is it a grayscale type? (only colortype 0 or 4)*/ +unsigned lodepng_is_greyscale_type(const LodePNGColorMode* info); +/*has it got an alpha channel? (only colortype 2 or 6)*/ +unsigned lodepng_is_alpha_type(const LodePNGColorMode* info); +/*has it got a palette? (only colortype 3)*/ +unsigned lodepng_is_palette_type(const LodePNGColorMode* info); +/*only returns true if there is a palette and there is a value in the palette with alpha < 255. +Loops through the palette to check this.*/ +unsigned lodepng_has_palette_alpha(const LodePNGColorMode* info); +/* +Check if the given color info indicates the possibility of having non-opaque pixels in the PNG image. +Returns true if the image can have translucent or invisible pixels (it still be opaque if it doesn't use such pixels). +Returns false if the image can only have opaque pixels. +In detail, it returns true only if it's a color type with alpha, or has a palette with non-opaque values, +or if "key_defined" is true. +*/ +unsigned lodepng_can_have_alpha(const LodePNGColorMode* info); +/*Returns the byte size of a raw image buffer with given width, height and color mode*/ +size_t lodepng_get_raw_size(unsigned w, unsigned h, const LodePNGColorMode* color); + +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS +/*The information of a Time chunk in PNG.*/ +typedef struct LodePNGTime { + unsigned year; /*2 bytes used (0-65535)*/ + unsigned month; /*1-12*/ + unsigned day; /*1-31*/ + unsigned hour; /*0-23*/ + unsigned minute; /*0-59*/ + unsigned second; /*0-60 (to allow for leap seconds)*/ +} LodePNGTime; +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ + +/*Information about the PNG image, except pixels, width and height.*/ +typedef struct LodePNGInfo { + /*header (IHDR), palette (PLTE) and transparency (tRNS) chunks*/ + unsigned compression_method;/*compression method of the original file. 
Always 0.*/ + unsigned filter_method; /*filter method of the original file*/ + unsigned interlace_method; /*interlace method of the original file: 0=none, 1=Adam7*/ + LodePNGColorMode color; /*color type and bits, palette and transparency of the PNG file*/ + +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS + /* + Suggested background color chunk (bKGD) + + This uses the same color mode and bit depth as the PNG (except no alpha channel), + with values truncated to the bit depth in the unsigned integer. + + For grayscale and palette PNGs, the value is stored in background_r. The values + in background_g and background_b are then unused. + + So when decoding, you may get these in a different color mode than the one you requested + for the raw pixels. + + When encoding with auto_convert, you must use the color model defined in info_png.color for + these values. The encoder normally ignores info_png.color when auto_convert is on, but will + use it to interpret these values (and convert copies of them to its chosen color model). + + When encoding, avoid setting this to an expensive color, such as a non-gray value + when the image is gray, or the compression will be worse since it will be forced to + write the PNG with a more expensive color mode (when auto_convert is on). + + The decoder does not use this background color to edit the color of pixels. This is a + completely optional metadata feature. + */ + unsigned background_defined; /*is a suggested background color given?*/ + unsigned background_r; /*red/gray/palette component of suggested background color*/ + unsigned background_g; /*green component of suggested background color*/ + unsigned background_b; /*blue component of suggested background color*/ + + /* + Non-international text chunks (tEXt and zTXt) + + The char** arrays each contain num strings. The actual messages are in + text_strings, while text_keys are keywords that give a short description what + the actual text represents, e.g. Title, Author, Description, or anything else. + + All the string fields below including strings, keys, names and language tags are null terminated. + The PNG specification uses null characters for the keys, names and tags, and forbids null + characters to appear in the main text which is why we can use null termination everywhere here. + + A keyword is minimum 1 character and maximum 79 characters long (plus the + additional null terminator). It's discouraged to use a single line length + longer than 79 characters for texts. + + Don't allocate these text buffers yourself. Use the init/cleanup functions + correctly and use lodepng_add_text and lodepng_clear_text. + + Standard text chunk keywords and strings are encoded using Latin-1. + */ + size_t text_num; /*the amount of texts in these char** buffers (there may be more texts in itext)*/ + char** text_keys; /*the keyword of a text chunk (e.g. "Comment")*/ + char** text_strings; /*the actual text*/ + + /* + International text chunks (iTXt) + Similar to the non-international text chunks, but with additional strings + "langtags" and "transkeys", and the following text encodings are used: + keys: Latin-1, langtags: ASCII, transkeys and strings: UTF-8. + keys must be 1-79 characters (plus the additional null terminator), the other + strings are any length. + */ + size_t itext_num; /*the amount of international texts in this PNG*/ + char** itext_keys; /*the English keyword of the text chunk (e.g. "Comment")*/ + char** itext_langtags; /*language tag for this text's language, ISO/IEC 646 string, e.g. 
ISO 639 language tag*/ + char** itext_transkeys; /*keyword translated to the international language - UTF-8 string*/ + char** itext_strings; /*the actual international text - UTF-8 string*/ + + /*time chunk (tIME)*/ + unsigned time_defined; /*set to 1 to make the encoder generate a tIME chunk*/ + LodePNGTime time; + + /*phys chunk (pHYs)*/ + unsigned phys_defined; /*if 0, there is no pHYs chunk and the values below are undefined, if 1 else there is one*/ + unsigned phys_x; /*pixels per unit in x direction*/ + unsigned phys_y; /*pixels per unit in y direction*/ + unsigned phys_unit; /*may be 0 (unknown unit) or 1 (metre)*/ + + /* + Color profile related chunks: gAMA, cHRM, sRGB, iCPP + + LodePNG does not apply any color conversions on pixels in the encoder or decoder and does not interpret these color + profile values. It merely passes on the information. If you wish to use color profiles and convert colors, please + use these values with a color management library. + + See the PNG, ICC and sRGB specifications for more information about the meaning of these values. + */ + + /* gAMA chunk: optional, overridden by sRGB or iCCP if those are present. */ + unsigned gama_defined; /* Whether a gAMA chunk is present (0 = not present, 1 = present). */ + unsigned gama_gamma; /* Gamma exponent times 100000 */ + + /* cHRM chunk: optional, overridden by sRGB or iCCP if those are present. */ + unsigned chrm_defined; /* Whether a cHRM chunk is present (0 = not present, 1 = present). */ + unsigned chrm_white_x; /* White Point x times 100000 */ + unsigned chrm_white_y; /* White Point y times 100000 */ + unsigned chrm_red_x; /* Red x times 100000 */ + unsigned chrm_red_y; /* Red y times 100000 */ + unsigned chrm_green_x; /* Green x times 100000 */ + unsigned chrm_green_y; /* Green y times 100000 */ + unsigned chrm_blue_x; /* Blue x times 100000 */ + unsigned chrm_blue_y; /* Blue y times 100000 */ + + /* + sRGB chunk: optional. May not appear at the same time as iCCP. + If gAMA is also present gAMA must contain value 45455. + If cHRM is also present cHRM must contain respectively 31270,32900,64000,33000,30000,60000,15000,6000. + */ + unsigned srgb_defined; /* Whether an sRGB chunk is present (0 = not present, 1 = present). */ + unsigned srgb_intent; /* Rendering intent: 0=perceptual, 1=rel. colorimetric, 2=saturation, 3=abs. colorimetric */ + + /* + iCCP chunk: optional. May not appear at the same time as sRGB. + + LodePNG does not parse or use the ICC profile (except its color space header field for an edge case), a + separate library to handle the ICC data (not included in LodePNG) format is needed to use it for color + management and conversions. + + For encoding, if iCCP is present, gAMA and cHRM are recommended to be added as well with values that match the ICC + profile as closely as possible, if you wish to do this you should provide the correct values for gAMA and cHRM and + enable their '_defined' flags since LodePNG will not automatically compute them from the ICC profile. + + For encoding, the ICC profile is required by the PNG specification to be an "RGB" profile for non-gray + PNG color types and a "GRAY" profile for gray PNG color types. If you disable auto_convert, you must ensure + the ICC profile type matches your requested color type, else the encoder gives an error. 
If auto_convert is + enabled (the default), and the ICC profile is not a good match for the pixel data, this will result in an encoder + error if the pixel data has non-gray pixels for a GRAY profile, or a silent less-optimal compression of the pixel + data if the pixels could be encoded as grayscale but the ICC profile is RGB. + + To avoid this do not set an ICC profile in the image unless there is a good reason for it, and when doing so + make sure you compute it carefully to avoid the above problems. + */ + unsigned iccp_defined; /* Whether an iCCP chunk is present (0 = not present, 1 = present). */ + char* iccp_name; /* Null terminated string with profile name, 1-79 bytes */ + /* + The ICC profile in iccp_profile_size bytes. + Don't allocate this buffer yourself. Use the init/cleanup functions + correctly and use lodepng_set_icc and lodepng_clear_icc. + */ + unsigned char* iccp_profile; + unsigned iccp_profile_size; /* The size of iccp_profile in bytes */ + + /* End of color profile related chunks */ + + + /* + unknown chunks: chunks not known by LodePNG, passed on byte for byte. + + There are 3 buffers, one for each position in the PNG where unknown chunks can appear. + Each buffer contains all unknown chunks for that position consecutively. + The 3 positions are: + 0: between IHDR and PLTE, 1: between PLTE and IDAT, 2: between IDAT and IEND. + + For encoding, do not store critical chunks or known chunks that are enabled with a "_defined" flag + above in here, since the encoder will blindly follow this and could then encode an invalid PNG file + (such as one with two IHDR chunks or the disallowed combination of sRGB with iCCP). But do use + this if you wish to store an ancillary chunk that is not supported by LodePNG (such as sPLT or hIST), + or any non-standard PNG chunk. + + Do not allocate or traverse this data yourself. Use the chunk traversing functions declared + later, such as lodepng_chunk_next and lodepng_chunk_append, to read/write this struct. + */ + unsigned char* unknown_chunks_data[3]; + size_t unknown_chunks_size[3]; /*size in bytes of the unknown chunks, given for protection*/ +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ +} LodePNGInfo; + +/*init, cleanup and copy functions to use with this struct*/ +void lodepng_info_init(LodePNGInfo* info); +void lodepng_info_cleanup(LodePNGInfo* info); +/*return value is error code (0 means no error)*/ +unsigned lodepng_info_copy(LodePNGInfo* dest, const LodePNGInfo* source); + +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS +unsigned lodepng_add_text(LodePNGInfo* info, const char* key, const char* str); /*push back both texts at once*/ +void lodepng_clear_text(LodePNGInfo* info); /*use this to clear the texts again after you filled them in*/ + +unsigned lodepng_add_itext(LodePNGInfo* info, const char* key, const char* langtag, + const char* transkey, const char* str); /*push back the 4 texts of 1 chunk at once*/ +void lodepng_clear_itext(LodePNGInfo* info); /*use this to clear the itexts again after you filled them in*/ + +/*replaces if exists*/ +unsigned lodepng_set_icc(LodePNGInfo* info, const char* name, const unsigned char* profile, unsigned profile_size); +void lodepng_clear_icc(LodePNGInfo* info); /*use this to clear the texts again after you filled them in*/ +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ + +/* +Converts raw buffer from one color type to another color type, based on +LodePNGColorMode structs to describe the input and output color type. 
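
Illustrative sketch (editorial note, not part of the patch): lodepng_convert, declared just below, repacks a raw buffer between two color modes described by LodePNGColorMode. The sketch assumes rgb points to a valid w*h RGB8 image and expands it to RGBA8 (adding an opaque alpha channel), using the lodepng_color_mode_make and lodepng_get_raw_size helpers declared earlier.

  #include <stdio.h>
  #include <stdlib.h>
  #include "lodepng.h"

  /* Expand an RGB8 buffer to RGBA8 with lodepng_convert. */
  int add_alpha(const unsigned char* rgb, unsigned w, unsigned h) {
    LodePNGColorMode mode_in  = lodepng_color_mode_make(LCT_RGB, 8);
    LodePNGColorMode mode_out = lodepng_color_mode_make(LCT_RGBA, 8);

    size_t outsize = lodepng_get_raw_size(w, h, &mode_out); /* w * h * 4 here */
    unsigned char* rgba = (unsigned char*)malloc(outsize);
    if (!rgba) return 1;

    unsigned error = lodepng_convert(rgba, rgb, &mode_out, &mode_in, w, h);
    if (error) printf("convert error %u: %s\n", error, lodepng_error_text(error));

    /* ... use rgba ... */
    free(rgba);
    return error ? 1 : 0;
  }
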
+See the reference manual at the end of this header file to see which color conversions are supported. +return value = LodePNG error code (0 if all went ok, an error if the conversion isn't supported) +The out buffer must have size (w * h * bpp + 7) / 8, where bpp is the bits per pixel +of the output color type (lodepng_get_bpp). +For < 8 bpp images, there should not be padding bits at the end of scanlines. +For 16-bit per channel colors, uses big endian format like PNG does. +Return value is LodePNG error code +*/ +unsigned lodepng_convert(unsigned char* out, const unsigned char* in, + const LodePNGColorMode* mode_out, const LodePNGColorMode* mode_in, + unsigned w, unsigned h); + +#ifdef LODEPNG_COMPILE_DECODER +/* +Settings for the decoder. This contains settings for the PNG and the Zlib +decoder, but not the Info settings from the Info structs. +*/ +typedef struct LodePNGDecoderSettings { + LodePNGDecompressSettings zlibsettings; /*in here is the setting to ignore Adler32 checksums*/ + + /* Check LodePNGDecompressSettings for more ignorable errors such as ignore_adler32 */ + unsigned ignore_crc; /*ignore CRC checksums*/ + unsigned ignore_critical; /*ignore unknown critical chunks*/ + unsigned ignore_end; /*ignore issues at end of file if possible (missing IEND chunk, too large chunk, ...)*/ + /* TODO: make a system involving warnings with levels and a strict mode instead. Other potentially recoverable + errors: srgb rendering intent value, size of content of ancillary chunks, more than 79 characters for some + strings, placement/combination rules for ancillary chunks, crc of unknown chunks, allowed characters + in string keys, etc... */ + + unsigned color_convert; /*whether to convert the PNG to the color type you want. Default: yes*/ + +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS + unsigned read_text_chunks; /*if false but remember_unknown_chunks is true, they're stored in the unknown chunks*/ + + /*store all bytes from unknown chunks in the LodePNGInfo (off by default, useful for a png editor)*/ + unsigned remember_unknown_chunks; + + /* maximum size for decompressed text chunks. If a text chunk's text is larger than this, an error is returned, + unless reading text chunks is disabled or this limit is set higher or disabled. Set to 0 to allow any size. + By default it is a value that prevents unreasonably large strings from hogging memory. */ + size_t max_text_size; + + /* maximum size for compressed ICC chunks. If the ICC profile is larger than this, an error will be returned. Set to + 0 to allow any size. By default this is a value that prevents ICC profiles that would be much larger than any + legitimate profile could be to hog memory. */ + size_t max_icc_size; +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ +} LodePNGDecoderSettings; + +void lodepng_decoder_settings_init(LodePNGDecoderSettings* settings); +#endif /*LODEPNG_COMPILE_DECODER*/ + +#ifdef LODEPNG_COMPILE_ENCODER +/*automatically use color type with less bits per pixel if losslessly possible. Default: AUTO*/ +typedef enum LodePNGFilterStrategy { + /*every filter at zero*/ + LFS_ZERO = 0, + /*every filter at 1, 2, 3 or 4 (paeth), unlike LFS_ZERO not a good choice, but for testing*/ + LFS_ONE = 1, + LFS_TWO = 2, + LFS_THREE = 3, + LFS_FOUR = 4, + /*Use filter that gives minimum sum, as described in the official PNG filter heuristic.*/ + LFS_MINSUM, + /*Use the filter type that gives smallest Shannon entropy for this scanline. 
Depending + on the image, this is better or worse than minsum.*/ + LFS_ENTROPY, + /* + Brute-force-search PNG filters by compressing each filter for each scanline. + Experimental, very slow, and only rarely gives better compression than MINSUM. + */ + LFS_BRUTE_FORCE, + /*use predefined_filters buffer: you specify the filter type for each scanline*/ + LFS_PREDEFINED +} LodePNGFilterStrategy; + +/*Gives characteristics about the integer RGBA colors of the image (count, alpha channel usage, bit depth, ...), +which helps decide which color model to use for encoding. +Used internally by default if "auto_convert" is enabled. Public because it's useful for custom algorithms.*/ +typedef struct LodePNGColorStats { + unsigned colored; /*not grayscale*/ + unsigned key; /*image is not opaque and color key is possible instead of full alpha*/ + unsigned short key_r; /*key values, always as 16-bit, in 8-bit case the byte is duplicated, e.g. 65535 means 255*/ + unsigned short key_g; + unsigned short key_b; + unsigned alpha; /*image is not opaque and alpha channel or alpha palette required*/ + unsigned numcolors; /*amount of colors, up to 257. Not valid if bits == 16 or allow_palette is disabled.*/ + unsigned char palette[1024]; /*Remembers up to the first 256 RGBA colors, in no particular order, only valid when numcolors is valid*/ + unsigned bits; /*bits per channel (not for palette). 1,2 or 4 for grayscale only. 16 if 16-bit per channel required.*/ + size_t numpixels; + + /*user settings for computing/using the stats*/ + unsigned allow_palette; /*default 1. if 0, disallow choosing palette colortype in auto_choose_color, and don't count numcolors*/ + unsigned allow_greyscale; /*default 1. if 0, choose RGB or RGBA even if the image only has gray colors*/ +} LodePNGColorStats; + +void lodepng_color_stats_init(LodePNGColorStats* stats); + +/*Get a LodePNGColorStats of the image. The stats must already have been inited. +Returns error code (e.g. alloc fail) or 0 if ok.*/ +unsigned lodepng_compute_color_stats(LodePNGColorStats* stats, + const unsigned char* image, unsigned w, unsigned h, + const LodePNGColorMode* mode_in); + +/*Settings for the encoder.*/ +typedef struct LodePNGEncoderSettings { + LodePNGCompressSettings zlibsettings; /*settings for the zlib encoder, such as window size, ...*/ + + unsigned auto_convert; /*automatically choose output PNG color type. Default: true*/ + + /*If true, follows the official PNG heuristic: if the PNG uses a palette or lower than + 8 bit depth, set all filters to zero. Otherwise use the filter_strategy. Note that to + completely follow the official PNG heuristic, filter_palette_zero must be true and + filter_strategy must be LFS_MINSUM*/ + unsigned filter_palette_zero; + /*Which filter strategy to use when not using zeroes due to filter_palette_zero. + Set filter_palette_zero to 0 to ensure always using your chosen strategy. Default: LFS_MINSUM*/ + LodePNGFilterStrategy filter_strategy; + /*used if filter_strategy is LFS_PREDEFINED. In that case, this must point to a buffer with + the same length as the amount of scanlines in the image, and each value must <= 5. You + have to cleanup this buffer, LodePNG will never free it. Don't forget that filter_palette_zero + must be set to 0 to ensure this is also used on palette or low bitdepth images.*/ + const unsigned char* predefined_filters; + + /*force creating a PLTE chunk if colortype is 2 or 6 (= a suggested palette). 
+ If colortype is 3, PLTE is _always_ created.*/ + unsigned force_palette; +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS + /*add LodePNG identifier and version as a text chunk, for debugging*/ + unsigned add_id; + /*encode text chunks as zTXt chunks instead of tEXt chunks, and use compression in iTXt chunks*/ + unsigned text_compression; +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ +} LodePNGEncoderSettings; + +void lodepng_encoder_settings_init(LodePNGEncoderSettings* settings); +#endif /*LODEPNG_COMPILE_ENCODER*/ + + +#if defined(LODEPNG_COMPILE_DECODER) || defined(LODEPNG_COMPILE_ENCODER) +/*The settings, state and information for extended encoding and decoding.*/ +typedef struct LodePNGState { +#ifdef LODEPNG_COMPILE_DECODER + LodePNGDecoderSettings decoder; /*the decoding settings*/ +#endif /*LODEPNG_COMPILE_DECODER*/ +#ifdef LODEPNG_COMPILE_ENCODER + LodePNGEncoderSettings encoder; /*the encoding settings*/ +#endif /*LODEPNG_COMPILE_ENCODER*/ + LodePNGColorMode info_raw; /*specifies the format in which you would like to get the raw pixel buffer*/ + LodePNGInfo info_png; /*info of the PNG image obtained after decoding*/ + unsigned error; +} LodePNGState; + +/*init, cleanup and copy functions to use with this struct*/ +void lodepng_state_init(LodePNGState* state); +void lodepng_state_cleanup(LodePNGState* state); +void lodepng_state_copy(LodePNGState* dest, const LodePNGState* source); +#endif /* defined(LODEPNG_COMPILE_DECODER) || defined(LODEPNG_COMPILE_ENCODER) */ + +#ifdef LODEPNG_COMPILE_DECODER +/* +Same as lodepng_decode_memory, but uses a LodePNGState to allow custom settings and +getting much more information about the PNG image and color mode. +*/ +unsigned lodepng_decode(unsigned char** out, unsigned* w, unsigned* h, + LodePNGState* state, + const unsigned char* in, size_t insize); + +/* +Read the PNG header, but not the actual data. This returns only the information +that is in the IHDR chunk of the PNG, such as width, height and color type. The +information is placed in the info_png field of the LodePNGState. +*/ +unsigned lodepng_inspect(unsigned* w, unsigned* h, + LodePNGState* state, + const unsigned char* in, size_t insize); +#endif /*LODEPNG_COMPILE_DECODER*/ + +/* +Reads one metadata chunk (other than IHDR) of the PNG file and outputs what it +read in the state. Returns error code on failure. +Use lodepng_inspect first with a new state, then e.g. lodepng_chunk_find_const +to find the desired chunk type, and if non null use lodepng_inspect_chunk (with +chunk_pointer - start_of_file as pos). +Supports most metadata chunks from the PNG standard (gAMA, bKGD, tEXt, ...). +Ignores unsupported, unknown, non-metadata or IHDR chunks (without error). +Requirements: &in[pos] must point to start of a chunk, must use regular +lodepng_inspect first since format of most other chunks depends on IHDR, and if +there is a PLTE chunk, that one must be inspected before tRNS or bKGD. +*/ +unsigned lodepng_inspect_chunk(LodePNGState* state, size_t pos, + const unsigned char* in, size_t insize); + +#ifdef LODEPNG_COMPILE_ENCODER +/*This function allocates the out buffer with standard malloc and stores the size in *outsize.*/ +unsigned lodepng_encode(unsigned char** out, size_t* outsize, + const unsigned char* image, unsigned w, unsigned h, + LodePNGState* state); +#endif /*LODEPNG_COMPILE_ENCODER*/ + +/* +The lodepng_chunk functions are normally not needed, except to traverse the +unknown chunks stored in the LodePNGInfo struct, or add new ones to it. 
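
Illustrative sketch (editorial note, not part of the patch): reading only the header of an in-memory PNG with lodepng_inspect and the LodePNGState declared above; png and pngsize are assumed to hold the file's bytes. No pixel data is decoded.

  #include <stdio.h>
  #include "lodepng.h"

  /* Print the IHDR information (width, height, color type, bit depth). */
  int print_header(const unsigned char* png, size_t pngsize) {
    LodePNGState state;
    lodepng_state_init(&state);

    unsigned w = 0, h = 0;
    unsigned error = lodepng_inspect(&w, &h, &state, png, pngsize);
    if (!error) {
      printf("%u x %u, color type %d, %u bits per sample\n",
             w, h, (int)state.info_png.color.colortype, state.info_png.color.bitdepth);
    } else {
      printf("inspect error %u: %s\n", error, lodepng_error_text(error));
    }

    lodepng_state_cleanup(&state);
    return error ? 1 : 0;
  }
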
+It also allows traversing the chunks of an encoded PNG file yourself. + +The chunk pointer always points to the beginning of the chunk itself, that is +the first byte of the 4 length bytes. + +In the PNG file format, chunks have the following format: +-4 bytes length: length of the data of the chunk in bytes (chunk itself is 12 bytes longer) +-4 bytes chunk type (ASCII a-z,A-Z only, see below) +-length bytes of data (may be 0 bytes if length was 0) +-4 bytes of CRC, computed on chunk name + data + +The first chunk starts at the 8th byte of the PNG file, the entire rest of the file +exists out of concatenated chunks with the above format. + +PNG standard chunk ASCII naming conventions: +-First byte: uppercase = critical, lowercase = ancillary +-Second byte: uppercase = public, lowercase = private +-Third byte: must be uppercase +-Fourth byte: uppercase = unsafe to copy, lowercase = safe to copy +*/ + +/* +Gets the length of the data of the chunk. Total chunk length has 12 bytes more. +There must be at least 4 bytes to read from. If the result value is too large, +it may be corrupt data. +*/ +unsigned lodepng_chunk_length(const unsigned char* chunk); + +/*puts the 4-byte type in null terminated string*/ +void lodepng_chunk_type(char type[5], const unsigned char* chunk); + +/*check if the type is the given type*/ +unsigned char lodepng_chunk_type_equals(const unsigned char* chunk, const char* type); + +/*0: it's one of the critical chunk types, 1: it's an ancillary chunk (see PNG standard)*/ +unsigned char lodepng_chunk_ancillary(const unsigned char* chunk); + +/*0: public, 1: private (see PNG standard)*/ +unsigned char lodepng_chunk_private(const unsigned char* chunk); + +/*0: the chunk is unsafe to copy, 1: the chunk is safe to copy (see PNG standard)*/ +unsigned char lodepng_chunk_safetocopy(const unsigned char* chunk); + +/*get pointer to the data of the chunk, where the input points to the header of the chunk*/ +unsigned char* lodepng_chunk_data(unsigned char* chunk); +const unsigned char* lodepng_chunk_data_const(const unsigned char* chunk); + +/*returns 0 if the crc is correct, 1 if it's incorrect (0 for OK as usual!)*/ +unsigned lodepng_chunk_check_crc(const unsigned char* chunk); + +/*generates the correct CRC from the data and puts it in the last 4 bytes of the chunk*/ +void lodepng_chunk_generate_crc(unsigned char* chunk); + +/* +Iterate to next chunks, allows iterating through all chunks of the PNG file. +Input must be at the beginning of a chunk (result of a previous lodepng_chunk_next call, +or the 8th byte of a PNG file which always has the first chunk), or alternatively may +point to the first byte of the PNG file (which is not a chunk but the magic header, the +function will then skip over it and return the first real chunk). +Will output pointer to the start of the next chunk, or at or beyond end of the file if there +is no more chunk after this or possibly if the chunk is corrupt. +Start this process at the 8th byte of the PNG file. +In a non-corrupt PNG file, the last chunk should have name "IEND". 
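
Illustrative sketch (editorial note, not part of the patch): walking the chunks of an in-memory PNG with lodepng_chunk_length, lodepng_chunk_type and lodepng_chunk_type_equals above, plus lodepng_chunk_next_const, which is declared immediately below; png and pngsize are assumed to hold a complete, well-formed PNG file.

  #include <stdio.h>
  #include "lodepng.h"

  /* List each chunk's type and data length. The first chunk starts 8 bytes
     in, after the PNG signature. */
  void list_chunks(const unsigned char* png, size_t pngsize) {
    const unsigned char* end = png + pngsize;
    const unsigned char* chunk = png + 8;

    while (chunk && chunk + 12 <= end) {   /* 12 = length + type + CRC */
      char type[5];
      lodepng_chunk_type(type, chunk);
      printf("%s: %u bytes of data\n", type, lodepng_chunk_length(chunk));
      if (lodepng_chunk_type_equals(chunk, "IEND")) break;
      chunk = lodepng_chunk_next_const(chunk, end);
    }
  }
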
+*/ +unsigned char* lodepng_chunk_next(unsigned char* chunk, unsigned char* end); +const unsigned char* lodepng_chunk_next_const(const unsigned char* chunk, const unsigned char* end); + +/*Finds the first chunk with the given type in the range [chunk, end), or returns NULL if not found.*/ +unsigned char* lodepng_chunk_find(unsigned char* chunk, unsigned char* end, const char type[5]); +const unsigned char* lodepng_chunk_find_const(const unsigned char* chunk, const unsigned char* end, const char type[5]); + +/* +Appends chunk to the data in out. The given chunk should already have its chunk header. +The out variable and outsize are updated to reflect the new reallocated buffer. +Returns error code (0 if it went ok) +*/ +unsigned lodepng_chunk_append(unsigned char** out, size_t* outsize, const unsigned char* chunk); + +/* +Appends new chunk to out. The chunk to append is given by giving its length, type +and data separately. The type is a 4-letter string. +The out variable and outsize are updated to reflect the new reallocated buffer. +Returne error code (0 if it went ok) +*/ +unsigned lodepng_chunk_create(unsigned char** out, size_t* outsize, unsigned length, + const char* type, const unsigned char* data); + + +/*Calculate CRC32 of buffer*/ +unsigned lodepng_crc32(const unsigned char* buf, size_t len); +#endif /*LODEPNG_COMPILE_PNG*/ + + +#ifdef LODEPNG_COMPILE_ZLIB +/* +This zlib part can be used independently to zlib compress and decompress a +buffer. It cannot be used to create gzip files however, and it only supports the +part of zlib that is required for PNG, it does not support dictionaries. +*/ + +#ifdef LODEPNG_COMPILE_DECODER +/*Inflate a buffer. Inflate is the decompression step of deflate. Out buffer must be freed after use.*/ +unsigned lodepng_inflate(unsigned char** out, size_t* outsize, + const unsigned char* in, size_t insize, + const LodePNGDecompressSettings* settings); + +/* +Decompresses Zlib data. Reallocates the out buffer and appends the data. The +data must be according to the zlib specification. +Either, *out must be NULL and *outsize must be 0, or, *out must be a valid +buffer and *outsize its size in bytes. out must be freed by user after usage. +*/ +unsigned lodepng_zlib_decompress(unsigned char** out, size_t* outsize, + const unsigned char* in, size_t insize, + const LodePNGDecompressSettings* settings); +#endif /*LODEPNG_COMPILE_DECODER*/ + +#ifdef LODEPNG_COMPILE_ENCODER +/* +Compresses data with Zlib. Reallocates the out buffer and appends the data. +Zlib adds a small header and trailer around the deflate data. +The data is output in the format of the zlib specification. +Either, *out must be NULL and *outsize must be 0, or, *out must be a valid +buffer and *outsize its size in bytes. out must be freed by user after usage. +*/ +unsigned lodepng_zlib_compress(unsigned char** out, size_t* outsize, + const unsigned char* in, size_t insize, + const LodePNGCompressSettings* settings); + +/* +Find length-limited Huffman code for given frequencies. This function is in the +public interface only for tests, it's used internally by lodepng_deflate. +*/ +unsigned lodepng_huffman_code_lengths(unsigned* lengths, const unsigned* frequencies, + size_t numcodes, unsigned maxbitlen); + +/*Compress a buffer with deflate. See RFC 1951. 
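
Illustrative sketch (editorial note, not part of the patch): a zlib round trip through lodepng_zlib_compress and lodepng_zlib_decompress above, using the default settings objects declared earlier in this header; data and size are a placeholder input buffer. Per the comments above, both out buffers start as NULL/0 and are freed by the caller.

  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>
  #include "lodepng.h"

  int zlib_roundtrip(const unsigned char* data, size_t size) {
    unsigned char* comp = NULL; size_t compsize = 0;
    unsigned error = lodepng_zlib_compress(&comp, &compsize, data, size,
                                           &lodepng_default_compress_settings);
    if (error) { printf("compress error %u\n", error); return 1; }

    unsigned char* back = NULL; size_t backsize = 0;
    error = lodepng_zlib_decompress(&back, &backsize, comp, compsize,
                                    &lodepng_default_decompress_settings);
    if (!error && backsize == size && memcmp(back, data, size) == 0) {
      printf("ok: %zu bytes -> %zu compressed\n", size, compsize);
    }

    free(comp);
    free(back);
    return error ? 1 : 0;
  }
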
Out buffer must be freed after use.*/ +unsigned lodepng_deflate(unsigned char** out, size_t* outsize, + const unsigned char* in, size_t insize, + const LodePNGCompressSettings* settings); + +#endif /*LODEPNG_COMPILE_ENCODER*/ +#endif /*LODEPNG_COMPILE_ZLIB*/ + +#ifdef LODEPNG_COMPILE_DISK +/* +Load a file from disk into buffer. The function allocates the out buffer, and +after usage you should free it. +out: output parameter, contains pointer to loaded buffer. +outsize: output parameter, size of the allocated out buffer +filename: the path to the file to load +return value: error code (0 means ok) +*/ +unsigned lodepng_load_file(unsigned char** out, size_t* outsize, const char* filename); + +/* +Save a file from buffer to disk. Warning, if it exists, this function overwrites +the file without warning! +buffer: the buffer to write +buffersize: size of the buffer to write +filename: the path to the file to save to +return value: error code (0 means ok) +*/ +unsigned lodepng_save_file(const unsigned char* buffer, size_t buffersize, const char* filename); +#endif /*LODEPNG_COMPILE_DISK*/ + +#ifdef LODEPNG_COMPILE_CPP +/* The LodePNG C++ wrapper uses std::vectors instead of manually allocated memory buffers. */ +namespace lodepng { +#ifdef LODEPNG_COMPILE_PNG +class State : public LodePNGState { + public: + State(); + State(const State& other); + ~State(); + State& operator=(const State& other); +}; + +#ifdef LODEPNG_COMPILE_DECODER +/* Same as other lodepng::decode, but using a State for more settings and information. */ +unsigned decode(std::vector& out, unsigned& w, unsigned& h, + State& state, + const unsigned char* in, size_t insize); +unsigned decode(std::vector& out, unsigned& w, unsigned& h, + State& state, + const std::vector& in); +#endif /*LODEPNG_COMPILE_DECODER*/ + +#ifdef LODEPNG_COMPILE_ENCODER +/* Same as other lodepng::encode, but using a State for more settings and information. */ +unsigned encode(std::vector& out, + const unsigned char* in, unsigned w, unsigned h, + State& state); +unsigned encode(std::vector& out, + const std::vector& in, unsigned w, unsigned h, + State& state); +#endif /*LODEPNG_COMPILE_ENCODER*/ + +#ifdef LODEPNG_COMPILE_DISK +/* +Load a file from disk into an std::vector. +return value: error code (0 means ok) +*/ +unsigned load_file(std::vector& buffer, const std::string& filename); + +/* +Save the binary data in an std::vector to a file on disk. The file is overwritten +without warning. 
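
Illustrative sketch (editorial note, not part of the patch): the disk helpers lodepng_load_file and lodepng_save_file above, with lodepng_error_text for the message. "in.png" and "copy.png" are placeholder paths, not files referenced by this patch.

  #include <stdio.h>
  #include <stdlib.h>
  #include "lodepng.h"

  /* Load a file into a malloc'd buffer and write it back out under a new name. */
  int copy_file(void) {
    unsigned char* buffer = NULL;
    size_t buffersize = 0;

    unsigned error = lodepng_load_file(&buffer, &buffersize, "in.png");
    if (!error) error = lodepng_save_file(buffer, buffersize, "copy.png");
    if (error) printf("file error %u: %s\n", error, lodepng_error_text(error));

    free(buffer);
    return error ? 1 : 0;
  }
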
+*/ +unsigned save_file(const std::vector& buffer, const std::string& filename); +#endif /* LODEPNG_COMPILE_DISK */ +#endif /* LODEPNG_COMPILE_PNG */ + +#ifdef LODEPNG_COMPILE_ZLIB +#ifdef LODEPNG_COMPILE_DECODER +/* Zlib-decompress an unsigned char buffer */ +unsigned decompress(std::vector& out, const unsigned char* in, size_t insize, + const LodePNGDecompressSettings& settings = lodepng_default_decompress_settings); + +/* Zlib-decompress an std::vector */ +unsigned decompress(std::vector& out, const std::vector& in, + const LodePNGDecompressSettings& settings = lodepng_default_decompress_settings); +#endif /* LODEPNG_COMPILE_DECODER */ + +#ifdef LODEPNG_COMPILE_ENCODER +/* Zlib-compress an unsigned char buffer */ +unsigned compress(std::vector& out, const unsigned char* in, size_t insize, + const LodePNGCompressSettings& settings = lodepng_default_compress_settings); + +/* Zlib-compress an std::vector */ +unsigned compress(std::vector& out, const std::vector& in, + const LodePNGCompressSettings& settings = lodepng_default_compress_settings); +#endif /* LODEPNG_COMPILE_ENCODER */ +#endif /* LODEPNG_COMPILE_ZLIB */ +} /* namespace lodepng */ +#endif /*LODEPNG_COMPILE_CPP*/ + +/* +TODO: +[.] test if there are no memory leaks or security exploits - done a lot but needs to be checked often +[.] check compatibility with various compilers - done but needs to be redone for every newer version +[X] converting color to 16-bit per channel types +[X] support color profile chunk types (but never let them touch RGB values by default) +[ ] support all public PNG chunk types (almost done except sBIT, sPLT and hIST) +[ ] make sure encoder generates no chunks with size > (2^31)-1 +[ ] partial decoding (stream processing) +[X] let the "isFullyOpaque" function check color keys and transparent palettes too +[X] better name for the variables "codes", "codesD", "codelengthcodes", "clcl" and "lldl" +[ ] allow treating some errors like warnings, when image is recoverable (e.g. 69, 57, 58) +[ ] make warnings like: oob palette, checksum fail, data after iend, wrong/unknown crit chunk, no null terminator in text, ... +[ ] error messages with line numbers (and version) +[ ] errors in state instead of as return code? +[ ] new errors/warnings like suspiciously big decompressed ztxt or iccp chunk +[ ] let the C++ wrapper catch exceptions coming from the standard library and return LodePNG error codes +[ ] allow user to provide custom color conversion functions, e.g. for premultiplied alpha, padding bits or not, ... +[ ] allow user to give data (void*) to custom allocator +[X] provide alternatives for C library functions not present on some platforms (memcpy, ...) +*/ + +#endif /*LODEPNG_H inclusion guard*/ + +/* +LodePNG Documentation +--------------------- + +0. table of contents +-------------------- + + 1. about + 1.1. supported features + 1.2. features not supported + 2. C and C++ version + 3. security + 4. decoding + 5. encoding + 6. color conversions + 6.1. PNG color types + 6.2. color conversions + 6.3. padding bits + 6.4. A note about 16-bits per channel and endianness + 7. error values + 8. chunks and PNG editing + 9. compiler support + 10. examples + 10.1. decoder C++ example + 10.2. decoder C example + 11. state settings reference + 12. changes + 13. contact information + + +1. about +-------- + +PNG is a file format to store raster images losslessly with good compression, +supporting different color types and alpha channel. 
+ +LodePNG is a PNG codec according to the Portable Network Graphics (PNG) +Specification (Second Edition) - W3C Recommendation 10 November 2003. + +The specifications used are: + +*) Portable Network Graphics (PNG) Specification (Second Edition): + http://www.w3.org/TR/2003/REC-PNG-20031110 +*) RFC 1950 ZLIB Compressed Data Format version 3.3: + http://www.gzip.org/zlib/rfc-zlib.html +*) RFC 1951 DEFLATE Compressed Data Format Specification ver 1.3: + http://www.gzip.org/zlib/rfc-deflate.html + +The most recent version of LodePNG can currently be found at +http://lodev.org/lodepng/ + +LodePNG works both in C (ISO C90) and C++, with a C++ wrapper that adds +extra functionality. + +LodePNG exists out of two files: +-lodepng.h: the header file for both C and C++ +-lodepng.c(pp): give it the name lodepng.c or lodepng.cpp (or .cc) depending on your usage + +If you want to start using LodePNG right away without reading this doc, get the +examples from the LodePNG website to see how to use it in code, or check the +smaller examples in chapter 13 here. + +LodePNG is simple but only supports the basic requirements. To achieve +simplicity, the following design choices were made: There are no dependencies +on any external library. There are functions to decode and encode a PNG with +a single function call, and extended versions of these functions taking a +LodePNGState struct allowing to specify or get more information. By default +the colors of the raw image are always RGB or RGBA, no matter what color type +the PNG file uses. To read and write files, there are simple functions to +convert the files to/from buffers in memory. + +This all makes LodePNG suitable for loading textures in games, demos and small +programs, ... It's less suitable for full fledged image editors, loading PNGs +over network (it requires all the image data to be available before decoding can +begin), life-critical systems, ... + +1.1. supported features +----------------------- + +The following features are supported by the decoder: + +*) decoding of PNGs with any color type, bit depth and interlace mode, to a 24- or 32-bit color raw image, + or the same color type as the PNG +*) encoding of PNGs, from any raw image to 24- or 32-bit color, or the same color type as the raw image +*) Adam7 interlace and deinterlace for any color type +*) loading the image from harddisk or decoding it from a buffer from other sources than harddisk +*) support for alpha channels, including RGBA color model, translucent palettes and color keying +*) zlib decompression (inflate) +*) zlib compression (deflate) +*) CRC32 and ADLER32 checksums +*) colorimetric color profile conversions: currently experimentally available in lodepng_util.cpp only, + plus alternatively ability to pass on chroma/gamma/ICC profile information to other color management system. +*) handling of unknown chunks, allowing making a PNG editor that stores custom and unknown chunks. +*) the following chunks are supported by both encoder and decoder: + IHDR: header information + PLTE: color palette + IDAT: pixel data + IEND: the final chunk + tRNS: transparency for palettized images + tEXt: textual information + zTXt: compressed textual information + iTXt: international textual information + bKGD: suggested background color + pHYs: physical dimensions + tIME: modification time + cHRM: RGB chromaticities + gAMA: RGB gamma correction + iCCP: ICC color profile + sRGB: rendering intent + +1.2. 
features not supported +--------------------------- + +The following features are _not_ supported: + +*) some features needed to make a conformant PNG-Editor might be still missing. +*) partial loading/stream processing. All data must be available and is processed in one call. +*) The following public chunks are not (yet) supported but treated as unknown chunks by LodePNG: + sBIT + hIST + sPLT + + +2. C and C++ version +-------------------- + +The C version uses buffers allocated with alloc that you need to free() +yourself. You need to use init and cleanup functions for each struct whenever +using a struct from the C version to avoid exploits and memory leaks. + +The C++ version has extra functions with std::vectors in the interface and the +lodepng::State class which is a LodePNGState with constructor and destructor. + +These files work without modification for both C and C++ compilers because all +the additional C++ code is in "#ifdef __cplusplus" blocks that make C-compilers +ignore it, and the C code is made to compile both with strict ISO C90 and C++. + +To use the C++ version, you need to rename the source file to lodepng.cpp +(instead of lodepng.c), and compile it with a C++ compiler. + +To use the C version, you need to rename the source file to lodepng.c (instead +of lodepng.cpp), and compile it with a C compiler. + + +3. Security +----------- + +Even if carefully designed, it's always possible that LodePNG contains possible +exploits. If you discover one, please let me know, and it will be fixed. + +When using LodePNG, care has to be taken with the C version of LodePNG, as well +as the C-style structs when working with C++. The following conventions are used +for all C-style structs: + +-if a struct has a corresponding init function, always call the init function when making a new one +-if a struct has a corresponding cleanup function, call it before the struct disappears to avoid memory leaks +-if a struct has a corresponding copy function, use the copy function instead of "=". + The destination must also be inited already. + + +4. Decoding +----------- + +Decoding converts a PNG compressed image to a raw pixel buffer. + +Most documentation on using the decoder is at its declarations in the header +above. For C, simple decoding can be done with functions such as +lodepng_decode32, and more advanced decoding can be done with the struct +LodePNGState and lodepng_decode. For C++, all decoding can be done with the +various lodepng::decode functions, and lodepng::State can be used for advanced +features. + +When using the LodePNGState, it uses the following fields for decoding: +*) LodePNGInfo info_png: it stores extra information about the PNG (the input) in here +*) LodePNGColorMode info_raw: here you can say what color mode of the raw image (the output) you want to get +*) LodePNGDecoderSettings decoder: you can specify a few extra settings for the decoder to use + +LodePNGInfo info_png +-------------------- + +After decoding, this contains extra information of the PNG image, except the actual +pixels, width and height because these are already gotten directly from the decoder +functions. + +It contains for example the original color type of the PNG image, text comments, +suggested background color, etc... More details about the LodePNGInfo struct are +at its declaration documentation. + +LodePNGColorMode info_raw +------------------------- + +When decoding, here you can specify which color type you want +the resulting raw image to be. 
If this is different from the colortype of the +PNG, then the decoder will automatically convert the result. This conversion +always works, except if you want it to convert a color PNG to grayscale or to +a palette with missing colors. + +By default, 32-bit color is used for the result. + +LodePNGDecoderSettings decoder +------------------------------ + +The settings can be used to ignore the errors created by invalid CRC and Adler32 +chunks, and to disable the decoding of tEXt chunks. + +There's also a setting color_convert, true by default. If false, no conversion +is done, the resulting data will be as it was in the PNG (after decompression) +and you'll have to puzzle the colors of the pixels together yourself using the +color type information in the LodePNGInfo. + + +5. Encoding +----------- + +Encoding converts a raw pixel buffer to a PNG compressed image. + +Most documentation on using the encoder is at its declarations in the header +above. For C, simple encoding can be done with functions such as +lodepng_encode32, and more advanced decoding can be done with the struct +LodePNGState and lodepng_encode. For C++, all encoding can be done with the +various lodepng::encode functions, and lodepng::State can be used for advanced +features. + +Like the decoder, the encoder can also give errors. However it gives less errors +since the encoder input is trusted, the decoder input (a PNG image that could +be forged by anyone) is not trusted. + +When using the LodePNGState, it uses the following fields for encoding: +*) LodePNGInfo info_png: here you specify how you want the PNG (the output) to be. +*) LodePNGColorMode info_raw: here you say what color type of the raw image (the input) has +*) LodePNGEncoderSettings encoder: you can specify a few settings for the encoder to use + +LodePNGInfo info_png +-------------------- + +When encoding, you use this the opposite way as when decoding: for encoding, +you fill in the values you want the PNG to have before encoding. By default it's +not needed to specify a color type for the PNG since it's automatically chosen, +but it's possible to choose it yourself given the right settings. + +The encoder will not always exactly match the LodePNGInfo struct you give, +it tries as close as possible. Some things are ignored by the encoder. The +encoder uses, for example, the following settings from it when applicable: +colortype and bitdepth, text chunks, time chunk, the color key, the palette, the +background color, the interlace method, unknown chunks, ... + +When encoding to a PNG with colortype 3, the encoder will generate a PLTE chunk. +If the palette contains any colors for which the alpha channel is not 255 (so +there are translucent colors in the palette), it'll add a tRNS chunk. + +LodePNGColorMode info_raw +------------------------- + +You specify the color type of the raw image that you give to the input here, +including a possible transparent color key and palette you happen to be using in +your raw image data. + +By default, 32-bit color is assumed, meaning your input has to be in RGBA +format with 4 bytes (unsigned chars) per pixel. + +LodePNGEncoderSettings encoder +------------------------------ + +The following settings are supported (some are in sub-structs): +*) auto_convert: when this option is enabled, the encoder will +automatically choose the smallest possible color mode (including color key) that +can encode the colors of all pixels without information loss. +*) btype: the block type for LZ77. 
0 = uncompressed, 1 = fixed huffman tree, + 2 = dynamic huffman tree (best compression). Should be 2 for proper + compression. +*) use_lz77: whether or not to use LZ77 for compressed block types. Should be + true for proper compression. +*) windowsize: the window size used by the LZ77 encoder (1 - 32768). Has value + 2048 by default, but can be set to 32768 for better, but slow, compression. +*) force_palette: if colortype is 2 or 6, you can make the encoder write a PLTE + chunk if force_palette is true. This can used as suggested palette to convert + to by viewers that don't support more than 256 colors (if those still exist) +*) add_id: add text chunk "Encoder: LodePNG " to the image. +*) text_compression: default 1. If 1, it'll store texts as zTXt instead of tEXt chunks. + zTXt chunks use zlib compression on the text. This gives a smaller result on + large texts but a larger result on small texts (such as a single program name). + It's all tEXt or all zTXt though, there's no separate setting per text yet. + + +6. color conversions +-------------------- + +An important thing to note about LodePNG, is that the color type of the PNG, and +the color type of the raw image, are completely independent. By default, when +you decode a PNG, you get the result as a raw image in the color type you want, +no matter whether the PNG was encoded with a palette, grayscale or RGBA color. +And if you encode an image, by default LodePNG will automatically choose the PNG +color type that gives good compression based on the values of colors and amount +of colors in the image. It can be configured to let you control it instead as +well, though. + +To be able to do this, LodePNG does conversions from one color mode to another. +It can convert from almost any color type to any other color type, except the +following conversions: RGB to grayscale is not supported, and converting to a +palette when the palette doesn't have a required color is not supported. This is +not supported on purpose: this is information loss which requires a color +reduction algorithm that is beyond the scope of a PNG encoder (yes, RGB to gray +is easy, but there are multiple ways if you want to give some channels more +weight). + +By default, when decoding, you get the raw image in 32-bit RGBA or 24-bit RGB +color, no matter what color type the PNG has. And by default when encoding, +LodePNG automatically picks the best color model for the output PNG, and expects +the input image to be 32-bit RGBA or 24-bit RGB. So, unless you want to control +the color format of the images yourself, you can skip this chapter. + +6.1. PNG color types +-------------------- + +A PNG image can have many color types, ranging from 1-bit color to 64-bit color, +as well as palettized color modes. After the zlib decompression and unfiltering +in the PNG image is done, the raw pixel data will have that color type and thus +a certain amount of bits per pixel. If you want the output raw image after +decoding to have another color type, a conversion is done by LodePNG. + +The PNG specification gives the following color types: + +0: grayscale, bit depths 1, 2, 4, 8, 16 +2: RGB, bit depths 8 and 16 +3: palette, bit depths 1, 2, 4 and 8 +4: grayscale with alpha, bit depths 8 and 16 +6: RGBA, bit depths 8 and 16 + +Bit depth is the amount of bits per pixel per color channel. So the total amount +of bits per pixel is: amount of channels * bitdepth. + +6.2. 
color conversions +---------------------- + +As explained in the sections about the encoder and decoder, you can specify +color types and bit depths in info_png and info_raw to change the default +behaviour. + +If, when decoding, you want the raw image to be something else than the default, +you need to set the color type and bit depth you want in the LodePNGColorMode, +or the parameters colortype and bitdepth of the simple decoding function. + +If, when encoding, you use another color type than the default in the raw input +image, you need to specify its color type and bit depth in the LodePNGColorMode +of the raw image, or use the parameters colortype and bitdepth of the simple +encoding function. + +If, when encoding, you don't want LodePNG to choose the output PNG color type +but control it yourself, you need to set auto_convert in the encoder settings +to false, and specify the color type you want in the LodePNGInfo of the +encoder (including palette: it can generate a palette if auto_convert is true, +otherwise not). + +If the input and output color type differ (whether user chosen or auto chosen), +LodePNG will do a color conversion, which follows the rules below, and may +sometimes result in an error. + +To avoid some confusion: +-the decoder converts from PNG to raw image +-the encoder converts from raw image to PNG +-the colortype and bitdepth in LodePNGColorMode info_raw, are those of the raw image +-the colortype and bitdepth in the color field of LodePNGInfo info_png, are those of the PNG +-when encoding, the color type in LodePNGInfo is ignored if auto_convert + is enabled, it is automatically generated instead +-when decoding, the color type in LodePNGInfo is set by the decoder to that of the original + PNG image, but it can be ignored since the raw image has the color type you requested instead +-if the color type of the LodePNGColorMode and PNG image aren't the same, a conversion + between the color types is done if the color types are supported. If it is not + supported, an error is returned. If the types are the same, no conversion is done. +-even though some conversions aren't supported, LodePNG supports loading PNGs from any + colortype and saving PNGs to any colortype, sometimes it just requires preparing + the raw image correctly before encoding. +-both encoder and decoder use the same color converter. + +The function lodepng_convert does the color conversion. It is available in the +interface but normally isn't needed since the encoder and decoder already call +it. + +Non supported color conversions: +-color to grayscale when non-gray pixels are present: no error is thrown, but +the result will look ugly because only the red channel is taken (it assumes all +three channels are the same in this case so ignores green and blue). The reason +no error is given is to allow converting from three-channel grayscale images to +one-channel even if there are numerical imprecisions. +-anything to palette when the palette does not have an exact match for a from-color +in it: in this case an error is thrown + +Supported color conversions: +-anything to 8-bit RGB, 8-bit RGBA, 16-bit RGB, 16-bit RGBA +-any gray or gray+alpha, to gray or gray+alpha +-anything to a palette, as long as the palette has the requested colors in it +-removing alpha channel +-higher to smaller bitdepth, and vice versa + +If you want no color conversion to be done (e.g. 
for speed or control): +-In the encoder, you can make it save a PNG with any color type by giving the +raw color mode and LodePNGInfo the same color mode, and setting auto_convert to +false. +-In the decoder, you can make it store the pixel data in the same color type +as the PNG has, by setting the color_convert setting to false. Settings in +info_raw are then ignored. + +6.3. padding bits +----------------- + +In the PNG file format, if a less than 8-bit per pixel color type is used and the scanlines +have a bit amount that isn't a multiple of 8, then padding bits are used so that each +scanline starts at a fresh byte. But that is NOT true for the LodePNG raw input and output. +The raw input image you give to the encoder, and the raw output image you get from the decoder +will NOT have these padding bits, e.g. in the case of a 1-bit image with a width +of 7 pixels, the first pixel of the second scanline will the 8th bit of the first byte, +not the first bit of a new byte. + +6.4. A note about 16-bits per channel and endianness +---------------------------------------------------- + +LodePNG uses unsigned char arrays for 16-bit per channel colors too, just like +for any other color format. The 16-bit values are stored in big endian (most +significant byte first) in these arrays. This is the opposite order of the +little endian used by x86 CPU's. + +LodePNG always uses big endian because the PNG file format does so internally. +Conversions to other formats than PNG uses internally are not supported by +LodePNG on purpose, there are myriads of formats, including endianness of 16-bit +colors, the order in which you store R, G, B and A, and so on. Supporting and +converting to/from all that is outside the scope of LodePNG. + +This may mean that, depending on your use case, you may want to convert the big +endian output of LodePNG to little endian with a for loop. This is certainly not +always needed, many applications and libraries support big endian 16-bit colors +anyway, but it means you cannot simply cast the unsigned char* buffer to an +unsigned short* buffer on x86 CPUs. + + +7. error values +--------------- + +All functions in LodePNG that return an error code, return 0 if everything went +OK, or a non-zero code if there was an error. + +The meaning of the LodePNG error values can be retrieved with the function +lodepng_error_text: given the numerical error code, it returns a description +of the error in English as a string. + +Check the implementation of lodepng_error_text to see the meaning of each code. + +It is not recommended to use the numerical values to programmatically make +different decisions based on error types as the numbers are not guaranteed to +stay backwards compatible. They are for human consumption only. Programmatically +only 0 or non-0 matter. + + +8. chunks and PNG editing +------------------------- + +If you want to add extra chunks to a PNG you encode, or use LodePNG for a PNG +editor that should follow the rules about handling of unknown chunks, or if your +program is able to read other types of chunks than the ones handled by LodePNG, +then that's possible with the chunk functions of LodePNG. + +A PNG chunk has the following layout: + +4 bytes length +4 bytes type name +length bytes data +4 bytes CRC + +8.1. iterating through chunks +----------------------------- + +If you have a buffer containing the PNG image data, then the first chunk (the +IHDR chunk) starts at byte number 8 of that buffer. 
The first 8 bytes are the +signature of the PNG and are not part of a chunk. But if you start at byte 8 +then you have a chunk, and can check the following things of it. + +NOTE: none of these functions check for memory buffer boundaries. To avoid +exploits, always make sure the buffer contains all the data of the chunks. +When using lodepng_chunk_next, make sure the returned value is within the +allocated memory. + +unsigned lodepng_chunk_length(const unsigned char* chunk): + +Get the length of the chunk's data. The total chunk length is this length + 12. + +void lodepng_chunk_type(char type[5], const unsigned char* chunk): +unsigned char lodepng_chunk_type_equals(const unsigned char* chunk, const char* type): + +Get the type of the chunk or compare if it's a certain type + +unsigned char lodepng_chunk_critical(const unsigned char* chunk): +unsigned char lodepng_chunk_private(const unsigned char* chunk): +unsigned char lodepng_chunk_safetocopy(const unsigned char* chunk): + +Check if the chunk is critical in the PNG standard (only IHDR, PLTE, IDAT and IEND are). +Check if the chunk is private (public chunks are part of the standard, private ones not). +Check if the chunk is safe to copy. If it's not, then, when modifying data in a critical +chunk, unsafe to copy chunks of the old image may NOT be saved in the new one if your +program doesn't handle that type of unknown chunk. + +unsigned char* lodepng_chunk_data(unsigned char* chunk): +const unsigned char* lodepng_chunk_data_const(const unsigned char* chunk): + +Get a pointer to the start of the data of the chunk. + +unsigned lodepng_chunk_check_crc(const unsigned char* chunk): +void lodepng_chunk_generate_crc(unsigned char* chunk): + +Check if the crc is correct or generate a correct one. + +unsigned char* lodepng_chunk_next(unsigned char* chunk): +const unsigned char* lodepng_chunk_next_const(const unsigned char* chunk): + +Iterate to the next chunk. This works if you have a buffer with consecutive chunks. Note that these +functions do no boundary checking of the allocated data whatsoever, so make sure there is enough +data available in the buffer to be able to go to the next chunk. + +unsigned lodepng_chunk_append(unsigned char** out, size_t* outsize, const unsigned char* chunk): +unsigned lodepng_chunk_create(unsigned char** out, size_t* outsize, unsigned length, + const char* type, const unsigned char* data): + +These functions are used to create new chunks that are appended to the data in *out that has +length *outsize. The append function appends an existing chunk to the new data. The create +function creates a new chunk with the given parameters and appends it. Type is the 4-letter +name of the chunk. + +8.2. chunks in info_png +----------------------- + +The LodePNGInfo struct contains fields with the unknown chunk in it. It has 3 +buffers (each with size) to contain 3 types of unknown chunks: +the ones that come before the PLTE chunk, the ones that come between the PLTE +and the IDAT chunks, and the ones that come after the IDAT chunks. +It's necessary to make the distinction between these 3 cases because the PNG +standard forces to keep the ordering of unknown chunks compared to the critical +chunks, but does not force any other ordering rules. 
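To make the chunk helpers from 8.1 concrete, the loop below walks all chunks of a PNG that is already in memory and prints each chunk's type and data length. It is a sketch under the assumption that png points at a complete, well-formed PNG of pngsize bytes; the bounds check is done by hand using the "data length + 12" rule quoted above, rather than relying on lodepng_chunk_next.

#include <stdio.h>
#include "lodepng.h"

static void list_chunks(const unsigned char* png, size_t pngsize) {
  size_t pos = 8; // skip the 8-byte PNG signature; the IHDR chunk starts here
  while(pos + 12 <= pngsize) { // 12 = 4 (length) + 4 (type) + 4 (CRC)
    const unsigned char* chunk = png + pos;
    unsigned length = lodepng_chunk_length(chunk); // data bytes only
    char type[5];
    lodepng_chunk_type(type, chunk); // e.g. "IHDR", "IDAT", "IEND"
    printf("%s: %u data bytes\n", type, length);
    if(lodepng_chunk_type_equals(chunk, "IEND")) break;
    pos += (size_t)length + 12; // total chunk size = data length + 12
  }
}

The same idea applies to the three unknown_chunks_data buffers described below, except that those buffers hold bare consecutive chunks, so the scan starts at offset 0 rather than 8.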
+ +info_png.unknown_chunks_data[0] is the chunks before PLTE +info_png.unknown_chunks_data[1] is the chunks after PLTE, before IDAT +info_png.unknown_chunks_data[2] is the chunks after IDAT + +The chunks in these 3 buffers can be iterated through and read by using the same +way described in the previous subchapter. + +When using the decoder to decode a PNG, you can make it store all unknown chunks +if you set the option settings.remember_unknown_chunks to 1. By default, this +option is off (0). + +The encoder will always encode unknown chunks that are stored in the info_png. +If you need it to add a particular chunk that isn't known by LodePNG, you can +use lodepng_chunk_append or lodepng_chunk_create to the chunk data in +info_png.unknown_chunks_data[x]. + +Chunks that are known by LodePNG should not be added in that way. E.g. to make +LodePNG add a bKGD chunk, set background_defined to true and add the correct +parameters there instead. + + +9. compiler support +------------------- + +No libraries other than the current standard C library are needed to compile +LodePNG. For the C++ version, only the standard C++ library is needed on top. +Add the files lodepng.c(pp) and lodepng.h to your project, include +lodepng.h where needed, and your program can read/write PNG files. + +It is compatible with C90 and up, and C++03 and up. + +If performance is important, use optimization when compiling! For both the +encoder and decoder, this makes a large difference. + +Make sure that LodePNG is compiled with the same compiler of the same version +and with the same settings as the rest of the program, or the interfaces with +std::vectors and std::strings in C++ can be incompatible. + +CHAR_BITS must be 8 or higher, because LodePNG uses unsigned chars for octets. + +*) gcc and g++ + +LodePNG is developed in gcc so this compiler is natively supported. It gives no +warnings with compiler options "-Wall -Wextra -pedantic -ansi", with gcc and g++ +version 4.7.1 on Linux, 32-bit and 64-bit. + +*) Clang + +Fully supported and warning-free. + +*) Mingw + +The Mingw compiler (a port of gcc for Windows) should be fully supported by +LodePNG. + +*) Visual Studio and Visual C++ Express Edition + +LodePNG should be warning-free with warning level W4. Two warnings were disabled +with pragmas though: warning 4244 about implicit conversions, and warning 4996 +where it wants to use a non-standard function fopen_s instead of the standard C +fopen. + +Visual Studio may want "stdafx.h" files to be included in each source file and +give an error "unexpected end of file while looking for precompiled header". +This is not standard C++ and will not be added to the stock LodePNG. You can +disable it for lodepng.cpp only by right clicking it, Properties, C/C++, +Precompiled Headers, and set it to Not Using Precompiled Headers there. + +NOTE: Modern versions of VS should be fully supported, but old versions, e.g. +VS6, are not guaranteed to work. + +*) Compilers on Macintosh + +LodePNG has been reported to work both with gcc and LLVM for Macintosh, both for +C and C++. + +*) Other Compilers + +If you encounter problems on any compilers, feel free to let me know and I may +try to fix it if the compiler is modern and standards compliant. + + +10. examples +------------ + +This decoder example shows the most basic usage of LodePNG. More complex +examples can be found on the LodePNG website. + +10.1. 
decoder C++ example
+-------------------------
+
+#include "lodepng.h"
+#include <iostream>
+
+int main(int argc, char *argv[]) {
+  const char* filename = argc > 1 ? argv[1] : "test.png";
+
+  //load and decode
+  std::vector<unsigned char> image;
+  unsigned width, height;
+  unsigned error = lodepng::decode(image, width, height, filename);
+
+  //if there's an error, display it
+  if(error) std::cout << "decoder error " << error << ": " << lodepng_error_text(error) << std::endl;
+
+  //the pixels are now in the vector "image", 4 bytes per pixel, ordered RGBARGBA..., use it as texture, draw it, ...
+}
+
+10.2. decoder C example
+-----------------------
+
+#include "lodepng.h"
+
+int main(int argc, char *argv[]) {
+  unsigned error;
+  unsigned char* image;
+  unsigned width, height;
+  const char* filename = argc > 1 ? argv[1] : "test.png";
+
+  error = lodepng_decode32_file(&image, &width, &height, filename);
+
+  if(error) printf("decoder error %u: %s\n", error, lodepng_error_text(error));
+
+  / * use image here * /
+
+  free(image);
+  return 0;
+}
+
+11. state settings reference
+----------------------------
+
+A quick reference of some settings to set on the LodePNGState
+
+For decoding:
+
+state.decoder.zlibsettings.ignore_adler32: ignore ADLER32 checksums
+state.decoder.zlibsettings.custom_...: use custom inflate function
+state.decoder.ignore_crc: ignore CRC checksums
+state.decoder.ignore_critical: ignore unknown critical chunks
+state.decoder.ignore_end: ignore missing IEND chunk. May fail if this corruption causes other errors
+state.decoder.color_convert: convert internal PNG color to chosen one
+state.decoder.read_text_chunks: whether to read in text metadata chunks
+state.decoder.remember_unknown_chunks: whether to read in unknown chunks
+state.info_raw.colortype: desired color type for decoded image
+state.info_raw.bitdepth: desired bit depth for decoded image
+state.info_raw....: more color settings, see struct LodePNGColorMode
+state.info_png....: no settings for decoder but output, see struct LodePNGInfo
+
+For encoding:
+
+state.encoder.zlibsettings.btype: disable compression by setting it to 0
+state.encoder.zlibsettings.use_lz77: use LZ77 in compression
+state.encoder.zlibsettings.windowsize: tweak LZ77 windowsize
+state.encoder.zlibsettings.minmatch: tweak min LZ77 length to match
+state.encoder.zlibsettings.nicematch: tweak LZ77 match where to stop searching
+state.encoder.zlibsettings.lazymatching: try one more LZ77 matching
+state.encoder.zlibsettings.custom_...: use custom deflate function
+state.encoder.auto_convert: choose optimal PNG color type, if 0 uses info_png
+state.encoder.filter_palette_zero: PNG filter strategy for palette
+state.encoder.filter_strategy: PNG filter strategy to encode with
+state.encoder.force_palette: add palette even if not encoding to one
+state.encoder.add_id: add LodePNG identifier and version as a text chunk
+state.encoder.text_compression: use compressed text chunks for metadata
+state.info_raw.colortype: color type of raw input image you provide
+state.info_raw.bitdepth: bit depth of raw input image you provide
+state.info_raw: more color settings, see struct LodePNGColorMode
+state.info_png.color.colortype: desired color type if auto_convert is false
+state.info_png.color.bitdepth: desired bit depth if auto_convert is false
+state.info_png.color....: more color settings, see struct LodePNGColorMode
+state.info_png....: more PNG related settings, see struct LodePNGInfo
+
+
+12.
changes +----------- + +The version number of LodePNG is the date of the change given in the format +yyyymmdd. + +Some changes aren't backwards compatible. Those are indicated with a (!) +symbol. + +Not all changes are listed here, the commit history in github lists more: +https://github.com/lvandeve/lodepng + +*) 17 okt 2020: prevent decoding too large text/icc chunks by default. +*) 06 mar 2020: simplified some of the dynamic memory allocations. +*) 12 jan 2020: (!) added 'end' argument to lodepng_chunk_next to allow correct + overflow checks. +*) 14 aug 2019: around 25% faster decoding thanks to huffman lookup tables. +*) 15 jun 2019: (!) auto_choose_color API changed (for bugfix: don't use palette + if gray ICC profile) and non-ICC LodePNGColorProfile renamed to + LodePNGColorStats. +*) 30 dec 2018: code style changes only: removed newlines before opening braces. +*) 10 sep 2018: added way to inspect metadata chunks without full decoding. +*) 19 aug 2018: (!) fixed color mode bKGD is encoded with and made it use + palette index in case of palette. +*) 10 aug 2018: (!) added support for gAMA, cHRM, sRGB and iCCP chunks. This + change is backwards compatible unless you relied on unknown_chunks for those. +*) 11 jun 2018: less restrictive check for pixel size integer overflow +*) 14 jan 2018: allow optionally ignoring a few more recoverable errors +*) 17 sep 2017: fix memory leak for some encoder input error cases +*) 27 nov 2016: grey+alpha auto color model detection bugfix +*) 18 apr 2016: Changed qsort to custom stable sort (for platforms w/o qsort). +*) 09 apr 2016: Fixed colorkey usage detection, and better file loading (within + the limits of pure C90). +*) 08 dec 2015: Made load_file function return error if file can't be opened. +*) 24 okt 2015: Bugfix with decoding to palette output. +*) 18 apr 2015: Boundary PM instead of just package-merge for faster encoding. +*) 24 aug 2014: Moved to github +*) 23 aug 2014: Reduced needless memory usage of decoder. +*) 28 jun 2014: Removed fix_png setting, always support palette OOB for + simplicity. Made ColorProfile public. +*) 09 jun 2014: Faster encoder by fixing hash bug and more zeros optimization. +*) 22 dec 2013: Power of two windowsize required for optimization. +*) 15 apr 2013: Fixed bug with LAC_ALPHA and color key. +*) 25 mar 2013: Added an optional feature to ignore some PNG errors (fix_png). +*) 11 mar 2013: (!) Bugfix with custom free. Changed from "my" to "lodepng_" + prefix for the custom allocators and made it possible with a new #define to + use custom ones in your project without needing to change lodepng's code. +*) 28 jan 2013: Bugfix with color key. +*) 27 okt 2012: Tweaks in text chunk keyword length error handling. +*) 8 okt 2012: (!) Added new filter strategy (entropy) and new auto color mode. + (no palette). Better deflate tree encoding. New compression tweak settings. + Faster color conversions while decoding. Some internal cleanups. +*) 23 sep 2012: Reduced warnings in Visual Studio a little bit. +*) 1 sep 2012: (!) Removed #define's for giving custom (de)compression functions + and made it work with function pointers instead. +*) 23 jun 2012: Added more filter strategies. Made it easier to use custom alloc + and free functions and toggle #defines from compiler flags. Small fixes. +*) 6 may 2012: (!) Made plugging in custom zlib/deflate functions more flexible. +*) 22 apr 2012: (!) Made interface more consistent, renaming a lot. Removed + redundant C++ codec classes. Reduced amount of structs. 
Everything changed, + but it is cleaner now imho and functionality remains the same. Also fixed + several bugs and shrunk the implementation code. Made new samples. +*) 6 nov 2011: (!) By default, the encoder now automatically chooses the best + PNG color model and bit depth, based on the amount and type of colors of the + raw image. For this, autoLeaveOutAlphaChannel replaced by auto_choose_color. +*) 9 okt 2011: simpler hash chain implementation for the encoder. +*) 8 sep 2011: lz77 encoder lazy matching instead of greedy matching. +*) 23 aug 2011: tweaked the zlib compression parameters after benchmarking. + A bug with the PNG filtertype heuristic was fixed, so that it chooses much + better ones (it's quite significant). A setting to do an experimental, slow, + brute force search for PNG filter types is added. +*) 17 aug 2011: (!) changed some C zlib related function names. +*) 16 aug 2011: made the code less wide (max 120 characters per line). +*) 17 apr 2011: code cleanup. Bugfixes. Convert low to 16-bit per sample colors. +*) 21 feb 2011: fixed compiling for C90. Fixed compiling with sections disabled. +*) 11 dec 2010: encoding is made faster, based on suggestion by Peter Eastman + to optimize long sequences of zeros. +*) 13 nov 2010: added LodePNG_InfoColor_hasPaletteAlpha and + LodePNG_InfoColor_canHaveAlpha functions for convenience. +*) 7 nov 2010: added LodePNG_error_text function to get error code description. +*) 30 okt 2010: made decoding slightly faster +*) 26 okt 2010: (!) changed some C function and struct names (more consistent). + Reorganized the documentation and the declaration order in the header. +*) 08 aug 2010: only changed some comments and external samples. +*) 05 jul 2010: fixed bug thanks to warnings in the new gcc version. +*) 14 mar 2010: fixed bug where too much memory was allocated for char buffers. +*) 02 sep 2008: fixed bug where it could create empty tree that linux apps could + read by ignoring the problem but windows apps couldn't. +*) 06 jun 2008: added more error checks for out of memory cases. +*) 26 apr 2008: added a few more checks here and there to ensure more safety. +*) 06 mar 2008: crash with encoding of strings fixed +*) 02 feb 2008: support for international text chunks added (iTXt) +*) 23 jan 2008: small cleanups, and #defines to divide code in sections +*) 20 jan 2008: support for unknown chunks allowing using LodePNG for an editor. +*) 18 jan 2008: support for tIME and pHYs chunks added to encoder and decoder. +*) 17 jan 2008: ability to encode and decode compressed zTXt chunks added + Also various fixes, such as in the deflate and the padding bits code. +*) 13 jan 2008: Added ability to encode Adam7-interlaced images. Improved + filtering code of encoder. +*) 07 jan 2008: (!) changed LodePNG to use ISO C90 instead of C++. A + C++ wrapper around this provides an interface almost identical to before. + Having LodePNG be pure ISO C90 makes it more portable. The C and C++ code + are together in these files but it works both for C and C++ compilers. +*) 29 dec 2007: (!) 
changed most integer types to unsigned int + other tweaks +*) 30 aug 2007: bug fixed which makes this Borland C++ compatible +*) 09 aug 2007: some VS2005 warnings removed again +*) 21 jul 2007: deflate code placed in new namespace separate from zlib code +*) 08 jun 2007: fixed bug with 2- and 4-bit color, and small interlaced images +*) 04 jun 2007: improved support for Visual Studio 2005: crash with accessing + invalid std::vector element [0] fixed, and level 3 and 4 warnings removed +*) 02 jun 2007: made the encoder add a tag with version by default +*) 27 may 2007: zlib and png code separated (but still in the same file), + simple encoder/decoder functions added for more simple usage cases +*) 19 may 2007: minor fixes, some code cleaning, new error added (error 69), + moved some examples from here to lodepng_examples.cpp +*) 12 may 2007: palette decoding bug fixed +*) 24 apr 2007: changed the license from BSD to the zlib license +*) 11 mar 2007: very simple addition: ability to encode bKGD chunks. +*) 04 mar 2007: (!) tEXt chunk related fixes, and support for encoding + palettized PNG images. Plus little interface change with palette and texts. +*) 03 mar 2007: Made it encode dynamic Huffman shorter with repeat codes. + Fixed a bug where the end code of a block had length 0 in the Huffman tree. +*) 26 feb 2007: Huffman compression with dynamic trees (BTYPE 2) now implemented + and supported by the encoder, resulting in smaller PNGs at the output. +*) 27 jan 2007: Made the Adler-32 test faster so that a timewaste is gone. +*) 24 jan 2007: gave encoder an error interface. Added color conversion from any + greyscale type to 8-bit greyscale with or without alpha. +*) 21 jan 2007: (!) Totally changed the interface. It allows more color types + to convert to and is more uniform. See the manual for how it works now. +*) 07 jan 2007: Some cleanup & fixes, and a few changes over the last days: + encode/decode custom tEXt chunks, separate classes for zlib & deflate, and + at last made the decoder give errors for incorrect Adler32 or Crc. +*) 01 jan 2007: Fixed bug with encoding PNGs with less than 8 bits per channel. +*) 29 dec 2006: Added support for encoding images without alpha channel, and + cleaned out code as well as making certain parts faster. +*) 28 dec 2006: Added "Settings" to the encoder. +*) 26 dec 2006: The encoder now does LZ77 encoding and produces much smaller files now. + Removed some code duplication in the decoder. Fixed little bug in an example. +*) 09 dec 2006: (!) Placed output parameters of public functions as first parameter. + Fixed a bug of the decoder with 16-bit per color. +*) 15 okt 2006: Changed documentation structure +*) 09 okt 2006: Encoder class added. It encodes a valid PNG image from the + given image buffer, however for now it's not compressed. +*) 08 sep 2006: (!) Changed to interface with a Decoder class +*) 30 jul 2006: (!) LodePNG_InfoPng , width and height are now retrieved in different + way. Renamed decodePNG to decodePNGGeneric. +*) 29 jul 2006: (!) Changed the interface: image info is now returned as a + struct of type LodePNG::LodePNG_Info, instead of a vector, which was a bit clumsy. +*) 28 jul 2006: Cleaned the code and added new error checks. + Corrected terminology "deflate" into "inflate". +*) 23 jun 2006: Added SDL example in the documentation in the header, this + example allows easy debugging by displaying the PNG and its transparency. +*) 22 jun 2006: (!) Changed way to obtain error value. Added + loadFile function for convenience. 
Made decodePNG32 faster. +*) 21 jun 2006: (!) Changed type of info vector to unsigned. + Changed position of palette in info vector. Fixed an important bug that + happened on PNGs with an uncompressed block. +*) 16 jun 2006: Internally changed unsigned into unsigned where + needed, and performed some optimizations. +*) 07 jun 2006: (!) Renamed functions to decodePNG and placed them + in LodePNG namespace. Changed the order of the parameters. Rewrote the + documentation in the header. Renamed files to lodepng.cpp and lodepng.h +*) 22 apr 2006: Optimized and improved some code +*) 07 sep 2005: (!) Changed to std::vector interface +*) 12 aug 2005: Initial release (C++, decoder only) + + +13. contact information +----------------------- + +Feel free to contact me with suggestions, problems, comments, ... concerning +LodePNG. If you encounter a PNG image that doesn't work properly with this +decoder, feel free to send it and I'll use it to find and fix the problem. + +My email address is (puzzle the account and domain together with an @ symbol): +Domain: gmail dot com. +Account: lode dot vandevenne. + + +Copyright (c) 2005-2020 Lode Vandevenne +*/ From d11773c5fe237e70a42fb8634cd5aff31614e5e9 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sat, 27 Mar 2021 11:52:57 -0700 Subject: [PATCH 020/901] astc-encoder - switch to v2.5 sources A few small changes, but mostly a drop-in replacement. Updated makefiles. --- libkram/CMakeLists.txt | 5 - libkram/astc-encoder/astcenc.h | 361 +++- .../astcenc_averages_and_directions.cpp | 873 +++++---- libkram/astc-encoder/astcenc_block_sizes2.cpp | 588 +++--- .../astc-encoder/astcenc_color_quantize.cpp | 1200 +++++------- .../astc-encoder/astcenc_color_unquantize.cpp | 764 +++----- .../astcenc_compress_symbolic.cpp | 1305 ++++++++------ .../astc-encoder/astcenc_compute_variance.cpp | 314 ++-- .../astcenc_decompress_symbolic.cpp | 407 +++-- .../astc-encoder/astcenc_diagnostic_trace.cpp | 219 +++ .../astc-encoder/astcenc_diagnostic_trace.h | 225 +++ .../astcenc_encoding_choice_error.cpp | 363 ++-- libkram/astc-encoder/astcenc_entry.cpp | 636 +++++-- .../astcenc_find_best_partitioning.cpp | 852 ++++----- .../astcenc_ideal_endpoints_and_weights.cpp | 1605 +++++++++-------- libkram/astc-encoder/astcenc_image.cpp | 821 +++------ .../astc-encoder/astcenc_integer_sequence.cpp | 455 ++--- libkram/astc-encoder/astcenc_internal.h | 744 +++++--- .../astcenc_kmeans_partitioning.cpp | 238 +-- libkram/astc-encoder/astcenc_mathlib.cpp | 37 +- libkram/astc-encoder/astcenc_mathlib.h | 546 +++--- .../astcenc_mathlib_softfloat.cpp | 34 +- .../astc-encoder/astcenc_partition_tables.cpp | 14 +- .../astcenc_percentile_tables.cpp | 6 +- .../astcenc_pick_best_endpoint_format.cpp | 456 ++--- .../astcenc_platform_isa_detection.cpp | 57 +- libkram/astc-encoder/astcenc_quantization.cpp | 29 +- .../astcenc_symbolic_physical.cpp | 71 +- libkram/astc-encoder/astcenc_vecmathlib.h | 977 ++++------ .../astc-encoder/astcenc_vecmathlib_avx2_8.h | 943 ++++++++++ .../astcenc_vecmathlib_common_4.h | 352 ++++ .../astc-encoder/astcenc_vecmathlib_neon_4.h | 915 ++++++++++ .../astcenc_vecmathlib_neon_armv7_4.h | 186 ++ .../astc-encoder/astcenc_vecmathlib_none_4.h | 1025 +++++++++++ .../astc-encoder/astcenc_vecmathlib_sse_4.h | 1008 +++++++++++ libkram/astc-encoder/astcenc_weight_align.cpp | 219 +-- .../astcenc_weight_quant_xfer_tables.cpp | 2 +- libkram/kram/Kram.cpp | 7 + libkram/kram/KramConfig.h | 7 + libkram/kram/KramImage.cpp | 150 +- libkram/kram/KramImage.h | 2 +- plugin/kps/KPS.cpp | 2 - 42 files 
changed, 11729 insertions(+), 7291 deletions(-) create mode 100644 libkram/astc-encoder/astcenc_diagnostic_trace.cpp create mode 100644 libkram/astc-encoder/astcenc_diagnostic_trace.h create mode 100755 libkram/astc-encoder/astcenc_vecmathlib_avx2_8.h create mode 100755 libkram/astc-encoder/astcenc_vecmathlib_common_4.h create mode 100755 libkram/astc-encoder/astcenc_vecmathlib_neon_4.h create mode 100644 libkram/astc-encoder/astcenc_vecmathlib_neon_armv7_4.h create mode 100644 libkram/astc-encoder/astcenc_vecmathlib_none_4.h create mode 100755 libkram/astc-encoder/astcenc_vecmathlib_sse_4.h diff --git a/libkram/CMakeLists.txt b/libkram/CMakeLists.txt index 3c3b3be5..0b501ee3 100644 --- a/libkram/CMakeLists.txt +++ b/libkram/CMakeLists.txt @@ -174,11 +174,6 @@ elseif (UNIXBUILD) endif() - -target_compile_definitions(${myTargetLib} PUBLIC - -DASTCENC_SSE=42 -DASTCENC_AVX=1 -DASTCENC_POPCNT=0 - -DASTCENC_VECALIGN=16 -DASTCENC_ISA_INVARIANCE=0) - target_compile_definitions(${myTargetLib} PUBLIC "-DCOMPILE_ATE=${COMPILE_ATE}" "-DCOMPILE_BCENC=${COMPILE_BCENC}" diff --git a/libkram/astc-encoder/astcenc.h b/libkram/astc-encoder/astcenc.h index 2a9a6e56..618ded49 100644 --- a/libkram/astc-encoder/astcenc.h +++ b/libkram/astc-encoder/astcenc.h @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2020 Arm Limited +// Copyright 2020-2021 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. You may obtain a copy @@ -43,9 +43,9 @@ * allocate multiple contexts and assign each context to a thread. * * An application wishing to process a single image in using multiple * threads can configure the context for multi-threaded use, and invoke - * astcenc_compress() once per thread for faster compression. The caller - * is responsible for creating the worker threads. Note that - * decompression is always single-threaded. + * astcenc_compress/decompress() once per thread for faster processing. + * The caller is responsible for creating the worker threads, and + * synchronizing between images. * * Threading * ========= @@ -76,21 +76,21 @@ * Images * ====== * - * Images are passed in as a astcenc_image structure. Inputs can be either - * 8-bit unorm inputs (passed in via the data8 pointer), or 16-bit floating - * point inputs (passed in via the data16 pointer). The unused pointer should - * be set to nullptr. + * Images are passed in as an astcenc_image structure. Inputs can be either + * 8-bit unorm, 16-bit half-float, or 32-bit float, as indicated by the + * data_type field. * * Images can be any dimension; there is no requirement for them to be a * multiple of the ASTC block size. * - * Data is always passed in as 4 color channels, and accessed as 3D array - * indexed using e.g. + * Data is always passed in as 4 color channels, and accessed as an array of + * 2D image slices. Data within an image slice is always tightly packed without + * padding. 
Addresing looks like this: * - * data8[z_coord][y_coord][x_coord * 4 ] // Red - * data8[z_coord][y_coord][x_coord * 4 + 1] // Green - * data8[z_coord][y_coord][x_coord * 4 + 2] // Blue - * data8[z_coord][y_coord][x_coord * 4 + 3] // Alpha + * data[z_coord][y_coord * x_dim * 4 + x_coord * 4 ] // Red + * data[z_coord][y_coord * x_dim * 4 + x_coord * 4 + 1] // Green + * data[z_coord][y_coord * x_dim * 4 + x_coord * 4 + 2] // Blue + * data[z_coord][y_coord * x_dim * 4 + x_coord * 4 + 3] // Alpha * * Common compressor usage * ======================= @@ -144,6 +144,16 @@ #include #include +#if defined(ASTCENC_DYNAMIC_LIBRARY) + #if defined(_MSC_VER) + #define ASTCENC_PUBLIC extern "C" __declspec(dllexport) + #else + #define ASTCENC_PUBLIC extern "C" __attribute__ ((visibility ("default"))) + #endif +#else + #define ASTCENC_PUBLIC +#endif + /* ============================================================================ Data declarations ============================================================================ */ @@ -171,8 +181,8 @@ enum astcenc_error { ASTCENC_ERR_BAD_BLOCK_SIZE, /** @brief The call failed due to an out-of-spec color profile. */ ASTCENC_ERR_BAD_PROFILE, - /** @brief The call failed due to an out-of-spec quality preset. */ - ASTCENC_ERR_BAD_PRESET, + /** @brief The call failed due to an out-of-spec quality value. */ + ASTCENC_ERR_BAD_QUALITY, /** @brief The call failed due to an out-of-spec channel swizzle. */ ASTCENC_ERR_BAD_SWIZZLE, /** @brief The call failed due to an out-of-spec flag set. */ @@ -180,7 +190,11 @@ enum astcenc_error { /** @brief The call failed due to the context not supporting the operation. */ ASTCENC_ERR_BAD_CONTEXT, /** @brief The call failed due to unimplemented functionality. */ - ASTCENC_ERR_NOT_IMPLEMENTED + ASTCENC_ERR_NOT_IMPLEMENTED, +#if defined(ASTCENC_DIAGNOSTICS) + /** @brief The call failed due to an issue with diagnostic tracing. */ + ASTCENC_ERR_DTRACE_FAILURE, +#endif }; /** @@ -197,21 +211,20 @@ enum astcenc_profile { ASTCENC_PRF_HDR }; -/** - * @brief A codec quality preset. - */ -enum astcenc_preset { - /** @brief The fastest, lowest quality, search preset. */ - ASTCENC_PRE_FASTEST = 0, - /** @brief The fast search preset. */ - ASTCENC_PRE_FAST, - /** @brief The medium quality search preset. */ - ASTCENC_PRE_MEDIUM, - /** @brief The throrough quality search preset. */ - ASTCENC_PRE_THOROUGH, - /** @brief The exhaustive, highest quality, search preset. */ - ASTCENC_PRE_EXHAUSTIVE -}; +/** @brief The fastest, lowest quality, search preset. */ +static const float ASTCENC_PRE_FASTEST = 0.0f; + +/** @brief The fast search preset. */ +static const float ASTCENC_PRE_FAST = 10.0f; + +/** @brief The medium quality search preset. */ +static const float ASTCENC_PRE_MEDIUM = 60.0f; + +/** @brief The throrough quality search preset. */ +static const float ASTCENC_PRE_THOROUGH = 98.0f; + +/** @brief The exhaustive, highest quality, search preset. */ +static const float ASTCENC_PRE_EXHAUSTIVE = 100.0f; /** * @brief A codec channel swizzle selector. @@ -276,7 +289,32 @@ static const unsigned int ASTCENC_FLG_MAP_NORMAL = 1 << 0; * the color channels to be treated independently for the purposes of error * analysis. */ -static const unsigned int ASTCENC_FLG_MAP_MASK = 1 << 1; +static const unsigned int ASTCENC_FLG_MAP_MASK = 1 << 1; + +/** + * @brief Enable RGBM map compression. + * + * Input data will be treated as HDR data that has been stored in an LDR + * RGBM-encoded wrapper format. 
Data must be preprocessed by the user to be in + * LDR RGBM format before calling the compression function, this flag is only + * used to control the use of RGBM-specific heuristics and error metrics. + * + * IMPORTANT: The ASTC format is prone to bad failure modes with unconstrained + * RGBM data; very small M values can round to zero due to quantization and + * result in black or white pixels. It is *highly* recommended that the minimum + * value of M used in the encoding is kept above a lower threshold (try 16 or + * 32). Applying this threshold reduces the number of very dark colors that can + * be represented, but is still slightly higher precision than 8-bit LDR. + * + * When this flag is set the value of @c rgbm_m_scale in the context must be + * set to the RGBM scale factor used during reconstruction. This defaults to 5 + * when in RGBM mode. + * + * It is recommended that the value of @c cw_a_weight is set to twice the value + * of the multiplier scale, ensuring that the M value is accurately encoded. + * This defaults to 10 when in RGBM mode, matching the default scale factor. + */ +static const unsigned int ASTCENC_FLG_MAP_RGBM = 1 << 6; /** * @brief Enable alpha weighting. @@ -286,7 +324,7 @@ static const unsigned int ASTCENC_FLG_MAP_MASK = 1 << 1; * more accurately encode the alpha value in areas where the color value * is less significant. */ -static const unsigned int ASTCENC_FLG_USE_ALPHA_WEIGHT = 1 << 2; +static const unsigned int ASTCENC_FLG_USE_ALPHA_WEIGHT = 1 << 2; /** * @brief Enable perceptual error metrics. @@ -295,25 +333,38 @@ static const unsigned int ASTCENC_FLG_USE_ALPHA_WEIGHT = 1 << 2; * perceptual error rather than best PSNR. Only some input modes support * perceptual error metrics. */ -static const unsigned int ASTCENC_FLG_USE_PERCEPTUAL = 1 << 3; +static const unsigned int ASTCENC_FLG_USE_PERCEPTUAL = 1 << 3; /** * @brief Create a decompression-only context. * - * This mode enables context allocation to skip some transient buffer - * allocation, resulting in a lower-memory footprint. + * This mode disables support for compression. This enables context allocation + * to skip some transient buffer allocation, resulting in lower memory usage. + */ +static const unsigned int ASTCENC_FLG_DECOMPRESS_ONLY = 1 << 4; + +/** + * @brief Create a self-decompression context. + * + * This mode configures the compressor so that it is only guaranteed to be + * able to decompress images that were actually created using the current + * context. This is the common case for compression use cases, and setting this + * flag enables additional optimizations, but does mean that the context cannot + * reliably decompress arbitrary ASTC images. */ -static const unsigned int ASTCENC_FLG_DECOMPRESS_ONLY = 1 << 4; +static const unsigned int ASTCENC_FLG_SELF_DECOMPRESS_ONLY = 1 << 5; /** * @brief The bit mask of all valid flags. */ static const unsigned int ASTCENC_ALL_FLAGS = - ASTCENC_FLG_MAP_NORMAL | ASTCENC_FLG_MAP_MASK | + ASTCENC_FLG_MAP_NORMAL | + ASTCENC_FLG_MAP_RGBM | ASTCENC_FLG_USE_ALPHA_WEIGHT | ASTCENC_FLG_USE_PERCEPTUAL | - ASTCENC_FLG_DECOMPRESS_ONLY; + ASTCENC_FLG_DECOMPRESS_ONLY | + ASTCENC_FLG_SELF_DECOMPRESS_ONLY; /** * @brief The config structure. @@ -327,7 +378,8 @@ static const unsigned int ASTCENC_ALL_FLAGS = * the value in the config applies to the channel that exists after any * compression data swizzle is applied. */ -struct astcenc_config { +struct astcenc_config +{ /** @brief The color profile. 
*/ astcenc_profile profile; @@ -402,12 +454,22 @@ struct astcenc_config { */ float b_deblock_weight; + /** @brief The RGBM scale factor for the shared multiplier (-rgbm). */ + float rgbm_m_scale; + + /** + * @brief The maximum number of partitions searched (-partitioncountlimit). + * + * Valid values are between 1 and 4. + */ + unsigned int tune_partition_count_limit; + /** - * @brief The maximum number of partitions searched (-partitionlimit). + * @brief The maximum number of partitions searched (-partitionindexlimit). * * Valid values are between 1 and 1024. */ - unsigned int tune_partition_limit; + unsigned int tune_partition_index_limit; /** * @brief The maximum centile for block modes searched (-blockmodelimit). @@ -438,6 +500,30 @@ struct astcenc_config { */ float tune_db_limit; + /** + * @brief The amount of overshoot needed to early-out mode 0 fast path. + * + * We have a fast-path for mode 0 (1 partition, 1 plane) which uses only + * essential block modes as an initital search. This can short-cut + * compression for simple blocks, but to avoid shortcutting too much we + * force this to overshoot the MSE threshold needed to hit the block-local + * db_limit e.g. 1.0 = no overshoot, 2.0 = need half the error to trigger. + */ + float tune_mode0_mse_overshoot; + + /** + * @brief The amount of overshoot needed to early-out refinement. + * + * The codec will refine block candidates iteratively to improve the + * encoding, based on the @c tune_refinement_limit count. Earlier + * implementations will use all refinement iterations, even if the target + * threshold is reached. This tuning parameter allows an early out, but + * with an overshoot MSE threshold. Setting this to 1.0 will early-out as + * soon as the target is hit, but does reduce image quality vs the + * default behavior of over-refinement. + */ + float tune_refinement_mse_overshoot; + /** * @brief The threshold for skipping 3+ partitions (-partitionearlylimit). * @@ -451,27 +537,116 @@ struct astcenc_config { * This option is ineffective for normal maps. */ float tune_two_plane_early_out_limit; + +#if defined(ASTCENC_DIAGNOSTICS) + /** + * @brief The path to save the diagnostic trace data to. + * + * This option is not part of the public API, and requires special builds + * of the library. + */ + const char* trace_file_path; +#endif }; /** * @brief An uncompressed 2D or 3D image. * - * Inputs can be either 8-bit unorm inputs (passed in via the data8 pointer), - * or 16-bit floating point inputs (passed in via the data16 pointer). The - * unused pointer must be set to nullptr. Data is always passed in as 4 color - * channels, and accessed as 3D array indexed using [Z][Y][(X * 4) + (0..3)]. + * 3D image are passed in as an array of 2D slices. Each slice has identical + * size and color format. */ -struct astcenc_image { +struct astcenc_image +{ /** @brief The X dimension of the image, in texels. */ unsigned int dim_x; + /** @brief The Y dimension of the image, in texels. */ unsigned int dim_y; - /** @brief The X dimension of the image, in texels. */ + + /** @brief The Z dimension of the image, in texels. */ unsigned int dim_z; + /** @brief The data type per channel. */ astcenc_type data_type; - /** @brief The data; actually of type ***. */ - void *data; + + /** @brief The array of 2D slices, of length @c dim_z. */ + void** data; +}; + +/** + * @brief A block encoding metadata query result. 
+ * + * If the block is an error block or a constant color block or an error block + * all fields other than the profile, block dimensions, and error/constant + * indicator will be zero. + */ +struct astcenc_block_info +{ + /** @brief The block encoding color profile. */ + astcenc_profile profile; + + /** @brief The number of texels in the X dimension. */ + int block_x; + + /** @brief The number of texels in the Y dimension. */ + int block_y; + + /** @brief The number of texel in the Z dimension. */ + int block_z; + + /** @brief The number of texels in the block. */ + int texel_count; + + /** @brief True if this block is an error block. */ + bool is_error_block; + + /** @brief True if this block is a constant color block. */ + bool is_constant_block; + + /** @brief True if this block is an HDR block. */ + bool is_hdr_block; + + /** @brief True if this block uses two weight planes. */ + bool is_dual_plane_block; + + /** @brief The number of partitions if not constant color. */ + int partition_count; + + /** @brief The partition index if 2 - 4 partitions used. */ + int partition_index; + + /** @brief The component index of the second plane if dual plane. */ + int dual_plane_component; + + /** @brief The color endpoint encoding mode for each partition. */ + int color_endpoint_modes[4]; + + /** @brief The number of color endpoint quantization levels. */ + int color_level_count; + + /** @brief The number of weight quantization levels. */ + int weight_level_count; + + /** @brief The number of weights in the X dimension. */ + int weight_x; + + /** @brief The number of weights in the Y dimension. */ + int weight_y; + + /** @brief The number of weights in the Z dimension. */ + int weight_z; + + /** @brief The unpacked color endpoints for each partition. */ + float color_endpoints[4][2][4]; + + /** @brief The per-texel interpolation weights for the block. */ + float weight_values_plane1[216]; + + /** @brief The per-texel interpolation weights for the block. */ + float weight_values_plane2[216]; + + /** @brief The per-texel partition assignments for the block. */ + uint8_t partition_assignment[216]; }; /** @@ -484,21 +659,24 @@ struct astcenc_image { * @param block_x ASTC block size X dimension. * @param block_y ASTC block size Y dimension. * @param block_z ASTC block size Z dimension. - * @param preset Search quality preset. + * @param quality Search quality preset / effort level. Either an + * @c ASTCENC_PRE_* value, or a effort level between 0 + * and 100. Performance is not linear between 0 and 100. + * @param flags A valid set of ASTCENC_FLG_* flag bits. * @param[out] config Output config struct to populate. * * @return ASTCENC_SUCCESS on success, or an error if the inputs are invalid * either individually, or in combination. */ -astcenc_error astcenc_config_init( +ASTCENC_PUBLIC astcenc_error astcenc_config_init( astcenc_profile profile, unsigned int block_x, unsigned int block_y, unsigned int block_z, - astcenc_preset preset, + float quality, unsigned int flags, - astcenc_config& config); + astcenc_config* config); /** * @brief Allocate a new codec context based on a config. @@ -515,14 +693,13 @@ astcenc_error astcenc_config_init( * be set when creating ay context. * * @param[in] config Codec config. - * @param thread_count Thread count to configure for. Decompress-only - * contexts must have a thread_count of 1. + * @param thread_count Thread count to configure for. * @param[out] context Location to store an opaque context pointer. 
* * @return ASTCENC_SUCCESS on success, or an error if context creation failed. */ -astcenc_error astcenc_context_alloc( - const astcenc_config& config, +ASTCENC_PUBLIC astcenc_error astcenc_context_alloc( + const astcenc_config* config, unsigned int thread_count, astcenc_context** context); @@ -536,7 +713,7 @@ astcenc_error astcenc_context_alloc( * available. Each thread must have a unique thread_index. * * @param context Codec context. - * @param[in,out] image Input image. + * @param[in,out] image An input image, in 2D slices. * @param swizzle Compression data swizzle. * @param[out] data_out Pointer to output data array. * @param data_len Length of the output data array. @@ -544,16 +721,16 @@ astcenc_error astcenc_context_alloc( * * @return ASTCENC_SUCCESS on success, or an error if compression failed. */ -astcenc_error astcenc_compress_image( +ASTCENC_PUBLIC astcenc_error astcenc_compress_image( astcenc_context* context, - astcenc_image& image, + astcenc_image* image, astcenc_swizzle swizzle, uint8_t* data_out, size_t data_len, unsigned int thread_index); /** - * @brief Reset the compressor state for a new compression. + * @brief Reset the codec state for a new compression. * * The caller is responsible for synchronizing threads in the worker thread * pool. This function must only be called when all threads have exited the @@ -564,35 +741,71 @@ astcenc_error astcenc_compress_image( * * @return ASTCENC_SUCCESS on success, or an error if reset failed. */ -astcenc_error astcenc_compress_reset( +ASTCENC_PUBLIC astcenc_error astcenc_compress_reset( astcenc_context* context); /** * @brief Decompress an image. * - * @param context Codec context. - * @param[in] data Pointer to compressed data. - * @param data_len Length of the compressed data, in bytes. - * @param[in,out] image_out Output image. - * @param swizzle Decompression data swizzle. + * @param context Codec context. + * @param[in] data Pointer to compressed data. + * @param data_len Length of the compressed data, in bytes. + * @param[in,out] image_out Output image. + * @param swizzle Decompression data swizzle. + * @param thread_index Thread index [0..N-1] of calling thread. * * @return ASTCENC_SUCCESS on success, or an error if decompression failed. */ -astcenc_error astcenc_decompress_image( +ASTCENC_PUBLIC astcenc_error astcenc_decompress_image( astcenc_context* context, const uint8_t* data, size_t data_len, - astcenc_image& image_out, - astcenc_swizzle swizzle); + astcenc_image* image_out, + astcenc_swizzle swizzle, + unsigned int thread_index); + +/** + * @brief Reset the codec state for a new decompression. + * + * The caller is responsible for synchronizing threads in the worker thread + * pool. This function must only be called when all threads have exited the + * astcenc_decompress_image() function for image N, but before any thread + * enters it for image N + 1. + * + * @param context Codec context. + * + * @return ASTCENC_SUCCESS on success, or an error if reset failed. + */ +ASTCENC_PUBLIC astcenc_error astcenc_decompress_reset( + astcenc_context* context); /** * Free the compressor context. * * @param context The codec context. */ -void astcenc_context_free( +ASTCENC_PUBLIC void astcenc_context_free( astcenc_context* context); +/** + * @brief Provide a high level summary of a block's encoding. + * + * This feature is primarily useful for codec developers but may be useful + * for developers building advanced content packaging pipelines. + * + * @param context Codec context. 
+ * @param data One block of compressesd ASTC data. + * @param info The output info structure to populate. + * + * @return ASTCENC_SUCCESS if the block was decoded, or an error otherwise. + * Note that this function will return success even if the block itself + * was an error block encoding, as the decode was correctly handled. + */ +ASTCENC_PUBLIC astcenc_error astcenc_get_block_info( + astcenc_context* context, + const uint8_t data[16], + astcenc_block_info* info); + /** * @brief Get a printable string for specific status code. * @@ -600,7 +813,7 @@ void astcenc_context_free( * * @return A human readable nul-terminated string. */ -const char* astcenc_get_error_string( +ASTCENC_PUBLIC const char* astcenc_get_error_string( astcenc_error status); #endif diff --git a/libkram/astc-encoder/astcenc_averages_and_directions.cpp b/libkram/astc-encoder/astcenc_averages_and_directions.cpp index 8e34ccec..048f0881 100644 --- a/libkram/astc-encoder/astcenc_averages_and_directions.cpp +++ b/libkram/astc-encoder/astcenc_averages_and_directions.cpp @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2011-2020 Arm Limited +// Copyright 2011-2021 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. You may obtain a copy @@ -37,81 +37,71 @@ // We have separate versions for blocks with and without alpha, since the // processing for blocks with alpha is significantly more expensive. The // direction vectors it produces are NOT normalized. -void compute_averages_and_directions_rgba( +void compute_avgs_and_dirs_4_comp( const partition_info* pt, const imageblock* blk, const error_weight_block* ewb, - const float4* color_scalefactors, - float4* averages, - float4* directions_rgba + partition_metrics pms[4] ) { int partition_count = pt->partition_count; + promise(partition_count > 0); + for (int partition = 0; partition < partition_count; partition++) { const uint8_t *weights = pt->texels_of_partition[partition]; - int texelcount = pt->texels_per_partition[partition]; - float4 base_sum = float4(0.0f); + vfloat4 base_sum = vfloat4::zero(); float partition_weight = 0.0f; - for (int i = 0; i < texelcount; i++) + int texel_count = pt->partition_texel_count[partition]; + promise(texel_count > 0); + + for (int i = 0; i < texel_count; i++) { int iwt = weights[i]; float weight = ewb->texel_weight[iwt]; - float4 texel_datum = float4(blk->data_r[iwt], - blk->data_g[iwt], - blk->data_b[iwt], - blk->data_a[iwt]) * weight; - partition_weight += weight; + vfloat4 texel_datum = blk->texel(iwt); - base_sum = base_sum + texel_datum; + partition_weight += weight; + base_sum = base_sum + texel_datum * weight; } - float4 average = base_sum * (1.0f / MAX(partition_weight, 1e-7f)); - averages[partition] = average * color_scalefactors[partition]; + vfloat4 average = base_sum * (1.0f / astc::max(partition_weight, 1e-7f)); + pms[partition].avg = average * pms[partition].color_scale; - float4 sum_xp = float4(0.0f); - float4 sum_yp = float4(0.0f); - float4 sum_zp = float4(0.0f); - float4 sum_wp = float4(0.0f); + vfloat4 sum_xp = vfloat4::zero(); + vfloat4 sum_yp = vfloat4::zero(); + vfloat4 sum_zp = vfloat4::zero(); + vfloat4 sum_wp = vfloat4::zero(); - for (int i = 0; i < texelcount; i++) + for (int i = 0; i < texel_count; i++) { int iwt = weights[i]; float weight = ewb->texel_weight[iwt]; - float4 texel_datum = float4(blk->data_r[iwt], - 
blk->data_g[iwt], - blk->data_b[iwt], - blk->data_a[iwt]); + vfloat4 texel_datum = blk->texel(iwt); texel_datum = (texel_datum - average) * weight; - if (texel_datum.r > 0.0f) - { - sum_xp = sum_xp + texel_datum; - } + vfloat4 zero = vfloat4::zero(); - if (texel_datum.g > 0.0f) - { - sum_yp = sum_yp + texel_datum; - } + vmask4 tdm0 = vfloat4(texel_datum.lane<0>()) > zero; + sum_xp += select(zero, texel_datum, tdm0); - if (texel_datum.b > 0.0f) - { - sum_zp = sum_zp + texel_datum; - } + vmask4 tdm1 = vfloat4(texel_datum.lane<1>()) > zero; + sum_yp += select(zero, texel_datum, tdm1); - if (texel_datum.a > 0.0f) - { - sum_wp = sum_wp + texel_datum; - } + vmask4 tdm2 = vfloat4(texel_datum.lane<2>()) > zero; + sum_zp += select(zero, texel_datum, tdm2); + + vmask4 tdm3 = vfloat4(texel_datum.lane<3>()) > zero; + sum_wp += select(zero, texel_datum, tdm3); } - float prod_xp = dot(sum_xp, sum_xp); - float prod_yp = dot(sum_yp, sum_yp); - float prod_zp = dot(sum_zp, sum_zp); - float prod_wp = dot(sum_wp, sum_wp); + float prod_xp = dot_s(sum_xp, sum_xp); + float prod_yp = dot_s(sum_yp, sum_yp); + float prod_zp = dot_s(sum_zp, sum_zp); + float prod_wp = dot_s(sum_wp, sum_wp); - float4 best_vector = sum_xp; + vfloat4 best_vector = sum_xp; float best_sum = prod_xp; if (prod_yp > best_sum) @@ -131,109 +121,21 @@ void compute_averages_and_directions_rgba( best_vector = sum_wp; } - directions_rgba[partition] = best_vector; - } -} - -void compute_averages_and_directions_rgb( - const partition_info* pt, - const imageblock* blk, - const error_weight_block* ewb, - const float4* color_scalefactors, - float3* averages, - float3* directions_rgb -) { - int partition_count = pt->partition_count; - const float *texel_weights = ewb->texel_weight_rgb; - - for (int partition = 0; partition < partition_count; partition++) - { - const uint8_t *weights = pt->texels_of_partition[partition]; - int texelcount = pt->texels_per_partition[partition]; - - float3 base_sum = float3(0.0f, 0.0f, 0.0f); - float partition_weight = 0.0f; - - for (int i = 0; i < texelcount; i++) - { - int iwt = weights[i]; - float weight = texel_weights[iwt]; - float3 texel_datum = float3(blk->data_r[iwt], - blk->data_g[iwt], - blk->data_b[iwt]) * weight; - partition_weight += weight; - - base_sum = base_sum + texel_datum; - } - - float4 csf = color_scalefactors[partition]; - float3 average = base_sum * (1.0f / MAX(partition_weight, 1e-7f)); - averages[partition] = average * float3(csf.r, csf.g, csf.b); - - float3 sum_xp = float3(0.0f); - float3 sum_yp = float3(0.0f); - float3 sum_zp = float3(0.0f); - - for (int i = 0; i < texelcount; i++) - { - int iwt = weights[i]; - float weight = texel_weights[iwt]; - float3 texel_datum = float3(blk->data_r[iwt], - blk->data_g[iwt], - blk->data_b[iwt]); - texel_datum = (texel_datum - average) * weight; - - if (texel_datum.r > 0.0f) - { - sum_xp = sum_xp + texel_datum; - } - - if (texel_datum.g > 0.0f) - { - sum_yp = sum_yp + texel_datum; - } - - if (texel_datum.b > 0.0f) - { - sum_zp = sum_zp + texel_datum; - } - } - - float prod_xp = dot(sum_xp, sum_xp); - float prod_yp = dot(sum_yp, sum_yp); - float prod_zp = dot(sum_zp, sum_zp); - - float3 best_vector = sum_xp; - float best_sum = prod_xp; - - if (prod_yp > best_sum) - { - best_vector = sum_yp; - best_sum = prod_yp; - } - - if (prod_zp > best_sum) - { - best_vector = sum_zp; - } - - directions_rgb[partition] = best_vector; + pms[partition].dir = best_vector; } } -void compute_averages_and_directions_3_components( +void compute_avgs_and_dirs_3_comp( const 
partition_info* pt, const imageblock* blk, const error_weight_block* ewb, - const float3* color_scalefactors, int omitted_component, - float3* averages, - float3* directions + partition_metrics pm[4] ) { const float *texel_weights; - const float* data_vr; - const float* data_vg; - const float* data_vb; + const float* data_vr = blk->data_r; + const float* data_vg = blk->data_g; + const float* data_vb = blk->data_b; if (omitted_component == 0) { @@ -245,86 +147,82 @@ void compute_averages_and_directions_3_components( else if (omitted_component == 1) { texel_weights = ewb->texel_weight_rba; - data_vr = blk->data_r; data_vg = blk->data_b; data_vb = blk->data_a; } else if (omitted_component == 2) { texel_weights = ewb->texel_weight_rga; - data_vr = blk->data_r; - data_vg = blk->data_g; data_vb = blk->data_a; } else { assert(omitted_component == 3); texel_weights = ewb->texel_weight_rgb; - data_vr = blk->data_r; - data_vg = blk->data_g; - data_vb = blk->data_b; } int partition_count = pt->partition_count; + promise(partition_count > 0); + for (int partition = 0; partition < partition_count; partition++) { const uint8_t *weights = pt->texels_of_partition[partition]; - int texelcount = pt->texels_per_partition[partition]; - float3 base_sum = float3(0.0f); + vfloat4 base_sum = vfloat4::zero(); float partition_weight = 0.0f; - for (int i = 0; i < texelcount; i++) + int texel_count = pt->partition_texel_count[partition]; + promise(texel_count > 0); + + for (int i = 0; i < texel_count; i++) { int iwt = weights[i]; float weight = texel_weights[iwt]; - float3 texel_datum = float3(data_vr[iwt], - data_vg[iwt], - data_vb[iwt]) * weight; - partition_weight += weight; + vfloat4 texel_datum(data_vr[iwt], + data_vg[iwt], + data_vb[iwt], + 0.0f); - base_sum = base_sum + texel_datum; + partition_weight += weight; + base_sum = base_sum + texel_datum * weight; } - float3 csf = color_scalefactors[partition]; + vfloat4 csf = pm[partition].color_scale; - float3 average = base_sum * (1.0f / MAX(partition_weight, 1e-7f)); - averages[partition] = average * float3(csf.r, csf.g, csf.b); + vfloat4 average = base_sum * (1.0f / astc::max(partition_weight, 1e-7f)); + pm[partition].avg = average * csf; - float3 sum_xp = float3(0.0f); - float3 sum_yp = float3(0.0f); - float3 sum_zp = float3(0.0f); + vfloat4 sum_xp = vfloat4::zero(); + vfloat4 sum_yp = vfloat4::zero(); + vfloat4 sum_zp = vfloat4::zero(); - for (int i = 0; i < texelcount; i++) + for (int i = 0; i < texel_count; i++) { int iwt = weights[i]; float weight = texel_weights[iwt]; - float3 texel_datum = float3(data_vr[iwt], - data_vg[iwt], - data_vb[iwt]); + vfloat4 texel_datum = vfloat4(data_vr[iwt], + data_vg[iwt], + data_vb[iwt], + 0.0f); texel_datum = (texel_datum - average) * weight; - if (texel_datum.r > 0.0f) - { - sum_xp = sum_xp + texel_datum; - } + vfloat4 zero = vfloat4::zero(); - if (texel_datum.g > 0.0f) - { - sum_yp = sum_yp + texel_datum; - } + vmask4 tdm0 = vfloat4(texel_datum.lane<0>()) > zero; + sum_xp += select(zero, texel_datum, tdm0); - if (texel_datum.b > 0.0f) - { - sum_zp = sum_zp + texel_datum; - } + vmask4 tdm1 = vfloat4(texel_datum.lane<1>()) > zero; + sum_yp += select(zero, texel_datum, tdm1); + + vmask4 tdm2 = vfloat4(texel_datum.lane<2>()) > zero; + sum_zp += select(zero, texel_datum, tdm2); } - float prod_xp = dot(sum_xp, sum_xp); - float prod_yp = dot(sum_yp, sum_yp); - float prod_zp = dot(sum_zp, sum_zp); + float prod_xp = dot3_s(sum_xp, sum_xp); + float prod_yp = dot3_s(sum_yp, sum_yp); + float prod_zp = dot3_s(sum_zp, sum_zp); - 
float3 best_vector = sum_xp; + vfloat4 best_vector = sum_xp; float best_sum = prod_xp; if (prod_yp > best_sum) @@ -338,17 +236,16 @@ void compute_averages_and_directions_3_components( best_vector = sum_zp; } - if (dot(best_vector, best_vector) < 1e-18f) + if (dot3_s(best_vector, best_vector) < 1e-18f) { - best_vector = float3(1.0f, 1.0f, 1.0f); + best_vector = vfloat4(1.0f, 1.0f, 1.0f, 0.0f); } - directions[partition] = best_vector; + pm[partition].dir = best_vector; } - } -void compute_averages_and_directions_2_components( +void compute_avgs_and_dirs_2_comp( const partition_info* pt, const imageblock* blk, const error_weight_block* ewb, @@ -383,15 +280,19 @@ void compute_averages_and_directions_2_components( } int partition_count = pt->partition_count; + promise(partition_count > 0); + for (int partition = 0; partition < partition_count; partition++) { const uint8_t *weights = pt->texels_of_partition[partition]; - int texelcount = pt->texels_per_partition[partition]; float2 base_sum = float2(0.0f); float partition_weight = 0.0f; - for (int i = 0; i < texelcount; i++) + int texel_count = pt->partition_texel_count[partition]; + promise(texel_count > 0); + + for (int i = 0; i < texel_count; i++) { int iwt = weights[i]; float weight = texel_weights[iwt]; @@ -403,13 +304,13 @@ void compute_averages_and_directions_2_components( float2 csf = color_scalefactors[partition]; - float2 average = base_sum * (1.0f / MAX(partition_weight, 1e-7f)); + float2 average = base_sum * (1.0f / astc::max(partition_weight, 1e-7f)); averages[partition] = average * float2(csf.r, csf.g); float2 sum_xp = float2(0.0f); float2 sum_yp = float2(0.0f); - for (int i = 0; i < texelcount; i++) + for (int i = 0; i < texel_count; i++) { int iwt = weights[i]; float weight = texel_weights[iwt]; @@ -443,340 +344,406 @@ void compute_averages_and_directions_2_components( } void compute_error_squared_rgba( - const partition_info* pt, // the partition that we use when computing the squared-error. + const partition_info* pt, const imageblock* blk, const error_weight_block* ewb, - const processed_line4* plines_uncorr, - const processed_line4* plines_samechroma, - const processed_line3* plines_separate_red, - const processed_line3* plines_separate_green, - const processed_line3* plines_separate_blue, - const processed_line3* plines_separate_alpha, - float* lengths_uncorr, - float* lengths_samechroma, - float4* lengths_separate, - float* uncorr_errors, - float* samechroma_errors, - float4* separate_color_errors + const processed_line4* uncor_plines, + const processed_line4* samec_plines, + float* uncor_lengths, + float* samec_lengths, + float* uncor_errors, + float* samec_errors ) { - float uncorr_errorsum = 0.0f; - float samechroma_errorsum = 0.0f; - float red_errorsum = 0.0f; - float green_errorsum = 0.0f; - float blue_errorsum = 0.0f; - float alpha_errorsum = 0.0f; - - for (int partition = 0; partition < pt->partition_count; partition++) + float uncor_errorsum = 0.0f; + float samec_errorsum = 0.0f; + + int partition_count = pt->partition_count; + promise(partition_count > 0); + + for (int partition = 0; partition < partition_count; partition++) { - // TODO: sort partitions by number of texels. For warp-architectures, - // this can reduce the running time by about 25-50%. 
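
The rewritten compute_avgs_and_dirs_* functions above all follow the same pattern: compute a weighted average for the partition, accumulate the weighted deviations into one candidate sum per channel (a deviation is added to channel c's sum only when its component c is positive), and keep the candidate with the largest squared length as the dominant direction. A minimal scalar sketch of that idea, using plain arrays instead of the patch's vfloat4 type (hypothetical helper, not part of the patch):

    #include <algorithm>
    #include <array>

    using Vec4 = std::array<float, 4>;

    // Scalar sketch of the direction estimate used by compute_avgs_and_dirs_*.
    static Vec4 estimate_direction(const Vec4* texels, const float* weights, int count)
    {
        // Weighted average of the partition; the 1e-7 floor avoids divide-by-zero,
        // matching astc::max(partition_weight, 1e-7f) in the patch.
        Vec4 avg = {};
        float wsum = 0.0f;
        for (int i = 0; i < count; i++)
        {
            for (int c = 0; c < 4; c++) avg[c] += texels[i][c] * weights[i];
            wsum += weights[i];
        }
        for (int c = 0; c < 4; c++) avg[c] /= std::max(wsum, 1e-7f);

        // One candidate sum per channel: a texel's weighted deviation is added
        // to channel c's candidate only if that channel's deviation is positive.
        Vec4 sums[4] = {};
        for (int i = 0; i < count; i++)
        {
            Vec4 d;
            for (int c = 0; c < 4; c++) d[c] = (texels[i][c] - avg[c]) * weights[i];
            for (int c = 0; c < 4; c++)
            {
                if (d[c] > 0.0f)
                {
                    for (int k = 0; k < 4; k++) sums[c][k] += d[k];
                }
            }
        }

        // The longest candidate approximates the principal axis without an eigen
        // solve; the 3-component variant in this patch additionally falls back to
        // (1,1,1,0) when every candidate is near zero.
        int best = 0;
        float best_len = -1.0f;
        for (int c = 0; c < 4; c++)
        {
            float len = sums[c][0] * sums[c][0] + sums[c][1] * sums[c][1] +
                        sums[c][2] * sums[c][2] + sums[c][3] * sums[c][3];
            if (len > best_len) { best_len = len; best = c; }
        }
        return sums[best];
    }
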
const uint8_t *weights = pt->texels_of_partition[partition]; - int texelcount = pt->texels_per_partition[partition]; - float uncorr_lowparam = 1e10f; - float uncorr_highparam = -1e10f; + float uncor_loparam = 1e10f; + float uncor_hiparam = -1e10f; - float samechroma_lowparam = 1e10f; - float samechroma_highparam = -1e10f; + float samec_loparam = 1e10f; + float samec_hiparam = -1e10f; - float4 separate_lowparam = float4(1e10f); - float4 separate_highparam = float4(-1e10f); + processed_line4 l_uncor = uncor_plines[partition]; + processed_line4 l_samec = samec_plines[partition]; - processed_line4 l_uncorr = plines_uncorr[partition]; - processed_line4 l_samechroma = plines_samechroma[partition]; - processed_line3 l_red = plines_separate_red[partition]; - processed_line3 l_green = plines_separate_green[partition]; - processed_line3 l_blue = plines_separate_blue[partition]; - processed_line3 l_alpha = plines_separate_alpha[partition]; + int texel_count = pt->partition_texel_count[partition]; + promise(texel_count > 0); - // TODO: split up this loop due to too many temporaries; in particular, - // the six line functions will consume 18 vector registers - for (int i = 0; i < texelcount; i++) - { - int iwt = weights[i]; + int i = 0; - float texel_weight_rgba = ewb->texel_weight[iwt]; - if (texel_weight_rgba > 1e-20f) - { - float4 dat = float4(blk->data_r[iwt], - blk->data_g[iwt], - blk->data_b[iwt], - blk->data_a[iwt]); - - float4 ews = ewb->error_weights[iwt]; - - float uncorr_param = dot(dat, l_uncorr.bs); - uncorr_lowparam = MIN(uncorr_param, uncorr_lowparam); - uncorr_highparam = MAX(uncorr_param, uncorr_highparam); - - float samechroma_param = dot(dat, l_samechroma.bs); - samechroma_lowparam = MIN(samechroma_param, samechroma_lowparam); - samechroma_highparam = MAX(samechroma_param, samechroma_highparam); - - float4 separate_param = float4(dot(float3(dat.g, dat.b, dat.a), l_red.bs), - dot(float3(dat.r, dat.b, dat.a), l_green.bs), - dot(float3(dat.r, dat.g, dat.a), l_blue.bs), - dot(float3(dat.r, dat.g, dat.b), l_alpha.bs)); - - separate_lowparam = float4(MIN(separate_param.r, separate_lowparam.r), - MIN(separate_param.g, separate_lowparam.g), - MIN(separate_param.b, separate_lowparam.b), - MIN(separate_param.a, separate_lowparam.a)); - - separate_highparam = float4(MAX(separate_param.r, separate_highparam.r), - MAX(separate_param.g, separate_highparam.g), - MAX(separate_param.b, separate_highparam.b), - MAX(separate_param.a, separate_highparam.a)); - - float4 uncorr_dist = (l_uncorr.amod - dat) + (uncorr_param * l_uncorr.bis); - uncorr_errorsum += dot(ews, uncorr_dist * uncorr_dist); - - float4 samechroma_dist = (l_samechroma.amod - dat) + - (samechroma_param * l_samechroma.bis); - samechroma_errorsum += dot(ews, samechroma_dist * samechroma_dist); - - float3 red_dist = (l_red.amod - float3(dat.g, dat.b, dat.a)) + - (separate_param.r * l_red.bis); - red_errorsum += dot(float3(ews.g, ews.b, ews.a), red_dist * red_dist); - - float3 green_dist = (l_green.amod - float3(dat.r, dat.b, dat.a)) + - (separate_param.g * l_green.bis); - green_errorsum += dot(float3(ews.r, ews.b, ews.a), green_dist * green_dist); - - float3 blue_dist = (l_blue.amod - float3(dat.r, dat.g, dat.a)) + - (separate_param.b * l_blue.bis); - blue_errorsum += dot(float3(ews.r, ews.g, ews.a), blue_dist * blue_dist); - - float3 alpha_dist = (l_alpha.amod - float3(dat.r, dat.g, dat.b)) + - (separate_param.a * l_alpha.bis); - alpha_errorsum += dot(float3(ews.r, ews.g, ews.b), alpha_dist * alpha_dist); - } - } + // Vectorize some 
useful scalar inputs + vfloat l_uncor_bs0(l_uncor.bs.lane<0>()); + vfloat l_uncor_bs1(l_uncor.bs.lane<1>()); + vfloat l_uncor_bs2(l_uncor.bs.lane<2>()); + vfloat l_uncor_bs3(l_uncor.bs.lane<3>()); - float uncorr_linelen = uncorr_highparam - uncorr_lowparam; - float samechroma_linelen = samechroma_highparam - samechroma_lowparam; - float4 separate_linelen = separate_highparam - separate_lowparam; + vfloat l_uncor_amod0(l_uncor.amod.lane<0>()); + vfloat l_uncor_amod1(l_uncor.amod.lane<1>()); + vfloat l_uncor_amod2(l_uncor.amod.lane<2>()); + vfloat l_uncor_amod3(l_uncor.amod.lane<3>()); - // Turn very small numbers and NaNs into a small number - if (!(uncorr_linelen > 1e-7f)) - { - uncorr_linelen = 1e-7f; - } + vfloat l_uncor_bis0(l_uncor.bis.lane<0>()); + vfloat l_uncor_bis1(l_uncor.bis.lane<1>()); + vfloat l_uncor_bis2(l_uncor.bis.lane<2>()); + vfloat l_uncor_bis3(l_uncor.bis.lane<3>()); - if (!(samechroma_linelen > 1e-7f)) - { - samechroma_linelen = 1e-7f; - } + vfloat l_samec_bs0(l_samec.bs.lane<0>()); + vfloat l_samec_bs1(l_samec.bs.lane<1>()); + vfloat l_samec_bs2(l_samec.bs.lane<2>()); + vfloat l_samec_bs3(l_samec.bs.lane<3>()); - if (!(separate_linelen.r > 1e-7f)) - { - separate_linelen.r = 1e-7f; - } + assert(all(l_samec.amod == vfloat4(0.0f))); - if (!(separate_linelen.g > 1e-7f)) - { - separate_linelen.g = 1e-7f; - } + vfloat l_samec_bis0(l_samec.bis.lane<0>()); + vfloat l_samec_bis1(l_samec.bis.lane<1>()); + vfloat l_samec_bis2(l_samec.bis.lane<2>()); + vfloat l_samec_bis3(l_samec.bis.lane<3>()); + + vfloat uncor_loparamv(1e10f); + vfloat uncor_hiparamv(-1e10f); + vfloat4 uncor_errorsumv = vfloat4::zero(); - if (!(separate_linelen.b > 1e-7f)) + vfloat samec_loparamv(1e10f); + vfloat samec_hiparamv(-1e10f); + vfloat4 samec_errorsumv = vfloat4::zero(); + + int clipped_texel_count = round_down_to_simd_multiple_vla(texel_count); + for (/* */; i < clipped_texel_count; i += ASTCENC_SIMD_WIDTH) { - separate_linelen.b = 1e-7f; + vint texel_idxs(&(weights[i])); + + vfloat data_r = gatherf(blk->data_r, texel_idxs); + vfloat data_g = gatherf(blk->data_g, texel_idxs); + vfloat data_b = gatherf(blk->data_b, texel_idxs); + vfloat data_a = gatherf(blk->data_a, texel_idxs); + + vfloat ew_r = gatherf(ewb->texel_weight_r, texel_idxs); + vfloat ew_g = gatherf(ewb->texel_weight_g, texel_idxs); + vfloat ew_b = gatherf(ewb->texel_weight_b, texel_idxs); + vfloat ew_a = gatherf(ewb->texel_weight_a, texel_idxs); + + vfloat uncor_param = (data_r * l_uncor_bs0) + + (data_g * l_uncor_bs1) + + (data_b * l_uncor_bs2) + + (data_a * l_uncor_bs3); + + uncor_loparamv = min(uncor_param, uncor_loparamv); + uncor_hiparamv = max(uncor_param, uncor_hiparamv); + + vfloat uncor_dist0 = (l_uncor_amod0 - data_r) + + (uncor_param * l_uncor_bis0); + vfloat uncor_dist1 = (l_uncor_amod1 - data_g) + + (uncor_param * l_uncor_bis1); + vfloat uncor_dist2 = (l_uncor_amod2 - data_b) + + (uncor_param * l_uncor_bis2); + vfloat uncor_dist3 = (l_uncor_amod3 - data_a) + + (uncor_param * l_uncor_bis3); + + vfloat uncor_error = (ew_r * uncor_dist0 * uncor_dist0) + + (ew_g * uncor_dist1 * uncor_dist1) + + (ew_b * uncor_dist2 * uncor_dist2) + + (ew_a * uncor_dist3 * uncor_dist3); + + haccumulate(uncor_errorsumv, uncor_error); + + // Process samechroma data + vfloat samec_param = (data_r * l_samec_bs0) + + (data_g * l_samec_bs1) + + (data_b * l_samec_bs2) + + (data_a * l_samec_bs3); + + samec_loparamv = min(samec_param, samec_loparamv); + samec_hiparamv = max(samec_param, samec_hiparamv); + + + vfloat samec_dist0 = samec_param * l_samec_bis0 
- data_r; + vfloat samec_dist1 = samec_param * l_samec_bis1 - data_g; + vfloat samec_dist2 = samec_param * l_samec_bis2 - data_b; + vfloat samec_dist3 = samec_param * l_samec_bis3 - data_a; + + vfloat samec_error = (ew_r * samec_dist0 * samec_dist0) + + (ew_g * samec_dist1 * samec_dist1) + + (ew_b * samec_dist2 * samec_dist2) + + (ew_a * samec_dist3 * samec_dist3); + + haccumulate(samec_errorsumv, samec_error); } - if (!(separate_linelen.a > 1e-7f)) + uncor_loparam = hmin_s(uncor_loparamv); + uncor_hiparam = hmax_s(uncor_hiparamv); + + samec_loparam = hmin_s(samec_loparamv); + samec_hiparam = hmax_s(samec_hiparamv); + + // Loop tail + // Error is buffered and accumulated in blocks of 4 to ensure that + // the partial sums added to the accumulator are invariant with the + // vector implementation, irrespective of vector size ... + alignas(16) float uncor_errorsum_tmp[4] { 0 }; + alignas(16) float samec_errorsum_tmp[4] { 0 }; + for (/* */; i < texel_count; i++) { - separate_linelen.a = 1e-7f; + int iwt = weights[i]; + + vfloat4 dat = blk->texel(iwt); + vfloat4 ews = ewb->error_weights[iwt]; + + float uncor_param = dot_s(dat, l_uncor.bs); + uncor_loparam = astc::min(uncor_param, uncor_loparam); + uncor_hiparam = astc::max(uncor_param, uncor_hiparam); + + float samec_param = dot_s(dat, l_samec.bs); + samec_loparam = astc::min(samec_param, samec_loparam); + samec_hiparam = astc::max(samec_param, samec_hiparam); + + vfloat4 uncor_dist = (l_uncor.amod - dat) + + (uncor_param * l_uncor.bis); + float uncor_error_tmp = dot_s(ews, uncor_dist * uncor_dist); + + vfloat4 samec_dist = samec_param * l_samec.bis - dat; + float samec_error_tmp = dot_s(ews, samec_dist * samec_dist); + + // Accumulate error sum in the temporary array + int error_index = i & 0x3; + uncor_errorsum_tmp[error_index] = uncor_error_tmp; + samec_errorsum_tmp[error_index] = samec_error_tmp; + +#if ASTCENC_SIMD_WIDTH == 8 + // Zero the temporary staging buffer every 4 items unless last iter + if ((i & 0x7) == 0x03) + { + haccumulate(uncor_errorsumv, vfloat4::loada(uncor_errorsum_tmp)); + storea(vfloat4::zero(), uncor_errorsum_tmp); + + haccumulate(samec_errorsumv, vfloat4::loada(samec_errorsum_tmp)); + storea(vfloat4::zero(), samec_errorsum_tmp); + } +#endif } - lengths_uncorr[partition] = uncorr_linelen; - lengths_samechroma[partition] = samechroma_linelen; - lengths_separate[partition] = separate_linelen; + // Accumulate the loop tail using the vfloat4 swizzle + haccumulate(uncor_errorsumv, vfloat4::loada(uncor_errorsum_tmp)); + haccumulate(samec_errorsumv, vfloat4::loada(samec_errorsum_tmp)); + + // Resolve the final scalar accumulator sum + haccumulate(uncor_errorsum, uncor_errorsumv); + haccumulate(samec_errorsum, samec_errorsumv); + + float uncor_linelen = uncor_hiparam - uncor_loparam; + float samec_linelen = samec_hiparam - samec_loparam; - *uncorr_errors = uncorr_errorsum; - *samechroma_errors = samechroma_errorsum; - *separate_color_errors = float4(red_errorsum, green_errorsum, blue_errorsum, alpha_errorsum); + // Turn very small numbers and NaNs into a small number + uncor_linelen = astc::max(uncor_linelen, 1e-7f); + samec_linelen = astc::max(samec_linelen, 1e-7f); + + uncor_lengths[partition] = uncor_linelen; + samec_lengths[partition] = samec_linelen; } + + *uncor_errors = uncor_errorsum; + *samec_errors = samec_errorsum; } void compute_error_squared_rgb( - const partition_info *pt, // the partition that we use when computing the squared-error. 
+ const partition_info *pt, const imageblock *blk, const error_weight_block *ewb, - const processed_line3 *plines_uncorr, - const processed_line3 *plines_samechroma, - const processed_line2 *plines_separate_red, - const processed_line2 *plines_separate_green, - const processed_line2 *plines_separate_blue, - float *lengths_uncorr, - float *lengths_samechroma, - float3 *lengths_separate, - float *uncorr_errors, - float *samechroma_errors, - float3 *separate_color_errors + partition_lines3 plines[4], + float& uncor_error, + float& samec_error ) { - float uncorr_errorsum = 0.0f; - float samechroma_errorsum = 0.0f; - float red_errorsum = 0.0f; - float green_errorsum = 0.0f; - float blue_errorsum = 0.0f; + float uncor_errorsum = 0.0f; + float samec_errorsum = 0.0f; - for (int partition = 0; partition < pt->partition_count; partition++) + int partition_count = pt->partition_count; + promise(partition_count > 0); + + for (int partition = 0; partition < partition_count; partition++) { - // TODO: sort partitions by number of texels. For warp-architectures, - // this can reduce the running time by about 25-50%. + partition_lines3& pl = plines[partition]; const uint8_t *weights = pt->texels_of_partition[partition]; - int texelcount = pt->texels_per_partition[partition]; + int texel_count = pt->partition_texel_count[partition]; + promise(texel_count > 0); + + float uncor_loparam = 1e10f; + float uncor_hiparam = -1e10f; + + float samec_loparam = 1e10f; + float samec_hiparam = -1e10f; + + processed_line3 l_uncor = pl.uncor_pline; + processed_line3 l_samec = pl.samec_pline; - float uncorr_lowparam = 1e10f; - float uncorr_highparam = -1e10f; + int i = 0; - float samechroma_lowparam = 1e10f; - float samechroma_highparam = -1e10f; + // This implementation is an example vectorization of this function. 
+ // It works for - the codec is a 2-4% faster than not vectorizing - but + // the benefit is limited by the use of gathers and register pressure - float3 separate_lowparam = float3(1e10f); - float3 separate_highparam = float3(-1e10f); + // Vectorize some useful scalar inputs + vfloat l_uncor_bs0(l_uncor.bs.lane<0>()); + vfloat l_uncor_bs1(l_uncor.bs.lane<1>()); + vfloat l_uncor_bs2(l_uncor.bs.lane<2>()); - processed_line3 l_uncorr = plines_uncorr[partition]; - processed_line3 l_samechroma = plines_samechroma[partition]; - processed_line2 l_red = plines_separate_red[partition]; - processed_line2 l_green = plines_separate_green[partition]; - processed_line2 l_blue = plines_separate_blue[partition]; + vfloat l_uncor_amod0(l_uncor.amod.lane<0>()); + vfloat l_uncor_amod1(l_uncor.amod.lane<1>()); + vfloat l_uncor_amod2(l_uncor.amod.lane<2>()); - // TODO: split up this loop due to too many temporaries; in - // particular, the six line functions will consume 18 vector registers + vfloat l_uncor_bis0(l_uncor.bis.lane<0>()); + vfloat l_uncor_bis1(l_uncor.bis.lane<1>()); + vfloat l_uncor_bis2(l_uncor.bis.lane<2>()); - for (int i = 0; i < texelcount; i++) + vfloat l_samec_bs0(l_samec.bs.lane<0>()); + vfloat l_samec_bs1(l_samec.bs.lane<1>()); + vfloat l_samec_bs2(l_samec.bs.lane<2>()); + + assert(all(l_samec.amod == vfloat4(0.0f))); + + vfloat l_samec_bis0(l_samec.bis.lane<0>()); + vfloat l_samec_bis1(l_samec.bis.lane<1>()); + vfloat l_samec_bis2(l_samec.bis.lane<2>()); + + vfloat uncor_loparamv(1e10f); + vfloat uncor_hiparamv(-1e10f); + vfloat4 uncor_errorsumv = vfloat4::zero(); + + vfloat samec_loparamv(1e10f); + vfloat samec_hiparamv(-1e10f); + vfloat4 samec_errorsumv = vfloat4::zero(); + + int clipped_texel_count = round_down_to_simd_multiple_vla(texel_count); + for (/* */; i < clipped_texel_count; i += ASTCENC_SIMD_WIDTH) { - int iwt = weights[i]; + vint texel_idxs(&(weights[i])); - float texel_weight_rgb = ewb->texel_weight_rgb[iwt]; - if (texel_weight_rgb > 1e-20f) - { - float3 dat = float3(blk->data_r[iwt], - blk->data_g[iwt], - blk->data_b[iwt]); + vfloat data_r = gatherf(blk->data_r, texel_idxs); + vfloat data_g = gatherf(blk->data_g, texel_idxs); + vfloat data_b = gatherf(blk->data_b, texel_idxs); - float3 ews = float3(ewb->error_weights[iwt].r, - ewb->error_weights[iwt].g, - ewb->error_weights[iwt].b); + vfloat ew_r = gatherf(ewb->texel_weight_r, texel_idxs); + vfloat ew_g = gatherf(ewb->texel_weight_g, texel_idxs); + vfloat ew_b = gatherf(ewb->texel_weight_b, texel_idxs); - float uncorr_param = dot(dat, l_uncorr.bs); - uncorr_lowparam = MIN(uncorr_param, uncorr_lowparam); - uncorr_highparam = MAX(uncorr_param, uncorr_highparam); + vfloat uncor_param = (data_r * l_uncor_bs0) + + (data_g * l_uncor_bs1) + + (data_b * l_uncor_bs2); - float samechroma_param = dot(dat, l_samechroma.bs); - samechroma_lowparam = MIN(samechroma_param, samechroma_lowparam); - samechroma_highparam = MAX(samechroma_param, samechroma_highparam); + uncor_loparamv = min(uncor_param, uncor_loparamv); + uncor_hiparamv = max(uncor_param, uncor_hiparamv); - float3 separate_param = float3(dot(float2(dat.g, dat.b), l_red.bs), - dot(float2(dat.r, dat.b), l_green.bs), - dot(float2(dat.r, dat.g), l_blue.bs)); + vfloat uncor_dist0 = (l_uncor_amod0 - data_r) + + (uncor_param * l_uncor_bis0); + vfloat uncor_dist1 = (l_uncor_amod1 - data_g) + + (uncor_param * l_uncor_bis1); + vfloat uncor_dist2 = (l_uncor_amod2 - data_b) + + (uncor_param * l_uncor_bis2); - separate_lowparam = float3(MIN(separate_param.r, separate_lowparam.r), - 
MIN(separate_param.g, separate_lowparam.g), - MIN(separate_param.b, separate_lowparam.b)); + vfloat uncor_err = (ew_r * uncor_dist0 * uncor_dist0) + + (ew_g * uncor_dist1 * uncor_dist1) + + (ew_b * uncor_dist2 * uncor_dist2); - separate_highparam = float3(MAX(separate_param.r, separate_highparam.r), - MAX(separate_param.g, separate_highparam.g), - MAX(separate_param.b, separate_highparam.b)); + haccumulate(uncor_errorsumv, uncor_err); - float3 uncorr_dist = (l_uncorr.amod - dat) + - (uncorr_param * l_uncorr.bis); - uncorr_errorsum += dot(ews, uncorr_dist * uncorr_dist); + // Process samechroma data + vfloat samec_param = (data_r * l_samec_bs0) + + (data_g * l_samec_bs1) + + (data_b * l_samec_bs2); - float3 samechroma_dist = (l_samechroma.amod - dat) + - (samechroma_param * l_samechroma.bis); - samechroma_errorsum += dot(ews, samechroma_dist * samechroma_dist); + samec_loparamv = min(samec_param, samec_loparamv); + samec_hiparamv = max(samec_param, samec_hiparamv); - float2 red_dist = (l_red.amod - float2(dat.g, dat.b)) + - (separate_param.r * l_red.bis); - red_errorsum += dot(float2(ews.g, ews.b), red_dist * red_dist); - float2 green_dist = (l_green.amod - float2(dat.r, dat.b)) + - (separate_param.g * l_green.bis); - green_errorsum += dot(float2(ews.r, ews.b), green_dist * green_dist); + vfloat samec_dist0 = samec_param * l_samec_bis0 - data_r; + vfloat samec_dist1 = samec_param * l_samec_bis1 - data_g; + vfloat samec_dist2 = samec_param * l_samec_bis2 - data_b; - float2 blue_dist = (l_blue.amod - float2(dat.r, dat.g)) + - (separate_param.b * l_blue.bis); - blue_errorsum += dot(float2(ews.r, ews.g), blue_dist * blue_dist); - } + vfloat samec_err = (ew_r * samec_dist0 * samec_dist0) + + (ew_g * samec_dist1 * samec_dist1) + + (ew_b * samec_dist2 * samec_dist2); + + haccumulate(samec_errorsumv, samec_err); } - float uncorr_linelen = uncorr_highparam - uncorr_lowparam; - float samechroma_linelen = samechroma_highparam - samechroma_lowparam; - float3 separate_linelen = separate_highparam - separate_lowparam; + uncor_loparam = hmin_s(uncor_loparamv); + uncor_hiparam = hmax_s(uncor_hiparamv); - // Turn very small numbers and NaNs into a small number - if (!(uncorr_linelen > 1e-7f)) - { - uncorr_linelen = 1e-7f; - } + samec_loparam = hmin_s(samec_loparamv); + samec_hiparam = hmax_s(samec_hiparamv); - if (!(samechroma_linelen > 1e-7f)) + // Loop tail + // Error is buffered and accumulated in blocks of 4 to ensure that + // the partial sums added to the accumulator are invariant with the + // vector implementation, irrespective of vector size ... 
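
Both vectorized loops in this hunk compute the same quantity per texel: project the texel onto a processed line, reconstruct the closest point on that line, and accumulate the channel-weighted squared distance, while tracking the low and high projection parameters for the later line-length calculation. A scalar reference of that per-texel work, hypothetical and not part of the patch, shown for the three-channel case:

    // Scalar reference of the per-texel work done by the gathered SIMD loops
    // in compute_error_squared_rgb/_rgba above.
    struct Line3
    {
        float amod[3];  // offset of the line (zero for the same-chroma line)
        float bs[3];    // direction used for the projection dot product
        float bis[3];   // direction scaled for reconstruction
    };

    // The caller seeds loparam/hiparam with +1e10 / -1e10, as the patch does.
    static float squared_error_rgb(const float rgb[3], const float ew[3],
                                   const Line3& line, float& loparam, float& hiparam)
    {
        // Parametric position of this texel along the line.
        float t = rgb[0] * line.bs[0] + rgb[1] * line.bs[1] + rgb[2] * line.bs[2];
        loparam = t < loparam ? t : loparam;
        hiparam = t > hiparam ? t : hiparam;

        // Channel-weighted squared distance between the texel and its
        // reconstruction amod + t * bis on the line.
        float err = 0.0f;
        for (int c = 0; c < 3; c++)
        {
            float dist = (line.amod[c] - rgb[c]) + t * line.bis[c];
            err += ew[c] * dist * dist;
        }
        return err;
    }

The line length fed back to the weight search is then hiparam - loparam, clamped to 1e-7 exactly as the astc::max() calls in the patch do, which keeps NaNs and degenerate lines out of the later divisions.
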
+ alignas(16) float uncor_errorsum_tmp[4] { 0 }; + alignas(16) float samec_errorsum_tmp[4] { 0 }; + for (/* */; i < texel_count; i++) { - samechroma_linelen = 1e-7f; - } + int iwt = weights[i]; - if (!(separate_linelen.r > 1e-7f)) - { - separate_linelen.r = 1e-7f; - } + vfloat4 dat = blk->texel3(iwt); + vfloat4 ews = ewb->error_weights[iwt]; - if (!(separate_linelen.g > 1e-7f)) - { - separate_linelen.g = 1e-7f; - } + float uncor_param = dot3_s(dat, l_uncor.bs); + uncor_loparam = astc::min(uncor_param, uncor_loparam); + uncor_hiparam = astc::max(uncor_param, uncor_hiparam); - if (!(separate_linelen.b > 1e-7f)) - { - separate_linelen.b = 1e-7f; - } + float samec_param = dot3_s(dat, l_samec.bs); + samec_loparam = astc::min(samec_param, samec_loparam); + samec_hiparam = astc::max(samec_param, samec_hiparam); - lengths_uncorr[partition] = uncorr_linelen; - lengths_samechroma[partition] = samechroma_linelen; - lengths_separate[partition] = separate_linelen; + vfloat4 uncor_dist = (l_uncor.amod - dat) + + (uncor_param * l_uncor.bis); + float uncor_error_tmp = dot3_s(ews, uncor_dist * uncor_dist); - *uncorr_errors = uncorr_errorsum; - *samechroma_errors = samechroma_errorsum; - *separate_color_errors = float3(red_errorsum, green_errorsum, blue_errorsum); - } -} + vfloat4 samec_dist = samec_param * l_samec.bis - dat; + float samec_error_tmp = dot3_s(ews, samec_dist * samec_dist); -// function to compute the error across a tile when using a particular line for -// a particular partition. -float compute_error_squared_rgb_single_partition( - int partition_to_test, - const block_size_descriptor* bsd, - const partition_info* pt, // the partition that we use when computing the squared-error. - const imageblock* blk, - const error_weight_block* ewb, - const processed_line3* lin // the line for the partition. 
-) { - int texels_per_block = bsd->texel_count; - float errorsum = 0.0f; + // Accumulate error sum in the temporary array + int error_index = i & 0x3; + uncor_errorsum_tmp[error_index] = uncor_error_tmp; + samec_errorsum_tmp[error_index] = samec_error_tmp; - for (int i = 0; i < texels_per_block; i++) - { - int partition = pt->partition_of_texel[i]; - float texel_weight = ewb->texel_weight_rgb[i]; +#if ASTCENC_SIMD_WIDTH == 8 + // Emit the staging buffer every 4 items unless last iteration + if ((i & 0x7) == 0x03) + { + haccumulate(uncor_errorsumv, vfloat4::loada(uncor_errorsum_tmp)); + storea(vfloat4::zero(), uncor_errorsum_tmp); - if (partition != partition_to_test || texel_weight < 1e-20f) - { - continue; + haccumulate(samec_errorsumv, vfloat4::loada(samec_errorsum_tmp)); + storea(vfloat4::zero(), samec_errorsum_tmp); + } +#endif } - float3 point = float3(blk->data_r[i], - blk->data_g[i], - blk->data_b[i]); - float param = dot(point, lin->bs); - float3 rp1 = lin->amod + param * lin->bis; - float3 dist = rp1 - point; - float4 ews = ewb->error_weights[i]; - float3 ews3 = float3(ews.r, ews.g, ews.b); - errorsum += dot(ews3, dist * dist); + // Accumulate the loop tail using the vfloat4 swizzle + haccumulate(uncor_errorsumv, vfloat4::loada(uncor_errorsum_tmp)); + haccumulate(samec_errorsumv, vfloat4::loada(samec_errorsum_tmp)); + + // Resolve the final scalar accumulator sum + haccumulate(uncor_errorsum, uncor_errorsumv); + haccumulate(samec_errorsum, samec_errorsumv); + + float uncor_linelen = uncor_hiparam - uncor_loparam; + float samec_linelen = samec_hiparam - samec_loparam; + + // Turn very small numbers and NaNs into a small number + uncor_linelen = astc::max(uncor_linelen, 1e-7f); + samec_linelen = astc::max(samec_linelen, 1e-7f); + + pl.uncor_line_len = uncor_linelen; + pl.samec_line_len = samec_linelen; } - return errorsum; + uncor_error = uncor_errorsum; + samec_error = samec_errorsum; } #endif diff --git a/libkram/astc-encoder/astcenc_block_sizes2.cpp b/libkram/astc-encoder/astcenc_block_sizes2.cpp index 04089a2c..b37892d8 100644 --- a/libkram/astc-encoder/astcenc_block_sizes2.cpp +++ b/libkram/astc-encoder/astcenc_block_sizes2.cpp @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2011-2020 Arm Limited +// Copyright 2011-2021 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. 
You may obtain a copy @@ -117,8 +117,10 @@ static int decode_block_mode_2d( int weight_count = N * M * (D + 1); int qmode = (base_quant_mode - 2) + 6 * H; - int weightbits = compute_ise_bitcount(weight_count, (quantization_method) qmode); - if (weight_count > MAX_WEIGHTS_PER_BLOCK || weightbits < MIN_WEIGHT_BITS_PER_BLOCK || weightbits > MAX_WEIGHT_BITS_PER_BLOCK) + int weightbits = get_ise_sequence_bitcount(weight_count, (quant_method)qmode); + if (weight_count > MAX_WEIGHTS_PER_BLOCK || + weightbits < MIN_WEIGHT_BITS_PER_BLOCK || + weightbits > MAX_WEIGHT_BITS_PER_BLOCK) { return 0; } @@ -211,7 +213,7 @@ static int decode_block_mode_3d( int weight_count = N * M * Q * (D + 1); int qmode = (base_quant_mode - 2) + 6 * H; - int weightbits = compute_ise_bitcount(weight_count, (quantization_method) qmode); + int weightbits = get_ise_sequence_bitcount(weight_count, (quant_method)qmode); if (weight_count > MAX_WEIGHTS_PER_BLOCK || weightbits < MIN_WEIGHT_BITS_PER_BLOCK || weightbits > MAX_WEIGHT_BITS_PER_BLOCK) @@ -237,12 +239,12 @@ static void initialize_decimation_table_2d( int texels_per_block = xdim * ydim; int weights_per_block = x_weights * y_weights; - int weightcount_of_texel[MAX_TEXELS_PER_BLOCK]; - int grid_weights_of_texel[MAX_TEXELS_PER_BLOCK][4]; - int weights_of_texel[MAX_TEXELS_PER_BLOCK][4]; + uint8_t weightcount_of_texel[MAX_TEXELS_PER_BLOCK]; + uint8_t grid_weights_of_texel[MAX_TEXELS_PER_BLOCK][4]; + uint8_t weights_of_texel[MAX_TEXELS_PER_BLOCK][4]; - int texelcount_of_weight[MAX_WEIGHTS_PER_BLOCK]; - int texels_of_weight[MAX_WEIGHTS_PER_BLOCK][MAX_TEXELS_PER_BLOCK]; + uint8_t texelcount_of_weight[MAX_WEIGHTS_PER_BLOCK]; + uint8_t texels_of_weight[MAX_WEIGHTS_PER_BLOCK][MAX_TEXELS_PER_BLOCK]; int texelweights_of_weight[MAX_WEIGHTS_PER_BLOCK][MAX_TEXELS_PER_BLOCK]; for (int i = 0; i < weights_per_block; i++) @@ -269,7 +271,6 @@ static void initialize_decimation_table_2d( int x_weight_int = x_weight >> 4; int y_weight_int = y_weight >> 4; int qweight[4]; - int weight[4]; qweight[0] = x_weight_int + y_weight_int * x_weights; qweight[1] = qweight[0] + 1; qweight[2] = qweight[0] + x_weights; @@ -278,6 +279,7 @@ static void initialize_decimation_table_2d( // truncated-precision bilinear interpolation. int prod = x_weight_frac * y_weight_frac; + int weight[4]; weight[3] = (prod + 8) >> 4; weight[1] = x_weight_frac - weight[3]; weight[2] = y_weight_frac - weight[3]; @@ -300,35 +302,44 @@ static void initialize_decimation_table_2d( for (int i = 0; i < texels_per_block; i++) { - dt->texel_num_weights[i] = weightcount_of_texel[i]; + dt->texel_weight_count[i] = weightcount_of_texel[i]; - // ensure that all 4 entries are actually initialized. 
- // This allows a branch-free implementation of compute_value_of_texel_flt() + // Init all 4 entries so we can rely on zeros for vectorization for (int j = 0; j < 4; j++) { - dt->texel_weights_int[i][j] = 0; - dt->texel_weights_float[i][j] = 0.0f; - dt->texel_weights[i][j] = 0; + dt->texel_weights_int_t4[i][j] = 0; + dt->texel_weights_float_t4[i][j] = 0.0f; + dt->texel_weights_t4[i][j] = 0; + + dt->texel_weights_float_4t[j][i] = 0.0f; + dt->texel_weights_4t[j][i] = 0; + } for (int j = 0; j < weightcount_of_texel[i]; j++) { - dt->texel_weights_int[i][j] = (uint8_t)weights_of_texel[i][j]; - dt->texel_weights_float[i][j] = ((float)weights_of_texel[i][j]) * (1.0f / TEXEL_WEIGHT_SUM); - dt->texel_weights[i][j] = (uint8_t)grid_weights_of_texel[i][j]; + dt->texel_weights_int_t4[i][j] = weights_of_texel[i][j]; + dt->texel_weights_float_t4[i][j] = ((float)weights_of_texel[i][j]) * (1.0f / TEXEL_WEIGHT_SUM); + dt->texel_weights_t4[i][j] = grid_weights_of_texel[i][j]; + + dt->texel_weights_float_4t[j][i] = ((float)weights_of_texel[i][j]) * (1.0f / TEXEL_WEIGHT_SUM); + dt->texel_weights_4t[j][i] = grid_weights_of_texel[i][j]; } } for (int i = 0; i < weights_per_block; i++) { - dt->weight_num_texels[i] = texelcount_of_weight[i]; + dt->weight_texel_count[i] = texelcount_of_weight[i]; for (int j = 0; j < texelcount_of_weight[i]; j++) { - int texel = texels_of_weight[i][j]; - dt->weight_texel[i][j] = (uint8_t)texel; - dt->weights_int[i][j] = (uint8_t)texelweights_of_weight[i][j]; - dt->weights_flt[i][j] = (float)texelweights_of_weight[i][j]; + uint8_t texel = texels_of_weight[i][j]; + + dt->weights_int[i][j] = texelweights_of_weight[i][j]; + + // Create transposed versions of these for better vectorization + dt->weight_texel[j][i] = texel; + dt->weights_flt[j][i] = (float)texelweights_of_weight[i][j]; // perform a layer of array unrolling. 
An aspect of this unrolling is that // one of the texel-weight indexes is an identity-mapped index; we will use this @@ -336,30 +347,33 @@ static void initialize_decimation_table_2d( int swap_idx = -1; for (int k = 0; k < 4; k++) { - int dttw = dt->texel_weights[texel][k]; - float dttwf = dt->texel_weights_float[texel][k]; + uint8_t dttw = dt->texel_weights_t4[texel][k]; + float dttwf = dt->texel_weights_float_t4[texel][k]; if (dttw == i && dttwf != 0.0f) { swap_idx = k; } - dt->texel_weights_texel[i][j][k] = (uint8_t)dttw; + dt->texel_weights_texel[i][j][k] = dttw; dt->texel_weights_float_texel[i][j][k] = dttwf; } if (swap_idx != 0) { - int vi = dt->texel_weights_texel[i][j][0]; + uint8_t vi = dt->texel_weights_texel[i][j][0]; float vf = dt->texel_weights_float_texel[i][j][0]; dt->texel_weights_texel[i][j][0] = dt->texel_weights_texel[i][j][swap_idx]; dt->texel_weights_float_texel[i][j][0] = dt->texel_weights_float_texel[i][j][swap_idx]; - dt->texel_weights_texel[i][j][swap_idx] = (uint8_t)vi; + dt->texel_weights_texel[i][j][swap_idx] = vi; dt->texel_weights_float_texel[i][j][swap_idx] = vf; } } } - dt->num_texels = texels_per_block; - dt->num_weights = weights_per_block; + dt->texel_count = texels_per_block; + dt->weight_count = weights_per_block; + dt->weight_x = x_weights; + dt->weight_y = y_weights; + dt->weight_z = 1; } static void initialize_decimation_table_3d( @@ -374,12 +388,12 @@ static void initialize_decimation_table_3d( int texels_per_block = xdim * ydim * zdim; int weights_per_block = x_weights * y_weights * z_weights; - int weightcount_of_texel[MAX_TEXELS_PER_BLOCK]; - int grid_weights_of_texel[MAX_TEXELS_PER_BLOCK][4]; - int weights_of_texel[MAX_TEXELS_PER_BLOCK][4]; + uint8_t weightcount_of_texel[MAX_TEXELS_PER_BLOCK]; + uint8_t grid_weights_of_texel[MAX_TEXELS_PER_BLOCK][4]; + uint8_t weights_of_texel[MAX_TEXELS_PER_BLOCK][4]; - int texelcount_of_weight[MAX_WEIGHTS_PER_BLOCK]; - int texels_of_weight[MAX_WEIGHTS_PER_BLOCK][MAX_TEXELS_PER_BLOCK]; + uint8_t texelcount_of_weight[MAX_WEIGHTS_PER_BLOCK]; + uint8_t texels_of_weight[MAX_WEIGHTS_PER_BLOCK][MAX_TEXELS_PER_BLOCK]; int texelweights_of_weight[MAX_WEIGHTS_PER_BLOCK][MAX_TEXELS_PER_BLOCK]; for (int i = 0; i < weights_per_block; i++) @@ -510,34 +524,42 @@ static void initialize_decimation_table_3d( for (int i = 0; i < texels_per_block; i++) { - dt->texel_num_weights[i] = weightcount_of_texel[i]; + dt->texel_weight_count[i] = weightcount_of_texel[i]; - // ensure that all 4 entries are actually initialized. 
- // This allows a branch-free implementation of compute_value_of_texel_flt() + // Init all 4 entries so we can rely on zeros for vectorization for (int j = 0; j < 4; j++) { - dt->texel_weights_int[i][j] = 0; - dt->texel_weights_float[i][j] = 0.0f; - dt->texel_weights[i][j] = 0; + dt->texel_weights_int_t4[i][j] = 0; + dt->texel_weights_float_t4[i][j] = 0.0f; + dt->texel_weights_t4[i][j] = 0; + + dt->texel_weights_float_4t[j][i] = 0.0f; + dt->texel_weights_4t[j][i] = 0; } for (int j = 0; j < weightcount_of_texel[i]; j++) { - dt->texel_weights_int[i][j] = (uint8_t)weights_of_texel[i][j]; - dt->texel_weights_float[i][j] = ((float)weights_of_texel[i][j]) * (1.0f / TEXEL_WEIGHT_SUM); - dt->texel_weights[i][j] = (uint8_t)grid_weights_of_texel[i][j]; + dt->texel_weights_int_t4[i][j] = weights_of_texel[i][j]; + dt->texel_weights_float_t4[i][j] = ((float)weights_of_texel[i][j]) * (1.0f / TEXEL_WEIGHT_SUM); + dt->texel_weights_t4[i][j] = grid_weights_of_texel[i][j]; + + dt->texel_weights_float_4t[j][i] = ((float)weights_of_texel[i][j]) * (1.0f / TEXEL_WEIGHT_SUM); + dt->texel_weights_4t[j][i] = grid_weights_of_texel[i][j]; } } for (int i = 0; i < weights_per_block; i++) { - dt->weight_num_texels[i] = texelcount_of_weight[i]; + dt->weight_texel_count[i] = texelcount_of_weight[i]; for (int j = 0; j < texelcount_of_weight[i]; j++) { int texel = texels_of_weight[i][j]; - dt->weight_texel[i][j] = (uint8_t)texel; - dt->weights_int[i][j] = (uint8_t)texelweights_of_weight[i][j]; - dt->weights_flt[i][j] = (float)texelweights_of_weight[i][j]; + + dt->weights_int[i][j] = texelweights_of_weight[i][j]; + + // Create transposed versions of these for better vectorization + dt->weight_texel[j][i] = texel; + dt->weights_flt[j][i] = (float)texelweights_of_weight[i][j]; // perform a layer of array unrolling. An aspect of this unrolling is that // one of the texel-weight indexes is an identity-mapped index; we will use this @@ -545,219 +567,281 @@ static void initialize_decimation_table_3d( int swap_idx = -1; for (int k = 0; k < 4; k++) { - int dttw = dt->texel_weights[texel][k]; - float dttwf = dt->texel_weights_float[texel][k]; + uint8_t dttw = dt->texel_weights_t4[texel][k]; + float dttwf = dt->texel_weights_float_t4[texel][k]; if (dttw == i && dttwf != 0.0f) { swap_idx = k; } - dt->texel_weights_texel[i][j][k] = (uint8_t)dttw; + dt->texel_weights_texel[i][j][k] = dttw; dt->texel_weights_float_texel[i][j][k] = dttwf; } if (swap_idx != 0) { - int vi = dt->texel_weights_texel[i][j][0]; + uint8_t vi = dt->texel_weights_texel[i][j][0]; float vf = dt->texel_weights_float_texel[i][j][0]; dt->texel_weights_texel[i][j][0] = dt->texel_weights_texel[i][j][swap_idx]; dt->texel_weights_float_texel[i][j][0] = dt->texel_weights_float_texel[i][j][swap_idx]; - dt->texel_weights_texel[i][j][swap_idx] = (uint8_t)vi; + dt->texel_weights_texel[i][j][swap_idx] = vi; dt->texel_weights_float_texel[i][j][swap_idx] = vf; } } } - dt->num_texels = texels_per_block; - dt->num_weights = weights_per_block; + dt->texel_count = texels_per_block; + dt->weight_count = weights_per_block; + dt->weight_x = x_weights; + dt->weight_y = y_weights; + dt->weight_z = z_weights; } -static void construct_block_size_descriptor_2d( - int xdim, - int ydim, - block_size_descriptor* bsd +/** + * @brief Assign the texels to use for kmeans clustering. + * + * The max limit is MAX_KMEANS_TEXELS; above this a random selection is used. + * The @c bsd.texel_count is an input and must be populated beforehand. + * + * @param bsd The block size descriptor to populate. 
+ */ +static void assign_kmeans_texels( + block_size_descriptor& bsd ) { - int decimation_mode_index[256]; // for each of the 256 entries in the decim_table_array, its index - int decimation_mode_count = 0; + // Use all texels for kmeans on a small block + if (bsd.texel_count <= MAX_KMEANS_TEXELS) + { + for (int i = 0; i < bsd.texel_count; i++) + { + bsd.kmeans_texels[i] = i; + } - bsd->xdim = xdim; - bsd->ydim = ydim; - bsd->zdim = 1; - bsd->texel_count = xdim * ydim; + bsd.kmeans_texel_count = bsd.texel_count; + return; + } + + // Select a random subset of texels for kmeans on a large block + uint64_t rng_state[2]; + astc::rand_init(rng_state); - for (int i = 0; i < 256; i++) + // Pick 64 random texels for use with bitmap partitioning. + bool seen[MAX_TEXELS_PER_BLOCK]; + for (int i = 0; i < bsd.texel_count; i++) { - decimation_mode_index[i] = -1; + seen[i] = false; } - // gather all the infill-modes that can be used with the current block size - for (int x_weights = 2; x_weights <= 12; x_weights++) + // Assign 64 random indices, retrying if we see repeats + int arr_elements_set = 0; + while (arr_elements_set < MAX_KMEANS_TEXELS) { - for (int y_weights = 2; y_weights <= 12; y_weights++) + unsigned int idx = (unsigned int)astc::rand(rng_state); + idx %= bsd.texel_count; + if (!seen[idx]) { - if (x_weights * y_weights > MAX_WEIGHTS_PER_BLOCK) - { - continue; - } + bsd.kmeans_texels[arr_elements_set++] = idx; + seen[idx] = true; + } + } - decimation_table *dt = new decimation_table; - decimation_mode_index[y_weights * 16 + x_weights] = decimation_mode_count; - initialize_decimation_table_2d(xdim, ydim, x_weights, y_weights, dt); + bsd.kmeans_texel_count = MAX_KMEANS_TEXELS; +} - int weight_count = x_weights * y_weights; +/** + * @brief Allocate a single 2D decimation table entry. + * + * @param x_dim The block X dimension. + * @param y_dim The block Y dimension. + * @param x_weights The weight grid X dimension. + * @param y_weights The weight grid Y dimension. + * + * @return The new entry's index in the compacted decimation_table array. 
+ */ +static int construct_dt_entry_2d( + int x_dim, + int y_dim, + int x_weights, + int y_weights, + block_size_descriptor& bsd +) { + int dm_index = bsd.decimation_mode_count; + int weight_count = x_weights * y_weights; + assert(weight_count <= MAX_WEIGHTS_PER_BLOCK); - int maxprec_1plane = -1; - int maxprec_2planes = -1; - for (int i = 0; i < 12; i++) - { - int bits_1plane = compute_ise_bitcount(weight_count, (quantization_method) i); - int bits_2planes = compute_ise_bitcount(2 * weight_count, (quantization_method) i); + bool try_2planes = (2 * weight_count) <= MAX_WEIGHTS_PER_BLOCK; - if (bits_1plane >= MIN_WEIGHT_BITS_PER_BLOCK && bits_1plane <= MAX_WEIGHT_BITS_PER_BLOCK) - { - maxprec_1plane = i; - } + decimation_table *dt = aligned_malloc(sizeof(decimation_table), ASTCENC_VECALIGN); + initialize_decimation_table_2d(x_dim, y_dim, x_weights, y_weights, dt); - if (bits_2planes >= MIN_WEIGHT_BITS_PER_BLOCK && bits_2planes <= MAX_WEIGHT_BITS_PER_BLOCK) - { - maxprec_2planes = i; - } - } + int maxprec_1plane = -1; + int maxprec_2planes = -1; + for (int i = 0; i < 12; i++) + { + int bits_1plane = get_ise_sequence_bitcount(weight_count, (quant_method)i); + if (bits_1plane >= MIN_WEIGHT_BITS_PER_BLOCK && bits_1plane <= MAX_WEIGHT_BITS_PER_BLOCK) + { + maxprec_1plane = i; + } - if (2 * x_weights * y_weights > MAX_WEIGHTS_PER_BLOCK) + if (try_2planes) + { + int bits_2planes = get_ise_sequence_bitcount(2 * weight_count, (quant_method)i); + if (bits_2planes >= MIN_WEIGHT_BITS_PER_BLOCK && bits_2planes <= MAX_WEIGHT_BITS_PER_BLOCK) { - maxprec_2planes = -1; + maxprec_2planes = i; } + } + } - bsd->permit_encode[decimation_mode_count] = (x_weights <= xdim && y_weights <= ydim); + // At least one of the two should be valid ... + assert(maxprec_1plane >= 0 || maxprec_2planes >= 0); + bsd.decimation_modes[dm_index].maxprec_1plane = maxprec_1plane; + bsd.decimation_modes[dm_index].maxprec_2planes = maxprec_2planes; + bsd.decimation_modes[dm_index].percentile_hit = false; + bsd.decimation_modes[dm_index].percentile_always = false; + bsd.decimation_tables[dm_index] = dt; - bsd->decimation_mode_samples[decimation_mode_count] = weight_count; - bsd->decimation_mode_maxprec_1plane[decimation_mode_count] = maxprec_1plane; - bsd->decimation_mode_maxprec_2planes[decimation_mode_count] = maxprec_2planes; - bsd->decimation_tables[decimation_mode_count] = dt; + bsd.decimation_mode_count++; + return dm_index; +} - decimation_mode_count++; - } - } +/** + * @brief Allocate block modes and decimation tables for a single BSD. + * + * @param x_dim The block X dimension. + * @param y_dim The block Y dimension. + * @param can_omit_modes True if we are allowed to discard modes that + * compression won't use, even if they are legal. + * @param mode_cutoff Block mode percentile cut off, between [0,1]. + * @param bsd The BSD to populate. + */ +static void construct_block_size_descriptor_2d( + int x_dim, + int y_dim, + bool can_omit_modes, + float mode_cutoff, + block_size_descriptor& bsd +) { + // Store a remap table for storing packed decimation modes. + // Indexing uses [Y * 16 + X] and max block size for each axis is 12. 
+ static const int MAX_DMI = 12 * 16 + 12; + int decimation_mode_index[MAX_DMI]; - for (int i = 0; i < MAX_DECIMATION_MODES; i++) - { - bsd->decimation_mode_percentile[i] = 1.0f; - } + bsd.xdim = x_dim; + bsd.ydim = y_dim; + bsd.zdim = 1; + bsd.texel_count = x_dim * y_dim; + bsd.decimation_mode_count = 0; - for (int i = decimation_mode_count; i < MAX_DECIMATION_MODES; i++) + for (int i = 0; i < MAX_DMI; i++) { - bsd->permit_encode[i] = 0; - bsd->decimation_mode_samples[i] = 0; - bsd->decimation_mode_maxprec_1plane[i] = -1; - bsd->decimation_mode_maxprec_2planes[i] = -1; + decimation_mode_index[i] = -1; } - bsd->decimation_mode_count = decimation_mode_count; - + // Gather all the decimation grids that can be used with the current block. #if !defined(ASTCENC_DECOMPRESS_ONLY) - const float *percentiles = get_2d_percentile_table(xdim, ydim); + const float *percentiles = get_2d_percentile_table(x_dim, y_dim); +#else + // Unused in decompress-only builds + (void)can_omit_modes; + (void)mode_cutoff; #endif - // then construct the list of block formats + // Construct the list of block formats referencing the decimation tables int packed_idx = 0; for (int i = 0; i < MAX_WEIGHT_MODES; i++) { int x_weights, y_weights; int is_dual_plane; - int quantization_mode; - int permit_encode = 1; + int quant_mode; - if (decode_block_mode_2d(i, &x_weights, &y_weights, &is_dual_plane, &quantization_mode)) - { - if (x_weights > xdim || y_weights > ydim) - { - permit_encode = 0; - } - } - else - { - permit_encode = 0; - } - - bsd->block_mode_to_packed[i] = -1; - if (!permit_encode) // also disallow decode of grid size larger than block size. - continue; - int decimation_mode = decimation_mode_index[y_weights * 16 + x_weights]; - bsd->block_modes_packed[packed_idx].decimation_mode = decimation_mode; - bsd->block_modes_packed[packed_idx].quantization_mode = quantization_mode; - bsd->block_modes_packed[packed_idx].is_dual_plane = is_dual_plane; - bsd->block_modes_packed[packed_idx].mode_index = i; + bool valid = decode_block_mode_2d(i, &x_weights, &y_weights, &is_dual_plane, &quant_mode); #if !defined(ASTCENC_DECOMPRESS_ONLY) - bsd->block_modes_packed[packed_idx].percentile = percentiles[i]; - if (bsd->decimation_mode_percentile[decimation_mode] > percentiles[i]) - { - bsd->decimation_mode_percentile[decimation_mode] = percentiles[i]; - } + float percentile = percentiles[i]; + bool selected = (percentile <= mode_cutoff) || !can_omit_modes; #else - bsd->block_modes_packed[packed_idx].percentile = 0.0f; + // Decompressor builds can never discard modes, as we cannot make any + // assumptions about the modes the original compressor used + bool selected = true; #endif - bsd->block_mode_to_packed[i] = packed_idx; - ++packed_idx; - } - bsd->block_mode_packed_count = packed_idx; -#if !defined(ASTCENC_DECOMPRESS_ONLY) - delete[] percentiles; -#endif + // ASSUMPTION: No compressor will use more weights in a dimension than + // the block has actual texels, because it wastes bits. Decompression + // of an image which violates this assumption will fail, even though it + // is technically permitted by the specification. 
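In compressor builds, the validity test, the texel-count assumption above, and the percentile cutoff collapse into one predicate per block mode; a compact restatement using the locals from this hunk (illustrative only):

// Illustrative only: the combined filter applied to each block mode index.
bool keep_mode = valid
              && (x_weights <= x_dim)
              && (y_weights <= y_dim)
              && (!can_omit_modes || (percentile <= mode_cutoff));
// Rejected modes get block_mode_packed_index[i] = -1 and contribute nothing
// to the packed arrays.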
- if (xdim * ydim <= 64) - { - bsd->texelcount_for_bitmap_partitioning = xdim * ydim; - for (int i = 0; i < xdim * ydim; i++) + // Skip modes that are invalid, too large, or not selected by heuristic + if (!valid || !selected || (x_weights > x_dim) || (y_weights > y_dim)) { - bsd->texels_for_bitmap_partitioning[i] = i; + bsd.block_mode_packed_index[i] = -1; + continue; } - } - else - { - uint64_t rng_state[2]; - astc::rand_init(rng_state); - // pick 64 random texels for use with bitmap partitioning. - int arr[MAX_TEXELS_PER_BLOCK]; - for (int i = 0; i < xdim * ydim; i++) + // Allocate and initialize the DT entry if we've not used it yet. + int decimation_mode = decimation_mode_index[y_weights * 16 + x_weights]; + if (decimation_mode == -1) { - arr[i] = 0; + decimation_mode = construct_dt_entry_2d(x_dim, y_dim, x_weights, y_weights, bsd); + decimation_mode_index[y_weights * 16 + x_weights] = decimation_mode; } - int arr_elements_set = 0; - while (arr_elements_set < 64) +#if !defined(ASTCENC_DECOMPRESS_ONLY) + // Flatten the block mode heuristic into some precomputed flags + if (percentile == 0.0f) { - unsigned int idx = (unsigned int)astc::rand(rng_state); - idx %= xdim * ydim; - if (arr[idx] == 0) - { - arr_elements_set++; - arr[idx] = 1; - } + bsd.block_modes[packed_idx].percentile_always = true; + bsd.decimation_modes[decimation_mode].percentile_always = true; + + bsd.block_modes[packed_idx].percentile_hit = true; + bsd.decimation_modes[decimation_mode].percentile_hit = true; } + else if (percentile <= mode_cutoff) + { + bsd.block_modes[packed_idx].percentile_always = false; - int texel_weights_written = 0; - int idx = 0; - while (texel_weights_written < 64) + bsd.block_modes[packed_idx].percentile_hit = true; + bsd.decimation_modes[decimation_mode].percentile_hit = true; + } + else { - if (arr[idx]) - { - bsd->texels_for_bitmap_partitioning[texel_weights_written++] = idx; - } - idx++; + bsd.block_modes[packed_idx].percentile_always = false; + bsd.block_modes[packed_idx].percentile_hit = false; } +#endif - bsd->texelcount_for_bitmap_partitioning = 64; + bsd.block_modes[packed_idx].decimation_mode = decimation_mode; + bsd.block_modes[packed_idx].quant_mode = quant_mode; + bsd.block_modes[packed_idx].is_dual_plane = is_dual_plane ? 1 : 0; + bsd.block_modes[packed_idx].mode_index = i; + bsd.block_mode_packed_index[i] = packed_idx; + ++packed_idx; } + + bsd.block_mode_count = packed_idx; + +#if !defined(ASTCENC_DECOMPRESS_ONLY) + delete[] percentiles; +#endif + + // Ensure the end of the array contains valid data (should never get read) + for (int i = bsd.decimation_mode_count; i < MAX_DECIMATION_MODES; i++) + { + bsd.decimation_modes[i].maxprec_1plane = -1; + bsd.decimation_modes[i].maxprec_2planes = -1; + bsd.decimation_modes[i].percentile_hit = false; + bsd.decimation_modes[i].percentile_always = false; + bsd.decimation_tables[i] = nullptr; + } + + // Determine the texels to use for kmeans clustering. 
+ assign_kmeans_texels(bsd); } static void construct_block_size_descriptor_3d( int xdim, int ydim, int zdim, - block_size_descriptor * bsd + block_size_descriptor* bsd ) { int decimation_mode_index[512]; // for each of the 512 entries in the decim_table_array, its index int decimation_mode_count = 0; @@ -773,29 +857,28 @@ static void construct_block_size_descriptor_3d( } // gather all the infill-modes that can be used with the current block size - for (int x_weights = 2; x_weights <= 6; x_weights++) + for (int x_weights = 2; x_weights <= xdim; x_weights++) { - for (int y_weights = 2; y_weights <= 6; y_weights++) + for (int y_weights = 2; y_weights <= ydim; y_weights++) { - for (int z_weights = 2; z_weights <= 6; z_weights++) + for (int z_weights = 2; z_weights <= zdim; z_weights++) { - if ((x_weights * y_weights * z_weights) > MAX_WEIGHTS_PER_BLOCK) + int weight_count = x_weights * y_weights * z_weights; + if (weight_count > MAX_WEIGHTS_PER_BLOCK) { continue; } - decimation_table *dt = new decimation_table; + decimation_table *dt = aligned_malloc(sizeof(decimation_table), ASTCENC_VECALIGN); decimation_mode_index[z_weights * 64 + y_weights * 8 + x_weights] = decimation_mode_count; initialize_decimation_table_3d(xdim, ydim, zdim, x_weights, y_weights, z_weights, dt); - int weight_count = x_weights * y_weights * z_weights; - int maxprec_1plane = -1; int maxprec_2planes = -1; for (int i = 0; i < 12; i++) { - int bits_1plane = compute_ise_bitcount(weight_count, (quantization_method) i); - int bits_2planes = compute_ise_bitcount(2 * weight_count, (quantization_method) i); + int bits_1plane = get_ise_sequence_bitcount(weight_count, (quant_method)i); + int bits_2planes = get_ise_sequence_bitcount(2 * weight_count, (quant_method)i); if (bits_1plane >= MIN_WEIGHT_BITS_PER_BLOCK && bits_1plane <= MAX_WEIGHT_BITS_PER_BLOCK) { @@ -808,34 +891,28 @@ static void construct_block_size_descriptor_3d( } } - if ((2 * x_weights * y_weights * z_weights) > MAX_WEIGHTS_PER_BLOCK) + if ((2 * weight_count) > MAX_WEIGHTS_PER_BLOCK) { maxprec_2planes = -1; } - bsd->permit_encode[decimation_mode_count] = (x_weights <= xdim && y_weights <= ydim && z_weights <= zdim); - - bsd->decimation_mode_samples[decimation_mode_count] = weight_count; - bsd->decimation_mode_maxprec_1plane[decimation_mode_count] = maxprec_1plane; - bsd->decimation_mode_maxprec_2planes[decimation_mode_count] = maxprec_2planes; + bsd->decimation_modes[decimation_mode_count].maxprec_1plane = maxprec_1plane; + bsd->decimation_modes[decimation_mode_count].maxprec_2planes = maxprec_2planes; + bsd->decimation_modes[decimation_mode_count].percentile_hit = false; + bsd->decimation_modes[decimation_mode_count].percentile_always = false; bsd->decimation_tables[decimation_mode_count] = dt; - decimation_mode_count++; } } } - for (int i = 0; i < MAX_DECIMATION_MODES; i++) - { - bsd->decimation_mode_percentile[i] = 1.0f; - } - for (int i = decimation_mode_count; i < MAX_DECIMATION_MODES; i++) { - bsd->permit_encode[i] = 0; - bsd->decimation_mode_samples[i] = 0; - bsd->decimation_mode_maxprec_1plane[i] = -1; - bsd->decimation_mode_maxprec_2planes[i] = -1; + bsd->decimation_modes[i].maxprec_1plane = -1; + bsd->decimation_modes[i].maxprec_2planes = -1; + bsd->decimation_modes[i].percentile_hit = false; + bsd->decimation_modes[i].percentile_always = false; + bsd->decimation_tables[i] = nullptr; } bsd->decimation_mode_count = decimation_mode_count; @@ -846,10 +923,10 @@ static void construct_block_size_descriptor_3d( { int x_weights, y_weights, z_weights; int 
is_dual_plane; - int quantization_mode; + int quant_mode; int permit_encode = 1; - if (decode_block_mode_3d(i, &x_weights, &y_weights, &z_weights, &is_dual_plane, &quantization_mode)) + if (decode_block_mode_3d(i, &x_weights, &y_weights, &z_weights, &is_dual_plane, &quant_mode)) { if (x_weights > xdim || y_weights > ydim || z_weights > zdim) { @@ -860,70 +937,33 @@ static void construct_block_size_descriptor_3d( { permit_encode = 0; } - bsd->block_mode_to_packed[i] = -1; - if (!permit_encode) - continue; - - int decimation_mode = decimation_mode_index[z_weights * 64 + y_weights * 8 + x_weights]; - bsd->block_modes_packed[packed_idx].decimation_mode = decimation_mode; - bsd->block_modes_packed[packed_idx].quantization_mode = quantization_mode; - bsd->block_modes_packed[packed_idx].is_dual_plane = is_dual_plane; - bsd->block_modes_packed[packed_idx].mode_index = i; - bsd->block_modes_packed[packed_idx].percentile = 0.0f; // No percentile table - if (bsd->decimation_mode_percentile[decimation_mode] > 0.0f) + bsd->block_mode_packed_index[i] = -1; + if (!permit_encode) { - bsd->decimation_mode_percentile[decimation_mode] = 0.0f; + continue; } - bsd->block_mode_to_packed[i] = packed_idx; - ++packed_idx; - } - bsd->block_mode_packed_count = packed_idx; - if (xdim * ydim * zdim <= 64) - { - bsd->texelcount_for_bitmap_partitioning = xdim * ydim * zdim; - for (int i = 0; i < xdim * ydim * zdim; i++) - { - bsd->texels_for_bitmap_partitioning[i] = i; - } + int decimation_mode = decimation_mode_index[z_weights * 64 + y_weights * 8 + x_weights]; + bsd->block_modes[packed_idx].decimation_mode = decimation_mode; + bsd->block_modes[packed_idx].quant_mode = quant_mode; + bsd->block_modes[packed_idx].is_dual_plane = is_dual_plane ? 1 : 0; + bsd->block_modes[packed_idx].mode_index = i; + + // No percentile table, so enable everything all the time ... + bsd->block_modes[packed_idx].percentile_hit = true; + bsd->block_modes[packed_idx].percentile_always = true; + bsd->decimation_modes[decimation_mode].percentile_hit = true; + bsd->decimation_modes[decimation_mode].percentile_always = true; + + bsd->block_mode_packed_index[i] = packed_idx; + ++packed_idx; } - else - { - uint64_t rng_state[2]; - astc::rand_init(rng_state); - // pick 64 random texels for use with bitmap partitioning. - int arr[MAX_TEXELS_PER_BLOCK]; - for (int i = 0; i < xdim * ydim * zdim; i++) - { - arr[i] = 0; - } - - int arr_elements_set = 0; - while (arr_elements_set < 64) - { - unsigned int idx = (unsigned int)astc::rand(rng_state); - idx %= xdim * ydim * zdim; - if (arr[idx] == 0) - { - arr_elements_set++; - arr[idx] = 1; - } - } + bsd->block_mode_count = packed_idx; - int texel_weights_written = 0; - int idx = 0; - while (texel_weights_written < 64) - { - if (arr[idx]) - { - bsd->texels_for_bitmap_partitioning[texel_weights_written++] = idx; - } - idx++; - } - bsd->texelcount_for_bitmap_partitioning = 64; - } + // Determine the texels to use for kmeans clustering. 
+ assign_kmeans_texels(*bsd); } /* Public function, see header file for detailed documentation */ @@ -931,6 +971,8 @@ void init_block_size_descriptor( int xdim, int ydim, int zdim, + bool can_omit_modes, + float mode_cutoff, block_size_descriptor* bsd ) { if (zdim > 1) @@ -939,17 +981,17 @@ void init_block_size_descriptor( } else { - construct_block_size_descriptor_2d(xdim, ydim, bsd); + construct_block_size_descriptor_2d(xdim, ydim, can_omit_modes, mode_cutoff, *bsd); } init_partition_tables(bsd); } void term_block_size_descriptor( - block_size_descriptor* bsd) -{ + block_size_descriptor* bsd +) { for (int i = 0; i < bsd->decimation_mode_count; i++) { - delete bsd->decimation_tables[i]; + aligned_free(bsd->decimation_tables[i]); } } diff --git a/libkram/astc-encoder/astcenc_color_quantize.cpp b/libkram/astc-encoder/astcenc_color_quantize.cpp index 8c402986..b3592657 100644 --- a/libkram/astc-encoder/astcenc_color_quantize.cpp +++ b/libkram/astc-encoder/astcenc_color_quantize.cpp @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2011-2020 Arm Limited +// Copyright 2011-2021 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. You may obtain a copy @@ -35,36 +35,29 @@ increased until color0 is no longer larger than color1. */ static inline int cqt_lookup( - int quantization_level, + int quant_level, int value ) { - if (value < 0) - { - value = 0; - } - else if (value > 255) - { - value = 255; - } - - return color_quantization_tables[quantization_level][value]; + // TODO: Make this unsigned and avoid the low clamp + value = astc::clamp(value, 0, 255); + return color_quant_tables[quant_level][value]; } static void quantize_rgb( - float4 color0, // LDR: 0=lowest, 255=highest - float4 color1, + vfloat4 color0, + vfloat4 color1, int output[6], - int quantization_level + int quant_level ) { float scale = 1.0f / 257.0f; - float r0 = astc::clamp255f(color0.r * scale); - float g0 = astc::clamp255f(color0.g * scale); - float b0 = astc::clamp255f(color0.b * scale); + float r0 = astc::clamp255f(color0.lane<0>() * scale); + float g0 = astc::clamp255f(color0.lane<1>() * scale); + float b0 = astc::clamp255f(color0.lane<2>() * scale); - float r1 = astc::clamp255f(color1.r * scale); - float g1 = astc::clamp255f(color1.g * scale); - float b1 = astc::clamp255f(color1.b * scale); + float r1 = astc::clamp255f(color1.lane<0>() * scale); + float g1 = astc::clamp255f(color1.lane<1>() * scale); + float b1 = astc::clamp255f(color1.lane<2>() * scale); int ri0, gi0, bi0, ri1, gi1, bi1; int ri0b, gi0b, bi0b, ri1b, gi1b, bi1b; @@ -73,19 +66,19 @@ static void quantize_rgb( int iters = 0; do { - ri0 = cqt_lookup(quantization_level, astc::flt2int_rd(r0 + rgb0_addon)); - gi0 = cqt_lookup(quantization_level, astc::flt2int_rd(g0 + rgb0_addon)); - bi0 = cqt_lookup(quantization_level, astc::flt2int_rd(b0 + rgb0_addon)); - ri1 = cqt_lookup(quantization_level, astc::flt2int_rd(r1 + rgb1_addon)); - gi1 = cqt_lookup(quantization_level, astc::flt2int_rd(g1 + rgb1_addon)); - bi1 = cqt_lookup(quantization_level, astc::flt2int_rd(b1 + rgb1_addon)); - - ri0b = color_unquantization_tables[quantization_level][ri0]; - gi0b = color_unquantization_tables[quantization_level][gi0]; - bi0b = color_unquantization_tables[quantization_level][bi0]; - ri1b = color_unquantization_tables[quantization_level][ri1]; - gi1b = 
color_unquantization_tables[quantization_level][gi1]; - bi1b = color_unquantization_tables[quantization_level][bi1]; + ri0 = cqt_lookup(quant_level, astc::flt2int_rd(r0 + rgb0_addon)); + gi0 = cqt_lookup(quant_level, astc::flt2int_rd(g0 + rgb0_addon)); + bi0 = cqt_lookup(quant_level, astc::flt2int_rd(b0 + rgb0_addon)); + ri1 = cqt_lookup(quant_level, astc::flt2int_rd(r1 + rgb1_addon)); + gi1 = cqt_lookup(quant_level, astc::flt2int_rd(g1 + rgb1_addon)); + bi1 = cqt_lookup(quant_level, astc::flt2int_rd(b1 + rgb1_addon)); + + ri0b = color_unquant_tables[quant_level][ri0]; + gi0b = color_unquant_tables[quant_level][gi0]; + bi0b = color_unquant_tables[quant_level][bi0]; + ri1b = color_unquant_tables[quant_level][ri1]; + gi1b = color_unquant_tables[quant_level][gi1]; + bi1b = color_unquant_tables[quant_level][bi1]; rgb0_addon -= 0.2f; rgb1_addon += 0.2f; @@ -102,47 +95,41 @@ static void quantize_rgb( /* quantize an RGBA color. */ static void quantize_rgba( - float4 color0, - float4 color1, + vfloat4 color0, + vfloat4 color1, int output[8], - int quantization_level + int quant_level ) { - color0.a *= (1.0f / 257.0f); - color1.a *= (1.0f / 257.0f); + float scale = 1.0f / 257.0f; + + float a0 = astc::clamp255f(color0.lane<3>() * scale); + float a1 = astc::clamp255f(color1.lane<3>() * scale); - float a0 = astc::clamp255f(color0.a); - float a1 = astc::clamp255f(color1.a); - int ai0 = color_quantization_tables[quantization_level][astc::flt2int_rtn(a0)]; - int ai1 = color_quantization_tables[quantization_level][astc::flt2int_rtn(a1)]; + int ai0 = color_quant_tables[quant_level][astc::flt2int_rtn(a0)]; + int ai1 = color_quant_tables[quant_level][astc::flt2int_rtn(a1)]; output[6] = ai0; output[7] = ai1; - quantize_rgb(color0, color1, output, quantization_level); + quantize_rgb(color0, color1, output, quant_level); } /* attempt to quantize RGB endpoint values with blue-contraction. Returns 1 on failure, 0 on success. */ -static int try_quantize_rgb_blue_contract( - float4 color0, // assumed to be the smaller color - float4 color1, // assumed to be the larger color +static bool try_quantize_rgb_blue_contract( + vfloat4 color0, // assumed to be the smaller color + vfloat4 color1, // assumed to be the larger color int output[6], - int quantization_level + int quant_level ) { - color0.r *= (1.0f / 257.0f); - color0.g *= (1.0f / 257.0f); - color0.b *= (1.0f / 257.0f); - - color1.r *= (1.0f / 257.0f); - color1.g *= (1.0f / 257.0f); - color1.b *= (1.0f / 257.0f); + float scale = 1.0f / 257.0f; - float r0 = color0.r; - float g0 = color0.g; - float b0 = color0.b; + float r0 = color0.lane<0>() * scale; + float g0 = color0.lane<1>() * scale; + float b0 = color0.lane<2>() * scale; - float r1 = color1.r; - float g1 = color1.g; - float b1 = color1.b; + float r1 = color1.lane<0>() * scale; + float g1 = color1.lane<1>() * scale; + float b1 = color1.lane<2>() * scale; // inverse blue-contraction. This can produce an overflow; // just bail out immediately if this is the case. 
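The inverse step itself sits in unchanged context that this hunk elides. A sketch of what those lines compute, assuming the decoder's blue-contraction averages blue into red and green (r' = (r + b) / 2, g' = (g + b) / 2), so the encoder must apply the inverse before quantizing:

// Sketch of the elided inverse blue-contraction (context, not changed here).
r0 += (r0 - b0);   // r0 = 2 * r0 - b0
g0 += (g0 - b0);
r1 += (r1 - b1);
g1 += (g1 - b1);
// Any channel pushed outside [0, 255] makes the mode unusable, which is what
// the range check at the start of the next hunk rejects.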
@@ -154,31 +141,31 @@ static int try_quantize_rgb_blue_contract( if (r0 < 0.0f || r0 > 255.0f || g0 < 0.0f || g0 > 255.0f || b0 < 0.0f || b0 > 255.0f || r1 < 0.0f || r1 > 255.0f || g1 < 0.0f || g1 > 255.0f || b1 < 0.0f || b1 > 255.0f) { - return 0; + return false; } // quantize the inverse-blue-contracted color - int ri0 = color_quantization_tables[quantization_level][astc::flt2int_rtn(r0)]; - int gi0 = color_quantization_tables[quantization_level][astc::flt2int_rtn(g0)]; - int bi0 = color_quantization_tables[quantization_level][astc::flt2int_rtn(b0)]; - int ri1 = color_quantization_tables[quantization_level][astc::flt2int_rtn(r1)]; - int gi1 = color_quantization_tables[quantization_level][astc::flt2int_rtn(g1)]; - int bi1 = color_quantization_tables[quantization_level][astc::flt2int_rtn(b1)]; + int ri0 = color_quant_tables[quant_level][astc::flt2int_rtn(r0)]; + int gi0 = color_quant_tables[quant_level][astc::flt2int_rtn(g0)]; + int bi0 = color_quant_tables[quant_level][astc::flt2int_rtn(b0)]; + int ri1 = color_quant_tables[quant_level][astc::flt2int_rtn(r1)]; + int gi1 = color_quant_tables[quant_level][astc::flt2int_rtn(g1)]; + int bi1 = color_quant_tables[quant_level][astc::flt2int_rtn(b1)]; // then unquantize again - int ru0 = color_unquantization_tables[quantization_level][ri0]; - int gu0 = color_unquantization_tables[quantization_level][gi0]; - int bu0 = color_unquantization_tables[quantization_level][bi0]; - int ru1 = color_unquantization_tables[quantization_level][ri1]; - int gu1 = color_unquantization_tables[quantization_level][gi1]; - int bu1 = color_unquantization_tables[quantization_level][bi1]; + int ru0 = color_unquant_tables[quant_level][ri0]; + int gu0 = color_unquant_tables[quant_level][gi0]; + int bu0 = color_unquant_tables[quant_level][bi0]; + int ru1 = color_unquant_tables[quant_level][ri1]; + int gu1 = color_unquant_tables[quant_level][gi1]; + int bu1 = color_unquant_tables[quant_level][bi1]; // if color #1 is not larger than color #0, then blue-contraction is not a valid approach. // note that blue-contraction and quantization may itself change this order, which is why // we must only test AFTER blue-contraction. if (ru1 + gu1 + bu1 <= ru0 + gu0 + bu0) { - return 0; + return false; } output[0] = ri1; @@ -188,26 +175,25 @@ static int try_quantize_rgb_blue_contract( output[4] = bi1; output[5] = bi0; - return 1; + return true; } /* quantize an RGBA color with blue-contraction */ static int try_quantize_rgba_blue_contract( - float4 color0, - float4 color1, + vfloat4 color0, + vfloat4 color1, int output[8], - int quantization_level + int quant_level ) { - color0.a *= (1.0f / 257.0f); - color1.a *= (1.0f / 257.0f); + float scale = 1.0f / 257.0f; - float a0 = astc::clamp255f(color0.a); - float a1 = astc::clamp255f(color1.a); + float a0 = astc::clamp255f(color0.lane<3>() * scale); + float a1 = astc::clamp255f(color1.lane<3>() * scale); - output[7] = color_quantization_tables[quantization_level][astc::flt2int_rtn(a0)]; - output[6] = color_quantization_tables[quantization_level][astc::flt2int_rtn(a1)]; + output[7] = color_quant_tables[quant_level][astc::flt2int_rtn(a0)]; + output[6] = color_quant_tables[quant_level][astc::flt2int_rtn(a1)]; - return try_quantize_rgb_blue_contract(color0, color1, output, quantization_level); + return try_quantize_rgb_blue_contract(color0, color1, output, quant_level); } @@ -218,32 +204,27 @@ static int try_quantize_rgba_blue_contract( // if the sum of the offsets is nonnegative, then we encode a regular delta. 
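The comments above describe the delta layout that try_quantize_rgb_delta() validates; a summary sketch with the field widths taken from the checks in this function (delta_fits is illustrative, not part of the patch):

// Illustrative only:
//   base   : color0 channel promoted to unorm9 (8-bit value << 1, range 0..510)
//   offset : color1 (unorm9) minus the requantized base, a signed 7-bit field
static bool delta_fits(int base_unorm9, int other_unorm9)
{
	int offset = other_unorm9 - base_unorm9;
	return offset >= -64 && offset <= 63;   // same bounds as the checks below
}
// The stored offset byte also carries bit 8 of the base, so quantization must
// preserve the top two bits (base MSB and offset sign) or the mode is rejected.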
/* attempt to quantize an RGB endpoint value with delta-encoding. */ -static int try_quantize_rgb_delta( - float4 color0, - float4 color1, +static bool try_quantize_rgb_delta( + vfloat4 color0, + vfloat4 color1, int output[6], - int quantization_level + int quant_level ) { - color0.r *= (1.0f / 257.0f); - color0.g *= (1.0f / 257.0f); - color0.b *= (1.0f / 257.0f); - - color1.r *= (1.0f / 257.0f); - color1.g *= (1.0f / 257.0f); - color1.b *= (1.0f / 257.0f); + float scale = 1.0f / 257.0f; - float r0 = astc::clamp255f(color0.r); - float g0 = astc::clamp255f(color0.g); - float b0 = astc::clamp255f(color0.b); + float r0 = astc::clamp255f(color0.lane<0>() * scale); + float g0 = astc::clamp255f(color0.lane<1>() * scale); + float b0 = astc::clamp255f(color0.lane<2>() * scale); - float r1 = astc::clamp255f(color1.r); - float g1 = astc::clamp255f(color1.g); - float b1 = astc::clamp255f(color1.b); + float r1 = astc::clamp255f(color1.lane<0>() * scale); + float g1 = astc::clamp255f(color1.lane<1>() * scale); + float b1 = astc::clamp255f(color1.lane<2>() * scale); // transform r0 to unorm9 int r0a = astc::flt2int_rtn(r0); int g0a = astc::flt2int_rtn(g0); int b0a = astc::flt2int_rtn(b0); + r0a <<= 1; g0a <<= 1; b0a <<= 1; @@ -255,13 +236,13 @@ static int try_quantize_rgb_delta( // quantize, then unquantize in order to get a value that we take // differences against. - int r0be = color_quantization_tables[quantization_level][r0b]; - int g0be = color_quantization_tables[quantization_level][g0b]; - int b0be = color_quantization_tables[quantization_level][b0b]; + int r0be = color_quant_tables[quant_level][r0b]; + int g0be = color_quant_tables[quant_level][g0b]; + int b0be = color_quant_tables[quant_level][b0b]; - r0b = color_unquantization_tables[quantization_level][r0be]; - g0b = color_unquantization_tables[quantization_level][g0be]; - b0b = color_unquantization_tables[quantization_level][b0be]; + r0b = color_unquant_tables[quant_level][r0be]; + g0b = color_unquant_tables[quant_level][g0be]; + b0b = color_unquant_tables[quant_level][b0be]; r0b |= r0a & 0x100; // final unquantized-values for endpoint 0. g0b |= g0a & 0x100; b0b |= b0a & 0x100; @@ -282,7 +263,7 @@ static int try_quantize_rgb_delta( // check if the difference is too large to be encodable. if (r1d > 63 || g1d > 63 || b1d > 63 || r1d < -64 || g1d < -64 || b1d < -64) { - return 0; + return false; } // insert top bit of the base into the offset @@ -297,17 +278,17 @@ static int try_quantize_rgb_delta( // then quantize & unquantize; if this causes any of the top two bits to flip, // then encoding fails, since we have then corrupted either the top bit of the base // or the sign bit of the offset. 
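The flip test referenced in the comment above is an XOR against the top-two-bit mask; a named form of it (illustrative only):

// Illustrative only: 0xC0 masks bit 7 (base MSB) and bit 6 (offset sign).
static inline bool top_two_bits_flipped(int before_quant, int after_unquant)
{
	return ((before_quant ^ after_unquant) & 0xC0) != 0;
}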
- int r1de = color_quantization_tables[quantization_level][r1d]; - int g1de = color_quantization_tables[quantization_level][g1d]; - int b1de = color_quantization_tables[quantization_level][b1d]; + int r1de = color_quant_tables[quant_level][r1d]; + int g1de = color_quant_tables[quant_level][g1d]; + int b1de = color_quant_tables[quant_level][b1d]; - int r1du = color_unquantization_tables[quantization_level][r1de]; - int g1du = color_unquantization_tables[quantization_level][g1de]; - int b1du = color_unquantization_tables[quantization_level][b1de]; + int r1du = color_unquant_tables[quant_level][r1de]; + int g1du = color_unquant_tables[quant_level][g1de]; + int b1du = color_unquant_tables[quant_level][b1de]; if (((r1d ^ r1du) | (g1d ^ g1du) | (b1d ^ b1du)) & 0xC0) { - return 0; + return false; } // check that the sum of the encoded offsets is nonnegative, else encoding fails @@ -332,7 +313,7 @@ static int try_quantize_rgb_delta( if (r1du + g1du + b1du < 0) { - return 0; + return false; } // check that the offsets produce legitimate sums as well. @@ -341,7 +322,7 @@ static int try_quantize_rgb_delta( b1du += b0b; if (r1du < 0 || r1du > 0x1FF || g1du < 0 || g1du > 0x1FF || b1du < 0 || b1du > 0x1FF) { - return 0; + return false; } // OK, we've come this far; we can now encode legitimate values. @@ -352,31 +333,25 @@ static int try_quantize_rgb_delta( output[4] = b0be; output[5] = b1de; - return 1; + return true; } -static int try_quantize_rgb_delta_blue_contract( - float4 color0, - float4 color1, +static bool try_quantize_rgb_delta_blue_contract( + vfloat4 color0, + vfloat4 color1, int output[6], - int quantization_level + int quant_level ) { - color0.r *= (1.0f / 257.0f); - color0.g *= (1.0f / 257.0f); - color0.b *= (1.0f / 257.0f); - - color1.r *= (1.0f / 257.0f); - color1.g *= (1.0f / 257.0f); - color1.b *= (1.0f / 257.0f); + // Note: Switch around endpoint colors already at start + float scale = 1.0f / 257.0f; - // switch around endpoint colors already at start. - float r0 = color1.r; - float g0 = color1.g; - float b0 = color1.b; + float r1 = color0.lane<0>() * scale; + float g1 = color0.lane<1>() * scale; + float b1 = color0.lane<2>() * scale; - float r1 = color0.r; - float g1 = color0.g; - float b1 = color0.b; + float r0 = color1.lane<0>() * scale; + float g0 = color1.lane<1>() * scale; + float b0 = color1.lane<2>() * scale; // inverse blue-contraction. This step can perform an overflow, in which case // we will bail out immediately. @@ -388,7 +363,7 @@ static int try_quantize_rgb_delta_blue_contract( if (r0 < 0.0f || r0 > 255.0f || g0 < 0.0f || g0 > 255.0f || b0 < 0.0f || b0 > 255.0f || r1 < 0.0f || r1 > 255.0f || g1 < 0.0f || g1 > 255.0f || b1 < 0.0f || b1 > 255.0f) { - return 0; + return false; } // transform r0 to unorm9 @@ -406,13 +381,13 @@ static int try_quantize_rgb_delta_blue_contract( // quantize, then unquantize in order to get a value that we take // differences against. 
- int r0be = color_quantization_tables[quantization_level][r0b]; - int g0be = color_quantization_tables[quantization_level][g0b]; - int b0be = color_quantization_tables[quantization_level][b0b]; + int r0be = color_quant_tables[quant_level][r0b]; + int g0be = color_quant_tables[quant_level][g0b]; + int b0be = color_quant_tables[quant_level][b0b]; - r0b = color_unquantization_tables[quantization_level][r0be]; - g0b = color_unquantization_tables[quantization_level][g0be]; - b0b = color_unquantization_tables[quantization_level][b0be]; + r0b = color_unquant_tables[quant_level][r0be]; + g0b = color_unquant_tables[quant_level][g0be]; + b0b = color_unquant_tables[quant_level][b0be]; r0b |= r0a & 0x100; // final unquantized-values for endpoint 0. g0b |= g0a & 0x100; b0b |= b0a & 0x100; @@ -433,7 +408,7 @@ static int try_quantize_rgb_delta_blue_contract( // check if the difference is too large to be encodable. if (r1d > 63 || g1d > 63 || b1d > 63 || r1d < -64 || g1d < -64 || b1d < -64) { - return 0; + return false; } // insert top bit of the base into the offset @@ -448,17 +423,17 @@ static int try_quantize_rgb_delta_blue_contract( // then quantize & unquantize; if this causes any of the top two bits to flip, // then encoding fails, since we have then corrupted either the top bit of the base // or the sign bit of the offset. - int r1de = color_quantization_tables[quantization_level][r1d]; - int g1de = color_quantization_tables[quantization_level][g1d]; - int b1de = color_quantization_tables[quantization_level][b1d]; + int r1de = color_quant_tables[quant_level][r1d]; + int g1de = color_quant_tables[quant_level][g1d]; + int b1de = color_quant_tables[quant_level][b1d]; - int r1du = color_unquantization_tables[quantization_level][r1de]; - int g1du = color_unquantization_tables[quantization_level][g1de]; - int b1du = color_unquantization_tables[quantization_level][b1de]; + int r1du = color_unquant_tables[quant_level][r1de]; + int g1du = color_unquant_tables[quant_level][g1de]; + int b1du = color_unquant_tables[quant_level][b1de]; if (((r1d ^ r1du) | (g1d ^ g1du) | (b1d ^ b1du)) & 0xC0) { - return 0; + return false; } // check that the sum of the encoded offsets is negative, else encoding fails @@ -484,7 +459,7 @@ static int try_quantize_rgb_delta_blue_contract( if (r1du + g1du + b1du >= 0) { - return 0; + return false; } // check that the offsets produce legitimate sums as well. @@ -494,7 +469,7 @@ static int try_quantize_rgb_delta_blue_contract( if (r1du < 0 || r1du > 0x1FF || g1du < 0 || g1du > 0x1FF || b1du < 0 || b1du > 0x1FF) { - return 0; + return false; } // OK, we've come this far; we can now encode legitimate values. @@ -505,43 +480,40 @@ static int try_quantize_rgb_delta_blue_contract( output[4] = b0be; output[5] = b1de; - return 1; + return true; } -static int try_quantize_alpha_delta( - float4 color0, - float4 color1, +static bool try_quantize_alpha_delta( + vfloat4 color0, + vfloat4 color1, int output[8], - int quantization_level + int quant_level ) { - color0.a *= (1.0f / 257.0f); - color1.a *= (1.0f / 257.0f); + float scale = 1.0f / 257.0f; - // the calculation for alpha-delta is exactly the same as for RGB-delta; see - // the RGB-delta function for comments. 
- float a0 = astc::clamp255f(color0.a); - float a1 = astc::clamp255f(color1.a); + float a0 = astc::clamp255f(color0.lane<3>() * scale); + float a1 = astc::clamp255f(color1.lane<3>() * scale); int a0a = astc::flt2int_rtn(a0); a0a <<= 1; int a0b = a0a & 0xFF; - int a0be = color_quantization_tables[quantization_level][a0b]; - a0b = color_unquantization_tables[quantization_level][a0be]; + int a0be = color_quant_tables[quant_level][a0b]; + a0b = color_unquant_tables[quant_level][a0be]; a0b |= a0a & 0x100; int a1d = astc::flt2int_rtn(a1); a1d <<= 1; a1d -= a0b; if (a1d > 63 || a1d < -64) { - return 0; + return false; } a1d &= 0x7F; a1d |= (a0b & 0x100) >> 1; - int a1de = color_quantization_tables[quantization_level][a1d]; - int a1du = color_unquantization_tables[quantization_level][a1de]; + int a1de = color_quant_tables[quant_level][a1d]; + int a1du = color_unquant_tables[quant_level][a1de]; if ((a1d ^ a1du) & 0xC0) { - return 0; + return false; } a1du &= 0x7F; if (a1du & 0x40) @@ -551,23 +523,26 @@ static int try_quantize_alpha_delta( a1du += a0b; if (a1du < 0 || a1du > 0x1FF) { - return 0; + return false; } output[6] = a0be; output[7] = a1de; - return 1; + return true; } -int try_quantize_luminance_alpha_delta( - float4 color0, - float4 color1, +static bool try_quantize_luminance_alpha_delta( + vfloat4 color0, + vfloat4 color1, int output[8], - int quantization_level + int quant_level ) { - float l0 = astc::clamp255f((color0.r + color0.g + color0.b) * ((1.0f / 3.0f) * (1.0f / 257.0f))); - float l1 = astc::clamp255f((color1.r + color1.g + color1.b) * ((1.0f / 3.0f) * (1.0f / 257.0f))); - float a0 = astc::clamp255f(color0.a * (1.0f / 257.0f)); - float a1 = astc::clamp255f(color1.a * (1.0f / 257.0f)); + float scale = 1.0f / 257.0f; + + float l0 = astc::clamp255f(hadd_rgb_s(color0) * ((1.0f / 3.0f) * scale)); + float l1 = astc::clamp255f(hadd_rgb_s(color1) * ((1.0f / 3.0f) * scale)); + + float a0 = astc::clamp255f(color0.lane<3>() * scale); + float a1 = astc::clamp255f(color1.lane<3>() * scale); int l0a = astc::flt2int_rtn(l0); int a0a = astc::flt2int_rtn(a0); @@ -575,10 +550,10 @@ int try_quantize_luminance_alpha_delta( a0a <<= 1; int l0b = l0a & 0xFF; int a0b = a0a & 0xFF; - int l0be = color_quantization_tables[quantization_level][l0b]; - int a0be = color_quantization_tables[quantization_level][a0b]; - l0b = color_unquantization_tables[quantization_level][l0be]; - a0b = color_unquantization_tables[quantization_level][a0be]; + int l0be = color_quant_tables[quant_level][l0b]; + int a0be = color_quant_tables[quant_level][a0b]; + l0b = color_unquant_tables[quant_level][l0be]; + a0b = color_unquant_tables[quant_level][a0be]; l0b |= l0a & 0x100; a0b |= a0a & 0x100; int l1d = astc::flt2int_rtn(l1); @@ -589,28 +564,28 @@ int try_quantize_luminance_alpha_delta( a1d -= a0b; if (l1d > 63 || l1d < -64) { - return 0; + return false; } if (a1d > 63 || a1d < -64) { - return 0; + return false; } l1d &= 0x7F; a1d &= 0x7F; l1d |= (l0b & 0x100) >> 1; a1d |= (a0b & 0x100) >> 1; - int l1de = color_quantization_tables[quantization_level][l1d]; - int a1de = color_quantization_tables[quantization_level][a1d]; - int l1du = color_unquantization_tables[quantization_level][l1de]; - int a1du = color_unquantization_tables[quantization_level][a1de]; + int l1de = color_quant_tables[quant_level][l1d]; + int a1de = color_quant_tables[quant_level][a1d]; + int l1du = color_unquant_tables[quant_level][l1de]; + int a1du = color_unquant_tables[quant_level][a1de]; if ((l1d ^ l1du) & 0xC0) { - return 0; + return false; } if ((a1d ^ 
a1du) & 0xC0) { - return 0; + return false; } l1du &= 0x7F; a1du &= 0x7F; @@ -626,126 +601,120 @@ int try_quantize_luminance_alpha_delta( a1du += a0b; if (l1du < 0 || l1du > 0x1FF) { - return 0; + return false; } if (a1du < 0 || a1du > 0x1FF) { - return 0; + return false; } output[0] = l0be; output[1] = l1de; output[2] = a0be; output[3] = a1de; - return 1; + return true; } -static int try_quantize_rgba_delta( - float4 color0, - float4 color1, +static bool try_quantize_rgba_delta( + vfloat4 color0, + vfloat4 color1, int output[8], - int quantization_level + int quant_level ) { - int alpha_delta_res = try_quantize_alpha_delta(color0, color1, output, quantization_level); + bool alpha_delta_res = try_quantize_alpha_delta(color0, color1, output, quant_level); - if (alpha_delta_res == 0) + if (alpha_delta_res == false) { - return 0; + return false; } - return try_quantize_rgb_delta(color0, color1, output, quantization_level); + return try_quantize_rgb_delta(color0, color1, output, quant_level); } -static int try_quantize_rgba_delta_blue_contract( - float4 color0, - float4 color1, +static bool try_quantize_rgba_delta_blue_contract( + vfloat4 color0, + vfloat4 color1, int output[8], - int quantization_level + int quant_level ) { // notice that for the alpha encoding, we are swapping around color0 and color1; // this is because blue-contraction involves swapping around the two colors. - int alpha_delta_res = try_quantize_alpha_delta(color1, color0, output, quantization_level); + int alpha_delta_res = try_quantize_alpha_delta(color1, color0, output, quant_level); if (alpha_delta_res == 0) { - return 0; + return false; } - return try_quantize_rgb_delta_blue_contract(color0, color1, output, quantization_level); + return try_quantize_rgb_delta_blue_contract(color0, color1, output, quant_level); } static void quantize_rgbs_new( - float4 rgbs_color, // W component is a desired-scale to apply, in the range 0..1 + vfloat4 rgbs_color, // W component is a desired-scale to apply, in the range 0..1 int output[4], - int quantization_level + int quant_level ) { - rgbs_color.r *= (1.0f / 257.0f); - rgbs_color.g *= (1.0f / 257.0f); - rgbs_color.b *= (1.0f / 257.0f); + float scale = 1.0f / 257.0f; - float r = astc::clamp255f(rgbs_color.r); - float g = astc::clamp255f(rgbs_color.g); - float b = astc::clamp255f(rgbs_color.b); + float r = astc::clamp255f(rgbs_color.lane<0>() * scale); + float g = astc::clamp255f(rgbs_color.lane<1>() * scale); + float b = astc::clamp255f(rgbs_color.lane<2>() * scale); - int ri = color_quantization_tables[quantization_level][astc::flt2int_rtn(r)]; - int gi = color_quantization_tables[quantization_level][astc::flt2int_rtn(g)]; - int bi = color_quantization_tables[quantization_level][astc::flt2int_rtn(b)]; + int ri = color_quant_tables[quant_level][astc::flt2int_rtn(r)]; + int gi = color_quant_tables[quant_level][astc::flt2int_rtn(g)]; + int bi = color_quant_tables[quant_level][astc::flt2int_rtn(b)]; - int ru = color_unquantization_tables[quantization_level][ri]; - int gu = color_unquantization_tables[quantization_level][gi]; - int bu = color_unquantization_tables[quantization_level][bi]; + int ru = color_unquant_tables[quant_level][ri]; + int gu = color_unquant_tables[quant_level][gi]; + int bu = color_unquant_tables[quant_level][bi]; - float oldcolorsum = rgbs_color.r + rgbs_color.g + rgbs_color.b; + float oldcolorsum = hadd_rgb_s(rgbs_color) * scale; float newcolorsum = (float)(ru + gu + bu); - float scale = astc::clamp1f(rgbs_color.a * (oldcolorsum + 1e-10f) / (newcolorsum + 1e-10f)); 
- int scale_idx = astc::flt2int_rtn(scale * 256.0f); - scale_idx = astc::clampi(scale_idx, 0, 255); + float scalea = astc::clamp1f(rgbs_color.lane<3>() * (oldcolorsum + 1e-10f) / (newcolorsum + 1e-10f)); + int scale_idx = astc::flt2int_rtn(scalea * 256.0f); + scale_idx = astc::clamp(scale_idx, 0, 255); output[0] = ri; output[1] = gi; output[2] = bi; - output[3] = color_quantization_tables[quantization_level][scale_idx]; + output[3] = color_quant_tables[quant_level][scale_idx]; } static void quantize_rgbs_alpha_new( - float4 color0, - float4 color1, - float4 rgbs_color, + vfloat4 color0, + vfloat4 color1, + vfloat4 rgbs_color, int output[6], - int quantization_level + int quant_level ) { - color0.a *= (1.0f / 257.0f); - color1.a *= (1.0f / 257.0f); + float scale = 1.0f / 257.0f; - float a0 = astc::clamp255f(color0.a); - float a1 = astc::clamp255f(color1.a); + float a0 = astc::clamp255f(color0.lane<3>() * scale); + float a1 = astc::clamp255f(color1.lane<3>() * scale); - int ai0 = color_quantization_tables[quantization_level][astc::flt2int_rtn(a0)]; - int ai1 = color_quantization_tables[quantization_level][astc::flt2int_rtn(a1)]; + int ai0 = color_quant_tables[quant_level][astc::flt2int_rtn(a0)]; + int ai1 = color_quant_tables[quant_level][astc::flt2int_rtn(a1)]; output[4] = ai0; output[5] = ai1; - quantize_rgbs_new(rgbs_color, output, quantization_level); + quantize_rgbs_new(rgbs_color, output, quant_level); } static void quantize_luminance( - float4 color0, - float4 color1, + vfloat4 color0, + vfloat4 color1, int output[2], - int quantization_level + int quant_level ) { - color0.r *= (1.0f / 257.0f); - color0.g *= (1.0f / 257.0f); - color0.b *= (1.0f / 257.0f); + float scale = 1.0f / 257.0f; - color1.r *= (1.0f / 257.0f); - color1.g *= (1.0f / 257.0f); - color1.b *= (1.0f / 257.0f); + color0 = color0 * scale; + color1 = color1 * scale; - float lum0 = astc::clamp255f((color0.r + color0.g + color0.b) * (1.0f / 3.0f)); - float lum1 = astc::clamp255f((color1.r + color1.g + color1.b) * (1.0f / 3.0f)); + float lum0 = astc::clamp255f(hadd_rgb_s(color0) * (1.0f / 3.0f)); + float lum1 = astc::clamp255f(hadd_rgb_s(color1) * (1.0f / 3.0f)); if (lum0 > lum1) { @@ -754,66 +723,73 @@ static void quantize_luminance( lum1 = avg; } - output[0] = color_quantization_tables[quantization_level][astc::flt2int_rtn(lum0)]; - output[1] = color_quantization_tables[quantization_level][astc::flt2int_rtn(lum1)]; + output[0] = color_quant_tables[quant_level][astc::flt2int_rtn(lum0)]; + output[1] = color_quant_tables[quant_level][astc::flt2int_rtn(lum1)]; } static void quantize_luminance_alpha( - float4 color0, - float4 color1, + vfloat4 color0, + vfloat4 color1, int output[4], - int quantization_level + int quant_level ) { - color0 = color0 * (1.0f / 257.0f); - color1 = color1 * (1.0f / 257.0f); + float scale = 1.0f / 257.0f; + + color0 = color0 * scale; + color1 = color1 * scale; + + float lum0 = astc::clamp255f(hadd_rgb_s(color0) * (1.0f / 3.0f)); + float lum1 = astc::clamp255f(hadd_rgb_s(color1) * (1.0f / 3.0f)); - float lum0 = astc::clamp255f((color0.r + color0.g + color0.b) * (1.0f / 3.0f)); - float lum1 = astc::clamp255f((color1.r + color1.g + color1.b) * (1.0f / 3.0f)); - float a0 = astc::clamp255f(color0.a); - float a1 = astc::clamp255f(color1.a); + float a0 = astc::clamp255f(color0.lane<3>()); + float a1 = astc::clamp255f(color1.lane<3>()); // if the endpoints are *really* close, then pull them apart slightly; // this affords for >8 bits precision for normal maps. 
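The restructuring just below only hoists the shared quant_level > 18 test; the nudge itself is unchanged and, for either pair of endpoints, amounts to this (illustrative helper, not in the patch; values are on the 0..255 scale):

// Illustrative only: pull two nearly-equal endpoints half a step apart.
static inline void nudge_apart(float& v0, float& v1)
{
	if (fabsf(v0 - v1) < 3.0f)
	{
		float step = (v0 < v1) ? 0.5f : -0.5f;
		v0 = astc::clamp255f(v0 - step);
		v1 = astc::clamp255f(v1 + step);
	}
}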
- if (quantization_level > 18 && fabsf(lum0 - lum1) < 3.0f) - { - if (lum0 < lum1) - { - lum0 -= 0.5f; - lum1 += 0.5f; - } - else - { - lum0 += 0.5f; - lum1 -= 0.5f; - } - lum0 = astc::clamp255f(lum0); - lum1 = astc::clamp255f(lum1); - } - if (quantization_level > 18 && fabsf(a0 - a1) < 3.0f) + if (quant_level > 18) { - if (a0 < a1) + if (fabsf(lum0 - lum1) < 3.0f) { - a0 -= 0.5f; - a1 += 0.5f; + if (lum0 < lum1) + { + lum0 -= 0.5f; + lum1 += 0.5f; + } + else + { + lum0 += 0.5f; + lum1 -= 0.5f; + } + lum0 = astc::clamp255f(lum0); + lum1 = astc::clamp255f(lum1); } - else + + if (fabsf(a0 - a1) < 3.0f) { - a0 += 0.5f; - a1 -= 0.5f; + if (a0 < a1) + { + a0 -= 0.5f; + a1 += 0.5f; + } + else + { + a0 += 0.5f; + a1 -= 0.5f; + } + a0 = astc::clamp255f(a0); + a1 = astc::clamp255f(a1); } - a0 = astc::clamp255f(a0); - a1 = astc::clamp255f(a1); } - output[0] = color_quantization_tables[quantization_level][astc::flt2int_rtn(lum0)]; - output[1] = color_quantization_tables[quantization_level][astc::flt2int_rtn(lum1)]; - output[2] = color_quantization_tables[quantization_level][astc::flt2int_rtn(a0)]; - output[3] = color_quantization_tables[quantization_level][astc::flt2int_rtn(a1)]; + output[0] = color_quant_tables[quant_level][astc::flt2int_rtn(lum0)]; + output[1] = color_quant_tables[quant_level][astc::flt2int_rtn(lum1)]; + output[2] = color_quant_tables[quant_level][astc::flt2int_rtn(a0)]; + output[3] = color_quant_tables[quant_level][astc::flt2int_rtn(a1)]; } // quantize and unquantize a number, wile making sure to retain the top two bits. static inline void quantize_and_unquantize_retain_top_two_bits( - int quantization_level, + int quant_level, int value_to_quantize, // 0 to 255. int* quantized_value, int* unquantized_value @@ -824,8 +800,8 @@ static inline void quantize_and_unquantize_retain_top_two_bits( do { - quantval = color_quantization_tables[quantization_level][value_to_quantize]; - uquantval = color_unquantization_tables[quantization_level][quantval]; + quantval = color_quant_tables[quant_level][value_to_quantize]; + uquantval = color_unquant_tables[quant_level][quantval]; // perform looping if the top two bits were modified by quant/unquant perform_loop = (value_to_quantize & 0xC0) != (uquantval & 0xC0); @@ -850,7 +826,7 @@ static inline void quantize_and_unquantize_retain_top_two_bits( // quantize and unquantize a number, wile making sure to retain the top four bits. static inline void quantize_and_unquantize_retain_top_four_bits( - int quantization_level, + int quant_level, int value_to_quantize, // 0 to 255. 
int *quantized_value, int *unquantized_value @@ -861,8 +837,8 @@ static inline void quantize_and_unquantize_retain_top_four_bits( do { - quantval = color_quantization_tables[quantization_level][value_to_quantize]; - uquantval = color_unquantization_tables[quantization_level][quantval]; + quantval = color_quant_tables[quant_level][value_to_quantize]; + uquantval = color_unquant_tables[quant_level][quantval]; // perform looping if the top two bits were modified by quant/unquant perform_loop = (value_to_quantize & 0xF0) != (uquantval & 0xF0); @@ -887,39 +863,21 @@ static inline void quantize_and_unquantize_retain_top_four_bits( /* HDR color encoding, take #3 */ static void quantize_hdr_rgbo3( - float4 color, + vfloat4 color, int output[4], - int quantization_level + int quant_level ) { - color.r += color.a; - color.g += color.a; - color.b += color.a; - - if (!(color.r > 0.0f)) - color.r = 0.0f; - else if (color.r > 65535.0f) - color.r = 65535.0f; - - if (!(color.g > 0.0f)) - color.g = 0.0f; - else if (color.g > 65535.0f) - color.g = 65535.0f; - - if (!(color.b > 0.0f)) - color.b = 0.0f; - else if (color.b > 65535.0f) - color.b = 65535.0f; - - if (!(color.a > 0.0f)) - color.a = 0.0f; - else if (color.a > 65535.0f) - color.a = 65535.0f; - - float4 color_bak = color; + color.set_lane<0>(color.lane<0>() + color.lane<3>()); + color.set_lane<1>(color.lane<1>() + color.lane<3>()); + color.set_lane<2>(color.lane<2>() + color.lane<3>()); + + color = clamp(0.0f, 65535.0f, color); + + vfloat4 color_bak = color; int majcomp; - if (color.r > color.g && color.r > color.b) + if (color.lane<0>() > color.lane<1>() && color.lane<0>() > color.lane<2>()) majcomp = 0; // red is largest component - else if (color.g > color.b) + else if (color.lane<1>() > color.lane<2>()) majcomp = 1; // green is largest component else majcomp = 2; // blue is largest component @@ -928,10 +886,10 @@ static void quantize_hdr_rgbo3( switch (majcomp) { case 1: - color = float4(color.g, color.r, color.b, color.a); + color = color.swz<1, 0, 2, 3>(); break; case 2: - color = float4(color.b, color.g, color.r, color.a); + color = color.swz<2, 1, 0, 3>(); break; default: break; @@ -969,10 +927,10 @@ static void quantize_hdr_rgbo3( 1.0f / 256.0f, }; - float r_base = color.r; - float g_base = color.r - color.g; - float b_base = color.r - color.b; - float s_base = color.a; + float r_base = color.lane<0>(); + float g_base = color.lane<0>() - color.lane<1>() ; + float b_base = color.lane<0>() - color.lane<2>() ; + float s_base = color.lane<3>() ; for (int mode = 0; mode < 5; mode++) { @@ -999,36 +957,21 @@ static void quantize_hdr_rgbo3( int r_quantval; int r_uquantval; - quantize_and_unquantize_retain_top_two_bits(quantization_level, r_lowbits, &r_quantval, &r_uquantval); + quantize_and_unquantize_retain_top_two_bits(quant_level, r_lowbits, &r_quantval, &r_uquantval); r_intval = (r_intval & ~0x3f) | (r_uquantval & 0x3f); - float r_fval = r_intval * mode_rscale; + float r_fval = static_cast(r_intval) * mode_rscale; // next, recompute G and B, then quantize and unquantize them. 
- float g_fval = r_fval - color.g; - float b_fval = r_fval - color.b; - if (g_fval < 0.0f) - { - g_fval = 0.0f; - } - else if (g_fval > 65535.0f) - { - g_fval = 65535.0f; - } + float g_fval = r_fval - color.lane<1>() ; + float b_fval = r_fval - color.lane<2>() ; - if (b_fval < 0.0f) - { - b_fval = 0.0f; - } - else if (b_fval > 65535.0f) - { - b_fval = 65535.0f; - } + g_fval = astc::clamp(g_fval, 0.0f, 65535.0f); + b_fval = astc::clamp(b_fval, 0.0f, 65535.0f); int g_intval = astc::flt2int_rtn(g_fval * mode_scale); int b_intval = astc::flt2int_rtn(b_fval * mode_scale); - if (g_intval >= gb_intcutoff || b_intval >= gb_intcutoff) { continue; @@ -1115,32 +1058,25 @@ static void quantize_hdr_rgbo3( int g_uquantval; int b_uquantval; - quantize_and_unquantize_retain_top_four_bits(quantization_level, g_lowbits, &g_quantval, &g_uquantval); + quantize_and_unquantize_retain_top_four_bits(quant_level, g_lowbits, &g_quantval, &g_uquantval); - quantize_and_unquantize_retain_top_four_bits(quantization_level, b_lowbits, &b_quantval, &b_uquantval); + quantize_and_unquantize_retain_top_four_bits(quant_level, b_lowbits, &b_quantval, &b_uquantval); g_intval = (g_intval & ~0x1f) | (g_uquantval & 0x1f); b_intval = (b_intval & ~0x1f) | (b_uquantval & 0x1f); - g_fval = g_intval * mode_rscale; - b_fval = b_intval * mode_rscale; + g_fval = static_cast(g_intval) * mode_rscale; + b_fval = static_cast(b_intval) * mode_rscale; // finally, recompute the scale value, based on the errors // introduced to red, green and blue. // If the error is positive, then the R,G,B errors combined have raised the color // value overall; as such, the scale value needs to be increased. - float rgb_errorsum = (r_fval - color.r) + (r_fval - g_fval - color.g) + (r_fval - b_fval - color.b); + float rgb_errorsum = (r_fval - color.lane<0>() ) + (r_fval - g_fval - color.lane<1>() ) + (r_fval - b_fval - color.lane<2>() ); float s_fval = s_base + rgb_errorsum * (1.0f / 3.0f); - if (s_fval < 0.0f) - { - s_fval = 0.0f; - } - else if (s_fval > 1e9f) - { - s_fval = 1e9f; - } + s_fval = astc::clamp(s_fval, 0.0f, 1e9f); int s_intval = astc::flt2int_rtn(s_fval * mode_scale); @@ -1194,7 +1130,7 @@ static void quantize_hdr_rgbo3( int s_quantval; int s_uquantval; - quantize_and_unquantize_retain_top_four_bits(quantization_level, s_lowbits, &s_quantval, &s_uquantval); + quantize_and_unquantize_retain_top_four_bits(quant_level, s_lowbits, &s_quantval, &s_uquantval); output[0] = r_quantval; output[1] = g_quantval; output[2] = b_quantval; @@ -1205,45 +1141,28 @@ static void quantize_hdr_rgbo3( // failed to encode any of the modes above? In that case, // encode using mode #5. 
float vals[4]; - int ivals[4]; - vals[0] = color_bak.r; - vals[1] = color_bak.g; - vals[2] = color_bak.b; - vals[3] = color_bak.a; + vals[0] = color_bak.lane<0>(); + vals[1] = color_bak.lane<1>(); + vals[2] = color_bak.lane<2>(); + vals[3] = color_bak.lane<3>(); + int ivals[4]; float cvals[3]; for (int i = 0; i < 3; i++) { - if (vals[i] < 0.0f) - { - vals[i] = 0.0f; - } - else if (vals[i] > 65020.0f) - { - vals[i] = 65020.0f; - } - + vals[i] = astc::clamp(vals[i], 0.0f, 65020.0f); ivals[i] = astc::flt2int_rtn(vals[i] * (1.0f / 512.0f)); - cvals[i] = ivals[i] * 512.0f; + cvals[i] = static_cast(ivals[i]) * 512.0f; } float rgb_errorsum = (cvals[0] - vals[0]) + (cvals[1] - vals[1]) + (cvals[2] - vals[2]); vals[3] += rgb_errorsum * (1.0f / 3.0f); - if (vals[3] < 0.0f) - { - vals[3] = 0.0f; - } - else if (vals[3] > 65020.0f) - { - vals[3] = 65020.0f; - } - + vals[3] = astc::clamp(vals[3], 0.0f, 65020.0f); ivals[3] = astc::flt2int_rtn(vals[3] * (1.0f / 512.0f)); int encvals[4]; - encvals[0] = (ivals[0] & 0x3f) | 0xC0; encvals[1] = (ivals[1] & 0x7f) | 0x80; encvals[2] = (ivals[2] & 0x7f) | 0x80; @@ -1252,81 +1171,31 @@ static void quantize_hdr_rgbo3( for (int i = 0; i < 4; i++) { int dummy; - quantize_and_unquantize_retain_top_four_bits(quantization_level, encvals[i], &(output[i]), &dummy); + quantize_and_unquantize_retain_top_four_bits(quant_level, encvals[i], &(output[i]), &dummy); } return; } static void quantize_hdr_rgb3( - float4 color0, - float4 color1, + vfloat4 color0, + vfloat4 color1, int output[6], - int quantization_level + int quant_level ) { - if (!(color0.r > 0.0f)) - { - color0.r = 0.0f; - } - else if (color0.r > 65535.0f) - { - color0.r = 65535.0f; - } - - if (!(color0.g > 0.0f)) - { - color0.g = 0.0f; - } - else if (color0.g > 65535.0f) - { - color0.g = 65535.0f; - } - - if (!(color0.b > 0.0f)) - { - color0.b = 0.0f; - } - else if (color0.b > 65535.0f) - { - color0.b = 65535.0f; - } - - if (!(color1.r > 0.0f)) - { - color1.r = 0.0f; - } - else if (color1.r > 65535.0f) - { - color1.r = 65535.0f; - } - - if (!(color1.g > 0.0f)) - { - color1.g = 0.0f; - } - else if (color1.g > 65535.0f) - { - color1.g = 65535.0f; - } + // Note: color*.lane<3> is not used so we can ignore it + color0 = clamp(0.0f, 65535.0f, color0); + color1 = clamp(0.0f, 65535.0f, color1); - if (!(color1.b > 0.0f)) - { - color1.b = 0.0f; - } - else if (color1.b > 65535.0f) - { - color1.b = 65535.0f; - } - - float4 color0_bak = color0; - float4 color1_bak = color1; + vfloat4 color0_bak = color0; + vfloat4 color1_bak = color1; int majcomp; - if (color1.r > color1.g && color1.r > color1.b) + if (color1.lane<0>() > color1.lane<1>() && color1.lane<0>() > color1.lane<2>()) { majcomp = 0; // red is largest } - else if (color1.g > color1.b) + else if (color1.lane<1>() > color1.lane<2>()) { majcomp = 1; // green is largest } @@ -1339,32 +1208,25 @@ static void quantize_hdr_rgb3( switch (majcomp) { case 1: // red-green swap - color0 = float4(color0.g, color0.r, color0.b, color0.a); - color1 = float4(color1.g, color1.r, color1.b, color1.a); + color0 = color0.swz<1, 0, 2, 3>(); + color1 = color1.swz<1, 0, 2, 3>(); break; case 2: // red-blue swap - color0 = float4(color0.b, color0.g, color0.r, color0.a); - color1 = float4(color1.b, color1.g, color1.r, color1.a); + color0 = color0.swz<2, 1, 0, 3>(); + color1 = color1.swz<2, 1, 0, 3>(); break; default: break; } - float a_base = color1.r; - if (a_base < 0.0f) - { - a_base = 0.0f; - } - else if (a_base > 65535.0f) - { - a_base = 65535.0f; - } + float a_base = color1.lane<0>(); + 
a_base = astc::clamp(a_base, 0.0f, 65535.0f); - float b0_base = a_base - color1.g; - float b1_base = a_base - color1.b; - float c_base = a_base - color0.r; - float d0_base = a_base - b0_base - c_base - color0.g; - float d1_base = a_base - b1_base - c_base - color0.b; + float b0_base = a_base - color1.lane<1>(); + float b1_base = a_base - color1.lane<2>(); + float c_base = a_base - color0.lane<0>(); + float d0_base = a_base - b0_base - c_base - color0.lane<1>(); + float d1_base = a_base - b1_base - c_base - color0.lane<2>(); // number of bits in the various fields in the various modes static const int mode_bits[8][4] = { @@ -1440,17 +1302,14 @@ static void quantize_hdr_rgb3( int a_intval = astc::flt2int_rtn(a_base * mode_scale); int a_lowbits = a_intval & 0xFF; - int a_quantval = color_quantization_tables[quantization_level][a_lowbits]; - int a_uquantval = color_unquantization_tables[quantization_level][a_quantval]; + int a_quantval = color_quant_tables[quant_level][a_lowbits]; + int a_uquantval = color_unquant_tables[quant_level][a_quantval]; a_intval = (a_intval & ~0xFF) | a_uquantval; - float a_fval = a_intval * mode_rscale; + float a_fval = static_cast(a_intval) * mode_rscale; // next, recompute C, then quantize and unquantize it - float c_fval = a_fval - color0.r; - if (c_fval < 0.0f) - c_fval = 0.0f; - else if (c_fval > 65535.0f) - c_fval = 65535.0f; + float c_fval = a_fval - color0.lane<0>(); + c_fval = astc::clamp(c_fval, 0.0f, 65535.0f); int c_intval = astc::flt2int_rtn(c_fval * mode_scale); @@ -1466,31 +1325,16 @@ static void quantize_hdr_rgb3( int c_quantval; int c_uquantval; - quantize_and_unquantize_retain_top_two_bits(quantization_level, c_lowbits, &c_quantval, &c_uquantval); + quantize_and_unquantize_retain_top_two_bits(quant_level, c_lowbits, &c_quantval, &c_uquantval); c_intval = (c_intval & ~0x3F) | (c_uquantval & 0x3F); - c_fval = c_intval * mode_rscale; + c_fval = static_cast(c_intval) * mode_rscale; // next, recompute B0 and B1, then quantize and unquantize them - float b0_fval = a_fval - color1.g; - float b1_fval = a_fval - color1.b; - if (b0_fval < 0.0f) - { - b0_fval = 0.0f; - } - else if (b0_fval > 65535.0f) - { - b0_fval = 65535.0f; - } - - if (b1_fval < 0.0f) - { - b1_fval = 0.0f; - } - else if (b1_fval > 65535.0f) - { - b1_fval = 65535.0f; - } + float b0_fval = a_fval - color1.lane<1>(); + float b1_fval = a_fval - color1.lane<2>(); + b0_fval = astc::clamp(b0_fval, 0.0f, 65535.0f); + b1_fval = astc::clamp(b1_fval, 0.0f, 65535.0f); int b0_intval = astc::flt2int_rtn(b0_fval * mode_scale); int b1_intval = astc::flt2int_rtn(b1_fval * mode_scale); @@ -1549,36 +1393,21 @@ static void quantize_hdr_rgb3( int b0_uquantval; int b1_uquantval; - quantize_and_unquantize_retain_top_two_bits(quantization_level, b0_lowbits, &b0_quantval, &b0_uquantval); + quantize_and_unquantize_retain_top_two_bits(quant_level, b0_lowbits, &b0_quantval, &b0_uquantval); - quantize_and_unquantize_retain_top_two_bits(quantization_level, b1_lowbits, &b1_quantval, &b1_uquantval); + quantize_and_unquantize_retain_top_two_bits(quant_level, b1_lowbits, &b1_quantval, &b1_uquantval); b0_intval = (b0_intval & ~0x3f) | (b0_uquantval & 0x3f); b1_intval = (b1_intval & ~0x3f) | (b1_uquantval & 0x3f); - b0_fval = b0_intval * mode_rscale; - b1_fval = b1_intval * mode_rscale; + b0_fval = static_cast(b0_intval) * mode_rscale; + b1_fval = static_cast(b1_intval) * mode_rscale; // finally, recompute D0 and D1, then quantize and unquantize them - float d0_fval = a_fval - b0_fval - c_fval - color0.g; - float d1_fval = 
a_fval - b1_fval - c_fval - color0.b; + float d0_fval = a_fval - b0_fval - c_fval - color0.lane<1>(); + float d1_fval = a_fval - b1_fval - c_fval - color0.lane<2>(); - if (d0_fval < -65535.0f) - { - d0_fval = -65535.0f; - } - else if (d0_fval > 65535.0f) - { - d0_fval = 65535.0f; - } - - if (d1_fval < -65535.0f) - { - d1_fval = -65535.0f; - } - else if (d1_fval > 65535.0f) - { - d1_fval = 65535.0f; - } + d0_fval = astc::clamp(d0_fval, -65535.0f, 65535.0f); + d1_fval = astc::clamp(d1_fval, -65535.0f, 65535.0f); int d0_intval = astc::flt2int_rtn(d0_fval * mode_scale); int d1_intval = astc::flt2int_rtn(d1_fval * mode_scale); @@ -1660,9 +1489,9 @@ static void quantize_hdr_rgb3( int d0_uquantval; int d1_uquantval; - quantize_and_unquantize_retain_top_four_bits(quantization_level, d0_lowbits, &d0_quantval, &d0_uquantval); + quantize_and_unquantize_retain_top_four_bits(quant_level, d0_lowbits, &d0_quantval, &d0_uquantval); - quantize_and_unquantize_retain_top_four_bits(quantization_level, d1_lowbits, &d1_quantval, &d1_uquantval); + quantize_and_unquantize_retain_top_four_bits(quant_level, d1_lowbits, &d1_quantval, &d1_uquantval); output[0] = a_quantval; output[1] = c_quantval; @@ -1679,70 +1508,62 @@ static void quantize_hdr_rgb3( // but usable. This representation is used if the light color is more than 4x the // color value of the dark color. float vals[6]; - vals[0] = color0_bak.r; - vals[1] = color1_bak.r; - vals[2] = color0_bak.g; - vals[3] = color1_bak.g; - vals[4] = color0_bak.b; - vals[5] = color1_bak.b; + vals[0] = color0_bak.lane<0>(); + vals[1] = color1_bak.lane<0>(); + vals[2] = color0_bak.lane<1>(); + vals[3] = color1_bak.lane<1>(); + vals[4] = color0_bak.lane<2>(); + vals[5] = color1_bak.lane<2>(); for (int i = 0; i < 6; i++) { - if (vals[i] < 0.0f) - { - vals[i] = 0.0f; - } - else if (vals[i] > 65020.0f) - { - vals[i] = 65020.0f; - } + vals[i] = astc::clamp(vals[i], 0.0f, 65020.0f); } for (int i = 0; i < 4; i++) { int idx = astc::flt2int_rtn(vals[i] * 1.0f / 256.0f); - output[i] = color_quantization_tables[quantization_level][idx]; + output[i] = color_quant_tables[quant_level][idx]; } for (int i = 4; i < 6; i++) { int dummy; int idx = astc::flt2int_rtn(vals[i] * 1.0f / 512.0f) + 128; - quantize_and_unquantize_retain_top_two_bits(quantization_level, idx, &(output[i]), &dummy); + quantize_and_unquantize_retain_top_two_bits(quant_level, idx, &(output[i]), &dummy); } return; } static void quantize_hdr_rgb_ldr_alpha3( - float4 color0, - float4 color1, + vfloat4 color0, + vfloat4 color1, int output[8], - int quantization_level + int quant_level ) { - color0.a *= (1.0f / 257.0f); - color1.a *= (1.0f / 257.0f); + float scale = 1.0f / 257.0f; - quantize_hdr_rgb3(color0, color1, output, quantization_level); + float a0 = astc::clamp255f(color0.lane<3>() * scale); + float a1 = astc::clamp255f(color1.lane<3>() * scale); - float a0 = astc::clamp255f(color0.a); - float a1 = astc::clamp255f(color1.a); - int ai0 = color_quantization_tables[quantization_level][astc::flt2int_rtn(a0)]; - int ai1 = color_quantization_tables[quantization_level][astc::flt2int_rtn(a1)]; + int ai0 = color_quant_tables[quant_level][astc::flt2int_rtn(a0)]; + int ai1 = color_quant_tables[quant_level][astc::flt2int_rtn(a1)]; output[6] = ai0; output[7] = ai1; + + quantize_hdr_rgb3(color0, color1, output, quant_level); } static void quantize_hdr_luminance_large_range3( - float4 color0, - float4 color1, + vfloat4 color0, + vfloat4 color1, int output[2], - int quantization_level + int quant_level ) { - - float lum1 = (color1.r + 
color1.g + color1.b) * (1.0f / 3.0f); - float lum0 = (color0.r + color0.g + color0.b) * (1.0f / 3.0f); + float lum0 = hadd_rgb_s(color0) * (1.0f / 3.0f); + float lum1 = hadd_rgb_s(color1) * (1.0f / 3.0f); if (lum1 < lum0) { @@ -1758,45 +1579,15 @@ static void quantize_hdr_luminance_large_range3( int upper_v0 = (ilum0 + 128) >> 8; int upper_v1 = (ilum1 + 128) >> 8; - if (upper_v0 < 0) - { - upper_v0 = 0; - } - else if (upper_v0 > 255) - { - upper_v0 = 255; - } - - if (upper_v1 < 0) - { - upper_v1 = 0; - } - else if (upper_v1 > 255) - { - upper_v1 = 255; - } + upper_v0 = astc::clamp(upper_v0, 0, 255); + upper_v1 = astc::clamp(upper_v1, 0, 255); // find the closest encodable point in the lower half of the code-point space int lower_v0 = (ilum1 + 256) >> 8; int lower_v1 = ilum0 >> 8; - if (lower_v0 < 0) - { - lower_v0 = 0; - } - else if (lower_v0 > 255) - { - lower_v0 = 255; - } - - if (lower_v1 < 0) - { - lower_v1 = 0; - } - else if (lower_v1 > 255) - { - lower_v1 = 255; - } + lower_v0 = astc::clamp(lower_v0, 0, 255); + lower_v1 = astc::clamp(lower_v1, 0, 255); // determine the distance between the point in code-point space and the input value int upper0_dec = upper_v0 << 8; @@ -1825,18 +1616,18 @@ static void quantize_hdr_luminance_large_range3( } // OK; encode. - output[0] = color_quantization_tables[quantization_level][v0]; - output[1] = color_quantization_tables[quantization_level][v1]; + output[0] = color_quant_tables[quant_level][v0]; + output[1] = color_quant_tables[quant_level][v1]; } -static int try_quantize_hdr_luminance_small_range3( - float4 color0, - float4 color1, +static bool try_quantize_hdr_luminance_small_range3( + vfloat4 color0, + vfloat4 color1, int output[2], - int quantization_level + int quant_level ) { - float lum1 = (color1.r + color1.g + color1.b) * (1.0f / 3.0f); - float lum0 = (color0.r + color0.g + color0.b) * (1.0f / 3.0f); + float lum0 = hadd_rgb_s(color0) * (1.0f / 3.0f); + float lum1 = hadd_rgb_s(color1) * (1.0f / 3.0f); if (lum1 < lum0) { @@ -1851,7 +1642,7 @@ static int try_quantize_hdr_luminance_small_range3( // difference of more than a factor-of-2 results in immediate failure. 
if (ilum1 - ilum0 > 2048) { - return 0; + return false; } int lowval, highval, diffval; @@ -1863,126 +1654,76 @@ static int try_quantize_hdr_luminance_small_range3( lowval = (ilum0 + 16) >> 5; highval = (ilum1 + 16) >> 5; - if (lowval < 0) - { - lowval = 0; - } - else if (lowval > 2047) - { - lowval = 2047; - } - - if (highval < 0) - { - highval = 0; - } - else if (highval > 2047) - { - highval = 2047; - } + lowval = astc::clamp(lowval, 0, 2047); + highval = astc::clamp(highval, 0, 2047); v0 = lowval & 0x7F; - v0e = color_quantization_tables[quantization_level][v0]; - v0d = color_unquantization_tables[quantization_level][v0e]; - if ((v0d & 0x80) == 0x80) - { - goto LOW_PRECISION_SUBMODE; - } - - lowval = (lowval & ~0x7F) | (v0d & 0x7F); - diffval = highval - lowval; - if (diffval < 0 || diffval > 15) - { - goto LOW_PRECISION_SUBMODE; - } + v0e = color_quant_tables[quant_level][v0]; + v0d = color_unquant_tables[quant_level][v0e]; - v1 = ((lowval >> 3) & 0xF0) | diffval; - v1e = color_quantization_tables[quantization_level][v1]; - v1d = color_unquantization_tables[quantization_level][v1e]; - if ((v1d & 0xF0) != (v1 & 0xF0)) + if (v0d < 0x80) { - goto LOW_PRECISION_SUBMODE; + lowval = (lowval & ~0x7F) | v0d; + diffval = highval - lowval; + if (diffval >= 0 && diffval <= 15) + { + v1 = ((lowval >> 3) & 0xF0) | diffval; + v1e = color_quant_tables[quant_level][v1]; + v1d = color_unquant_tables[quant_level][v1e]; + if ((v1d & 0xF0) == (v1 & 0xF0)) + { + output[0] = v0e; + output[1] = v1e; + return true; + } + } } - output[0] = v0e; - output[1] = v1e; - return 1; - // failed to encode the high-precision submode; well, then try to encode the // low-precision submode. -LOW_PRECISION_SUBMODE: lowval = (ilum0 + 32) >> 6; highval = (ilum1 + 32) >> 6; - if (lowval < 0) - { - lowval = 0; - } - else if (lowval > 1023) - { - lowval = 1023; - } - if (highval < 0) - { - highval = 0; - } - else if (highval > 1023) - { - highval = 1023; - } + lowval = astc::clamp(lowval, 0, 1023); + highval = astc::clamp(highval, 0, 1023); v0 = (lowval & 0x7F) | 0x80; - v0e = color_quantization_tables[quantization_level][v0]; - v0d = color_unquantization_tables[quantization_level][v0e]; + v0e = color_quant_tables[quant_level][v0]; + v0d = color_unquant_tables[quant_level][v0e]; if ((v0d & 0x80) == 0) { - return 0; + return false; } lowval = (lowval & ~0x7F) | (v0d & 0x7F); diffval = highval - lowval; if (diffval < 0 || diffval > 31) { - return 0; + return false; } v1 = ((lowval >> 2) & 0xE0) | diffval; - v1e = color_quantization_tables[quantization_level][v1]; - v1d = color_unquantization_tables[quantization_level][v1e]; + v1e = color_quant_tables[quant_level][v1]; + v1d = color_unquant_tables[quant_level][v1e]; if ((v1d & 0xE0) != (v1 & 0xE0)) { - return 0;; + return false; } output[0] = v0e; output[1] = v1e; - return 1; + return true; } static void quantize_hdr_alpha3( float alpha0, float alpha1, int output[2], - int quantization_level + int quant_level ) { - if (alpha0 < 0) - { - alpha0 = 0; - } - else if (alpha0 > 65280) - { - alpha0 = 65280; - } - - if (alpha1 < 0) - { - alpha1 = 0; - } - else if (alpha1 > 65280) - { - alpha1 = 65280; - } + alpha0 = astc::clamp(alpha0, 0.0f, 65280.0f); + alpha1 = astc::clamp(alpha1, 0.0f, 65280.0f); int ialpha0 = astc::flt2int_rtn(alpha0); int ialpha1 = astc::flt2int_rtn(alpha1); @@ -1999,8 +1740,8 @@ static void quantize_hdr_alpha3( val1 = (ialpha1 + (128 >> i)) >> (8 - i); v6 = (val0 & 0x7F) | ((i & 1) << 7); - v6e = color_quantization_tables[quantization_level][v6]; - v6d = 
color_unquantization_tables[quantization_level][v6e]; + v6e = color_quant_tables[quant_level][v6]; + v6d = color_unquant_tables[quant_level][v6e]; if ((v6 ^ v6d) & 0x80) { @@ -2018,10 +1759,10 @@ static void quantize_hdr_alpha3( } v7 = ((i & 2) << 6) | ((val0 >> 7) << (6 - i)) | (diffval & mask); - v7e = color_quantization_tables[quantization_level][v7]; - v7d = color_unquantization_tables[quantization_level][v7e]; + v7e = color_quant_tables[quant_level][v7]; + v7d = color_unquant_tables[quant_level][v7e]; - static const int testbits[3] = { 0xE0, 0xF0, 0xF8 }; + static const int testbits[3] { 0xE0, 0xF0, 0xF8 }; if ((v7 ^ v7d) & testbits[i]) { @@ -2039,8 +1780,8 @@ static void quantize_hdr_alpha3( v6 = val0 | 0x80; v7 = val1 | 0x80; - v6e = color_quantization_tables[quantization_level][v6]; - v7e = color_quantization_tables[quantization_level][v7]; + v6e = color_quant_tables[quant_level][v6]; + v7e = color_quant_tables[quant_level][v7]; output[0] = v6e; output[1] = v7e; @@ -2048,13 +1789,13 @@ static void quantize_hdr_alpha3( } static void quantize_hdr_rgb_alpha3( - float4 color0, - float4 color1, + vfloat4 color0, + vfloat4 color1, int output[8], - int quantization_level + int quant_level ) { - quantize_hdr_rgb3(color0, color1, output, quantization_level); - quantize_hdr_alpha3(color0.a, color1.a, output + 6, quantization_level); + quantize_hdr_rgb3(color0, color1, output, quant_level); + quantize_hdr_alpha3(color0.lane<3>(), color1.lane<3>(), output + 6, quant_level); } /* @@ -2062,133 +1803,126 @@ static void quantize_hdr_rgb_alpha3( delta-based representation; as such, it will report back the format it actually used. */ int pack_color_endpoints( - float4 color0, - float4 color1, - float4 rgbs_color, - float4 rgbo_color, + vfloat4 color0, + vfloat4 color1, + vfloat4 rgbs_color, + vfloat4 rgbo_color, int format, int* output, - int quantization_level + int quant_level ) { - assert(quantization_level >= 0 && quantization_level < 21); - // we do not support negative colors. - color0.r = MAX(color0.r, 0.0f); - color0.g = MAX(color0.g, 0.0f); - color0.b = MAX(color0.b, 0.0f); - color0.a = MAX(color0.a, 0.0f); + assert(quant_level >= 0 && quant_level < 21); - color1.r = MAX(color1.r, 0.0f); - color1.g = MAX(color1.g, 0.0f); - color1.b = MAX(color1.b, 0.0f); - color1.a = MAX(color1.a, 0.0f); + // we do not support negative colors. 
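// The non-negativity clamp just below, like the astc::clamp calls throughout
// this hunk, folds the old per-channel if/else chains into single helper
// calls. A self-contained sketch of both idioms, using hypothetical stand-in
// names (clamp_value, max4, vec4f) rather than the library's astc::clamp and
// vfloat4:

// Scalar clamp, equivalent in spirit to astc::clamp(v, lo, hi).
static float clamp_value(float v, float lo, float hi)
{
    return v < lo ? lo : (v > hi ? hi : v);
}

// Lane-wise max against a scalar floor, mirroring max(color, 0.0f).
struct vec4f { float v[4]; };

static vec4f max4(vec4f a, float floor_val)
{
    for (int i = 0; i < 4; i++)
    {
        a.v[i] = (a.v[i] > floor_val) ? a.v[i] : floor_val;
    }
    return a;
}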
+ color0 = max(color0, 0.0f); + color1 = max(color1, 0.0f); int retval = 0; - // TODO: Make format an endpoint_fmt enum type switch (format) { case FMT_RGB: - if (quantization_level <= 18) + if (quant_level <= 18) { - if (try_quantize_rgb_delta_blue_contract(color0, color1, output, quantization_level)) + if (try_quantize_rgb_delta_blue_contract(color0, color1, output, quant_level)) { retval = FMT_RGB_DELTA; break; } - if (try_quantize_rgb_delta(color0, color1, output, quantization_level)) + if (try_quantize_rgb_delta(color0, color1, output, quant_level)) { retval = FMT_RGB_DELTA; break; } } - if (try_quantize_rgb_blue_contract(color0, color1, output, quantization_level)) + if (try_quantize_rgb_blue_contract(color0, color1, output, quant_level)) { retval = FMT_RGB; break; } - quantize_rgb(color0, color1, output, quantization_level); + quantize_rgb(color0, color1, output, quant_level); retval = FMT_RGB; break; case FMT_RGBA: - if (quantization_level <= 18) + if (quant_level <= 18) { - if (try_quantize_rgba_delta_blue_contract(color0, color1, output, quantization_level)) + if (try_quantize_rgba_delta_blue_contract(color0, color1, output, quant_level)) { retval = FMT_RGBA_DELTA; break; } - if (try_quantize_rgba_delta(color0, color1, output, quantization_level)) + if (try_quantize_rgba_delta(color0, color1, output, quant_level)) { retval = FMT_RGBA_DELTA; break; } } - if (try_quantize_rgba_blue_contract(color0, color1, output, quantization_level)) + if (try_quantize_rgba_blue_contract(color0, color1, output, quant_level)) { retval = FMT_RGBA; break; } - quantize_rgba(color0, color1, output, quantization_level); + quantize_rgba(color0, color1, output, quant_level); retval = FMT_RGBA; break; case FMT_RGB_SCALE: - quantize_rgbs_new(rgbs_color, output, quantization_level); + quantize_rgbs_new(rgbs_color, output, quant_level); retval = FMT_RGB_SCALE; break; case FMT_HDR_RGB_SCALE: - quantize_hdr_rgbo3(rgbo_color, output, quantization_level); + quantize_hdr_rgbo3(rgbo_color, output, quant_level); retval = FMT_HDR_RGB_SCALE; break; case FMT_HDR_RGB: - quantize_hdr_rgb3(color0, color1, output, quantization_level); + quantize_hdr_rgb3(color0, color1, output, quant_level); retval = FMT_HDR_RGB; break; case FMT_RGB_SCALE_ALPHA: - quantize_rgbs_alpha_new(color0, color1, rgbs_color, output, quantization_level); + quantize_rgbs_alpha_new(color0, color1, rgbs_color, output, quant_level); retval = FMT_RGB_SCALE_ALPHA; break; case FMT_HDR_LUMINANCE_SMALL_RANGE: case FMT_HDR_LUMINANCE_LARGE_RANGE: - if (try_quantize_hdr_luminance_small_range3(color0, color1, output, quantization_level)) + if (try_quantize_hdr_luminance_small_range3(color0, color1, output, quant_level)) { retval = FMT_HDR_LUMINANCE_SMALL_RANGE; break; } - quantize_hdr_luminance_large_range3(color0, color1, output, quantization_level); + quantize_hdr_luminance_large_range3(color0, color1, output, quant_level); retval = FMT_HDR_LUMINANCE_LARGE_RANGE; break; case FMT_LUMINANCE: - quantize_luminance(color0, color1, output, quantization_level); + quantize_luminance(color0, color1, output, quant_level); retval = FMT_LUMINANCE; break; case FMT_LUMINANCE_ALPHA: - if (quantization_level <= 18) + if (quant_level <= 18) { - if (try_quantize_luminance_alpha_delta(color0, color1, output, quantization_level)) + if (try_quantize_luminance_alpha_delta(color0, color1, output, quant_level)) { retval = FMT_LUMINANCE_ALPHA_DELTA; break; } } - quantize_luminance_alpha(color0, color1, output, quantization_level); + quantize_luminance_alpha(color0, color1, output, 
quant_level); retval = FMT_LUMINANCE_ALPHA; break; case FMT_HDR_RGB_LDR_ALPHA: - quantize_hdr_rgb_ldr_alpha3(color0, color1, output, quantization_level); + quantize_hdr_rgb_ldr_alpha3(color0, color1, output, quant_level); retval = FMT_HDR_RGB_LDR_ALPHA; break; case FMT_HDR_RGBA: - quantize_hdr_rgb_alpha3(color0, color1, output, quantization_level); + quantize_hdr_rgb_alpha3(color0, color1, output, quant_level); retval = FMT_HDR_RGBA; break; } diff --git a/libkram/astc-encoder/astcenc_color_unquantize.cpp b/libkram/astc-encoder/astcenc_color_unquantize.cpp index 2321f0d7..427c8817 100644 --- a/libkram/astc-encoder/astcenc_color_unquantize.cpp +++ b/libkram/astc-encoder/astcenc_color_unquantize.cpp @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2011-2020 Arm Limited +// Copyright 2011-2021 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. You may obtain a copy @@ -15,308 +15,203 @@ // under the License. // ---------------------------------------------------------------------------- +#include + /** * @brief Functions for color unquantization. */ #include "astcenc_internal.h" -static int rgb_delta_unpack( - const int input[6], - int quantization_level, - uint4* output0, - uint4* output1 +static ASTCENC_SIMD_INLINE vint4 unquant_color( + int quant_level, + vint4 inputq ) { - // unquantize the color endpoints - int r0 = color_unquantization_tables[quantization_level][input[0]]; - int g0 = color_unquantization_tables[quantization_level][input[2]]; - int b0 = color_unquantization_tables[quantization_level][input[4]]; - - int r1 = color_unquantization_tables[quantization_level][input[1]]; - int g1 = color_unquantization_tables[quantization_level][input[3]]; - int b1 = color_unquantization_tables[quantization_level][input[5]]; - - // perform the bit-transfer procedure - r0 |= (r1 & 0x80) << 1; - g0 |= (g1 & 0x80) << 1; - b0 |= (b1 & 0x80) << 1; - r1 &= 0x7F; - g1 &= 0x7F; - b1 &= 0x7F; - if (r1 & 0x40) - r1 -= 0x80; - if (g1 & 0x40) - g1 -= 0x80; - if (b1 & 0x40) - b1 -= 0x80; - - r0 >>= 1; - g0 >>= 1; - b0 >>= 1; - r1 >>= 1; - g1 >>= 1; - b1 >>= 1; - - int rgbsum = r1 + g1 + b1; - - r1 += r0; - g1 += g0; - b1 += b0; - - int retval; - - int r0e, g0e, b0e; - int r1e, g1e, b1e; - - if (rgbsum >= 0) - { - r0e = r0; - g0e = g0; - b0e = b0; + const uint8_t* unq = color_unquant_tables[quant_level]; + return vint4(unq[inputq.lane<0>()], unq[inputq.lane<1>()], + unq[inputq.lane<2>()], unq[inputq.lane<3>()]); +} - r1e = r1; - g1e = g1; - b1e = b1; +static ASTCENC_SIMD_INLINE vint4 uncontract_color( + vint4 input +) { + vmask4 mask(true, true, false, false); + vint4 bc0 = asr<1>(input + input.lane<2>()); + return select(input, bc0, mask); +} - retval = 0; - } - else +static void rgba_delta_unpack( + vint4 input0q, + vint4 input1q, + int quant_level, + vint4& output0, + vint4& output1 +) { + // Unquantize color endpoints + vint4 input0 = unquant_color(quant_level, input0q); + vint4 input1 = unquant_color(quant_level, input1q); + + // Perform bit-transfer + input0 = input0 | lsl<1>(input1 & 0x80); + input1 = input1 & 0x7F; + vmask4 mask = (input1 & 0x40) != vint4::zero(); + input1 = select(input1, input1 - 0x80, mask); + + // Scale + input0 = asr<1>(input0); + input1 = asr<1>(input1); + + // Apply blue-uncontraction if needed + int rgb_sum = hadd_rgb_s(input1); + input1 = input1 + input0; + if (rgb_sum < 0) { - 
r0e = (r1 + b1) >> 1; - g0e = (g1 + b1) >> 1; - b0e = b1; - - r1e = (r0 + b0) >> 1; - g1e = (g0 + b0) >> 1; - b1e = b0; - - retval = 1; + input0 = uncontract_color(input0); + input1 = uncontract_color(input1); + std::swap(input0, input1); } - if (r0e < 0) - r0e = 0; - else if (r0e > 255) - r0e = 255; - - if (g0e < 0) - g0e = 0; - else if (g0e > 255) - g0e = 255; - - if (b0e < 0) - b0e = 0; - else if (b0e > 255) - b0e = 255; - - if (r1e < 0) - r1e = 0; - else if (r1e > 255) - r1e = 255; - - if (g1e < 0) - g1e = 0; - else if (g1e > 255) - g1e = 255; - - if (b1e < 0) - b1e = 0; - else if (b1e > 255) - b1e = 255; - - output0->r = r0e; - output0->g = g0e; - output0->b = b0e; - output0->a = 0xFF; - - output1->r = r1e; - output1->g = g1e; - output1->b = b1e; - output1->a = 0xFF; - - return retval; + output0 = clamp(0, 255, input0); + output1 = clamp(0, 255, input1); } -static int rgb_unpack( - const int input[6], - int quantization_level, - uint4* output0, - uint4* output1 +static void rgb_delta_unpack( + vint4 input0q, + vint4 input1q, + int quant_level, + vint4& output0, + vint4& output1 ) { - int ri0b = color_unquantization_tables[quantization_level][input[0]]; - int ri1b = color_unquantization_tables[quantization_level][input[1]]; - int gi0b = color_unquantization_tables[quantization_level][input[2]]; - int gi1b = color_unquantization_tables[quantization_level][input[3]]; - int bi0b = color_unquantization_tables[quantization_level][input[4]]; - int bi1b = color_unquantization_tables[quantization_level][input[5]]; - - if (ri0b + gi0b + bi0b > ri1b + gi1b + bi1b) - { - // blue-contraction - ri0b = (ri0b + bi0b) >> 1; - gi0b = (gi0b + bi0b) >> 1; - ri1b = (ri1b + bi1b) >> 1; - gi1b = (gi1b + bi1b) >> 1; - - output0->r = ri1b; - output0->g = gi1b; - output0->b = bi1b; - output0->a = 255; - - output1->r = ri0b; - output1->g = gi0b; - output1->b = bi0b; - output1->a = 255; - return 1; - } - else - { - output0->r = ri0b; - output0->g = gi0b; - output0->b = bi0b; - output0->a = 255; - - output1->r = ri1b; - output1->g = gi1b; - output1->b = bi1b; - output1->a = 255; - return 0; - } + rgba_delta_unpack(input0q, input1q, quant_level, output0, output1); + output0.set_lane<3>(255); + output1.set_lane<3>(255); } static void rgba_unpack( - const int input[8], - int quantization_level, - uint4* output0, - uint4* output1 + vint4 input0q, + vint4 input1q, + int quant_level, + vint4& output0, + vint4& output1 ) { - int order = rgb_unpack(input, quantization_level, output0, output1); - if (order == 0) - { - output0->a = color_unquantization_tables[quantization_level][input[6]]; - output1->a = color_unquantization_tables[quantization_level][input[7]]; - } - else + // Unquantize color endpoints + vint4 input0 = unquant_color(quant_level, input0q); + vint4 input1 = unquant_color(quant_level, input1q); + + // Apply blue-uncontraction if needed + if (hadd_rgb_s(input0) > hadd_rgb_s(input1)) { - output0->a = color_unquantization_tables[quantization_level][input[7]]; - output1->a = color_unquantization_tables[quantization_level][input[6]]; + input0 = uncontract_color(input0); + input1 = uncontract_color(input1); + std::swap(input0, input1); } + + output0 = input0; + output1 = input1; } -static void rgba_delta_unpack( - const int input[8], - int quantization_level, - uint4* output0, - uint4* output1 +static void rgb_unpack( + vint4 input0q, + vint4 input1q, + int quant_level, + vint4& output0, + vint4& output1 ) { - int a0 = color_unquantization_tables[quantization_level][input[6]]; - int a1 = 
color_unquantization_tables[quantization_level][input[7]]; - a0 |= (a1 & 0x80) << 1; - a1 &= 0x7F; - if (a1 & 0x40) - a1 -= 0x80; - a0 >>= 1; - a1 >>= 1; - a1 += a0; - - if (a1 < 0) - a1 = 0; - else if (a1 > 255) - a1 = 255; - - int order = rgb_delta_unpack(input, quantization_level, output0, output1); - if (order == 0) - { - output0->a = a0; - output1->a = a1; - } - else - { - output0->a = a1; - output1->a = a0; - } + rgba_unpack(input0q, input1q, quant_level, output0, output1); + output0.set_lane<3>(255); + output1.set_lane<3>(255); } -static void rgb_scale_unpack( - const int input[4], - int quantization_level, - uint4* output0, - uint4* output1 +static void rgb_scale_alpha_unpack( + vint4 input0q, + int alpha1q, + int scaleq, + int quant_level, + vint4& output0, + vint4& output1 ) { - int ir = color_unquantization_tables[quantization_level][input[0]]; - int ig = color_unquantization_tables[quantization_level][input[1]]; - int ib = color_unquantization_tables[quantization_level][input[2]]; + // Unquantize color endpoints + vint4 input = unquant_color(quant_level, input0q); + int alpha1 = color_unquant_tables[quant_level][alpha1q]; + int scale = color_unquant_tables[quant_level][scaleq]; - int iscale = color_unquantization_tables[quantization_level][input[3]]; + output1 = input; + output1.set_lane<3>(alpha1); - *output1 = uint4(ir, ig, ib, 255); - *output0 = uint4((ir * iscale) >> 8, (ig * iscale) >> 8, (ib * iscale) >> 8, 255); + output0 = asr<8>(input * scale); + output0.set_lane<3>(input.lane<3>()); } -static void rgb_scale_alpha_unpack( - const int input[6], - int quantization_level, - uint4* output0, - uint4* output1 +static void rgb_scale_unpack( + vint4 input0q, + int scaleq, + int quant_level, + vint4& output0, + vint4& output1 ) { - rgb_scale_unpack(input, quantization_level, output0, output1); - output0->a = color_unquantization_tables[quantization_level][input[4]]; - output1->a = color_unquantization_tables[quantization_level][input[5]]; + vint4 input = unquant_color(quant_level, input0q); + int scale = color_unquant_tables[quant_level][scaleq]; + + output1 = input; + output1.set_lane<3>(255); + + output0 = asr<8>(input * scale); + output0.set_lane<3>(255); } static void luminance_unpack( const int input[2], - int quantization_level, - uint4* output0, - uint4* output1 + int quant_level, + vint4* output0, + vint4* output1 ) { - int lum0 = color_unquantization_tables[quantization_level][input[0]]; - int lum1 = color_unquantization_tables[quantization_level][input[1]]; - *output0 = uint4(lum0, lum0, lum0, 255); - *output1 = uint4(lum1, lum1, lum1, 255); + int lum0 = color_unquant_tables[quant_level][input[0]]; + int lum1 = color_unquant_tables[quant_level][input[1]]; + *output0 = vint4(lum0, lum0, lum0, 255); + *output1 = vint4(lum1, lum1, lum1, 255); } static void luminance_delta_unpack( const int input[2], - int quantization_level, - uint4* output0, - uint4* output1 + int quant_level, + vint4* output0, + vint4* output1 ) { - int v0 = color_unquantization_tables[quantization_level][input[0]]; - int v1 = color_unquantization_tables[quantization_level][input[1]]; + int v0 = color_unquant_tables[quant_level][input[0]]; + int v1 = color_unquant_tables[quant_level][input[1]]; int l0 = (v0 >> 2) | (v1 & 0xC0); int l1 = l0 + (v1 & 0x3F); - if (l1 > 255) - l1 = 255; + l1 = astc::min(l1, 255); - *output0 = uint4(l0, l0, l0, 255); - *output1 = uint4(l1, l1, l1, 255); + *output0 = vint4(l0, l0, l0, 255); + *output1 = vint4(l1, l1, l1, 255); } static void luminance_alpha_unpack( const int 
input[4], - int quantization_level, - uint4* output0, - uint4* output1 + int quant_level, + vint4* output0, + vint4* output1 ) { - int lum0 = color_unquantization_tables[quantization_level][input[0]]; - int lum1 = color_unquantization_tables[quantization_level][input[1]]; - int alpha0 = color_unquantization_tables[quantization_level][input[2]]; - int alpha1 = color_unquantization_tables[quantization_level][input[3]]; - *output0 = uint4(lum0, lum0, lum0, alpha0); - *output1 = uint4(lum1, lum1, lum1, alpha1); + int lum0 = color_unquant_tables[quant_level][input[0]]; + int lum1 = color_unquant_tables[quant_level][input[1]]; + int alpha0 = color_unquant_tables[quant_level][input[2]]; + int alpha1 = color_unquant_tables[quant_level][input[3]]; + *output0 = vint4(lum0, lum0, lum0, alpha0); + *output1 = vint4(lum1, lum1, lum1, alpha1); } static void luminance_alpha_delta_unpack( const int input[4], - int quantization_level, - uint4* output0, - uint4* output1 + int quant_level, + vint4* output0, + vint4* output1 ) { - int lum0 = color_unquantization_tables[quantization_level][input[0]]; - int lum1 = color_unquantization_tables[quantization_level][input[1]]; - int alpha0 = color_unquantization_tables[quantization_level][input[2]]; - int alpha1 = color_unquantization_tables[quantization_level][input[3]]; + int lum0 = color_unquant_tables[quant_level][input[0]]; + int lum1 = color_unquant_tables[quant_level][input[1]]; + int alpha0 = color_unquant_tables[quant_level][input[2]]; + int alpha1 = color_unquant_tables[quant_level][input[3]]; lum0 |= (lum1 & 0x80) << 1; alpha0 |= (alpha1 & 0x80) << 1; @@ -334,31 +229,24 @@ static void luminance_alpha_delta_unpack( lum1 += lum0; alpha1 += alpha0; - if (lum1 < 0) - lum1 = 0; - else if (lum1 > 255) - lum1 = 255; + lum1 = astc::clamp(lum1, 0, 255); + alpha1 = astc::clamp(alpha1, 0, 255); - if (alpha1 < 0) - alpha1 = 0; - else if (alpha1 > 255) - alpha1 = 255; - - *output0 = uint4(lum0, lum0, lum0, alpha0); - *output1 = uint4(lum1, lum1, lum1, alpha1); + *output0 = vint4(lum0, lum0, lum0, alpha0); + *output1 = vint4(lum1, lum1, lum1, alpha1); } // RGB-offset format static void hdr_rgbo_unpack3( const int input[4], - int quantization_level, - uint4* output0, - uint4* output1 + int quant_level, + vint4* output0, + vint4* output1 ) { - int v0 = color_unquantization_tables[quantization_level][input[0]]; - int v1 = color_unquantization_tables[quantization_level][input[1]]; - int v2 = color_unquantization_tables[quantization_level][input[2]]; - int v3 = color_unquantization_tables[quantization_level][input[3]]; + int v0 = color_unquant_tables[quant_level][input[0]]; + int v1 = color_unquant_tables[quant_level][input[1]]; + int v2 = color_unquant_tables[quant_level][input[2]]; + int v3 = color_unquant_tables[quant_level][input[3]]; int modeval = ((v0 & 0xC0) >> 6) | (((v1 & 0x80) >> 7) << 2) | (((v2 & 0x80) >> 7) << 3); @@ -437,7 +325,7 @@ static void hdr_rgbo_unpack3( red |= bit5 << 10; // expand to 12 bits. 
- static const int shamts[6] = { 1, 1, 2, 3, 4, 5 }; + static const int shamts[6] { 1, 1, 2, 3, 4, 5 }; int shamt = shamts[mode]; red <<= shamt; green <<= shamt; @@ -489,23 +377,23 @@ static void hdr_rgbo_unpack3( if (blue0 < 0) blue0 = 0; - *output0 = uint4(red0 << 4, green0 << 4, blue0 << 4, 0x7800); - *output1 = uint4(red << 4, green << 4, blue << 4, 0x7800); + *output0 = vint4(red0 << 4, green0 << 4, blue0 << 4, 0x7800); + *output1 = vint4(red << 4, green << 4, blue << 4, 0x7800); } static void hdr_rgb_unpack3( const int input[6], - int quantization_level, - uint4* output0, - uint4* output1 + int quant_level, + vint4* output0, + vint4* output1 ) { - int v0 = color_unquantization_tables[quantization_level][input[0]]; - int v1 = color_unquantization_tables[quantization_level][input[1]]; - int v2 = color_unquantization_tables[quantization_level][input[2]]; - int v3 = color_unquantization_tables[quantization_level][input[3]]; - int v4 = color_unquantization_tables[quantization_level][input[4]]; - int v5 = color_unquantization_tables[quantization_level][input[5]]; + int v0 = color_unquant_tables[quant_level][input[0]]; + int v1 = color_unquant_tables[quant_level][input[1]]; + int v2 = color_unquant_tables[quant_level][input[2]]; + int v3 = color_unquant_tables[quant_level][input[3]]; + int v4 = color_unquant_tables[quant_level][input[4]]; + int v5 = color_unquant_tables[quant_level][input[5]]; // extract all the fixed-placement bitfields int modeval = ((v1 & 0x80) >> 7) | (((v2 & 0x80) >> 7) << 1) | (((v3 & 0x80) >> 7) << 2); @@ -514,8 +402,8 @@ static void hdr_rgb_unpack3( if (majcomp == 3) { - *output0 = uint4(v0 << 8, v2 << 8, (v4 & 0x7F) << 9, 0x7800); - *output1 = uint4(v1 << 8, v3 << 8, (v5 & 0x7F) << 9, 0x7800); + *output0 = vint4(v0 << 8, v2 << 8, (v4 & 0x7F) << 9, 0x7800); + *output1 = vint4(v1 << 8, v3 << 8, (v5 & 0x7F) << 9, 0x7800); return; } @@ -527,13 +415,12 @@ static void hdr_rgb_unpack3( int d1 = v5 & 0x7f; // get hold of the number of bits in 'd0' and 'd1' - static const int dbits_tab[8] = { 7, 6, 7, 6, 5, 6, 5, 6 }; + static const int dbits_tab[8] { 7, 6, 7, 6, 5, 6, 5, 6 }; int dbits = dbits_tab[modeval]; // extract six variable-placement bits int bit0 = (v2 >> 6) & 1; int bit1 = (v3 >> 6) & 1; - int bit2 = (v4 >> 6) & 1; int bit3 = (v5 >> 6) & 1; int bit4 = (v4 >> 5) & 1; @@ -565,23 +452,28 @@ static void hdr_rgb_unpack3( c |= bit2 << 7; if (ohmod & 0x5B) + { b0 |= bit0 << 6; - if (ohmod & 0x5B) b1 |= bit1 << 6; + } if (ohmod & 0x12) + { b0 |= bit2 << 7; - if (ohmod & 0x12) b1 |= bit3 << 7; + } if (ohmod & 0xAF) + { d0 |= bit4 << 5; - if (ohmod & 0xAF) d1 |= bit5 << 5; + } + if (ohmod & 0x5) + { d0 |= bit2 << 6; - if (ohmod & 0x5) d1 |= bit3 << 6; + } // sign-extend 'd0' and 'd1' // note: this code assumes that signed right-shift actually sign-fills, not zero-fills. 
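// The sign-fill assumption noted above can be sidestepped with a portable
// sign-extension helper; a minimal sketch (sign_extend is a hypothetical
// name, not a function in this codec):

// Sign-extend a value held in the low 'bits' bits of an int, without relying
// on implementation-defined behaviour of signed right-shift.
static int sign_extend(int value, int bits)
{
    int sign_bit = 1 << (bits - 1);
    return (value ^ sign_bit) - sign_bit;
}

// Example: a 6-bit field holding 0x3F decodes to -1: sign_extend(0x3F, 6) == -1.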
@@ -613,35 +505,13 @@ static void hdr_rgb_unpack3( int blue0 = a - b1 - c - d1; // clamp the color components to [0,2^12 - 1] - if (red0 < 0) - red0 = 0; - else if (red0 > 0xFFF) - red0 = 0xFFF; + red0 = astc::clamp(red0, 0, 4095); + green0 = astc::clamp(green0, 0, 4095); + blue0 = astc::clamp(blue0, 0, 4095); - if (green0 < 0) - green0 = 0; - else if (green0 > 0xFFF) - green0 = 0xFFF; - - if (blue0 < 0) - blue0 = 0; - else if (blue0 > 0xFFF) - blue0 = 0xFFF; - - if (red1 < 0) - red1 = 0; - else if (red1 > 0xFFF) - red1 = 0xFFF; - - if (green1 < 0) - green1 = 0; - else if (green1 > 0xFFF) - green1 = 0xFFF; - - if (blue1 < 0) - blue1 = 0; - else if (blue1 > 0xFFF) - blue1 = 0xFFF; + red1 = astc::clamp(red1, 0, 4095); + green1 = astc::clamp(green1, 0, 4095); + blue1 = astc::clamp(blue1, 0, 4095); // switch around the color components int temp0, temp1; @@ -667,32 +537,32 @@ static void hdr_rgb_unpack3( break; } - *output0 = uint4(red0 << 4, green0 << 4, blue0 << 4, 0x7800); - *output1 = uint4(red1 << 4, green1 << 4, blue1 << 4, 0x7800); + *output0 = vint4(red0 << 4, green0 << 4, blue0 << 4, 0x7800); + *output1 = vint4(red1 << 4, green1 << 4, blue1 << 4, 0x7800); } static void hdr_rgb_ldr_alpha_unpack3( const int input[8], - int quantization_level, - uint4* output0, - uint4* output1 + int quant_level, + vint4* output0, + vint4* output1 ) { - hdr_rgb_unpack3(input, quantization_level, output0, output1); + hdr_rgb_unpack3(input, quant_level, output0, output1); - int v6 = color_unquantization_tables[quantization_level][input[6]]; - int v7 = color_unquantization_tables[quantization_level][input[7]]; - output0->a = v6; - output1->a = v7; + int v6 = color_unquant_tables[quant_level][input[6]]; + int v7 = color_unquant_tables[quant_level][input[7]]; + output0->set_lane<3>(v6); + output1->set_lane<3>(v7); } static void hdr_luminance_small_range_unpack( const int input[2], - int quantization_level, - uint4* output0, - uint4* output1 + int quant_level, + vint4* output0, + vint4* output1 ) { - int v0 = color_unquantization_tables[quantization_level][input[0]]; - int v1 = color_unquantization_tables[quantization_level][input[1]]; + int v0 = color_unquant_tables[quant_level][input[0]]; + int v1 = color_unquant_tables[quant_level][input[1]]; int y0, y1; if (v0 & 0x80) @@ -710,18 +580,18 @@ static void hdr_luminance_small_range_unpack( if (y1 > 0xFFF) y1 = 0xFFF; - *output0 = uint4(y0 << 4, y0 << 4, y0 << 4, 0x7800); - *output1 = uint4(y1 << 4, y1 << 4, y1 << 4, 0x7800); + *output0 = vint4(y0 << 4, y0 << 4, y0 << 4, 0x7800); + *output1 = vint4(y1 << 4, y1 << 4, y1 << 4, 0x7800); } static void hdr_luminance_large_range_unpack( const int input[2], - int quantization_level, - uint4* output0, - uint4* output1 + int quant_level, + vint4* output0, + vint4* output1 ) { - int v0 = color_unquantization_tables[quantization_level][input[0]]; - int v1 = color_unquantization_tables[quantization_level][input[1]]; + int v0 = color_unquant_tables[quant_level][input[0]]; + int v1 = color_unquant_tables[quant_level][input[1]]; int y0, y1; if (v1 >= v0) @@ -734,19 +604,19 @@ static void hdr_luminance_large_range_unpack( y0 = (v1 << 4) + 8; y1 = (v0 << 4) - 8; } - *output0 = uint4(y0 << 4, y0 << 4, y0 << 4, 0x7800); - *output1 = uint4(y1 << 4, y1 << 4, y1 << 4, 0x7800); + *output0 = vint4(y0 << 4, y0 << 4, y0 << 4, 0x7800); + *output1 = vint4(y1 << 4, y1 << 4, y1 << 4, 0x7800); } static void hdr_alpha_unpack( const int input[2], - int quantization_level, + int quant_level, int* output0, int* output1 ) { - int v6 = 
color_unquantization_tables[quantization_level][input[0]]; - int v7 = color_unquantization_tables[quantization_level][input[1]]; + int v6 = color_unquant_tables[quant_level][input[0]]; + int v7 = color_unquant_tables[quant_level][input[1]]; int selector = ((v6 >> 7) & 1) | ((v7 >> 6) & 2); v6 &= 0x7F; @@ -781,226 +651,188 @@ static void hdr_alpha_unpack( static void hdr_rgb_hdr_alpha_unpack3( const int input[8], - int quantization_level, - uint4* output0, - uint4* output1 + int quant_level, + vint4* output0, + vint4* output1 ) { - hdr_rgb_unpack3(input, quantization_level, output0, output1); + hdr_rgb_unpack3(input, quant_level, output0, output1); int alpha0, alpha1; - hdr_alpha_unpack(input + 6, quantization_level, &alpha0, &alpha1); + hdr_alpha_unpack(input + 6, quant_level, &alpha0, &alpha1); - output0->a = alpha0; - output1->a = alpha1; + output0->set_lane<3>(alpha0); + output1->set_lane<3>(alpha1); } void unpack_color_endpoints( astcenc_profile decode_mode, int format, - int quantization_level, + int quant_level, const int* input, int* rgb_hdr, int* alpha_hdr, int* nan_endpoint, - uint4* output0, - uint4* output1 + vint4* output0, + vint4* output1 ) { + // TODO: Make these bools ... + + // Assume no NaNs and LDR endpoints + + // TODO: Review use of NaN endpoint. It's never set for HDR images ... *nan_endpoint = 0; + *rgb_hdr = 0; + *alpha_hdr = 0; + - // TODO: Make format the correct enum type switch (format) { case FMT_LUMINANCE: - *rgb_hdr = 0; - *alpha_hdr = 0; - luminance_unpack(input, quantization_level, output0, output1); + luminance_unpack(input, quant_level, output0, output1); break; case FMT_LUMINANCE_DELTA: - *rgb_hdr = 0; - *alpha_hdr = 0; - luminance_delta_unpack(input, quantization_level, output0, output1); + luminance_delta_unpack(input, quant_level, output0, output1); break; case FMT_HDR_LUMINANCE_SMALL_RANGE: *rgb_hdr = 1; *alpha_hdr = -1; - hdr_luminance_small_range_unpack(input, quantization_level, output0, output1); + hdr_luminance_small_range_unpack(input, quant_level, output0, output1); break; case FMT_HDR_LUMINANCE_LARGE_RANGE: *rgb_hdr = 1; *alpha_hdr = -1; - hdr_luminance_large_range_unpack(input, quantization_level, output0, output1); + hdr_luminance_large_range_unpack(input, quant_level, output0, output1); break; case FMT_LUMINANCE_ALPHA: - *rgb_hdr = 0; - *alpha_hdr = 0; - luminance_alpha_unpack(input, quantization_level, output0, output1); + luminance_alpha_unpack(input, quant_level, output0, output1); break; case FMT_LUMINANCE_ALPHA_DELTA: - *rgb_hdr = 0; - *alpha_hdr = 0; - luminance_alpha_delta_unpack(input, quantization_level, output0, output1); + luminance_alpha_delta_unpack(input, quant_level, output0, output1); break; case FMT_RGB_SCALE: - *rgb_hdr = 0; - *alpha_hdr = 0; - rgb_scale_unpack(input, quantization_level, output0, output1); + { + vint4 input0q(input[0], input[1], input[2], 0); + int scale = input[3]; + rgb_scale_unpack(input0q, scale, quant_level, *output0, *output1); + } break; case FMT_RGB_SCALE_ALPHA: - *rgb_hdr = 0; - *alpha_hdr = 0; - rgb_scale_alpha_unpack(input, quantization_level, output0, output1); + { + vint4 input0q(input[0], input[1], input[2], input[4]); + int alpha1q = input[5]; + int scaleq = input[3]; + rgb_scale_alpha_unpack(input0q, alpha1q, scaleq, quant_level, *output0, *output1); + } break; case FMT_HDR_RGB_SCALE: *rgb_hdr = 1; *alpha_hdr = -1; - hdr_rgbo_unpack3(input, quantization_level, output0, output1); + hdr_rgbo_unpack3(input, quant_level, output0, output1); break; case FMT_RGB: - *rgb_hdr = 0; - *alpha_hdr = 0; 
- rgb_unpack(input, quantization_level, output0, output1); + { + vint4 input0q(input[0], input[2], input[4], 0); + vint4 input1q(input[1], input[3], input[5], 0); + rgb_unpack(input0q, input1q, quant_level, *output0, *output1); + } break; case FMT_RGB_DELTA: - *rgb_hdr = 0; - *alpha_hdr = 0; - rgb_delta_unpack(input, quantization_level, output0, output1); + { + vint4 input0q(input[0], input[2], input[4], 0); + vint4 input1q(input[1], input[3], input[5], 0); + rgb_delta_unpack(input0q, input1q, quant_level, *output0, *output1); + } break; case FMT_HDR_RGB: *rgb_hdr = 1; *alpha_hdr = -1; - hdr_rgb_unpack3(input, quantization_level, output0, output1); + hdr_rgb_unpack3(input, quant_level, output0, output1); break; case FMT_RGBA: - *rgb_hdr = 0; - *alpha_hdr = 0; - rgba_unpack(input, quantization_level, output0, output1); + { + vint4 input0q(input[0], input[2], input[4], input[6]); + vint4 input1q(input[1], input[3], input[5], input[7]); + rgba_unpack(input0q, input1q, quant_level, *output0, *output1); + } break; case FMT_RGBA_DELTA: - *rgb_hdr = 0; - *alpha_hdr = 0; - rgba_delta_unpack(input, quantization_level, output0, output1); + { + vint4 input0q(input[0], input[2], input[4], input[6]); + vint4 input1q(input[1], input[3], input[5], input[7]); + rgba_delta_unpack(input0q, input1q, quant_level, *output0, *output1); + } break; case FMT_HDR_RGB_LDR_ALPHA: *rgb_hdr = 1; - *alpha_hdr = 0; - hdr_rgb_ldr_alpha_unpack3(input, quantization_level, output0, output1); + hdr_rgb_ldr_alpha_unpack3(input, quant_level, output0, output1); break; case FMT_HDR_RGBA: *rgb_hdr = 1; *alpha_hdr = 1; - hdr_rgb_hdr_alpha_unpack3(input, quantization_level, output0, output1); + hdr_rgb_hdr_alpha_unpack3(input, quant_level, output0, output1); break; } + // Assign a correct default alpha if (*alpha_hdr == -1) { if (decode_mode == ASTCENC_PRF_HDR) { - output0->a = 0x7800; - output1->a = 0x7800; + output0->set_lane<3>(0x7800); + output1->set_lane<3>(0x7800); *alpha_hdr = 1; } else { - output0->a = 0x00FF; - output1->a = 0x00FF; + output0->set_lane<3>(0x00FF); + output1->set_lane<3>(0x00FF); *alpha_hdr = 0; } } - switch (decode_mode) + vint4 ldr_scale(257); + vint4 hdr_scale(1); + vint4 output_scale = ldr_scale; + + // An LDR profile image + if ((decode_mode == ASTCENC_PRF_LDR) || + (decode_mode == ASTCENC_PRF_LDR_SRGB)) { - case ASTCENC_PRF_LDR_SRGB: + // Also matches HDR alpha, as cannot have HDR alpha without HDR RGB if (*rgb_hdr == 1) { - output0->r = 0xFF00; - output0->g = 0x0000; - output0->b = 0xFF00; - output0->a = 0xFF00; - - output1->r = 0xFF00; - output1->g = 0x0000; - output1->b = 0xFF00; - output1->a = 0xFF00; - } - else - { - output0->r *= 257; - output0->g *= 257; - output0->b *= 257; - output0->a *= 257; - - output1->r *= 257; - output1->g *= 257; - output1->b *= 257; - output1->a *= 257; - } - *rgb_hdr = 0; - *alpha_hdr = 0; - break; + *output0 = vint4(0xFF00, 0x0000, 0xFF00, 0xFF00); + *output1 = vint4(0xFF00, 0x0000, 0xFF00, 0xFF00); + output_scale = hdr_scale; - case ASTCENC_PRF_LDR: - if (*rgb_hdr == 1) - { - output0->r = 0xFFFF; - output0->g = 0xFFFF; - output0->b = 0xFFFF; - output0->a = 0xFFFF; - - output1->r = 0xFFFF; - output1->g = 0xFFFF; - output1->b = 0xFFFF; - output1->a = 0xFFFF; - *nan_endpoint = 1; - } - else - { - output0->r *= 257; - output0->g *= 257; - output0->b *= 257; - output0->a *= 257; - - output1->r *= 257; - output1->g *= 257; - output1->b *= 257; - output1->a *= 257; - } - *rgb_hdr = 0; - *alpha_hdr = 0; - break; - - case ASTCENC_PRF_HDR_RGB_LDR_A: - case ASTCENC_PRF_HDR: - 
if (*rgb_hdr == 0) - { - output0->r *= 257; - output0->g *= 257; - output0->b *= 257; - - output1->r *= 257; - output1->g *= 257; - output1->b *= 257; - } - if (*alpha_hdr == 0) - { - output0->a *= 257; - output1->a *= 257; + *rgb_hdr = 0; + *alpha_hdr = 0; } - break; } + // An HDR profile image + else + { + bool hrgb = *rgb_hdr == 1; + bool ha = *alpha_hdr == 1; + vmask4 hdr_lanes(hrgb, hrgb, hrgb, ha); + output_scale = select(ldr_scale, hdr_scale, hdr_lanes); + } + + *output0 = *output0 * output_scale; + *output1 = *output1 * output_scale; } diff --git a/libkram/astc-encoder/astcenc_compress_symbolic.cpp b/libkram/astc-encoder/astcenc_compress_symbolic.cpp index f75471c7..5b5e5519 100644 --- a/libkram/astc-encoder/astcenc_compress_symbolic.cpp +++ b/libkram/astc-encoder/astcenc_compress_symbolic.cpp @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2011-2020 Arm Limited +// Copyright 2011-2021 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. You may obtain a copy @@ -22,6 +22,7 @@ */ #include "astcenc_internal.h" +#include "astcenc_diagnostic_trace.h" #include #include @@ -57,40 +58,42 @@ static int realign_weights( pt += scb->partition_index; // Get the quantization table - const int packed_index = bsd->block_mode_to_packed[scb->block_mode]; - assert(packed_index >= 0 && packed_index < bsd->block_mode_packed_count); - const block_mode& bm = bsd->block_modes_packed[packed_index]; - int weight_quantization_level = bm.quantization_mode; - const quantization_and_transfer_table *qat = &(quant_and_xfer_tables[weight_quantization_level]); + const int packed_index = bsd->block_mode_packed_index[scb->block_mode]; + assert(packed_index >= 0 && packed_index < bsd->block_mode_count); + const block_mode& bm = bsd->block_modes[packed_index]; + int weight_quant_level = bm.quant_mode; + const quantization_and_transfer_table *qat = &(quant_and_xfer_tables[weight_quant_level]); // Get the decimation table - const decimation_table *const *ixtab2 = bsd->decimation_tables; - const decimation_table *it = ixtab2[bm.decimation_mode]; - int weight_count = it->num_weights; + const decimation_table* dt = bsd->decimation_tables[bm.decimation_mode]; + int weight_count = dt->weight_count; int max_plane = bm.is_dual_plane; - int plane2_component = max_plane ? scb->plane2_color_component : 0; - int plane_mask = max_plane ? 1 << plane2_component : 0; + int plane2_component = bm.is_dual_plane ? scb->plane2_color_component : -1; + vmask4 plane_mask = vint4::lane_id() == vint4(plane2_component); // Decode the color endpoints int rgb_hdr; int alpha_hdr; int nan_endpoint; - int4 endpnt0[4]; - int4 endpnt1[4]; - float4 endpnt0f[4]; - float4 offset[4]; + vint4 endpnt0[4]; + vint4 endpnt1[4]; + vfloat4 endpnt0f[4]; + vfloat4 offset[4]; + + promise(partition_count > 0); + promise(weight_count > 0); + promise(max_plane >= 0); for (int pa_idx = 0; pa_idx < partition_count; pa_idx++) { unpack_color_endpoints(decode_mode, scb->color_formats[pa_idx], - scb->color_quantization_level, + scb->color_quant_level, scb->color_values[pa_idx], &rgb_hdr, &alpha_hdr, &nan_endpoint, - // TODO: Fix these casts ... 
- reinterpret_cast(&endpnt0[pa_idx]), - reinterpret_cast(&endpnt1[pa_idx])); + &endpnt0[pa_idx], + &endpnt1[pa_idx]); } uint8_t uq_pl_weights[MAX_WEIGHTS_PER_BLOCK]; @@ -103,16 +106,11 @@ static int realign_weights( for (int pa_idx = 0; pa_idx < partition_count; pa_idx++) { // Compute the endpoint delta for all channels in current plane - int4 epd = endpnt1[pa_idx] - endpnt0[pa_idx]; - - if (plane_mask & 1) epd.r = 0; - if (plane_mask & 2) epd.g = 0; - if (plane_mask & 4) epd.b = 0; - if (plane_mask & 8) epd.a = 0; + vint4 epd = endpnt1[pa_idx] - endpnt0[pa_idx]; + epd = select(epd, vint4::zero(), plane_mask); - endpnt0f[pa_idx] = float4((float)endpnt0[pa_idx].r, (float)endpnt0[pa_idx].g, - (float)endpnt0[pa_idx].b, (float)endpnt0[pa_idx].a); - offset[pa_idx] = float4((float)epd.r, (float)epd.g, (float)epd.b, (float)epd.a); + endpnt0f[pa_idx] = int_to_float(endpnt0[pa_idx]); + offset[pa_idx] = int_to_float(epd); offset[pa_idx] = offset[pa_idx] * (1.0f / 64.0f); } @@ -139,42 +137,42 @@ static int realign_weights( float down_error = 0.0f; // Interpolate the colors to create the diffs - int texels_to_evaluate = it->weight_num_texels[we_idx]; + int texels_to_evaluate = dt->weight_texel_count[we_idx]; + promise(texels_to_evaluate > 0); for (int te_idx = 0; te_idx < texels_to_evaluate; te_idx++) { - int texel = it->weight_texel[we_idx][te_idx]; - const uint8_t *texel_weights = it->texel_weights_texel[we_idx][te_idx]; - const float *texel_weights_float = it->texel_weights_float_texel[we_idx][te_idx]; + int texel = dt->weight_texel[te_idx][we_idx]; + const uint8_t *texel_weights = dt->texel_weights_texel[we_idx][te_idx]; + const float *texel_weights_float = dt->texel_weights_float_texel[we_idx][te_idx]; float twf0 = texel_weights_float[0]; float weight_base = - ((uqw * twf0 - + uq_pl_weights[texel_weights[1]] * texel_weights_float[1]) - + (uq_pl_weights[texel_weights[2]] * texel_weights_float[2] - + uq_pl_weights[texel_weights[3]] * texel_weights_float[3])); + ((static_cast(uqw) * twf0 + + static_cast(uq_pl_weights[texel_weights[1]]) * texel_weights_float[1]) + + (static_cast(uq_pl_weights[texel_weights[2]]) * texel_weights_float[2] + + static_cast(uq_pl_weights[texel_weights[3]]) * texel_weights_float[3])); int partition = pt->partition_of_texel[texel]; weight_base = weight_base + 0.5f; float plane_weight = astc::flt_rd(weight_base); - float plane_up_weight = astc::flt_rd(weight_base + uqw_next_dif * twf0) - plane_weight; - float plane_down_weight = astc::flt_rd(weight_base + uqw_prev_dif * twf0) - plane_weight; + float plane_up_weight = astc::flt_rd(weight_base + static_cast(uqw_next_dif) * twf0) - plane_weight; + float plane_down_weight = astc::flt_rd(weight_base + static_cast(uqw_prev_dif) * twf0) - plane_weight; - float4 color_offset = offset[partition]; - float4 color_base = endpnt0f[partition]; + vfloat4 color_offset = offset[partition]; + vfloat4 color_base = endpnt0f[partition]; - float4 color = color_base + color_offset * plane_weight; + vfloat4 color = color_base + color_offset * plane_weight; - float4 origcolor = float4(blk->data_r[texel], blk->data_g[texel], - blk->data_b[texel], blk->data_a[texel]); - float4 error_weight = float4(ewb->texel_weight_r[texel], ewb->texel_weight_g[texel], - ewb->texel_weight_b[texel], ewb->texel_weight_a[texel]); + vfloat4 origcolor = blk->texel(texel); + vfloat4 error_weight = vfloat4(ewb->texel_weight_r[texel], ewb->texel_weight_g[texel], + ewb->texel_weight_b[texel], ewb->texel_weight_a[texel]); - float4 colordiff = color - origcolor; - float4 
color_up_diff = colordiff + color_offset * plane_up_weight; - float4 color_down_diff = colordiff + color_offset * plane_down_weight; - current_error += dot(colordiff * colordiff, error_weight); - up_error += dot(color_up_diff * color_up_diff, error_weight); - down_error += dot(color_down_diff * color_down_diff, error_weight); + vfloat4 colordiff = color - origcolor; + vfloat4 color_up_diff = colordiff + color_offset * plane_up_weight; + vfloat4 color_down_diff = colordiff + color_offset * plane_down_weight; + current_error += dot_s(colordiff * colordiff, error_weight); + up_error += dot_s(color_up_diff * color_up_diff, error_weight); + down_error += dot_s(color_down_diff * color_down_diff, error_weight); } // Check if the prev or next error is better, and if so use it @@ -194,7 +192,7 @@ static int realign_weights( // Prepare iteration for plane 2 weight_set8 = plane2_weight_set8; - plane_mask ^= 0xF; + plane_mask = ~plane_mask; } return adjustments; @@ -203,31 +201,35 @@ static int realign_weights( /* function for compressing a block symbolically, given that we have already decided on a partition */ -static void compress_symbolic_block_fixed_partition_1_plane( - astcenc_profile decode_mode, - float mode_cutoff, +static float compress_symbolic_block_fixed_partition_1_plane( + const astcenc_config& config, + bool only_always, int tune_candidate_limit, + float tune_errorval_threshold, int max_refinement_iters, const block_size_descriptor* bsd, - int partition_count, int partition_index, + int partition_count, + int partition_index, const imageblock* blk, const error_weight_block* ewb, - symbolic_compressed_block* scb, + symbolic_compressed_block& scb, compress_fixed_partition_buffers* tmpbuf ) { - static const int free_bits_for_partition_count[5] = { 0, 115 - 4, 111 - 4 - PARTITION_BITS, 108 - 4 - PARTITION_BITS, 105 - 4 - PARTITION_BITS }; + static const int free_bits_for_partition_count[5] = { + 0, 115 - 4, 111 - 4 - PARTITION_BITS, 108 - 4 - PARTITION_BITS, 105 - 4 - PARTITION_BITS + }; - const partition_info *pi = get_partition_table(bsd, partition_count); - pi += partition_index; + const partition_info *pt = get_partition_table(bsd, partition_count); + pt += partition_index; // first, compute ideal weights and endpoint colors, under the assumption that // there is no quantization or decimation going on. endpoints_and_weights *ei = &tmpbuf->ei1; endpoints_and_weights *eix = tmpbuf->eix1; - compute_endpoints_and_ideal_weights_1_plane(bsd, pi, blk, ewb, ei); + compute_endpoints_and_ideal_weights_1_plane(bsd, pt, blk, ewb, ei); // next, compute ideal weights and endpoint colors for every decimation. 
- const decimation_table *const *ixtab2 = bsd->decimation_tables; + const decimation_table *const *dts = bsd->decimation_tables; float *decimated_quantized_weights = tmpbuf->decimated_quantized_weights; float *decimated_weights = tmpbuf->decimated_weights; @@ -236,65 +238,52 @@ static void compress_symbolic_block_fixed_partition_1_plane( // for each decimation mode, compute an ideal set of weights // (that is, weights computed with the assumption that they are not quantized) - for (int i = 0; i < MAX_DECIMATION_MODES; i++) + for (int i = 0; i < bsd->decimation_mode_count; i++) { - if (bsd->permit_encode[i] == 0 || bsd->decimation_mode_maxprec_1plane[i] < 0 || bsd->decimation_mode_percentile[i] > mode_cutoff) + const decimation_mode& dm = bsd->decimation_modes[i]; + if (dm.maxprec_1plane < 0 || (only_always && !dm.percentile_always) || !dm.percentile_hit) { continue; } - eix[i] = *ei; - compute_ideal_weights_for_decimation_table(&(eix[i]), ixtab2[i], decimated_quantized_weights + i * MAX_WEIGHTS_PER_BLOCK, decimated_weights + i * MAX_WEIGHTS_PER_BLOCK); + compute_ideal_weights_for_decimation_table( + *ei, + eix[i], + *(dts[i]), + decimated_quantized_weights + i * MAX_WEIGHTS_PER_BLOCK, + decimated_weights + i * MAX_WEIGHTS_PER_BLOCK); } // compute maximum colors for the endpoints and ideal weights. // for each endpoint-and-ideal-weight pair, compute the smallest weight value // that will result in a color value greater than 1. - float4 min_ep = float4(10.0f); + vfloat4 min_ep(10.0f); for (int i = 0; i < partition_count; i++) { #ifdef DEBUG_CAPTURE_NAN fedisableexcept(FE_DIVBYZERO | FE_INVALID); #endif - float4 ep = float4( - (1.0f - ei->ep.endpt0[i].r) / (ei->ep.endpt1[i].r - ei->ep.endpt0[i].r), - (1.0f - ei->ep.endpt0[i].g) / (ei->ep.endpt1[i].g - ei->ep.endpt0[i].g), - (1.0f - ei->ep.endpt0[i].b) / (ei->ep.endpt1[i].b - ei->ep.endpt0[i].b), - (1.0f - ei->ep.endpt0[i].a) / (ei->ep.endpt1[i].a - ei->ep.endpt0[i].a)); + vfloat4 ep = (vfloat4(1.0f) - ei->ep.endpt0[i]) / (ei->ep.endpt1[i] - ei->ep.endpt0[i]); - if (ep.r > 0.5f && ep.r < min_ep.r) - { - min_ep.r = ep.r; - } - - if (ep.g > 0.5f && ep.g < min_ep.g) - { - min_ep.g = ep.g; - } - - if (ep.b > 0.5f && ep.b < min_ep.b) - { - min_ep.b = ep.b; - } - - if (ep.a > 0.5f && ep.a < min_ep.a) - { - min_ep.a = ep.a; - } + vmask4 use_ep = (ep > vfloat4(0.5f)) & (ep < min_ep); + min_ep = select(min_ep, ep, use_ep); #ifdef DEBUG_CAPTURE_NAN feenableexcept(FE_DIVBYZERO | FE_INVALID); #endif } - float min_wt_cutoff = MIN(MIN(min_ep.r, min_ep.g), MIN(min_ep.b, min_ep.a)); + float min_wt_cutoff = hmin_s(min_ep); // for each mode, use the angular method to compute a shift. float weight_low_value[MAX_WEIGHT_MODES]; float weight_high_value[MAX_WEIGHT_MODES]; - compute_angular_endpoints_1plane(mode_cutoff, bsd, decimated_quantized_weights, decimated_weights, weight_low_value, weight_high_value); + compute_angular_endpoints_1plane( + only_always, bsd, + decimated_quantized_weights, decimated_weights, + weight_low_value, weight_high_value); // for each mode (which specifies a decimation and a quantization): // * compute number of bits needed for the quantized weights. 
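// The weight bitcount referred to above follows ASTC's integer sequence
// encoding (ISE): each quant level spends a fixed number of plain bits per
// value plus optionally one trit or quint, with 5 trits packed into 8 bits
// and 3 quints packed into 7 bits. A rough standalone sketch of that cost
// (ise_sequence_bits is a hypothetical name, not the get_ise_sequence_bitcount
// call used in the next hunk):

static int ise_sequence_bits(int count, int bits, bool has_trit, bool has_quint)
{
    int total = count * bits;
    if (has_trit)
    {
        total += (count * 8 + 4) / 5;   // ceil(8 * count / 5)
    }
    if (has_quint)
    {
        total += (count * 7 + 2) / 3;   // ceil(7 * count / 3)
    }
    return total;
}

// Example: 16 weights at QUANT_6 (1 bit plus one trit each) cost
// 16 * 1 + ceil(16 * 8 / 5) = 16 + 26 = 42 bits.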
@@ -303,10 +292,10 @@ static void compress_symbolic_block_fixed_partition_1_plane( int qwt_bitcounts[MAX_WEIGHT_MODES]; float qwt_errors[MAX_WEIGHT_MODES]; - for (int i = 0, ni = bsd->block_mode_packed_count; i < ni; ++i) + for (int i = 0; i < bsd->block_mode_count; ++i) { - const block_mode& bm = bsd->block_modes_packed[i]; - if (bm.is_dual_plane != 0 || bm.percentile > mode_cutoff) + const block_mode& bm = bsd->block_modes[i]; + if (bm.is_dual_plane || (only_always && !bm.percentile_always) || !bm.percentile_hit) { qwt_errors[i] = 1e38f; continue; @@ -320,8 +309,9 @@ static void compress_symbolic_block_fixed_partition_1_plane( int decimation_mode = bm.decimation_mode; // compute weight bitcount for the mode - int bits_used_by_weights = compute_ise_bitcount(ixtab2[decimation_mode]->num_weights, - (quantization_method) bm.quantization_mode); + int bits_used_by_weights = get_ise_sequence_bitcount( + dts[decimation_mode]->weight_count, + (quant_method)bm.quant_mode); int bitcount = free_bits_for_partition_count[partition_count] - bits_used_by_weights; if (bitcount <= 0 || bits_used_by_weights < 24 || bits_used_by_weights > 96) { @@ -331,15 +321,19 @@ static void compress_symbolic_block_fixed_partition_1_plane( qwt_bitcounts[i] = bitcount; // then, generate the optimized set of weights for the weight mode. - compute_ideal_quantized_weights_for_decimation_table(ixtab2[decimation_mode], - weight_low_value[i], weight_high_value[i], - decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * decimation_mode, - flt_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * i, - u8_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * i, - bm.quantization_mode); + compute_quantized_weights_for_decimation_table( + dts[decimation_mode], + weight_low_value[i], weight_high_value[i], + decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * decimation_mode, + flt_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * i, + u8_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * i, + bm.quant_mode); // then, compute weight-errors for the weight mode. - qwt_errors[i] = compute_error_of_weight_set(&(eix[decimation_mode]), ixtab2[decimation_mode], flt_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * i); + qwt_errors[i] = compute_error_of_weight_set( + &(eix[decimation_mode]), + dts[decimation_mode], + flt_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * i); } // for each weighting mode, determine the optimal combination of color endpoint encodings @@ -347,128 +341,233 @@ static void compress_symbolic_block_fixed_partition_1_plane( int partition_format_specifiers[TUNE_MAX_TRIAL_CANDIDATES][4]; int quantized_weight[TUNE_MAX_TRIAL_CANDIDATES]; - int color_quantization_level[TUNE_MAX_TRIAL_CANDIDATES]; - int color_quantization_level_mod[TUNE_MAX_TRIAL_CANDIDATES]; + int color_quant_level[TUNE_MAX_TRIAL_CANDIDATES]; + int color_quant_level_mod[TUNE_MAX_TRIAL_CANDIDATES]; + determine_optimal_set_of_endpoint_formats_to_use( - bsd, pi, blk, ewb, &(ei->ep), -1, qwt_bitcounts, qwt_errors, + bsd, pt, blk, ewb, &(ei->ep), -1, qwt_bitcounts, qwt_errors, tune_candidate_limit, partition_format_specifiers, quantized_weight, - color_quantization_level, color_quantization_level_mod); + color_quant_level, color_quant_level_mod); // then iterate over the tune_candidate_limit believed-to-be-best modes to // find out which one is actually best. 
+ float best_errorval_in_mode = 1e30f; + float best_errorval_in_scb = scb.errorval; + for (int i = 0; i < tune_candidate_limit; i++) { + TRACE_NODE(node0, "candidate"); + uint8_t *u8_weight_src; int weights_to_copy; const int qw_packed_index = quantized_weight[i]; if (qw_packed_index < 0) { - scb->error_block = 1; - scb++; + trace_add_data("failed", "error_block"); continue; } - assert(qw_packed_index >= 0 && qw_packed_index < bsd->block_mode_packed_count); - const block_mode& qw_bm = bsd->block_modes_packed[qw_packed_index]; + assert(qw_packed_index >= 0 && qw_packed_index < bsd->block_mode_count); + const block_mode& qw_bm = bsd->block_modes[qw_packed_index]; int decimation_mode = qw_bm.decimation_mode; - int weight_quantization_mode = qw_bm.quantization_mode; - const decimation_table *it = ixtab2[decimation_mode]; + int weight_quant_mode = qw_bm.quant_mode; + const decimation_table *dt = dts[decimation_mode]; u8_weight_src = u8_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * qw_packed_index; + weights_to_copy = dt->weight_count; - weights_to_copy = it->num_weights; + trace_add_data("weight_x", dt->weight_x); + trace_add_data("weight_y", dt->weight_y); + trace_add_data("weight_z", dt->weight_z); + trace_add_data("weight_quant", weight_quant_mode); // recompute the ideal color endpoints before storing them. - float4 rgbs_colors[4]; - float4 rgbo_colors[4]; + vfloat4 rgbs_colors[4]; + vfloat4 rgbo_colors[4]; + // TODO: Can we ping-pong between two buffers and make this zero copy? + symbolic_compressed_block workscb; for (int l = 0; l < max_refinement_iters; l++) { - recompute_ideal_colors(weight_quantization_mode, &(eix[decimation_mode].ep), rgbs_colors, rgbo_colors, u8_weight_src, nullptr, -1, pi, it, blk, ewb); + recompute_ideal_colors_1plane( + weight_quant_mode, &(eix[decimation_mode].ep), + rgbs_colors, rgbo_colors, u8_weight_src, pt, dt, blk, ewb); // quantize the chosen color // store the colors for the block for (int j = 0; j < partition_count; j++) { - scb->color_formats[j] = pack_color_endpoints(eix[decimation_mode].ep.endpt0[j], - eix[decimation_mode].ep.endpt1[j], - rgbs_colors[j], rgbo_colors[j], partition_format_specifiers[i][j], scb->color_values[j], color_quantization_level[i]); + workscb.color_formats[j] = pack_color_endpoints( + eix[decimation_mode].ep.endpt0[j], + eix[decimation_mode].ep.endpt1[j], + rgbs_colors[j], + rgbo_colors[j], + partition_format_specifiers[i][j], + workscb.color_values[j], + color_quant_level[i]); } // if all the color endpoint modes are the same, we get a few more // bits to store colors; let's see if we can take advantage of this: // requantize all the colors and see if the endpoint modes remain the same; // if they do, then exploit it. 
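// A minimal illustrative sketch, not taken from the diff above: the
// "matched formats" check that the next lines implement. When every partition
// ends up using the same endpoint format, the format field is cheaper to
// store, so the block is re-packed at the alternative color_quant_level_mod
// and only kept if the formats still agree afterwards. The helper below is a
// hypothetical stand-in for that agreement test.
static bool all_endpoint_formats_match(const int* formats, int partition_count)
{
    for (int j = 1; j < partition_count; j++)
    {
        if (formats[j] != formats[0])
        {
            return false;   // keep the per-partition quant level instead
        }
    }
    return true;            // safe to adopt the matched-format quant level
}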
- scb->color_formats_matched = 0; + workscb.color_formats_matched = 0; - if ((partition_count >= 2 && scb->color_formats[0] == scb->color_formats[1] - && color_quantization_level[i] != color_quantization_level_mod[i]) - && (partition_count == 2 || (scb->color_formats[0] == scb->color_formats[2] && (partition_count == 3 || (scb->color_formats[0] == scb->color_formats[3]))))) + if ((partition_count >= 2 && workscb.color_formats[0] == workscb.color_formats[1] + && color_quant_level[i] != color_quant_level_mod[i]) + && (partition_count == 2 || (workscb.color_formats[0] == workscb.color_formats[2] + && (partition_count == 3 || (workscb.color_formats[0] == workscb.color_formats[3]))))) { int colorvals[4][12]; - int color_formats_mod[4] = { 0 }; + int color_formats_mod[4] { 0 }; for (int j = 0; j < partition_count; j++) { - color_formats_mod[j] = pack_color_endpoints(eix[decimation_mode].ep.endpt0[j], - eix[decimation_mode].ep.endpt1[j], - rgbs_colors[j], rgbo_colors[j], partition_format_specifiers[i][j], colorvals[j], color_quantization_level_mod[i]); + color_formats_mod[j] = pack_color_endpoints( + eix[decimation_mode].ep.endpt0[j], + eix[decimation_mode].ep.endpt1[j], + rgbs_colors[j], + rgbo_colors[j], + partition_format_specifiers[i][j], + colorvals[j], + color_quant_level_mod[i]); } + if (color_formats_mod[0] == color_formats_mod[1] - && (partition_count == 2 || (color_formats_mod[0] == color_formats_mod[2] && (partition_count == 3 || (color_formats_mod[0] == color_formats_mod[3]))))) + && (partition_count == 2 || (color_formats_mod[0] == color_formats_mod[2] + && (partition_count == 3 || (color_formats_mod[0] == color_formats_mod[3]))))) { - scb->color_formats_matched = 1; + workscb.color_formats_matched = 1; for (int j = 0; j < 4; j++) { for (int k = 0; k < 12; k++) { - scb->color_values[j][k] = colorvals[j][k]; + workscb.color_values[j][k] = colorvals[j][k]; } } for (int j = 0; j < 4; j++) { - scb->color_formats[j] = color_formats_mod[j]; + workscb.color_formats[j] = color_formats_mod[j]; } } } // store header fields - scb->partition_count = partition_count; - scb->partition_index = partition_index; - scb->color_quantization_level = scb->color_formats_matched ? color_quantization_level_mod[i] : color_quantization_level[i]; - scb->block_mode = qw_bm.mode_index; - scb->error_block = 0; + workscb.partition_count = partition_count; + workscb.partition_index = partition_index; + workscb.color_quant_level = workscb.color_formats_matched ? color_quant_level_mod[i] : color_quant_level[i]; + workscb.block_mode = qw_bm.mode_index; + workscb.error_block = 0; - if (scb->color_quantization_level < 4) + if (workscb.color_quant_level < 4) { - scb->error_block = 1; // should never happen, but cannot prove it impossible. + workscb.error_block = 1; // should never happen, but cannot prove it impossible. + } + + // Pre-realign test + if (l == 0) + { + for (int j = 0; j < weights_to_copy; j++) + { + workscb.weights[j] = u8_weight_src[j]; + } + + float errorval = compute_symbolic_block_difference(config, bsd, &workscb, blk, ewb); + if (errorval == -1e30f) + { + errorval = -errorval; + workscb.error_block = 1; + } + + + trace_add_data("error_prerealign", errorval); + best_errorval_in_mode = astc::min(errorval, best_errorval_in_mode); + + // Average refinement improvement is 3.5% per iteration + // (allow 5%), but the first iteration can help more so we give + // it a extra 10% leeway. 
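// A minimal illustrative sketch, not taken from the diff above: a worked
// example of the skip heuristic described in this comment and computed just
// below. Assuming max_refinement_iters = 4 and the pre-realign test on
// iteration l = 0, iters_remaining = 4 and threshold = 0.05 * 4 + 1.1 = 1.3,
// so a candidate whose error exceeds 1.3x the best block found so far is
// abandoned: roughly 3.5% improvement per remaining pass will not close that gap.
static float prerealign_skip_threshold(int max_refinement_iters, int l)
{
    int iters_remaining = max_refinement_iters - l;
    // 5% headroom per remaining refinement pass, plus 10% extra because the
    // first pass tends to help the most (the post-realign test drops the 10%).
    return 0.05f * static_cast<float>(iters_remaining) + 1.1f;
}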
Use this knowledge to drive a + // heuristic to skip blocks that are unlikely to catch up with + // the best block we have already. + int iters_remaining = max_refinement_iters - l; + float threshold = (0.05f * static_cast(iters_remaining)) + 1.1f; + if (errorval > (threshold * best_errorval_in_scb)) + { + break; + } + + if (errorval < best_errorval_in_scb) + { + best_errorval_in_scb = errorval; + workscb.errorval = errorval; + scb = workscb; + + if (errorval < tune_errorval_threshold) + { + return errorval; + } + } } // perform a final pass over the weights to try to improve them. int adjustments = realign_weights( - decode_mode, bsd, blk, ewb, scb, u8_weight_src, nullptr); + config.profile, bsd, blk, ewb, &workscb, + u8_weight_src, nullptr); - if (adjustments == 0) + // Post-realign test + for (int j = 0; j < weights_to_copy; j++) + { + workscb.weights[j] = u8_weight_src[j]; + } + + float errorval = compute_symbolic_block_difference(config, bsd, &workscb, blk, ewb); + if (errorval == -1e30f) + { + errorval = -errorval; + workscb.error_block = 1; + } + + trace_add_data("error_postrealign", errorval); + best_errorval_in_mode = astc::min(errorval, best_errorval_in_mode); + + // Average refinement improvement is 3.5% per iteration, so skip + // blocks that are unlikely to catch up with the best block we + // have already. Assume a 5% per step to give benefit of the doubt + int iters_remaining = max_refinement_iters - 1 - l; + float threshold = (0.05f * static_cast(iters_remaining)) + 1.0f; + if (errorval > (threshold * best_errorval_in_scb)) { break; } - } - for (int j = 0; j < weights_to_copy; j++) - { - scb->plane1_weights[j] = u8_weight_src[j]; - } + if (errorval < best_errorval_in_scb) + { + best_errorval_in_scb = errorval; + workscb.errorval = errorval; + scb = workscb; - scb++; + if (errorval < tune_errorval_threshold) + { + return errorval; + } + } + + if (adjustments == 0) + { + break; + } + } } + + return best_errorval_in_mode; } -static void compress_symbolic_block_fixed_partition_2_planes( - astcenc_profile decode_mode, - float mode_cutoff, +static float compress_symbolic_block_fixed_partition_2_planes( + const astcenc_config& config, + bool only_always, int tune_candidate_limit, + float tune_errorval_threshold, int max_refinement_iters, const block_size_descriptor* bsd, int partition_count, @@ -476,24 +575,25 @@ static void compress_symbolic_block_fixed_partition_2_planes( int separate_component, const imageblock* blk, const error_weight_block* ewb, - symbolic_compressed_block* scb, + symbolic_compressed_block& scb, compress_fixed_partition_buffers* tmpbuf ) { - static const int free_bits_for_partition_count[5] = - { 0, 113 - 4, 109 - 4 - PARTITION_BITS, 106 - 4 - PARTITION_BITS, 103 - 4 - PARTITION_BITS }; + static const int free_bits_for_partition_count[5] = { + 0, 113 - 4, 109 - 4 - PARTITION_BITS, 106 - 4 - PARTITION_BITS, 103 - 4 - PARTITION_BITS + }; - const partition_info *pi = get_partition_table(bsd, partition_count); - pi += partition_index; + const partition_info *pt = get_partition_table(bsd, partition_count); + pt += partition_index; // first, compute ideal weights and endpoint colors endpoints_and_weights *ei1 = &tmpbuf->ei1; endpoints_and_weights *ei2 = &tmpbuf->ei2; endpoints_and_weights *eix1 = tmpbuf->eix1; endpoints_and_weights *eix2 = tmpbuf->eix2; - compute_endpoints_and_ideal_weights_2_planes(bsd, pi, blk, ewb, separate_component, ei1, ei2); + compute_endpoints_and_ideal_weights_2_planes(bsd, pt, blk, ewb, separate_component, ei1, ei2); // next, compute 
ideal weights and endpoint colors for every decimation. - const decimation_table *const *ixtab2 = bsd->decimation_tables; + const decimation_table *const *dts = bsd->decimation_tables; float *decimated_quantized_weights = tmpbuf->decimated_quantized_weights; float *decimated_weights = tmpbuf->decimated_weights; @@ -501,119 +601,75 @@ static void compress_symbolic_block_fixed_partition_2_planes( uint8_t *u8_quantized_decimated_quantized_weights = tmpbuf->u8_quantized_decimated_quantized_weights; // for each decimation mode, compute an ideal set of weights - for (int i = 0; i < MAX_DECIMATION_MODES; i++) + for (int i = 0; i < bsd->decimation_mode_count; i++) { - if (bsd->permit_encode[i] == 0 || bsd->decimation_mode_maxprec_2planes[i] < 0 || bsd->decimation_mode_percentile[i] > mode_cutoff) + const decimation_mode& dm = bsd->decimation_modes[i]; + if (dm.maxprec_2planes < 0 || (only_always && !dm.percentile_always) || !dm.percentile_hit) { continue; } - eix1[i] = *ei1; - eix2[i] = *ei2; - compute_ideal_weights_for_decimation_table(&(eix1[i]), ixtab2[i], decimated_quantized_weights + (2 * i) * MAX_WEIGHTS_PER_BLOCK, decimated_weights + (2 * i) * MAX_WEIGHTS_PER_BLOCK); - compute_ideal_weights_for_decimation_table(&(eix2[i]), ixtab2[i], decimated_quantized_weights + (2 * i + 1) * MAX_WEIGHTS_PER_BLOCK, decimated_weights + (2 * i + 1) * MAX_WEIGHTS_PER_BLOCK); + compute_ideal_weights_for_decimation_table( + *ei1, + eix1[i], + *(dts[i]), + decimated_quantized_weights + (2 * i) * MAX_WEIGHTS_PER_BLOCK, + decimated_weights + (2 * i) * MAX_WEIGHTS_PER_BLOCK); + + compute_ideal_weights_for_decimation_table( + *ei2, + eix2[i], + *(dts[i]), + decimated_quantized_weights + (2 * i + 1) * MAX_WEIGHTS_PER_BLOCK, + decimated_weights + (2 * i + 1) * MAX_WEIGHTS_PER_BLOCK); } // compute maximum colors for the endpoints and ideal weights. // for each endpoint-and-ideal-weight pair, compute the smallest weight value // that will result in a color value greater than 1. 
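// A minimal illustrative sketch, not taken from the diff above: in the
// two-plane path each decimation mode i owns two MAX_WEIGHTS_PER_BLOCK-sized
// slots of the shared scratch buffer, slot 2*i for plane 1 and slot 2*i + 1
// for plane 2, which is what the offsets above encode. The helper name is
// hypothetical and the 64-weight constant assumes the ASTC per-block limit.
static const int kMaxWeightsPerBlock = 64;

static float* plane_weight_slot(float* scratch, int decimation_mode, int plane)
{
    // plane is 0 or 1; keeping both planes of one mode adjacent means a single
    // stride of 2 * kMaxWeightsPerBlock walks the modes in order.
    return scratch + (2 * decimation_mode + plane) * kMaxWeightsPerBlock;
}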
- float4 min_ep1 = float4(10.0f); - float4 min_ep2 = float4(10.0f); + vfloat4 min_ep1(10.0f); + vfloat4 min_ep2(10.0f); for (int i = 0; i < partition_count; i++) { #ifdef DEBUG_CAPTURE_NAN fedisableexcept(FE_DIVBYZERO | FE_INVALID); #endif - float4 ep1 = float4( - (1.0f - ei1->ep.endpt0[i].r) / (ei1->ep.endpt1[i].r - ei1->ep.endpt0[i].r), - (1.0f - ei1->ep.endpt0[i].g) / (ei1->ep.endpt1[i].g - ei1->ep.endpt0[i].g), - (1.0f - ei1->ep.endpt0[i].b) / (ei1->ep.endpt1[i].b - ei1->ep.endpt0[i].b), - (1.0f - ei1->ep.endpt0[i].a) / (ei1->ep.endpt1[i].a - ei1->ep.endpt0[i].a)); + vfloat4 ep1 = (vfloat4(1.0f) - ei1->ep.endpt0[i]) / (ei1->ep.endpt1[i] - ei1->ep.endpt0[i]); + vmask4 use_ep1 = (ep1 > vfloat4(0.5f)) & (ep1 < min_ep1); + min_ep1 = select(min_ep1, ep1, use_ep1); - if (ep1.r > 0.5f && ep1.r < min_ep1.r) - { - min_ep1.r = ep1.r; - } - - if (ep1.g > 0.5f && ep1.g < min_ep1.g) - { - min_ep1.g = ep1.g; - } - - if (ep1.b > 0.5f && ep1.b < min_ep1.b) - { - min_ep1.b = ep1.b; - } - - if (ep1.a > 0.5f && ep1.a < min_ep1.a) - { - min_ep1.a = ep1.a; - } - - float4 ep2 = float4( - (1.0f - ei2->ep.endpt0[i].r) / (ei2->ep.endpt1[i].r - ei2->ep.endpt0[i].r), - (1.0f - ei2->ep.endpt0[i].g) / (ei2->ep.endpt1[i].g - ei2->ep.endpt0[i].g), - (1.0f - ei2->ep.endpt0[i].b) / (ei2->ep.endpt1[i].b - ei2->ep.endpt0[i].b), - (1.0f - ei2->ep.endpt0[i].a) / (ei2->ep.endpt1[i].a - ei2->ep.endpt0[i].a)); - - if (ep2.r > 0.5f && ep2.r < min_ep2.r) - { - min_ep2.r = ep2.r; - } - - if (ep2.g > 0.5f && ep2.g < min_ep2.g) - { - min_ep2.g = ep2.g; - } - - if (ep2.b > 0.5f && ep2.b < min_ep2.b) - { - min_ep2.b = ep2.b; - } - - if (ep2.a > 0.5f && ep2.a < min_ep2.a) - { - min_ep2.a = ep2.a; - } + vfloat4 ep2 = (vfloat4(1.0f) - ei2->ep.endpt0[i]) / (ei2->ep.endpt1[i] - ei2->ep.endpt0[i]); + vmask4 use_ep2 = (ep2 > vfloat4(0.5f)) & (ep2 < min_ep2); + min_ep2 = select(min_ep2, ep2, use_ep2); #ifdef DEBUG_CAPTURE_NAN feenableexcept(FE_DIVBYZERO | FE_INVALID); #endif } - float min_wt_cutoff1, min_wt_cutoff2; - switch (separate_component) - { - case 0: - min_wt_cutoff2 = min_ep2.r; - min_ep1.r = 1e30f; - break; - case 1: - min_wt_cutoff2 = min_ep2.g; - min_ep1.g = 1e30f; - break; - case 2: - min_wt_cutoff2 = min_ep2.b; - min_ep1.b = 1e30f; - break; - case 3: - min_wt_cutoff2 = min_ep2.a; - min_ep1.a = 1e30f; - break; - default: - min_wt_cutoff2 = 1e30f; - } + vfloat4 err_max(1e30f); + vmask4 err_mask = vint4::lane_id() == vint4(separate_component); + + // Set the separate component to max error in ep1 + min_ep1 = select(min_ep1, err_max, err_mask); - min_wt_cutoff1 = MIN(MIN(min_ep1.r, min_ep1.g), MIN(min_ep1.b, min_ep1.a)); + float min_wt_cutoff1 = hmin_s(min_ep1); + + // Set the minwt2 to the separate component min in ep2 + float min_wt_cutoff2 = hmin_s(select(err_max, min_ep2, err_mask)); float weight_low_value1[MAX_WEIGHT_MODES]; float weight_high_value1[MAX_WEIGHT_MODES]; float weight_low_value2[MAX_WEIGHT_MODES]; float weight_high_value2[MAX_WEIGHT_MODES]; - compute_angular_endpoints_2planes(mode_cutoff, bsd, decimated_quantized_weights, decimated_weights, weight_low_value1, weight_high_value1, weight_low_value2, weight_high_value2); + compute_angular_endpoints_2planes( + only_always, bsd, + decimated_quantized_weights, decimated_weights, + weight_low_value1, weight_high_value1, + weight_low_value2, weight_high_value2); // for each mode (which specifies a decimation and a quantization): // * generate an optimized set of quantized weights. 
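// A minimal illustrative sketch, not taken from the diff above: the lane-mask
// logic in this hunk splits the weight cutoff between the two planes. Plane 1
// ignores the channel carried by plane 2, and plane 2 considers only that
// channel. Scalar restatement with plain arrays; the helper name is hypothetical.
#include <algorithm>

static void split_weight_cutoffs(const float min_ep1[4], const float min_ep2[4],
                                 int separate_component,
                                 float& min_wt_cutoff1, float& min_wt_cutoff2)
{
    min_wt_cutoff1 = 1e30f;
    for (int c = 0; c < 4; c++)
    {
        if (c != separate_component)                  // masked out by err_mask above
        {
            min_wt_cutoff1 = std::min(min_wt_cutoff1, min_ep1[c]);
        }
    }
    min_wt_cutoff2 = min_ep2[separate_component];     // only the plane-2 channel survives
}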
@@ -622,10 +678,10 @@ static void compress_symbolic_block_fixed_partition_2_planes( int qwt_bitcounts[MAX_WEIGHT_MODES]; float qwt_errors[MAX_WEIGHT_MODES]; - for (int i = 0, ni = bsd->block_mode_packed_count; i < ni; ++i) + for (int i = 0; i < bsd->block_mode_count; ++i) { - const block_mode& bm = bsd->block_modes_packed[i]; - if (bm.is_dual_plane != 1 || bm.percentile > mode_cutoff) + const block_mode& bm = bsd->block_modes[i]; + if ((!bm.is_dual_plane) || (only_always && !bm.percentile_always) || !bm.percentile_hit) { qwt_errors[i] = 1e38f; continue; @@ -644,8 +700,9 @@ static void compress_symbolic_block_fixed_partition_2_planes( } // compute weight bitcount for the mode - int bits_used_by_weights = compute_ise_bitcount(2 * ixtab2[decimation_mode]->num_weights, - (quantization_method) bm.quantization_mode); + int bits_used_by_weights = get_ise_sequence_bitcount( + 2 * dts[decimation_mode]->weight_count, + (quant_method)bm.quant_mode); int bitcount = free_bits_for_partition_count[partition_count] - bits_used_by_weights; if (bitcount <= 0 || bits_used_by_weights < 24 || bits_used_by_weights > 96) { @@ -655,55 +712,62 @@ static void compress_symbolic_block_fixed_partition_2_planes( qwt_bitcounts[i] = bitcount; // then, generate the optimized set of weights for the mode. - compute_ideal_quantized_weights_for_decimation_table( - ixtab2[decimation_mode], + compute_quantized_weights_for_decimation_table( + dts[decimation_mode], weight_low_value1[i], weight_high_value1[i], decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * (2 * decimation_mode), flt_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * (2 * i), - u8_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * (2 * i), bm.quantization_mode); + u8_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * (2 * i), bm.quant_mode); - compute_ideal_quantized_weights_for_decimation_table( - ixtab2[decimation_mode], + compute_quantized_weights_for_decimation_table( + dts[decimation_mode], weight_low_value2[i], weight_high_value2[i], decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * (2 * decimation_mode + 1), flt_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * (2 * i + 1), - u8_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * (2 * i + 1), bm.quantization_mode); + u8_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * (2 * i + 1), bm.quant_mode); // then, compute quantization errors for the block mode. qwt_errors[i] = compute_error_of_weight_set( &(eix1[decimation_mode]), - ixtab2[decimation_mode], + dts[decimation_mode], flt_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * (2 * i)) + + compute_error_of_weight_set( &(eix2[decimation_mode]), - ixtab2[decimation_mode], + dts[decimation_mode], flt_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * (2 * i + 1)); } // decide the optimal combination of color endpoint encodings and weight encodings. 
int partition_format_specifiers[TUNE_MAX_TRIAL_CANDIDATES][4]; int quantized_weight[TUNE_MAX_TRIAL_CANDIDATES]; - int color_quantization_level[TUNE_MAX_TRIAL_CANDIDATES]; - int color_quantization_level_mod[TUNE_MAX_TRIAL_CANDIDATES]; + int color_quant_level[TUNE_MAX_TRIAL_CANDIDATES]; + int color_quant_level_mod[TUNE_MAX_TRIAL_CANDIDATES]; endpoints epm; merge_endpoints(&(ei1->ep), &(ei2->ep), separate_component, &epm); determine_optimal_set_of_endpoint_formats_to_use( - bsd, pi, blk, ewb, &epm, separate_component, qwt_bitcounts, qwt_errors, + bsd, pt, blk, ewb, &epm, separate_component, qwt_bitcounts, qwt_errors, tune_candidate_limit, partition_format_specifiers, quantized_weight, - color_quantization_level, color_quantization_level_mod); + color_quant_level, color_quant_level_mod); + + // then iterate over the tune_candidate_limit believed-to-be-best modes to + // find out which one is actually best. + float best_errorval_in_mode = 1e30f; + float best_errorval_in_scb = scb.errorval; for (int i = 0; i < tune_candidate_limit; i++) { + TRACE_NODE(node0, "candidate"); + const int qw_packed_index = quantized_weight[i]; if (qw_packed_index < 0) { - scb->error_block = 1; - scb++; + trace_add_data("failed", "error_block"); continue; } @@ -711,107 +775,199 @@ static void compress_symbolic_block_fixed_partition_2_planes( uint8_t *u8_weight2_src; int weights_to_copy; - assert(qw_packed_index >= 0 && qw_packed_index < bsd->block_mode_packed_count); - const block_mode& qw_bm = bsd->block_modes_packed[qw_packed_index]; + assert(qw_packed_index >= 0 && qw_packed_index < bsd->block_mode_count); + const block_mode& qw_bm = bsd->block_modes[qw_packed_index]; int decimation_mode = qw_bm.decimation_mode; - int weight_quantization_mode = qw_bm.quantization_mode; - const decimation_table *it = ixtab2[decimation_mode]; + int weight_quant_mode = qw_bm.quant_mode; + const decimation_table *dt = dts[decimation_mode]; u8_weight1_src = u8_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * (2 * qw_packed_index); u8_weight2_src = u8_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * (2 * qw_packed_index + 1); + weights_to_copy = dt->weight_count; - weights_to_copy = it->num_weights; + trace_add_data("weight_x", dt->weight_x); + trace_add_data("weight_y", dt->weight_y); + trace_add_data("weight_z", dt->weight_z); + trace_add_data("weight_quant", weight_quant_mode); // recompute the ideal color endpoints before storing them. 
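// A minimal illustrative sketch, not taken from the diff above: roughly what
// the merge_endpoints() calls in this hunk combine before packing. Plane 1
// supplies the endpoints for three channels and plane 2 supplies the
// separately-weighted channel, so a single merged endpoint set can be encoded.
// This scalar stand-in uses plain arrays and a hypothetical name; treat it as
// a reading aid, not the library's implementation.
static void merge_endpoint_channels(const float plane1_ep[4], const float plane2_ep[4],
                                    int separate_component, float merged_ep[4])
{
    for (int c = 0; c < 4; c++)
    {
        merged_ep[c] = (c == separate_component) ? plane2_ep[c] : plane1_ep[c];
    }
}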
merge_endpoints(&(eix1[decimation_mode].ep), &(eix2[decimation_mode].ep), separate_component, &epm); - float4 rgbs_colors[4]; - float4 rgbo_colors[4]; + vfloat4 rgbs_colors[4]; + vfloat4 rgbo_colors[4]; + // TODO: Ping-pong between two buffers and make this zero copy + symbolic_compressed_block workscb; for (int l = 0; l < max_refinement_iters; l++) { - recompute_ideal_colors( - weight_quantization_mode, &epm, rgbs_colors, rgbo_colors, - u8_weight1_src, u8_weight2_src, separate_component, pi, it, blk, ewb); + recompute_ideal_colors_2planes( + weight_quant_mode, &epm, rgbs_colors, rgbo_colors, + u8_weight1_src, u8_weight2_src, separate_component, pt, dt, blk, ewb); // store the colors for the block for (int j = 0; j < partition_count; j++) { - scb->color_formats[j] = pack_color_endpoints( - epm.endpt0[j], epm.endpt1[j], + workscb.color_formats[j] = pack_color_endpoints( + epm.endpt0[j], + epm.endpt1[j], rgbs_colors[j], rgbo_colors[j], partition_format_specifiers[i][j], - scb->color_values[j], - color_quantization_level[i]); + workscb.color_values[j], + color_quant_level[i]); } - scb->color_formats_matched = 0; - if ((partition_count >= 2 && scb->color_formats[0] == scb->color_formats[1] - && color_quantization_level[i] != color_quantization_level_mod[i]) - && (partition_count == 2 || (scb->color_formats[0] == scb->color_formats[2] && (partition_count == 3 || (scb->color_formats[0] == scb->color_formats[3]))))) + workscb.color_formats_matched = 0; + + if ((partition_count >= 2 && workscb.color_formats[0] == workscb.color_formats[1] + && color_quant_level[i] != color_quant_level_mod[i]) + && (partition_count == 2 || (workscb.color_formats[0] == workscb.color_formats[2] + && (partition_count == 3 || (workscb.color_formats[0] == workscb.color_formats[3]))))) { int colorvals[4][12]; - int color_formats_mod[4] = { 0 }; + int color_formats_mod[4] { 0 }; for (int j = 0; j < partition_count; j++) { color_formats_mod[j] = pack_color_endpoints( - epm.endpt0[j], epm.endpt1[j], - rgbs_colors[j], rgbo_colors[j], - partition_format_specifiers[i][j], - colorvals[j], - color_quantization_level_mod[i]); + epm.endpt0[j], + epm.endpt1[j], + rgbs_colors[j], + rgbo_colors[j], + partition_format_specifiers[i][j], + colorvals[j], + color_quant_level_mod[i]); } if (color_formats_mod[0] == color_formats_mod[1] - && (partition_count == 2 || (color_formats_mod[0] == color_formats_mod[2] && (partition_count == 3 || (color_formats_mod[0] == color_formats_mod[3]))))) + && (partition_count == 2 || (color_formats_mod[0] == color_formats_mod[2] + && (partition_count == 3 || (color_formats_mod[0] == color_formats_mod[3]))))) { - scb->color_formats_matched = 1; + workscb.color_formats_matched = 1; for (int j = 0; j < 4; j++) { for (int k = 0; k < 12; k++) { - scb->color_values[j][k] = colorvals[j][k]; + workscb.color_values[j][k] = colorvals[j][k]; } } for (int j = 0; j < 4; j++) { - scb->color_formats[j] = color_formats_mod[j]; + workscb.color_formats[j] = color_formats_mod[j]; } } } // store header fields - scb->partition_count = partition_count; - scb->partition_index = partition_index; - scb->color_quantization_level = scb->color_formats_matched ? color_quantization_level_mod[i] : color_quantization_level[i]; - scb->block_mode = qw_bm.mode_index; - scb->plane2_color_component = separate_component; - scb->error_block = 0; - - if (scb->color_quantization_level < 4) + workscb.partition_count = partition_count; + workscb.partition_index = partition_index; + workscb.color_quant_level = workscb.color_formats_matched ? 
color_quant_level_mod[i] : color_quant_level[i]; + workscb.block_mode = qw_bm.mode_index; + workscb.plane2_color_component = separate_component; + workscb.error_block = 0; + + if (workscb.color_quant_level < 4) { - scb->error_block = 1; // should never happen, but cannot prove it impossible + workscb.error_block = 1; // should never happen, but cannot prove it impossible + } + + // Pre-realign test + if (l == 0) + { + for (int j = 0; j < weights_to_copy; j++) + { + workscb.weights[j] = u8_weight1_src[j]; + workscb.weights[j + PLANE2_WEIGHTS_OFFSET] = u8_weight2_src[j]; + } + + float errorval = compute_symbolic_block_difference(config, bsd, &workscb, blk, ewb); + if (errorval == -1e30f) + { + errorval = -errorval; + workscb.error_block = 1; + } + + + trace_add_data("error_prerealign", errorval); + best_errorval_in_mode = astc::min(errorval, best_errorval_in_mode); + + // Average refinement improvement is 3.5% per iteration + // (allow 5%), but the first iteration can help more so we give + // it a extra 10% leeway. Use this knowledge to drive a + // heuristic to skip blocks that are unlikely to catch up with + // the best block we have already. + int iters_remaining = max_refinement_iters - l; + float threshold = (0.05f * static_cast(iters_remaining)) + 1.1f; + if (errorval > (threshold * best_errorval_in_scb)) + { + break; + } + + if (errorval < best_errorval_in_scb) + { + best_errorval_in_scb = errorval; + workscb.errorval = errorval; + scb = workscb; + + if (errorval < tune_errorval_threshold) + { + return errorval; + } + } } + // perform a final pass over the weights to try to improve them. int adjustments = realign_weights( - decode_mode, bsd, blk, ewb, scb, u8_weight1_src, u8_weight2_src); + config.profile, bsd, blk, ewb, &workscb, + u8_weight1_src, u8_weight2_src); - if (adjustments == 0) + // Post-realign test + for (int j = 0; j < weights_to_copy; j++) + { + workscb.weights[j] = u8_weight1_src[j]; + workscb.weights[j + PLANE2_WEIGHTS_OFFSET] = u8_weight2_src[j]; + } + + float errorval = compute_symbolic_block_difference(config, bsd, &workscb, blk, ewb); + if (errorval == -1e30f) + { + errorval = -errorval; + workscb.error_block = 1; + } + + trace_add_data("error_postrealign", errorval); + best_errorval_in_mode = astc::min(errorval, best_errorval_in_mode); + + // Average refinement improvement is 3.5% per iteration, so skip + // blocks that are unlikely to catch up with the best block we + // have already. 
Assume a 5% per step to give benefit of the doubt + int iters_remaining = max_refinement_iters - 1 - l; + float threshold = (0.05f * static_cast(iters_remaining)) + 1.0f; + if (errorval > (threshold * best_errorval_in_scb)) { break; } - } - for (int j = 0; j < weights_to_copy; j++) - { - scb->plane1_weights[j] = u8_weight1_src[j]; - scb->plane2_weights[j] = u8_weight2_src[j]; - } + if (errorval < best_errorval_in_scb) + { + best_errorval_in_scb = errorval; + workscb.errorval = errorval; + scb = workscb; - scb++; + if (errorval < tune_errorval_threshold) + { + return errorval; + } + } + + if (adjustments == 0) + { + break; + } + } } + + return best_errorval_in_mode; } void expand_deblock_weights( @@ -821,9 +977,9 @@ void expand_deblock_weights( unsigned int ydim = ctx.config.block_y; unsigned int zdim = ctx.config.block_z; - float centerpos_x = (xdim - 1) * 0.5f; - float centerpos_y = (ydim - 1) * 0.5f; - float centerpos_z = (zdim - 1) * 0.5f; + float centerpos_x = static_cast(xdim - 1) * 0.5f; + float centerpos_y = static_cast(ydim - 1) * 0.5f; + float centerpos_z = static_cast(zdim - 1) * 0.5f; float *bef = ctx.deblock_weights; for (unsigned int z = 0; z < zdim; z++) @@ -832,13 +988,13 @@ void expand_deblock_weights( { for (unsigned int x = 0; x < xdim; x++) { - float xdif = (x - centerpos_x) / xdim; - float ydif = (y - centerpos_y) / ydim; - float zdif = (z - centerpos_z) / zdim; + float xdif = (static_cast(x) - centerpos_x) / static_cast(xdim); + float ydif = (static_cast(y) - centerpos_y) / static_cast(ydim); + float zdif = (static_cast(z) - centerpos_z) / static_cast(zdim); float wdif = 0.36f; float dist = astc::sqrt(xdif * xdif + ydif * ydif + zdif * zdif + wdif * wdif); - *bef = powf(dist, ctx.config.b_deblock_weight); + *bef = astc::pow(dist, ctx.config.b_deblock_weight); bef++; } } @@ -859,14 +1015,12 @@ static float prepare_error_weight_block( ctx.config.v_rgb_mean != 0.0f || ctx.config.v_rgb_stdev != 0.0f || \ ctx.config.v_a_mean != 0.0f || ctx.config.v_a_stdev != 0.0f; - float4 derv[MAX_TEXELS_PER_BLOCK]; + vfloat4 derv[MAX_TEXELS_PER_BLOCK]; imageblock_initialize_deriv(blk, bsd->texel_count, derv); - float4 color_weights = float4(ctx.config.cw_r_weight, - ctx.config.cw_g_weight, - ctx.config.cw_b_weight, - ctx.config.cw_a_weight); - - ewb->contains_zeroweight_texels = 0; + vfloat4 color_weights(ctx.config.cw_r_weight, + ctx.config.cw_g_weight, + ctx.config.cw_b_weight, + ctx.config.cw_a_weight); for (int z = 0; z < bsd->zdim; z++) { @@ -880,70 +1034,49 @@ static float prepare_error_weight_block( if (xpos >= input_image.dim_x || ypos >= input_image.dim_y || zpos >= input_image.dim_z) { - float4 weights = float4(1e-11f); - ewb->error_weights[idx] = weights; - ewb->contains_zeroweight_texels = 1; + ewb->error_weights[idx] = vfloat4(1e-11f); } else { - float4 error_weight = float4(ctx.config.v_rgb_base, - ctx.config.v_rgb_base, - ctx.config.v_rgb_base, - ctx.config.v_a_base); + vfloat4 error_weight(ctx.config.v_rgb_base, + ctx.config.v_rgb_base, + ctx.config.v_rgb_base, + ctx.config.v_a_base); int ydt = input_image.dim_x; int zdt = input_image.dim_x * input_image.dim_y; if (any_mean_stdev_weight) { - float4 avg = ctx.input_averages[zpos * zdt + ypos * ydt + xpos]; - if (avg.r < 6e-5f) - avg.r = 6e-5f; - if (avg.g < 6e-5f) - avg.g = 6e-5f; - if (avg.b < 6e-5f) - avg.b = 6e-5f; - if (avg.a < 6e-5f) - avg.a = 6e-5f; - + vfloat4 avg = ctx.input_averages[zpos * zdt + ypos * ydt + xpos]; + avg = max(avg, 6e-5f); avg = avg * avg; - float4 variance = ctx.input_variances[zpos * zdt + ypos 
* ydt + xpos]; + vfloat4 variance = ctx.input_variances[zpos * zdt + ypos * ydt + xpos]; variance = variance * variance; - float favg = (avg.r + avg.g + avg.b) * (1.0f / 3.0f); - float fvar = (variance.r + variance.g + variance.b) * (1.0f / 3.0f); + float favg = hadd_rgb_s(avg) * (1.0f / 3.0f); + float fvar = hadd_rgb_s(variance) * (1.0f / 3.0f); float mixing = ctx.config.v_rgba_mean_stdev_mix; - avg.r = favg * mixing + avg.r * (1.0f - mixing); - avg.g = favg * mixing + avg.g * (1.0f - mixing); - avg.b = favg * mixing + avg.b * (1.0f - mixing); + avg.set_lane<0>(favg * mixing + avg.lane<0>() * (1.0f - mixing)); + avg.set_lane<1>(favg * mixing + avg.lane<1>() * (1.0f - mixing)); + avg.set_lane<2>(favg * mixing + avg.lane<2>() * (1.0f - mixing)); - variance.r = fvar * mixing + variance.r * (1.0f - mixing); - variance.g = fvar * mixing + variance.g * (1.0f - mixing); - variance.b = fvar * mixing + variance.b * (1.0f - mixing); + variance.set_lane<0>(fvar * mixing + variance.lane<0>() * (1.0f - mixing)); + variance.set_lane<1>(fvar * mixing + variance.lane<1>() * (1.0f - mixing)); + variance.set_lane<2>(fvar * mixing + variance.lane<2>() * (1.0f - mixing)); - float4 stdev = float4(astc::sqrt(MAX(variance.r, 0.0f)), - astc::sqrt(MAX(variance.g, 0.0f)), - astc::sqrt(MAX(variance.b, 0.0f)), - astc::sqrt(MAX(variance.a, 0.0f))); + vfloat4 stdev = sqrt(max(variance, 0.0f)); - avg.r *= ctx.config.v_rgb_mean; - avg.g *= ctx.config.v_rgb_mean; - avg.b *= ctx.config.v_rgb_mean; - avg.a *= ctx.config.v_a_mean; + vfloat4 scalea(ctx.config.v_rgb_mean, ctx.config.v_rgb_mean, ctx.config.v_rgb_mean, ctx.config.v_a_mean); + avg = avg * scalea; - stdev.r *= ctx.config.v_rgb_stdev; - stdev.g *= ctx.config.v_rgb_stdev; - stdev.b *= ctx.config.v_rgb_stdev; - stdev.a *= ctx.config.v_a_stdev; + vfloat4 scales(ctx.config.v_rgb_stdev, ctx.config.v_rgb_stdev, ctx.config.v_rgb_stdev, ctx.config.v_a_stdev); + stdev = stdev * scales; error_weight = error_weight + avg + stdev; - - error_weight = float4(1.0f / error_weight.r, - 1.0f / error_weight.g, - 1.0f / error_weight.b, - 1.0f / error_weight.a); + error_weight = 1.0f / error_weight; } if (ctx.config.flags & ASTCENC_FLG_MAP_NORMAL) @@ -953,11 +1086,10 @@ static float prepare_error_weight_block( float yN = ((blk->data_a[idx] * (1.0f / 65535.0f)) - 0.5f) * 2.0f; float denom = 1.0f - xN * xN - yN * yN; - if (denom < 0.1f) - denom = 0.1f; + denom = astc::max(denom, 0.1f); denom = 1.0f / denom; - error_weight.r *= 1.0f + xN * xN * denom; - error_weight.a *= 1.0f + yN * yN * denom; + error_weight.set_lane<0>(error_weight.lane<0>() * (1.0f + xN * xN * denom)); + error_weight.set_lane<3>(error_weight.lane<3>() * (1.0f + yN * yN * denom)); } if (ctx.config.flags & ASTCENC_FLG_USE_ALPHA_WEIGHT) @@ -972,15 +1104,12 @@ static float prepare_error_weight_block( alpha_scale = blk->data_a[idx] * (1.0f / 65535.0f); } - if (alpha_scale < 0.0001f) - { - alpha_scale = 0.0001f; - } + alpha_scale = astc::max(alpha_scale, 0.0001f); alpha_scale *= alpha_scale; - error_weight.r *= alpha_scale; - error_weight.g *= alpha_scale; - error_weight.b *= alpha_scale; + error_weight.set_lane<0>(error_weight.lane<0>() * alpha_scale); + error_weight.set_lane<1>(error_weight.lane<1>() * alpha_scale); + error_weight.set_lane<2>(error_weight.lane<2>() * alpha_scale); } error_weight = error_weight * color_weights; @@ -994,47 +1123,44 @@ static float prepare_error_weight_block( // which is equivalent to dividing by the derivative of the transfer // function. 
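// A minimal illustrative sketch, not taken from the diff above: the per-texel
// error weight assembled in this hunk is, per channel, the reciprocal of
// (base + scaled local mean + scaled local standard deviation), so noisy or
// bright regions count for less. This is a simplified scalar restatement (the
// real code also squares and cross-mixes the channel statistics first); the
// parameter names are hypothetical stand-ins for the v_* config fields.
#include <cmath>

static float channel_error_weight(float base, float local_mean, float local_variance,
                                  float mean_scale, float stdev_scale)
{
    float stdev = std::sqrt(local_variance > 0.0f ? local_variance : 0.0f);
    // A larger mean or stdev grows the denominator and shrinks the weight.
    return 1.0f / (base + mean_scale * local_mean + stdev_scale * stdev);
}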
- error_weight.r /= (derv[idx].r * derv[idx].r * 1e-10f); - error_weight.g /= (derv[idx].g * derv[idx].g * 1e-10f); - error_weight.b /= (derv[idx].b * derv[idx].b * 1e-10f); - error_weight.a /= (derv[idx].a * derv[idx].a * 1e-10f); - + error_weight = error_weight / (derv[idx] * derv[idx] * 1e-10f); ewb->error_weights[idx] = error_weight; - if (dot(error_weight, float4(1.0f, 1.0f, 1.0f, 1.0f)) < 1e-10f) - { - ewb->contains_zeroweight_texels = 1; - } } idx++; } } } - float4 error_weight_sum = float4(0.0f, 0.0f, 0.0f, 0.0f); + vfloat4 error_weight_sum = vfloat4::zero(); int texels_per_block = bsd->texel_count; for (int i = 0; i < texels_per_block; i++) { error_weight_sum = error_weight_sum + ewb->error_weights[i]; - ewb->texel_weight_r[i] = ewb->error_weights[i].r; - ewb->texel_weight_g[i] = ewb->error_weights[i].g; - ewb->texel_weight_b[i] = ewb->error_weights[i].b; - ewb->texel_weight_a[i] = ewb->error_weights[i].a; + float wr = ewb->error_weights[i].lane<0>(); + float wg = ewb->error_weights[i].lane<1>(); + float wb = ewb->error_weights[i].lane<2>(); + float wa = ewb->error_weights[i].lane<3>(); - ewb->texel_weight_rg[i] = (ewb->error_weights[i].r + ewb->error_weights[i].g) * 0.5f; - ewb->texel_weight_rb[i] = (ewb->error_weights[i].r + ewb->error_weights[i].b) * 0.5f; - ewb->texel_weight_gb[i] = (ewb->error_weights[i].g + ewb->error_weights[i].b) * 0.5f; - ewb->texel_weight_ra[i] = (ewb->error_weights[i].r + ewb->error_weights[i].a) * 0.5f; + ewb->texel_weight_r[i] = wr; + ewb->texel_weight_g[i] = wg; + ewb->texel_weight_b[i] = wb; + ewb->texel_weight_a[i] = wa; - ewb->texel_weight_gba[i] = (ewb->error_weights[i].g + ewb->error_weights[i].b + ewb->error_weights[i].a) * 0.333333f; - ewb->texel_weight_rba[i] = (ewb->error_weights[i].r + ewb->error_weights[i].b + ewb->error_weights[i].a) * 0.333333f; - ewb->texel_weight_rga[i] = (ewb->error_weights[i].r + ewb->error_weights[i].g + ewb->error_weights[i].a) * 0.333333f; - ewb->texel_weight_rgb[i] = (ewb->error_weights[i].r + ewb->error_weights[i].g + ewb->error_weights[i].b) * 0.333333f; + ewb->texel_weight_rg[i] = (wr + wg) * 0.5f; + ewb->texel_weight_rb[i] = (wr + wb) * 0.5f; + ewb->texel_weight_gb[i] = (wg + wb) * 0.5f; + ewb->texel_weight_ra[i] = (wr + wa) * 0.5f; - ewb->texel_weight[i] = (ewb->error_weights[i].r + ewb->error_weights[i].g + ewb->error_weights[i].b + ewb->error_weights[i].a) * 0.25f; + ewb->texel_weight_gba[i] = (wg + wb + wa) * 0.333333f; + ewb->texel_weight_rba[i] = (wr + wb + wa) * 0.333333f; + ewb->texel_weight_rga[i] = (wr + wg + wa) * 0.333333f; + ewb->texel_weight_rgb[i] = (wr + wg + wb) * 0.333333f; + + ewb->texel_weight[i] = (wr + wg + wb + wa) * 0.25f; } - return dot(error_weight_sum, float4(1.0f, 1.0f, 1.0f, 1.0f)); + return hadd_s(error_weight_sum); } static float prepare_block_statistics( @@ -1096,7 +1222,7 @@ static float prepare_block_statistics( aa_var += a * aw; } - float rpt = 1.0f / MAX(weight_sum, 1e-7f); + float rpt = 1.0f / astc::max(weight_sum, 1e-7f); rr_var -= rs * (rs * rpt); rg_cov -= gs * (rs * rpt); @@ -1112,12 +1238,12 @@ static float prepare_block_statistics( aa_var -= as * (as * rpt); - rg_cov *= astc::rsqrt(MAX(rr_var * gg_var, 1e-30f)); - rb_cov *= astc::rsqrt(MAX(rr_var * bb_var, 1e-30f)); - ra_cov *= astc::rsqrt(MAX(rr_var * aa_var, 1e-30f)); - gb_cov *= astc::rsqrt(MAX(gg_var * bb_var, 1e-30f)); - ga_cov *= astc::rsqrt(MAX(gg_var * aa_var, 1e-30f)); - ba_cov *= astc::rsqrt(MAX(bb_var * aa_var, 1e-30f)); + rg_cov *= astc::rsqrt(astc::max(rr_var * gg_var, 1e-30f)); + rb_cov *= 
astc::rsqrt(astc::max(rr_var * bb_var, 1e-30f)); + ra_cov *= astc::rsqrt(astc::max(rr_var * aa_var, 1e-30f)); + gb_cov *= astc::rsqrt(astc::max(gg_var * bb_var, 1e-30f)); + ga_cov *= astc::rsqrt(astc::max(gg_var * aa_var, 1e-30f)); + ba_cov *= astc::rsqrt(astc::max(bb_var * aa_var, 1e-30f)); if (astc::isnan(rg_cov)) rg_cov = 1.0f; if (astc::isnan(rb_cov)) rb_cov = 1.0f; @@ -1126,11 +1252,28 @@ static float prepare_block_statistics( if (astc::isnan(ga_cov)) ga_cov = 1.0f; if (astc::isnan(ba_cov)) ba_cov = 1.0f; - float lowest_correlation = MIN(fabsf(rg_cov), fabsf(rb_cov)); - lowest_correlation = MIN(lowest_correlation, fabsf(ra_cov)); - lowest_correlation = MIN(lowest_correlation, fabsf(gb_cov)); - lowest_correlation = MIN(lowest_correlation, fabsf(ga_cov)); - lowest_correlation = MIN(lowest_correlation, fabsf(ba_cov)); + float lowest_correlation = astc::min(fabsf(rg_cov), fabsf(rb_cov)); + lowest_correlation = astc::min(lowest_correlation, fabsf(ra_cov)); + lowest_correlation = astc::min(lowest_correlation, fabsf(gb_cov)); + lowest_correlation = astc::min(lowest_correlation, fabsf(ga_cov)); + lowest_correlation = astc::min(lowest_correlation, fabsf(ba_cov)); + + // Diagnostic trace points + trace_add_data("min_r", blk->data_min.lane<0>()); + trace_add_data("max_r", blk->data_max.lane<0>()); + trace_add_data("min_g", blk->data_min.lane<1>()); + trace_add_data("max_g", blk->data_max.lane<1>()); + trace_add_data("min_b", blk->data_min.lane<2>()); + trace_add_data("max_b", blk->data_max.lane<2>()); + trace_add_data("min_a", blk->data_min.lane<3>()); + trace_add_data("max_a", blk->data_max.lane<3>()); + trace_add_data("cov_rg", fabsf(rg_cov)); + trace_add_data("cov_rb", fabsf(rb_cov)); + trace_add_data("cov_ra", fabsf(ra_cov)); + trace_add_data("cov_gb", fabsf(gb_cov)); + trace_add_data("cov_ga", fabsf(ga_cov)); + trace_add_data("cov_ba", fabsf(ba_cov)); + return lowest_correlation; } @@ -1143,71 +1286,92 @@ void compress_block( compress_symbolic_block_buffers* tmpbuf) { astcenc_profile decode_mode = ctx.config.profile; + error_weight_block *ewb = &tmpbuf->ewb; const block_size_descriptor* bsd = ctx.bsd; + float lowest_correl; + + TRACE_NODE(node0, "block"); + trace_add_data("pos_x", blk->xpos); + trace_add_data("pos_y", blk->ypos); + trace_add_data("pos_z", blk->zpos); + + // Set stricter block targets for luminance data as we have more bits to + // play with - fewer endpoints and never need a second weight plane + bool block_is_l = imageblock_is_lum(blk); + float block_is_l_scale = block_is_l ? 1.0f / 1.5f : 1.0f; + + // Set slightly stricter block targets for lumalpha data as we have more + // bits to play with - fewer endpoints but may use a second weight plane + bool block_is_la = imageblock_is_lumalp(blk); + float block_is_la_scale = block_is_la ? 1.0f / 1.05f : 1.0f; + + bool block_skip_two_plane = false; + + // Default max partition, but +1 if only have 1 or 2 active components + int max_partitions = ctx.config.tune_partition_count_limit; + if (block_is_l || block_is_la) + { + max_partitions = astc::min(max_partitions + 1, 4); + } + + +#if defined(ASTCENC_DIAGNOSTICS) + // Do this early in diagnostic builds so we can dump uniform metrics + // for every block. Do it later in release builds to avoid redundant work! 
+ float error_weight_sum = prepare_error_weight_block(ctx, input_image, bsd, blk, ewb); + float error_threshold = ctx.config.tune_db_limit + * error_weight_sum + * block_is_l_scale + * block_is_la_scale; + + lowest_correl = prepare_block_statistics(bsd->texel_count, blk, ewb); + + trace_add_data("tune_error_threshold", error_threshold); +#endif - if (blk->red_min == blk->red_max && blk->green_min == blk->green_max && blk->blue_min == blk->blue_max && blk->alpha_min == blk->alpha_max) + if (all(blk->data_min == blk->data_max)) { + TRACE_NODE(node1, "pass"); + trace_add_data("partition_count", 0); + trace_add_data("plane_count", 1); + // detected a constant-color block. Encode as FP16 if using HDR scb.error_block = 0; + scb.partition_count = 0; if ((decode_mode == ASTCENC_PRF_HDR) || (decode_mode == ASTCENC_PRF_HDR_RGB_LDR_A)) { scb.block_mode = -1; - scb.partition_count = 0; - float4 orig_color = blk->origin_texel; - scb.constant_color[0] = float_to_sf16(orig_color.r, SF_NEARESTEVEN); - scb.constant_color[1] = float_to_sf16(orig_color.g, SF_NEARESTEVEN); - scb.constant_color[2] = float_to_sf16(orig_color.b, SF_NEARESTEVEN); - scb.constant_color[3] = float_to_sf16(orig_color.a, SF_NEARESTEVEN); + vint4 color_f16 = float_to_float16(blk->origin_texel); + store(color_f16, scb.constant_color); } else { // Encode as UNORM16 if NOT using HDR. scb.block_mode = -2; - scb.partition_count = 0; - float4 orig_color = blk->origin_texel; - float red = orig_color.r; - float green = orig_color.g; - float blue = orig_color.b; - float alpha = orig_color.a; - - if (red < 0) - red = 0; - else if (red > 1) - red = 1; - - if (green < 0) - green = 0; - else if (green > 1) - green = 1; - - if (blue < 0) - blue = 0; - else if (blue > 1) - blue = 1; - - if (alpha < 0) - alpha = 0; - else if (alpha > 1) - alpha = 1; - - scb.constant_color[0] = astc::flt2int_rtn(red * 65535.0f); - scb.constant_color[1] = astc::flt2int_rtn(green * 65535.0f); - scb.constant_color[2] = astc::flt2int_rtn(blue * 65535.0f); - scb.constant_color[3] = astc::flt2int_rtn(alpha * 65535.0f); + vfloat4 color_f32 = clamp(0.0f, 1.0f, blk->origin_texel) * 65535.0f; + vint4 color_u16 = float_to_int_rtn(color_f32); + store(color_u16, scb.constant_color); } + trace_add_data("exit", "quality hit"); + symbolic_to_physical(*bsd, scb, pcb); return; } - error_weight_block *ewb = &tmpbuf->ewb; +#if !defined(ASTCENC_DIAGNOSTICS) float error_weight_sum = prepare_error_weight_block(ctx, input_image, bsd, blk, ewb); + float error_threshold = ctx.config.tune_db_limit + * error_weight_sum + * block_is_l_scale + * block_is_la_scale; +#endif - symbolic_compressed_block *tempblocks = tmpbuf->tempblocks; - - float error_of_best_block = 1e20f; + // Set SCB and mode errors to a very high error value + scb.errorval = 1e30f; + scb.error_block = 1; float best_errorvals_in_modes[13]; for (int i = 0; i < 13; i++) @@ -1217,8 +1381,6 @@ void compress_block( int uses_alpha = imageblock_uses_alpha(blk); - float mode_cutoff = ctx.config.tune_block_mode_limit / 100.0f; - // Trial using 1 plane of weights and 1 partition. // Most of the time we test it twice, first with a mode cutoff of 0 and @@ -1227,227 +1389,182 @@ void compress_block( // disabled for 4x4 and 5x4 blocks where it nearly always slows down the // compression and slightly reduces image quality. 
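// A minimal illustrative sketch, not taken from the diff above, of two pieces
// of the compress_block() setup shown in this hunk. First, the per-block error
// threshold: the quality limit is scaled by the block's total error weight and
// tightened for luminance-only (divide by 1.5) and luma+alpha (divide by 1.05)
// blocks, which have spare bits to trade for quality. Second, the
// constant-color LDR fast path, which clamps each channel to [0, 1] and rounds
// to UNORM16. Names, and the example numbers in the comment, are hypothetical.
#include <cmath>
#include <cstdint>

static float block_error_threshold(float tune_db_limit, float error_weight_sum,
                                   bool block_is_l, bool block_is_la)
{
    float scale_l  = block_is_l  ? 1.0f / 1.5f  : 1.0f;
    float scale_la = block_is_la ? 1.0f / 1.05f : 1.0f;
    // e.g. with tune_db_limit = 256 and error_weight_sum = 16 on a luminance
    // block, the threshold becomes 256 * 16 / 1.5, roughly 2731.
    return tune_db_limit * error_weight_sum * scale_l * scale_la;
}

static uint16_t encode_constant_channel_unorm16(float value)
{
    value = value < 0.0f ? 0.0f : (value > 1.0f ? 1.0f : value);   // clamp(0, 1, v)
    return static_cast<uint16_t>(std::lround(value * 65535.0f));   // round to nearest
}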
- float modecutoffs[2]; - float errorval_mult[2] = { 2.5, 1 }; - modecutoffs[0] = 0; - modecutoffs[1] = mode_cutoff; + float errorval_mult[2] = { + 1.0f / ctx.config.tune_mode0_mse_overshoot, + 1.0f + }; - float lowest_correl; - float best_errorval_in_mode; + static const float errorval_overshoot = 1.0f / ctx.config.tune_refinement_mse_overshoot; - int start_trial = bsd->texel_count < TUNE_MAX_TEXELS_MODE0_FASTPATH ? 1 : 0; + int start_trial = bsd->texel_count < (int)TUNE_MAX_TEXELS_MODE0_FASTPATH ? 1 : 0; for (int i = start_trial; i < 2; i++) { - compress_symbolic_block_fixed_partition_1_plane( - decode_mode, modecutoffs[i], + TRACE_NODE(node1, "pass"); + trace_add_data("partition_count", 1); + trace_add_data("plane_count", 1); + trace_add_data("search_mode", i); + + float errorval = compress_symbolic_block_fixed_partition_1_plane( + ctx.config, i == 0, ctx.config.tune_candidate_limit, + error_threshold * errorval_mult[i] * errorval_overshoot, ctx.config.tune_refinement_limit, - bsd, 1, 0, blk, ewb, tempblocks, &tmpbuf->planes); - - best_errorval_in_mode = 1e30f; - for (unsigned int j = 0; j < ctx.config.tune_candidate_limit; j++) - { - if (tempblocks[j].error_block) - { - continue; - } - - float errorval = compute_symbolic_block_difference(decode_mode, bsd, tempblocks + j, blk, ewb); - errorval *= errorval_mult[i]; - if (errorval < best_errorval_in_mode) - { - best_errorval_in_mode = errorval; - } - - if (errorval < error_of_best_block) - { - error_of_best_block = errorval; - scb = tempblocks[j]; - } - } + bsd, 1, 0, blk, ewb, scb, &tmpbuf->planes); // Mode 0 - best_errorvals_in_modes[0] = best_errorval_in_mode; - if ((error_of_best_block / error_weight_sum) < ctx.config.tune_db_limit) + best_errorvals_in_modes[0] = errorval; + if (errorval < (error_threshold * errorval_mult[i])) { + trace_add_data("exit", "quality hit"); goto END_OF_TESTS; } } +#if !defined(ASTCENC_DIAGNOSTICS) lowest_correl = prepare_block_statistics(bsd->texel_count, blk, ewb); +#endif + + block_skip_two_plane = lowest_correl > ctx.config.tune_two_plane_early_out_limit; // next, test the four possible 1-partition, 2-planes modes for (int i = 0; i < 4; i++) { + TRACE_NODE(node1, "pass"); + trace_add_data("partition_count", 1); + trace_add_data("plane_count", 2); + trace_add_data("plane_channel", i); - if (lowest_correl > ctx.config.tune_two_plane_early_out_limit) + if (block_skip_two_plane) { + trace_add_data("skip", "tune_two_plane_early_out_limit"); continue; } if (blk->grayscale && i != 3) { + trace_add_data("skip", "grayscale block"); continue; } if (!uses_alpha && i == 3) { + trace_add_data("skip", "no alpha channel"); continue; } - compress_symbolic_block_fixed_partition_2_planes( - decode_mode, mode_cutoff, + float errorval = compress_symbolic_block_fixed_partition_2_planes( + ctx.config, false, ctx.config.tune_candidate_limit, + error_threshold * errorval_overshoot, ctx.config.tune_refinement_limit, bsd, 1, // partition count 0, // partition index i, // the color component to test a separate plane of weights for. 
- blk, ewb, tempblocks, &tmpbuf->planes); - - best_errorval_in_mode = 1e30f; - for (unsigned int j = 0; j < ctx.config.tune_candidate_limit; j++) - { - if (tempblocks[j].error_block) - { - continue; - } - - float errorval = compute_symbolic_block_difference(decode_mode, bsd, tempblocks + j, blk, ewb); - if (errorval < best_errorval_in_mode) - { - best_errorval_in_mode = errorval; - } - - if (errorval < error_of_best_block) - { - error_of_best_block = errorval; - scb = tempblocks[j]; - } + blk, ewb, scb, &tmpbuf->planes); - // Modes 1-4 - best_errorvals_in_modes[i + 1] = best_errorval_in_mode; - } - - if ((error_of_best_block / error_weight_sum) < ctx.config.tune_db_limit) + // Modes 7, 10 (13 is unreachable) + if (errorval < error_threshold) { + trace_add_data("exit", "quality hit"); goto END_OF_TESTS; } } // find best blocks for 2, 3 and 4 partitions - for (int partition_count = 2; partition_count <= 4; partition_count++) + for (int partition_count = 2; partition_count <= max_partitions; partition_count++) { - int partition_indices_1plane[2]; - int partition_index_2planes; + int partition_indices_1plane[2] { 0, 0 }; + int partition_index_2planes = 0; find_best_partitionings(bsd, blk, ewb, partition_count, - ctx.config.tune_partition_limit, + ctx.config.tune_partition_index_limit, &(partition_indices_1plane[0]), &(partition_indices_1plane[1]), - &partition_index_2planes); + block_skip_two_plane ? nullptr : &partition_index_2planes); for (int i = 0; i < 2; i++) { - compress_symbolic_block_fixed_partition_1_plane( - decode_mode, mode_cutoff, + TRACE_NODE(node1, "pass"); + trace_add_data("partition_count", partition_count); + trace_add_data("partition_index", partition_indices_1plane[i]); + trace_add_data("plane_count", 1); + trace_add_data("search_mode", i); + + float errorval = compress_symbolic_block_fixed_partition_1_plane( + ctx.config, false, ctx.config.tune_candidate_limit, + error_threshold * errorval_overshoot, ctx.config.tune_refinement_limit, bsd, partition_count, partition_indices_1plane[i], - blk, ewb, tempblocks, &tmpbuf->planes); - - best_errorval_in_mode = 1e30f; - for (unsigned int j = 0; j < ctx.config.tune_candidate_limit; j++) - { - if (tempblocks[j].error_block) - { - continue; - } - - float errorval = compute_symbolic_block_difference(decode_mode, bsd, tempblocks + j, blk, ewb); - if (errorval < best_errorval_in_mode) - { - best_errorval_in_mode = errorval; - } - - if (errorval < error_of_best_block) - { - error_of_best_block = errorval; - scb = tempblocks[j]; - } - } + blk, ewb, scb, &tmpbuf->planes); // Modes 5, 6, 8, 9, 11, 12 - best_errorvals_in_modes[3 * (partition_count - 2) + 5 + i] = best_errorval_in_mode; - - if ((error_of_best_block / error_weight_sum) < ctx.config.tune_db_limit) + best_errorvals_in_modes[3 * (partition_count - 2) + 5 + i] = errorval; + if (errorval < error_threshold) { + trace_add_data("exit", "quality hit"); goto END_OF_TESTS; } } - if (partition_count == 2 && MIN(best_errorvals_in_modes[5], best_errorvals_in_modes[6]) > (best_errorvals_in_modes[0] * ctx.config.tune_partition_early_out_limit)) + if (partition_count == 2 && astc::min(best_errorvals_in_modes[5], best_errorvals_in_modes[6]) > (best_errorvals_in_modes[0] * ctx.config.tune_partition_early_out_limit)) { + trace_add_data("skip", "tune_partition_early_out_limit 1"); goto END_OF_TESTS; } // Skip testing dual weight planes for: // * 4 partitions (can't be encoded by the format) - // * Luminance only blocks (never need for a second plane) - // * Blocks with higher component correlation 
than the tuning cutoff - if ((partition_count == 4) || - (blk->grayscale && !uses_alpha) || - (lowest_correl > ctx.config.tune_two_plane_early_out_limit)) + if (partition_count == 4) { continue; } - - if (lowest_correl <= ctx.config.tune_two_plane_early_out_limit) + // * Luminance only blocks (never need for a second plane) + if (blk->grayscale && !uses_alpha) { - compress_symbolic_block_fixed_partition_2_planes( - decode_mode, - mode_cutoff, - ctx.config.tune_candidate_limit, - ctx.config.tune_refinement_limit, - bsd, - partition_count, - partition_index_2planes & (PARTITION_COUNT - 1), - partition_index_2planes >> PARTITION_BITS, - blk, ewb, tempblocks, &tmpbuf->planes); - - best_errorval_in_mode = 1e30f; - for (unsigned int j = 0; j < ctx.config.tune_candidate_limit; j++) - { - if (tempblocks[j].error_block) - { - continue; - } - - float errorval = compute_symbolic_block_difference(decode_mode, bsd, tempblocks + j, blk, ewb); - if (errorval < best_errorval_in_mode) - { - best_errorval_in_mode = errorval; - } + trace_add_data("skip", "grayscale no alpha block "); + continue; + } - if (errorval < error_of_best_block) - { - error_of_best_block = errorval; - scb = tempblocks[j]; - } - } + // * Blocks with higher component correlation than the tuning cutoff + if (block_skip_two_plane) + { + trace_add_data("skip", "tune_two_plane_early_out_limit"); + continue; + } - // Modes 7, 10 (13 is unreachable) - best_errorvals_in_modes[3 * (partition_count - 2) + 5 + 2] = best_errorval_in_mode; - if ((error_of_best_block / error_weight_sum) < ctx.config.tune_db_limit) - { - goto END_OF_TESTS; - } + TRACE_NODE(node1, "pass"); + trace_add_data("partition_count", partition_count); + trace_add_data("partition_index", partition_index_2planes & (PARTITION_COUNT - 1)); + trace_add_data("plane_count", 2); + trace_add_data("plane_channel", partition_index_2planes >> PARTITION_BITS); + + float errorval = compress_symbolic_block_fixed_partition_2_planes( + ctx.config, + false, + ctx.config.tune_candidate_limit, + error_threshold * errorval_overshoot, + ctx.config.tune_refinement_limit, + bsd, + partition_count, + partition_index_2planes & (PARTITION_COUNT - 1), + partition_index_2planes >> PARTITION_BITS, + blk, ewb, scb, &tmpbuf->planes); + + // Modes 7, 10 (13 is unreachable) + if (errorval < error_threshold) + { + trace_add_data("exit", "quality hit"); + goto END_OF_TESTS; } } + trace_add_data("exit", "quality not hit"); + END_OF_TESTS: - // compress/decompress to a physical block + // Compress to a physical block symbolic_to_physical(*bsd, scb, pcb); } diff --git a/libkram/astc-encoder/astcenc_compute_variance.cpp b/libkram/astc-encoder/astcenc_compute_variance.cpp index ae7f1170..eee65704 100644 --- a/libkram/astc-encoder/astcenc_compute_variance.cpp +++ b/libkram/astc-encoder/astcenc_compute_variance.cpp @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2011-2020 Arm Limited +// Copyright 2011-2021 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. You may obtain a copy @@ -34,8 +34,6 @@ #include -#define USE_2DARRAY 1 - /** * @brief Generate a prefix-sum array using Brent-Kung algorithm. * @@ -49,7 +47,7 @@ * @param stride The item spacing in the array; i.e. dense arrays should use 1. 
*/ static void brent_kung_prefix_sum( - float4* d, + vfloat4* d, size_t items, int stride ) { @@ -65,7 +63,7 @@ static void brent_kung_prefix_sum( size_t start = lc_stride - 1; size_t iters = items >> log2_stride; - float4 *da = d + (start * stride); + vfloat4 *da = d + (start * stride); ptrdiff_t ofs = -(ptrdiff_t)(step * stride); size_t ofs_stride = stride << log2_stride; @@ -89,7 +87,7 @@ static void brent_kung_prefix_sum( size_t start = step + lc_stride - 1; size_t iters = (items - step) >> log2_stride; - float4 *da = d + (start * stride); + vfloat4 *da = d + (start * stride); ptrdiff_t ofs = -(ptrdiff_t)(step * stride); size_t ofs_stride = stride << log2_stride; @@ -119,26 +117,26 @@ static void compute_pixel_region_variance( float rgb_power = arg->rgb_power; float alpha_power = arg->alpha_power; astcenc_swizzle swz = arg->swz; - int have_z = arg->have_z; + bool have_z = arg->have_z; - int size_x = arg->size.r; - int size_y = arg->size.g; - int size_z = arg->size.b; + int size_x = arg->size_x; + int size_y = arg->size_y; + int size_z = arg->size_z; - int offset_x = arg->offset.r; - int offset_y = arg->offset.g; - int offset_z = arg->offset.b; + int offset_x = arg->offset_x; + int offset_y = arg->offset_y; + int offset_z = arg->offset_z; int avg_var_kernel_radius = arg->avg_var_kernel_radius; int alpha_kernel_radius = arg->alpha_kernel_radius; float *input_alpha_averages = ctx.input_alpha_averages; - float4 *input_averages = ctx.input_averages; - float4 *input_variances = ctx.input_variances; - float4 *work_memory = arg->work_memory; + vfloat4 *input_averages = ctx.input_averages; + vfloat4 *input_variances = ctx.input_variances; + vfloat4 *work_memory = arg->work_memory; // Compute memory sizes and dimensions that we need - int kernel_radius = MAX(avg_var_kernel_radius, alpha_kernel_radius); + int kernel_radius = astc::max(avg_var_kernel_radius, alpha_kernel_radius); int kerneldim = 2 * kernel_radius + 1; int kernel_radius_xy = kernel_radius; int kernel_radius_z = have_z ? kernel_radius : 0; @@ -151,8 +149,8 @@ static void compute_pixel_region_variance( int zd_start = have_z ? 
1 : 0; int are_powers_1 = (rgb_power == 1.0f) && (alpha_power == 1.0f); - float4 *varbuf1 = work_memory; - float4 *varbuf2 = work_memory + sizeprod; + vfloat4 *varbuf1 = work_memory; + vfloat4 *varbuf2 = work_memory + sizeprod; // Scaling factors to apply to Y and Z for accesses into the work buffers int yst = padsize_x; @@ -166,18 +164,9 @@ static void compute_pixel_region_variance( #define VARBUF1(z, y, x) varbuf1[z * zst + y * yst + x] #define VARBUF2(z, y, x) varbuf2[z * zst + y * yst + x] - // True if any non-identity swizzle - bool needs_swz = (swz.r != ASTCENC_SWZ_R) || (swz.g != ASTCENC_SWZ_G) || - (swz.b != ASTCENC_SWZ_B) || (swz.a != ASTCENC_SWZ_A); - // Load N and N^2 values into the work buffers if (img->data_type == ASTCENC_TYPE_U8) { -#if USE_2DARRAY - uint8_t* data8 = static_cast(img->data); -#else - uint8_t*** data8 = static_cast(img->data); -#endif // Swizzle data structure 4 = ZERO, 5 = ONE uint8_t data[6]; data[ASTCENC_SWZ_0] = 0; @@ -187,6 +176,7 @@ static void compute_pixel_region_variance( { int z_src = (z - zd_start) + offset_z - kernel_radius_z; z_src = astc::clamp(z_src, 0, (int)(img->dim_z - 1)); + uint8_t* data8 = static_cast(img->data[z_src]); for (int y = 1; y < padsize_y; y++) { @@ -198,48 +188,25 @@ static void compute_pixel_region_variance( int x_src = (x - 1) + offset_x - kernel_radius_xy; x_src = astc::clamp(x_src, 0, (int)(img->dim_x - 1)); - float4 d; -#if USE_2DARRAY - int px = (y_src * img->dim_x + x_src) * 4; - - uint8_t r = data8[px + 0]; - uint8_t g = data8[px + 1]; - uint8_t b = data8[px + 2]; - uint8_t a = data8[px + 3]; - - if (needs_swz) - { - data[0] = r; - data[1] = g; - data[2] = b; - data[3] = a; - - r = data[swz.r]; - g = data[swz.g]; - b = data[swz.b]; - a = data[swz.a]; - } -#else - data[0] = data8[z_src][y_src][4 * x_src ]; - data[1] = data8[z_src][y_src][4 * x_src + 1]; - data[2] = data8[z_src][y_src][4 * x_src + 2]; - data[3] = data8[z_src][y_src][4 * x_src + 3]; - - uint8_t r = data[swz.r]; - uint8_t g = data[swz.g]; - uint8_t b = data[swz.b]; - uint8_t a = data[swz.a]; -#endif - // int to float conversion - d = float4((float)r, (float)g, (float)b, float(a)); - d = d * (1.0f / 255.0f); + data[0] = data8[(4 * img->dim_x * y_src) + (4 * x_src )]; + data[1] = data8[(4 * img->dim_x * y_src) + (4 * x_src + 1)]; + data[2] = data8[(4 * img->dim_x * y_src) + (4 * x_src + 2)]; + data[3] = data8[(4 * img->dim_x * y_src) + (4 * x_src + 3)]; + + uint8_t r = data[swz.r]; + uint8_t g = data[swz.g]; + uint8_t b = data[swz.b]; + uint8_t a = data[swz.a]; + + vfloat4 d = vfloat4 (r * (1.0f / 255.0f), + g * (1.0f / 255.0f), + b * (1.0f / 255.0f), + a * (1.0f / 255.0f)); if (!are_powers_1) { - d.r = powf(MAX(d.r, 1e-6f), rgb_power); - d.g = powf(MAX(d.g, 1e-6f), rgb_power); - d.b = powf(MAX(d.b, 1e-6f), rgb_power); - d.a = powf(MAX(d.a, 1e-6f), alpha_power); + vfloat4 exp(rgb_power, rgb_power, rgb_power, alpha_power); + d = pow(max(d, 1e-6f), exp); } VARBUF1(z, y, x) = d; @@ -250,9 +217,6 @@ static void compute_pixel_region_variance( } else if (img->data_type == ASTCENC_TYPE_F16) { -// TODO: apply USE_2DARRAY to FP16 inputs - uint16_t*** data16 = static_cast(img->data); - // Swizzle data structure 4 = ZERO, 5 = ONE (in FP16) uint16_t data[6]; data[ASTCENC_SWZ_0] = 0; @@ -262,6 +226,7 @@ static void compute_pixel_region_variance( { int z_src = (z - zd_start) + offset_z - kernel_radius_z; z_src = astc::clamp(z_src, 0, (int)(img->dim_z - 1)); + uint16_t* data16 = static_cast(img->data[z_src]); for (int y = 1; y < padsize_y; y++) { @@ -273,27 +238,18 @@ 
static void compute_pixel_region_variance( int x_src = (x - 1) + offset_x - kernel_radius_xy; x_src = astc::clamp(x_src, 0, (int)(img->dim_x - 1)); - data[0] = data16[z_src][y_src][4 * x_src ]; - data[1] = data16[z_src][y_src][4 * x_src + 1]; - data[2] = data16[z_src][y_src][4 * x_src + 2]; - data[3] = data16[z_src][y_src][4 * x_src + 3]; - - uint16_t r = data[swz.r]; - uint16_t g = data[swz.g]; - uint16_t b = data[swz.b]; - uint16_t a = data[swz.a]; + data[0] = data16[(4 * img->dim_x * y_src) + (4 * x_src )]; + data[1] = data16[(4 * img->dim_x * y_src) + (4 * x_src + 1)]; + data[2] = data16[(4 * img->dim_x * y_src) + (4 * x_src + 2)]; + data[3] = data16[(4 * img->dim_x * y_src) + (4 * x_src + 3)]; - float4 d = float4(sf16_to_float(r), - sf16_to_float(g), - sf16_to_float(b), - sf16_to_float(a)); + vint4 di(data[swz.r], data[swz.g], data[swz.b], data[swz.a]); + vfloat4 d = float16_to_float(di); if (!are_powers_1) { - d.r = powf(MAX(d.r, 1e-6f), rgb_power); - d.g = powf(MAX(d.g, 1e-6f), rgb_power); - d.b = powf(MAX(d.b, 1e-6f), rgb_power); - d.a = powf(MAX(d.a, 1e-6f), alpha_power); + vfloat4 exp(rgb_power, rgb_power, rgb_power, alpha_power); + d = pow(max(d, 1e-6f), exp); } VARBUF1(z, y, x) = d; @@ -305,11 +261,7 @@ static void compute_pixel_region_variance( else // if (img->data_type == ASTCENC_TYPE_F32) { assert(img->data_type == ASTCENC_TYPE_F32); -#if USE_2DARRAY - float4* data32 = static_cast(img->data); -#else - float*** data32 = static_cast(img->data); -#endif + // Swizzle data structure 4 = ZERO, 5 = ONE (in FP16) float data[6]; data[ASTCENC_SWZ_0] = 0.0f; @@ -319,6 +271,7 @@ static void compute_pixel_region_variance( { int z_src = (z - zd_start) + offset_z - kernel_radius_z; z_src = astc::clamp(z_src, 0, (int)(img->dim_z - 1)); + float* data32 = static_cast(img->data[z_src]); for (int y = 1; y < padsize_y; y++) { @@ -330,44 +283,22 @@ static void compute_pixel_region_variance( int x_src = (x - 1) + offset_x - kernel_radius_xy; x_src = astc::clamp(x_src, 0, (int)(img->dim_x - 1)); -#if USE_2DARRAY - assert(z_src == 0); - float4 d = data32[y_src * img->dim_x + x_src]; - - if (needs_swz) - { - data[0] = d.r; - data[1] = d.g; - data[2] = d.b; - data[3] = d.a; - - float r = data[swz.r]; - float g = data[swz.g]; - float b = data[swz.b]; - float a = data[swz.a]; - - d = float4(r,g,b,a); - } -#else - data[0] = data32[z_src][y_src][4 * x_src ]; - data[1] = data32[z_src][y_src][4 * x_src + 1]; - data[2] = data32[z_src][y_src][4 * x_src + 2]; - data[3] = data32[z_src][y_src][4 * x_src + 3]; - + data[0] = data32[(4 * img->dim_x * y_src) + (4 * x_src )]; + data[1] = data32[(4 * img->dim_x * y_src) + (4 * x_src + 1)]; + data[2] = data32[(4 * img->dim_x * y_src) + (4 * x_src + 2)]; + data[3] = data32[(4 * img->dim_x * y_src) + (4 * x_src + 3)]; + float r = data[swz.r]; float g = data[swz.g]; float b = data[swz.b]; float a = data[swz.a]; - float4 d = float4(r, g, b, a); -#endif + vfloat4 d(r, g, b, a); if (!are_powers_1) { - d.r = powf(MAX(d.r, 1e-6f), rgb_power); - d.g = powf(MAX(d.g, 1e-6f), rgb_power); - d.b = powf(MAX(d.b, 1e-6f), rgb_power); - d.a = powf(MAX(d.a, 1e-6f), alpha_power); + vfloat4 exp(rgb_power, rgb_power, rgb_power, alpha_power); + d = pow(max(d, 1e-6f), exp); } VARBUF1(z, y, x) = d; @@ -378,7 +309,7 @@ static void compute_pixel_region_variance( } // Pad with an extra layer of 0s; this forms the edge of the SAT tables - float4 vbz = float4(0.0f); + vfloat4 vbz = vfloat4::zero(); for (int z = 0; z < padsize_z; z++) { for (int y = 0; y < padsize_y; y++) @@ -479,11 +410,6 @@ 
static void compute_pixel_region_variance( int z_low = z_src - alpha_kernel_radius; int z_high = z_src + alpha_kernel_radius + 1; - astc::clamp(z_src, 0, (int)(img->dim_z - 1)); - astc::clamp(z_low, 0, (int)(img->dim_z - 1)); - astc::clamp(z_high, 0, (int)(img->dim_z - 1)); - - for (int y = 0; y < size_y; y++) { int y_src = y + kernel_radius_xy; @@ -491,10 +417,6 @@ static void compute_pixel_region_variance( int y_low = y_src - alpha_kernel_radius; int y_high = y_src + alpha_kernel_radius + 1; - astc::clamp(y_src, 0, (int)(img->dim_y - 1)); - astc::clamp(y_low, 0, (int)(img->dim_y - 1)); - astc::clamp(y_high, 0, (int)(img->dim_y - 1)); - for (int x = 0; x < size_x; x++) { int x_src = x + kernel_radius_xy; @@ -502,25 +424,21 @@ static void compute_pixel_region_variance( int x_low = x_src - alpha_kernel_radius; int x_high = x_src + alpha_kernel_radius + 1; - astc::clamp(x_src, 0, (int)(img->dim_x - 1)); - astc::clamp(x_low, 0, (int)(img->dim_x - 1)); - astc::clamp(x_high, 0, (int)(img->dim_x - 1)); - // Summed-area table lookups for alpha average - float vasum = ( VARBUF1(z_high, y_low, x_low).a - - VARBUF1(z_high, y_low, x_high).a - - VARBUF1(z_high, y_high, x_low).a - + VARBUF1(z_high, y_high, x_high).a) - - ( VARBUF1(z_low, y_low, x_low).a - - VARBUF1(z_low, y_low, x_high).a - - VARBUF1(z_low, y_high, x_low).a - + VARBUF1(z_low, y_high, x_high).a); + float vasum = ( VARBUF1(z_high, y_low, x_low).lane<3>() + - VARBUF1(z_high, y_low, x_high).lane<3>() + - VARBUF1(z_high, y_high, x_low).lane<3>() + + VARBUF1(z_high, y_high, x_high).lane<3>()) - + ( VARBUF1(z_low, y_low, x_low).lane<3>() + - VARBUF1(z_low, y_low, x_high).lane<3>() + - VARBUF1(z_low, y_high, x_low).lane<3>() + + VARBUF1(z_low, y_high, x_high).lane<3>()); int out_index = z_dst * zdt + y_dst * ydt + x_dst; input_alpha_averages[out_index] = (vasum * alpha_rsamples); // Summed-area table lookups for RGBA average and variance - float4 v1sum = ( VARBUF1(z_high, y_low, x_low) + vfloat4 v1sum = ( VARBUF1(z_high, y_low, x_low) - VARBUF1(z_high, y_low, x_high) - VARBUF1(z_high, y_high, x_low) + VARBUF1(z_high, y_high, x_high)) - @@ -529,7 +447,7 @@ static void compute_pixel_region_variance( - VARBUF1(z_low, y_high, x_low) + VARBUF1(z_low, y_high, x_high)); - float4 v2sum = ( VARBUF2(z_high, y_low, x_low) + vfloat4 v2sum = ( VARBUF2(z_high, y_low, x_low) - VARBUF2(z_high, y_low, x_high) - VARBUF2(z_high, y_high, x_low) + VARBUF2(z_high, y_high, x_high)) - @@ -539,11 +457,11 @@ static void compute_pixel_region_variance( + VARBUF2(z_low, y_high, x_high)); // Compute and emit the average - float4 avg = v1sum * avg_var_rsamples; + vfloat4 avg = v1sum * avg_var_rsamples; input_averages[out_index] = avg; // Compute and emit the actual variance - float4 variance = mul2 * v2sum - mul1 * (v1sum * v1sum); + vfloat4 variance = mul2 * v2sum - mul1 * (v1sum * v1sum); input_variances[out_index] = variance; } } @@ -558,10 +476,6 @@ static void compute_pixel_region_variance( int y_low = y_src - alpha_kernel_radius; int y_high = y_src + alpha_kernel_radius + 1; - astc::clamp(y_src, 0, (int)(img->dim_y - 1)); - astc::clamp(y_low, 0, (int)(img->dim_y - 1)); - astc::clamp(y_high, 0, (int)(img->dim_y - 1)); - for (int x = 0; x < size_x; x++) { int x_src = x + kernel_radius_xy; @@ -569,36 +483,32 @@ static void compute_pixel_region_variance( int x_low = x_src - alpha_kernel_radius; int x_high = x_src + alpha_kernel_radius + 1; - astc::clamp(x_src, 0, (int)(img->dim_x - 1)); - astc::clamp(x_low, 0, (int)(img->dim_x - 1)); - astc::clamp(x_high, 0, 
(int)(img->dim_x - 1)); - // Summed-area table lookups for alpha average - float vasum = VARBUF1(0, y_low, x_low).a - - VARBUF1(0, y_low, x_high).a - - VARBUF1(0, y_high, x_low).a - + VARBUF1(0, y_high, x_high).a; + float vasum = VARBUF1(0, y_low, x_low).lane<3>() + - VARBUF1(0, y_low, x_high).lane<3>() + - VARBUF1(0, y_high, x_low).lane<3>() + + VARBUF1(0, y_high, x_high).lane<3>(); int out_index = y_dst * ydt + x_dst; input_alpha_averages[out_index] = (vasum * alpha_rsamples); // summed-area table lookups for RGBA average and variance - float4 v1sum = VARBUF1(0, y_low, x_low) - - VARBUF1(0, y_low, x_high) - - VARBUF1(0, y_high, x_low) - + VARBUF1(0, y_high, x_high); + vfloat4 v1sum = VARBUF1(0, y_low, x_low) + - VARBUF1(0, y_low, x_high) + - VARBUF1(0, y_high, x_low) + + VARBUF1(0, y_high, x_high); - float4 v2sum = VARBUF2(0, y_low, x_low) - - VARBUF2(0, y_low, x_high) - - VARBUF2(0, y_high, x_low) - + VARBUF2(0, y_high, x_high); + vfloat4 v2sum = VARBUF2(0, y_low, x_low) + - VARBUF2(0, y_low, x_high) + - VARBUF2(0, y_high, x_low) + + VARBUF2(0, y_high, x_high); // Compute and emit the average - float4 avg = v1sum * avg_var_rsamples; + vfloat4 avg = v1sum * avg_var_rsamples; input_averages[out_index] = avg; // Compute and emit the actual variance - float4 variance = mul2 * v2sum - mul1 * (v1sum * v1sum); + vfloat4 variance = mul2 * v2sum - mul1 * (v1sum * v1sum); input_variances[out_index] = variance; } } @@ -610,43 +520,44 @@ void compute_averages_and_variances( const avg_var_args &ag ) { pixel_region_variance_args arg = ag.arg; - arg.work_memory = new float4[ag.work_memory_size]; + arg.work_memory = new vfloat4[ag.work_memory_size]; - int size_x = ag.img_size.r; - int size_y = ag.img_size.g; - int size_z = ag.img_size.b; + int size_x = ag.img_size_x; + int size_y = ag.img_size_y; + int size_z = ag.img_size_z; - int step_x = ag.blk_size.r; - int step_y = ag.blk_size.g; - int step_z = ag.blk_size.b; + int step_xy = ag.blk_size_xy; + int step_z = ag.blk_size_z; - int y_tasks = (size_y + step_y - 1) / step_y; + int y_tasks = (size_y + step_xy - 1) / step_xy; // All threads run this processing loop until there is no work remaining while (true) { unsigned int count; - unsigned int base = ctx.manage_avg_var.get_task_assignment(1, count); + unsigned int base = ctx.manage_avg_var.get_task_assignment(16, count); if (!count) { break; } - assert(count == 1); - int z = (base / (y_tasks)) * step_z; - int y = (base - (z * y_tasks)) * step_y; + for (unsigned int i = base; i < base + count; i++) + { + int z = (i / (y_tasks)) * step_z; + int y = (i - (z * y_tasks)) * step_xy; - arg.size.b = MIN(step_z, size_z - z); - arg.offset.b = z; + arg.size_z = astc::min(step_z, size_z - z); + arg.offset_z = z; - arg.size.g = MIN(step_y, size_y - y); - arg.offset.g = y; + arg.size_y = astc::min(step_xy, size_y - y); + arg.offset_y = y; - for (int x = 0; x < size_x; x += step_x) - { - arg.size.r = MIN(step_x, size_x - x); - arg.offset.r = x; - compute_pixel_region_variance(ctx, &arg); + for (int x = 0; x < size_x; x += step_xy) + { + arg.size_x = astc::min(step_xy, size_x - x); + arg.offset_x = x; + compute_pixel_region_variance(ctx, &arg); + } } ctx.manage_avg_var.complete_task_assignment(count); @@ -671,20 +582,24 @@ unsigned int init_compute_averages_and_variances( int size_z = img.dim_z; // Compute maximum block size and from that the working memory buffer size - int kernel_radius = MAX(avg_var_kernel_radius, alpha_kernel_radius); + int kernel_radius = astc::max(avg_var_kernel_radius, alpha_kernel_radius); int 
kerneldim = 2 * kernel_radius + 1; - int have_z = (size_z > 1); + bool have_z = (size_z > 1); int max_blk_size_xy = have_z ? 16 : 32; - int max_blk_size_z = MIN(size_z, have_z ? 16 : 1); + int max_blk_size_z = astc::min(size_z, have_z ? 16 : 1); int max_padsize_xy = max_blk_size_xy + kerneldim; int max_padsize_z = max_blk_size_z + (have_z ? kerneldim : 0); // Perform block-wise averages-and-variances calculations across the image // Initialize fields which are not populated until later - arg.size = int3(0); - arg.offset = int3(0); + arg.size_x = 0; + arg.size_y = 0; + arg.size_z = 0; + arg.offset_x = 0; + arg.offset_y = 0; + arg.offset_z = 0; arg.work_memory = nullptr; arg.img = &img; @@ -696,8 +611,11 @@ unsigned int init_compute_averages_and_variances( arg.alpha_kernel_radius = alpha_kernel_radius; ag.arg = arg; - ag.img_size = int3(size_x, size_y, size_z); - ag.blk_size = int3(max_blk_size_xy, max_blk_size_xy, max_blk_size_z); + ag.img_size_x = size_x; + ag.img_size_y = size_y; + ag.img_size_z = size_z; + ag.blk_size_xy = max_blk_size_xy; + ag.blk_size_z = max_blk_size_z; ag.work_memory_size = 2 * max_padsize_xy * max_padsize_xy * max_padsize_z; // The parallel task count diff --git a/libkram/astc-encoder/astcenc_decompress_symbolic.cpp b/libkram/astc-encoder/astcenc_decompress_symbolic.cpp index e1038ec1..ac4a1cea 100644 --- a/libkram/astc-encoder/astcenc_decompress_symbolic.cpp +++ b/libkram/astc-encoder/astcenc_decompress_symbolic.cpp @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2011-2020 Arm Limited +// Copyright 2011-2021 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. You may obtain a copy @@ -26,51 +26,114 @@ static int compute_value_of_texel_int( int texel_to_get, - const decimation_table* it, + const decimation_table* dt, const int* weights ) { int summed_value = 8; - int weights_to_evaluate = it->texel_num_weights[texel_to_get]; + int weights_to_evaluate = dt->texel_weight_count[texel_to_get]; for (int i = 0; i < weights_to_evaluate; i++) { - summed_value += weights[it->texel_weights[texel_to_get][i]] * it->texel_weights_int[texel_to_get][i]; + summed_value += weights[dt->texel_weights_t4[texel_to_get][i]] + * dt->texel_weights_int_t4[texel_to_get][i]; } return summed_value >> 4; } -static uint4 lerp_color_int( +static vint4 lerp_color_int( astcenc_profile decode_mode, - uint4 color0, - uint4 color1, + vint4 color0, + vint4 color1, int weight, int plane2_weight, - int plane2_color_component // -1 in 1-plane mode + vmask4 plane2_mask ) { - uint4 weight1 = uint4( - plane2_color_component == 0 ? plane2_weight : weight, - plane2_color_component == 1 ? plane2_weight : weight, - plane2_color_component == 2 ? plane2_weight : weight, - plane2_color_component == 3 ? 
plane2_weight : weight); - - uint4 weight0 = uint4(64, 64, 64, 64) - weight1; + vint4 weight1 = select(vint4(weight), vint4(plane2_weight), plane2_mask); + vint4 weight0 = vint4(64) - weight1; if (decode_mode == ASTCENC_PRF_LDR_SRGB) { - color0 = uint4(color0.r >> 8, color0.g >> 8, color0.b >> 8, color0.a >> 8); - color1 = uint4(color1.r >> 8, color1.g >> 8, color1.b >> 8, color1.a >> 8); + color0 = asr<8>(color0); + color1 = asr<8>(color1); } - uint4 color = (color0 * weight0) + (color1 * weight1) + uint4(32, 32, 32, 32); - color = uint4(color.r >> 6, color.g >> 6, color.b >> 6, color.a >> 6); + vint4 color = (color0 * weight0) + (color1 * weight1) + vint4(32); + color = asr<6>(color); if (decode_mode == ASTCENC_PRF_LDR_SRGB) { - color = color * 257u; + color = color * vint4(257); } return color; } +// Turn packed unorm16 or LNS data into generic float data +static inline vfloat4 decode_texel( + vint4 data, + vmask4 lns_mask +) { + vint4 color_lns = vint4::zero(); + vint4 color_unorm = vint4::zero(); + + if (any(lns_mask)) + { + color_lns = lns_to_sf16(data); + } + + if (!all(lns_mask)) + { + color_unorm = unorm16_to_sf16(data); + } + + // Pick channels and then covert to FP16 + vint4 datai = select(color_unorm, color_lns, lns_mask); + return float16_to_float(datai); +} + +void unpack_weights( + const block_size_descriptor& bsd, + const symbolic_compressed_block& scb, + const decimation_table& dt, + bool is_dual_plane, + int weight_quant_level, + int weights_plane1[MAX_TEXELS_PER_BLOCK], + int weights_plane2[MAX_TEXELS_PER_BLOCK] +) { + // First, unquantize the weights ... + int uq_plane1_weights[MAX_WEIGHTS_PER_BLOCK]; + int uq_plane2_weights[MAX_WEIGHTS_PER_BLOCK]; + int weight_count = dt.weight_count; + + const quantization_and_transfer_table *qat = &(quant_and_xfer_tables[weight_quant_level]); + + for (int i = 0; i < weight_count; i++) + { + uq_plane1_weights[i] = qat->unquantized_value[scb.weights[i]]; + } + + if (is_dual_plane) + { + for (int i = 0; i < weight_count; i++) + { + uq_plane2_weights[i] = qat->unquantized_value[scb.weights[i + PLANE2_WEIGHTS_OFFSET]]; + } + } + + // Second, undecimate the weights ... + for (int i = 0; i < bsd.texel_count; i++) + { + weights_plane1[i] = compute_value_of_texel_int(i, &dt, uq_plane1_weights); + } + + if (is_dual_plane) + { + for (int i = 0; i < bsd.texel_count; i++) + { + weights_plane2[i] = compute_value_of_texel_int(i, &dt, uq_plane2_weights); + } + } +} + void decompress_symbolic_block( astcenc_profile decode_mode, const block_size_descriptor* bsd, @@ -84,9 +147,14 @@ void decompress_symbolic_block( blk->ypos = ypos; blk->zpos = zpos; + blk->data_min = vfloat4::zero(); + blk->data_max = vfloat4::zero(); + blk->grayscale = false; + // if we detected an error-block, blow up immediately. if (scb->error_block) { + // TODO: Check this - isn't linear LDR magenta too? Same below ... if (decode_mode == ASTCENC_PRF_LDR_SRGB) { for (int i = 0; i < bsd->texel_count; i++) @@ -119,74 +187,52 @@ void decompress_symbolic_block( if (scb->block_mode < 0) { - float red = 0, green = 0, blue = 0, alpha = 0; + vfloat4 color; int use_lns = 0; int use_nan = 0; if (scb->block_mode == -2) { - int ired = scb->constant_color[0]; - int igreen = scb->constant_color[1]; - int iblue = scb->constant_color[2]; - int ialpha = scb->constant_color[3]; + vint4 colori(scb->constant_color); // For sRGB decoding a real decoder would just use the top 8 bits // for color conversion. 
We don't color convert, so linearly scale // the top 8 bits into the full 16 bit dynamic range if (decode_mode == ASTCENC_PRF_LDR_SRGB) { - ired = (ired >> 8) * 257; - igreen = (igreen >> 8) * 257; - iblue = (iblue >> 8) * 257; - ialpha = (ialpha >> 8) * 257; + colori = asr<8>(colori) * 257; } - red = sf16_to_float(unorm16_to_sf16(ired)); - green = sf16_to_float(unorm16_to_sf16(igreen)); - blue = sf16_to_float(unorm16_to_sf16(iblue)); - alpha = sf16_to_float(unorm16_to_sf16(ialpha)); - use_lns = 0; - use_nan = 0; + vint4 colorf16 = unorm16_to_sf16(colori); + color = float16_to_float(colorf16); } else { switch (decode_mode) { case ASTCENC_PRF_LDR_SRGB: - red = 1.0f; - green = 0.0f; - blue = 1.0f; - alpha = 1.0f; - use_lns = 0; - use_nan = 0; + color = vfloat4(1.0f, 0.0f, 1.0f, 1.0f); break; case ASTCENC_PRF_LDR: - red = 0.0f; - green = 0.0f; - blue = 0.0f; - alpha = 0.0f; - use_lns = 0; + color = vfloat4(0.0f); use_nan = 1; break; case ASTCENC_PRF_HDR_RGB_LDR_A: case ASTCENC_PRF_HDR: // constant-color block; unpack from FP16 to FP32. - red = sf16_to_float(scb->constant_color[0]); - green = sf16_to_float(scb->constant_color[1]); - blue = sf16_to_float(scb->constant_color[2]); - alpha = sf16_to_float(scb->constant_color[3]); + color = float16_to_float(vint4(scb->constant_color)); use_lns = 1; - use_nan = 0; break; } } + // TODO: Skip this and add constant color transfer to img block? for (int i = 0; i < bsd->texel_count; i++) { - blk->data_r[i] = red; - blk->data_g[i] = green; - blk->data_b[i] = blue; - blk->data_a[i] = alpha; + blk->data_r[i] = color.lane<0>(); + blk->data_g[i] = color.lane<1>(); + blk->data_b[i] = color.lane<2>(); + blk->data_a[i] = color.lane<3>(); blk->rgb_lns[i] = use_lns; blk->alpha_lns[i] = use_lns; blk->nan_texel[i] = use_nan; @@ -201,20 +247,20 @@ void decompress_symbolic_block( pt += scb->partition_index; // get the appropriate block descriptor - const decimation_table *const *ixtab2 = bsd->decimation_tables; + const decimation_table *const *dts = bsd->decimation_tables; - const int packed_index = bsd->block_mode_to_packed[scb->block_mode]; - assert(packed_index >= 0 && packed_index < bsd->block_mode_packed_count); - const block_mode& bm = bsd->block_modes_packed[packed_index]; - const decimation_table *it = ixtab2[bm.decimation_mode]; + const int packed_index = bsd->block_mode_packed_index[scb->block_mode]; + assert(packed_index >= 0 && packed_index < bsd->block_mode_count); + const block_mode& bm = bsd->block_modes[packed_index]; + const decimation_table *dt = dts[bm.decimation_mode]; int is_dual_plane = bm.is_dual_plane; - int weight_quantization_level = bm.quantization_mode; + int weight_quant_level = bm.quant_mode; // decode the color endpoints - uint4 color_endpoint0[4]; - uint4 color_endpoint1[4]; + vint4 color_endpoint0[4]; + vint4 color_endpoint1[4]; int rgb_hdr_endpoint[4]; int alpha_hdr_endpoint[4]; int nan_endpoint[4]; @@ -223,7 +269,7 @@ void decompress_symbolic_block( { unpack_color_endpoints(decode_mode, scb->color_formats[i], - scb->color_quantization_level, + scb->color_quant_level, scb->color_values[i], &(rgb_hdr_endpoint[i]), &(alpha_hdr_endpoint[i]), @@ -232,77 +278,53 @@ void decompress_symbolic_block( &(color_endpoint1[i])); } - // first unquantize the weights - int uq_plane1_weights[MAX_WEIGHTS_PER_BLOCK]; - int uq_plane2_weights[MAX_WEIGHTS_PER_BLOCK]; - int weight_count = it->num_weights; - - const quantization_and_transfer_table *qat = &(quant_and_xfer_tables[weight_quantization_level]); - - for (int i = 0; i < weight_count; i++) - { - 
uq_plane1_weights[i] = qat->unquantized_value[scb->plane1_weights[i]]; - } - - if (is_dual_plane) - { - for (int i = 0; i < weight_count; i++) - { - uq_plane2_weights[i] = qat->unquantized_value[scb->plane2_weights[i]]; - } - } - - // then undecimate them. + // Unquantize and undecimate the weights int weights[MAX_TEXELS_PER_BLOCK]; int plane2_weights[MAX_TEXELS_PER_BLOCK]; + unpack_weights(*bsd, *scb, *dt, is_dual_plane, weight_quant_level, weights, plane2_weights); - for (int i = 0; i < bsd->texel_count; i++) - { - weights[i] = compute_value_of_texel_int(i, it, uq_plane1_weights); - } + // Now that we have endpoint colors and weights, we can unpack texel colors + int plane2_color_component = is_dual_plane ? scb->plane2_color_component : -1; + vmask4 plane2_mask = vint4::lane_id() == vint4(plane2_color_component); - if (is_dual_plane) + for (int i = 0; i < partition_count; i++) { - for (int i = 0; i < bsd->texel_count; i++) + vint4 ep0 = color_endpoint0[i]; + vint4 ep1 = color_endpoint1[i]; + bool rgb_lns = rgb_hdr_endpoint[i]; + bool nan = nan_endpoint[i]; + bool a_lns = alpha_hdr_endpoint[i]; + vmask4 lns_mask(rgb_lns, rgb_lns, rgb_lns, a_lns); + + int texel_count = pt->partition_texel_count[i]; + for (int j = 0; j < texel_count; j++) { - plane2_weights[i] = compute_value_of_texel_int(i, it, uq_plane2_weights); + int tix = pt->texels_of_partition[i][j]; + vint4 color = lerp_color_int(decode_mode, + ep0, + ep1, + weights[tix], + plane2_weights[tix], + plane2_mask); + + vfloat4 colorf = decode_texel(color, lns_mask); + + blk->nan_texel[tix] = nan; + blk->data_r[tix] = colorf.lane<0>(); + blk->data_g[tix] = colorf.lane<1>(); + blk->data_b[tix] = colorf.lane<2>(); + blk->data_a[tix] = colorf.lane<3>(); } } - - int plane2_color_component = scb->plane2_color_component; - - // now that we have endpoint colors and weights, we can unpack actual colors for - // each texel. - for (int i = 0; i < bsd->texel_count; i++) - { - int partition = pt->partition_of_texel[i]; - - uint4 color = lerp_color_int(decode_mode, - color_endpoint0[partition], - color_endpoint1[partition], - weights[i], - plane2_weights[i], - is_dual_plane ? plane2_color_component : -1); - - blk->rgb_lns[i] = rgb_hdr_endpoint[partition]; - blk->alpha_lns[i] = alpha_hdr_endpoint[partition]; - blk->nan_texel[i] = nan_endpoint[partition]; - - blk->data_r[i] = (float)color.r; - blk->data_g[i] = (float)color.g; - blk->data_b[i] = (float)color.b; - blk->data_a[i] = (float)color.a; - } - - imageblock_initialize_orig_from_work(blk, bsd->texel_count); - update_imageblock_flags(blk, bsd->xdim, bsd->ydim, bsd->zdim); } +// Returns a negative error for encodings we want to reject as a part of a +// heuristic check, e.g. for RGBM textures which have zero M values. float compute_symbolic_block_difference( - astcenc_profile decode_mode, + const astcenc_config& config, const block_size_descriptor* bsd, const symbolic_compressed_block* scb, - const imageblock* pb, + const imageblock* blk, const error_weight_block *ewb ) { // if we detected an error-block, blow up immediately. 
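The hunks above replace the old per-component ternaries in lerp_color_int with a single lane mask built from vint4::lane_id() == vint4(plane2_color_component), and move weight unquantization/undecimation into the shared unpack_weights() helper. A minimal scalar sketch of that select-based blend, using illustrative names rather than the library's vint4/vmask4 types:

#include <cstdio>

// Scalar illustration of the lane-select weight blend performed by
// lerp_color_int: a lane takes the plane-2 weight only when it matches
// the plane-2 color component (-1 means single-plane mode).
static void lerp_color_scalar(const int color0[4], const int color1[4],
                              int weight, int plane2_weight,
                              int plane2_component, int out[4])
{
    for (int lane = 0; lane < 4; lane++)
    {
        int w1 = (lane == plane2_component) ? plane2_weight : weight;
        int w0 = 64 - w1;

        // Same fixed-point blend as the vector code: the weights sum to 64,
        // +32 rounds, and >>6 scales back into the endpoint range.
        out[lane] = (color0[lane] * w0 + color1[lane] * w1 + 32) >> 6;
    }
}

int main()
{
    int ep0[4] = { 0, 0, 0, 65535 };
    int ep1[4] = { 65535, 65535, 65535, 65535 };
    int texel[4];

    // Dual-plane block with the second plane driving lane 2 (blue)
    lerp_color_scalar(ep0, ep1, 16, 48, 2, texel);
    printf("%d %d %d %d\n", texel[0], texel[1], texel[2], texel[3]);
    return 0;
}
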
@@ -319,29 +341,35 @@ float compute_symbolic_block_difference( pt += scb->partition_index; // get the appropriate block descriptor - const decimation_table *const *ixtab2 = bsd->decimation_tables; + const decimation_table *const *dts = bsd->decimation_tables; - const int packed_index = bsd->block_mode_to_packed[scb->block_mode]; - assert(packed_index >= 0 && packed_index < bsd->block_mode_packed_count); - const block_mode& bm = bsd->block_modes_packed[packed_index]; - const decimation_table *it = ixtab2[bm.decimation_mode]; + const int packed_index = bsd->block_mode_packed_index[scb->block_mode]; + assert(packed_index >= 0 && packed_index < bsd->block_mode_count); + const block_mode& bm = bsd->block_modes[packed_index]; + const decimation_table *dt = dts[bm.decimation_mode]; int is_dual_plane = bm.is_dual_plane; + int weight_quant_level = bm.quant_mode; - int weight_quantization_level = bm.quantization_mode; + int weight_count = dt->weight_count; + int texel_count = bsd->texel_count; + + promise(partition_count > 0); + promise(weight_count > 0); + promise(texel_count > 0); // decode the color endpoints - uint4 color_endpoint0[4]; - uint4 color_endpoint1[4]; + vint4 color_endpoint0[4]; + vint4 color_endpoint1[4]; int rgb_hdr_endpoint[4]; int alpha_hdr_endpoint[4]; int nan_endpoint[4]; for (int i = 0; i < partition_count; i++) { - unpack_color_endpoints(decode_mode, + unpack_color_endpoints(config.profile, scb->color_formats[i], - scb->color_quantization_level, + scb->color_quant_level, scb->color_values[i], &(rgb_hdr_endpoint[i]), &(alpha_hdr_endpoint[i]), @@ -350,88 +378,69 @@ float compute_symbolic_block_difference( &(color_endpoint1[i])); } - // first unquantize the weights - int uq_plane1_weights[MAX_WEIGHTS_PER_BLOCK]; - int uq_plane2_weights[MAX_WEIGHTS_PER_BLOCK]; - int weight_count = it->num_weights; - - const quantization_and_transfer_table *qat = &(quant_and_xfer_tables[weight_quantization_level]); - - for (int i = 0; i < weight_count; i++) - { - uq_plane1_weights[i] = qat->unquantized_value[scb->plane1_weights[i]]; - } - - if (is_dual_plane) - { - for (int i = 0; i < weight_count; i++) - { - uq_plane2_weights[i] = qat->unquantized_value[scb->plane2_weights[i]]; - } - } - - // then undecimate them. + // Unquantize and undecimate the weights int weights[MAX_TEXELS_PER_BLOCK]; int plane2_weights[MAX_TEXELS_PER_BLOCK]; + unpack_weights(*bsd, *scb, *dt, is_dual_plane, weight_quant_level, weights, plane2_weights); - for (int i = 0; i < bsd->texel_count; i++) - { - weights[i] = compute_value_of_texel_int(i, it, uq_plane1_weights); - } - - if (is_dual_plane) - { - for (int i = 0; i < bsd->texel_count; i++) - { - plane2_weights[i] = compute_value_of_texel_int(i, it, uq_plane2_weights); - } - } - - int plane2_color_component = scb->plane2_color_component; + // Now that we have endpoint colors and weights, we can unpack texel colors + int plane2_color_component = is_dual_plane ? scb->plane2_color_component : -1; + vmask4 plane2_mask = vint4::lane_id() == vint4(plane2_color_component); - // now that we have endpoint colors and weights, we can unpack actual colors for - // each texel. float summa = 0.0f; - for (int i = 0; i < bsd->texel_count; i++) + for (int i = 0; i < texel_count; i++) { int partition = pt->partition_of_texel[i]; - uint4 color = lerp_color_int(decode_mode, - color_endpoint0[partition], - color_endpoint1[partition], - weights[i], - plane2_weights[i], - is_dual_plane ? 
plane2_color_component : -1); + vint4 ep0 = color_endpoint0[partition]; + vint4 ep1 = color_endpoint1[partition]; - float4 newColor = float4((float)color.r, - (float)color.g, - (float)color.b, - (float)color.a); + vint4 colori = lerp_color_int(config.profile, + ep0, + ep1, + weights[i], + plane2_weights[i], + plane2_mask); - float4 oldColor = float4(pb->data_r[i], - pb->data_g[i], - pb->data_b[i], - pb->data_a[i]); + vfloat4 color = int_to_float(colori); + vfloat4 oldColor = blk->texel(i); - float4 error = oldColor - newColor; + if (config.flags & ASTCENC_FLG_MAP_RGBM) + { + // Fail encodings that result in zero weight M pixels. Note that + // this can cause "interesting" artifacts if we reject all useful + // encodings - we typically get max brightness encodings instead + // which look just as bad. We recommend users apply a bias to their + // stored M value, limiting the lower value to 16 or 32 to avoid + // getting small M values post-quantization, but we can't prove it + // would never happen, especially at low bit rates ... + if (color.lane<3>() == 0.0f) + { + return -1e30f; + } - error.r = MIN(fabsf(error.r), 1e15f); - error.g = MIN(fabsf(error.g), 1e15f); - error.b = MIN(fabsf(error.b), 1e15f); - error.a = MIN(fabsf(error.a), 1e15f); + // Compute error based on decoded RGBM color + color = vfloat4( + color.lane<0>() * color.lane<3>() * config.rgbm_m_scale, + color.lane<1>() * color.lane<3>() * config.rgbm_m_scale, + color.lane<2>() * color.lane<3>() * config.rgbm_m_scale, + 1.0f + ); + + oldColor = vfloat4( + oldColor.lane<0>() * oldColor.lane<3>() * config.rgbm_m_scale, + oldColor.lane<1>() * oldColor.lane<3>() * config.rgbm_m_scale, + oldColor.lane<2>() * oldColor.lane<3>() * config.rgbm_m_scale, + 1.0f + ); + } + vfloat4 error = oldColor - color; + error = min(abs(error), 1e15f); error = error * error; - float4 errorWeight = float4(ewb->error_weights[i].r, - ewb->error_weights[i].g, - ewb->error_weights[i].b, - ewb->error_weights[i].a); - - float metric = dot(error, errorWeight); - if (metric >= 1e30f) metric = 1e30f; - if (metric != metric) metric = 0.0f; - - summa += metric; + float metric = dot_s(error, ewb->error_weights[i]); + summa += astc::min(metric, 1e30f); } return summa; diff --git a/libkram/astc-encoder/astcenc_diagnostic_trace.cpp b/libkram/astc-encoder/astcenc_diagnostic_trace.cpp new file mode 100644 index 00000000..2f6c8c3b --- /dev/null +++ b/libkram/astc-encoder/astcenc_diagnostic_trace.cpp @@ -0,0 +1,219 @@ +// SPDX-License-Identifier: Apache-2.0 +// ---------------------------------------------------------------------------- +// Copyright 2021 Arm Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not +// use this file except in compliance with the License. You may obtain a copy +// of the License at: +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations +// under the License. +// ---------------------------------------------------------------------------- + +/** + * @brief Functions for the library entrypoint. + */ + +#if defined(ASTCENC_DIAGNOSTICS) + +#include +#include +#include + +#include "astcenc_diagnostic_trace.h" + +/** @brief The global trace logger. 
*/ +static TraceLog* g_TraceLog = nullptr; + +/** @brief The JSON indentation level. */ +static const int g_trace_indent = 2; + +TraceLog::TraceLog( + const char* file_name): + m_file(file_name, std::ofstream::out | std::ofstream::binary) +{ + assert(!g_TraceLog); + g_TraceLog = this; + m_root = new TraceNode("root"); +} + +TraceNode* TraceLog::get_current_leaf() +{ + if (m_stack.size()) + { + return m_stack.back(); + } + + return nullptr; +} + +int TraceLog::get_depth() +{ + return m_stack.size(); +} + +TraceLog::~TraceLog() +{ + assert(g_TraceLog == this); + delete m_root; + g_TraceLog = nullptr; +} + +TraceNode::TraceNode( + const char* format, + ... +) { + // Format the name string + constexpr size_t bufsz = 256; + char buffer[bufsz]; + + va_list args; + va_start (args, format); + vsnprintf (buffer, bufsz, format, args); + va_end (args); + + // Guarantee there is a nul termintor + buffer[bufsz - 1] = 0; + + // Generate the node + TraceNode* parent = g_TraceLog->get_current_leaf(); + int depth = g_TraceLog->get_depth(); + g_TraceLog->m_stack.push_back(this); + + bool comma = parent && parent->m_attrib_count; + auto& out = g_TraceLog->m_file; + + if (parent) + { + parent->m_attrib_count++; + } + + if (comma) + { + out << ','; + } + + if (depth) + { + out << '\n'; + } + + int out_indent = (depth * 2) * g_trace_indent; + int in_indent = (depth * 2 + 1) * g_trace_indent; + + std::string out_indents(""); + if (out_indent) + { + out_indents = std::string(out_indent, ' '); + } + + std::string in_indents(in_indent, ' '); + + out << out_indents << "[ \"node\", \"" << buffer << "\",\n"; + out << in_indents << "["; +} + +void TraceNode::add_attrib( + std::string type, + std::string key, + std::string value +) { + (void)type; + + int depth = g_TraceLog->get_depth(); + int indent = (depth * 2) * g_trace_indent; + auto& out = g_TraceLog->m_file; + bool comma = m_attrib_count; + m_attrib_count++; + + if (comma) + { + out << ','; + } + + out << '\n'; + out << std::string(indent, ' ') << "[ " + << "\"" << key << "\", " + << value << " ]"; +} + +TraceNode::~TraceNode() +{ + g_TraceLog->m_stack.pop_back(); + + auto& out = g_TraceLog->m_file; + int depth = g_TraceLog->get_depth(); + int out_indent = (depth * 2) * g_trace_indent; + int in_indent = (depth * 2 + 1) * g_trace_indent; + + std::string out_indents(""); + if (out_indent) + { + out_indents = std::string(out_indent, ' '); + } + + std::string in_indents(in_indent, ' '); + + if (m_attrib_count) + { + out << "\n" << in_indents; + } + out << "]\n"; + + out << out_indents << "]"; +} + +void trace_add_data( + const char* key, + const char* format, + ... 
+) { + constexpr size_t bufsz = 256; + char buffer[bufsz]; + + va_list args; + va_start (args, format); + vsnprintf (buffer, bufsz, format, args); + va_end (args); + + // Guarantee there is a nul termintor + buffer[bufsz - 1] = 0; + + std::string value = "\"" + std::string(buffer) + "\""; + + TraceNode* node = g_TraceLog->get_current_leaf(); + node->add_attrib("str", key, value); +} + +void trace_add_data( + const char* key, + float value +) { + char buffer[256]; + sprintf(buffer, "%.20g", (double)value); + TraceNode* node = g_TraceLog->get_current_leaf(); + node->add_attrib("float", key, buffer); +} + +void trace_add_data( + const char* key, + int value +) { + TraceNode* node = g_TraceLog->get_current_leaf(); + node->add_attrib("int", key, std::to_string(value)); +} + +void trace_add_data( + const char* key, + unsigned int value +) { + TraceNode* node = g_TraceLog->get_current_leaf(); + node->add_attrib("int", key, std::to_string(value)); +} + +#endif diff --git a/libkram/astc-encoder/astcenc_diagnostic_trace.h b/libkram/astc-encoder/astcenc_diagnostic_trace.h new file mode 100644 index 00000000..e3c26afb --- /dev/null +++ b/libkram/astc-encoder/astcenc_diagnostic_trace.h @@ -0,0 +1,225 @@ +// SPDX-License-Identifier: Apache-2.0 +// ---------------------------------------------------------------------------- +// Copyright 2021 Arm Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not +// use this file except in compliance with the License. You may obtain a copy +// of the License at: +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations +// under the License. +// ---------------------------------------------------------------------------- + +/** + * @brief This module provides a set of diagnostic tracing utilities. + * + * Overview + * ======== + * + * The built-in diagnostic trace tool generates a hierarchical JSON tree + * structure. The tree hierarchy contains three levels: + * + * - block + * - pass + * - candidate + * + * One block node exists for each compressed block in the image. One pass node + * exists for each major pass (N partition, M planes, O channel) applied to a + * block. One candidate node exists for each encoding candidate trialed for a + * pass. + * + * Each node contains both the hierarchy but also a number of attributes which + * explain the behavior. For example, the block node contains the block + * coordinates in the image, the pass explains the pass configuration, and the + * candidate will explain the candidate encoding such as weight decimation, + * refinement error, etc. + * + * Trace Nodes are designed as scope-managed C++ objects with stack-like + * push/pop behavior. Constructing a trace node on the stack will automatically + * add it to the current node as a child, and then make it the current node. + * Destroying the current node will pop the stack and set the parent to the + * current node. This provides a robust mechanism for ensuring reliable + * nesting in the tree structure. + * + * A set of utility macros are provided to add attribute annotations to the + * current trace node. + * + * Usage + * ===== + * + * Create Trace Nodes on the stack using the TRACE_NODE() macro. 
This will + * compile-out completely in builds with diagnostics disabled. + * + * Add annotations to the current trace node using the trace_add_data() macro. + * This will similarly compile out completely in builds with diagnostics + * disabled. + * + * If you need to add additional code to support diagnostics-only behavior wrap + * it in preprocessor guards: + * + * #if defined(ASTCENC_DIAGNOSTICS) + * #endif + */ + +#ifndef ASTCENC_DIAGNOSTIC_TRACE_INCLUDED +#define ASTCENC_DIAGNOSTIC_TRACE_INCLUDED + +#if defined(ASTCENC_DIAGNOSTICS) + +#include +#include +#include + +/** + * @brief Class representing a single node in the trace hierarchy. + */ +class TraceNode +{ +public: + /** + * @brief Construct a new node. + * + * Constructing a node will push to the the top of the stack, automatically + * making it a child of the current node, and then setting it to become the + * current node. + * + * @param format The format template for the node name. + * @param ... The format parameters. + */ + TraceNode(const char* format, ...); + + /** + * @brief Add an attribute to this node. + * + * Note that no quoting is applied to the @c value, so if quoting is + * needed it must be done by the caller. + * + * @param type The type of the attribute. + * @param key The key of the attribute. + * @param value The value of the attribute. + */ + void add_attrib(std::string type, std::string key, std::string value); + + /** + * @brief Destroy this node. + * + * Destroying a node will pop it from the top of the stack, making its + * parent the current node. It is invalid behavior to destroy a node that + * is not the current node; usage must conform to stack push-pop semantics. + */ + ~TraceNode(); + + /** + * @brief The number of attributes and child nodes in this node. + */ + unsigned int m_attrib_count { 0 }; +}; + +/** + * @brief Class representing the trace log file being written. + */ +class TraceLog +{ +public: + /** + * @brief Create a new trace log. + * + * The trace log is global; there can be only one at a time. + * + * @param file_name The name of the file to write. + */ + TraceLog(const char* file_name); + + /** + * @brief Detroy the trace log. + * + * Trace logs MUST be cleanly destroyed to ensure the file gets written. + */ + ~TraceLog(); + + /** + * @brief Get the current child node. + * + * @return The current leaf node. + */ + TraceNode* get_current_leaf(); + + /** + * @brief Get the stack depth of the current child node. + * + * @return The current leaf node stack depth. + */ + int get_depth(); + + /** + * @brief The file stream to write to. + */ + std::ofstream m_file; + + /** + * @brief The stack of nodes (newest at the back). + */ + std::vector m_stack; + +private: + /** + * @brief The root node in the JSON file. + */ + TraceNode* m_root; +}; + +/** + * @brief Utility macro to create a trace node on the stack. + * + * @param name The variable name to use. + * @param ... The name template and format parameters. + */ +#define TRACE_NODE(name, ...) TraceNode name(__VA_ARGS__); + +/** + * @brief Add a string annotation to the current node. + * + * @param key The name of the attribute. + * @param format The format template for the attribute value. + * @param ... The format parameters. + */ +void trace_add_data(const char* key, const char* format, ...); + +/** + * @brief Add a float annotation to the current node. + * + * @param key The name of the attribute. + * @param value The value of the attribute. 
+ */ +void trace_add_data(const char* key, float value); + +/** + * @brief Add an integer annotation to the current node. + * + * @param key The name of the attribute. + * @param value The value of the attribute. + */ +void trace_add_data(const char* key, int value); + +/** + * @brief Add an unsigned integer annotation to the current node. + * + * @param key The name of the attribute. + * @param value The value of the attribute. + */ +void trace_add_data(const char* key, unsigned int value); + +#else + +#define TRACE_NODE(name, ...) + +#define trace_add_data(...) + +#endif + +#endif diff --git a/libkram/astc-encoder/astcenc_encoding_choice_error.cpp b/libkram/astc-encoder/astcenc_encoding_choice_error.cpp index c58175a3..c9c46d4a 100644 --- a/libkram/astc-encoder/astcenc_encoding_choice_error.cpp +++ b/libkram/astc-encoder/astcenc_encoding_choice_error.cpp @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2011-2020 Arm Limited +// Copyright 2011-2021 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. You may obtain a copy @@ -43,50 +43,101 @@ // helper function to merge two endpoint-colors void merge_endpoints( - const endpoints * ep1, // contains three of the color components - const endpoints * ep2, // contains the remaining color component + const endpoints* ep1, // contains three of the color components + const endpoints* ep2, // contains the remaining color component int separate_component, - endpoints * res + endpoints* res ) { int partition_count = ep1->partition_count; + vmask4 sep_mask = vint4::lane_id() == vint4(separate_component); + res->partition_count = partition_count; + promise(partition_count > 0); for (int i = 0; i < partition_count; i++) { - res->endpt0[i] = ep1->endpt0[i]; - res->endpt1[i] = ep1->endpt1[i]; + res->endpt0[i] = select(ep1->endpt0[i], ep2->endpt0[i], sep_mask); + res->endpt1[i] = select(ep1->endpt1[i], ep2->endpt1[i], sep_mask); } +} - switch (separate_component) +// function to compute the error across a tile when using a particular line for +// a particular partition. +static void compute_error_squared_rgb_single_partition( + int partition_to_test, + const block_size_descriptor* bsd, + const partition_info* pt, // the partition that we use when computing the squared-error. 
+ const imageblock* blk, + const error_weight_block* ewb, + const processed_line3* uncor_pline, + float* uncor_err, + const processed_line3* samec_pline, + float* samec_err, + const processed_line3* rgbl_pline, + float* rgbl_err, + const processed_line3* l_pline, + float* l_err, + float* a_drop_err +) { + int texels_per_block = bsd->texel_count; + float uncor_errorsum = 0.0f; + float samec_errorsum = 0.0f; + float rgbl_errorsum = 0.0f; + float l_errorsum = 0.0f; + float a_drop_errorsum = 0.0f; + + for (int i = 0; i < texels_per_block; i++) { - case 0: - for (int i = 0; i < partition_count; i++) + int partition = pt->partition_of_texel[i]; + float texel_weight = ewb->texel_weight_rgb[i]; + if (partition != partition_to_test || texel_weight < 1e-20f) + { + continue; + } + + vfloat4 point = blk->texel(i); + vfloat4 ews = ewb->error_weights[i]; + + // Compute the error that arises from just ditching alpha + float default_alpha = imageblock_default_alpha(blk); + float omalpha = point.lane<3>() - default_alpha; + a_drop_errorsum += omalpha * omalpha * ews.lane<3>(); + { - res->endpt0[i].r = ep2->endpt0[i].r; - res->endpt1[i].r = ep2->endpt1[i].r; + float param = dot3_s(point, uncor_pline->bs); + vfloat4 rp1 = uncor_pline->amod + param * uncor_pline->bis; + vfloat4 dist = rp1 - point; + uncor_errorsum += dot3_s(ews, dist * dist); } - break; - case 1: - for (int i = 0; i < partition_count; i++) + { - res->endpt0[i].g = ep2->endpt0[i].g; - res->endpt1[i].g = ep2->endpt1[i].g; + float param = dot3_s(point, samec_pline->bs); + // No samec amod - we know it's always zero + vfloat4 rp1 = /* samec_pline->amod + */ param * samec_pline->bis; + vfloat4 dist = rp1 - point; + samec_errorsum += dot3_s(ews, dist * dist); } - break; - case 2: - for (int i = 0; i < partition_count; i++) + { - res->endpt0[i].b = ep2->endpt0[i].b; - res->endpt1[i].b = ep2->endpt1[i].b; + float param = dot3_s(point, rgbl_pline->bs); + vfloat4 rp1 = rgbl_pline->amod + param * rgbl_pline->bis; + vfloat4 dist = rp1 - point; + rgbl_errorsum += dot3_s(ews, dist * dist); } - break; - case 3: - for (int i = 0; i < partition_count; i++) + { - res->endpt0[i].a = ep2->endpt0[i].a; - res->endpt1[i].a = ep2->endpt1[i].a; + float param = dot3_s(point, l_pline->bs); + // No luma amod - we know it's always zero + vfloat4 rp1 = /* l_pline->amod + */ param * l_pline->bis; + vfloat4 dist = rp1 - point; + l_errorsum += dot3_s(ews, dist * dist); } - break; } + + *uncor_err = uncor_errorsum; + *samec_err = samec_errorsum; + *rgbl_err = rgbl_errorsum; + *l_err = l_errorsum; + *a_drop_err = a_drop_errorsum; } /* @@ -100,192 +151,126 @@ void merge_endpoints( */ void compute_encoding_choice_errors( const block_size_descriptor* bsd, - const imageblock* pb, - const partition_info* pi, + const imageblock* blk, + const partition_info* pt, const error_weight_block* ewb, int separate_component, // component that is separated out in 2-plane mode, -1 in 1-plane mode encoding_choice_errors* eci) { - int partition_count = pi->partition_count; + int partition_count = pt->partition_count; int texels_per_block = bsd->texel_count; - float3 averages[4]; - float3 directions_rgb[4]; - float4 error_weightings[4]; - float4 color_scalefactors[4]; - float4 inverse_color_scalefactors[4]; - - compute_partition_error_color_weightings(bsd, ewb, pi, error_weightings, color_scalefactors); - compute_averages_and_directions_rgb(pi, pb, ewb, color_scalefactors, averages, directions_rgb); - - line3 uncorr_rgb_lines[4]; - line3 samechroma_rgb_lines[4]; // for LDR-RGB-scale - line3 
rgb_luma_lines[4]; // for HDR-RGB-scale - line3 luminance_lines[4]; - - processed_line3 proc_uncorr_rgb_lines[4]; - processed_line3 proc_samechroma_rgb_lines[4]; // for LDR-RGB-scale - processed_line3 proc_rgb_luma_lines[4]; // for HDR-RGB-scale - processed_line3 proc_luminance_lines[4]; - - for (int i = 0; i < partition_count; i++) - { - inverse_color_scalefactors[i].r = 1.0f / MAX(color_scalefactors[i].r, 1e-7f); - inverse_color_scalefactors[i].g = 1.0f / MAX(color_scalefactors[i].g, 1e-7f); - inverse_color_scalefactors[i].b = 1.0f / MAX(color_scalefactors[i].b, 1e-7f); - inverse_color_scalefactors[i].a = 1.0f / MAX(color_scalefactors[i].a, 1e-7f); - - float3 csf = float3(color_scalefactors[i].r, color_scalefactors[i].g, color_scalefactors[i].b); - float3 icsf = float3(inverse_color_scalefactors[i].r, inverse_color_scalefactors[i].g, inverse_color_scalefactors[i].b); - - uncorr_rgb_lines[i].a = averages[i]; - if (dot(directions_rgb[i], directions_rgb[i]) == 0.0f) - { - uncorr_rgb_lines[i].b = normalize(csf); - } - else - { - uncorr_rgb_lines[i].b = normalize(directions_rgb[i]); - } - - samechroma_rgb_lines[i].a = float3(0.0f); - if (dot(averages[i], averages[i]) < 1e-20f) - { - samechroma_rgb_lines[i].b = normalize(csf); - } - else - { - samechroma_rgb_lines[i].b = normalize(averages[i]); - } - - rgb_luma_lines[i].a = averages[i]; - rgb_luma_lines[i].b = normalize(csf); + promise(partition_count > 0); + promise(texels_per_block > 0); - luminance_lines[i].a = float3(0.0f); - luminance_lines[i].b = normalize(csf); + partition_metrics pms[4]; - proc_uncorr_rgb_lines[i].amod = (uncorr_rgb_lines[i].a - uncorr_rgb_lines[i].b * dot(uncorr_rgb_lines[i].a, uncorr_rgb_lines[i].b)) * icsf; - proc_uncorr_rgb_lines[i].bs = uncorr_rgb_lines[i].b * csf; - proc_uncorr_rgb_lines[i].bis = uncorr_rgb_lines[i].b * icsf; + compute_partition_error_color_weightings(*ewb, *pt, pms); - proc_samechroma_rgb_lines[i].amod = (samechroma_rgb_lines[i].a - samechroma_rgb_lines[i].b * dot(samechroma_rgb_lines[i].a, samechroma_rgb_lines[i].b)) * icsf; - proc_samechroma_rgb_lines[i].bs = samechroma_rgb_lines[i].b * csf; - proc_samechroma_rgb_lines[i].bis = samechroma_rgb_lines[i].b * icsf; + compute_avgs_and_dirs_3_comp(pt, blk, ewb, 3, pms); - proc_rgb_luma_lines[i].amod = (rgb_luma_lines[i].a - rgb_luma_lines[i].b * dot(rgb_luma_lines[i].a, rgb_luma_lines[i].b)) * icsf; - proc_rgb_luma_lines[i].bs = rgb_luma_lines[i].b * csf; - proc_rgb_luma_lines[i].bis = rgb_luma_lines[i].b * icsf; - - proc_luminance_lines[i].amod = (luminance_lines[i].a - luminance_lines[i].b * dot(luminance_lines[i].a, luminance_lines[i].b)) * icsf; - proc_luminance_lines[i].bs = luminance_lines[i].b * csf; - proc_luminance_lines[i].bis = luminance_lines[i].b * icsf; - } - - float uncorr_rgb_error[4]; - float samechroma_rgb_error[4]; - float rgb_luma_error[4]; - float luminance_rgb_error[4]; - - for (int i = 0; i < partition_count; i++) - { - uncorr_rgb_error[i] = compute_error_squared_rgb_single_partition(i, bsd, pi, pb, ewb, &(proc_uncorr_rgb_lines[i])); - - samechroma_rgb_error[i] = compute_error_squared_rgb_single_partition(i, bsd, pi, pb, ewb, &(proc_samechroma_rgb_lines[i])); - - rgb_luma_error[i] = compute_error_squared_rgb_single_partition(i, bsd, pi, pb, ewb, &(proc_rgb_luma_lines[i])); - - luminance_rgb_error[i] = compute_error_squared_rgb_single_partition(i, bsd, pi, pb, ewb, &(proc_luminance_lines[i])); - } - - // compute the error that arises from just ditching alpha and RGB - float alpha_drop_error[4]; - float rgb_drop_error[4]; - - 
for (int i = 0; i < partition_count; i++) - { - alpha_drop_error[i] = 0; - rgb_drop_error[i] = 0; - } - - for (int i = 0; i < texels_per_block; i++) - { - int partition = pi->partition_of_texel[i]; - float alpha = pb->data_a[i]; - float default_alpha = pb->alpha_lns[i] ? (float)0x7800 : (float)0xFFFF; - - float omalpha = alpha - default_alpha; - alpha_drop_error[partition] += omalpha * omalpha * ewb->error_weights[i].a; - - float red = pb->data_r[i]; - float green = pb->data_g[i]; - float blue = pb->data_b[i]; - rgb_drop_error[partition] += red * red * ewb->error_weights[i].r + - green * green * ewb->error_weights[i].g + - blue * blue * ewb->error_weights[i].b; - } - - // check if we are eligible for blue-contraction and offset-encoding endpoints ep; if (separate_component == -1) { endpoints_and_weights ei; - compute_endpoints_and_ideal_weights_1_plane(bsd, pi, pb, ewb, &ei); + compute_endpoints_and_ideal_weights_1_plane(bsd, pt, blk, ewb, &ei); ep = ei.ep; } else { endpoints_and_weights ei1, ei2; - compute_endpoints_and_ideal_weights_2_planes(bsd, pi, pb, ewb, separate_component, &ei1, &ei2); - + compute_endpoints_and_ideal_weights_2_planes(bsd, pt, blk, ewb, separate_component, &ei1, &ei2); merge_endpoints(&(ei1.ep), &(ei2.ep), separate_component, &ep); } - int eligible_for_offset_encode[4]; - int eligible_for_blue_contraction[4]; - for (int i = 0; i < partition_count; i++) - { - float4 endpt0 = ep.endpt0[i]; - float4 endpt1 = ep.endpt1[i]; - - float4 endpt_dif = endpt1 - endpt0; - if (fabsf(endpt_dif.r) < (0.12f * 65535.0f) && - fabsf(endpt_dif.g) < (0.12f * 65535.0f) && - fabsf(endpt_dif.b) < (0.12f * 65535.0f)) - { - eligible_for_offset_encode[i] = 1; - } - else - { - eligible_for_offset_encode[i] = 0; - } - - endpt0.r += (endpt0.r - endpt0.b); - endpt0.g += (endpt0.g - endpt0.b); - endpt1.r += (endpt1.r - endpt1.b); - endpt1.g += (endpt1.g - endpt1.b); - if (endpt0.r > (0.01f * 65535.0f) && endpt0.r < (0.99f * 65535.0f) && - endpt1.r > (0.01f * 65535.0f) && endpt1.r < (0.99f * 65535.0f) && - endpt0.g > (0.01f * 65535.0f) && endpt0.g < (0.99f * 65535.0f) && - endpt1.g > (0.01f * 65535.0f) && endpt1.g < (0.99f * 65535.0f)) - { - eligible_for_blue_contraction[i] = 1; - } - else - { - eligible_for_blue_contraction[i] = 0; - } - } - - // finally, gather up our results for (int i = 0; i < partition_count; i++) { - eci[i].rgb_scale_error = (samechroma_rgb_error[i] - uncorr_rgb_error[i]) * 0.7f; // empirical - eci[i].rgb_luma_error = (rgb_luma_error[i] - uncorr_rgb_error[i]) * 1.5f; // wild guess - eci[i].luminance_error = (luminance_rgb_error[i] - uncorr_rgb_error[i]) * 3.0f; // empirical - eci[i].alpha_drop_error = alpha_drop_error[i] * 3.0f; - eci[i].rgb_drop_error = rgb_drop_error[i] * 3.0f; - eci[i].can_offset_encode = eligible_for_offset_encode[i]; - eci[i].can_blue_contract = eligible_for_blue_contraction[i]; + partition_metrics& pm = pms[i]; + + // TODO: Can we skip rgb_luma_lines for LDR images? 
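// For each partition this loop fits the candidate color lines (uncorrelated
// RGB, same-chroma, RGB + luma, and luminance), projects the texels onto them
// via compute_error_squared_rgb_single_partition() to obtain per-line squared
// errors, and then derives the offset-encode and blue-contract eligibility
// flags consumed by the encoding-choice heuristics stored into eci[] below.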
+ line3 uncor_rgb_lines; + line3 samec_rgb_lines; // for LDR-RGB-scale + line3 rgb_luma_lines; // for HDR-RGB-scale + + processed_line3 uncor_rgb_plines; + processed_line3 samec_rgb_plines; // for LDR-RGB-scale + processed_line3 rgb_luma_plines; // for HDR-RGB-scale + processed_line3 luminance_plines; + + float uncorr_rgb_error; + float samechroma_rgb_error; + float rgb_luma_error; + float luminance_rgb_error; + float alpha_drop_error; + + vfloat4 csf = pm.color_scale; + csf.set_lane<3>(0.0f); + vfloat4 csfn = normalize(csf); + + vfloat4 icsf = pm.icolor_scale; + icsf.set_lane<3>(0.0f); + + uncor_rgb_lines.a = pm.avg; + uncor_rgb_lines.b = normalize_safe(pm.dir.swz<0, 1, 2>(), csfn); + + samec_rgb_lines.a = vfloat4::zero(); + samec_rgb_lines.b = normalize_safe(pm.avg.swz<0, 1, 2>(), csfn); + + rgb_luma_lines.a = pm.avg; + rgb_luma_lines.b = csfn; + + uncor_rgb_plines.amod = (uncor_rgb_lines.a - uncor_rgb_lines.b * dot3(uncor_rgb_lines.a, uncor_rgb_lines.b)) * icsf; + uncor_rgb_plines.bs = uncor_rgb_lines.b * csf; + uncor_rgb_plines.bis = uncor_rgb_lines.b * icsf; + + // Same chroma always goes though zero, so this is simpler than the others + samec_rgb_plines.amod = vfloat4::zero(); + samec_rgb_plines.bs = samec_rgb_lines.b * csf; + samec_rgb_plines.bis = samec_rgb_lines.b * icsf; + + rgb_luma_plines.amod = (rgb_luma_lines.a - rgb_luma_lines.b * dot3(rgb_luma_lines.a, rgb_luma_lines.b)) * icsf; + rgb_luma_plines.bs = rgb_luma_lines.b * csf; + rgb_luma_plines.bis = rgb_luma_lines.b * icsf; + + // Luminance always goes though zero, so this is simpler than the others + luminance_plines.amod = vfloat4::zero(); + luminance_plines.bs = csfn * csf; + luminance_plines.bis = csfn * icsf; + + compute_error_squared_rgb_single_partition( + i, bsd, pt, blk, ewb, + &uncor_rgb_plines, &uncorr_rgb_error, + &samec_rgb_plines, &samechroma_rgb_error, + &rgb_luma_plines, &rgb_luma_error, + &luminance_plines, &luminance_rgb_error, + &alpha_drop_error); + + // Determine if we can offset encode RGB lanes + vfloat4 endpt0 = ep.endpt0[i]; + vfloat4 endpt1 = ep.endpt1[i]; + vfloat4 endpt_diff = abs(endpt1 - endpt0); + vmask4 endpt_can_offset = endpt_diff < vfloat4(0.12f * 65535.0f); + bool can_offset_encode = (mask(endpt_can_offset) & 0x7) == 0x7; + + // Determine if we can blue contract encode RGB lanes + vfloat4 endpt_diff_bc( + endpt0.lane<0>() + (endpt0.lane<0>() - endpt0.lane<2>()), + endpt1.lane<0>() + (endpt1.lane<0>() - endpt1.lane<2>()), + endpt0.lane<1>() + (endpt0.lane<1>() - endpt0.lane<2>()), + endpt1.lane<1>() + (endpt1.lane<1>() - endpt1.lane<2>()) + ); + + vmask4 endpt_can_bc_lo = endpt_diff_bc > vfloat4(0.01f * 65535.0f); + vmask4 endpt_can_bc_hi = endpt_diff_bc < vfloat4(0.99f * 65535.0f); + bool can_blue_contract = (mask(endpt_can_bc_lo & endpt_can_bc_hi) & 0x7) == 0x7; + + // Store out the settings + eci[i].rgb_scale_error = (samechroma_rgb_error - uncorr_rgb_error) * 0.7f; // empirical + eci[i].rgb_luma_error = (rgb_luma_error - uncorr_rgb_error) * 1.5f; // wild guess + eci[i].luminance_error = (luminance_rgb_error - uncorr_rgb_error) * 3.0f; // empirical + eci[i].alpha_drop_error = alpha_drop_error * 3.0f; + eci[i].can_offset_encode = can_offset_encode; + eci[i].can_blue_contract = can_blue_contract; } } diff --git a/libkram/astc-encoder/astcenc_entry.cpp b/libkram/astc-encoder/astcenc_entry.cpp index 78e0806c..2cd4366c 100644 --- a/libkram/astc-encoder/astcenc_entry.cpp +++ b/libkram/astc-encoder/astcenc_entry.cpp @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // 
---------------------------------------------------------------------------- -// Copyright 2011-2020 Arm Limited +// Copyright 2011-2021 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. You may obtain a copy @@ -19,11 +19,13 @@ * @brief Functions for the library entrypoint. */ +#include #include #include #include "astcenc.h" #include "astcenc_internal.h" +#include "astcenc_diagnostic_trace.h" // The ASTC codec is written with the assumption that a float threaded through // the "if32" union will in fact be stored and reloaded as a 32-bit IEEE-754 single-precision @@ -48,8 +50,8 @@ static astcenc_error validate_cpu_float() static astcenc_error validate_cpu_isa() { - #if ASTCENC_SSE >= 42 - if (!cpu_supports_sse42()) + #if ASTCENC_SSE >= 41 + if (!cpu_supports_sse41()) { return ASTCENC_ERR_BAD_CPU_ISA; } @@ -62,6 +64,13 @@ static astcenc_error validate_cpu_isa() } #endif + #if ASTCENC_F16C >= 1 + if (!cpu_supports_f16c()) + { + return ASTCENC_ERR_BAD_CPU_ISA; + } + #endif + #if ASTCENC_AVX >= 2 if (!cpu_supports_avx2()) { @@ -72,10 +81,59 @@ static astcenc_error validate_cpu_isa() return ASTCENC_SUCCESS; } +/** + * @brief Record of the quality tuning parameter values. + * + * See the @c astcenc_config structure for detailed parameter documentation. + * + * Note that the mse_overshoot entries are scaling factors relative to the + * base MSE to hit db_limit. A 20% overshoot is harder to hit for a higher + * base db_limit, so we may actually use lower ratios for the more through + * search presets because the underlying db_limit is so much higher. + */ +struct astcenc_preset_config { + float quality; + unsigned int tune_partition_count_limit; + unsigned int tune_partition_index_limit; + unsigned int tune_block_mode_limit; + unsigned int tune_refinement_limit; + unsigned int tune_candidate_limit; + float tune_db_limit_a_base; + float tune_db_limit_b_base; + float tune_mode0_mse_overshoot; + float tune_refinement_mse_overshoot; + float tune_partition_early_out_limit; + float tune_two_plane_early_out_limit; +}; + +/** + * @brief The static quality presets that are built-in. 
+ */ +static const std::array preset_configs {{ + { + ASTCENC_PRE_FASTEST, + 4, 2, 30, 1, 1, 79, 57, 2.0f, 2.0f, 1.0f, 0.5f + }, { + ASTCENC_PRE_FAST, + 4, 4, 50, 2, 2, 85, 63, 3.5f, 3.5f, 1.0f, 0.5f + }, { + ASTCENC_PRE_MEDIUM, + 4, 25, 75, 2, 2, 95, 70, 1.75f, 1.75f, 1.2f, 0.75f + }, { + ASTCENC_PRE_THOROUGH, + 4, 75, 92, 4, 4, 105, 77, 10.0f, 10.0f, 2.5f, 0.95f + }, { + ASTCENC_PRE_EXHAUSTIVE, + 4, 1024, 100, 4, 4, 200, 200, 10.0f, 10.0f, 10.0f, 0.99f + } +}}; + static astcenc_error validate_profile( astcenc_profile profile ) { - switch(profile) + // Values in this enum are from an external user, so not guaranteed to be + // bounded to the enum values + switch(static_cast(profile)) { case ASTCENC_PRF_LDR_SRGB: case ASTCENC_PRF_LDR: @@ -112,7 +170,9 @@ static astcenc_error validate_flags( } // Flags field must only contain at most a single map type - exMask = ASTCENC_FLG_MAP_MASK | ASTCENC_FLG_MAP_NORMAL; + exMask = ASTCENC_FLG_MAP_MASK + | ASTCENC_FLG_MAP_NORMAL + | ASTCENC_FLG_MAP_RGBM; if (astc::popcount(flags & exMask) > 1) { return ASTCENC_ERR_BAD_FLAGS; @@ -125,7 +185,8 @@ static astcenc_error validate_flags( static astcenc_error validate_compression_swz( astcenc_swz swizzle ) { - switch(swizzle) + // Not all enum values are handled; SWZ_Z is invalid for compression + switch(static_cast(swizzle)) { case ASTCENC_SWZ_R: case ASTCENC_SWZ_G: @@ -157,7 +218,9 @@ static astcenc_error validate_compression_swizzle( static astcenc_error validate_decompression_swz( astcenc_swz swizzle ) { - switch(swizzle) + // Values in this enum are from an external user, so not guaranteed to be + // bounded to the enum values + switch(static_cast(swizzle)) { case ASTCENC_SWZ_R: case ASTCENC_SWZ_G: @@ -197,8 +260,7 @@ static astcenc_error validate_decompression_swizzle( * make no sense algorithmically will return an error. 
*/ static astcenc_error validate_config( - astcenc_config &config, - unsigned int thread_count + astcenc_config &config ) { astcenc_error status; @@ -220,12 +282,6 @@ static astcenc_error validate_config( return status; } - // Decompress-only contexts must be single threaded - if ((config.flags & ASTCENC_FLG_DECOMPRESS_ONLY) && (thread_count > 1)) - { - return ASTCENC_ERR_BAD_PARAM; - } - #if defined(ASTCENC_DECOMPRESS_ONLY) // Decompress-only builds only support decompress-only contexts if (!(config.flags & ASTCENC_FLG_DECOMPRESS_ONLY)) @@ -234,36 +290,41 @@ static astcenc_error validate_config( } #endif - config.v_rgba_mean_stdev_mix = MAX(config.v_rgba_mean_stdev_mix, 0.0f); - config.v_rgb_power = MAX(config.v_rgb_power, 0.0f); - config.v_rgb_base = MAX(config.v_rgb_base, 0.0f); - config.v_rgb_mean = MAX(config.v_rgb_mean, 0.0f); - config.v_rgb_stdev = MAX(config.v_rgb_stdev, 0.0f); - config.v_a_power = MAX(config.v_a_power, 0.0f); - config.v_a_base = MAX(config.v_a_base, 0.0f); - config.v_a_mean = MAX(config.v_a_mean, 0.0f); - config.v_a_stdev = MAX(config.v_a_stdev, 0.0f); - - config.b_deblock_weight = MAX(config.b_deblock_weight, 0.0f); - - config.tune_partition_limit = astc::clampi(config.tune_partition_limit, 1, PARTITION_COUNT); - config.tune_block_mode_limit = astc::clampi(config.tune_block_mode_limit, 1, 100); - config.tune_refinement_limit = MAX(config.tune_refinement_limit, 1); - config.tune_candidate_limit = astc::clampi(config.tune_candidate_limit, 1, TUNE_MAX_TRIAL_CANDIDATES); - config.tune_db_limit = MAX(config.tune_db_limit, 0.0f); - config.tune_partition_early_out_limit = MAX(config.tune_partition_early_out_limit, 0.0f); - config.tune_two_plane_early_out_limit = MAX(config.tune_two_plane_early_out_limit, 0.0f); + config.v_rgba_mean_stdev_mix = astc::max(config.v_rgba_mean_stdev_mix, 0.0f); + config.v_rgb_power = astc::max(config.v_rgb_power, 0.0f); + config.v_rgb_base = astc::max(config.v_rgb_base, 0.0f); + config.v_rgb_mean = astc::max(config.v_rgb_mean, 0.0f); + config.v_rgb_stdev = astc::max(config.v_rgb_stdev, 0.0f); + config.v_a_power = astc::max(config.v_a_power, 0.0f); + config.v_a_base = astc::max(config.v_a_base, 0.0f); + config.v_a_mean = astc::max(config.v_a_mean, 0.0f); + config.v_a_stdev = astc::max(config.v_a_stdev, 0.0f); + + config.b_deblock_weight = astc::max(config.b_deblock_weight, 0.0f); + + config.rgbm_m_scale = astc::max(config.rgbm_m_scale, 1.0f); + + config.tune_partition_count_limit = astc::clamp(config.tune_partition_count_limit, 1u, 4u); + config.tune_partition_index_limit = astc::clamp(config.tune_partition_index_limit, 1u, (unsigned int)PARTITION_COUNT); + config.tune_block_mode_limit = astc::clamp(config.tune_block_mode_limit, 1u, 100u); + config.tune_refinement_limit = astc::max(config.tune_refinement_limit, 1u); + config.tune_candidate_limit = astc::clamp(config.tune_candidate_limit, 1u, TUNE_MAX_TRIAL_CANDIDATES); + config.tune_db_limit = astc::max(config.tune_db_limit, 0.0f); + config.tune_mode0_mse_overshoot = astc::max(config.tune_mode0_mse_overshoot, 1.0f); + config.tune_refinement_mse_overshoot = astc::max(config.tune_refinement_mse_overshoot, 1.0f); + config.tune_partition_early_out_limit = astc::max(config.tune_partition_early_out_limit, 0.0f); + config.tune_two_plane_early_out_limit = astc::max(config.tune_two_plane_early_out_limit, 0.0f); // Specifying a zero weight color component is not allowed; force to small value - float max_weight = MAX(MAX(config.cw_r_weight, config.cw_g_weight), - MAX(config.cw_b_weight, 
config.cw_a_weight)); + float max_weight = astc::max(astc::max(config.cw_r_weight, config.cw_g_weight), + astc::max(config.cw_b_weight, config.cw_a_weight)); if (max_weight > 0.0f) { max_weight /= 1000.0f; - config.cw_r_weight = MAX(config.cw_r_weight, max_weight); - config.cw_g_weight = MAX(config.cw_g_weight, max_weight); - config.cw_b_weight = MAX(config.cw_b_weight, max_weight); - config.cw_a_weight = MAX(config.cw_a_weight, max_weight); + config.cw_r_weight = astc::max(config.cw_r_weight, max_weight); + config.cw_g_weight = astc::max(config.cw_g_weight, max_weight); + config.cw_b_weight = astc::max(config.cw_b_weight, max_weight); + config.cw_a_weight = astc::max(config.cw_a_weight, max_weight); } // If all color components error weights are zero then return an error else @@ -279,17 +340,18 @@ astcenc_error astcenc_config_init( unsigned int block_x, unsigned int block_y, unsigned int block_z, - astcenc_preset preset, + float quality, unsigned int flags, - astcenc_config& config + astcenc_config* configp ) { astcenc_error status; + astcenc_config& config = *configp; // Zero init all config fields; although most of will be over written std::memset(&config, 0, sizeof(config)); // Process the block size - block_z = MAX(block_z, 1); // For 2D blocks Z==0 is accepted, but convert to 1 + block_z = astc::max(block_z, 1u); // For 2D blocks Z==0 is accepted, but convert to 1 status = validate_block_size(block_x, block_y, block_z); if (status != ASTCENC_SUCCESS) { @@ -303,58 +365,84 @@ astcenc_error astcenc_config_init( float texels = static_cast(block_x * block_y * block_z); float ltexels = logf(texels) / logf(10.0f); - // Process the performance preset; note that this must be done before we - // process any additional settings, such as color profile and flags, which - // may replace some of these settings with more use case tuned values - switch(preset) - { - case ASTCENC_PRE_FASTEST: - config.tune_partition_limit = 2; - config.tune_block_mode_limit = 25; - config.tune_refinement_limit = 1; - config.tune_candidate_limit = MIN(1, TUNE_MAX_TRIAL_CANDIDATES); - config.tune_db_limit = MAX(70 - 35 * ltexels, 53 - 19 * ltexels); - config.tune_partition_early_out_limit = 1.0f; - config.tune_two_plane_early_out_limit = 0.5f; - break; - case ASTCENC_PRE_FAST: - config.tune_partition_limit = 4; - config.tune_block_mode_limit = 50; - config.tune_refinement_limit = 1; - config.tune_candidate_limit = MIN(2, TUNE_MAX_TRIAL_CANDIDATES); - config.tune_db_limit = MAX(85 - 35 * ltexels, 63 - 19 * ltexels); - config.tune_partition_early_out_limit = 1.0f; - config.tune_two_plane_early_out_limit = 0.5f; - break; - case ASTCENC_PRE_MEDIUM: - config.tune_partition_limit = 25; - config.tune_block_mode_limit = 75; - config.tune_refinement_limit = 2; - config.tune_candidate_limit = MIN(2, TUNE_MAX_TRIAL_CANDIDATES); - config.tune_db_limit = MAX(95 - 35 * ltexels, 70 - 19 * ltexels); - config.tune_partition_early_out_limit = 1.2f; - config.tune_two_plane_early_out_limit = 0.75f; - break; - case ASTCENC_PRE_THOROUGH: - config.tune_partition_limit = 100; - config.tune_block_mode_limit = 95; - config.tune_refinement_limit = 4; - config.tune_candidate_limit = MIN(3, TUNE_MAX_TRIAL_CANDIDATES); - config.tune_db_limit = MAX(105 - 35 * ltexels, 77 - 19 * ltexels); - config.tune_partition_early_out_limit = 2.5f; - config.tune_two_plane_early_out_limit = 0.95f; - break; - case ASTCENC_PRE_EXHAUSTIVE: - config.tune_partition_limit = 1024; - config.tune_block_mode_limit = 100; - config.tune_refinement_limit = 4; - 
config.tune_candidate_limit = MIN(4, TUNE_MAX_TRIAL_CANDIDATES); - config.tune_db_limit = 999.0f; - config.tune_partition_early_out_limit = 1000.0f; - config.tune_two_plane_early_out_limit = 0.99f; - break; - default: - return ASTCENC_ERR_BAD_PRESET; + // Process the performance quality level or preset; note that this must be + // done before we process any additional settings, such as color profile + // and flags, which may replace some of these settings with more use case + // tuned values + if (quality < ASTCENC_PRE_FASTEST || + quality > ASTCENC_PRE_EXHAUSTIVE) + { + return ASTCENC_ERR_BAD_QUALITY; + } + + // Determine which preset to use, or which pair to interpolate + size_t start; + size_t end; + for (end = 0; end < preset_configs.size(); end++) + { + if (preset_configs[end].quality >= quality) + { + break; + } + } + + start = end == 0 ? 0 : end - 1; + + // Start and end node are the same - so just transfer the values. + if (start == end) + { + config.tune_partition_count_limit = preset_configs[start].tune_partition_count_limit; + config.tune_partition_index_limit = preset_configs[start].tune_partition_index_limit; + config.tune_block_mode_limit = preset_configs[start].tune_block_mode_limit; + config.tune_refinement_limit = preset_configs[start].tune_refinement_limit; + config.tune_candidate_limit = astc::min(preset_configs[start].tune_candidate_limit, + TUNE_MAX_TRIAL_CANDIDATES); + config.tune_db_limit = astc::max(preset_configs[start].tune_db_limit_a_base - 35 * ltexels, + preset_configs[start].tune_db_limit_b_base - 19 * ltexels); + + config.tune_mode0_mse_overshoot = preset_configs[start].tune_mode0_mse_overshoot; + config.tune_refinement_mse_overshoot = preset_configs[start].tune_refinement_mse_overshoot; + + config.tune_partition_early_out_limit = preset_configs[start].tune_partition_early_out_limit; + config.tune_two_plane_early_out_limit = preset_configs[start].tune_two_plane_early_out_limit; + } + // Start and end node are not the same - so interpolate between them + else + { + auto& node_a = preset_configs[start]; + auto& node_b = preset_configs[end]; + + float wt_range = node_b.quality - node_a.quality; + assert(wt_range > 0); + + // Compute interpolation factors + float wt_node_a = (node_b.quality - quality) / wt_range; + float wt_node_b = (quality - node_a.quality) / wt_range; + + #define LERP(param) ((node_a.param * wt_node_a) + (node_b.param * wt_node_b)) + #define LERPI(param) astc::flt2int_rtn(\ + (((float)node_a.param) * wt_node_a) + \ + (((float)node_b.param) * wt_node_b)) + #define LERPUI(param) (unsigned int)LERPI(param) + + config.tune_partition_count_limit = LERPI(tune_partition_count_limit); + config.tune_partition_index_limit = LERPI(tune_partition_index_limit); + config.tune_block_mode_limit = LERPI(tune_block_mode_limit); + config.tune_refinement_limit = LERPI(tune_refinement_limit); + config.tune_candidate_limit = astc::min(LERPUI(tune_candidate_limit), + TUNE_MAX_TRIAL_CANDIDATES); + config.tune_db_limit = astc::max(LERP(tune_db_limit_a_base) - 35 * ltexels, + LERP(tune_db_limit_b_base) - 19 * ltexels); + + config.tune_mode0_mse_overshoot = LERP(tune_mode0_mse_overshoot); + config.tune_refinement_mse_overshoot = LERP(tune_refinement_mse_overshoot); + + config.tune_partition_early_out_limit = LERP(tune_partition_early_out_limit); + config.tune_two_plane_early_out_limit = LERP(tune_two_plane_early_out_limit); + + #undef LERP + #undef LERPI + #undef LERPUI } // Set heuristics to the defaults for each color profile @@ -368,10 +456,13 @@ astcenc_error 
astcenc_config_init( config.a_scale_radius = 0; - config.b_deblock_weight = 0.0f; + config.rgbm_m_scale = 0.0f; config.profile = profile; - switch(profile) + + // Values in this enum are from an external user, so not guaranteed to be + // bounded to the enum values + switch(static_cast(profile)) { case ASTCENC_PRF_LDR: case ASTCENC_PRF_LDR_SRGB: @@ -424,14 +515,12 @@ astcenc_error astcenc_config_init( if (flags & ASTCENC_FLG_MAP_NORMAL) { - config.cw_r_weight = 1.0f; config.cw_g_weight = 0.0f; config.cw_b_weight = 0.0f; - config.cw_a_weight = 1.0f; config.tune_partition_early_out_limit = 1000.0f; config.tune_two_plane_early_out_limit = 0.99f; - if (flags & ASTCENC_FLG_USE_PERCEPTUAL) + if (flags & ASTCENC_FLG_USE_PERCEPTUAL) { config.b_deblock_weight = 1.8f; config.v_rgba_radius = 3; @@ -453,19 +542,26 @@ astcenc_error astcenc_config_init( config.v_a_stdev = 25.0f; } + if (flags & ASTCENC_FLG_MAP_RGBM) + { + config.rgbm_m_scale = 5.0f; + config.cw_a_weight = 2.0f * config.rgbm_m_scale; + } + config.flags = flags; return ASTCENC_SUCCESS; } astcenc_error astcenc_context_alloc( - astcenc_config const& config, + const astcenc_config* configp, unsigned int thread_count, astcenc_context** context ) { astcenc_error status; astcenc_context* ctx = nullptr; block_size_descriptor* bsd = nullptr; + const astcenc_config& config = *configp; status = validate_cpu_float(); if (status != ASTCENC_SUCCESS) @@ -484,6 +580,14 @@ astcenc_error astcenc_context_alloc( return ASTCENC_ERR_BAD_PARAM; } +#if defined(ASTCENC_DIAGNOSTICS) + // Force single threaded compressor use in diagnostic mode. + if (thread_count != 1) + { + return ASTCENC_ERR_BAD_PARAM; + } +#endif + ctx = new astcenc_context; ctx->thread_count = thread_count; ctx->config = config; @@ -495,7 +599,7 @@ astcenc_error astcenc_context_alloc( ctx->input_alpha_averages = nullptr; // Copy the config first and validate the copy (we may modify it) - status = validate_config(ctx->config, thread_count); + status = validate_config(ctx->config); if (status != ASTCENC_SUCCESS) { delete ctx; @@ -503,7 +607,9 @@ astcenc_error astcenc_context_alloc( } bsd = new block_size_descriptor; - init_block_size_descriptor(config.block_x, config.block_y, config.block_z, bsd); + bool can_omit_modes = config.flags & ASTCENC_FLG_SELF_DECOMPRESS_ONLY; + init_block_size_descriptor(config.block_x, config.block_y, config.block_z, + can_omit_modes, static_cast(config.tune_block_mode_limit) / 100.0f, bsd); ctx->bsd = bsd; #if !defined(ASTCENC_DECOMPRESS_ONLY) @@ -516,7 +622,7 @@ astcenc_error astcenc_context_alloc( // Turn a dB limit into a per-texel error for faster use later if ((ctx->config.profile == ASTCENC_PRF_LDR) || (ctx->config.profile == ASTCENC_PRF_LDR_SRGB)) { - ctx->config.tune_db_limit = powf(0.1f, ctx->config.tune_db_limit * 0.1f) * 65535.0f * 65535.0f; + ctx->config.tune_db_limit = astc::pow(0.1f, ctx->config.tune_db_limit * 0.1f) * 65535.0f * 65535.0f; } else { @@ -527,27 +633,36 @@ astcenc_error astcenc_context_alloc( ctx->working_buffers = aligned_malloc(worksize , 32); if (!ctx->working_buffers) { - goto error_oom; + term_block_size_descriptor(bsd); + delete bsd; + delete ctx; + *context = nullptr; + return ASTCENC_ERR_OUT_OF_MEM; } } #endif +#if defined(ASTCENC_DIAGNOSTICS) + ctx->trace_log = new TraceLog(ctx->config.trace_file_path); + if(!ctx->trace_log->m_file) + { + return ASTCENC_ERR_DTRACE_FAILURE; + } + + trace_add_data("block_x", config.block_x); + trace_add_data("block_y", config.block_y); + trace_add_data("block_z", config.block_z); +#endif + *context 
= ctx; // TODO: Currently static memory; should move to context memory #if !defined(ASTCENC_DECOMPRESS_ONLY) prepare_angular_tables(); #endif - build_quantization_mode_table(); + build_quant_mode_table(); return ASTCENC_SUCCESS; - -error_oom: - term_block_size_descriptor(bsd); - delete bsd; - delete ctx; - *context = nullptr; - return ASTCENC_ERR_OUT_OF_MEM; } void astcenc_context_free( @@ -557,6 +672,9 @@ void astcenc_context_free( { aligned_free(ctx->working_buffers); term_block_size_descriptor(ctx->bsd); +#if defined(ASTCENC_DIAGNOSTICS) + delete ctx->trace_log; +#endif delete ctx->bsd; delete ctx; } @@ -571,15 +689,17 @@ static void compress_image( uint8_t* buffer ) { const block_size_descriptor *bsd = ctx.bsd; + astcenc_profile decode_mode = ctx.config.profile; + imageblock blk; + int block_x = bsd->xdim; int block_y = bsd->ydim; int block_z = bsd->zdim; - astcenc_profile decode_mode = ctx.config.profile; - imageblock pb; int dim_x = image.dim_x; int dim_y = image.dim_y; int dim_z = image.dim_z; + int xblocks = (dim_x + block_x - 1) / block_x; int yblocks = (dim_y + block_y - 1) / block_y; int zblocks = (dim_z + block_z - 1) / block_z; @@ -597,7 +717,7 @@ static void compress_image( while (true) { unsigned int count; - unsigned int base = ctx.manage_compress.get_task_assignment(4, count); + unsigned int base = ctx.manage_compress.get_task_assignment(16, count); if (!count) { break; @@ -611,25 +731,74 @@ static void compress_image( int y = rem / row_blocks; int x = rem - (y * row_blocks); - // Decompress - fetch_imageblock(decode_mode, image, &pb, bsd, x * block_x, y * block_y, z * block_z, swizzle); + // Test if we can apply some basic alpha-scale RDO + bool use_full_block = true; + if (ctx.config.a_scale_radius != 0 && block_z == 1) + { + int start_x = x * block_x; + int end_x = astc::min(dim_x, start_x + block_x); + + int start_y = y * block_y; + int end_y = astc::min(dim_y, start_y + block_y); + + // SATs accumulate error, so don't test exactly zero. Test for + // less than 1 alpha in the expanded block footprint that + // includes the alpha radius. + int x_footprint = block_x + + 2 * (ctx.config.a_scale_radius - 1); + + int y_footprint = block_y + + 2 * (ctx.config.a_scale_radius - 1); + + float footprint = (float)(x_footprint * y_footprint); + float threshold = 0.9f / (255.0f * footprint); + + // Do we have any alpha values? 
+ use_full_block = false; + for (int ay = start_y; ay < end_y; ay++) + { + for (int ax = start_x; ax < end_x; ax++) + { + float a_avg = ctx.input_alpha_averages[ay * dim_x + ax]; + if (a_avg > threshold) + { + use_full_block = true; + ax = end_x; + ay = end_y; + } + } + } + } + // Fetch the full block for compression + if (use_full_block) + { + fetch_imageblock(decode_mode, image, &blk, bsd, x * block_x, y * block_y, z * block_z, swizzle); + } + // Apply alpha scale RDO - substitute constant color block + else + { + blk.origin_texel = vfloat4::zero(); + blk.data_min = vfloat4::zero(); + blk.data_max = blk.data_min; + blk.grayscale = false; + } int offset = ((z * yblocks + y) * xblocks + x) * 16; uint8_t *bp = buffer + offset; physical_compressed_block* pcb = reinterpret_cast(bp); symbolic_compressed_block scb; - compress_block(ctx, image, &pb, scb, *pcb, temp_buffers); + compress_block(ctx, image, &blk, scb, *pcb, temp_buffers); } ctx.manage_compress.complete_task_assignment(count); - }; + } } #endif astcenc_error astcenc_compress_image( astcenc_context* ctx, - astcenc_image& image, + astcenc_image* imagep, astcenc_swizzle swizzle, uint8_t* data_out, size_t data_len, @@ -637,7 +806,7 @@ astcenc_error astcenc_compress_image( ) { #if defined(ASTCENC_DECOMPRESS_ONLY) (void)ctx; - (void)image; + (void)imagep; (void)swizzle; (void)data_out; (void)data_len; @@ -645,6 +814,7 @@ astcenc_error astcenc_compress_image( return ASTCENC_ERR_BAD_CONTEXT; #else astcenc_error status; + astcenc_image& image = *imagep; if (ctx->config.flags & ASTCENC_FLG_DECOMPRESS_ONLY) { @@ -678,15 +848,16 @@ astcenc_error astcenc_compress_image( } if (ctx->config.v_rgb_mean != 0.0f || ctx->config.v_rgb_stdev != 0.0f || - ctx->config.v_a_mean != 0.0f || ctx->config.v_a_stdev != 0.0f) + ctx->config.v_a_mean != 0.0f || ctx->config.v_a_stdev != 0.0f || + ctx->config.a_scale_radius != 0) { // First thread to enter will do setup, other threads will subsequently // enter the critical section but simply skip over the initialization auto init_avg_var = [ctx, &image, swizzle]() { // Perform memory allocations for the destination buffers size_t texel_count = image.dim_x * image.dim_y * image.dim_z; - ctx->input_averages = new float4[texel_count]; - ctx->input_variances = new float4[texel_count]; + ctx->input_averages = new vfloat4[texel_count]; + ctx->input_variances = new vfloat4[texel_count]; ctx->input_alpha_averages = new float[texel_count]; return init_compute_averages_and_variances( @@ -747,13 +918,21 @@ astcenc_error astcenc_compress_reset( } astcenc_error astcenc_decompress_image( - astcenc_context* context, + astcenc_context* ctx, const uint8_t* data, size_t data_len, - astcenc_image& image_out, - astcenc_swizzle swizzle + astcenc_image* image_outp, + astcenc_swizzle swizzle, + unsigned int thread_index ) { astcenc_error status; + astcenc_image& image_out = *image_outp; + + // Today this doesn't matter (working set on stack) but might in future ... 
+ if (thread_index >= ctx->thread_count) + { + return ASTCENC_ERR_BAD_PARAM; + } status = validate_decompression_swizzle(swizzle); if (status != ASTCENC_SUCCESS) @@ -761,14 +940,17 @@ astcenc_error astcenc_decompress_image( return status; } - unsigned int block_x = context->config.block_x; - unsigned int block_y = context->config.block_y; - unsigned int block_z = context->config.block_z; + unsigned int block_x = ctx->config.block_x; + unsigned int block_y = ctx->config.block_y; + unsigned int block_z = ctx->config.block_z; unsigned int xblocks = (image_out.dim_x + block_x - 1) / block_x; unsigned int yblocks = (image_out.dim_y + block_y - 1) / block_y; unsigned int zblocks = (image_out.dim_z + block_z - 1) / block_z; + int row_blocks = xblocks; + int plane_blocks = xblocks * yblocks; + // Check we have enough output space (16 bytes per block) size_t size_needed = xblocks * yblocks * zblocks * 16; if (data_len < size_needed) @@ -776,39 +958,189 @@ astcenc_error astcenc_decompress_image( return ASTCENC_ERR_OUT_OF_MEM; } - imageblock pb; + imageblock blk; - for (unsigned int z = 0; z < zblocks; z++) + // Only the first thread actually runs the initializer + ctx->manage_decompress.init(zblocks * yblocks * xblocks); + + // All threads run this processing loop until there is no work remaining + while (true) { - for (unsigned int y = 0; y < yblocks; y++) + unsigned int count; + unsigned int base = ctx->manage_decompress.get_task_assignment(128, count); + if (!count) { - for (unsigned int x = 0; x < xblocks; x++) - { - unsigned int offset = (((z * yblocks + y) * xblocks) + x) * 16; - const uint8_t* bp = data + offset; - physical_compressed_block pcb = *(physical_compressed_block *) bp; - symbolic_compressed_block scb; + break; + } - physical_to_symbolic(*context->bsd, pcb, scb); + for (unsigned int i = base; i < base + count; i++) + { + // Decode i into x, y, z block indices + int z = i / plane_blocks; + unsigned int rem = i - (z * plane_blocks); + int y = rem / row_blocks; + int x = rem - (y * row_blocks); - decompress_symbolic_block(context->config.profile, context->bsd, - x * block_x, y * block_y, z * block_z, - &scb, &pb); + unsigned int offset = (((z * yblocks + y) * xblocks) + x) * 16; + const uint8_t* bp = data + offset; + physical_compressed_block pcb = *(const physical_compressed_block*)bp; + symbolic_compressed_block scb; - write_imageblock(image_out, &pb, context->bsd, - x * block_x, y * block_y, z * block_z, swizzle); - } + physical_to_symbolic(*ctx->bsd, pcb, scb); + + decompress_symbolic_block(ctx->config.profile, ctx->bsd, + x * block_x, y * block_y, z * block_z, + &scb, &blk); + + write_imageblock(image_out, &blk, ctx->bsd, + x * block_x, y * block_y, z * block_z, swizzle); + } + + ctx->manage_decompress.complete_task_assignment(count); + } + + return ASTCENC_SUCCESS; +} + +astcenc_error astcenc_decompress_reset( + astcenc_context* ctx +) { + ctx->manage_decompress.reset(); + return ASTCENC_SUCCESS; +} + +astcenc_error astcenc_get_block_info( + astcenc_context* ctx, + const uint8_t data[16], + astcenc_block_info* info +) { +#if defined(ASTCENC_DECOMPRESS_ONLY) + (void)ctx; + (void)data; + (void)info; + return ASTCENC_ERR_BAD_CONTEXT; +#else + // Decode the compressed data into a symbolic form + physical_compressed_block pcb = *(const physical_compressed_block*)data; + symbolic_compressed_block scb; + physical_to_symbolic(*ctx->bsd, pcb, scb); + + // Fetch the appropriate partition and decimation tables + block_size_descriptor& bsd = *ctx->bsd; + + // Start from a clean slate + 
memset(info, 0, sizeof(*info)); + + // Basic info we can always populate + info->profile = ctx->config.profile; + + info->block_x = ctx->config.block_x; + info->block_y = ctx->config.block_y; + info->block_z = ctx->config.block_z; + info->texel_count = bsd.texel_count; + + // Check for error blocks first - block_mode will be negative + info->is_error_block = scb.error_block != 0; + if (info->is_error_block) + { + return ASTCENC_SUCCESS; + } + + // Check for constant color blocks second - block_mode will be negative + info->is_constant_block = scb.block_mode < 0; + if (info->is_constant_block) + { + return ASTCENC_SUCCESS; + } + + // Otherwise, handle a full block with partition payload; values are known + // to be valid once the two conditions above have been checked + int partition_count = scb.partition_count; + const partition_info* pt = get_partition_table(&bsd, partition_count); + pt += scb.partition_index; + + const int packed_index = bsd.block_mode_packed_index[scb.block_mode]; + assert(packed_index >= 0 && packed_index < bsd.block_mode_count); + const block_mode& bm = bsd.block_modes[packed_index]; + const decimation_table& dt = *bsd.decimation_tables[bm.decimation_mode]; + + info->weight_x = dt.weight_x; + info->weight_y = dt.weight_y; + info->weight_z = dt.weight_z; + + info->is_dual_plane_block = bm.is_dual_plane != 0; + + info->partition_count = scb.partition_count; + info->partition_index = scb.partition_index; + info->dual_plane_component = scb.plane2_color_component; + + info->color_level_count = get_quant_method_levels((quant_method)scb.color_quant_level); + info->weight_level_count = get_quant_method_levels((quant_method)bm.quant_mode); + + // Unpack color endpoints for each active partition + for (int i = 0; i < scb.partition_count; i++) + { + int rgb_hdr; + int a_hdr; + int nan; + vint4 endpnt[2]; + + unpack_color_endpoints(ctx->config.profile, + scb.color_formats[i], + scb.color_quant_level, + scb.color_values[i], + &rgb_hdr, &a_hdr, &nan, + endpnt, endpnt + 1); + + // Store the color endpoint mode info + info->color_endpoint_modes[i] = scb.color_formats[i]; + info->is_hdr_block |= (rgb_hdr != 0) | (a_hdr != 0); + + // Store the unpacked and decoded color endpoint + vmask4 hdr_mask(rgb_hdr, rgb_hdr, rgb_hdr, a_hdr); + for (int j = 0; j < 2; j++) + { + vint4 color_lns = lns_to_sf16(endpnt[j]); + vint4 color_unorm = unorm16_to_sf16(endpnt[j]); + vint4 datai = select(color_unorm, color_lns, hdr_mask); + store(float16_to_float(datai), info->color_endpoints[i][j]); } } + // Unpack weights for each texel + int weight_plane1[MAX_TEXELS_PER_BLOCK]; + int weight_plane2[MAX_TEXELS_PER_BLOCK]; + + unpack_weights(bsd, scb, dt, bm.is_dual_plane, bm.quant_mode, weight_plane1, weight_plane2); + for (int i = 0; i < bsd.texel_count; i++) + { + info->weight_values_plane1[i] = (float)weight_plane1[i] / (float)TEXEL_WEIGHT_SUM; + if (info->is_dual_plane_block) + { + info->weight_values_plane2[i] = (float)weight_plane2[i] / (float)TEXEL_WEIGHT_SUM; + } + } + + // Unpack partition assignments for each texel + for (int i = 0; i < bsd.texel_count; i++) + { + info->partition_assignment[i] = pt->partition_of_texel[i]; + } + return ASTCENC_SUCCESS; +#endif } + const char* astcenc_get_error_string( astcenc_error status ) { - switch(status) + // Values in this enum are from an external user, so not guaranteed to be + // bounded to the enum values + switch(static_cast(status)) { + case ASTCENC_SUCCESS: + return "ASTCENC_SUCCESS"; case ASTCENC_ERR_OUT_OF_MEM: return "ASTCENC_ERR_OUT_OF_MEM"; case 
ASTCENC_ERR_BAD_CPU_FLOAT: @@ -821,8 +1153,8 @@ const char* astcenc_get_error_string( return "ASTCENC_ERR_BAD_BLOCK_SIZE"; case ASTCENC_ERR_BAD_PROFILE: return "ASTCENC_ERR_BAD_PROFILE"; - case ASTCENC_ERR_BAD_PRESET: - return "ASTCENC_ERR_BAD_PRESET"; + case ASTCENC_ERR_BAD_QUALITY: + return "ASTCENC_ERR_BAD_QUALITY"; case ASTCENC_ERR_BAD_FLAGS: return "ASTCENC_ERR_BAD_FLAGS"; case ASTCENC_ERR_BAD_SWIZZLE: @@ -831,6 +1163,10 @@ const char* astcenc_get_error_string( return "ASTCENC_ERR_BAD_CONTEXT"; case ASTCENC_ERR_NOT_IMPLEMENTED: return "ASTCENC_ERR_NOT_IMPLEMENTED"; +#if defined(ASTCENC_DIAGNOSTICS) + case ASTCENC_ERR_DTRACE_FAILURE: + return "ASTCENC_ERR_DTRACE_FAILURE"; +#endif default: return nullptr; } diff --git a/libkram/astc-encoder/astcenc_find_best_partitioning.cpp b/libkram/astc-encoder/astcenc_find_best_partitioning.cpp index d32d8d32..7658e331 100644 --- a/libkram/astc-encoder/astcenc_find_best_partitioning.cpp +++ b/libkram/astc-encoder/astcenc_find_best_partitioning.cpp @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2011-2020 Arm Limited +// Copyright 2011-2021 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. You may obtain a copy @@ -53,163 +53,68 @@ #include "astcenc_internal.h" -static void compute_alpha_range( - int texels_per_block, - const partition_info* pt, - const imageblock* blk, - const error_weight_block* ewb, - float alpha_range[4] +static void compute_partition_error_color_weightings_and_range( + const imageblock& blk, + const error_weight_block& ewb, + const partition_info& pt, + partition_metrics pm[4] ) { - float alpha_min[4]; - float alpha_max[4]; + int partition_count = pt.partition_count; - int partition_count = pt->partition_count; for (int i = 0; i < partition_count; i++) { - alpha_min[i] = 1e38f; - alpha_max[i] = -1e38f; - } + vfloat4 error_weight(1e-12f); + vfloat4 rgba_min(1e38f); + vfloat4 rgba_max(-1e38f); - for (int i = 0; i < texels_per_block; i++) - { - if (ewb->texel_weight[i] > 1e-10f) + int texel_count = pt.partition_texel_count[i]; + for (int j = 0; j < texel_count; j++) { - int partition = pt->partition_of_texel[i]; - float alphaval = blk->data_a[i]; + int tidx = pt.texels_of_partition[i][j]; + error_weight = error_weight + ewb.error_weights[tidx]; - if (alphaval > alpha_max[partition]) + if (ewb.texel_weight[tidx] > 1e-10f) { - alpha_max[partition] = alphaval; - } - - if (alphaval < alpha_min[partition]) - { - alpha_min[partition] = alphaval; + vfloat4 data = blk.texel(tidx); + rgba_min = min(data, rgba_min); + rgba_max = max(data, rgba_max); } } - } - for (int i = 0; i < partition_count; i++) - { - alpha_range[i] = alpha_max[i] - alpha_min[i]; - if (alpha_range[i] <= 0.0f) - { - alpha_range[i] = 1e-10f; - } + error_weight = error_weight / pt.partition_texel_count[i]; + vfloat4 csf = sqrt(error_weight); + vfloat4 range = max(rgba_max - rgba_min, 1e-10f); + pm[i].error_weight = error_weight; + pm[i].color_scale = csf; + pm[i].icolor_scale = 1.0f / max(csf, 1e-7f); + pm[i].range_sq = range * range; } } -static void compute_rgb_range( - int texels_per_block, - const partition_info* pt, - const imageblock* blk, - const error_weight_block* ewb, - float3 rgb_range[4] +void compute_partition_error_color_weightings( + const error_weight_block& ewb, + const partition_info& pt, + partition_metrics pm[4] ) { - float3 rgb_min[4]; - float3 
rgb_max[4]; - - int partition_count = pt->partition_count; - for (int i = 0; i < partition_count; i++) - { - rgb_min[i] = float3(1e38f); - rgb_max[i] = float3(-1e38f); - } - - for (int i = 0; i < texels_per_block; i++) - { - if (ewb->texel_weight[i] > 1e-10f) - { - int partition = pt->partition_of_texel[i]; - - float redval = blk->data_r[i]; - if (redval > rgb_max[partition].r) - { - rgb_max[partition].r = redval; - } - - if (redval < rgb_min[partition].r) - { - rgb_min[partition].r = redval; - } - - float greenval = blk->data_g[i]; - if (greenval > rgb_max[partition].g) - { - rgb_max[partition].g = greenval; - } - - if (greenval < rgb_min[partition].g) - { - rgb_min[partition].g = greenval; - } - - float blueval = blk->data_b[i]; - if (blueval > rgb_max[partition].b) - { - rgb_max[partition].b = blueval; - } - - if (blueval < rgb_min[partition].b) - { - rgb_min[partition].b = blueval; - } - } - } + int partition_count = pt.partition_count; - // Covert min/max into ranges forcing a min range of 1e-10 - // to avoid divide by zeros later ... for (int i = 0; i < partition_count; i++) { - rgb_range[i].r = rgb_max[i].r - rgb_min[i].r; - if (rgb_range[i].r <= 0.0f) - { - rgb_range[i].r = 1e-10f; - } + vfloat4 error_weight(1e-12f); - rgb_range[i].g = rgb_max[i].g - rgb_min[i].g; - if (rgb_range[i].g <= 0.0f) + int texel_count = pt.partition_texel_count[i]; + for (int j = 0; j < texel_count; j++) { - rgb_range[i].g = 1e-10f; + int tidx = pt.texels_of_partition[i][j]; + error_weight = error_weight + ewb.error_weights[tidx]; } - rgb_range[i].b = rgb_max[i].b - rgb_min[i].b; - if (rgb_range[i].b <= 0.0f) - { - rgb_range[i].b = 1e-10f; - } - } -} - -void compute_partition_error_color_weightings( - const block_size_descriptor* bsd, - const error_weight_block* ewb, - const partition_info* pi, - float4 error_weightings[4], - float4 color_scalefactors[4] -) { - int texels_per_block = bsd->texel_count; - int pcnt = pi->partition_count; - - for (int i = 0; i < pcnt; i++) - { - error_weightings[i] = float4(1e-12f); - } - - for (int i = 0; i < texels_per_block; i++) - { - int part = pi->partition_of_texel[i]; - error_weightings[part] = error_weightings[part] + ewb->error_weights[i]; - } - - for (int i = 0; i < pcnt; i++) - { - error_weightings[i] = error_weightings[i] * (1.0f / pi->texels_per_partition[i]); - } + error_weight = error_weight / pt.partition_texel_count[i]; + vfloat4 csf = sqrt(error_weight); - for (int i = 0; i < pcnt; i++) - { - color_scalefactors[i] = sqrt(error_weightings[i]); + pm[i].error_weight = error_weight; + pm[i].color_scale = csf; + pm[i].icolor_scale = 1.0f / max(csf, 1e-7f); } } @@ -220,15 +125,15 @@ void find_best_partitionings( const error_weight_block* ewb, int partition_count, int partition_search_limit, - int* best_partition_uncorrelated, - int* best_partition_samechroma, + int* best_partition_uncor, + int* best_partition_samec, int* best_partition_dualplane ) { // constant used to estimate quantization error for a given partitioning; // the optimal value for this constant depends on bitrate. // These constants have been determined empirically. 
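// The partition metrics built above boil down to: average the per-texel error
// weights over the partition, take the square root as the per-channel colour
// scale, keep its clamped reciprocal for mapping back to unscaled colour
// space, and (in the "_and_range" variant) record the squared colour range.
// A scalar sketch for one channel of one partition with hypothetical inputs;
// the real code additionally skips texels whose weight is effectively zero
// when computing the range.
#include <algorithm>
#include <cmath>
#include <cstdio>

int main()
{
    // Hypothetical per-texel error weights and values for one channel
    float texel_error_weights[] = { 0.8f, 1.0f, 1.2f, 1.0f };
    float texel_values[]        = { 0.10f, 0.35f, 0.60f, 0.42f };
    int texel_count = 4;

    float error_weight = 1e-12f; // small bias avoids an exactly-zero weight
    float vmin = 1e38f;
    float vmax = -1e38f;

    for (int i = 0; i < texel_count; i++)
    {
        error_weight += texel_error_weights[i];
        vmin = std::min(vmin, texel_values[i]);
        vmax = std::max(vmax, texel_values[i]);
    }

    error_weight /= (float)texel_count;

    float color_scale  = std::sqrt(error_weight);
    float icolor_scale = 1.0f / std::max(color_scale, 1e-7f);
    float range        = std::max(vmax - vmin, 1e-10f);
    float range_sq     = range * range;

    std::printf("ew=%g cs=%g ics=%g range_sq=%g\n",
                error_weight, color_scale, icolor_scale, range_sq);
    return 0;
}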
int texels_per_block = bsd->texel_count; - float weight_imprecision_estim = 100.0f; + float weight_imprecision_estim = 0.055f; if (texels_per_block <= 20) { weight_imprecision_estim = 0.03f; @@ -241,34 +146,32 @@ void find_best_partitionings( { weight_imprecision_estim = 0.05f; } - else - { - weight_imprecision_estim = 0.055f; - } + + weight_imprecision_estim = weight_imprecision_estim * weight_imprecision_estim; int partition_sequence[PARTITION_COUNT]; kmeans_compute_partition_ordering(bsd, partition_count, blk, partition_sequence); - float weight_imprecision_estim_squared = weight_imprecision_estim * weight_imprecision_estim; - int uses_alpha = imageblock_uses_alpha(blk); const partition_info* ptab = get_partition_table(bsd, partition_count); // Partitioning errors assuming uncorrelated-chrominance endpoints - float uncorr_best_error { ERROR_CALC_DEFAULT }; - int uncorr_best_partition { 0 }; + float uncor_best_error { ERROR_CALC_DEFAULT }; + int uncor_best_partition { 0 }; // Partitioning errors assuming same-chrominance endpoints // Store two so we can always return one different to uncorr - float samechroma_best_errors[2] { ERROR_CALC_DEFAULT, ERROR_CALC_DEFAULT }; - int samechroma_best_partitions[2] { 0, 0 }; + float samec_best_errors[2] { ERROR_CALC_DEFAULT, ERROR_CALC_DEFAULT }; + int samec_best_partitions[2] { 0, 0 }; // Partitioning errors assuming that one color component is uncorrelated - float separate_best_error { ERROR_CALC_DEFAULT }; - int separate_best_partition { 0 }; - int separate_best_component { 0 }; + float sep_best_error { ERROR_CALC_DEFAULT }; + int sep_best_partition { 0 }; + int sep_best_component { 0 }; + + bool skip_two_plane = best_partition_dualplane == nullptr; if (uses_alpha) { @@ -280,167 +183,100 @@ void find_best_partitionings( int bk_partition_count = ptab[partition].partition_count; if (bk_partition_count < partition_count) { - continue; + break; } - // compute the weighting to give to each color channel - // in each partition. 
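// The imprecision constant shrinks for smaller blocks (fewer texels means more
// weight bits per texel, so less expected quantization error) and is squared
// once so it can scale squared-error terms directly. A worked example of the
// two cases visible above; the intermediate block sizes select values between
// 0.03 and 0.055 in the same way.
#include <cstdio>

int main()
{
    float estim_small = 0.03f;  // texels_per_block <= 20, e.g. a 4x4 block
    float estim_large = 0.055f; // large blocks keep the default, e.g. 8x8

    std::printf("4x4: %g^2 = %g\n", estim_small, estim_small * estim_small);
    std::printf("8x8: %g^2 = %g\n", estim_large, estim_large * estim_large);
    return 0;
}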
- float4 error_weightings[4]; - float4 color_scalefactors[4]; - float4 inverse_color_scalefactors[4]; - compute_partition_error_color_weightings(bsd, ewb, ptab + partition, error_weightings, color_scalefactors); - - for (int j = 0; j < partition_count; j++) - { - inverse_color_scalefactors[j].r = 1.0f / MAX(color_scalefactors[j].r, 1e-7f); - inverse_color_scalefactors[j].g = 1.0f / MAX(color_scalefactors[j].g, 1e-7f); - inverse_color_scalefactors[j].b = 1.0f / MAX(color_scalefactors[j].b, 1e-7f); - inverse_color_scalefactors[j].a = 1.0f / MAX(color_scalefactors[j].a, 1e-7f); - } + // Compute weighting to give to each channel in each partition + partition_metrics pms[4]; - float4 averages[4]; - float4 directions_rgba[4]; + compute_partition_error_color_weightings_and_range(*blk, *ewb, *(ptab + partition), pms); - compute_averages_and_directions_rgba(ptab + partition, blk, ewb, - color_scalefactors, averages, - directions_rgba); + compute_avgs_and_dirs_4_comp(ptab + partition, blk, ewb, pms); - line4 uncorr_lines[4]; - line4 samechroma_lines[4]; - line3 separate_red_lines[4]; - line3 separate_green_lines[4]; - line3 separate_blue_lines[4]; - line3 separate_alpha_lines[4]; + line4 uncor_lines[4]; + line4 samec_lines[4]; + line3 sep_r_lines[4]; + line3 sep_g_lines[4]; + line3 sep_b_lines[4]; + line3 sep_a_lines[4]; - processed_line4 proc_uncorr_lines[4]; - processed_line4 proc_samechroma_lines[4]; - processed_line3 proc_separate_red_lines[4]; - processed_line3 proc_separate_green_lines[4]; - processed_line3 proc_separate_blue_lines[4]; - processed_line3 proc_separate_alpha_lines[4]; + processed_line4 uncor_plines[4]; + processed_line4 samec_plines[4]; + processed_line3 sep_r_plines[4]; + processed_line3 sep_g_plines[4]; + processed_line3 sep_b_plines[4]; + processed_line3 sep_a_plines[4]; - float uncorr_linelengths[4]; - float samechroma_linelengths[4]; - float4 separate_linelengths[4]; + float uncor_line_lens[4]; + float samec_line_lens[4]; for (int j = 0; j < partition_count; j++) { - uncorr_lines[j].a = averages[j]; - if (dot(directions_rgba[j], directions_rgba[j]) == 0.0f) - { - uncorr_lines[j].b = normalize(float4(1.0f)); - } - else - { - uncorr_lines[j].b = normalize(directions_rgba[j]); - } + partition_metrics& pm = pms[j]; - proc_uncorr_lines[j].amod = (uncorr_lines[j].a - uncorr_lines[j].b * dot(uncorr_lines[j].a, uncorr_lines[j].b)) * inverse_color_scalefactors[j]; - proc_uncorr_lines[j].bs = (uncorr_lines[j].b * color_scalefactors[j]); - proc_uncorr_lines[j].bis = (uncorr_lines[j].b * inverse_color_scalefactors[j]); + uncor_lines[j].a = pm.avg; + uncor_lines[j].b = normalize_safe(pm.dir, unit4()); - samechroma_lines[j].a = float4(0.0f); - if (dot(averages[j], averages[j]) == 0.0f) - { - samechroma_lines[j].b = normalize(float4(1.0f)); - } - else - { - samechroma_lines[j].b = normalize(averages[j]); - } + uncor_plines[j].amod = (uncor_lines[j].a - uncor_lines[j].b * dot(uncor_lines[j].a, uncor_lines[j].b)) * pm.icolor_scale; + uncor_plines[j].bs = uncor_lines[j].b * pm.color_scale; + uncor_plines[j].bis = uncor_lines[j].b * pm.icolor_scale; - proc_samechroma_lines[j].amod = (samechroma_lines[j].a - samechroma_lines[j].b * dot(samechroma_lines[j].a, samechroma_lines[j].b)) * inverse_color_scalefactors[j]; - proc_samechroma_lines[j].bs = (samechroma_lines[j].b * color_scalefactors[j]); - proc_samechroma_lines[j].bis = (samechroma_lines[j].b * inverse_color_scalefactors[j]); + samec_lines[j].a = vfloat4::zero(); + samec_lines[j].b = normalize_safe(pm.avg, unit4()); - 
separate_red_lines[j].a = float3(averages[j].g, averages[j].b, averages[j].a); - float3 dirs_gba = float3(directions_rgba[j].g, directions_rgba[j].b, directions_rgba[j].a); - if (dot(dirs_gba, dirs_gba) == 0.0f) - { - separate_red_lines[j].b = normalize(float3(1.0f, 1.0f, 1.0f)); - } - else - { - separate_red_lines[j].b = normalize(dirs_gba); - } + samec_plines[j].amod = vfloat4::zero(); + samec_plines[j].bs = samec_lines[j].b * pm.color_scale; + samec_plines[j].bis = samec_lines[j].b * pm.icolor_scale; - separate_green_lines[j].a = float3(averages[j].r, averages[j].b, averages[j].a); - float3 dirs_rba = float3(directions_rgba[j].r, directions_rgba[j].b, directions_rgba[j].a); - if (dot(dirs_rba, dirs_rba) == 0.0f) - { - separate_green_lines[j].b = normalize(float3(1.0f, 1.0f, 1.0f)); - } - else + if (!skip_two_plane) { - separate_green_lines[j].b = normalize(dirs_rba); - } + sep_r_lines[j].a = pm.avg.swz<1, 2, 3>(); + vfloat4 dirs_gba = pm.dir.swz<1, 2, 3>(); + sep_r_lines[j].b = normalize_safe(dirs_gba, unit3()); - separate_blue_lines[j].a = float3(averages[j].r, averages[j].g, averages[j].a); - float3 dirs_rga = float3(directions_rgba[j].r, directions_rgba[j].g, directions_rgba[j].a); - if (dot(dirs_rga, dirs_rga) == 0.0f) - { - separate_blue_lines[j].b = normalize(float3(1.0f, 1.0f, 1.0f)); - } - else - { - separate_blue_lines[j].b = normalize(dirs_rga); - } + sep_g_lines[j].a = pm.avg.swz<0, 2, 3>(); + vfloat4 dirs_rba = pm.dir.swz<0, 2, 3>(); + sep_g_lines[j].b = normalize_safe(dirs_rba, unit3()); - separate_alpha_lines[j].a = float3(averages[j].r, averages[j].g, averages[j].b); - float3 dirs_rgb = float3(directions_rgba[j].r, directions_rgba[j].g, directions_rgba[j].b); - if (dot(dirs_rgb, dirs_rgb) == 0.0f) - { - separate_alpha_lines[j].b = normalize(float3(1.0f, 1.0f, 1.0f)); - } - else - { - separate_alpha_lines[j].b = normalize(dirs_rgb); - } + sep_b_lines[j].a = pm.avg.swz<0, 1, 3>(); + vfloat4 dirs_rga = pm.dir.swz<0, 1, 3>(); + sep_b_lines[j].b = normalize_safe(dirs_rga, unit3()); + + sep_a_lines[j].a = pm.avg.swz<0, 1, 2>(); + vfloat4 dirs_rgb = pm.dir.swz<0, 1, 2>(); + sep_a_lines[j].b = normalize_safe(dirs_rgb, unit3()); - proc_separate_red_lines[j].amod = (separate_red_lines[j].a - separate_red_lines[j].b * dot(separate_red_lines[j].a, separate_red_lines[j].b)) * float3(inverse_color_scalefactors[j].g, inverse_color_scalefactors[j].b, inverse_color_scalefactors[j].a); - proc_separate_red_lines[j].bs = (separate_red_lines[j].b * float3(color_scalefactors[j].g, color_scalefactors[j].b, color_scalefactors[j].a)); - proc_separate_red_lines[j].bis = (separate_red_lines[j].b * float3(inverse_color_scalefactors[j].g, inverse_color_scalefactors[j].b, inverse_color_scalefactors[j].a)); + sep_r_plines[j].amod = (sep_r_lines[j].a - sep_r_lines[j].b * dot3(sep_r_lines[j].a, sep_r_lines[j].b)) * pm.icolor_scale.swz<1, 2, 3, 0>(); + sep_r_plines[j].bs = (sep_r_lines[j].b * pm.color_scale.swz<1, 2, 3, 0>()); + sep_r_plines[j].bis = (sep_r_lines[j].b * pm.icolor_scale.swz<1, 2, 3, 0>()); - proc_separate_green_lines[j].amod = - (separate_green_lines[j].a - separate_green_lines[j].b * dot(separate_green_lines[j].a, separate_green_lines[j].b)) * float3(inverse_color_scalefactors[j].r, inverse_color_scalefactors[j].b, inverse_color_scalefactors[j].a); - proc_separate_green_lines[j].bs = (separate_green_lines[j].b * float3(color_scalefactors[j].r, color_scalefactors[j].b, color_scalefactors[j].a)); - proc_separate_green_lines[j].bis = (separate_green_lines[j].b * 
float3(inverse_color_scalefactors[j].r, inverse_color_scalefactors[j].b, inverse_color_scalefactors[j].a)); + sep_g_plines[j].amod = (sep_g_lines[j].a - sep_g_lines[j].b * dot3(sep_g_lines[j].a, sep_g_lines[j].b)) * pm.icolor_scale.swz<0, 2, 3, 1>(); + sep_g_plines[j].bs = (sep_g_lines[j].b * pm.color_scale.swz<0, 2, 3, 1>()); + sep_g_plines[j].bis = (sep_g_lines[j].b * pm.icolor_scale.swz<0, 2, 3, 1>()); - proc_separate_blue_lines[j].amod = (separate_blue_lines[j].a - separate_blue_lines[j].b * dot(separate_blue_lines[j].a, separate_blue_lines[j].b)) * float3(inverse_color_scalefactors[j].r, inverse_color_scalefactors[j].g, inverse_color_scalefactors[j].a); - proc_separate_blue_lines[j].bs = (separate_blue_lines[j].b * float3(color_scalefactors[j].r, color_scalefactors[j].g, color_scalefactors[j].a)); - proc_separate_blue_lines[j].bis = (separate_blue_lines[j].b * float3(inverse_color_scalefactors[j].r, inverse_color_scalefactors[j].g, inverse_color_scalefactors[j].a)); + sep_b_plines[j].amod = (sep_b_lines[j].a - sep_b_lines[j].b * dot3(sep_b_lines[j].a, sep_b_lines[j].b)) * pm.icolor_scale.swz<0, 1, 3, 2>(); + sep_b_plines[j].bs = (sep_b_lines[j].b * pm.color_scale.swz<0, 1, 3, 2>()); + sep_b_plines[j].bis = (sep_b_lines[j].b * pm.icolor_scale.swz<0, 1, 3, 2>()); - proc_separate_alpha_lines[j].amod = - (separate_alpha_lines[j].a - separate_alpha_lines[j].b * dot(separate_alpha_lines[j].a, separate_alpha_lines[j].b)) * float3(inverse_color_scalefactors[j].r, inverse_color_scalefactors[j].g, inverse_color_scalefactors[j].b); - proc_separate_alpha_lines[j].bs = (separate_alpha_lines[j].b * float3(color_scalefactors[j].r, color_scalefactors[j].g, color_scalefactors[j].b)); - proc_separate_alpha_lines[j].bis = (separate_alpha_lines[j].b * float3(inverse_color_scalefactors[j].r, inverse_color_scalefactors[j].g, inverse_color_scalefactors[j].b)); + sep_a_plines[j].amod = (sep_a_lines[j].a - sep_a_lines[j].b * dot3(sep_a_lines[j].a, sep_a_lines[j].b)) * pm.icolor_scale.swz<0, 1, 2, 3>(); + sep_a_plines[j].bs = (sep_a_lines[j].b * pm.color_scale.swz<0, 1, 2, 3>()); + sep_a_plines[j].bis = (sep_a_lines[j].b * pm.icolor_scale.swz<0, 1, 2, 3>()); + } } - float uncorr_error = 0.0f; - float samechroma_error = 0.0f; - float4 separate_error = float4(0.0f); + float uncor_error = 0.0f; + float samec_error = 0.0f; + vfloat4 sep_error = vfloat4::zero(); + compute_error_squared_rgba(ptab + partition, blk, ewb, - proc_uncorr_lines, - proc_samechroma_lines, - proc_separate_red_lines, - proc_separate_green_lines, - proc_separate_blue_lines, - proc_separate_alpha_lines, - uncorr_linelengths, - samechroma_linelengths, - separate_linelengths, - &uncorr_error, - &samechroma_error, - &separate_error); - - // compute minimum & maximum alpha values in each partition - float3 rgb_range[4]; - float alpha_range[4]; - - compute_alpha_range(bsd->texel_count, ptab + partition, blk, ewb, alpha_range); - compute_rgb_range(bsd->texel_count, ptab + partition, blk, ewb, rgb_range); + uncor_plines, + samec_plines, + uncor_line_lens, + samec_line_lens, + &uncor_error, + &samec_error); /* Compute an estimate of error introduced by weight quantization imprecision. 
@@ -456,84 +292,91 @@ void find_best_partitionings( for (int j = 0; j < partition_count; j++) { - float tpp = (float)(ptab[partition].texels_per_partition[j]); - - float4 ics = inverse_color_scalefactors[j]; - float4 error_weights = error_weightings[j] * (tpp * weight_imprecision_estim_squared); - - float4 uncorr_vector = (uncorr_lines[j].b * uncorr_linelengths[j]) * ics; - float4 samechroma_vector = (samechroma_lines[j].b * samechroma_linelengths[j]) * ics; - float3 separate_red_vector = (separate_red_lines[j].b * separate_linelengths[j].r) * float3(ics.g, ics.b, ics.a); - float3 separate_green_vector = (separate_green_lines[j].b * separate_linelengths[j].g) * float3(ics.r, ics.b, ics.a); - float3 separate_blue_vector = (separate_blue_lines[j].b * separate_linelengths[j].b) * float3(ics.r, ics.g, ics.a); - float3 separate_alpha_vector = (separate_alpha_lines[j].b * separate_linelengths[j].a) * float3(ics.r, ics.g, ics.b); - - uncorr_vector = uncorr_vector * uncorr_vector; - samechroma_vector = samechroma_vector * samechroma_vector; - separate_red_vector = separate_red_vector * separate_red_vector; - separate_green_vector = separate_green_vector * separate_green_vector; - separate_blue_vector = separate_blue_vector * separate_blue_vector; - separate_alpha_vector = separate_alpha_vector * separate_alpha_vector; - - uncorr_error += dot(uncorr_vector, error_weights); - samechroma_error += dot(samechroma_vector, error_weights); - separate_error.r += dot(separate_red_vector, float3(error_weights.g, error_weights.b, error_weights.a)); - separate_error.g += dot(separate_green_vector, float3(error_weights.r, error_weights.b, error_weights.a)); - separate_error.b += dot(separate_blue_vector, float3(error_weights.r, error_weights.g, error_weights.a)); - separate_error.a += dot(separate_alpha_vector, float3(error_weights.r, error_weights.g, error_weights.b)); - - separate_error.r += rgb_range[j].r * rgb_range[j].r * error_weights.r; - separate_error.g += rgb_range[j].g * rgb_range[j].g * error_weights.g; - separate_error.b += rgb_range[j].b * rgb_range[j].b * error_weights.b; - separate_error.a += alpha_range[j] * alpha_range[j] * error_weights.a; + partition_metrics& pm = pms[j]; + float tpp = (float)(ptab[partition].partition_texel_count[j]); + + vfloat4 ics = pm.icolor_scale; + vfloat4 error_weights = pm.error_weight * (tpp * weight_imprecision_estim); + + vfloat4 uncor_vector = uncor_lines[j].b * uncor_line_lens[j] * ics; + vfloat4 samec_vector = samec_lines[j].b * samec_line_lens[j] * ics; + + uncor_vector = uncor_vector * uncor_vector; + samec_vector = samec_vector * samec_vector; + + uncor_error += dot_s(uncor_vector, error_weights); + samec_error += dot_s(samec_vector, error_weights); + + if (!skip_two_plane) + { + vfloat4 sep_r_vector = sep_r_lines[j].b * ics.swz<1, 2, 3, 0>(); + vfloat4 sep_g_vector = sep_g_lines[j].b * ics.swz<0, 2, 3, 1>(); + vfloat4 sep_b_vector = sep_b_lines[j].b * ics.swz<0, 1, 3, 2>(); + vfloat4 sep_a_vector = sep_a_lines[j].b * ics.swz<0, 1, 2, 3>(); + + sep_r_vector = sep_r_vector * sep_r_vector; + sep_g_vector = sep_g_vector * sep_g_vector; + sep_b_vector = sep_b_vector * sep_b_vector; + sep_a_vector = sep_a_vector * sep_a_vector; + + vfloat4 sep_err_inc(dot3_s(sep_r_vector, error_weights.swz<1, 2, 3, 0>()), + dot3_s(sep_g_vector, error_weights.swz<0, 2, 3, 1>()), + dot3_s(sep_b_vector, error_weights.swz<0, 1, 3, 2>()), + dot3_s(sep_a_vector, error_weights.swz<0, 1, 2, 3>())); + + sep_error = sep_error + sep_err_inc + pm.range_sq * error_weights; + } } - if 
(uncorr_error < uncorr_best_error) + if (uncor_error < uncor_best_error) { - uncorr_best_error = uncorr_error; - uncorr_best_partition = partition; + uncor_best_error = uncor_error; + uncor_best_partition = partition; } - if (samechroma_error < samechroma_best_errors[0]) + if (samec_error < samec_best_errors[0]) { - samechroma_best_errors[1] = samechroma_best_errors[0]; - samechroma_best_partitions[1] = samechroma_best_partitions[0]; + samec_best_errors[1] = samec_best_errors[0]; + samec_best_partitions[1] = samec_best_partitions[0]; - samechroma_best_errors[0] = samechroma_error; - samechroma_best_partitions[0] = partition; + samec_best_errors[0] = samec_error; + samec_best_partitions[0] = partition; } - else if (samechroma_error < samechroma_best_errors[1]) + else if (samec_error < samec_best_errors[1]) { - samechroma_best_errors[1] = samechroma_error; - samechroma_best_partitions[1] = partition; + samec_best_errors[1] = samec_error; + samec_best_partitions[1] = partition; } - if (separate_error.r < separate_best_error) + if (!skip_two_plane) { - separate_best_error = separate_error.r; - separate_best_partition = partition; - separate_best_component = 0; - } + if (sep_error.lane<0>() < sep_best_error) + { + sep_best_error = sep_error.lane<0>(); + sep_best_partition = partition; + sep_best_component = 0; + } - if (separate_error.g < separate_best_error) - { - separate_best_error = separate_error.g; - separate_best_partition = partition; - separate_best_component = 1; - } + if (sep_error.lane<1>() < sep_best_error) + { + sep_best_error = sep_error.lane<1>(); + sep_best_partition = partition; + sep_best_component = 1; + } - if (separate_error.b < separate_best_error) - { - separate_best_error = separate_error.b; - separate_best_partition = partition; - separate_best_component = 2; - } + if (sep_error.lane<2>() < sep_best_error) + { + sep_best_error = sep_error.lane<2>(); + sep_best_partition = partition; + sep_best_component = 2; + } - if (separate_error.a < separate_best_error) - { - separate_best_error = separate_error.a; - separate_best_partition = partition; - separate_best_component = 3; + if (sep_error.lane<3>() < sep_best_error) + { + sep_best_error = sep_error.lane<3>(); + sep_best_partition = partition; + sep_best_component = 3; + } } } } @@ -546,144 +389,104 @@ void find_best_partitionings( int bk_partition_count = ptab[partition].partition_count; if (bk_partition_count < partition_count) { - continue; + break; } - // compute the weighting to give to each color channel - // in each partition. 
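// The imprecision term accumulated above models weight quantization as a
// relative error on the span of each partition's colour line: the unit line
// direction times its projected length, mapped back to unscaled colour space
// via the inverse colour scale, is squared per channel and weighted by the
// partition's averaged error weight, its texel count, and the pre-squared
// imprecision constant. A scalar sketch for one partition with hypothetical
// values:
#include <cstdio>

int main()
{
    const int channels = 3;

    float line_dir[channels]     = { 0.70f, 0.57f, 0.43f }; // unit direction (scaled space)
    float line_len               = 0.9f;                    // projected extent of the partition
    float icolor_scale[channels] = { 1.1f, 0.9f, 1.3f };    // maps back to unscaled colour space
    float error_weight[channels] = { 1.0f, 1.2f, 0.8f };    // averaged per-channel weight
    float texel_count            = 16.0f;
    float imprecision_sq         = 0.03f * 0.03f;           // small-block constant, pre-squared

    float estimate = 0.0f;
    for (int c = 0; c < channels; c++)
    {
        float v = line_dir[c] * line_len * icolor_scale[c];
        estimate += v * v * error_weight[c] * texel_count * imprecision_sq;
    }

    std::printf("quantization error estimate: %g\n", estimate);
    return 0;
}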
- float4 error_weightings[4]; - float4 color_scalefactors[4]; - float4 inverse_color_scalefactors[4]; - - compute_partition_error_color_weightings(bsd, ewb, ptab + partition, error_weightings, color_scalefactors); - - for (int j = 0; j < partition_count; j++) - { - inverse_color_scalefactors[j].r = 1.0f / MAX(color_scalefactors[j].r, 1e-7f); - inverse_color_scalefactors[j].g = 1.0f / MAX(color_scalefactors[j].g, 1e-7f); - inverse_color_scalefactors[j].b = 1.0f / MAX(color_scalefactors[j].b, 1e-7f); - inverse_color_scalefactors[j].a = 1.0f / MAX(color_scalefactors[j].a, 1e-7f); - } + // Compute weighting to give to each channel in each partition + partition_metrics pms[4]; - float3 averages[4]; - float3 directions_rgb[4]; + compute_partition_error_color_weightings_and_range(*blk, *ewb, *(ptab + partition), pms); - compute_averages_and_directions_rgb(ptab + partition, blk, ewb, color_scalefactors, averages, directions_rgb); + compute_avgs_and_dirs_3_comp(ptab + partition, blk, ewb, 3, pms); - line3 uncorr_lines[4]; - line3 samechroma_lines[4]; - line2 separate_red_lines[4]; - line2 separate_green_lines[4]; - line2 separate_blue_lines[4]; + partition_lines3 plines[4]; - processed_line3 proc_uncorr_lines[4]; - processed_line3 proc_samechroma_lines[4]; + line2 sep_r_lines[4]; + line2 sep_g_lines[4]; + line2 sep_b_lines[4]; - processed_line2 proc_separate_red_lines[4]; - processed_line2 proc_separate_green_lines[4]; - processed_line2 proc_separate_blue_lines[4]; - - float uncorr_linelengths[4]; - float samechroma_linelengths[4]; - float3 separate_linelengths[4]; + processed_line2 sep_r_plines[4]; + processed_line2 sep_g_plines[4]; + processed_line2 sep_b_plines[4]; for (int j = 0; j < partition_count; j++) { - uncorr_lines[j].a = averages[j]; - if (dot(directions_rgb[j], directions_rgb[j]) == 0.0f) - { - uncorr_lines[j].b = normalize(float3(1.0f)); - } - else - { - uncorr_lines[j].b = normalize(directions_rgb[j]); - } + partition_metrics& pm = pms[j]; + partition_lines3& pl = plines[j]; - samechroma_lines[j].a = float3(0.0f); - if (dot(averages[j], averages[j]) == 0.0f) - { - samechroma_lines[j].b = normalize(float3(1.0f)); - } - else - { - samechroma_lines[j].b = normalize(averages[j]); - } + pl.uncor_line.a = pm.avg; + pl.uncor_line.b = normalize_safe(pm.dir.swz<0, 1, 2>(), unit3()); - proc_uncorr_lines[j].amod = (uncorr_lines[j].a - uncorr_lines[j].b * dot(uncorr_lines[j].a, uncorr_lines[j].b)) * float3(inverse_color_scalefactors[j].r, inverse_color_scalefactors[j].g, inverse_color_scalefactors[j].b); - proc_uncorr_lines[j].bs = (uncorr_lines[j].b * float3(color_scalefactors[j].r, color_scalefactors[j].g, color_scalefactors[j].b)); - proc_uncorr_lines[j].bis = (uncorr_lines[j].b * float3(inverse_color_scalefactors[j].r, inverse_color_scalefactors[j].g, inverse_color_scalefactors[j].b)); + pl.samec_line.a = vfloat4::zero(); + pl.samec_line.b = normalize_safe(pm.avg.swz<0, 1, 2>(), unit3()); - proc_samechroma_lines[j].amod = (samechroma_lines[j].a - samechroma_lines[j].b * dot(samechroma_lines[j].a, samechroma_lines[j].b)) * float3(inverse_color_scalefactors[j].r, inverse_color_scalefactors[j].g, inverse_color_scalefactors[j].b); - proc_samechroma_lines[j].bs = (samechroma_lines[j].b * float3(color_scalefactors[j].r, color_scalefactors[j].g, color_scalefactors[j].b)); - proc_samechroma_lines[j].bis = (samechroma_lines[j].b * float3(inverse_color_scalefactors[j].r, inverse_color_scalefactors[j].g, inverse_color_scalefactors[j].b)); + pl.uncor_pline.amod = (pl.uncor_line.a - pl.uncor_line.b * 
dot3(pl.uncor_line.a, pl.uncor_line.b)) * pm.icolor_scale.swz<0, 1, 2, 3>(); + pl.uncor_pline.bs = (pl.uncor_line.b * pm.color_scale.swz<0, 1, 2, 3>()); + pl.uncor_pline.bis = (pl.uncor_line.b * pm.icolor_scale.swz<0, 1, 2, 3>()); - separate_red_lines[j].a = float2(averages[j].g, averages[j].b); - float2 dirs_gb = float2(directions_rgb[j].g, directions_rgb[j].b); - if (dot(dirs_gb, dirs_gb) == 0.0f) - { - separate_red_lines[j].b = normalize(float2(1.0f)); - } - else - { - separate_red_lines[j].b = normalize(dirs_gb); - } - - separate_green_lines[j].a = float2(averages[j].r, averages[j].b); - float2 dirs_rb = float2(directions_rgb[j].r, directions_rgb[j].b); - if (dot(dirs_rb, dirs_rb) == 0.0f) - { - separate_green_lines[j].b = normalize(float2(1.0f)); - } - else - { - separate_green_lines[j].b = normalize(dirs_rb); - } + pl.samec_pline.amod = vfloat4::zero(); + pl.samec_pline.bs = (pl.samec_line.b * pm.color_scale.swz<0, 1, 2, 3>()); + pl.samec_pline.bis = (pl.samec_line.b * pm.icolor_scale.swz<0, 1, 2, 3>()); - separate_blue_lines[j].a = float2(averages[j].r, averages[j].g); - float2 dirs_rg = float2(directions_rgb[j].r, directions_rgb[j].g); - if (dot(dirs_rg, dirs_rg) == 0.0f) + if (!skip_two_plane) { - separate_blue_lines[j].b = normalize(float2(1.0f)); + sep_r_lines[j].a = pm.avg.swz<1, 2>(); + float2 dirs_gb = pm.dir.swz<1, 2>(); + if (dot(dirs_gb, dirs_gb) == 0.0f) + { + sep_r_lines[j].b = normalize(float2(1.0f)); + } + else + { + sep_r_lines[j].b = normalize(dirs_gb); + } + + sep_g_lines[j].a = pm.avg.swz<0, 2>(); + float2 dirs_rb = pm.dir.swz<0, 2>(); + if (dot(dirs_rb, dirs_rb) == 0.0f) + { + sep_g_lines[j].b = normalize(float2(1.0f)); + } + else + { + sep_g_lines[j].b = normalize(dirs_rb); + } + + sep_b_lines[j].a = pm.avg.swz<0, 1>(); + float2 dirs_rg = pm.dir.swz<0, 1>(); + if (dot(dirs_rg, dirs_rg) == 0.0f) + { + sep_b_lines[j].b = normalize(float2(1.0f)); + } + else + { + sep_b_lines[j].b = normalize(dirs_rg); + } + + sep_r_plines[j].amod = (sep_r_lines[j].a - sep_r_lines[j].b * dot(sep_r_lines[j].a, sep_r_lines[j].b)) * pm.icolor_scale.swz<1, 2>(); + sep_r_plines[j].bs = (sep_r_lines[j].b * pm.color_scale.swz<1, 2>()); + sep_r_plines[j].bis = (sep_r_lines[j].b * pm.icolor_scale.swz<1, 2>()); + + sep_g_plines[j].amod = (sep_g_lines[j].a - sep_g_lines[j].b * dot(sep_g_lines[j].a, sep_g_lines[j].b)) * pm.icolor_scale.swz<0, 2>(); + sep_g_plines[j].bs = (sep_g_lines[j].b * pm.color_scale.swz<0, 2>()); + sep_g_plines[j].bis = (sep_g_lines[j].b * pm.icolor_scale.swz<0, 2>()); + + sep_b_plines[j].amod = (sep_b_lines[j].a - sep_b_lines[j].b * dot(sep_b_lines[j].a, sep_b_lines[j].b)) * pm.icolor_scale.swz<0, 1>(); + sep_b_plines[j].bs = (sep_b_lines[j].b * pm.color_scale.swz<0, 1>()); + sep_b_plines[j].bis = (sep_b_lines[j].b * pm.icolor_scale.swz<0, 1>()); } - else - { - separate_blue_lines[j].b = normalize(dirs_rg); - } - - proc_separate_red_lines[j].amod = (separate_red_lines[j].a - separate_red_lines[j].b * dot(separate_red_lines[j].a, separate_red_lines[j].b)) * float2(inverse_color_scalefactors[j].g, inverse_color_scalefactors[j].b); - proc_separate_red_lines[j].bs = (separate_red_lines[j].b * float2(color_scalefactors[j].g, color_scalefactors[j].b)); - proc_separate_red_lines[j].bis = (separate_red_lines[j].b * float2(inverse_color_scalefactors[j].g, inverse_color_scalefactors[j].b)); - - proc_separate_green_lines[j].amod = (separate_green_lines[j].a - separate_green_lines[j].b * dot(separate_green_lines[j].a, separate_green_lines[j].b)) * 
float2(inverse_color_scalefactors[j].r, inverse_color_scalefactors[j].b); - proc_separate_green_lines[j].bs = (separate_green_lines[j].b * float2(color_scalefactors[j].r, color_scalefactors[j].b)); - proc_separate_green_lines[j].bis = (separate_green_lines[j].b * float2(inverse_color_scalefactors[j].r, inverse_color_scalefactors[j].b)); - - proc_separate_blue_lines[j].amod = (separate_blue_lines[j].a - separate_blue_lines[j].b * dot(separate_blue_lines[j].a, separate_blue_lines[j].b)) * float2(inverse_color_scalefactors[j].r, inverse_color_scalefactors[j].g); - proc_separate_blue_lines[j].bs = (separate_blue_lines[j].b * float2(color_scalefactors[j].r, color_scalefactors[j].g)); - proc_separate_blue_lines[j].bis = (separate_blue_lines[j].b * float2(inverse_color_scalefactors[j].r, inverse_color_scalefactors[j].g)); } - float uncorr_error = 0.0f; - float samechroma_error = 0.0f; - float3 separate_error = float3(0.0f); + float uncor_error = 0.0f; + float samec_error = 0.0f; + vfloat4 sep_error = vfloat4(0.0f); compute_error_squared_rgb(ptab + partition, blk, ewb, - proc_uncorr_lines, - proc_samechroma_lines, - proc_separate_red_lines, - proc_separate_green_lines, - proc_separate_blue_lines, - uncorr_linelengths, - samechroma_linelengths, - separate_linelengths, - &uncorr_error, - &samechroma_error, - &separate_error); - - float3 rgb_range[4]; - compute_rgb_range(bsd->texel_count, ptab + partition, blk, ewb, rgb_range); + plines, + uncor_error, + samec_error); /* compute an estimate of error introduced by weight imprecision. @@ -699,85 +502,102 @@ void find_best_partitionings( for (int j = 0; j < partition_count; j++) { - float tpp = (float)(ptab[partition].texels_per_partition[j]); + partition_metrics& pm = pms[j]; + partition_lines3& pl = plines[j]; + + float tpp = (float)(ptab[partition].partition_texel_count[j]); + + vfloat4 ics = pm.icolor_scale; + ics.set_lane<3>(0.0f); + + vfloat4 error_weights = pm.error_weight * (tpp * weight_imprecision_estim); + error_weights.set_lane<3>(0.0f); - float3 ics = float3(inverse_color_scalefactors[j].r, inverse_color_scalefactors[j].g, inverse_color_scalefactors[j].b); - float3 error_weights = float3(error_weightings[j].r, error_weightings[j].g, error_weightings[j].b) * (tpp * weight_imprecision_estim_squared); + vfloat4 uncor_vector = (pl.uncor_line.b * pl.uncor_line_len) * ics; + vfloat4 samec_vector = (pl.samec_line.b * pl.samec_line_len) * ics; - float3 uncorr_vector = (uncorr_lines[j].b * uncorr_linelengths[j]) * ics; - float3 samechroma_vector = (samechroma_lines[j].b * samechroma_linelengths[j]) * ics; + uncor_vector = uncor_vector * uncor_vector; + samec_vector = samec_vector * samec_vector; - float2 separate_red_vector = (separate_red_lines[j].b * separate_linelengths[j].r) * float2(ics.g, ics.b); - float2 separate_green_vector = (separate_green_lines[j].b * separate_linelengths[j].g) * float2(ics.r, ics.b); - float2 separate_blue_vector = (separate_blue_lines[j].b * separate_linelengths[j].b) * float2(ics.r, ics.g); + uncor_error += dot3_s(uncor_vector, error_weights); + samec_error += dot3_s(samec_vector, error_weights); - uncorr_vector = uncorr_vector * uncorr_vector; - samechroma_vector = samechroma_vector * samechroma_vector; - separate_red_vector = separate_red_vector * separate_red_vector; - separate_green_vector = separate_green_vector * separate_green_vector; - separate_blue_vector = separate_blue_vector * separate_blue_vector; + if (!skip_two_plane) + { + float2 sep_r_vector = sep_r_lines[j].b * ics.swz<1, 2>(); + float2 sep_g_vector 
= sep_g_lines[j].b * ics.swz<0, 2>(); + float2 sep_b_vector = sep_b_lines[j].b * ics.swz<0, 1>(); + + sep_r_vector = sep_r_vector * sep_r_vector; + sep_g_vector = sep_g_vector * sep_g_vector; + sep_b_vector = sep_b_vector * sep_b_vector; - uncorr_error += dot(uncorr_vector, error_weights); - samechroma_error += dot(samechroma_vector, error_weights); - separate_error.r += dot(separate_red_vector, float2(error_weights.g, error_weights.b)); - separate_error.g += dot(separate_green_vector, float2(error_weights.r, error_weights.b)); - separate_error.b += dot(separate_blue_vector, float2(error_weights.r, error_weights.r)); + sep_error.set_lane<0>(sep_error.lane<0>() + dot(sep_r_vector, error_weights.swz<1, 2>())); + sep_error.set_lane<1>(sep_error.lane<1>() + dot(sep_g_vector, error_weights.swz<0, 2>())); + sep_error.set_lane<2>(sep_error.lane<2>() + dot(sep_b_vector, error_weights.swz<0, 1>())); - separate_error.r += rgb_range[j].r * rgb_range[j].r * error_weights.r; - separate_error.g += rgb_range[j].g * rgb_range[j].g * error_weights.g; - separate_error.b += rgb_range[j].b * rgb_range[j].b * error_weights.b; + sep_error.set_lane<0>(sep_error.lane<0>() + pm.range_sq.lane<0>() * error_weights.lane<0>()); + sep_error.set_lane<1>(sep_error.lane<1>() + pm.range_sq.lane<1>() * error_weights.lane<1>()); + sep_error.set_lane<2>(sep_error.lane<2>() + pm.range_sq.lane<2>() * error_weights.lane<2>()); + } } - if (uncorr_error < uncorr_best_error) + if (uncor_error < uncor_best_error) { - uncorr_best_error = uncorr_error; - uncorr_best_partition = partition; + uncor_best_error = uncor_error; + uncor_best_partition = partition; } - if (samechroma_error < samechroma_best_errors[0]) + if (samec_error < samec_best_errors[0]) { - samechroma_best_errors[1] = samechroma_best_errors[0]; - samechroma_best_partitions[1] = samechroma_best_partitions[0]; + samec_best_errors[1] = samec_best_errors[0]; + samec_best_partitions[1] = samec_best_partitions[0]; - samechroma_best_errors[0] = samechroma_error; - samechroma_best_partitions[0] = partition; + samec_best_errors[0] = samec_error; + samec_best_partitions[0] = partition; } - else if (samechroma_error < samechroma_best_errors[1]) + else if (samec_error < samec_best_errors[1]) { - samechroma_best_errors[1] = samechroma_error; - samechroma_best_partitions[1] = partition; + samec_best_errors[1] = samec_error; + samec_best_partitions[1] = partition; } - if (separate_error.r < separate_best_error) + if (!skip_two_plane) { - separate_best_error = separate_error.r; - separate_best_partition = partition; - separate_best_component = 0; - } + if (sep_error.lane<0>() < sep_best_error) + { + sep_best_error = sep_error.lane<0>(); + sep_best_partition = partition; + sep_best_component = 0; + } - if (separate_error.g < separate_best_error) - { - separate_best_error = separate_error.g; - separate_best_partition = partition; - separate_best_component = 1; - } + if (sep_error.lane<1>() < sep_best_error) + { + sep_best_error = sep_error.lane<1>(); + sep_best_partition = partition; + sep_best_component = 1; + } - if (separate_error.b < separate_best_error) - { - separate_best_error = separate_error.b; - separate_best_partition = partition; - separate_best_component = 2; + if (sep_error.lane<2>() < sep_best_error) + { + sep_best_error = sep_error.lane<2>(); + sep_best_partition = partition; + sep_best_component = 2; + } } } } - *best_partition_uncorrelated = uncorr_best_partition; + *best_partition_uncor = uncor_best_partition; - int index { samechroma_best_partitions[0] != 
uncorr_best_partition ? 0 : 1 }; - *best_partition_samechroma = samechroma_best_partitions[index]; + int index = samec_best_partitions[0] != uncor_best_partition ? 0 : 1; + *best_partition_samec = samec_best_partitions[index]; - *best_partition_dualplane = (separate_best_component << PARTITION_BITS) | - (separate_best_partition); + if (best_partition_dualplane) + { + *best_partition_dualplane = (sep_best_component << PARTITION_BITS) | + (sep_best_partition); + } } #endif diff --git a/libkram/astc-encoder/astcenc_ideal_endpoints_and_weights.cpp b/libkram/astc-encoder/astcenc_ideal_endpoints_and_weights.cpp index 9390c518..6e1906cf 100644 --- a/libkram/astc-encoder/astcenc_ideal_endpoints_and_weights.cpp +++ b/libkram/astc-encoder/astcenc_ideal_endpoints_and_weights.cpp @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2011-2020 Arm Limited +// Copyright 2011-2021 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. You may obtain a copy @@ -34,7 +34,7 @@ #include #endif -static void compute_endpoints_and_ideal_weights_1_component( +static void compute_endpoints_and_ideal_weights_1_comp( const block_size_descriptor* bsd, const partition_info* pt, const imageblock* blk, @@ -44,16 +44,21 @@ static void compute_endpoints_and_ideal_weights_1_component( ) { int partition_count = pt->partition_count; ei->ep.partition_count = partition_count; + promise(partition_count > 0); + + int texel_count = bsd->texel_count; + promise(texel_count > 0); + + float lowvalues[4] { 1e10f, 1e10f, 1e10f, 1e10f }; + float highvalues[4] { -1e10f, -1e10f, -1e10f, -1e10f }; - float lowvalues[4], highvalues[4]; float partition_error_scale[4]; float linelengths_rcp[4]; - int texels_per_block = bsd->texel_count; - - const float *error_weights; + const float *error_weights = nullptr; const float* data_vr = nullptr; - assert(component <= 3); + + assert(component < 4); switch (component) { case 0: @@ -68,34 +73,21 @@ static void compute_endpoints_and_ideal_weights_1_component( error_weights = ewb->texel_weight_b; data_vr = blk->data_b; break; - case 3: + default: error_weights = ewb->texel_weight_a; data_vr = blk->data_a; break; } - for (int i = 0; i < partition_count; i++) - { - lowvalues[i] = 1e10f; - highvalues[i] = -1e10f; - } - - for (int i = 0; i < texels_per_block; i++) + for (int i = 0; i < texel_count; i++) { if (error_weights[i] > 1e-10f) { float value = data_vr[i]; int partition = pt->partition_of_texel[i]; - if (value < lowvalues[partition]) - { - lowvalues[partition] = value; - } - - if (value > highvalues[partition]) - { - highvalues[partition] = value; - } + lowvalues[partition] = astc::min(value, lowvalues[partition]); + highvalues[partition] = astc::max(value, highvalues[partition]); } } @@ -109,63 +101,34 @@ static void compute_endpoints_and_ideal_weights_1_component( highvalues[i] = 0.0f; } - if (diff < 1e-7f) - { - diff = 1e-7f; - } + diff = astc::max(diff, 1e-7f); partition_error_scale[i] = diff * diff; linelengths_rcp[i] = 1.0f / diff; } - for (int i = 0; i < texels_per_block; i++) + for (int i = 0; i < texel_count; i++) { float value = data_vr[i]; int partition = pt->partition_of_texel[i]; value -= lowvalues[partition]; value *= linelengths_rcp[partition]; - - if (value > 1.0f) - { - value = 1.0f; - } - else if (!(value > 0.0f)) - { - value = 0.0f; - } + value = astc::clamp1f(value); ei->weights[i] = value; 
ei->weight_error_scale[i] = partition_error_scale[partition] * error_weights[i]; assert(!astc::isnan(ei->weight_error_scale[i])); } + vmask4 sep_mask = vint4::lane_id() == vint4(component); for (int i = 0; i < partition_count; i++) { - ei->ep.endpt0[i] = float4(blk->red_min, blk->green_min, blk->blue_min, blk->alpha_min); - ei->ep.endpt1[i] = float4(blk->red_max, blk->green_max, blk->blue_max, blk->alpha_max); - switch (component) - { - case 0: // red/x - ei->ep.endpt0[i].r = lowvalues[i]; - ei->ep.endpt1[i].r = highvalues[i]; - break; - case 1: // green/y - ei->ep.endpt0[i].g = lowvalues[i]; - ei->ep.endpt1[i].g = highvalues[i]; - break; - case 2: // blue/z - ei->ep.endpt0[i].b = lowvalues[i]; - ei->ep.endpt1[i].b = highvalues[i]; - break; - case 3: // alpha/w - ei->ep.endpt0[i].a = lowvalues[i]; - ei->ep.endpt1[i].a = highvalues[i]; - break; - } + ei->ep.endpt0[i] = select(blk->data_min, vfloat4(lowvalues[i]), sep_mask); + ei->ep.endpt1[i] = select(blk->data_max, vfloat4(highvalues[i]), sep_mask); } } -static void compute_endpoints_and_ideal_weights_2_components( +static void compute_endpoints_and_ideal_weights_2_comp( const block_size_descriptor* bsd, const partition_info* pt, const imageblock* blk, @@ -176,9 +139,12 @@ static void compute_endpoints_and_ideal_weights_2_components( ) { int partition_count = pt->partition_count; ei->ep.partition_count = partition_count; + promise(partition_count > 0); + + int texel_count = bsd->texel_count; + promise(texel_count > 0); - float4 error_weightings[4]; - float4 color_scalefactors[4]; + partition_metrics pms[4]; float2 scalefactors[4]; @@ -204,48 +170,49 @@ static void compute_endpoints_and_ideal_weights_2_components( data_vg = blk->data_b; } - int texels_per_block = bsd->texel_count; - - compute_partition_error_color_weightings(bsd, ewb, pt, error_weightings, color_scalefactors); + compute_partition_error_color_weightings(*ewb, *pt, pms); for (int i = 0; i < partition_count; i++) { float s1 = 0, s2 = 0; + assert(component1 < 4); switch (component1) { case 0: - s1 = color_scalefactors[i].r; + s1 = pms[i].color_scale.lane<0>(); break; case 1: - s1 = color_scalefactors[i].g; + s1 = pms[i].color_scale.lane<1>(); break; case 2: - s1 = color_scalefactors[i].b; + s1 = pms[i].color_scale.lane<2>(); break; - case 3: - s1 = color_scalefactors[i].a; + default: + s1 = pms[i].color_scale.lane<3>(); break; } + assert(component2 < 4); switch (component2) { case 0: - s2 = color_scalefactors[i].r; + s2 = pms[i].color_scale.lane<0>(); break; case 1: - s2 = color_scalefactors[i].g; + s2 = pms[i].color_scale.lane<1>(); break; case 2: - s2 = color_scalefactors[i].b; + s2 = pms[i].color_scale.lane<2>(); break; - case 3: - s2 = color_scalefactors[i].a; + default: + s2 = pms[i].color_scale.lane<3>(); break; } scalefactors[i] = normalize(float2(s1, s2)) * 1.41421356f; } - float lowparam[4], highparam[4]; + float lowparam[4] { 1e10f, 1e10f, 1e10f, 1e10f }; + float highparam[4] { -1e10f, -1e10f, -1e10f, -1e10f }; float2 averages[4]; float2 directions[4]; @@ -254,35 +221,28 @@ static void compute_endpoints_and_ideal_weights_2_components( float scale[4]; float length_squared[4]; - for (int i = 0; i < partition_count; i++) - { - lowparam[i] = 1e10; - highparam[i] = -1e10; - } - - compute_averages_and_directions_2_components(pt, blk, ewb, scalefactors, component1, component2, averages, directions); + compute_avgs_and_dirs_2_comp(pt, blk, ewb, scalefactors, component1, component2, averages, directions); for (int i = 0; i < partition_count; i++) { - float2 egv = 
directions[i]; - if (egv.r + egv.g < 0.0f) - directions[i] = float2(0.0f) - egv; - } + float2 dir = directions[i]; + if (dir.r + dir.g < 0.0f) + { + dir = float2(0.0f) - dir; + } - for (int i = 0; i < partition_count; i++) - { lines[i].a = averages[i]; - if (dot(directions[i], directions[i]) == 0.0f) + if (dot(dir, dir) == 0.0f) { lines[i].b = normalize(float2(1.0f)); } else { - lines[i].b = normalize(directions[i]); + lines[i].b = normalize(dir); } } - for (int i = 0; i < texels_per_block; i++) + for (int i = 0; i < texel_count; i++) { if (error_weights[i] > 1e-10f) { @@ -292,15 +252,8 @@ static void compute_endpoints_and_ideal_weights_2_components( float param = dot(point - l.a, l.b); ei->weights[i] = param; - if (param < lowparam[partition]) - { - lowparam[partition] = param; - } - - if (param > highparam[partition]) - { - highparam[partition] = param; - } + lowparam[partition] = astc::min(param, lowparam[partition]); + highparam[partition] = astc::max(param, highparam[partition]); } else { @@ -323,11 +276,7 @@ static void compute_endpoints_and_ideal_weights_2_components( // it is possible for a uniform-color partition to produce length=0; this // causes NaN-production and NaN-propagation later on. Set length to // a small value to avoid this problem. - if (length < 1e-7f) - { - length = 1e-7f; - } - + length = astc::max(length, 1e-7f); length_squared[i] = length * length; scale[i] = 1.0f / length; @@ -344,67 +293,22 @@ static void compute_endpoints_and_ideal_weights_2_components( highvalues[i] = ep1; } + vmask4 comp1_mask = vint4::lane_id() == vint4(component1); + vmask4 comp2_mask = vint4::lane_id() == vint4(component2); for (int i = 0; i < partition_count; i++) { - ei->ep.endpt0[i] = float4(blk->red_min, blk->green_min, blk->blue_min, blk->alpha_min); - ei->ep.endpt1[i] = float4(blk->red_max, blk->green_max, blk->blue_max, blk->alpha_max); - - float2 ep0 = lowvalues[i]; - float2 ep1 = highvalues[i]; - - switch (component1) - { - case 0: - ei->ep.endpt0[i].r = ep0.r; - ei->ep.endpt1[i].r = ep1.r; - break; - case 1: - ei->ep.endpt0[i].g = ep0.r; - ei->ep.endpt1[i].g = ep1.r; - break; - case 2: - ei->ep.endpt0[i].b = ep0.r; - ei->ep.endpt1[i].b = ep1.r; - break; - case 3: - ei->ep.endpt0[i].a = ep0.r; - ei->ep.endpt1[i].a = ep1.r; - break; - } + vfloat4 ep0 = select(blk->data_min, vfloat4(lowvalues[i].r), comp1_mask); + vfloat4 ep1 = select(blk->data_max, vfloat4(highvalues[i].r), comp1_mask); - switch (component2) - { - case 0: - ei->ep.endpt0[i].r = ep0.g; - ei->ep.endpt1[i].r = ep1.g; - break; - case 1: - ei->ep.endpt0[i].g = ep0.g; - ei->ep.endpt1[i].g = ep1.g; - break; - case 2: - ei->ep.endpt0[i].b = ep0.g; - ei->ep.endpt1[i].b = ep1.g; - break; - case 3: - ei->ep.endpt0[i].a = ep0.g; - ei->ep.endpt1[i].a = ep1.g; - break; - } + ei->ep.endpt0[i] = select(ep0, vfloat4(lowvalues[i].g), comp2_mask); + ei->ep.endpt1[i] = select(ep1, vfloat4(highvalues[i].g), comp2_mask); } - for (int i = 0; i < texels_per_block; i++) + for (int i = 0; i < texel_count; i++) { int partition = pt->partition_of_texel[i]; float idx = (ei->weights[i] - lowparam[partition]) * scale[partition]; - if (idx > 1.0f) - { - idx = 1.0f; - } - else if (!(idx > 0.0f)) - { - idx = 0.0f; - } + idx = astc::clamp1f(idx); ei->weights[i] = idx; ei->weight_error_scale[i] = length_squared[partition] * error_weights[i]; @@ -412,43 +316,42 @@ static void compute_endpoints_and_ideal_weights_2_components( } } -static void compute_endpoints_and_ideal_weights_3_components( +static void compute_endpoints_and_ideal_weights_3_comp( 
const block_size_descriptor* bsd, const partition_info* pt, const imageblock* blk, const error_weight_block* ewb, endpoints_and_weights* ei, - int omittedComponent + int omitted_component ) { int partition_count = pt->partition_count; ei->ep.partition_count = partition_count; + promise(partition_count > 0); - float4 error_weightings[4]; - float4 color_scalefactors[4]; - - float3 scalefactors[4]; + int texel_count= bsd->texel_count; + promise(texel_count > 0); - int texels_per_block = bsd->texel_count; + partition_metrics pms[4]; const float *error_weights; const float* data_vr = nullptr; const float* data_vg = nullptr; const float* data_vb = nullptr; - if (omittedComponent == 0) + if (omitted_component == 0) { error_weights = ewb->texel_weight_gba; data_vr = blk->data_g; data_vg = blk->data_b; data_vb = blk->data_a; } - else if (omittedComponent == 1) + else if (omitted_component == 1) { error_weights = ewb->texel_weight_rba; data_vr = blk->data_r; data_vg = blk->data_b; data_vb = blk->data_a; } - else if (omittedComponent == 2) + else if (omitted_component == 2) { error_weights = ewb->texel_weight_rga; data_vr = blk->data_r; @@ -463,96 +366,79 @@ static void compute_endpoints_and_ideal_weights_3_components( data_vb = blk->data_b; } - compute_partition_error_color_weightings(bsd, ewb, pt, error_weightings, color_scalefactors); + compute_partition_error_color_weightings(*ewb, *pt, pms); for (int i = 0; i < partition_count; i++) { float s1 = 0, s2 = 0, s3 = 0; - switch (omittedComponent) + assert(omitted_component < 4); + switch (omitted_component) { case 0: - s1 = color_scalefactors[i].g; - s2 = color_scalefactors[i].b; - s3 = color_scalefactors[i].a; + s1 = pms[i].color_scale.lane<1>(); + s2 = pms[i].color_scale.lane<2>(); + s3 = pms[i].color_scale.lane<3>(); break; case 1: - s1 = color_scalefactors[i].r; - s2 = color_scalefactors[i].b; - s3 = color_scalefactors[i].a; + s1 = pms[i].color_scale.lane<0>(); + s2 = pms[i].color_scale.lane<2>(); + s3 = pms[i].color_scale.lane<3>(); break; case 2: - s1 = color_scalefactors[i].r; - s2 = color_scalefactors[i].g; - s3 = color_scalefactors[i].a; + s1 = pms[i].color_scale.lane<0>(); + s2 = pms[i].color_scale.lane<1>(); + s3 = pms[i].color_scale.lane<3>(); break; - case 3: - s1 = color_scalefactors[i].r; - s2 = color_scalefactors[i].g; - s3 = color_scalefactors[i].b; + default: + s1 = pms[i].color_scale.lane<0>(); + s2 = pms[i].color_scale.lane<1>(); + s3 = pms[i].color_scale.lane<2>(); break; } - scalefactors[i] = normalize(float3(s1, s2, s3)) * 1.73205080f; + pms[i].color_scale = normalize(vfloat4(s1, s2, s3, 0.0f)) * 1.73205080f; } - float lowparam[4], highparam[4]; - - float3 averages[4]; - float3 directions[4]; + float lowparam[4] { 1e10f, 1e10f, 1e10f, 1e10f }; + float highparam[4] { -1e10f, -1e10f, -1e10f, -1e10f }; line3 lines[4]; float scale[4]; float length_squared[4]; - for (int i = 0; i < partition_count; i++) - { - lowparam[i] = 1e10f; - highparam[i] = -1e10f; - } - - compute_averages_and_directions_3_components(pt, blk, ewb, scalefactors, omittedComponent, averages, directions); + compute_avgs_and_dirs_3_comp(pt, blk, ewb, omitted_component, pms); for (int i = 0; i < partition_count; i++) { - float3 direc = directions[i]; - if (direc.r + direc.g + direc.b < 0.0f) + vfloat4 dir = pms[i].dir; + if (hadd_rgb_s(dir) < 0.0f) { - directions[i] = float3(0.0f) - direc; + dir = vfloat4(0.0f) - dir; } - } - for (int i = 0; i < partition_count; i++) - { - lines[i].a = averages[i]; - if (dot(directions[i], directions[i]) == 0.0f) + lines[i].a = 
pms[i].avg; + if (dot3_s(dir, dir) == 0.0f) { - lines[i].b = normalize(float3(1.0f)); + lines[i].b = normalize(vfloat4(1.0f, 1.0f, 1.0f, 0.0f)); } else { - lines[i].b = normalize(directions[i]); + lines[i].b = normalize(dir); } } - for (int i = 0; i < texels_per_block; i++) + for (int i = 0; i < texel_count; i++) { if (error_weights[i] > 1e-10f) { int partition = pt->partition_of_texel[i]; - float3 point = float3(data_vr[i], data_vg[i], data_vb[i]) * scalefactors[partition]; + vfloat4 point = vfloat4(data_vr[i], data_vg[i], data_vb[i], 0.0f) * pms[partition].color_scale; line3 l = lines[partition]; - float param = dot(point - l.a, l.b); + float param = dot3_s(point - l.a, l.b); ei->weights[i] = param; - if (param < lowparam[partition]) - { - lowparam[partition] = param; - } - - if (param > highparam[partition]) - { - highparam[partition] = param; - } + lowparam[partition] = astc::min(param, lowparam[partition]); + highparam[partition] = astc::max(param, highparam[partition]); } else { @@ -560,9 +446,6 @@ static void compute_endpoints_and_ideal_weights_3_components( } } - float3 lowvalues[4]; - float3 highvalues[4]; - for (int i = 0; i < partition_count; i++) { float length = highparam[i] - lowparam[i]; @@ -575,90 +458,48 @@ static void compute_endpoints_and_ideal_weights_3_components( // it is possible for a uniform-color partition to produce length=0; this // causes NaN-production and NaN-propagation later on. Set length to // a small value to avoid this problem. - if (length < 1e-7f) - { - length = 1e-7f; - } + length = astc::max(length, 1e-7f); length_squared[i] = length * length; scale[i] = 1.0f / length; - float3 ep0 = lines[i].a + lines[i].b * lowparam[i]; - float3 ep1 = lines[i].a + lines[i].b * highparam[i]; - - ep0.r /= scalefactors[i].r; - ep0.g /= scalefactors[i].g; - ep0.b /= scalefactors[i].b; - - ep1.r /= scalefactors[i].r; - ep1.g /= scalefactors[i].g; - ep1.b /= scalefactors[i].b; - - lowvalues[i] = ep0; - highvalues[i] = ep1; - } + vfloat4 ep0 = lines[i].a + lines[i].b * lowparam[i]; + vfloat4 ep1 = lines[i].a + lines[i].b * highparam[i]; - for (int i = 0; i < partition_count; i++) - { - ei->ep.endpt0[i] = float4(blk->red_min, blk->green_min, blk->blue_min, blk->alpha_min); - ei->ep.endpt1[i] = float4(blk->red_max, blk->green_max, blk->blue_max, blk->alpha_max); + ep0 = ep0 / pms[i].color_scale; + ep1 = ep1 / pms[i].color_scale; - float3 ep0 = lowvalues[i]; - float3 ep1 = highvalues[i]; + vfloat4 bmin = blk->data_min; + vfloat4 bmax = blk->data_max; - switch (omittedComponent) + // TODO: Probably a programmatic vector permute we can do here ... 
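Regarding the TODO just above about a programmatic vector permute: one possible shape for such a helper, sketched here purely as an aside (it is not in the patch), rebuilds the 4-lane endpoint by scattering through a small array. It assumes only the storea() / vfloat4::loada() helpers already used elsewhere in this diff.

// Hypothetical helper: reinsert the omitted component's block value into a
// vfloat4 whose lanes 0..2 hold the three kept components, producing the
// same lane layout as the per-component switch below.
static inline vfloat4 insert_omitted_comp(vfloat4 packed3, float block_value, int omitted_component)
{
    alignas(16) float packed[4];
    alignas(16) float full[4];
    storea(packed3, packed);

    int src = 0;
    for (int lane = 0; lane < 4; lane++)
    {
        full[lane] = (lane == omitted_component) ? block_value : packed[src++];
    }

    return vfloat4::loada(full);
}

The call site would invoke it twice per partition, passing the omitted lane of blk->data_min for endpt0 and of blk->data_max for endpt1.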
+ assert(omitted_component < 4); + switch (omitted_component) { case 0: - ei->ep.endpt0[i].g = ep0.r; - ei->ep.endpt0[i].b = ep0.g; - ei->ep.endpt0[i].a = ep0.b; - - ei->ep.endpt1[i].g = ep1.r; - ei->ep.endpt1[i].b = ep1.g; - ei->ep.endpt1[i].a = ep1.b; + ei->ep.endpt0[i] = vfloat4(bmin.lane<0>(), ep0.lane<0>(), ep0.lane<1>(), ep0.lane<2>()); + ei->ep.endpt1[i] = vfloat4(bmax.lane<0>(), ep1.lane<0>(), ep1.lane<1>(), ep1.lane<2>()); break; case 1: - ei->ep.endpt0[i].r = ep0.r; - ei->ep.endpt0[i].b = ep0.g; - ei->ep.endpt0[i].a = ep0.b; - - ei->ep.endpt1[i].r = ep1.r; - ei->ep.endpt1[i].b = ep1.g; - ei->ep.endpt1[i].a = ep1.b; + ei->ep.endpt0[i] = vfloat4(ep0.lane<0>(), bmin.lane<1>(), ep0.lane<1>(), ep0.lane<2>()); + ei->ep.endpt1[i] = vfloat4(ep1.lane<0>(), bmax.lane<1>(), ep1.lane<1>(), ep1.lane<2>()); break; case 2: - ei->ep.endpt0[i].r = ep0.r; - ei->ep.endpt0[i].g = ep0.g; - ei->ep.endpt0[i].a = ep0.b; - - ei->ep.endpt1[i].r = ep1.r; - ei->ep.endpt1[i].g = ep1.g; - ei->ep.endpt1[i].a = ep1.b; + ei->ep.endpt0[i] = vfloat4(ep0.lane<0>(), ep0.lane<1>(), bmin.lane<2>(), ep0.lane<2>()); + ei->ep.endpt1[i] = vfloat4(ep1.lane<0>(), ep1.lane<1>(), bmax.lane<2>(), ep1.lane<2>()); break; - case 3: - ei->ep.endpt0[i].r = ep0.r; - ei->ep.endpt0[i].g = ep0.g; - ei->ep.endpt0[i].b = ep0.b; - - ei->ep.endpt1[i].r = ep1.r; - ei->ep.endpt1[i].g = ep1.g; - ei->ep.endpt1[i].b = ep1.b; + default: + ei->ep.endpt0[i] = vfloat4(ep0.lane<0>(), ep0.lane<1>(), ep0.lane<2>(), bmin.lane<3>()); + ei->ep.endpt1[i] = vfloat4(ep1.lane<0>(), ep1.lane<1>(), ep1.lane<2>(), bmax.lane<3>()); break; } } - for (int i = 0; i < texels_per_block; i++) + for (int i = 0; i < texel_count; i++) { int partition = pt->partition_of_texel[i]; float idx = (ei->weights[i] - lowparam[partition]) * scale[partition]; - if (idx > 1.0f) - { - idx = 1.0f; - } - else if (!(idx > 0.0f)) - { - idx = 0.0f; - } + idx = astc::clamp1f(idx); ei->weights[i] = idx; ei->weight_error_scale[i] = length_squared[partition] * error_weights[i]; @@ -666,7 +507,7 @@ static void compute_endpoints_and_ideal_weights_3_components( } } -static void compute_endpoints_and_ideal_weights_rgba( +static void compute_endpoints_and_ideal_weights_4_comp( const block_size_descriptor* bsd, const partition_info* pt, const imageblock* blk, @@ -676,81 +517,65 @@ static void compute_endpoints_and_ideal_weights_rgba( const float *error_weights = ewb->texel_weight; int partition_count = pt->partition_count; - float lowparam[4], highparam[4]; - for (int i = 0; i < partition_count; i++) - { - lowparam[i] = 1e10; - highparam[i] = -1e10; - } - float4 averages[4]; - float4 directions_rgba[4]; + int texel_count= bsd->texel_count; + promise(texel_count > 0); + promise(partition_count > 0); + + float lowparam[4] { 1e10, 1e10, 1e10, 1e10 }; + float highparam[4] { -1e10, -1e10, -1e10, -1e10 }; line4 lines[4]; float scale[4]; float length_squared[4]; - float4 error_weightings[4]; - float4 color_scalefactors[4]; - float4 scalefactors[4]; - - int texels_per_block = bsd->texel_count; + partition_metrics pms[4]; - compute_partition_error_color_weightings(bsd, ewb, pt, error_weightings, color_scalefactors); + compute_partition_error_color_weightings(*ewb, *pt, pms); for (int i = 0; i < partition_count; i++) { - scalefactors[i] = normalize(color_scalefactors[i]) * 2.0f; + pms[i].color_scale = normalize(pms[i].color_scale) * 2.0f; } - compute_averages_and_directions_rgba(pt, blk, ewb, scalefactors, averages, directions_rgba); + compute_avgs_and_dirs_4_comp(pt, blk, ewb, pms); // if the 
direction-vector ends up pointing from light to dark, FLIP IT! // this will make the first endpoint the darkest one. for (int i = 0; i < partition_count; i++) { - float4 direc = directions_rgba[i]; - if (direc.r + direc.g + direc.b < 0.0f) + vfloat4 dir = pms[i].dir; + if (hadd_rgb_s(dir) < 0.0f) { - directions_rgba[i] = float4(0.0f) - direc; + dir = vfloat4::zero() - dir; } - } - for (int i = 0; i < partition_count; i++) - { - lines[i].a = averages[i]; - if (dot(directions_rgba[i], directions_rgba[i]) == 0.0f) + lines[i].a = pms[i].avg; + if (dot_s(dir, dir) == 0.0f) { - lines[i].b = normalize(float4(1.0f)); + lines[i].b = normalize(vfloat4(1.0f)); } else { - lines[i].b = normalize(directions_rgba[i]); + lines[i].b = normalize(dir); } } - for (int i = 0; i < texels_per_block; i++) + for (int i = 0; i < texel_count; i++) { if (error_weights[i] > 1e-10f) { int partition = pt->partition_of_texel[i]; - float4 point = float4(blk->data_r[i], blk->data_g[i], blk->data_b[i], blk->data_a[i]) * scalefactors[partition]; + vfloat4 point = blk->texel(i) * pms[partition].color_scale; line4 l = lines[partition]; - float param = dot(point - l.a, l.b); + float param = dot_s(point - l.a, l.b); ei->weights[i] = param; - if (param < lowparam[partition]) - { - lowparam[partition] = param; - } - - if (param > highparam[partition]) - { - highparam[partition] = param; - } + lowparam[partition] = astc::min(param, lowparam[partition]); + highparam[partition] = astc::max(param, highparam[partition]); } else { @@ -770,43 +595,24 @@ static void compute_endpoints_and_ideal_weights_rgba( // it is possible for a uniform-color partition to produce length=0; this // causes NaN-production and NaN-propagation later on. Set length to // a small value to avoid this problem. - if (length < 1e-7f) - { - length = 1e-7f; - } + length = astc::max(length, 1e-7f); length_squared[i] = length * length; scale[i] = 1.0f / length; - float4 ep0 = lines[i].a + lines[i].b * lowparam[i]; - float4 ep1 = lines[i].a + lines[i].b * highparam[i]; - - ep0.r /= scalefactors[i].r; - ep0.g /= scalefactors[i].g; - ep0.b /= scalefactors[i].b; - ep0.a /= scalefactors[i].a; + vfloat4 ep0 = lines[i].a + lines[i].b * lowparam[i]; + vfloat4 ep1 = lines[i].a + lines[i].b * highparam[i]; - ep1.r /= scalefactors[i].r; - ep1.g /= scalefactors[i].g; - ep1.b /= scalefactors[i].b; - ep1.a /= scalefactors[i].a; - - ei->ep.endpt0[i] = ep0; - ei->ep.endpt1[i] = ep1; + ei->ep.endpt0[i] = ep0 / pms[i].color_scale; + ei->ep.endpt1[i] = ep1 / pms[i].color_scale; } - for (int i = 0; i < texels_per_block; i++) + for (int i = 0; i < texel_count; i++) { int partition = pt->partition_of_texel[i]; float idx = (ei->weights[i] - lowparam[partition]) * scale[partition]; - if (idx > 1.0f) - { - idx = 1.0f; - } - else if (!(idx > 0.0f)) - { - idx = 0.0f; - } + idx = astc::clamp1f(idx); + ei->weights[i] = idx; ei->weight_error_scale[i] = error_weights[i] * length_squared[partition]; assert(!astc::isnan(ei->weight_error_scale[i])); @@ -830,11 +636,11 @@ void compute_endpoints_and_ideal_weights_1_plane( int uses_alpha = imageblock_uses_alpha(blk); if (uses_alpha) { - compute_endpoints_and_ideal_weights_rgba(bsd, pt, blk, ewb, ei); + compute_endpoints_and_ideal_weights_4_comp(bsd, pt, blk, ewb, ei); } else { - compute_endpoints_and_ideal_weights_3_components(bsd, pt, blk, ewb, ei, 3); + compute_endpoints_and_ideal_weights_3_comp(bsd, pt, blk, ewb, ei, 3); } } @@ -848,48 +654,50 @@ void compute_endpoints_and_ideal_weights_2_planes( endpoints_and_weights* ei2 ) { int uses_alpha = 
imageblock_uses_alpha(blk); + + assert(separate_component < 4); switch (separate_component) { - case 0: // separate weights for red - if (uses_alpha == 1) + case 0: // separate weights for red + if (uses_alpha) { - compute_endpoints_and_ideal_weights_3_components(bsd, pt, blk, ewb, ei1, 0); + compute_endpoints_and_ideal_weights_3_comp(bsd, pt, blk, ewb, ei1, 0); } else { - compute_endpoints_and_ideal_weights_2_components(bsd, pt, blk, ewb, ei1, 1, 2); + compute_endpoints_and_ideal_weights_2_comp(bsd, pt, blk, ewb, ei1, 1, 2); } - compute_endpoints_and_ideal_weights_1_component(bsd, pt, blk, ewb, ei2, 0); + compute_endpoints_and_ideal_weights_1_comp(bsd, pt, blk, ewb, ei2, 0); break; - case 1: // separate weights for green - if (uses_alpha == 1) + case 1: // separate weights for green + if (uses_alpha) { - compute_endpoints_and_ideal_weights_3_components(bsd, pt, blk, ewb, ei1, 1); + compute_endpoints_and_ideal_weights_3_comp(bsd, pt, blk, ewb, ei1, 1); } else { - compute_endpoints_and_ideal_weights_2_components(bsd, pt, blk, ewb, ei1, 0, 2); + compute_endpoints_and_ideal_weights_2_comp(bsd, pt, blk, ewb, ei1, 0, 2); } - compute_endpoints_and_ideal_weights_1_component(bsd, pt, blk, ewb, ei2, 1); + compute_endpoints_and_ideal_weights_1_comp(bsd, pt, blk, ewb, ei2, 1); break; - case 2: // separate weights for blue - if (uses_alpha == 1) + case 2: // separate weights for blue + if (uses_alpha) { - compute_endpoints_and_ideal_weights_3_components(bsd, pt, blk, ewb, ei1, 2); + compute_endpoints_and_ideal_weights_3_comp(bsd, pt, blk, ewb, ei1, 2); } else { - compute_endpoints_and_ideal_weights_2_components(bsd, pt, blk, ewb, ei1, 0, 1); + compute_endpoints_and_ideal_weights_2_comp(bsd, pt, blk, ewb, ei1, 0, 1); } - compute_endpoints_and_ideal_weights_1_component(bsd, pt, blk, ewb, ei2, 2); + compute_endpoints_and_ideal_weights_1_comp(bsd, pt, blk, ewb, ei2, 2); break; - case 3: // separate weights for alpha - assert(uses_alpha != 0); - compute_endpoints_and_ideal_weights_3_components(bsd, pt, blk, ewb, ei1, 3); - compute_endpoints_and_ideal_weights_1_component(bsd, pt, blk, ewb, ei2, 3); + default: // separate weights for alpha + assert(uses_alpha); + compute_endpoints_and_ideal_weights_3_comp(bsd, pt, blk, ewb, ei1, 3); + compute_endpoints_and_ideal_weights_1_comp(bsd, pt, blk, ewb, ei2, 3); break; } } @@ -929,150 +737,328 @@ void compute_endpoints_and_ideal_weights_2_planes( go into a given texel. 
*/ -static float compute_value_of_texel_flt( - int texel_to_get, - const decimation_table* it, +float compute_error_of_weight_set( + const endpoints_and_weights* eai, + const decimation_table* dt, const float* weights ) { - const uint8_t *texel_weights = it->texel_weights[texel_to_get]; - const float *texel_weights_float = it->texel_weights_float[texel_to_get]; + vfloat4 error_summav = vfloat4::zero(); + float error_summa = 0.0f; + int texel_count = dt->texel_count; - return (weights[texel_weights[0]] * texel_weights_float[0] + - weights[texel_weights[1]] * texel_weights_float[1]) + - (weights[texel_weights[2]] * texel_weights_float[2] + - weights[texel_weights[3]] * texel_weights_float[3]); -} + int i = 0; -static inline float compute_error_of_texel( - const endpoints_and_weights * eai, - int texel_to_get, - const decimation_table* it, - const float *weights -) { - float current_value = compute_value_of_texel_flt(texel_to_get, it, weights); - float valuedif = current_value - eai->weights[texel_to_get]; - return valuedif * valuedif * eai->weight_error_scale[texel_to_get]; -} + // Process SIMD-width texel coordinates at at time while we can + int clipped_texel_count = round_down_to_simd_multiple_vla(texel_count); + for (/* */; i < clipped_texel_count; i += ASTCENC_SIMD_WIDTH) + { + // Load the bilinear filter texel weight indexes + vint weight_idx0 = vint(&(dt->texel_weights_4t[0][i])); + vint weight_idx1 = vint(&(dt->texel_weights_4t[1][i])); + vint weight_idx2 = vint(&(dt->texel_weights_4t[2][i])); + vint weight_idx3 = vint(&(dt->texel_weights_4t[3][i])); + + // Load the bilinear filter texel weights + vfloat weight_val0 = gatherf(weights, weight_idx0); + vfloat weight_val1 = gatherf(weights, weight_idx1); + vfloat weight_val2 = gatherf(weights, weight_idx2); + vfloat weight_val3 = gatherf(weights, weight_idx3); + + // Load the weight contributions for each texel + // TODO: Should we rename this dt->texel_weights_float field? + vfloat tex_weight_float0 = loada(&(dt->texel_weights_float_4t[0][i])); + vfloat tex_weight_float1 = loada(&(dt->texel_weights_float_4t[1][i])); + vfloat tex_weight_float2 = loada(&(dt->texel_weights_float_4t[2][i])); + vfloat tex_weight_float3 = loada(&(dt->texel_weights_float_4t[3][i])); + + // Compute the bilinear interpolation + vfloat current_values = (weight_val0 * tex_weight_float0 + + weight_val1 * tex_weight_float1) + + (weight_val2 * tex_weight_float2 + + weight_val3 * tex_weight_float3); + + // Compute the error between the computed value and the ideal weight + vfloat actual_values = loada(&(eai->weights[i])); + vfloat diff = current_values - actual_values; + vfloat significance = loada(&(eai->weight_error_scale[i])); + vfloat error = diff * diff * significance; + + haccumulate(error_summav, error); + } -float compute_error_of_weight_set( - const endpoints_and_weights* eai, - const decimation_table* it, - const float* weights -) { - int texel_count = it->num_texels; - float error_summa = 0.0; - for (int i = 0; i < texel_count; i++) + // Loop tail + // Error is buffered and accumulated in blocks of 4 to ensure that + // the partial sums added to the accumulator are invariant with the + // vector implementation, irrespective of vector size ... + alignas(16) float errorsum_tmp[4] { 0 }; + for (/* */; i < texel_count; i++) { - error_summa += compute_error_of_texel(eai, i, it, weights); + // This isn't the ideal access pattern, but the cache lines are probably + // already in the cache due to the vector loop above, so go with it ... 
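A short aside on why the scalar loop tail below stages its errors in a 4-wide buffer instead of adding straight into a float: float addition is not associative, so summing the tail one element at a time would reorder the reduction relative to the SIMD path, and the totals could then differ between vector widths (exactly the invariance the comment above is guarding). Pushing the tail through the same vfloat4 accumulator, four values at a time, keeps the summation order identical. A tiny standalone illustration of the non-associativity (values are arbitrary, not from the codec):

#include <cstdio>

int main()
{
    float big = 16777216.0f;   // 2^24: the point where adding 1.0f is lost
    float one = 1.0f;

    float left_to_right = (big + one) + one;   // each 1.0f rounds away: 16777216
    float grouped       = big + (one + one);   // 2.0f survives:         16777218

    printf("%.1f vs %.1f\n", left_to_right, grouped);
    return 0;
}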
+ float current_value = (weights[dt->texel_weights_4t[0][i]] * dt->texel_weights_float_4t[0][i] + + weights[dt->texel_weights_4t[1][i]] * dt->texel_weights_float_4t[1][i]) + + (weights[dt->texel_weights_4t[2][i]] * dt->texel_weights_float_4t[2][i] + + weights[dt->texel_weights_4t[3][i]] * dt->texel_weights_float_4t[3][i]); + + float valuedif = current_value - eai->weights[i]; + float error = valuedif * valuedif * eai->weight_error_scale[i]; + + // Accumulate error sum in the temporary array + int error_index = i & 0x3; + errorsum_tmp[error_index] = error; + +#if ASTCENC_SIMD_WIDTH == 8 + // Zero the temporary staging buffer every 4 items unless last. Note + // that this block can only trigger for 6x5 blocks, all other partials + // tails are shorter than 4 ... + if ((i & 0x7) == 0x03) + { + haccumulate(error_summav, vfloat4::loada(errorsum_tmp)); + storea(vfloat4::zero(), errorsum_tmp); + } +#endif } + + // Accumulate the loop tail using the vfloat4 swizzle + haccumulate(error_summav, vfloat4::loada(errorsum_tmp)); + + // Resolve the final scalar accumulator sum + haccumulate(error_summa, error_summav); + return error_summa; } -/* - Given a complete weight set and a decimation table, try to - compute the optimal weight set (assuming infinite precision) - given the selected decimation table. -*/ +/* See header for documentation. */ +// Note: This function is vectorized, but needs to use gathers to access the +// decimation table structures so vectorization is currently only enabled for +// AVX2. The implementation loops over decimated weights, and then texels for +// each weight. We know the backing memory is "large enough" we can can +// overshoot the weight count to always use full vectors without a loop tail. +// The inner loop operates on 8 weights, each of which may have a different +// number of texels referenced by it. We iterate over the max reference count, +// and then use lane masks to disable lanes that are no longer in scope. void compute_ideal_weights_for_decimation_table( - const endpoints_and_weights* eai, - const decimation_table* it, - float* weight_set, - float* weights + const endpoints_and_weights& eai_in, + endpoints_and_weights& eai_out, + const decimation_table& dt, + float* RESTRICT weight_set, + float* RESTRICT weights ) { - int texels_per_block = it->num_texels; - int weight_count = it->num_weights; + int texel_count = dt.texel_count; + int weight_count = dt.weight_count; - // perform a shortcut in the case of a complete decimation table - if (texels_per_block == weight_count) + promise(texel_count > 0); + promise(weight_count > 0); + + // This function includes a copy of the epw from eai_in to eai_out. We do it + // here because we want to load the data anyway, so we can avoid loading it + // from memory twice. + eai_out.ep = eai_in.ep; + + // If we have a 1:1 mapping just shortcut the computation - clone the + // weights into both the weight set and the output epw copy. 
+ if (texel_count == weight_count) { - for (int i = 0; i < it->num_texels; i++) + for (int i = 0; i < texel_count; i++) { - int texel = it->weight_texel[i][0]; - weight_set[i] = eai->weights[texel]; - weights[i] = eai->weight_error_scale[texel]; + assert(i == dt.weight_texel[0][i]); + weight_set[i] = eai_in.weights[i]; + weights[i] = eai_in.weight_error_scale[i]; + + eai_out.weights[i] = eai_in.weights[i]; + eai_out.weight_error_scale[i] = eai_in.weight_error_scale[i]; } return; } + // If we don't have a 1:1 mapping just clone the weights into the output + // epw copy and then do the full algorithm to decimate weights. + else + { + for (int i = 0; i < texel_count; i++) + { + eai_out.weights[i] = eai_in.weights[i]; + eai_out.weight_error_scale[i] = eai_in.weight_error_scale[i]; + } + } - // if the shortcut is not available, we will instead compute a simple estimate - // and perform a single iteration of refinement on that estimate. - float infilled_weights[MAX_TEXELS_PER_BLOCK]; + // Otherwise compute an estimate and perform single refinement iteration + alignas(ASTCENC_VECALIGN) float infilled_weights[MAX_TEXELS_PER_BLOCK]; - // compute an initial average for each weight. - for (int i = 0; i < weight_count; i++) + // Compute an initial average for each decimated weight +#if ASTCENC_SIMD_WIDTH >= 8 + int clipped_weight_count = round_up_to_simd_multiple_vla(weight_count); + for (int i = 0; i < clipped_weight_count; i += ASTCENC_SIMD_WIDTH) { - int texel_count = it->weight_num_texels[i]; + // Start with a small value to avoid div-by-zero later + vfloat weight_weight(1e-10f); + vfloat initial_weight = vfloat::zero(); + + // Accumulate error weighting of all the texels using this weight + vint weight_texel_count(dt.weight_texel_count + i); + int max_texel_count = hmax(weight_texel_count).lane<0>(); + promise(max_texel_count > 0); + + for (int j = 0; j < max_texel_count; j++) + { + // Not all lanes may actually use j texels, so mask out if idle + vmask active = weight_texel_count > vint(j); + + vint texel(dt.weight_texel[j] + i); + texel = select(vint::zero(), texel, active); - float weight_weight = 1e-10f; // to avoid 0/0 later on + vfloat weight = loada(dt.weights_flt[j] + i); + weight = select(vfloat::zero(), weight, active); + + vfloat contrib_weight = weight * gatherf(eai_in.weight_error_scale, texel); + + weight_weight = weight_weight + contrib_weight; + initial_weight = initial_weight + gatherf(eai_in.weights, texel) * contrib_weight; + } + + storea(weight_weight, weights + i); + storea(initial_weight / weight_weight, weight_set + i); + } +#else + for (int i = 0; i < weight_count; i++) + { + // Start with a small value to avoid div-by-zero later + float weight_weight = 1e-10f; float initial_weight = 0.0f; - for (int j = 0; j < texel_count; j++) + + // Accumulate error weighting of all the texels using this weight + int weight_texel_count = dt.weight_texel_count[i]; + promise(weight_texel_count > 0); + + for (int j = 0; j < weight_texel_count; j++) { - int texel = it->weight_texel[i][j]; - float weight = it->weights_flt[i][j]; - float contrib_weight = weight * eai->weight_error_scale[texel]; + int texel = dt.weight_texel[j][i]; + float weight = dt.weights_flt[j][i]; + float contrib_weight = weight * eai_in.weight_error_scale[texel]; weight_weight += contrib_weight; - initial_weight += eai->weights[texel] * contrib_weight; + initial_weight += eai_in.weights[texel] * contrib_weight; } weights[i] = weight_weight; - weight_set[i] = initial_weight / weight_weight; // this is the 0/0 that is to 
be avoided. + weight_set[i] = initial_weight / weight_weight; } +#endif - for (int i = 0; i < texels_per_block; i++) + // Populate the interpolated weight grid based on the initital average +#if ASTCENC_SIMD_WIDTH >= 8 + // Process SIMD-width texel coordinates at at time while we can + int clipped_texel_count = round_up_to_simd_multiple_vla(texel_count); + for (int i = 0; i < clipped_texel_count; i += ASTCENC_SIMD_WIDTH) + { + vint texel_weights_0(dt.texel_weights_4t[0] + i); + vint texel_weights_1(dt.texel_weights_4t[1] + i); + vint texel_weights_2(dt.texel_weights_4t[2] + i); + vint texel_weights_3(dt.texel_weights_4t[3] + i); + + vfloat weight_set_0 = gatherf(weight_set, texel_weights_0); + vfloat weight_set_1 = gatherf(weight_set, texel_weights_1); + vfloat weight_set_2 = gatherf(weight_set, texel_weights_2); + vfloat weight_set_3 = gatherf(weight_set, texel_weights_3); + + vfloat texel_weights_float_0 = loada(dt.texel_weights_float_4t[0] + i); + vfloat texel_weights_float_1 = loada(dt.texel_weights_float_4t[1] + i); + vfloat texel_weights_float_2 = loada(dt.texel_weights_float_4t[2] + i); + vfloat texel_weights_float_3 = loada(dt.texel_weights_float_4t[3] + i); + + vfloat weight = (weight_set_0 * texel_weights_float_0 + + weight_set_1 * texel_weights_float_1) + + (weight_set_2 * texel_weights_float_2 + + weight_set_3 * texel_weights_float_3); + + storea(weight, infilled_weights + i); + } +#else + for (int i = 0; i < texel_count; i++) { - const uint8_t *texel_weights = it->texel_weights[i]; - const float *texel_weights_float = it->texel_weights_float[i]; + const uint8_t *texel_weights = dt.texel_weights_t4[i]; + const float *texel_weights_float = dt.texel_weights_float_t4[i]; infilled_weights[i] = (weight_set[texel_weights[0]] * texel_weights_float[0] - + weight_set[texel_weights[1]] * texel_weights_float[1]) - + (weight_set[texel_weights[2]] * texel_weights_float[2] - + weight_set[texel_weights[3]] * texel_weights_float[3]); + + weight_set[texel_weights[1]] * texel_weights_float[1]) + + (weight_set[texel_weights[2]] * texel_weights_float[2] + + weight_set[texel_weights[3]] * texel_weights_float[3]); } +#endif + // Perform a single iteration of refinement constexpr float stepsize = 0.25f; - constexpr float ch0_scale = 4.0f * (stepsize * stepsize * (1.0f / (TEXEL_WEIGHT_SUM * TEXEL_WEIGHT_SUM))); - constexpr float ch1_scale = -2.0f * (stepsize * (2.0f / TEXEL_WEIGHT_SUM)); - constexpr float chd_scale = (ch1_scale / ch0_scale) * stepsize; + constexpr float chd_scale = -TEXEL_WEIGHT_SUM; - for (int i = 0; i < weight_count; i++) +#if ASTCENC_SIMD_WIDTH >= 8 + for (int i = 0; i < clipped_weight_count; i += ASTCENC_SIMD_WIDTH) { - float weight_val = weight_set[i]; + // Start with a small value to avoid div-by-zero later + vfloat weight_val = loada(weight_set + i); + + // Accumulate error weighting of all the texels using this weight + vfloat error_change0(1e-10f); + vfloat error_change1(0.0f); + + // Accumulate error weighting of all the texels using this weight + vint weight_texel_count(dt.weight_texel_count + i); + int max_texel_count = hmax(weight_texel_count).lane<0>(); + promise(max_texel_count > 0); + + for (int j = 0; j < max_texel_count; j++) + { + // Not all lanes may actually use j texels, so mask out if idle + vmask active = weight_texel_count > vint(j); - const uint8_t *weight_texel_ptr = it->weight_texel[i]; - const float *weights_ptr = it->weights_flt[i]; + vint texel(dt.weight_texel[j] + i); + texel = select(vint::zero(), texel, active); - // compute the two error changes that 
can occur from perturbing the current index. - int num_weights = it->weight_num_texels[i]; + vfloat contrib_weight = loada(dt.weights_flt[j] + i); + contrib_weight = select(vfloat::zero(), contrib_weight, active); + + vfloat scale = gatherf(eai_in.weight_error_scale, texel) * contrib_weight; + vfloat old_weight = gatherf(infilled_weights, texel); + vfloat ideal_weight = gatherf(eai_in.weights, texel); + + error_change0 = error_change0 + contrib_weight * scale; + error_change1 = error_change1 + (old_weight - ideal_weight) * scale; + } + + vfloat step = (error_change1 * chd_scale) / error_change0; + step = clamp(-stepsize, stepsize, step); + + // update the weight + storea(weight_val + step, weight_set + i); + } +#else + for (int i = 0; i < weight_count; i++) + { + float weight_val = weight_set[i]; - float error_change0 = 1e-10f; // done in order to ensure that this value isn't 0, in order to avoid a possible divide by zero later. + // Start with a small value to avoid div-by-zero later + float error_change0 = 1e-10f; float error_change1 = 0.0f; - for (int k = 0; k < num_weights; k++) + // Compute the two error changes that occur from perturbing the current index + int weight_texel_count = dt.weight_texel_count[i]; + promise(weight_texel_count > 0); + for (int k = 0; k < weight_texel_count; k++) { - uint8_t weight_texel = weight_texel_ptr[k]; - float weights2 = weights_ptr[k]; + uint8_t texel = dt.weight_texel[k][i]; + float contrib_weight = dt.weights_flt[k][i]; - float scale = eai->weight_error_scale[weight_texel] * weights2; - float old_weight = infilled_weights[weight_texel]; - float ideal_weight = eai->weights[weight_texel]; + float scale = eai_in.weight_error_scale[texel] * contrib_weight; + float old_weight = infilled_weights[texel]; + float ideal_weight = eai_in.weights[texel]; - error_change0 += weights2 * scale; + error_change0 += contrib_weight * scale; error_change1 += (old_weight - ideal_weight) * scale; } float step = (error_change1 * chd_scale) / error_change0; - // clamp the step-value. - if (step < -stepsize) - { - step = -stepsize; - } - else if (step > stepsize) - { - step = stepsize; - } + step = astc::clamp(step, -stepsize, stepsize); // update the weight weight_set[i] = weight_val + step; } +#endif } /* @@ -1086,20 +1072,20 @@ void compute_ideal_weights_for_decimation_table( Repeat until we have made a complete processing pass over all weights without triggering any perturbations *OR* we have run 4 full passes. */ -void compute_ideal_quantized_weights_for_decimation_table( - const decimation_table* it, +void compute_quantized_weights_for_decimation_table( + const decimation_table* dt, float low_bound, float high_bound, const float* weight_set_in, float* weight_set_out, uint8_t* quantized_weight_set, - int quantization_level + int quant_level ) { - int weight_count = it->num_weights; - const quantization_and_transfer_table *qat = &(quant_and_xfer_tables[quantization_level]); + int weight_count = dt->weight_count; + const quantization_and_transfer_table *qat = &(quant_and_xfer_tables[quant_level]); - static const int quant_levels[12] = { 2,3,4,5,6,8,10,12,16,20,24,32 }; - float quant_level_m1 = (float)(quant_levels[quantization_level] - 1); + static const int quant_levels[12] { 2,3,4,5,6,8,10,12,16,20,24,32 }; + float quant_level_m1 = (float)(quant_levels[quant_level] - 1); // Quantize the weight set using both the specified low/high bounds // and the standard 0..1 weight bounds. 
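A note on the rounding test used in both the vector and scalar paths of the next hunk: the unquantized_value_unsc table appears to be on the same 0..64 scale used elsewhere in this patch (compare the 1/64 unquantization in recompute_ideal_colors_2planes), while ix is in 0..1, so `ixl + ixh < 128.0f * ix` is simply a midpoint comparison, (ixl + ixh) / 2 < 64 * ix, i.e. round up whenever the scaled weight sits above the midpoint of the two neighbouring quantization levels. A hedged standalone restatement, with a made-up level table for illustration:

#include <cstdio>

// Illustrative only: choose the nearer of two adjacent quantization levels,
// mirroring the (ixl + ixh < 128 * ix) test in the hunk below. The 'unsc'
// values are assumed to be on the codec's 0..64 unquantized scale.
static int round_to_nearer_level(float ix, int lo_idx, const float* unsc)
{
    float ixl = unsc[lo_idx];
    float ixh = unsc[lo_idx + 1];

    // Same as (ixl + ixh) * 0.5f < ix * 64.0f: is the target above the midpoint?
    return (ixl + ixh < 128.0f * ix) ? (lo_idx + 1) : lo_idx;
}

int main()
{
    // Hypothetical 5-level table (QUANT_5-like spacing) on the 0..64 scale.
    const float unsc[5] = { 0.0f, 16.0f, 32.0f, 48.0f, 64.0f };
    const float quant_level_m1 = 4.0f;

    float ix_a = 0.30f;   // 19.2 on the 0..64 scale, below the 16/32 midpoint
    float ix_b = 0.40f;   // 25.6 on the 0..64 scale, above the 16/32 midpoint

    int a = round_to_nearer_level(ix_a, (int)(ix_a * quant_level_m1), unsc);
    int b = round_to_nearer_level(ix_b, (int)(ix_b * quant_level_m1), unsc);

    printf("%d %d\n", a, b);   // prints "1 2": 19.2 snaps to 16, 25.6 snaps to 32
    return 0;
}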
@@ -1123,48 +1109,42 @@ void compute_ideal_quantized_weights_for_decimation_table( #if ASTCENC_SIMD_WIDTH > 1 // SIMD loop; process weights in SIMD width batches while we can. - int clipped_weight_count = weight_count & ~(ASTCENC_SIMD_WIDTH-1); vfloat scalev(scale); vfloat scaled_low_boundv(scaled_low_bound); vfloat quant_level_m1v(quant_level_m1); vfloat rscalev(rscale); vfloat low_boundv(low_bound); - for (/*Vector loop */; i < clipped_weight_count; i += ASTCENC_SIMD_WIDTH) + + int clipped_weight_count = round_down_to_simd_multiple_vla(weight_count); + for (/* */; i < clipped_weight_count; i += ASTCENC_SIMD_WIDTH) { vfloat ix = loada(&weight_set_in[i]) * scalev - scaled_low_boundv; - ix = saturate(ix); // upper bound must be smaller than 1 to avoid an array overflow below. + ix = clampzo(ix); - // look up the two closest indexes and return the one that was closest. + //Llook up the two closest indexes and return the one that was closest. vfloat ix1 = ix * quant_level_m1v; - vint weight = floatToInt(ix1); - vint weight1 = weight+vint(1); + vint weight = float_to_int(ix1); + vint weight1 = weight + vint(1); vfloat ixl = gatherf(qat->unquantized_value_unsc, weight); vfloat ixh = gatherf(qat->unquantized_value_unsc, weight1); - vmask mask = ixl + ixh < vfloat(128.0f) * ix; + vmask mask = (ixl + ixh) < (vfloat(128.0f) * ix); weight = select(weight, weight1, mask); ixl = select(ixl, ixh, mask); // Invert the weight-scaling that was done initially - store(ixl * rscalev + low_boundv, &weight_set_out[i]); + storea(ixl * rscalev + low_boundv, &weight_set_out[i]); vint scm = gatheri(qat->scramble_map, weight); vint scn = pack_low_bytes(scm); store_nbytes(scn, &quantized_weight_set[i]); } #endif // #if ASTCENC_SIMD_WIDTH > 1 - // Process remaining weights in a scalar way. - for (/* Loop tail */; i < weight_count; i++) + // Loop tail + for (/* */; i < weight_count; i++) { float ix = (weight_set_in[i] * scale) - scaled_low_bound; - if (ix < 0.0f) - { - ix = 0.0f; - } - if (ix > 1.0f) // upper bound must be smaller than 1 to avoid an array overflow below. - { - ix = 1.0f; - } + ix = astc::clamp1f(ix); // look up the two closest indexes and return the one that was closest. float ix1 = ix * quant_level_m1; @@ -1184,9 +1164,9 @@ void compute_ideal_quantized_weights_for_decimation_table( } } -static inline float4 compute_rgbovec( - float4 rgba_weight_sum, - float3 weight_weight_sum, +static inline vfloat4 compute_rgbovec( + vfloat4 rgba_weight_sum, + vfloat4 weight_weight_sum, float red_sum, float green_sum, float blue_sum, @@ -1197,18 +1177,18 @@ static inline float4 compute_rgbovec( // has a regular structure, we can simplify the inverse calculation. 
This // gives us ~24 multiplications, down from 96 for a generic inverse - // mat[0] = float4(rgba_ws.x, 0.0f, 0.0f, wght_ws.x); - // mat[1] = float4( 0.0f, rgba_ws.y, 0.0f, wght_ws.y); - // mat[2] = float4( 0.0f, 0.0f, rgba_ws.z, wght_ws.z); - // mat[3] = float4(wght_ws.x, wght_ws.y, wght_ws.z, psum); + // mat[0] = vfloat4(rgba_ws.x, 0.0f, 0.0f, wght_ws.x); + // mat[1] = vfloat4( 0.0f, rgba_ws.y, 0.0f, wght_ws.y); + // mat[2] = vfloat4( 0.0f, 0.0f, rgba_ws.z, wght_ws.z); + // mat[3] = vfloat4(wght_ws.x, wght_ws.y, wght_ws.z, psum); // mat = invert(mat); - float X = rgba_weight_sum.r; - float Y = rgba_weight_sum.g; - float Z = rgba_weight_sum.b; - float P = weight_weight_sum.r; - float Q = weight_weight_sum.g; - float R = weight_weight_sum.b; + float X = rgba_weight_sum.lane<0>(); + float Y = rgba_weight_sum.lane<1>(); + float Z = rgba_weight_sum.lane<2>(); + float P = weight_weight_sum.lane<0>(); + float Q = weight_weight_sum.lane<1>(); + float R = weight_weight_sum.lane<2>(); float S = psum; float PP = P * P; @@ -1232,78 +1212,75 @@ static inline float4 compute_rgbovec( // Actually compute the adjugate matrix, not the inverse, and apply the // multiplication by 1/det to the vector separately. - float4 mat0 = float4(DT, ZQP, RYP, mZYP); - float4 mat1 = float4(ZQP, SZmRR * X - Z * PP, RQX, mZQX); - float4 mat2 = float4(RYP, RQX, (S * Y - QQ) * X - Y * PP, mRYX); - float4 mat3 = float4(mZYP, mZQX, mRYX, Z * YX); - float4 vect = float4(red_sum, green_sum, blue_sum, qsum) * rdet; + vfloat4 mat0(DT, ZQP, RYP, mZYP); + vfloat4 mat1(ZQP, SZmRR * X - Z * PP, RQX, mZQX); + vfloat4 mat2(RYP, RQX, (S * Y - QQ) * X - Y * PP, mRYX); + vfloat4 mat3(mZYP, mZQX, mRYX, Z * YX); + vfloat4 vect = vfloat4(red_sum, green_sum, blue_sum, qsum) * rdet; #ifdef DEBUG_CAPTURE_NAN fedisableexcept(FE_DIVBYZERO | FE_INVALID); #endif - return float4(dot(mat0, vect), - dot(mat1, vect), - dot(mat2, vect), - dot(mat3, vect)); + return vfloat4(dot_s(mat0, vect), + dot_s(mat1, vect), + dot_s(mat2, vect), + dot_s(mat3, vect)); } /* for a given weight set, we wish to recompute the colors so that they are optimal for a particular weight set. */ -void recompute_ideal_colors( - int weight_quantization_mode, +void recompute_ideal_colors_2planes( + int weight_quant_mode, endpoints* ep, // contains the endpoints we wish to update - float4* rgbs_vectors, // used to return RGBS-vectors for endpoint mode #6 - float4* rgbo_vectors, // used to return RGBO-vectors for endpoint mode #7 + vfloat4* rgbs_vectors, // used to return RGBS-vectors for endpoint mode #6 + vfloat4* rgbo_vectors, // used to return RGBO-vectors for endpoint mode #7 const uint8_t* weight_set8, // the current set of weight values const uint8_t* plane2_weight_set8, // nullptr if plane 2 is not actually used. int plane2_color_component, // color component for 2nd plane of weights; -1 if the 2nd plane of weights is not present - const partition_info* pi, - const decimation_table* it, - const imageblock* pb, // picture-block containing the actual data. + const partition_info* pt, + const decimation_table* dt, + const imageblock* blk, // picture-block containing the actual data. 
const error_weight_block* ewb ) { - const quantization_and_transfer_table *qat = &(quant_and_xfer_tables[weight_quantization_mode]); + const quantization_and_transfer_table *qat = &(quant_and_xfer_tables[weight_quant_mode]); float weight_set[MAX_WEIGHTS_PER_BLOCK]; float plane2_weight_set[MAX_WEIGHTS_PER_BLOCK]; - for (int i = 0; i < it->num_weights; i++) + for (int i = 0; i < dt->weight_count; i++) { weight_set[i] = qat->unquantized_value[weight_set8[i]] * (1.0f / 64.0f); } if (plane2_weight_set8) { - for (int i = 0; i < it->num_weights; i++) + for (int i = 0; i < dt->weight_count; i++) { plane2_weight_set[i] = qat->unquantized_value[plane2_weight_set8[i]] * (1.0f / 64.0f); } } - int partition_count = pi->partition_count; + int partition_count = pt->partition_count; for (int i = 0; i < partition_count; i++) { - float4 rgba_sum = float4(1e-17f); - float4 rgba_weight_sum = float4(1e-17f); + vfloat4 rgba_sum(1e-17f); + vfloat4 rgba_weight_sum(1e-17f); - int texelcount = pi->texels_per_partition[i]; - const uint8_t *texel_indexes = pi->texels_of_partition[i]; + int texelcount = pt->partition_texel_count[i]; + const uint8_t *texel_indexes = pt->texels_of_partition[i]; for (int j = 0; j < texelcount; j++) { int tix = texel_indexes[j]; - float4 rgba = float4(pb->data_r[tix], pb->data_g[tix], pb->data_b[tix], pb->data_a[tix]); - float4 error_weight = float4(ewb->texel_weight_r[tix], ewb->texel_weight_g[tix], ewb->texel_weight_b[tix], ewb->texel_weight_a[tix]); + vfloat4 rgba = blk->texel(tix); + vfloat4 error_weight(ewb->texel_weight_r[tix], ewb->texel_weight_g[tix], ewb->texel_weight_b[tix], ewb->texel_weight_a[tix]); rgba_sum = rgba_sum + (rgba * error_weight); rgba_weight_sum = rgba_weight_sum + error_weight; } - float3 scale_direction = normalize(float3( - rgba_sum.r * (1.0f / rgba_weight_sum.r), - rgba_sum.g * (1.0f / rgba_weight_sum.g), - rgba_sum.b * (1.0f / rgba_weight_sum.b))); + vfloat4 scale_direction = normalize((rgba_sum * (1.0f / rgba_weight_sum)).swz<0, 1, 2>()); float scale_max = 0.0f; float scale_min = 1e10f; @@ -1313,22 +1290,22 @@ void recompute_ideal_colors( float wmin2 = 1.0f; float wmax2 = 0.0f; - float4 left_sum = float4(0.0f); - float4 middle_sum = float4(0.0f); - float4 right_sum = float4(0.0f); + vfloat4 left_sum = vfloat4::zero(); + vfloat4 middle_sum = vfloat4::zero(); + vfloat4 right_sum = vfloat4::zero(); - float4 left2_sum = float4(0.0f); - float4 middle2_sum = float4(0.0f); - float4 right2_sum = float4(0.0f); + vfloat4 left2_sum = vfloat4::zero(); + vfloat4 middle2_sum = vfloat4::zero(); + vfloat4 right2_sum = vfloat4::zero(); - float3 lmrs_sum = float3(0.0f); + vfloat4 lmrs_sum = vfloat4(0.0f); - float4 color_vec_x = float4(0.0f); - float4 color_vec_y = float4(0.0f); + vfloat4 color_vec_x = vfloat4::zero(); + vfloat4 color_vec_y = vfloat4::zero(); float2 scale_vec = float2(0.0f); - float3 weight_weight_sum = float3(1e-17f); + vfloat4 weight_weight_sum = vfloat4(1e-17f); float psum = 1e-17f; // FIXME: the loop below has too many responsibilities, making it inefficient. 
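The accumulators set up above (left_sum, middle_sum, right_sum, color_vec_x, color_vec_y) are the coefficients of a per-channel 2x2 weighted least-squares system: the fit looks for endpoints c0 and c1 such that c0*(1 - t) + c1*t best matches the weighted texel values, and the hunks below solve that system per channel with Cramer's rule (color_det1, color_rdet1, ep0, ep1). A minimal scalar sketch of the same solve, using standalone illustrative names rather than the astcenc vfloat4 types:

#include <cstdio>

// Fit endpoints c0/c1 so that c0*(1 - t[j]) + c1*t[j] approximates x[j] under
// weights w[j], by solving the 2x2 normal equations with Cramer's rule. This
// mirrors how left_sum/middle_sum/right_sum and color_vec_x/color_vec_y are
// combined into ep0/ep1 in the patch; fit_endpoints is an illustrative name,
// not an astcenc function.
static void fit_endpoints(const float* x, const float* t, const float* w,
                          int n, float& c0, float& c1)
{
    float L = 0.0f, M = 0.0f, R = 0.0f; // sum of w*(1-t)^2, w*(1-t)*t, w*t^2
    float X = 0.0f, Y = 0.0f;           // sum of w*x*(1-t), w*x*t
    for (int j = 0; j < n; j++)
    {
        float om = 1.0f - t[j];
        L += w[j] * om * om;
        M += w[j] * om * t[j];
        R += w[j] * t[j] * t[j];
        X += w[j] * x[j] * om;
        Y += w[j] * x[j] * t[j];
    }

    float det = L * R - M * M;   // color_det1; a real caller must reject a tiny det
    float rdet = 1.0f / det;     // color_rdet1
    c0 = (R * X - M * Y) * rdet; // ep0
    c1 = (L * Y - M * X) * rdet; // ep1
}

int main()
{
    // Samples lying on a ramp from 0.2 to 0.8 should be recovered (up to rounding).
    float x[4] = { 0.2f, 0.4f, 0.6f, 0.8f };
    float t[4] = { 0.0f, 1.0f / 3.0f, 2.0f / 3.0f, 1.0f };
    float w[4] = { 1.0f, 1.0f, 1.0f, 1.0f };
    float c0 = 0.0f, c1 = 0.0f;
    fit_endpoints(x, t, w, 4, c0, c1);
    printf("c0=%f c1=%f\n", c0, c1); // expect ~0.2 and ~0.8
    return 0;
}

The determinant collapses toward zero when all interpolation weights in a partition are (nearly) equal, which is why the code below guards with det_mask and takes the wmin1 >= wmax1 * 0.999f branch that falls back to the weighted-average color for both endpoints.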
@@ -1336,51 +1313,38 @@ void recompute_ideal_colors( { int tix = texel_indexes[j]; - float4 rgba = float4(pb->data_r[tix], pb->data_g[tix], pb->data_b[tix], pb->data_a[tix]); - float4 color_weight = float4(ewb->texel_weight_r[tix], ewb->texel_weight_g[tix], ewb->texel_weight_b[tix], ewb->texel_weight_a[tix]); + vfloat4 rgba = blk->texel(tix); + vfloat4 color_weight(ewb->texel_weight_r[tix], ewb->texel_weight_g[tix], ewb->texel_weight_b[tix], ewb->texel_weight_a[tix]); - float3 color_weight3 = float3(color_weight.r, color_weight.g, color_weight.b); - float3 rgb = float3(rgba.r, rgba.g, rgba.b); + vfloat4 color_weight3 = color_weight.swz<0, 1, 2>(); + vfloat4 rgb = rgba.swz<0, 1, 2>(); // FIXME: move this calculation out to the color block. - float ls_weight = (color_weight.r + color_weight.g + color_weight.b); + float ls_weight = hadd_rgb_s(color_weight); - const uint8_t *texel_weights = it->texel_weights[tix]; - const float *texel_weights_float = it->texel_weights_float[tix]; + const uint8_t *texel_weights = dt->texel_weights_t4[tix]; + const float *texel_weights_float = dt->texel_weights_float_t4[tix]; float idx0 = (weight_set[texel_weights[0]] * texel_weights_float[0] + weight_set[texel_weights[1]] * texel_weights_float[1]) + (weight_set[texel_weights[2]] * texel_weights_float[2] + weight_set[texel_weights[3]] * texel_weights_float[3]); float om_idx0 = 1.0f - idx0; - if (idx0 > wmax1) - { - wmax1 = idx0; - } + wmin1 = astc::min(idx0, wmin1); + wmax1 = astc::max(idx0, wmax1); - if (idx0 < wmin1) - { - wmin1 = idx0; - } - - float scale = dot(scale_direction, rgb); - if (scale < scale_min) - { - scale_min = scale; - } - - if (scale > scale_max) - { - scale_max = scale; - } + float scale = dot3_s(scale_direction, rgb); + scale_min = astc::min(scale, scale_min); + scale_max = astc::max(scale, scale_max); - float4 left = color_weight * (om_idx0 * om_idx0); - float4 middle = color_weight * (om_idx0 * idx0); - float4 right = color_weight * (idx0 * idx0); + vfloat4 left = color_weight * (om_idx0 * om_idx0); + vfloat4 middle = color_weight * (om_idx0 * idx0); + vfloat4 right = color_weight * (idx0 * idx0); - float3 lmrs = float3(om_idx0 * om_idx0, - om_idx0 * idx0, - idx0 * idx0) * ls_weight; + vfloat4 lmrs = vfloat4(om_idx0 * om_idx0, + om_idx0 * idx0, + idx0 * idx0, + 0.0f) * ls_weight; left_sum = left_sum + left; middle_sum = middle_sum + middle; @@ -1399,34 +1363,27 @@ void recompute_ideal_colors( + plane2_weight_set[texel_weights[3]] * texel_weights_float[3]); om_idx1 = 1.0f - idx1; - if (idx1 > wmax2) - { - wmax2 = idx1; - } - - if (idx1 < wmin2) - { - wmin2 = idx1; - } + wmin2 = astc::min(idx1, wmin2); + wmax2 = astc::max(idx1, wmax2); - float4 left2 = color_weight * (om_idx1 * om_idx1); - float4 middle2 = color_weight * (om_idx1 * idx1); - float4 right2 = color_weight * (idx1 * idx1); + vfloat4 left2 = color_weight * (om_idx1 * om_idx1); + vfloat4 middle2 = color_weight * (om_idx1 * idx1); + vfloat4 right2 = color_weight * (idx1 * idx1); left2_sum = left2_sum + left2; middle2_sum = middle2_sum + middle2; right2_sum = right2_sum + right2; } - float4 color_idx = float4((plane2_color_component == 0) ? idx1 : idx0, - (plane2_color_component == 1) ? idx1 : idx0, - (plane2_color_component == 2) ? idx1 : idx0, - (plane2_color_component == 3) ? idx1 : idx0); + vfloat4 color_idx((plane2_color_component == 0) ? idx1 : idx0, + (plane2_color_component == 1) ? idx1 : idx0, + (plane2_color_component == 2) ? idx1 : idx0, + (plane2_color_component == 3) ? 
idx1 : idx0); - float3 color_idx3 = float3(color_idx.r, color_idx.g, color_idx.b); + vfloat4 color_idx3 = color_idx.swz<0, 1, 2>(); - float4 cwprod = color_weight * rgba; - float4 cwiprod = cwprod * color_idx; + vfloat4 cwprod = color_weight * rgba; + vfloat4 cwiprod = cwprod * color_idx; color_vec_y = color_vec_y + cwiprod; color_vec_x = color_vec_x + (cwprod - cwiprod); @@ -1435,29 +1392,29 @@ void recompute_ideal_colors( weight_weight_sum = weight_weight_sum + (color_weight3 * color_idx3); - psum += dot(color_weight3 * color_idx3, color_idx3); + psum += dot3_s(color_weight3 * color_idx3, color_idx3); } // calculations specific to mode #7, the HDR RGB-scale mode. // FIXME: Can we skip this for LDR textures? - float red_sum = color_vec_x.r + color_vec_y.r; - float green_sum = color_vec_x.g + color_vec_y.g; - float blue_sum = color_vec_x.b + color_vec_y.b; - float qsum = color_vec_y.r + color_vec_y.g + color_vec_y.b; + float red_sum = color_vec_x.lane<0>() + color_vec_y.lane<0>(); + float green_sum = color_vec_x.lane<1>() + color_vec_y.lane<1>(); + float blue_sum = color_vec_x.lane<2>() + color_vec_y.lane<2>(); + float qsum = hadd_rgb_s(color_vec_y); #ifdef DEBUG_CAPTURE_NAN fedisableexcept(FE_DIVBYZERO | FE_INVALID); #endif - float4 rgbovec = compute_rgbovec(rgba_weight_sum, weight_weight_sum, - red_sum, green_sum, blue_sum, psum, qsum); + vfloat4 rgbovec = compute_rgbovec(rgba_weight_sum, weight_weight_sum, + red_sum, green_sum, blue_sum, psum, qsum); rgbo_vectors[i] = rgbovec; // We will occasionally get a failure due to the use of a singular // (non-invertible) matrix. Record whether such a failure has taken // place; if it did, compute rgbo_vectors[] with a different method // later on. - float chkval = dot(rgbovec, rgbovec); + float chkval = dot_s(rgbovec, rgbovec); int rgbo_fail = chkval != chkval; // Initialize the luminance and scale vectors with a reasonable @@ -1466,56 +1423,31 @@ void recompute_ideal_colors( fedisableexcept(FE_DIVBYZERO | FE_INVALID); #endif - float scalediv = scale_min * (1.0f / MAX(scale_max, 1e-10f)); - if (!(scalediv > 0.0f)) - { - scalediv = 0.0f; // set to zero if scalediv is negative, or NaN. - } - - if (scalediv > 1.0f) - { - scalediv = 1.0f; - } + float scalediv = scale_min * (1.0f / astc::max(scale_max, 1e-10f)); + scalediv = astc::clamp1f(scalediv); #ifdef DEBUG_CAPTURE_NAN feenableexcept(FE_DIVBYZERO | FE_INVALID); #endif - float3 sds = scale_direction * scale_max; + vfloat4 sds = scale_direction * scale_max; - rgbs_vectors[i] = float4(sds.r, sds.g, sds.b, scalediv); + rgbs_vectors[i] = vfloat4(sds.lane<0>(), sds.lane<1>(), sds.lane<2>(), scalediv); if (wmin1 >= wmax1 * 0.999f) { // if all weights in the partition were equal, then just take average // of all colors in the partition and use that as both endpoint colors. 
- float4 avg = (color_vec_x + color_vec_y) * - float4(1.0f / rgba_weight_sum.r, - 1.0f / rgba_weight_sum.g, - 1.0f / rgba_weight_sum.b, - 1.0f / rgba_weight_sum.a); - - if (plane2_color_component != 0 && avg.r == avg.r) - { - ep->endpt0[i].r = ep->endpt1[i].r = avg.r; - } - - if (plane2_color_component != 1 && avg.g == avg.g) - { - ep->endpt0[i].g = ep->endpt1[i].g = avg.g; - } + vfloat4 avg = (color_vec_x + color_vec_y) * (1.0f / rgba_weight_sum); - if (plane2_color_component != 2 && avg.b == avg.b) - { - ep->endpt0[i].b = ep->endpt1[i].b = avg.b; - } + vmask4 p1_mask = vint4::lane_id() != vint4(plane2_color_component); + vmask4 notnan_mask = avg == avg; + vmask4 full_mask = p1_mask & notnan_mask; - if (plane2_color_component != 3 && avg.a == avg.a) - { - ep->endpt0[i].a = ep->endpt1[i].a = avg.a; - } + ep->endpt0[i] = select(ep->endpt0[i], avg, full_mask); + ep->endpt1[i] = select(ep->endpt1[i], avg, full_mask); - rgbs_vectors[i] = float4(sds.r, sds.g, sds.b, 1.0f); + rgbs_vectors[i] = vfloat4(sds.lane<0>(), sds.lane<1>(), sds.lane<2>(), 1.0f); } else { @@ -1526,58 +1458,39 @@ void recompute_ideal_colors( fedisableexcept(FE_DIVBYZERO | FE_INVALID); #endif - float4 color_det1 = (left_sum * right_sum) - (middle_sum * middle_sum); - float4 color_rdet1 = float4(1.0f / color_det1.r, - 1.0f / color_det1.g, - 1.0f / color_det1.b, - 1.0f / color_det1.a ); + vfloat4 color_det1 = (left_sum * right_sum) - (middle_sum * middle_sum); + vfloat4 color_rdet1 = 1.0f / color_det1; - float ls_det1 = (lmrs_sum.r * lmrs_sum.b) - (lmrs_sum.g * lmrs_sum.g); + float ls_det1 = (lmrs_sum.lane<0>() * lmrs_sum.lane<2>()) - (lmrs_sum.lane<1>() * lmrs_sum.lane<1>()); float ls_rdet1 = 1.0f / ls_det1; - float4 color_mss1 = (left_sum * left_sum) - + (2.0f * middle_sum * middle_sum) - + (right_sum * right_sum); + vfloat4 color_mss1 = (left_sum * left_sum) + + (2.0f * middle_sum * middle_sum) + + (right_sum * right_sum); - float ls_mss1 = (lmrs_sum.r * lmrs_sum.r) - + (2.0f * lmrs_sum.g * lmrs_sum.g) - + (lmrs_sum.b * lmrs_sum.b); + float ls_mss1 = (lmrs_sum.lane<0>() * lmrs_sum.lane<0>()) + + (2.0f * lmrs_sum.lane<1>() * lmrs_sum.lane<1>()) + + (lmrs_sum.lane<2>() * lmrs_sum.lane<2>()); - float4 ep0 = (right_sum * color_vec_x - middle_sum * color_vec_y) * color_rdet1; - float4 ep1 = (left_sum * color_vec_y - middle_sum * color_vec_x) * color_rdet1; + vfloat4 ep0 = (right_sum * color_vec_x - middle_sum * color_vec_y) * color_rdet1; + vfloat4 ep1 = (left_sum * color_vec_y - middle_sum * color_vec_x) * color_rdet1; - float scale_ep0 = (lmrs_sum.b * scale_vec.r - lmrs_sum.g * scale_vec.g) * ls_rdet1; - float scale_ep1 = (lmrs_sum.r * scale_vec.g - lmrs_sum.g * scale_vec.r) * ls_rdet1; + float scale_ep0 = (lmrs_sum.lane<2>() * scale_vec.r - lmrs_sum.lane<1>() * scale_vec.g) * ls_rdet1; + float scale_ep1 = (lmrs_sum.lane<0>() * scale_vec.g - lmrs_sum.lane<1>() * scale_vec.r) * ls_rdet1; - if (plane2_color_component != 0 && fabsf(color_det1.r) > (color_mss1.r * 1e-4f) && ep0.r == ep0.r && ep1.r == ep1.r) - { - ep->endpt0[i].r = ep0.r; - ep->endpt1[i].r = ep1.r; - } + vmask4 p1_mask = vint4::lane_id() != vint4(plane2_color_component); + vmask4 det_mask = abs(color_det1) > (color_mss1 * 1e-4f); + vmask4 notnan_mask = (ep0 == ep0) & (ep1 == ep1); + vmask4 full_mask = p1_mask & det_mask & notnan_mask; - if (plane2_color_component != 1 && fabsf(color_det1.g) > (color_mss1.g * 1e-4f) && ep0.g == ep0.g && ep1.g == ep1.g) - { - ep->endpt0[i].g = ep0.g; - ep->endpt1[i].g = ep1.g; - } - - if (plane2_color_component != 2 && 
fabsf(color_det1.b) > (color_mss1.b * 1e-4f) && ep0.b == ep0.b && ep1.b == ep1.b) - { - ep->endpt0[i].b = ep0.b; - ep->endpt1[i].b = ep1.b; - } - - if (plane2_color_component != 3 && fabsf(color_det1.a) > (color_mss1.a * 1e-4f) && ep0.a == ep0.a && ep1.a == ep1.a) - { - ep->endpt0[i].a = ep0.a; - ep->endpt1[i].a = ep1.a; - } + ep->endpt0[i] = select(ep->endpt0[i], ep0, full_mask); + ep->endpt1[i] = select(ep->endpt1[i], ep1, full_mask); if (fabsf(ls_det1) > (ls_mss1 * 1e-4f) && scale_ep0 == scale_ep0 && scale_ep1 == scale_ep1 && scale_ep0 < scale_ep1) { float scalediv2 = scale_ep0 * (1.0f / scale_ep1); - float3 sdsm = scale_direction * scale_ep1; - rgbs_vectors[i] = float4(sdsm.r, sdsm.g, sdsm.b, scalediv2); + vfloat4 sdsm = scale_direction * scale_ep1; + rgbs_vectors[i] = vfloat4(sdsm.lane<0>(), sdsm.lane<1>(), sdsm.lane<2>(), scalediv2); } #ifdef DEBUG_CAPTURE_NAN @@ -1591,31 +1504,14 @@ void recompute_ideal_colors( { // if all weights in the partition were equal, then just take average // of all colors in the partition and use that as both endpoint colors. - float4 avg = (color_vec_x + color_vec_y) - * float4(1.0f / rgba_weight_sum.r, - 1.0f / rgba_weight_sum.g, - 1.0f / rgba_weight_sum.b, - 1.0f / rgba_weight_sum.a); - - if (plane2_color_component == 0 && avg.r == avg.r) - { - ep->endpt0[i].r = ep->endpt1[i].r = avg.r; - } - - if (plane2_color_component == 1 && avg.g == avg.g) - { - ep->endpt0[i].g = ep->endpt1[i].g = avg.g; - } - - if (plane2_color_component == 2 && avg.b == avg.b) - { - ep->endpt0[i].b = ep->endpt1[i].b = avg.b; - } - - if (plane2_color_component == 3 && avg.a == avg.a) - { - ep->endpt0[i].a = ep->endpt1[i].a = avg.a; - } + vfloat4 avg = (color_vec_x + color_vec_y) * (1.0f / rgba_weight_sum); + + vmask4 p2_mask = vint4::lane_id() == vint4(plane2_color_component); + vmask4 notnan_mask = avg == avg; + vmask4 full_mask = p2_mask & notnan_mask; + + ep->endpt0[i] = select(ep->endpt0[i], avg, full_mask); + ep->endpt1[i] = select(ep->endpt1[i], avg, full_mask); } else { @@ -1625,42 +1521,23 @@ void recompute_ideal_colors( // otherwise, complete the analytic calculation of ideal-endpoint-values // for the given set of texel weights and pixel colors. 
- float4 color_det2 = (left2_sum * right2_sum) - (middle2_sum * middle2_sum); - float4 color_rdet2 = float4(1.0f / color_det2.r, - 1.0f / color_det2.g, - 1.0f / color_det2.b, - 1.0f / color_det2.a); - - float4 color_mss2 = (left2_sum * left2_sum) - + (2.0f * middle2_sum * middle2_sum) - + (right2_sum * right2_sum); - - float4 ep0 = (right2_sum * color_vec_x - middle2_sum * color_vec_y) * color_rdet2; - float4 ep1 = (left2_sum * color_vec_y - middle2_sum * color_vec_x) * color_rdet2; - - if (plane2_color_component == 0 && fabsf(color_det2.r) > (color_mss2.r * 1e-4f) && ep0.r == ep0.r && ep1.r == ep1.r) - { - ep->endpt0[i].r = ep0.r; - ep->endpt1[i].r = ep1.r; - } - - if (plane2_color_component == 1 && fabsf(color_det2.g) > (color_mss2.g * 1e-4f) && ep0.g == ep0.g && ep1.g == ep1.g) - { - ep->endpt0[i].g = ep0.g; - ep->endpt1[i].g = ep1.g; - } - - if (plane2_color_component == 2 && fabsf(color_det2.b) > (color_mss2.b * 1e-4f) && ep0.b == ep0.b && ep1.b == ep1.b) - { - ep->endpt0[i].b = ep0.b; - ep->endpt1[i].b = ep1.b; - } - - if (plane2_color_component == 3 && fabsf(color_det2.a) > (color_mss2.a * 1e-4f) && ep0.a == ep0.a && ep1.a == ep1.a) - { - ep->endpt0[i].a = ep0.a; - ep->endpt1[i].a = ep1.a; - } + vfloat4 color_det2 = (left2_sum * right2_sum) - (middle2_sum * middle2_sum); + vfloat4 color_rdet2 = 1.0f / color_det2; + + vfloat4 color_mss2 = (left2_sum * left2_sum) + + (2.0f * middle2_sum * middle2_sum) + + (right2_sum * right2_sum); + + vfloat4 ep0 = (right2_sum * color_vec_x - middle2_sum * color_vec_y) * color_rdet2; + vfloat4 ep1 = (left2_sum * color_vec_y - middle2_sum * color_vec_x) * color_rdet2; + + vmask4 p2_mask = vint4::lane_id() == vint4(plane2_color_component); + vmask4 det_mask = abs(color_det2) > (color_mss2 * 1e-4f); + vmask4 notnan_mask = (ep0 == ep0) & (ep1 == ep1); + vmask4 full_mask = p2_mask & det_mask & notnan_mask; + + ep->endpt0[i] = select(ep->endpt0[i], ep0, full_mask); + ep->endpt1[i] = select(ep->endpt1[i], ep1, full_mask); #ifdef DEBUG_CAPTURE_NAN feenableexcept(FE_DIVBYZERO | FE_INVALID); @@ -1672,19 +1549,259 @@ void recompute_ideal_colors( // a somewhat-sensible value anyway if (rgbo_fail) { - float4 v0 = ep->endpt0[i]; - float4 v1 = ep->endpt1[i]; - float avgdif = ((v1.r - v0.r) + (v1.g - v0.g) + (v1.b - v0.b)) * (1.0f / 3.0f); + vfloat4 v0 = ep->endpt0[i]; + vfloat4 v1 = ep->endpt1[i]; + float avgdif = hadd_rgb_s(v1 - v0) * (1.0f / 3.0f); + avgdif = astc::max(avgdif, 0.0f); + + vfloat4 avg = (v0 + v1) * 0.5f; + vfloat4 ep0 = avg - vfloat4(avgdif) * 0.5f; + + rgbo_vectors[i] = vfloat4(ep0.lane<0>(), ep0.lane<1>(), ep0.lane<2>(), avgdif); + } + } +} + +/* for a given weight set, we wish to recompute the colors so that they are optimal for a particular weight set. */ +void recompute_ideal_colors_1plane( + int weight_quant_mode, + endpoints* ep, // contains the endpoints we wish to update + vfloat4* rgbs_vectors, // used to return RGBS-vectors for endpoint mode #6 + vfloat4* rgbo_vectors, // used to return RGBO-vectors for endpoint mode #7 + const uint8_t* weight_set8, // the current set of weight values + const partition_info* pt, + const decimation_table* dt, + const imageblock* blk, // picture-block containing the actual data. 
+ const error_weight_block* ewb +) { + int weight_count = dt->weight_count; + int partition_count = pt->partition_count; + + promise(weight_count > 0); + promise(partition_count > 0); + + const quantization_and_transfer_table *qat = &(quant_and_xfer_tables[weight_quant_mode]); + + float weight_set[MAX_WEIGHTS_PER_BLOCK]; + for (int i = 0; i < weight_count; i++) + { + weight_set[i] = qat->unquantized_value[weight_set8[i]] * (1.0f / 64.0f); + } + + for (int i = 0; i < partition_count; i++) + { + vfloat4 rgba_sum(1e-17f); + vfloat4 rgba_weight_sum(1e-17f); + + int texelcount = pt->partition_texel_count[i]; + const uint8_t *texel_indexes = pt->texels_of_partition[i]; + + promise(texelcount > 0); + for (int j = 0; j < texelcount; j++) + { + int tix = texel_indexes[j]; + + vfloat4 rgba = blk->texel(tix); + vfloat4 error_weight(ewb->texel_weight_r[tix], ewb->texel_weight_g[tix], ewb->texel_weight_b[tix], ewb->texel_weight_a[tix]); + + rgba_sum = rgba_sum + (rgba * error_weight); + rgba_weight_sum = rgba_weight_sum + error_weight; + } + + vfloat4 scale_direction = normalize((rgba_sum * (1.0f / rgba_weight_sum)).swz<0, 1, 2>()); + + float scale_max = 0.0f; + float scale_min = 1e10f; + + float wmin1 = 1.0f; + float wmax1 = 0.0f; + + vfloat4 left_sum = vfloat4::zero(); + vfloat4 middle_sum = vfloat4::zero(); + vfloat4 right_sum = vfloat4::zero(); + + vfloat4 lmrs_sum = vfloat4(0.0f); + + vfloat4 color_vec_x = vfloat4::zero(); + vfloat4 color_vec_y = vfloat4::zero(); + + float2 scale_vec = float2(0.0f); + + vfloat4 weight_weight_sum = vfloat4(1e-17f); + float psum = 1e-17f; + + // FIXME: the loop below has too many responsibilities, making it inefficient. + for (int j = 0; j < texelcount; j++) + { + int tix = texel_indexes[j]; + + vfloat4 rgba = blk->texel(tix); + vfloat4 color_weight(ewb->texel_weight_r[tix], ewb->texel_weight_g[tix], ewb->texel_weight_b[tix], ewb->texel_weight_a[tix]); + + vfloat4 color_weight3 = color_weight.swz<0, 1, 2>(); + vfloat4 rgb = rgba.swz<0, 1, 2>(); + + // FIXME: move this calculation out to the color block. 
+ float ls_weight = hadd_rgb_s(color_weight); + + const uint8_t *texel_weights = dt->texel_weights_t4[tix]; + const float *texel_weights_float = dt->texel_weights_float_t4[tix]; + float idx0 = (weight_set[texel_weights[0]] * texel_weights_float[0] + + weight_set[texel_weights[1]] * texel_weights_float[1]) + + (weight_set[texel_weights[2]] * texel_weights_float[2] + + weight_set[texel_weights[3]] * texel_weights_float[3]); + + float om_idx0 = 1.0f - idx0; + wmin1 = astc::min(idx0, wmin1); + wmax1 = astc::max(idx0, wmax1); + + float scale = dot3_s(scale_direction, rgb); + scale_min = astc::min(scale, scale_min); + scale_max = astc::max(scale, scale_max); + + vfloat4 left = color_weight * (om_idx0 * om_idx0); + vfloat4 middle = color_weight * (om_idx0 * idx0); + vfloat4 right = color_weight * (idx0 * idx0); + + vfloat4 lmrs = vfloat4(om_idx0 * om_idx0, + om_idx0 * idx0, + idx0 * idx0, + 0.0f) * ls_weight; + + left_sum = left_sum + left; + middle_sum = middle_sum + middle; + right_sum = right_sum + right; + + lmrs_sum = lmrs_sum + lmrs; + + vfloat4 color_idx(idx0); + vfloat4 color_idx3(idx0); + + vfloat4 cwprod = color_weight * rgba; + vfloat4 cwiprod = cwprod * color_idx; + + color_vec_y = color_vec_y + cwiprod; + color_vec_x = color_vec_x + (cwprod - cwiprod); + + scale_vec = scale_vec + float2(om_idx0, idx0) * (ls_weight * scale); + + weight_weight_sum = weight_weight_sum + (color_weight3 * color_idx3); + + psum += dot3_s(color_weight3 * color_idx3, color_idx3); + } + + // calculations specific to mode #7, the HDR RGB-scale mode. + // FIXME: Can we skip this for LDR textures? + float red_sum = color_vec_x.lane<0>() + color_vec_y.lane<0>(); + float green_sum = color_vec_x.lane<1>() + color_vec_y.lane<1>(); + float blue_sum = color_vec_x.lane<2>() + color_vec_y.lane<2>(); + float qsum = hadd_rgb_s(color_vec_y); + + #ifdef DEBUG_CAPTURE_NAN + fedisableexcept(FE_DIVBYZERO | FE_INVALID); + #endif + + vfloat4 rgbovec = compute_rgbovec(rgba_weight_sum, weight_weight_sum, + red_sum, green_sum, blue_sum, psum, qsum); + rgbo_vectors[i] = rgbovec; + + // We will occasionally get a failure due to the use of a singular + // (non-invertible) matrix. Record whether such a failure has taken + // place; if it did, compute rgbo_vectors[] with a different method + // later on. + float chkval = dot_s(rgbovec, rgbovec); + int rgbo_fail = chkval != chkval; + + // Initialize the luminance and scale vectors with a reasonable + // default, just in case the subsequent calculation blows up. + #ifdef DEBUG_CAPTURE_NAN + fedisableexcept(FE_DIVBYZERO | FE_INVALID); + #endif + + float scalediv = scale_min * (1.0f / astc::max(scale_max, 1e-10f)); + scalediv = astc::clamp1f(scalediv); + + #ifdef DEBUG_CAPTURE_NAN + feenableexcept(FE_DIVBYZERO | FE_INVALID); + #endif + + vfloat4 sds = scale_direction * scale_max; + + rgbs_vectors[i] = vfloat4(sds.lane<0>(), sds.lane<1>(), sds.lane<2>(), scalediv); + + if (wmin1 >= wmax1 * 0.999f) + { + // if all weights in the partition were equal, then just take average + // of all colors in the partition and use that as both endpoint colors. 
+ vfloat4 avg = (color_vec_x + color_vec_y) * (1.0f / rgba_weight_sum); + + vmask4 notnan_mask = avg == avg; + ep->endpt0[i] = select(ep->endpt0[i], avg, notnan_mask); + ep->endpt1[i] = select(ep->endpt1[i], avg, notnan_mask); + + rgbs_vectors[i] = vfloat4(sds.lane<0>(), sds.lane<1>(), sds.lane<2>(), 1.0f); + } + else + { + // otherwise, complete the analytic calculation of ideal-endpoint-values + // for the given set of texel weights and pixel colors. + + #ifdef DEBUG_CAPTURE_NAN + fedisableexcept(FE_DIVBYZERO | FE_INVALID); + #endif + + vfloat4 color_det1 = (left_sum * right_sum) - (middle_sum * middle_sum); + vfloat4 color_rdet1 = 1.0f / color_det1; + + float ls_det1 = (lmrs_sum.lane<0>() * lmrs_sum.lane<2>()) - (lmrs_sum.lane<1>() * lmrs_sum.lane<1>()); + float ls_rdet1 = 1.0f / ls_det1; + + vfloat4 color_mss1 = (left_sum * left_sum) + + (2.0f * middle_sum * middle_sum) + + (right_sum * right_sum); + + float ls_mss1 = (lmrs_sum.lane<0>() * lmrs_sum.lane<0>()) + + (2.0f * lmrs_sum.lane<1>() * lmrs_sum.lane<1>()) + + (lmrs_sum.lane<2>() * lmrs_sum.lane<2>()); + + vfloat4 ep0 = (right_sum * color_vec_x - middle_sum * color_vec_y) * color_rdet1; + vfloat4 ep1 = (left_sum * color_vec_y - middle_sum * color_vec_x) * color_rdet1; + + vmask4 det_mask = abs(color_det1) > (color_mss1 * 1e-4f); + vmask4 notnan_mask = (ep0 == ep0) & (ep1 == ep1); + vmask4 full_mask = det_mask & notnan_mask; + + ep->endpt0[i] = select(ep->endpt0[i], ep0, full_mask); + ep->endpt1[i] = select(ep->endpt1[i], ep1, full_mask); - if (avgdif <= 0.0f) + float scale_ep0 = (lmrs_sum.lane<2>() * scale_vec.r - lmrs_sum.lane<1>() * scale_vec.g) * ls_rdet1; + float scale_ep1 = (lmrs_sum.lane<0>() * scale_vec.g - lmrs_sum.lane<1>() * scale_vec.r) * ls_rdet1; + + if (fabsf(ls_det1) > (ls_mss1 * 1e-4f) && scale_ep0 == scale_ep0 && scale_ep1 == scale_ep1 && scale_ep0 < scale_ep1) { - avgdif = 0.0f; + float scalediv2 = scale_ep0 * (1.0f / scale_ep1); + vfloat4 sdsm = scale_direction * scale_ep1; + rgbs_vectors[i] = vfloat4(sdsm.lane<0>(), sdsm.lane<1>(), sdsm.lane<2>(), scalediv2); } - float4 avg = (v0 + v1) * 0.5f; - float4 ep0 = avg - float4(avgdif, avgdif, avgdif, avgdif) * 0.5f; + #ifdef DEBUG_CAPTURE_NAN + feenableexcept(FE_DIVBYZERO | FE_INVALID); + #endif + } + + // if the calculation of an RGB-offset vector failed, try to compute + // a somewhat-sensible value anyway + if (rgbo_fail) + { + vfloat4 v0 = ep->endpt0[i]; + vfloat4 v1 = ep->endpt1[i]; + float avgdif = ((v1.lane<0>() - v0.lane<0>()) + (v1.lane<1>() - v0.lane<1>()) + (v1.lane<2>() - v0.lane<2>())) * (1.0f / 3.0f); + avgdif = astc::max(avgdif, 0.0f); + + vfloat4 avg = (v0 + v1) * 0.5f; + vfloat4 ep0 = avg - vfloat4(avgdif) * 0.5f; - rgbo_vectors[i] = float4(ep0.r, ep0.g, ep0.b, avgdif); + rgbo_vectors[i] = vfloat4(ep0.lane<0>(), ep0.lane<1>(), ep0.lane<2>(), avgdif); } } } diff --git a/libkram/astc-encoder/astcenc_image.cpp b/libkram/astc-encoder/astcenc_image.cpp index 50eceffc..1d67c01b 100644 --- a/libkram/astc-encoder/astcenc_image.cpp +++ b/libkram/astc-encoder/astcenc_image.cpp @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2011-2020 Arm Limited +// Copyright 2011-2021 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. 
You may obtain a copy @@ -24,256 +24,108 @@ #include "astcenc_internal.h" -// hack in 2d array support for f32 on encode, and u8 on decode -#define USE_2DARRAY 1 - -// conversion functions between the LNS representation and the FP16 representation. -static float float_to_lns(float p) -{ - if (astc::isnan(p) || p <= 1.0f / 67108864.0f) - { - // underflow or NaN value, return 0. - // We count underflow if the input value is smaller than 2^-26. - return 0.0f; - } - - if (fabsf(p) >= 65536.0f) - { - // overflow, return a +INF value - return 65535.0f; - } - - int expo; - float normfrac = frexpf(p, &expo); - float p1; - if (expo < -13) - { - // input number is smaller than 2^-14. In this case, multiply by 2^25. - p1 = p * 33554432.0f; - expo = 0; - } - else - { - expo += 14; - p1 = (normfrac - 0.5f) * 4096.0f; - } - - if (p1 < 384.0f) - p1 *= 4.0f / 3.0f; - else if (p1 <= 1408.0f) - p1 += 128.0f; - else - p1 = (p1 + 512.0f) * (4.0f / 5.0f); - - p1 += ((float)expo) * 2048.0f; - return p1 + 1.0f; -} - -static uint16_t lns_to_sf16(uint16_t p) -{ - uint16_t mc = p & 0x7FF; - uint16_t ec = p >> 11; - uint16_t mt; - if (mc < 512) - mt = 3 * mc; - else if (mc < 1536) - mt = 4 * mc - 512; - else - mt = 5 * mc - 2048; - - uint16_t res = (ec << 10) | (mt >> 3); - if (res >= 0x7BFF) - res = 0x7BFF; - return res; -} - -// conversion function from 16-bit LDR value to FP16. -// note: for LDR interpolation, it is impossible to get a denormal result; -// this simplifies the conversion. -// FALSE; we can receive a very small UNORM16 through the constant-block. -uint16_t unorm16_to_sf16(uint16_t p) -{ - if (p == 0xFFFF) - return 0x3C00; // value of 1.0 . - if (p < 4) - return p << 8; - - int lz = clz32(p) - 16; - p <<= (lz + 1); - p >>= 6; - p |= (14 - lz) << 10; - return p; -} - void imageblock_initialize_deriv( - const imageblock* pb, + const imageblock* blk, int pixelcount, - float4* dptr + vfloat4* dptr ) { + // TODO: For LDR on the current codec we can skip this if no LNS and just + // early-out as we use the same LNS settings everywhere ... for (int i = 0; i < pixelcount; i++) { - // compute derivatives for RGB first - if (pb->rgb_lns[i]) + vfloat4 derv_unorm(65535.0f); + vfloat4 derv_lns = vfloat4::zero(); + + // TODO: Pack these into bits and avoid the disjoint fetch + int rgb_lns = blk->rgb_lns[i]; + int a_lns = blk->alpha_lns[i]; + + // Compute derivatives if we have any use of LNS + if (rgb_lns || a_lns) { - float3 fdata = float3(pb->data_r[i], pb->data_g[i], pb->data_b[i]); - fdata.r = sf16_to_float(lns_to_sf16((uint16_t)fdata.r)); - fdata.g = sf16_to_float(lns_to_sf16((uint16_t)fdata.g)); - fdata.b = sf16_to_float(lns_to_sf16((uint16_t)fdata.b)); - - float r = MAX(fdata.r, 6e-5f); - float g = MAX(fdata.g, 6e-5f); - float b = MAX(fdata.b, 6e-5f); - - float rderiv = (float_to_lns(r * 1.05f) - float_to_lns(r)) / (r * 0.05f); - float gderiv = (float_to_lns(g * 1.05f) - float_to_lns(g)) / (g * 0.05f); - float bderiv = (float_to_lns(b * 1.05f) - float_to_lns(b)) / (b * 0.05f); - - // the derivative may not actually take values smaller than 1/32 or larger than 2^25; - // if it does, we clamp it. 
- if (rderiv < (1.0f / 32.0f)) - { - rderiv = (1.0f / 32.0f); - } - else if (rderiv > 33554432.0f) - { - rderiv = 33554432.0f; - } + vfloat4 data = blk->texel(i); + vint4 datai = lns_to_sf16(float_to_int(data)); - if (gderiv < (1.0f / 32.0f)) - { - gderiv = (1.0f / 32.0f); - } - else if (gderiv > 33554432.0f) - { - gderiv = 33554432.0f; - } + vfloat4 dataf = float16_to_float(datai); + dataf = max(dataf, 6e-5f); - if (bderiv < (1.0f / 32.0f)) - { - bderiv = (1.0f / 32.0f); - } - else if (bderiv > 33554432.0f) - { - bderiv = 33554432.0f; - } + vfloat4 data_lns1 = dataf * 1.05f; + data_lns1 = float_to_lns(data_lns1); - dptr->r = rderiv; - dptr->g = gderiv; - dptr->b = bderiv; - } - else - { - dptr->r = 65535.0f; - dptr->g = 65535.0f; - dptr->b = 65535.0f; - } + vfloat4 data_lns2 = dataf; + data_lns2 = float_to_lns(data_lns2); - // then compute derivatives for Alpha - if (pb->alpha_lns[i]) - { - float fdata = pb->data_a[i]; - fdata = sf16_to_float(lns_to_sf16((uint16_t)fdata)); - - float a = MAX(fdata, 6e-5f); - float aderiv = (float_to_lns(a * 1.05f) - float_to_lns(a)) / (a * 0.05f); - // the derivative may not actually take values smaller than 1/32 or larger than 2^25; - // if it does, we clamp it. - if (aderiv < (1.0f / 32.0f)) - { - aderiv = (1.0f / 32.0f); - } - else if (aderiv > 33554432.0f) - { - aderiv = 33554432.0f; - } + vfloat4 divisor_lns = dataf * 0.05f; - dptr->a = aderiv; - } - else - { - dptr->a = 65535.0f; + // Clamp derivatives between 1/32 and 2^25 + float lo = 1.0f / 32.0f; + float hi = 33554432.0f; + derv_lns = clamp(lo, hi, (data_lns1 - data_lns2) / divisor_lns); } + vint4 use_lns(rgb_lns, rgb_lns, rgb_lns, a_lns); + vmask4 lns_mask = use_lns != vint4::zero(); + *dptr = select(derv_unorm, derv_lns, lns_mask); dptr++; } } // helper function to initialize the work-data from the orig-data -void imageblock_initialize_work_from_orig( - imageblock* pb, +static void imageblock_initialize_work_from_orig( + imageblock* blk, int pixelcount ) { - pb->origin_texel = float4(pb->data_r[0], pb->data_g[0], - pb->data_b[0], pb->data_a[0]); + blk->origin_texel = blk->texel(0); + + vfloat4 data_min(1e38f); + vfloat4 data_max(-1e38f); + bool grayscale = true; for (int i = 0; i < pixelcount; i++) { - float4 inc = float4(pb->data_r[i], pb->data_g[i], - pb->data_b[i], pb->data_a[i]); + vfloat4 data = blk->texel(i); + vfloat4 color_lns = vfloat4::zero(); + vfloat4 color_unorm = data * 65535.0f; - if (pb->rgb_lns[i]) - { - pb->data_r[i] = float_to_lns(inc.r); - pb->data_g[i] = float_to_lns(inc.g); - pb->data_b[i] = float_to_lns(inc.b); - } - else - { - pb->data_r[i] = inc.r * 65535.0f; - pb->data_g[i] = inc.g * 65535.0f; - pb->data_b[i] = inc.b * 65535.0f; - } + int rgb_lns = blk->rgb_lns[i]; + int a_lns = blk->alpha_lns[i]; - if (pb->alpha_lns[i]) + if (rgb_lns || a_lns) { - pb->data_a[i] = float_to_lns(inc.a); + color_lns = float_to_lns(data); } - else - { - pb->data_a[i] = inc.a * 65535.0f; - } - } -} -// helper function to initialize the orig-data from the work-data -void imageblock_initialize_orig_from_work( - imageblock* pb, - int pixelcount -) { - for (int i = 0; i < pixelcount; i++) - { - float4 inc = float4(pb->data_r[i], pb->data_g[i], - pb->data_b[i], pb->data_a[i]); + vint4 use_lns(rgb_lns, rgb_lns, rgb_lns, a_lns); + vmask4 lns_mask = use_lns != vint4::zero(); + data = select(color_unorm, color_lns, lns_mask); - if (pb->rgb_lns[i]) - { - pb->data_r[i] = sf16_to_float(lns_to_sf16((uint16_t)inc.r)); - pb->data_g[i] = sf16_to_float(lns_to_sf16((uint16_t)inc.g)); - pb->data_b[i] = 
sf16_to_float(lns_to_sf16((uint16_t)inc.b)); - } - else - { - pb->data_r[i] = sf16_to_float(unorm16_to_sf16((uint16_t)inc.r)); - pb->data_g[i] = sf16_to_float(unorm16_to_sf16((uint16_t)inc.g)); - pb->data_b[i] = sf16_to_float(unorm16_to_sf16((uint16_t)inc.b)); - } + // Compute block metadata + data_min = min(data_min, data); + data_max = max(data_max, data); - if (pb->alpha_lns[i]) + if (grayscale && (data.lane<0>() != data.lane<1>() || data.lane<0>() != data.lane<2>())) { - pb->data_a[i] = sf16_to_float(lns_to_sf16((uint16_t)inc.a)); - } - else - { - pb->data_a[i] = sf16_to_float(unorm16_to_sf16((uint16_t)inc.a)); + grayscale = false; } + + // Store block data + blk->data_r[i] = data.lane<0>(); + blk->data_g[i] = data.lane<1>(); + blk->data_b[i] = data.lane<2>(); + blk->data_a[i] = data.lane<3>(); } + + // Store block metadata + blk->data_min = data_min; + blk->data_max = data_max; + blk->grayscale = grayscale; } // fetch an imageblock from the input file. void fetch_imageblock( astcenc_profile decode_mode, const astcenc_image& img, - imageblock* pb, // picture-block to initialize with image data + imageblock* blk, // picture-block to initialize with image data const block_size_descriptor* bsd, // position in texture. int xpos, @@ -285,9 +137,9 @@ void fetch_imageblock( int ysize = img.dim_y; int zsize = img.dim_z; - pb->xpos = xpos; - pb->ypos = ypos; - pb->zpos = zpos; + blk->xpos = xpos; + blk->ypos = ypos; + blk->zpos = zpos; // True if any non-identity swizzle bool needs_swz = (swz.r != ASTCENC_SWZ_R) || (swz.g != ASTCENC_SWZ_G) || @@ -300,49 +152,24 @@ void fetch_imageblock( data[ASTCENC_SWZ_0] = 0x00; data[ASTCENC_SWZ_1] = 0xFF; -#if USE_2DARRAY - uint8_t* data8 = static_cast(img.data); -#else - uint8_t*** data8 = static_cast(img.data); -#endif for (int z = 0; z < bsd->zdim; z++) { - int zi = zpos + z; - if (zi < 0) - zi = 0; - if (zi >= zsize) - zi = zsize - 1; + int zi = astc::min(zpos + z, zsize - 1); + uint8_t* data8 = static_cast(img.data[zi]); for (int y = 0; y < bsd->ydim; y++) { - int yi = ypos + y; - if (yi < 0) - yi = 0; - if (yi >= ysize) - yi = ysize - 1; - + int yi = astc::min(ypos + y, ysize - 1); + for (int x = 0; x < bsd->xdim; x++) { - int xi = xpos + x; - if (xi < 0) - xi = 0; - if (xi >= xsize) - xi = xsize - 1; -#if USE_2DARRAY - int px = (yi * xsize + xi) * 4; - assert(zi == 0); - assert(px >= 0 && px < (xsize * ysize * 4)); - - int r = data8[px + 0]; - int g = data8[px + 1]; - int b = data8[px + 2]; - int a = data8[px + 3]; -#else - int r = data8[zi][yi][4 * xi ]; - int g = data8[zi][yi][4 * xi + 1]; - int b = data8[zi][yi][4 * xi + 2]; - int a = data8[zi][yi][4 * xi + 3]; -#endif + int xi = astc::min(xpos + x, xsize - 1); + + int r = data8[(4 * xsize * yi) + (4 * xi )]; + int g = data8[(4 * xsize * yi) + (4 * xi + 1)]; + int b = data8[(4 * xsize * yi) + (4 * xi + 2)]; + int a = data8[(4 * xsize * yi) + (4 * xi + 3)]; + if (needs_swz) { data[ASTCENC_SWZ_R] = r; @@ -356,10 +183,10 @@ void fetch_imageblock( a = data[swz.a]; } - pb->data_r[idx] = r / 255.0f; - pb->data_g[idx] = g / 255.0f; - pb->data_b[idx] = b / 255.0f; - pb->data_a[idx] = a / 255.0f; + blk->data_r[idx] = static_cast(r) / 255.0f; + blk->data_g[idx] = static_cast(g) / 255.0f; + blk->data_b[idx] = static_cast(b) / 255.0f; + blk->data_a[idx] = static_cast(a) / 255.0f; idx++; } } @@ -371,34 +198,23 @@ void fetch_imageblock( data[ASTCENC_SWZ_0] = 0x0000; data[ASTCENC_SWZ_1] = 0x3C00; - uint16_t*** data16 = static_cast(img.data); for (int z = 0; z < bsd->zdim; z++) { + int zi = astc::min(zpos + z, 
zsize - 1); + uint16_t* data16 = static_cast(img.data[zi]); + for (int y = 0; y < bsd->ydim; y++) { + int yi = astc::min(ypos + y, ysize - 1); + for (int x = 0; x < bsd->xdim; x++) { - int xi = xpos + x; - int yi = ypos + y; - int zi = zpos + z; - // clamp XY coordinates to the picture. - if (xi < 0) - xi = 0; - if (yi < 0) - yi = 0; - if (zi < 0) - zi = 0; - if (xi >= xsize) - xi = xsize - 1; - if (yi >= ysize) - yi = ysize - 1; - if (zi >= ysize) - zi = zsize - 1; - - int r = data16[zi][yi][4 * xi ]; - int g = data16[zi][yi][4 * xi + 1]; - int b = data16[zi][yi][4 * xi + 2]; - int a = data16[zi][yi][4 * xi + 3]; + int xi = astc::min(xpos + x, xsize - 1); + + int r = data16[(4 * xsize * yi) + (4 * xi )]; + int g = data16[(4 * xsize * yi) + (4 * xi + 1)]; + int b = data16[(4 * xsize * yi) + (4 * xi + 2)]; + int a = data16[(4 * xsize * yi) + (4 * xi + 3)]; if (needs_swz) { @@ -413,10 +229,11 @@ void fetch_imageblock( a = data[swz.a]; } - pb->data_r[idx] = MAX(sf16_to_float(r), 1e-8f); - pb->data_g[idx] = MAX(sf16_to_float(g), 1e-8f); - pb->data_b[idx] = MAX(sf16_to_float(b), 1e-8f); - pb->data_a[idx] = MAX(sf16_to_float(a), 1e-8f); + vfloat4 dataf = max(float16_to_float(vint4(r, g, b, a)), 1e-8f); + blk->data_r[idx] = dataf.lane<0>(); + blk->data_g[idx] = dataf.lane<1>(); + blk->data_b[idx] = dataf.lane<2>(); + blk->data_a[idx] = dataf.lane<3>(); idx++; } } @@ -430,76 +247,25 @@ void fetch_imageblock( data[ASTCENC_SWZ_0] = 0.0f; data[ASTCENC_SWZ_1] = 1.0f; -#if USE_2DARRAY - float4* data32 = static_cast(img.data); -#else - float*** data32 = static_cast(img.data); -#endif for (int z = 0; z < bsd->zdim; z++) { - int zi = zpos + z; - if (zi < 0) - zi = 0; - if (zi >= ysize) - zi = zsize - 1; - + int zi = astc::min(zpos + z, zsize - 1); + float* data32 = static_cast(img.data[zi]); + for (int y = 0; y < bsd->ydim; y++) { - int yi = ypos + y; - if (yi < 0) - yi = 0; - if (yi >= ysize) - yi = ysize - 1; - + int yi = astc::min(ypos + y, ysize - 1); + for (int x = 0; x < bsd->xdim; x++) { - // clamp XY coordinates to the picture. - int xi = xpos + x; - if (xi < 0) - xi = 0; - if (xi >= xsize) - xi = xsize - 1; - -#if USE_2DARRAY - int px = (yi * xsize + xi); // * 4; - assert(zi == 0); - assert(px >= 0 && px < (xsize * ysize)); - float4 val = data32[px]; - val = max(val, float4(1e-8f)); // why can't this 0, the U8 Path does? - - if (needs_swz) - { - // prob best as a swizzle, and then select in 0/1 elements - // instead of a 6 array lookup which isn't simd compatible. 
- float r = val.r; - float g = val.g; - float b = val.b; - float a = val.a; - - data[ASTCENC_SWZ_R] = r; - data[ASTCENC_SWZ_G] = g; - data[ASTCENC_SWZ_B] = b; - data[ASTCENC_SWZ_A] = a; - - val.r = data[swz.r]; - val.g = data[swz.g]; - val.b = data[swz.b]; - val.a = data[swz.a]; - } - - // ugh, this pulls out of simd to planar - pb->data_r[idx] = val.r; - pb->data_g[idx] = val.g; - pb->data_b[idx] = val.b; - pb->data_a[idx] = val.a; - -#else - float r = data32[zi][yi][4 * xi ]; - float g = data32[zi][yi][4 * xi + 1]; - float b = data32[zi][yi][4 * xi + 2]; - float a = data32[zi][yi][4 * xi + 3]; - - if (needs_swz) + int xi = astc::min(xpos + x, xsize - 1); + + float r = data32[(4 * xsize * yi) + (4 * xi )]; + float g = data32[(4 * xsize * yi) + (4 * xi + 1)]; + float b = data32[(4 * xsize * yi) + (4 * xi + 2)]; + float a = data32[(4 * xsize * yi) + (4 * xi + 3)]; + + if (needs_swz) { data[ASTCENC_SWZ_R] = r; data[ASTCENC_SWZ_G] = g; @@ -511,12 +277,11 @@ void fetch_imageblock( b = data[swz.b]; a = data[swz.a]; } - - pb->data_r[idx] = MAX(r, 1e-8f); - pb->data_g[idx] = MAX(g, 1e-8f); - pb->data_b[idx] = MAX(b, 1e-8f); - pb->data_a[idx] = MAX(a, 1e-8f); -#endif + + blk->data_r[idx] = astc::max(r, 1e-8f); + blk->data_g[idx] = astc::max(g, 1e-8f); + blk->data_b[idx] = astc::max(b, 1e-8f); + blk->data_a[idx] = astc::max(a, 1e-8f); idx++; } } @@ -529,18 +294,17 @@ void fetch_imageblock( // impose the choice on every pixel when encoding. for (int i = 0; i < bsd->texel_count; i++) { - pb->rgb_lns[i] = rgb_lns; - pb->alpha_lns[i] = alpha_lns; - pb->nan_texel[i] = 0; + blk->rgb_lns[i] = rgb_lns; + blk->alpha_lns[i] = alpha_lns; + blk->nan_texel[i] = 0; } - imageblock_initialize_work_from_orig(pb, bsd->texel_count); - update_imageblock_flags(pb, bsd->xdim, bsd->ydim, bsd->zdim); + imageblock_initialize_work_from_orig(blk, bsd->texel_count); } void write_imageblock( astcenc_image& img, - const imageblock* pb, // picture-block to initialize with image data. We assume that orig_data is valid. + const imageblock* blk, // picture-block to initialize with image data. We assume that orig_data is valid. 
const block_size_descriptor* bsd, // position to write the block to int xpos, @@ -548,11 +312,22 @@ void write_imageblock( int zpos, astcenc_swizzle swz ) { - const uint8_t *nptr = pb->nan_texel; + const uint8_t *nptr = blk->nan_texel; int xsize = img.dim_x; int ysize = img.dim_y; int zsize = img.dim_z; + int x_start = xpos; + int x_end = std::min(xsize, xpos + bsd->xdim); + int x_nudge = bsd->xdim - (x_end - x_start); + + int y_start = ypos; + int y_end = std::min(ysize, ypos + bsd->ydim); + int y_nudge = (bsd->ydim - (y_end - y_start)) * bsd->xdim; + + int z_start = zpos; + int z_end = std::min(zsize, zpos + bsd->zdim); + float data[7]; data[ASTCENC_SWZ_0] = 0.0f; data[ASTCENC_SWZ_1] = 1.0f; @@ -568,280 +343,174 @@ void write_imageblock( int idx = 0; if (img.data_type == ASTCENC_TYPE_U8) { -#if USE_2DARRAY - uint8_t* data8 = static_cast(img.data); -#else - uint8_t*** data8 = static_cast(img.data); -#endif - for (int z = 0; z < bsd->zdim; z++) + for (int z = z_start; z < z_end; z++) { - for (int y = 0; y < bsd->ydim; y++) + // Fetch the image plane + uint8_t* data8 = static_cast(img.data[z]); + + for (int y = y_start; y < y_end; y++) { - for (int x = 0; x < bsd->xdim; x++) + for (int x = x_start; x < x_end; x++) { - int xi = xpos + x; - int yi = ypos + y; - int zi = zpos + z; + vint4 colori = vint4::zero(); - if (xi >= 0 && yi >= 0 && zi >= 0 && xi < xsize && yi < ysize && zi < zsize) + if (*nptr) { - int ri, gi, bi, ai; + // Can't display NaN - show magenta error color + colori = vint4(0xFF, 0x00, 0xFF, 0xFF); + } + else if (needs_swz) + { + data[ASTCENC_SWZ_R] = blk->data_r[idx]; + data[ASTCENC_SWZ_G] = blk->data_g[idx]; + data[ASTCENC_SWZ_B] = blk->data_b[idx]; + data[ASTCENC_SWZ_A] = blk->data_a[idx]; - if (*nptr) - { - // NaN-pixel, but we can't display it. Display purple instead. 
- ri = 0xFF; - gi = 0x00; - bi = 0xFF; - ai = 0xFF; - } - else if (needs_swz) + if (needs_z) { - data[ASTCENC_SWZ_R] = pb->data_r[idx]; - data[ASTCENC_SWZ_G] = pb->data_g[idx]; - data[ASTCENC_SWZ_B] = pb->data_b[idx]; - data[ASTCENC_SWZ_A] = pb->data_a[idx]; - - if (needs_z) + float xcoord = (data[0] * 2.0f) - 1.0f; + float ycoord = (data[3] * 2.0f) - 1.0f; + float zcoord = 1.0f - xcoord * xcoord - ycoord * ycoord; + if (zcoord < 0.0f) { - float xcoord = (data[0] * 2.0f) - 1.0f; - float ycoord = (data[3] * 2.0f) - 1.0f; - float zcoord = 1.0f - xcoord * xcoord - ycoord * ycoord; - if (zcoord < 0.0f) - { - zcoord = 0.0f; - } - data[ASTCENC_SWZ_Z] = (astc::sqrt(zcoord) * 0.5f) + 0.5f; + zcoord = 0.0f; } - - ri = astc::flt2int_rtn(MIN(data[swz.r], 1.0f) * 255.0f); - gi = astc::flt2int_rtn(MIN(data[swz.g], 1.0f) * 255.0f); - bi = astc::flt2int_rtn(MIN(data[swz.b], 1.0f) * 255.0f); - ai = astc::flt2int_rtn(MIN(data[swz.a], 1.0f) * 255.0f); + data[ASTCENC_SWZ_Z] = (astc::sqrt(zcoord) * 0.5f) + 0.5f; } - else - { - ri = astc::flt2int_rtn(MIN(pb->data_r[idx], 1.0f) * 255.0f); - gi = astc::flt2int_rtn(MIN(pb->data_g[idx], 1.0f) * 255.0f); - bi = astc::flt2int_rtn(MIN(pb->data_b[idx], 1.0f) * 255.0f); - ai = astc::flt2int_rtn(MIN(pb->data_a[idx], 1.0f) * 255.0f); - } -#if USE_2DARRAY - int px = (yi * xsize + xi) * 4; - assert(zi == 0); - assert(px >= 0 && px < (xsize * ysize * 4)); - - data8[px + 0] = ri; - data8[px + 1] = gi; - data8[px + 2] = bi; - data8[px + 3] = ai; - -#else - data8[zi][yi][4 * xi ] = ri; - data8[zi][yi][4 * xi + 1] = gi; - data8[zi][yi][4 * xi + 2] = bi; - data8[zi][yi][4 * xi + 3] = ai; -#endif + + vfloat4 color = vfloat4(data[swz.r], data[swz.g], data[swz.b], data[swz.a]); + colori = float_to_int_rtn(min(color, 1.0f) * 255.0f); } + else + { + vfloat4 color = blk->texel(idx); + colori = float_to_int_rtn(min(color, 1.0f) * 255.0f); + } + + colori = pack_low_bytes(colori); + store_nbytes(colori, data8 + (4 * xsize * y) + (4 * x )); + idx++; nptr++; } + idx += x_nudge; + nptr += x_nudge; } + idx += y_nudge; + nptr += y_nudge; } } else if (img.data_type == ASTCENC_TYPE_F16) { - uint16_t*** data16 = static_cast(img.data); - for (int z = 0; z < bsd->zdim; z++) + for (int z = z_start; z < z_end; z++) { - for (int y = 0; y < bsd->ydim; y++) + // Fetch the image plane + uint16_t* data16 = static_cast(img.data[z]); + + for (int y = y_start; y < y_end; y++) { - for (int x = 0; x < bsd->xdim; x++) + for (int x = x_start; x < x_end; x++) { - int xi = xpos + x; - int yi = ypos + y; - int zi = zpos + z; + vint4 color; - if (xi >= 0 && yi >= 0 && zi >= 0 && xi < xsize && yi < ysize && zi < zsize) + if (*nptr) { - int ri, gi, bi, ai; + color = vint4(0xFFFF); + } + else if (needs_swz) + { + data[ASTCENC_SWZ_R] = blk->data_r[idx]; + data[ASTCENC_SWZ_G] = blk->data_g[idx]; + data[ASTCENC_SWZ_B] = blk->data_b[idx]; + data[ASTCENC_SWZ_A] = blk->data_a[idx]; - if (*nptr) - { - ri = 0xFFFF; - gi = 0xFFFF; - bi = 0xFFFF; - ai = 0xFFFF; - } - else if (needs_swz) + if (needs_z) { - data[ASTCENC_SWZ_R] = pb->data_r[idx]; - data[ASTCENC_SWZ_G] = pb->data_g[idx]; - data[ASTCENC_SWZ_B] = pb->data_b[idx]; - data[ASTCENC_SWZ_A] = pb->data_a[idx]; - - if (needs_z) + float xN = (data[0] * 2.0f) - 1.0f; + float yN = (data[3] * 2.0f) - 1.0f; + float zN = 1.0f - xN * xN - yN * yN; + if (zN < 0.0f) { - float xN = (data[0] * 2.0f) - 1.0f; - float yN = (data[3] * 2.0f) - 1.0f; - float zN = 1.0f - xN * xN - yN * yN; - if (zN < 0.0f) - { - zN = 0.0f; - } - data[ASTCENC_SWZ_Z] = (astc::sqrt(zN) * 0.5f) + 0.5f; + zN = 
0.0f; } - - ri = float_to_sf16(data[swz.r], SF_NEARESTEVEN); - gi = float_to_sf16(data[swz.g], SF_NEARESTEVEN); - bi = float_to_sf16(data[swz.b], SF_NEARESTEVEN); - ai = float_to_sf16(data[swz.a], SF_NEARESTEVEN); - } - else - { - ri = float_to_sf16(pb->data_r[idx], SF_NEARESTEVEN); - gi = float_to_sf16(pb->data_g[idx], SF_NEARESTEVEN); - bi = float_to_sf16(pb->data_b[idx], SF_NEARESTEVEN); - ai = float_to_sf16(pb->data_a[idx], SF_NEARESTEVEN); + data[ASTCENC_SWZ_Z] = (astc::sqrt(zN) * 0.5f) + 0.5f; } - data16[zi][yi][4 * xi ] = ri; - data16[zi][yi][4 * xi + 1] = gi; - data16[zi][yi][4 * xi + 2] = bi; - data16[zi][yi][4 * xi + 3] = ai; + vfloat4 colorf(data[swz.r], data[swz.g], data[swz.b], data[swz.a]); + color = float_to_float16(colorf); } + else + { + vfloat4 colorf = blk->texel(idx); + color = float_to_float16(colorf); + } + + data16[(4 * xsize * y) + (4 * x )] = (uint16_t)color.lane<0>(); + data16[(4 * xsize * y) + (4 * x + 1)] = (uint16_t)color.lane<1>(); + data16[(4 * xsize * y) + (4 * x + 2)] = (uint16_t)color.lane<2>(); + data16[(4 * xsize * y) + (4 * x + 3)] = (uint16_t)color.lane<3>(); + idx++; nptr++; } + idx += x_nudge; + nptr += x_nudge; } + idx += y_nudge; + nptr += y_nudge; } } else // if (img.data_type == ASTCENC_TYPE_F32) { assert(img.data_type == ASTCENC_TYPE_F32); - - float*** data32 = static_cast(img.data); - for (int z = 0; z < bsd->zdim; z++) + + for (int z = z_start; z < z_end; z++) { - for (int y = 0; y < bsd->ydim; y++) + // Fetch the image plane + float* data32 = static_cast(img.data[z]); + + for (int y = y_start; y < y_end; y++) { - for (int x = 0; x < bsd->xdim; x++) + for (int x = x_start; x < x_end; x++) { - int xi = xpos + x; - int yi = ypos + y; - int zi = zpos + z; + vfloat4 color = blk->texel(idx); - if (xi >= 0 && yi >= 0 && zi >= 0 && xi < xsize && yi < ysize && zi < zsize) + if (*nptr) { - float rf, gf, bf, af; + color = vfloat4(std::numeric_limits::quiet_NaN()); + } + else if (needs_swz) + { + data[ASTCENC_SWZ_R] = color.lane<0>(); + data[ASTCENC_SWZ_G] = color.lane<1>(); + data[ASTCENC_SWZ_B] = color.lane<2>(); + data[ASTCENC_SWZ_A] = color.lane<3>(); - if (*nptr) - { - rf = std::numeric_limits::quiet_NaN(); - gf = std::numeric_limits::quiet_NaN(); - bf = std::numeric_limits::quiet_NaN(); - af = std::numeric_limits::quiet_NaN(); - } - else if (needs_swz) + if (needs_z) { - data[ASTCENC_SWZ_R] = pb->data_r[idx]; - data[ASTCENC_SWZ_G] = pb->data_g[idx]; - data[ASTCENC_SWZ_B] = pb->data_b[idx]; - data[ASTCENC_SWZ_A] = pb->data_a[idx]; - - if (needs_z) + float xN = (data[0] * 2.0f) - 1.0f; + float yN = (data[3] * 2.0f) - 1.0f; + float zN = 1.0f - xN * xN - yN * yN; + if (zN < 0.0f) { - float xN = (data[0] * 2.0f) - 1.0f; - float yN = (data[3] * 2.0f) - 1.0f; - float zN = 1.0f - xN * xN - yN * yN; - if (zN < 0.0f) - { - zN = 0.0f; - } - data[ASTCENC_SWZ_Z] = (astc::sqrt(zN) * 0.5f) + 0.5f; + zN = 0.0f; } - - rf = data[swz.r]; - gf = data[swz.g]; - bf = data[swz.b]; - af = data[swz.a]; - } - else - { - rf = pb->data_r[idx]; - gf = pb->data_g[idx]; - bf = pb->data_b[idx]; - af = pb->data_a[idx]; + data[ASTCENC_SWZ_Z] = (astc::sqrt(zN) * 0.5f) + 0.5f; } - data32[zi][yi][4 * xi ] = rf; - data32[zi][yi][4 * xi + 1] = gf; - data32[zi][yi][4 * xi + 2] = bf; - data32[zi][yi][4 * xi + 3] = af; + color = vfloat4(data[swz.r], data[swz.g], data[swz.b], data[swz.a]); } + + store(color, data32 + (4 * xsize * y) + (4 * x )); + idx++; nptr++; } + idx += x_nudge; + nptr += x_nudge; } + idx += y_nudge; + nptr += y_nudge; } } } - -/* - For an imageblock, update its flags. 
-   The updating is done based on data, not orig_data.
-*/
-void update_imageblock_flags(
-    imageblock* pb,
-    int xdim,
-    int ydim,
-    int zdim
-) {
-    float red_min = 1e38f, red_max = -1e38f;
-    float green_min = 1e38f, green_max = -1e38f;
-    float blue_min = 1e38f, blue_max = -1e38f;
-    float alpha_min = 1e38f, alpha_max = -1e38f;
-
-    int texels_per_block = xdim * ydim * zdim;
-
-    int grayscale = 1;
-
-    for (int i = 0; i < texels_per_block; i++)
-    {
-        float red = pb->data_r[i];
-        float green = pb->data_g[i];
-        float blue = pb->data_b[i];
-        float alpha = pb->data_a[i];
-        if (red < red_min)
-            red_min = red;
-        if (red > red_max)
-            red_max = red;
-        if (green < green_min)
-            green_min = green;
-        if (green > green_max)
-            green_max = green;
-        if (blue < blue_min)
-            blue_min = blue;
-        if (blue > blue_max)
-            blue_max = blue;
-        if (alpha < alpha_min)
-            alpha_min = alpha;
-        if (alpha > alpha_max)
-            alpha_max = alpha;
-
-        if (grayscale == 1 && (red != green || red != blue))
-        {
-            grayscale = 0;
-        }
-    }
-
-    pb->red_min = red_min;
-    pb->red_max = red_max;
-    pb->green_min = green_min;
-    pb->green_max = green_max;
-    pb->blue_min = blue_min;
-    pb->blue_max = blue_max;
-    pb->alpha_min = alpha_min;
-    pb->alpha_max = alpha_max;
-    pb->grayscale = grayscale;
-}
diff --git a/libkram/astc-encoder/astcenc_integer_sequence.cpp b/libkram/astc-encoder/astcenc_integer_sequence.cpp
index 63df91cd..d7079446 100644
--- a/libkram/astc-encoder/astcenc_integer_sequence.cpp
+++ b/libkram/astc-encoder/astcenc_integer_sequence.cpp
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: Apache-2.0
 // ----------------------------------------------------------------------------
-// Copyright 2011-2020 Arm Limited
+// Copyright 2011-2021 Arm Limited
 //
 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
 // use this file except in compliance with the License. You may obtain a copy
@@ -21,6 +21,8 @@
 #include "astcenc_internal.h"
 
+#include <array>
+
 // unpacked quint triplets for each packed-quint value
 static const uint8_t quints_of_integer[128][3] = {
     {0, 0, 0}, {1, 0, 0}, {2, 0, 0}, {3, 0, 0},
@@ -329,94 +331,111 @@ static const uint8_t integer_of_trits[3][3][3][3][3] = {
     }
 };
 
-static void find_number_of_bits_trits_quints(
-    int quantization_level,
-    int* bits,
-    int* trits,
-    int* quints
+/**
+ * @brief The number of bits, trits, and quints needed for a quant level.
+ */
+struct btq_count {
+    /**< The quantization level. */
+    uint8_t quant;
+
+    /**< The number of bits. */
+    uint8_t bits;
+
+    /**< The number of trits. */
+    uint8_t trits;
+
+    /**< The number of quints. */
+    uint8_t quints;
+};
+
+/**
+ * @brief The table of bits, trits, and quints needed for a quant encode.
+ */
+static const std::array<btq_count, 21> btq_counts = {{
+    { QUANT_2, 1, 0, 0 },
+    { QUANT_3, 0, 1, 0 },
+    { QUANT_4, 2, 0, 0 },
+    { QUANT_5, 0, 0, 1 },
+    { QUANT_6, 1, 1, 0 },
+    { QUANT_8, 3, 0, 0 },
+    { QUANT_10, 1, 0, 1 },
+    { QUANT_12, 2, 1, 0 },
+    { QUANT_16, 4, 0, 0 },
+    { QUANT_20, 2, 0, 1 },
+    { QUANT_24, 3, 1, 0 },
+    { QUANT_32, 5, 0, 0 },
+    { QUANT_40, 3, 0, 1 },
+    { QUANT_48, 4, 1, 0 },
+    { QUANT_64, 6, 0, 0 },
+    { QUANT_80, 4, 0, 1 },
+    { QUANT_96, 5, 1, 0 },
+    { QUANT_128, 7, 0, 0 },
+    { QUANT_160, 5, 0, 1 },
+    { QUANT_192, 6, 1, 0 },
+    { QUANT_256, 8, 0, 0 }
+}};
+
+/**
+ * @brief The sequence scale, round, and divisors needed to compute sizing.
+ *
+ * The length of a quantized sequence in bits is:
+ *     (scale * items + round) / divisor
+ */
+struct ise_size {
+    /**< The quantization level. */
+    uint8_t quant;
+
+    /**< The scaling parameter.
*/ + uint8_t scale; + + /**< The rounding parameter. */ + uint8_t round; + + /**< The divisor parameter. */ + uint8_t divisor; +}; + +/** + * @brief The table of scale, round, and divisors needed for quant sizing. + */ +static const std::array ise_sizes = {{ + { QUANT_2, 1, 0, 1 }, + { QUANT_3, 8, 4, 5 }, + { QUANT_4, 2, 0, 1 }, + { QUANT_5, 7, 2, 3 }, + { QUANT_6, 13, 4, 5 }, + { QUANT_8, 3, 0, 1 }, + { QUANT_10, 10, 2, 3 }, + { QUANT_12, 18, 4, 5 }, + { QUANT_16, 4, 0, 1 }, + { QUANT_20, 13, 2, 3 }, + { QUANT_24, 23, 4, 5 }, + { QUANT_32, 5, 0, 1 }, + { QUANT_40, 16, 2, 3 }, + { QUANT_48, 28, 4, 5 }, + { QUANT_64, 6, 0, 1 }, + { QUANT_80, 19, 2, 3 }, + { QUANT_96, 33, 4, 5 }, + { QUANT_128, 7, 0, 1 }, + { QUANT_160, 22, 2, 3 }, + { QUANT_192, 38, 4, 5 }, + { QUANT_256, 8, 0, 1 } +}}; + +/* See header for documentation. */ +int get_ise_sequence_bitcount( + int items, + quant_method quant ) { - *bits = 0; - *trits = 0; - *quints = 0; - switch (quantization_level) + // Cope with out-of bounds values - input might be invalid + if (static_cast(quant) >= ise_sizes.size()) { - case QUANT_2: - *bits = 1; - break; - case QUANT_3: - *bits = 0; - *trits = 1; - break; - case QUANT_4: - *bits = 2; - break; - case QUANT_5: - *bits = 0; - *quints = 1; - break; - case QUANT_6: - *bits = 1; - *trits = 1; - break; - case QUANT_8: - *bits = 3; - break; - case QUANT_10: - *bits = 1; - *quints = 1; - break; - case QUANT_12: - *bits = 2; - *trits = 1; - break; - case QUANT_16: - *bits = 4; - break; - case QUANT_20: - *bits = 2; - *quints = 1; - break; - case QUANT_24: - *bits = 3; - *trits = 1; - break; - case QUANT_32: - *bits = 5; - break; - case QUANT_40: - *bits = 3; - *quints = 1; - break; - case QUANT_48: - *bits = 4; - *trits = 1; - break; - case QUANT_64: - *bits = 6; - break; - case QUANT_80: - *bits = 4; - *quints = 1; - break; - case QUANT_96: - *bits = 5; - *trits = 1; - break; - case QUANT_128: - *bits = 7; - break; - case QUANT_160: - *bits = 5; - *quints = 1; - break; - case QUANT_192: - *bits = 6; - *trits = 1; - break; - case QUANT_256: - *bits = 8; - break; + // Arbitrary large number that's more than an ASTC block can hold + return 1024; } + + auto& entry = ise_sizes[quant]; + return (entry.scale * items + entry.round) / entry.divisor; } // routine to write up to 8 bits @@ -456,85 +475,163 @@ static inline int read_bits( } void encode_ise( - int quantization_level, + int quant_level, int elements, const uint8_t* input_data, uint8_t* output_data, int bit_offset ) { - uint8_t lowparts[64]; - uint8_t highparts[69]; // 64 elements + 5 elements for padding - uint8_t tq_blocks[22]; // trit-blocks or quint-blocks - - int bits, trits, quints; - find_number_of_bits_trits_quints(quantization_level, &bits, &trits, &quints); + int bits = btq_counts[quant_level].bits; + int trits = btq_counts[quant_level].trits; + int quints = btq_counts[quant_level].quints; + int mask = (1 << bits) - 1; - for (int i = 0; i < elements; i++) - { - lowparts[i] = input_data[i] & ((1 << bits) - 1); - highparts[i] = input_data[i] >> bits; - } - - for (int i = elements; i < elements + 5; i++) - { - highparts[i] = 0; // padding before we start constructing trit-blocks or quint-blocks - } - - // construct trit-blocks or quint-blocks as necessary + // Write out trits and bits if (trits) { - int trit_blocks = (elements + 4) / 5; - for (int i = 0; i < trit_blocks; i++) + int i = 0; + int full_trit_blocks = elements / 5; + + for (int j = 0; j < full_trit_blocks; j++) { - tq_blocks[i] = integer_of_trits[highparts[5 * i + 
4]][highparts[5 * i + 3]][highparts[5 * i + 2]][highparts[5 * i + 1]][highparts[5 * i]]; + int i4 = input_data[i + 4] >> bits; + int i3 = input_data[i + 3] >> bits; + int i2 = input_data[i + 2] >> bits; + int i1 = input_data[i + 1] >> bits; + int i0 = input_data[i + 0] >> bits; + + uint8_t T = integer_of_trits[i4][i3][i2][i1][i0]; + + // The max size of a trit bit count is 6, so we can always safely + // pack a single MX value with the following 1 or 2 T bits. + uint8_t pack; + + // Element 0 + T0 + T1 + pack = (input_data[i++] & mask) | (((T >> 0) & 0x3) << bits); + write_bits(pack, bits + 2, bit_offset, output_data); + bit_offset += bits + 2; + + // Element 1 + T2 + T3 + pack = (input_data[i++] & mask) | (((T >> 2) & 0x3) << bits); + write_bits(pack, bits + 2, bit_offset, output_data); + bit_offset += bits + 2; + + // Element 2 + T4 + pack = (input_data[i++] & mask) | (((T >> 4) & 0x1) << bits); + write_bits(pack, bits + 1, bit_offset, output_data); + bit_offset += bits + 1; + + // Element 3 + T5 + T6 + pack = (input_data[i++] & mask) | (((T >> 5) & 0x3) << bits); + write_bits(pack, bits + 2, bit_offset, output_data); + bit_offset += bits + 2; + + // Element 4 + T7 + pack = (input_data[i++] & mask) | (((T >> 7) & 0x1) << bits); + write_bits(pack, bits + 1, bit_offset, output_data); + bit_offset += bits + 1; } - } - if (quints) - { - int quint_blocks = (elements + 2) / 3; - for (int i = 0; i < quint_blocks; i++) + // Loop tail for a partial block + if (i != elements) { - tq_blocks[i] = integer_of_quints[highparts[3 * i + 2]][highparts[3 * i + 1]][highparts[3 * i]]; + // i4 cannot be present - we know the block is partial + // i0 must be present - we know the block isn't empty + int i4 = 0; + int i3 = i + 3 >= elements ? 0 : input_data[i + 3] >> bits; + int i2 = i + 2 >= elements ? 0 : input_data[i + 2] >> bits; + int i1 = i + 1 >= elements ? 0 : input_data[i + 1] >> bits; + int i0 = input_data[i + 0] >> bits; + + uint8_t T = integer_of_trits[i4][i3][i2][i1][i0]; + + for (int j = 0; i < elements; i++, j++) + { + // Truncated table as this iteration is always partital + static const uint8_t tbits[4] { 2, 2, 1, 2 }; + static const uint8_t tshift[4] { 0, 2, 4, 5 }; + + uint8_t pack = (input_data[i] & mask) | + (((T >> tshift[j]) & ((1 << tbits[j]) - 1)) << bits); + + write_bits(pack, bits + tbits[j], bit_offset, output_data); + bit_offset += bits + tbits[j]; + } } } - - // then, write out the actual bits. - int lcounter = 0; - int hcounter = 0; - for (int i = 0; i < elements; i++) + // Write out quints and bits + else if (quints) { - write_bits(lowparts[i], bits, bit_offset, output_data); - bit_offset += bits; + int i = 0; + int full_quint_blocks = elements / 3; - if (trits) + for (int j = 0; j < full_quint_blocks; j++) { - static const int bits_to_write[5] = { 2, 2, 1, 2, 1 }; - static const int block_shift[5] = { 0, 2, 4, 5, 7 }; - static const int next_lcounter[5] = { 1, 2, 3, 4, 0 }; - static const int hcounter_incr[5] = { 0, 0, 0, 0, 1 }; - write_bits(tq_blocks[hcounter] >> block_shift[lcounter], bits_to_write[lcounter], bit_offset, output_data); - bit_offset += bits_to_write[lcounter]; - hcounter += hcounter_incr[lcounter]; - lcounter = next_lcounter[lcounter]; + int i2 = input_data[i + 2] >> bits; + int i1 = input_data[i + 1] >> bits; + int i0 = input_data[i + 0] >> bits; + + uint8_t T = integer_of_quints[i2][i1][i0]; + + // The max size of a quint bit count is 5, so we can always safely + // pack a single M value with the following 2 or 3 T bits. 
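+    // Worked example (an illustrative sketch using the tables above, not
+    // text from the original patch): at QUANT_10 each value stores 1 low
+    // bit plus a share of one quint. For high parts (i0, i1, i2) the packed
+    // value T = integer_of_quints[i2][i1][i0] fits in 7 bits, and the three
+    // elements below emit 1+3, 1+2 and 1+2 bits, i.e. 10 bits for 3 values,
+    // matching get_ise_sequence_bitcount(3, QUANT_10) = (10 * 3 + 2) / 3 = 10.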
+ uint8_t pack; + + // Element 0 + pack = (input_data[i++] & mask) | (((T >> 0) & 0x7) << bits); + write_bits(pack, bits + 3, bit_offset, output_data); + bit_offset += bits + 3; + + // Element 1 + pack = (input_data[i++] & mask) | (((T >> 3) & 0x3) << bits); + write_bits(pack, bits + 2, bit_offset, output_data); + bit_offset += bits + 2; + + // Element 2 + pack = (input_data[i++] & mask) | (((T >> 5) & 0x3) << bits); + write_bits(pack, bits + 2, bit_offset, output_data); + bit_offset += bits + 2; } - if (quints) + // Loop tail for a partial block + if (i != elements) { - static const int bits_to_write[3] = { 3, 2, 2 }; - static const int block_shift[3] = { 0, 3, 5 }; - static const int next_lcounter[3] = { 1, 2, 0 }; - static const int hcounter_incr[3] = { 0, 0, 1 }; - write_bits(tq_blocks[hcounter] >> block_shift[lcounter], bits_to_write[lcounter], bit_offset, output_data); - bit_offset += bits_to_write[lcounter]; - hcounter += hcounter_incr[lcounter]; - lcounter = next_lcounter[lcounter]; + // i2 cannot be present - we know the block is partial + // i0 must be present - we know the block isn't empty + int i2 = 0; + int i1 = i + 1 >= elements ? 0 : input_data[i + 1] >> bits; + int i0 = input_data[i + 0] >> bits; + + uint8_t T = integer_of_quints[i2][i1][i0]; + + for (int j = 0; i < elements; i++, j++) + { + // Truncated table as this iteration is always partital + static const uint8_t tbits[2] { 3, 2 }; + static const uint8_t tshift[2] { 0, 3 }; + + uint8_t pack = (input_data[i] & mask) | + (((T >> tshift[j]) & ((1 << tbits[j]) - 1)) << bits); + + write_bits(pack, bits + tbits[j], bit_offset, output_data); + bit_offset += bits + tbits[j]; + } + } + } + // Write out just bits + else + { + promise(elements > 0); + for (int i = 0; i < elements; i++) + { + write_bits(input_data[i], bits, bit_offset, output_data); + bit_offset += bits; } } } void decode_ise( - int quantization_level, + int quant_level, int elements, const uint8_t* input_data, uint8_t* output_data, @@ -547,8 +644,9 @@ void decode_ise( uint8_t results[68]; uint8_t tq_blocks[22]; // trit-blocks or quint-blocks - int bits, trits, quints; - find_number_of_bits_trits_quints(quantization_level, &bits, &trits, &quints); + int bits = btq_counts[quant_level].bits; + int trits = btq_counts[quant_level].trits; + int quints = btq_counts[quant_level].quints; int lcounter = 0; int hcounter = 0; @@ -567,10 +665,10 @@ void decode_ise( if (trits) { - static const int bits_to_read[5] = { 2, 2, 1, 2, 1 }; - static const int block_shift[5] = { 0, 2, 4, 5, 7 }; - static const int next_lcounter[5] = { 1, 2, 3, 4, 0 }; - static const int hcounter_incr[5] = { 0, 0, 0, 0, 1 }; + static const int bits_to_read[5] { 2, 2, 1, 2, 1 }; + static const int block_shift[5] { 0, 2, 4, 5, 7 }; + static const int next_lcounter[5] { 1, 2, 3, 4, 0 }; + static const int hcounter_incr[5] { 0, 0, 0, 0, 1 }; int tdata = read_bits(bits_to_read[lcounter], bit_offset, input_data); bit_offset += bits_to_read[lcounter]; tq_blocks[hcounter] |= tdata << block_shift[lcounter]; @@ -580,10 +678,10 @@ void decode_ise( if (quints) { - static const int bits_to_read[3] = { 3, 2, 2 }; - static const int block_shift[3] = { 0, 3, 5 }; - static const int next_lcounter[3] = { 1, 2, 0 }; - static const int hcounter_incr[3] = { 0, 0, 1 }; + static const int bits_to_read[3] { 3, 2, 2 }; + static const int block_shift[3] { 0, 3, 5 }; + static const int next_lcounter[3] { 1, 2, 0 }; + static const int hcounter_incr[3] { 0, 0, 1 }; int tdata = read_bits(bits_to_read[lcounter], bit_offset, 
input_data); bit_offset += bits_to_read[lcounter]; tq_blocks[hcounter] |= tdata << block_shift[lcounter]; @@ -599,7 +697,7 @@ void decode_ise( for (int i = 0; i < trit_blocks; i++) { const uint8_t *tritptr = trits_of_integer[tq_blocks[i]]; - results[5 * i] |= tritptr[0] << bits; + results[5 * i ] |= tritptr[0] << bits; results[5 * i + 1] |= tritptr[1] << bits; results[5 * i + 2] |= tritptr[2] << bits; results[5 * i + 3] |= tritptr[3] << bits; @@ -613,7 +711,7 @@ void decode_ise( for (int i = 0; i < quint_blocks; i++) { const uint8_t *quintptr = quints_of_integer[tq_blocks[i]]; - results[3 * i] |= quintptr[0] << bits; + results[3 * i ] |= quintptr[0] << bits; results[3 * i + 1] |= quintptr[1] << bits; results[3 * i + 2] |= quintptr[2] << bits; } @@ -624,56 +722,3 @@ void decode_ise( output_data[i] = results[i]; } } - -int compute_ise_bitcount( - int items, - quantization_method quant -) { - switch (quant) - { - case QUANT_2: - return items; - case QUANT_3: - return (8 * items + 4) / 5; - case QUANT_4: - return 2 * items; - case QUANT_5: - return (7 * items + 2) / 3; - case QUANT_6: - return (13 * items + 4) / 5; - case QUANT_8: - return 3 * items; - case QUANT_10: - return (10 * items + 2) / 3; - case QUANT_12: - return (18 * items + 4) / 5; - case QUANT_16: - return items * 4; - case QUANT_20: - return (13 * items + 2) / 3; - case QUANT_24: - return (23 * items + 4) / 5; - case QUANT_32: - return 5 * items; - case QUANT_40: - return (16 * items + 2) / 3; - case QUANT_48: - return (28 * items + 4) / 5; - case QUANT_64: - return 6 * items; - case QUANT_80: - return (19 * items + 2) / 3; - case QUANT_96: - return (33 * items + 4) / 5; - case QUANT_128: - return 7 * items; - case QUANT_160: - return (22 * items + 2) / 3; - case QUANT_192: - return (38 * items + 4) / 5; - case QUANT_256: - return 8 * items; - default: - return 100000; - } -} diff --git a/libkram/astc-encoder/astcenc_internal.h b/libkram/astc-encoder/astcenc_internal.h index 42e95bd7..565bec00 100644 --- a/libkram/astc-encoder/astcenc_internal.h +++ b/libkram/astc-encoder/astcenc_internal.h @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2011-2020 Arm Limited +// Copyright 2011-2021 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. You may obtain a copy @@ -23,6 +23,7 @@ #define ASTCENC_INTERNAL_INCLUDED #include +#include #include #include #include @@ -30,31 +31,59 @@ #include #include #include +#include -#ifndef ASTCENC_SSE -#error ERROR: ASTCENC_SSE not defined -#endif - -#ifndef ASTCENC_POPCNT -#error ERROR: ASTCENC_POPCNT not defined -#endif +#include "astcenc.h" +#include "astcenc_mathlib.h" +#include "astcenc_vecmathlib.h" -#ifndef ASTCENC_AVX -#error ERROR: ASTCENC_AVX not defined +/** + * @brief Make a promise to the compiler's optimizer. + * + * A promise is an expression that the optimizer is can assume is true for to + * help it generate faster code. Common use cases for this are to promise that + * a for loop will iterate more than once, or that the loop iteration count is + * a multiple of a vector length, which avoids pre-loop checks and can avoid + * loop tails if loops are unrolled by the auto-vectorizer. 
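+ *
+ * Illustrative use (a sketch; the variable name is an assumption, not a
+ * call site from this patch):
+ *
+ *     promise(texel_count > 0);
+ *     for (int i = 0; i < texel_count; i++) { ... }
+ *
+ * which lets the optimizer drop the zero-iteration entry check on the loop.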
+ */ +#if defined(NDEBUG) + #if !defined(__clang__) && defined(_MSC_VER) + #define promise(cond) __assume(cond) + #elif defined(__clang__) + #if __has_builtin(__builtin_assume) + #define promise(cond) __builtin_assume(cond) + #elif __has_builtin(__builtin_unreachable) + #define promise(cond) if(!(cond)) { __builtin_unreachable(); } + #else + #define promise(cond) + #endif + #else // Assume GCC + #define promise(cond) if(!(cond)) { __builtin_unreachable(); } + #endif +#else + #define promise(cond) assert(cond); #endif -#ifndef ASTCENC_ISA_INVARIANCE -#error ERROR: ASTCENC_ISA_INVARIANCE not defined +/** + * @brief Make a promise to the compiler's optimizer parameters don't alias. + * + * This is a compiler extension to implement the equivalent of the C99 + * @c restrict keyword. Mostly expected to help on functions which are + * reading and writing to arrays via pointers of the same basic type. + */ +#if !defined(__clang__) && defined(_MSC_VER) + #define RESTRICT __restrict +#else // Assume Clang or GCC + #define RESTRICT __restrict__ #endif -#include "astcenc.h" -#include "astcenc_mathlib.h" - /* ============================================================================ Constants ============================================================================ */ #define MAX_TEXELS_PER_BLOCK 216 +#define MAX_KMEANS_TEXELS 64 #define MAX_WEIGHTS_PER_BLOCK 64 +#define PLANE2_WEIGHTS_OFFSET (MAX_WEIGHTS_PER_BLOCK/2) #define MIN_WEIGHT_BITS_PER_BLOCK 24 #define MAX_WEIGHT_BITS_PER_BLOCK 96 #define PARTITION_BITS 10 @@ -73,11 +102,11 @@ static const float ERROR_CALC_DEFAULT { 1e30f }; ============================================================================ */ // The max texel count in a block which can try the one partition fast path. // Default: enabled for 4x4 and 5x4 blocks. -static const int TUNE_MAX_TEXELS_MODE0_FASTPATH { 24 }; +static const unsigned int TUNE_MAX_TEXELS_MODE0_FASTPATH { 24 }; // The maximum number of candidate encodings returned for each encoding mode. // Default: depends on quality preset -static const int TUNE_MAX_TRIAL_CANDIDATES { 4 }; +static const unsigned int TUNE_MAX_TRIAL_CANDIDATES { 4 }; /* ============================================================================ Other configuration parameters @@ -100,7 +129,7 @@ static const int TUNE_MAX_TRIAL_CANDIDATES { 4 }; * * A condition variable so threads can wait for processing completion. * * The init stage will be executed by the first thread to arrive in the - * critical section, there is no master thread in the thread pool. + * critical section, there is no main thread in the thread pool. * * The processing stage uses dynamic dispatch to assign task tickets to threads * on an on-demand basis. Threads may each therefore executed different numbers @@ -153,36 +182,36 @@ static const int TUNE_MAX_TRIAL_CANDIDATES { 4 }; class ParallelManager { private: - /** \brief Lock used for critical section and condition synchronization. */ + /** @brief Lock used for critical section and condition synchronization. */ std::mutex m_lock; - /** \brief True if the stage init() step has been executed. */ + /** @brief True if the stage init() step has been executed. */ bool m_init_done; - /** \brief True if the stage term() step has been executed. */ + /** @brief True if the stage term() step has been executed. */ bool m_term_done; - /** \brief Contition variable for tracking stage processing completion. */ + /** @brief Contition variable for tracking stage processing completion. 
*/ std::condition_variable m_complete; - /** \brief Number of tasks started, but not necessarily finished. */ - unsigned int m_start_count; + /** @brief Number of tasks started, but not necessarily finished. */ + std::atomic m_start_count; - /** \brief Number of tasks finished. */ + /** @brief Number of tasks finished. */ unsigned int m_done_count; - /** \brief Number of tasks that need to be processed. */ + /** @brief Number of tasks that need to be processed. */ unsigned int m_task_count; public: - /** \brief Create a new ParallelManager. */ + /** @brief Create a new ParallelManager. */ ParallelManager() { reset(); } /** - * \brief Reset the tracker for a new processing batch. + * @brief Reset the tracker for a new processing batch. * * This must be called from single-threaded code before starting the * multi-threaded procesing operations. @@ -197,14 +226,14 @@ class ParallelManager } /** - * \brief Trigger the pipeline stage init step. + * @brief Trigger the pipeline stage init step. * * This can be called from multi-threaded code. The first thread to * hit this will process the initialization. Other threads will block * and wait for it to complete. * - * \param init_func Callable which executes the stage initialization. - * Must return the number of tasks in the stage. + * @param init_func Callable which executes the stage initialization. + * Must return the number of tasks in the stage. */ void init(std::function init_func) { @@ -217,13 +246,13 @@ class ParallelManager } /** - * \brief Trigger the pipeline stage init step. + * @brief Trigger the pipeline stage init step. * * This can be called from multi-threaded code. The first thread to * hit this will process the initialization. Other threads will block * and wait for it to complete. * - * \param task_count Total number of tasks needing processing. + * @param task_count Total number of tasks needing processing. */ void init(unsigned int task_count) { @@ -236,36 +265,42 @@ class ParallelManager } /** - * \brief Request a task assignment. + * @brief Request a task assignment. * - * Assign up to \c granule tasks to the caller for processing. + * Assign up to @c granule tasks to the caller for processing. * - * \param granule Maximum number of tasks that can be assigned. - * \param[out] count Actual number of tasks assigned, or zero if + * @param granule Maximum number of tasks that can be assigned. + * @param[out] count Actual number of tasks assigned, or zero if * no tasks were assigned. * - * \return Task index of the first assigned task; assigned tasks + * @return Task index of the first assigned task; assigned tasks * increment from this. */ unsigned int get_task_assignment(unsigned int granule, unsigned int& count) { - std::lock_guard lck(m_lock); - unsigned int base = m_start_count; - count = std::min(granule, m_task_count - m_start_count); - m_start_count += count; + unsigned int base = m_start_count.fetch_add(granule, std::memory_order_relaxed); + if (base >= m_task_count) + { + count = 0; + return 0; + } + + count = astc::min(m_task_count - base, granule); return base; } /** - * \brief Complete a task assignment. + * @brief Complete a task assignment. * - * Mark \c count tasks as complete. This will notify all threads blocked - * on \c wait() if this completes the processing of the stage. + * Mark @c count tasks as complete. This will notify all threads blocked + * on @c wait() if this completes the processing of the stage. * - * \param count The number of completed tasks. + * @param count The number of completed tasks. 
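	 *
	 * A minimal worker-loop sketch for this class (illustrative only; the
	 * granule size of 16 and the process() callback are assumptions, not
	 * taken from this patch):
	 *
	 *     mgr.init(task_count);
	 *     for (;;)
	 *     {
	 *         unsigned int count;
	 *         unsigned int base = mgr.get_task_assignment(16, count);
	 *         if (!count) { break; }
	 *         for (unsigned int i = base; i < base + count; i++) { process(i); }
	 *         mgr.complete_task_assignment(count);
	 *     }
	 *     mgr.wait();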
*/ void complete_task_assignment(unsigned int count) { + // Note: m_done_count cannot use an atomic without the mutex; this has + // a race between the update here and the wait() for other threads std::unique_lock lck(m_lock); this->m_done_count += count; if (m_done_count == m_task_count) @@ -276,7 +311,7 @@ class ParallelManager } /** - * \brief Wait for stage processing to complete. + * @brief Wait for stage processing to complete. */ void wait() { @@ -285,13 +320,13 @@ class ParallelManager } /** - * \brief Trigger the pipeline stage term step. + * @brief Trigger the pipeline stage term step. * * This can be called from multi-threaded code. The first thread to * hit this will process the thread termintion. Caller must have called * wait() prior to calling this function to ensure processing is complete. * - * \param term_func Callable which executes the stage termination. + * @param term_func Callable which executes the stage termination. */ void term(std::function term_func) { @@ -304,6 +339,27 @@ class ParallelManager } }; +struct partition_metrics +{ + vfloat4 range_sq; + vfloat4 error_weight; + vfloat4 icolor_scale; + vfloat4 color_scale; + vfloat4 avg; + vfloat4 dir; +}; + +struct partition_lines3 +{ + line3 uncor_line; + line3 samec_line; + + processed_line3 uncor_pline; + processed_line3 samec_pline; + + float uncor_line_len; + float samec_line_len; +}; /* Partition table representation: @@ -320,7 +376,7 @@ class ParallelManager struct partition_info { int partition_count; - uint8_t texels_per_partition[4]; + uint8_t partition_texel_count[4]; uint8_t partition_of_texel[MAX_TEXELS_PER_BLOCK]; uint8_t texels_of_partition[4][MAX_TEXELS_PER_BLOCK]; uint64_t coverage_bitmaps[4]; @@ -336,69 +392,130 @@ struct partition_info */ struct decimation_table { - int num_texels; - int num_weights; - uint8_t texel_num_weights[MAX_TEXELS_PER_BLOCK]; // number of indices that go into the calculation for a texel - uint8_t texel_weights_int[MAX_TEXELS_PER_BLOCK][4]; // the weight to assign to each weight - float texel_weights_float[MAX_TEXELS_PER_BLOCK][4]; // the weight to assign to each weight - uint8_t texel_weights[MAX_TEXELS_PER_BLOCK][4]; // the weights that go into a texel calculation - uint8_t weight_num_texels[MAX_WEIGHTS_PER_BLOCK]; // the number of texels that a given weight contributes to - uint8_t weight_texel[MAX_WEIGHTS_PER_BLOCK][MAX_TEXELS_PER_BLOCK]; // the texels that the weight contributes to + // TODO: Make these byte values + int texel_count; + int weight_count; + int weight_x; + int weight_y; + int weight_z; + + uint8_t texel_weight_count[MAX_TEXELS_PER_BLOCK]; // number of indices that go into the calculation for a texel + + // The 4t and t4 tables are the same data, but transposed to allow optimal + // data access patterns depending on how we can unroll loops + alignas(ASTCENC_VECALIGN) float texel_weights_float_4t[4][MAX_TEXELS_PER_BLOCK]; // the weight to assign to each weight + alignas(ASTCENC_VECALIGN) uint8_t texel_weights_4t[4][MAX_TEXELS_PER_BLOCK]; // the weights that go into a texel calculation + + // TODO: Can we remove the copies? 
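+	// Layout note (an inference from the declarations above, not wording
+	// from the patch): the *_4t tables are weight-index-major, so for a
+	// fixed weight slot j the run texel_weights_float_4t[j][0..texel_count-1]
+	// is contiguous and suits aligned vector loads, while the *_t4 copies
+	// below keep the original texel-major [texel][4] layout for scalar
+	// per-texel loops.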
+ float texel_weights_float_t4[MAX_TEXELS_PER_BLOCK][4]; // the weight to assign to each weight + uint8_t texel_weights_t4[MAX_TEXELS_PER_BLOCK][4]; // the weights that go into a texel calculation + + uint8_t texel_weights_int_t4[MAX_TEXELS_PER_BLOCK][4]; // the weight to assign to each weight + + uint8_t weight_texel_count[MAX_WEIGHTS_PER_BLOCK]; // the number of texels that a given weight contributes to uint8_t weights_int[MAX_WEIGHTS_PER_BLOCK][MAX_TEXELS_PER_BLOCK]; // the weights that the weight contributes to a texel. - float weights_flt[MAX_WEIGHTS_PER_BLOCK][MAX_TEXELS_PER_BLOCK]; // the weights that the weight contributes to a texel. + + // Stored transposed to give better access patterns + uint8_t weight_texel[MAX_TEXELS_PER_BLOCK][MAX_WEIGHTS_PER_BLOCK]; // the texels that the weight contributes to + alignas(ASTCENC_VECALIGN) float weights_flt[MAX_TEXELS_PER_BLOCK][MAX_WEIGHTS_PER_BLOCK]; // the weights that the weight contributes to a texel. // folded data structures: // * texel_weights_texel[i][j] = texel_weights[weight_texel[i][j]]; - // * texel_weights_float_texel[i][j] = texel_weights_float[weight_texel[i][j] + // * texel_weights_float_texel[i][j] = texel_weights_float[weight_texel[i][j]] uint8_t texel_weights_texel[MAX_WEIGHTS_PER_BLOCK][MAX_TEXELS_PER_BLOCK][4]; float texel_weights_float_texel[MAX_WEIGHTS_PER_BLOCK][MAX_TEXELS_PER_BLOCK][4]; }; -/* - data structure describing information that pertains to a block size and its associated block modes. -*/ +/** + * @brief Metadata for single block mode for a specific BSD. + */ struct block_mode { int8_t decimation_mode; - int8_t quantization_mode; - int8_t is_dual_plane; + int8_t quant_mode; + uint8_t is_dual_plane : 1; + uint8_t percentile_hit : 1; + uint8_t percentile_always : 1; int16_t mode_index; - float percentile; }; +/** + * @brief Metadata for single decimation mode for a specific BSD. + */ +struct decimation_mode +{ + int8_t maxprec_1plane; + int8_t maxprec_2planes; + uint8_t percentile_hit : 1; + uint8_t percentile_always : 1; +}; + +/** + * @brief Data tables for a single block size. + * + * The decimation tables store the information to apply weight grid dimension + * reductions. We only store the decimation modes that are actually needed by + * the current context; many of the possible modes will be unused (too many + * weights for the current block size or disabled by heuristics). The actual + * number of weights stored is @c decimation_mode_count, and the + * @c decimation_modes and @c decimation_tables arrays store the active modes + * contiguously at the start of the array. These entries are not stored in any + * particuar order. + * + * The block mode tables store the unpacked block mode settings. Block modes + * are stored in the compressed block as an 11 bit field, but for any given + * block size and set of compressor heuristics, only a subset of the block + * modes will be used. The actual number of block modes stored is indicated in + * @c block_mode_count, and the @c block_modes array store the active modes + * contiguously at the start of the array. These entries are stored in + * incrementing "packed" value order, which doesn't mean much once unpacked. + * To allow decompressors to reference the packed data efficiently the + * @c block_mode_packed_index array stores the mapping between physical ID and + * the actual remapped array index. + */ struct block_size_descriptor { + /**< The block X dimension, in texels. */ int xdim; + + /**< The block Y dimension, in texels. 
*/ int ydim; + + /**< The block Z dimension, in texels. */ int zdim; + + /**< The block total texel count. */ int texel_count; + + /**< The number of stored decimation modes. */ int decimation_mode_count; - int decimation_mode_samples[MAX_DECIMATION_MODES]; - int decimation_mode_maxprec_1plane[MAX_DECIMATION_MODES]; - int decimation_mode_maxprec_2planes[MAX_DECIMATION_MODES]; - float decimation_mode_percentile[MAX_DECIMATION_MODES]; - int permit_encode[MAX_DECIMATION_MODES]; + + /**< The active decimation modes, stored in low indices. */ + decimation_mode decimation_modes[MAX_DECIMATION_MODES]; + + /**< The active decimation tables, stored in low indices. */ const decimation_table *decimation_tables[MAX_DECIMATION_MODES]; - // out of all possible 2048 weight modes, only a subset is - // actually valid for the current configuration (e.g. 6x6 - // 2D LDR has 370 valid modes); the valid ones are packed into - // block_modes_packed array. - block_mode block_modes_packed[MAX_WEIGHT_MODES]; - int block_mode_packed_count; - // get index of block mode inside the block_modes_packed array, - // or -1 if mode is not valid for the current configuration. - int16_t block_mode_to_packed[MAX_WEIGHT_MODES]; - - // for the k-means bed bitmap partitioning algorithm, we don't - // want to consider more than 64 texels; this array specifies - // which 64 texels (if that many) to consider. - int texelcount_for_bitmap_partitioning; - int texels_for_bitmap_partitioning[64]; - - // All the partitioning information for this block size - partition_info partitions[(3*PARTITION_COUNT)+1]; + + /**< The number of stored block modes. */ + int block_mode_count; + + /**< The active block modes, stored in low indices. */ + block_mode block_modes[MAX_WEIGHT_MODES]; + + /**< The block mode array index, or -1 if not valid in current config. */ + int16_t block_mode_packed_index[MAX_WEIGHT_MODES]; + + + /**< The texel count for k-means partition selection. */ + int kmeans_texel_count; + + /**< The active texels for k-means partition selection. */ + int kmeans_texels[MAX_KMEANS_TEXELS]; + + /**< The partion tables for all of the possible partitions. */ + partition_info partitions[(3 * PARTITION_COUNT) + 1]; }; // data structure representing one block of an image. @@ -410,39 +527,60 @@ struct imageblock float data_g[MAX_TEXELS_PER_BLOCK]; float data_b[MAX_TEXELS_PER_BLOCK]; float data_a[MAX_TEXELS_PER_BLOCK]; - float4 origin_texel; + + vfloat4 origin_texel; + vfloat4 data_min; + vfloat4 data_max; + bool grayscale; uint8_t rgb_lns[MAX_TEXELS_PER_BLOCK]; // 1 if RGB data are being treated as LNS uint8_t alpha_lns[MAX_TEXELS_PER_BLOCK]; // 1 if Alpha data are being treated as LNS uint8_t nan_texel[MAX_TEXELS_PER_BLOCK]; // 1 if the texel is a NaN-texel. + int xpos, ypos, zpos; - float red_min, red_max; - float green_min, green_max; - float blue_min, blue_max; - float alpha_min, alpha_max; - int grayscale; // 1 if R=G=B for every pixel, 0 otherwise + inline vfloat4 texel(int index) const + { + return vfloat4(data_r[index], + data_g[index], + data_b[index], + data_a[index]); + } - int xpos, ypos, zpos; + inline vfloat4 texel3(int index) const + { + return vfloat4(data_r[index], + data_g[index], + data_b[index], + 0.0f); + } }; -static inline int imageblock_uses_alpha(const imageblock * pb) +static inline float imageblock_default_alpha(const imageblock * blk) { - return pb->alpha_max != pb->alpha_min; + return blk->alpha_lns[0] ? 
(float)0x7800 : (float)0xFFFF; } -void update_imageblock_flags( - imageblock* pb, - int xdim, - int ydim, - int zdim); -void imageblock_initialize_orig_from_work( - imageblock * pb, - int pixelcount); +static inline int imageblock_uses_alpha(const imageblock * blk) +{ + return blk->data_min.lane<3>() != blk->data_max.lane<3>(); +} + +static inline int imageblock_is_lum(const imageblock * blk) +{ + float default_alpha = imageblock_default_alpha(blk); + bool alpha1 = (blk->data_min.lane<3>() == default_alpha) && + (blk->data_max.lane<3>() == default_alpha); + return blk->grayscale && alpha1; +} -void imageblock_initialize_work_from_orig( - imageblock * pb, - int pixelcount); +static inline int imageblock_is_lumalp(const imageblock * blk) +{ + float default_alpha = imageblock_default_alpha(blk); + bool alpha1 = (blk->data_min.lane<3>() == default_alpha) && + (blk->data_max.lane<3>() == default_alpha); + return blk->grayscale && !alpha1; +} /* Data structure representing error weighting for one block of an image. this is used as @@ -467,8 +605,10 @@ void imageblock_initialize_work_from_orig( struct error_weight_block { - float4 error_weights[MAX_TEXELS_PER_BLOCK]; + vfloat4 error_weights[MAX_TEXELS_PER_BLOCK]; + float texel_weight[MAX_TEXELS_PER_BLOCK]; + float texel_weight_gba[MAX_TEXELS_PER_BLOCK]; float texel_weight_rba[MAX_TEXELS_PER_BLOCK]; float texel_weight_rga[MAX_TEXELS_PER_BLOCK]; @@ -483,12 +623,10 @@ struct error_weight_block float texel_weight_g[MAX_TEXELS_PER_BLOCK]; float texel_weight_b[MAX_TEXELS_PER_BLOCK]; float texel_weight_a[MAX_TEXELS_PER_BLOCK]; - - int contains_zeroweight_texels; }; // enumeration of all the quantization methods we support under this format. -enum quantization_method +enum quant_method { QUANT_2 = 0, QUANT_3 = 1, @@ -513,6 +651,36 @@ enum quantization_method QUANT_256 = 20 }; +static inline int get_quant_method_levels(quant_method method) +{ + switch(method) + { + case QUANT_2: return 2; + case QUANT_3: return 3; + case QUANT_4: return 4; + case QUANT_5: return 5; + case QUANT_6: return 6; + case QUANT_8: return 8; + case QUANT_10: return 10; + case QUANT_12: return 12; + case QUANT_16: return 16; + case QUANT_20: return 20; + case QUANT_24: return 24; + case QUANT_32: return 32; + case QUANT_40: return 40; + case QUANT_48: return 48; + case QUANT_64: return 64; + case QUANT_80: return 80; + case QUANT_96: return 96; + case QUANT_128: return 128; + case QUANT_160: return 160; + case QUANT_192: return 192; + case QUANT_256: return 256; + // Unreachable - the enum is fully described + default: return 0; + } +} + /** * @brief Weight quantization transfer table. * @@ -530,12 +698,10 @@ enum quantization_method struct quantization_and_transfer_table { /** The quantization level used */ - quantization_method method; + quant_method method; /** The unscrambled unquantized value. */ - // TODO: Converted to floats to support AVX gathers float unquantized_value_unsc[33]; /** The scrambling order: value[map[i]] == value_unsc[i] */ - // TODO: Converted to u32 to support AVX gathers int32_t scramble_map[32]; /** The scrambled unquantized values. */ uint8_t unquantized_value[32]; @@ -580,12 +746,16 @@ struct symbolic_compressed_block int partition_index; // 0 to 1023 int color_formats[4]; // color format for each endpoint color pair. int color_formats_matched; // color format for all endpoint pairs are matched. - int color_values[4][12]; // quantized endpoint color pairs. 
- int color_quantization_level; - uint8_t plane1_weights[MAX_WEIGHTS_PER_BLOCK]; // quantized and decimated weights - uint8_t plane2_weights[MAX_WEIGHTS_PER_BLOCK]; + int color_quant_level; int plane2_color_component; // color component for the secondary plane of weights + + // TODO: Under what circumstances is this ever more than 8 (4 pairs) colors + int color_values[4][12]; // quantized endpoint color pairs. int constant_color[4]; // constant-color, as FP16 or UINT16. Used for constant-color blocks only. + // Quantized and decimated weights. In the case of dual plane, the second + // index plane starts at weights[PLANE2_WEIGHTS_OFFSET] + float errorval; // The error of the current encoding + uint8_t weights[MAX_WEIGHTS_PER_BLOCK]; }; struct physical_compressed_block @@ -603,15 +773,18 @@ struct physical_compressed_block * This will also initialize the partition table metadata, which is stored * as part of the BSD structure. * - * @param xdim The x axis size of the block. - * @param ydim The y axis size of the block. - * @param zdim The z axis size of the block. - * @param bsd The structure to populate. + * @param xdim The x axis size of the block. + * @param ydim The y axis size of the block. + * @param zdim The z axis size of the block. + * @param mode_cutoff The block mode percentil cutoff [0-1]. + * @param bsd The structure to populate. */ void init_block_size_descriptor( int xdim, int ydim, int zdim, + bool can_omit_modes, + float mode_cutoff, block_size_descriptor* bsd); void term_block_size_descriptor( @@ -679,29 +852,39 @@ int is_legal_3d_block_size( // functions and data pertaining to quantization and encoding // ********************************************************** -extern const uint8_t color_quantization_tables[21][256]; -extern const uint8_t color_unquantization_tables[21][256]; -extern int quantization_mode_table[17][128]; +extern const uint8_t color_quant_tables[21][256]; +extern const uint8_t color_unquant_tables[21][256]; +extern int8_t quant_mode_table[17][128]; void encode_ise( - int quantization_level, + int quant_level, int elements, const uint8_t* input_data, uint8_t* output_data, int bit_offset); void decode_ise( - int quantization_level, + int quant_level, int elements, const uint8_t* input_data, uint8_t* output_data, int bit_offset); -int compute_ise_bitcount( +/** + * @brief Return the number of bits needed to encode an ISE sequence. + * + * This implementation assumes that the @c quant level is untrusted, given it + * may come from random data being decompressed, so we return an unencodable + * size if that is the case. + * + * @param items The number of items in the sequence. + * @param quant The desired quantization level. 
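+ *
+ * Worked example (arithmetic only, from the ise_sizes table above): ten
+ * weights at QUANT_12 carry one trit plus two bits each, so the sequence
+ * needs (18 * 10 + 4) / 5 = 36 bits.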
+ */ +int get_ise_sequence_bitcount( int items, - quantization_method quant); + quant_method quant); -void build_quantization_mode_table(void); +void build_quant_mode_table(void); // ********************************************** // functions and data pertaining to partitioning @@ -709,33 +892,20 @@ void build_quantization_mode_table(void); // functions to compute color averages and dominant directions // for each partition in a block - -void compute_averages_and_directions_rgb( +void compute_avgs_and_dirs_4_comp( const partition_info* pt, const imageblock* blk, const error_weight_block* ewb, - const float4* color_scalefactors, - float3* averages, - float3* directions_rgb); + partition_metrics pm[4]); -void compute_averages_and_directions_rgba( +void compute_avgs_and_dirs_3_comp( const partition_info* pt, const imageblock* blk, const error_weight_block* ewb, - const float4* color_scalefactors, - float4* averages, - float4* directions_rgba); + int omitted_component, + partition_metrics pm[4]); -void compute_averages_and_directions_3_components( - const partition_info* pt, - const imageblock* blk, - const error_weight_block* ewb, - const float3 * color_scalefactors, - int omittedComponent, - float3* averages, - float3* directions); - -void compute_averages_and_directions_2_components( +void compute_avgs_and_dirs_2_comp( const partition_info* pt, const imageblock* blk, const error_weight_block* ewb, @@ -746,67 +916,40 @@ void compute_averages_and_directions_2_components( float2* directions); void compute_error_squared_rgba( - const partition_info* pt, // the partition that we use when computing the squared-error. + const partition_info* pt, const imageblock* blk, const error_weight_block* ewb, - const processed_line4* plines_uncorr, - const processed_line4* plines_samechroma, - const processed_line3* plines_separate_red, - const processed_line3* plines_separate_green, - const processed_line3* plines_separate_blue, - const processed_line3* plines_separate_alpha, - float* length_uncorr, - float* length_samechroma, - float4* length_separate, - float* uncorr_error, - float* samechroma_error, - float4* separate_color_error); + const processed_line4* uncor_plines, + const processed_line4* samec_plines, + float* uncor_lengths, + float* samec_lengths, + float* uncor_errors, + float* samec_errors); void compute_error_squared_rgb( - const partition_info* pt, // the partition that we use when computing the squared-error. - const imageblock* blk, - const error_weight_block* ewb, - const processed_line3* plines_uncorr, - const processed_line3* plines_samechroma, - const processed_line2* plines_separate_red, - const processed_line2* plines_separate_green, - const processed_line2* plines_separate_blue, - float* length_uncorr, - float* length_samechroma, - float3* length_separate, - float* uncorr_error, - float* samechroma_error, - float3* separate_color_error); - -// functions to compute error value across a tile for a particular line function -// for a single partition. -float compute_error_squared_rgb_single_partition( - int partition_to_test, - const block_size_descriptor* bsd, - const partition_info* pt, - const imageblock* blk, - const error_weight_block* ewb, - const processed_line3* lin // the line for the partition. -); + const partition_info *pt, + const imageblock *blk, + const error_weight_block *ewb, + partition_lines3 plines[4], + float& uncor_error, + float& samec_error); // for each partition, compute its color weightings. 
void compute_partition_error_color_weightings( - const block_size_descriptor* bsd, - const error_weight_block * ewb, - const partition_info* pi, - float4 error_weightings[4], - float4 color_scalefactors[4]); + const error_weight_block& ewb, + const partition_info& pt, + partition_metrics pm[4]); /** - * \brief Find the best set of partitions to trial for a given block. + * @brief Find the best set of partitions to trial for a given block. * - * On return \c best_partition_uncorrelated contains the best partition - * assuming the data has noncorrelated chroma, \c best_partition_samechroma + * On return @c best_partition_uncorrelated contains the best partition + * assuming the data has noncorrelated chroma, @c best_partition_samechroma * contains the best partition assuming the data has corelated chroma, and - * \c best_partition_dualplane contains the best partition assuming the data + * @c best_partition_dualplane contains the best partition assuming the data * has one uncorrelated color component. * - * \c best_partition_dualplane is stored packed; bits [9:0] contain the + * @c best_partition_dualplane is stored packed; bits [9:0] contain the * best partition, bits [11:10] contain the best color component. */ void find_best_partitionings( @@ -847,17 +990,21 @@ struct pixel_region_variance_args /** The channel swizzle pattern. */ astcenc_swizzle swz; /** Should the algorithm bother with Z axis processing? */ - int have_z; + bool have_z; /** The kernel radius for average and variance. */ int avg_var_kernel_radius; /** The kernel radius for alpha processing. */ int alpha_kernel_radius; /** The size of the working data to process. */ - int3 size; + int size_x; + int size_y; + int size_z; /** The position of first src and dst data in the data set. */ - int3 offset; + int offset_x; + int offset_y; + int offset_z; /** The working memory buffer. */ - float4 *work_memory; + vfloat4 *work_memory; }; /** @@ -868,9 +1015,12 @@ struct avg_var_args /** The arguments for the nested variance computation. */ pixel_region_variance_args arg; /** The image dimensions. */ - int3 img_size; + int img_size_x; + int img_size_y; + int img_size_z; /** The maximum working block dimensions. */ - int3 blk_size; + int blk_size_xy; + int blk_size_z; /** The working block memory size. */ int work_memory_size; }; @@ -887,7 +1037,8 @@ struct avg_var_args * @param avg_var_kernel_radius The kernel radius (in pixels) for avg and var. * @param alpha_kernel_radius The kernel radius (in pixels) for alpha mods. * @param swz Input data channel swizzle. - * @param thread_count The number of threads to use. + * @param arg The pixel region arguments for this thread. + * @param ag The average variance arguments for this thread. * * @return The number of tasks in the processing stage. */ @@ -909,7 +1060,7 @@ void compute_averages_and_variances( void fetch_imageblock( astcenc_profile decode_mode, const astcenc_image& img, - imageblock* pb, // picture-block to initialize with image data + imageblock* blk, // picture-block to initialize with image data const block_size_descriptor* bsd, // position in picture to fetch block from int xpos, @@ -921,7 +1072,7 @@ void fetch_imageblock( // the data written are taken from orig_data. void write_imageblock( astcenc_image& img, - const imageblock* pb, // picture-block to initialize with image data + const imageblock* blk, // picture-block to initialize with image data const block_size_descriptor* bsd, // position in picture to write block to. 
int xpos, @@ -929,16 +1080,11 @@ void write_imageblock( int zpos, astcenc_swizzle swz); -// helper function to check whether a given picture-block has alpha that is not -// just uniformly 1. -int imageblock_uses_alpha( - const imageblock * pb); - float compute_symbolic_block_difference( - astcenc_profile decode_mode, + const astcenc_config& config, const block_size_descriptor* bsd, const symbolic_compressed_block* scb, - const imageblock* pb, + const imageblock* blk, const error_weight_block *ewb) ; // *********************************************************** @@ -947,15 +1093,15 @@ float compute_symbolic_block_difference( struct endpoints { int partition_count; - float4 endpt0[4]; - float4 endpt1[4]; + vfloat4 endpt0[4]; + vfloat4 endpt1[4]; }; struct endpoints_and_weights { endpoints ep; - float weights[MAX_TEXELS_PER_BLOCK]; - float weight_error_scale[MAX_TEXELS_PER_BLOCK]; + alignas(ASTCENC_VECALIGN) float weights[MAX_TEXELS_PER_BLOCK]; + alignas(ASTCENC_VECALIGN) float weight_error_scale[MAX_TEXELS_PER_BLOCK]; }; void compute_endpoints_and_ideal_weights_1_plane( @@ -974,24 +1120,42 @@ void compute_endpoints_and_ideal_weights_2_planes( endpoints_and_weights* ei1, // primary plane weights endpoints_and_weights* ei2); // secondary plane weights +/** + * @brief Compute the optimal weights for a decimation table. + * + * Compute the idealized weight set, assuming infinite precision and no + * quantization. Later functions will use this as a staring points. + * + * @param eai_in The non-decimated endpoints and weights. + * @param eai_out A copy of eai_in we can modify later. + * @param dt The selected decimation table. + * @param[out] weight_set The output decimated weight set. + * @param[out] weights The output decimated weights. + */ void compute_ideal_weights_for_decimation_table( - const endpoints_and_weights* eai, - const decimation_table* it, + const endpoints_and_weights& eai_in, + endpoints_and_weights& eai_out, + const decimation_table& dt, float* weight_set, float* weights); -void compute_ideal_quantized_weights_for_decimation_table( - const decimation_table* it, +/** + * @brief Compute the best quantized weights for a decimation table. + * + * Compute the quantized weight set, for a specific quant level. + */ +void compute_quantized_weights_for_decimation_table( + const decimation_table* dt, float low_bound, float high_bound, const float* weight_set_in, float* weight_set_out, uint8_t* quantized_weight_set, - int quantization_level); + int quant_level); float compute_error_of_weight_set( const endpoints_and_weights* eai, - const decimation_table* it, + const decimation_table* dt, const float *weights); void merge_endpoints( @@ -1005,35 +1169,50 @@ void merge_endpoints( // the format used may or may not match the format specified; // the return value is the format actually used. int pack_color_endpoints( - float4 color0, - float4 color1, - float4 rgbs_color, - float4 rgbo_color, + vfloat4 color0, + vfloat4 color1, + vfloat4 rgbs_color, + vfloat4 rgbo_color, int format, int* output, - int quantization_level); + int quant_level); // unpack a pair of color endpoints from a series of integers. 
void unpack_color_endpoints( astcenc_profile decode_mode, int format, - int quantization_level, + int quant_level, const int* input, int* rgb_hdr, int* alpha_hdr, int* nan_endpoint, - uint4* output0, - uint4* output1); + vint4* output0, + vint4* output1); + +// unquantize and undecimate a weight grid +void unpack_weights( + const block_size_descriptor& bsd, + const symbolic_compressed_block& scb, + const decimation_table& dt, + bool is_dual_plane, + int weight_quant_level, + int weights_plane1[MAX_TEXELS_PER_BLOCK], + int weights_plane2[MAX_TEXELS_PER_BLOCK]); struct encoding_choice_errors { - float rgb_scale_error; // error of using LDR RGB-scale instead of complete endpoints. - float rgb_luma_error; // error of using HDR RGB-scale instead of complete endpoints. - float luminance_error; // error of using luminance instead of RGB - float alpha_drop_error; // error of discarding alpha - float rgb_drop_error; // error of discarding RGB - int can_offset_encode; - int can_blue_contract; + // Error of using LDR RGB-scale instead of complete endpoints. + float rgb_scale_error; + // Error of using HDR RGB-scale instead of complete endpoints. + float rgb_luma_error; + // Error of using luminance instead of RGB. + float luminance_error; + // Error of discarding alpha. + float alpha_drop_error; + // Validity of using offset encoding. + bool can_offset_encode; + // Validity of using blue contraction encoding. + bool can_blue_contract; }; // buffers used to store intermediate data in compress_symbolic_block_fixed_partition_*() @@ -1052,14 +1231,13 @@ struct alignas(ASTCENC_VECALIGN) compress_fixed_partition_buffers struct compress_symbolic_block_buffers { error_weight_block ewb; - symbolic_compressed_block tempblocks[TUNE_MAX_TRIAL_CANDIDATES]; compress_fixed_partition_buffers planes; }; void compute_encoding_choice_errors( const block_size_descriptor* bsd, - const imageblock* pb, - const partition_info* pi, + const imageblock* blk, + const partition_info* pt, const error_weight_block* ewb, int separate_component, // component that is separated out in 2-plane mode, -1 in 1-plane mode encoding_choice_errors* eci); @@ -1078,20 +1256,31 @@ void determine_optimal_set_of_endpoint_formats_to_use( // output data int partition_format_specifiers[4][4], int quantized_weight[4], - int quantization_level[4], - int quantization_level_mod[4]); + int quant_level[4], + int quant_level_mod[4]); -void recompute_ideal_colors( - int weight_quantization_mode, +void recompute_ideal_colors_1plane( + int weight_quant_mode, endpoints* ep, // contains the endpoints we wish to update - float4* rgbs_vectors, // used to return RGBS-vectors for endpoint mode #6 - float4* rgbo_vectors, // used to return RGBS-vectors for endpoint mode #7 + vfloat4* rgbs_vectors, // used to return RGBS-vectors for endpoint mode #6 (LDR RGB base + scale) + vfloat4* rgbo_vectors, // used to return RGBS-vectors for endpoint mode #7 (HDR RGB base + scale) + const uint8_t* weight_set8, // the current set of weight values + const partition_info* pt, + const decimation_table* dt, + const imageblock* blk, // picture-block containing the actual data. 
+ const error_weight_block* ewb); + +void recompute_ideal_colors_2planes( + int weight_quant_mode, + endpoints* ep, // contains the endpoints we wish to update + vfloat4* rgbs_vectors, // used to return RGBS-vectors for endpoint mode #6 (LDR RGB base + scale) + vfloat4* rgbo_vectors, // used to return RGBS-vectors for endpoint mode #7 (HDR RGB base + scale) const uint8_t* weight_set8, // the current set of weight values const uint8_t* plane2_weight_set8, // nullptr if plane 2 is not actually used. int plane2_color_component, // color component for 2nd plane of weights; -1 if the 2nd plane of weights is not present - const partition_info* pi, - const decimation_table* it, - const imageblock* pb, // picture-block containing the actual data. + const partition_info* pt, + const decimation_table* dt, + const imageblock* blk, // picture-block containing the actual data. const error_weight_block* ewb); void expand_deblock_weights( @@ -1101,12 +1290,12 @@ void expand_deblock_weights( void prepare_angular_tables(); void imageblock_initialize_deriv( - const imageblock* pb, + const imageblock* blk, int pixelcount, - float4* dptr); + vfloat4* dptr); void compute_angular_endpoints_1plane( - float mode_cutoff, + bool only_always, const block_size_descriptor* bsd, const float* decimated_quantized_weights, const float* decimated_weights, @@ -1114,7 +1303,7 @@ void compute_angular_endpoints_1plane( float high_value[MAX_WEIGHT_MODES]); void compute_angular_endpoints_2planes( - float mode_cutoff, + bool only_always, const block_size_descriptor * bsd, const float* decimated_quantized_weights, const float* decimated_weights, @@ -1152,8 +1341,9 @@ void physical_to_symbolic( const physical_compressed_block& pcb, symbolic_compressed_block& scb); -uint16_t unorm16_to_sf16( - uint16_t p); +#if defined(ASTCENC_DIAGNOSTICS) +class TraceLog; // See astcenc_diagnostic_trace for details. +#endif struct astcenc_context { @@ -1168,8 +1358,8 @@ struct astcenc_context // Regional average-and-variance information, initialized by // compute_averages_and_variances() only if the astc encoder // is requested to do error weighting based on averages and variances. - float4 *input_averages; - float4 *input_variances; + vfloat4 *input_averages; + vfloat4 *input_variances; float *input_alpha_averages; compress_symbolic_block_buffers* working_buffers; @@ -1183,26 +1373,38 @@ struct astcenc_context ParallelManager manage_avg_var; ParallelManager manage_compress; #endif + + ParallelManager manage_decompress; + +#if defined(ASTCENC_DIAGNOSTICS) + TraceLog* trace_log; +#endif }; /* ============================================================================ Platform-specific functions ============================================================================ */ /** - * @brief Run-time detection if the host CPU supports SSE 4.2. - * @returns Zero if not supported, positive value if it is. + * @brief Run-time detection if the host CPU supports the POPCNT extension. + * @return Zero if not supported, positive value if it is. + */ +int cpu_supports_popcnt(); + +/** + * @brief Run-time detection if the host CPU supports F16C extension. + * @return Zero if not supported, positive value if it is. */ -int cpu_supports_sse42(); +int cpu_supports_f16c(); /** - * @brief Run-time detection if the host CPU supports popcnt. - * @returns Zero if not supported, positive value if it is. + * @brief Run-time detection if the host CPU supports SSE 4.1 extension. + * @return Zero if not supported, positive value if it is. 
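+ *
+ * Illustrative use (the dispatch logic below is an assumption, not code
+ * from this patch): a caller can gate a SIMD code path at startup, e.g.
+ *
+ *     bool use_sse41 = cpu_supports_sse41() != 0;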
*/ -int cpu_supports_popcnt(); +int cpu_supports_sse41(); /** - * @brief Run-time detection if the host CPU supports avx2. - * @returns Zero if not supported, positive value if it is. + * @brief Run-time detection if the host CPU supports AVX 2 extension. + * @return Zero if not supported, positive value if it is. */ int cpu_supports_avx2(); @@ -1215,7 +1417,7 @@ int cpu_supports_avx2(); * @param size The desired buffer size. * @param align The desired buffer alignment; must be 2^N. * - * @returns The memory buffer pointer or nullptr on allocation failure. + * @return The memory buffer pointer or nullptr on allocation failure. */ template T* aligned_malloc(size_t size, size_t align) @@ -1246,9 +1448,9 @@ template void aligned_free(T* ptr) { #if defined(_WIN32) - _aligned_free(ptr); + _aligned_free((void*)ptr); #else - free(ptr); + free((void*)ptr); #endif } diff --git a/libkram/astc-encoder/astcenc_kmeans_partitioning.cpp b/libkram/astc-encoder/astcenc_kmeans_partitioning.cpp index 3d3c94de..6b837566 100644 --- a/libkram/astc-encoder/astcenc_kmeans_partitioning.cpp +++ b/libkram/astc-encoder/astcenc_kmeans_partitioning.cpp @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2011-2020 Arm Limited +// Copyright 2011-2021 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. You may obtain a copy @@ -37,16 +37,12 @@ // algorithm similar to XKCD #221. (http://xkcd.com/221/) // cluster the texels using the k++ means clustering initialization algorithm. -static void kpp_initialize( - int xdim, - int ydim, - int zdim, +static void kmeans_init( + int texels_per_block, int partition_count, const imageblock* blk, - float4* cluster_centers + vfloat4* cluster_centers ) { - int texels_per_block = xdim * ydim * zdim; - int cluster_center_samples[4]; // pick a random sample as first center-point. cluster_center_samples[0] = 145897 /* number from random.org */ % texels_per_block; @@ -56,20 +52,14 @@ static void kpp_initialize( // compute the distance to the first point. int sample = cluster_center_samples[0]; - float4 center_color = float4(blk->data_r[sample], - blk->data_g[sample], - blk->data_b[sample], - blk->data_a[sample]); + vfloat4 center_color = blk->texel(sample); float distance_sum = 0.0f; for (int i = 0; i < texels_per_block; i++) { - float4 color = float4(blk->data_r[i], - blk->data_g[i], - blk->data_b[i], - blk->data_a[i]); - float4 diff = color - center_color; - float distance = dot(diff, diff); + vfloat4 color = blk->texel(i); + vfloat4 diff = color - center_color; + float distance = dot_s(diff, diff); distance_sum += distance; distances[i] = distance; } @@ -110,21 +100,15 @@ static void kpp_initialize( } // update the distances with the new point. 
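	// k++ means selection rule, summarized (a paraphrase of the algorithm
	// named at the top of this function, not new behaviour): each additional
	// center is chosen with probability weighted by the squared distance to
	// the nearest center already picked; the loop below then refreshes
	// distances[i] to min(old distance, distance to the new center) so the
	// next pick uses up-to-date weights.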
- center_color = float4(blk->data_r[sample], - blk->data_g[sample], - blk->data_b[sample], - blk->data_a[sample]); + center_color = blk->texel(sample); distance_sum = 0.0f; for (int i = 0; i < texels_per_block; i++) { - float4 color = float4(blk->data_r[i], - blk->data_g[i], - blk->data_b[i], - blk->data_a[i]); - float4 diff = color - center_color; - float distance = dot(diff, diff); - distance = MIN(distance, distances[i]); + vfloat4 color = blk->texel(i); + vfloat4 diff = color - center_color; + float distance = dot_s(diff, diff); + distance = astc::min(distance, distances[i]); distance_sum += distance; distances[i] = distance; } @@ -134,66 +118,52 @@ static void kpp_initialize( for (int i = 0; i < partition_count; i++) { int center_sample = cluster_center_samples[i]; - float4 color = float4(blk->data_r[center_sample], - blk->data_g[center_sample], - blk->data_b[center_sample], - blk->data_a[center_sample]); - cluster_centers[i] = color; + cluster_centers[i] = blk->texel(center_sample); } } // basic K-means clustering: given a set of cluster centers, // assign each texel to a partition -static void basic_kmeans_assign_pass( - int xdim, - int ydim, - int zdim, +static void kmeans_assign( + int texels_per_block, int partition_count, const imageblock* blk, - const float4* cluster_centers, + const vfloat4* cluster_centers, int* partition_of_texel ) { - int texels_per_block = xdim * ydim * zdim; - float distances[MAX_TEXELS_PER_BLOCK]; - int texels_per_partition[4]; + int partition_texel_count[4]; - texels_per_partition[0] = texels_per_block; + partition_texel_count[0] = texels_per_block; for (int i = 1; i < partition_count; i++) { - texels_per_partition[i] = 0; + partition_texel_count[i] = 0; } for (int i = 0; i < texels_per_block; i++) { - float4 color = float4(blk->data_r[i], - blk->data_g[i], - blk->data_b[i], - blk->data_a[i]); - float4 diff = color - cluster_centers[0]; - float distance = dot(diff, diff); + vfloat4 color = blk->texel(i); + vfloat4 diff = color - cluster_centers[0]; + float distance = dot_s(diff, diff); distances[i] = distance; partition_of_texel[i] = 0; } for (int j = 1; j < partition_count; j++) { - float4 center_color = cluster_centers[j]; + vfloat4 center_color = cluster_centers[j]; for (int i = 0; i < texels_per_block; i++) { - float4 color = float4(blk->data_r[i], - blk->data_g[i], - blk->data_b[i], - blk->data_a[i]); - float4 diff = color - center_color; - float distance = dot(diff, diff); + vfloat4 color = blk->texel(i); + vfloat4 diff = color - center_color; + float distance = dot_s(diff, diff); if (distance < distances[i]) { distances[i] = distance; - texels_per_partition[partition_of_texel[i]]--; - texels_per_partition[j]++; + partition_texel_count[partition_of_texel[i]]--; + partition_texel_count[j]++; partition_of_texel[i] = j; } } @@ -210,10 +180,10 @@ static void basic_kmeans_assign_pass( problem_case = 0; for (int i = 0; i < partition_count; i++) { - if (texels_per_partition[i] == 0) + if (partition_texel_count[i] == 0) { - texels_per_partition[partition_of_texel[i]]--; - texels_per_partition[i]++; + partition_texel_count[partition_of_texel[i]]--; + partition_texel_count[i]++; partition_of_texel[i] = i; problem_case = 1; } @@ -224,33 +194,26 @@ static void basic_kmeans_assign_pass( // basic k-means clustering: given a set of cluster assignments // for the texels, find the center position of each cluster. 
-static void basic_kmeans_update( - int xdim, - int ydim, - int zdim, +static void kmeans_update( + int texels_per_block, int partition_count, const imageblock* blk, const int* partition_of_texel, - float4* cluster_centers + vfloat4* cluster_centers ) { - int texels_per_block = xdim * ydim * zdim; - - float4 color_sum[4]; + vfloat4 color_sum[4]; int weight_sum[4]; for (int i = 0; i < partition_count; i++) { - color_sum[i] = float4(0.0f, 0.0f, 0.0f, 0.0f); + color_sum[i] = vfloat4::zero(); weight_sum[i] = 0; } // first, find the center-of-gravity in each cluster for (int i = 0; i < texels_per_block; i++) { - float4 color = float4(blk->data_r[i], - blk->data_g[i], - blk->data_b[i], - blk->data_a[i]); + vfloat4 color = blk->texel(i); int part = partition_of_texel[i]; color_sum[part] = color_sum[part] + color; weight_sum[part]++; @@ -258,7 +221,7 @@ static void basic_kmeans_update( for (int i = 0; i < partition_count; i++) { - cluster_centers[i] = color_sum[i] * (1.0f / weight_sum[i]); + cluster_centers[i] = color_sum[i] * (1.0f / static_cast(weight_sum[i])); } } @@ -271,7 +234,7 @@ static inline int partition_mismatch2( ) { int v1 = astc::popcount(a0 ^ b0) + astc::popcount(a1 ^ b1); int v2 = astc::popcount(a0 ^ b1) + astc::popcount(a1 ^ b0); - return MIN(v1, v2); + return astc::min(v1, v2); } // compute the bit-mismatch for a partitioning in 3-partition mode @@ -297,31 +260,17 @@ static inline int partition_mismatch3( int s0 = p11 + p22; int s1 = p12 + p21; - int v0 = MIN(s0, s1) + p00; + int v0 = astc::min(s0, s1) + p00; int s2 = p10 + p22; int s3 = p12 + p20; - int v1 = MIN(s2, s3) + p01; + int v1 = astc::min(s2, s3) + p01; int s4 = p10 + p21; int s5 = p11 + p20; - int v2 = MIN(s4, s5) + p02; + int v2 = astc::min(s4, s5) + p02; - if (v1 < v0) - v0 = v1; - if (v2 < v0) - v0 = v2; - - return v0; -} - -static inline int MIN3( - int a, - int b, - int c -) { - int d = MIN(a, b); - return MIN(c, d); + return astc::min(v0, v1, v2); } // compute the bit-mismatch for a partitioning in 4-partition mode @@ -355,21 +304,19 @@ static inline int partition_mismatch4( int p32 = astc::popcount(a3 ^ b2); int p33 = astc::popcount(a3 ^ b3); - int mx23 = MIN(p22 + p33, p23 + p32); - int mx13 = MIN(p21 + p33, p23 + p31); - int mx12 = MIN(p21 + p32, p22 + p31); - int mx03 = MIN(p20 + p33, p23 + p30); - int mx02 = MIN(p20 + p32, p22 + p30); - int mx01 = MIN(p21 + p30, p20 + p31); - - int v0 = p00 + MIN3(p11 + mx23, p12 + mx13, p13 + mx12); - int v1 = p01 + MIN3(p10 + mx23, p12 + mx03, p13 + mx02); - int v2 = p02 + MIN3(p11 + mx03, p10 + mx13, p13 + mx01); - int v3 = p03 + MIN3(p11 + mx02, p12 + mx01, p10 + mx12); - - int x0 = MIN(v0, v1); - int x1 = MIN(v2, v3); - return MIN(x0, x1); + int mx23 = astc::min(p22 + p33, p23 + p32); + int mx13 = astc::min(p21 + p33, p23 + p31); + int mx12 = astc::min(p21 + p32, p22 + p31); + int mx03 = astc::min(p20 + p33, p23 + p30); + int mx02 = astc::min(p20 + p32, p22 + p30); + int mx01 = astc::min(p21 + p30, p20 + p31); + + int v0 = p00 + astc::min(p11 + mx23, p12 + mx13, p13 + mx12); + int v1 = p01 + astc::min(p10 + mx23, p12 + mx03, p13 + mx02); + int v2 = p02 + astc::min(p11 + mx03, p10 + mx13, p13 + mx01); + int v3 = p03 + astc::min(p11 + mx02, p12 + mx01, p10 + mx12); + + return astc::min(v0, v1, v2, v3); } static void count_partition_mismatch_bits( @@ -378,7 +325,7 @@ static void count_partition_mismatch_bits( const uint64_t bitmaps[4], int bitcounts[PARTITION_COUNT] ) { - const partition_info *pi = get_partition_table(bsd, partition_count); + const partition_info *pt = 
get_partition_table(bsd, partition_count); if (partition_count == 2) { @@ -386,15 +333,15 @@ static void count_partition_mismatch_bits( uint64_t bm1 = bitmaps[1]; for (int i = 0; i < PARTITION_COUNT; i++) { - if (pi->partition_count == 2) + if (pt->partition_count == 2) { - bitcounts[i] = partition_mismatch2(bm0, bm1, pi->coverage_bitmaps[0], pi->coverage_bitmaps[1]); + bitcounts[i] = partition_mismatch2(bm0, bm1, pt->coverage_bitmaps[0], pt->coverage_bitmaps[1]); } else { bitcounts[i] = 255; } - pi++; + pt++; } } else if (partition_count == 3) @@ -404,15 +351,15 @@ static void count_partition_mismatch_bits( uint64_t bm2 = bitmaps[2]; for (int i = 0; i < PARTITION_COUNT; i++) { - if (pi->partition_count == 3) + if (pt->partition_count == 3) { - bitcounts[i] = partition_mismatch3(bm0, bm1, bm2, pi->coverage_bitmaps[0], pi->coverage_bitmaps[1], pi->coverage_bitmaps[2]); + bitcounts[i] = partition_mismatch3(bm0, bm1, bm2, pt->coverage_bitmaps[0], pt->coverage_bitmaps[1], pt->coverage_bitmaps[2]); } else { bitcounts[i] = 255; } - pi++; + pt++; } } else if (partition_count == 4) @@ -423,37 +370,37 @@ static void count_partition_mismatch_bits( uint64_t bm3 = bitmaps[3]; for (int i = 0; i < PARTITION_COUNT; i++) { - if (pi->partition_count == 4) + if (pt->partition_count == 4) { - bitcounts[i] = partition_mismatch4(bm0, bm1, bm2, bm3, pi->coverage_bitmaps[0], pi->coverage_bitmaps[1], pi->coverage_bitmaps[2], pi->coverage_bitmaps[3]); + bitcounts[i] = partition_mismatch4(bm0, bm1, bm2, bm3, pt->coverage_bitmaps[0], pt->coverage_bitmaps[1], pt->coverage_bitmaps[2], pt->coverage_bitmaps[3]); } else { bitcounts[i] = 255; } - pi++; + pt++; } } } -// counting-sort on the mismatch-bits, thereby -// sorting the partitions into an ordering. +/** + * @brief Use counting sort on the mismatch array to sort partition candidates. + */ static void get_partition_ordering_by_mismatch_bits( const int mismatch_bits[PARTITION_COUNT], int partition_ordering[PARTITION_COUNT] ) { - int mscount[256]; - for (int i = 0; i < 256; i++) - { - mscount[i] = 0; - } + int mscount[256] { 0 }; + // Create the histogram of mismatch counts for (int i = 0; i < PARTITION_COUNT; i++) { mscount[mismatch_bits[i]]++; } + // Create a running sum from the histogram array + // Cells store previous values only; i.e. 
exclude self after sum int summa = 0; for (int i = 0; i < 256; i++) { @@ -462,6 +409,8 @@ static void get_partition_ordering_by_mismatch_bits( summa += cnt; } + // Use the running sum as the index, incrementing after read to allow + // sequential entries with the same count for (int i = 0; i < PARTITION_COUNT; i++) { int idx = mscount[mismatch_bits[i]]++; @@ -475,46 +424,39 @@ void kmeans_compute_partition_ordering( const imageblock* blk, int* ordering ) { - float4 cluster_centers[4]; + vfloat4 cluster_centers[4]; int partition_of_texel[MAX_TEXELS_PER_BLOCK]; - // 3 passes of plain k-means partitioning + // Use three passes of k-means clustering to partition the block data for (int i = 0; i < 3; i++) { if (i == 0) { - kpp_initialize(bsd->xdim, bsd->ydim, bsd->zdim, partition_count, blk, cluster_centers); + kmeans_init(bsd->texel_count, partition_count, blk, cluster_centers); } else { - basic_kmeans_update(bsd->xdim, bsd->ydim, bsd->zdim, partition_count, blk, partition_of_texel, cluster_centers); + kmeans_update(bsd->texel_count, partition_count, blk, partition_of_texel, cluster_centers); } - basic_kmeans_assign_pass(bsd->xdim, bsd->ydim, bsd->zdim, partition_count, blk, cluster_centers, partition_of_texel); - } - - // at this point, we have a near-ideal partitioning. - - // construct bitmaps - uint64_t bitmaps[4]; - for (int i = 0; i < 4; i++) - { - bitmaps[i] = 0ULL; + kmeans_assign(bsd->texel_count, partition_count, blk, cluster_centers, partition_of_texel); } - int texels_to_process = bsd->texelcount_for_bitmap_partitioning; + // Construct the block bitmaps of texel assignments to each partition + uint64_t bitmaps[4] { 0 }; + int texels_to_process = bsd->kmeans_texel_count; for (int i = 0; i < texels_to_process; i++) { - int idx = bsd->texels_for_bitmap_partitioning[i]; + int idx = bsd->kmeans_texels[i]; bitmaps[partition_of_texel[idx]] |= 1ULL << i; } - int bitcounts[PARTITION_COUNT]; - // for each entry in the partition table, count bits of partition-mismatch. - count_partition_mismatch_bits(bsd, partition_count, bitmaps, bitcounts); + // Count the mismatch between the block and the format's partition tables + int mismatch_counts[PARTITION_COUNT]; + count_partition_mismatch_bits(bsd, partition_count, bitmaps, mismatch_counts); - // finally, sort the partitions by bits-of-partition-mismatch - get_partition_ordering_by_mismatch_bits(bitcounts, ordering); + // Sort the partitions based on the number of mismatched bits + get_partition_ordering_by_mismatch_bits(mismatch_counts, ordering); } #endif diff --git a/libkram/astc-encoder/astcenc_mathlib.cpp b/libkram/astc-encoder/astcenc_mathlib.cpp index ffe01c40..a59cb24b 100644 --- a/libkram/astc-encoder/astcenc_mathlib.cpp +++ b/libkram/astc-encoder/astcenc_mathlib.cpp @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2011-2020 Arm Limited +// Copyright 2011-2021 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. You may obtain a copy @@ -17,41 +17,6 @@ #include "astcenc_mathlib.h" -/* Public function, see header file for detailed documentation */ -float astc::log2(float val) -{ - if32 p; - p.f = val; - if (p.s < 0x800000) - p.s = 0x800000; // negative, 0, denormal get clamped to non-denormal. - - // normalize mantissa to range [0.66, 1.33] and extract an exponent - // in such a way that 1.0 returns 0. 
- p.s -= 0x3f2aaaab; - int expo = p.s >> 23; - p.s &= 0x7fffff; - p.s += 0x3f2aaaab; - - float x = p.f - 1.0f; - - // taylor polynomial that, with horner's-rule style evaluation, - // gives sufficient precision for our use - // (relative error of about 1 in 10^6) - - float res = (float)expo - + x * ( 1.442695040888963f - + x * (-0.721347520444482f - + x * ( 0.480898346962988f - + x * (-0.360673760222241f - + x * ( 0.288539008177793f - + x * (-0.240449173481494f - + x * ( 0.206099291555566f - + x * (-0.180336880111120f - + x * ( 0.160299448987663f - ))))))))); - return res; -} - /** * @brief 64-bit rotate left. * diff --git a/libkram/astc-encoder/astcenc_mathlib.h b/libkram/astc-encoder/astcenc_mathlib.h index 05bd258f..63822627 100644 --- a/libkram/astc-encoder/astcenc_mathlib.h +++ b/libkram/astc-encoder/astcenc_mathlib.h @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2011-2020 Arm Limited +// Copyright 2011-2021 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. You may obtain a copy @@ -27,10 +27,69 @@ #include #include -// Kram uses SSE2Neon on ARM, so needs intrinsics in use but not the include -//#if /* USE_SSE && */ (ASTCENC_SSE != 0 || ASTCENC_AVX != 0) -// #include -//#endif +#ifndef ASTCENC_NEON + #if defined(__aarch64__) || defined(__arm__) + #define ASTCENC_NEON 1 + + // these aren't valid on Neon + #define ASTCENC_AVX 0 + #define ASTCENC_SSE 0 + #else + #define ASTCENC_NEON 0 + #endif +#endif + +#ifndef ASTCENC_POPCNT + #if defined(__POPCNT__) + #define ASTCENC_POPCNT 1 + #else + #define ASTCENC_POPCNT 0 + #endif +#endif + +#ifndef ASTCENC_F16C + #if defined(__F16C__) + #define ASTCENC_F16C 1 + #else + #define ASTCENC_F16C 0 + #endif +#endif + +#ifndef ASTCENC_SSE + #if defined(__SSE4_2__) + #define ASTCENC_SSE 42 + #elif defined(__SSE4_1__) + #define ASTCENC_SSE 41 + #elif defined(__SSE3__) + #define ASTCENC_SSE 30 + #elif defined(__SSE2__) + #define ASTCENC_SSE 20 + #else + #define ASTCENC_SSE 0 + #endif +#endif + +#ifndef ASTCENC_AVX + #if defined(__AVX2__) + #define ASTCENC_AVX 2 + #elif defined(__AVX__) + #define ASTCENC_AVX 1 + #else + #define ASTCENC_AVX 0 + #endif +#endif + +// 32-byte words in AVX and AVX2, but also a lot of 16-byte ops in AVX +// Neon only has 16-byte ops for now, but new ISA on the way. +#if ASTCENC_AVX + #define ASTCENC_VECALIGN 32 +#else + #define ASTCENC_VECALIGN 16 +#endif + +#if ASTCENC_SSE != 0 || ASTCENC_AVX != 0 || ASTCENC_POPCNT != 0 + #include +#endif /* ============================================================================ Fast math library; note that many of the higher-order functions in this set @@ -43,6 +102,14 @@ to future vectorization. ============================================================================ */ +// Union for manipulation of float bit patterns +typedef union +{ + uint32_t u; + int32_t s; + float f; +} if32; + // These are namespaced to avoid colliding with C standard library functions. namespace astc { @@ -51,271 +118,281 @@ static const float PI = 3.14159265358979323846f; static const float PI_OVER_TWO = 1.57079632679489661923f; /** - * @brief Fast approximation of log2(x) + * @brief SP float absolute value. * - * This does not produce correct results for special cases such as - * zero/inf/nan/denormal/negative inputs: + * @param v The value to make absolute. 
* - * * Any negative, zero, or denormal will get clamped to smallest-normal, - * resulting in a logarithm of -126. - * * +Inf and +NaN get treated as an extension of largest-finite values, - * which should result in a logarithm value between 128 and 129. + * @return The absolute value. */ -float log2(float val); +static inline float fabs(float v) +{ + return std::fabs(v); +} /** - * @brief SP float absolute value. + * @brief Test if a float value is a nan. * - * @param val The value to make absolute. + * @param v The value test. * - * @return The absolute value. + * @return Zero is not a NaN, non-zero otherwise. */ -static inline float fabs(float val) +static inline bool isnan(float v) { - return std::fabs(val); + return v != v; } /** - * @brief SP float min. + * @brief Return the minimum of two values. * - * @param valA The first value to compare. - * @param valB The second value to compare. + * For floats, NaNs are turned into @c q. + * + * @param p The first value to compare. + * @param q The second value to compare. * * @return The smallest value. */ -static inline float fmin(float p, float q) +template +static inline T min(T p, T q) { return p < q ? p : q; } /** - * @brief SP float max. + * @brief Return the minimum of three values. * - * @param valA The first value to compare. - * @param valB The second value to compare. + * For floats, NaNs are turned into @c r. * - * @return The largest value. + * @param p The first value to compare. + * @param q The second value to compare. + * @param r The third value to compare. + * + * @return The smallest value. */ -static inline float fmax(float p, float q) +template +static inline T min(T p, T q, T r) { - return q < p ? p : q; + return min(min(p, q), r); } /** - * @brief Test if a float value is a nan. + * @brief Return the minimum of four values. * - * @param val The value test. + * For floats, NaNs are turned into @c s. * - * @return Zero is not a NaN, non-zero otherwise. + * @param p The first value to compare. + * @param q The second value to compare. + * @param r The third value to compare. + * @param s The fourth value to compare. + * + * @return The smallest value. */ -static inline int isnan(float val) +template +static inline T min(T p, T q, T r, T s) { - return val != val; + return min(min(p, q), min(r, s)); } /** - * @brief Clamp a float value between 0.0f and 1.0f. + * @brief Return the maximum of two values. * - * NaNs are turned into 0.0f. + * For floats, NaNs are turned into @c q. * - * @param val The value clamp. + * @param p The first value to compare. + * @param q The second value to compare. * - * @return The clamped value. + * @return The largest value. */ -static inline float clamp1f(float val) +template +static inline T max(T p, T q) { - // Do not reorder these, correct NaN handling relies on the fact that - // any comparison with NaN returns false so will fall-though to the 0.0f. - if (val > 1.0f) return 1.0f; - if (val > 0.0f) return val; - return 0.0f; + return p > q ? p : q; } /** - * @brief Clamp a float value between 0.0f and 255.0f. + * @brief Return the maximum of three values. * - * NaNs are turned into 0.0f. + * For floats, NaNs are turned into @c r. * - * @param val The value clamp. + * @param p The first value to compare. + * @param q The second value to compare. + * @param r The third value to compare. * - * @return The clamped value. + * @return The largest value. 
*/ -static inline float clamp255f(float val) +template +static inline T max(T p, T q, T r) { - // Do not reorder these, correct NaN handling relies on the fact that - // any comparison with NaN returns false so will fall-though to the 0.0f. - if (val > 255.0f) return 255.0f; - if (val > 0.0f) return val; - return 0.0f; + return max(max(p, q), r); } /** - * @brief Clamp a value value between mn and mx + * @brief Return the maximum of four values. * - * For floats, NaNs are turned into mn. + * For floats, NaNs are turned into @c s. * - * @param val The value clamp. - * @param mn The min value (inclusive). - * @param mx The max value (inclusive). + * @param p The first value to compare. + * @param q The second value to compare. + * @param r The third value to compare. + * @param s The fourth value to compare. * - * @return The clamped value. + * @return The largest value. */ template -inline T clamp(T val, T mn, T mx) +static inline T max(T p, T q, T r, T s) { - // Do not reorder; correct NaN handling relies on the fact that comparison - // with NaN returns false and will fall-though to the "min" value. - if (val > mx) return mx; - if (val > mn) return val; - return mn; + return max(max(p, q), max(r, s)); } /** - * @brief Clamp a float value between 0.0f and 65504.0f. + * @brief Clamp a value value between @c mn and @c mx. * - * NaNs are turned into 0.0f. + * For floats, NaNs are turned into @c mn. * - * @param val The value to clamp + * @param v The value to clamp. + * @param mn The min value (inclusive). + * @param mx The max value (inclusive). * - * @return The clamped value + * @return The clamped value. */ -static inline float clamp64Kf(float val) +template +inline T clamp(T v, T mn, T mx) { - // Do not reorder these, correct NaN handling relies on the fact that - // any comparison with NaN returns false so will fall-though to the 0.0f. - if (val > 65504.0f) return 65504.0f; - if (val > 0.0f) return val; - return 0.0f; + // Do not reorder; correct NaN handling relies on the fact that comparison + // with NaN returns false and will fall-though to the "min" value. + if (v > mx) return mx; + if (v > mn) return v; + return mn; } /** - * @brief Clamp an integer between two specified limits. + * @brief Clamp a float value between 0.0f and 1.0f. * - * @param val The value clamp. + * NaNs are turned into 0.0f. + * + * @param v The value to clamp. * * @return The clamped value. */ -static inline int clampi(int val, int low, int high) +static inline float clamp1f(float v) { - if (val < low) return low; - if (val > high) return high; - return val; + return astc::clamp(v, 0.0f, 1.0f); } /** - * @brief SP float round-to-nearest. + * @brief Clamp a float value between 0.0f and 255.0f. * - * @param val The value to round. + * NaNs are turned into 0.0f. * - * @return The rounded value. + * @param v The value to clamp. + * + * @return The clamped value. */ -static inline float flt_rte(float val) +static inline float clamp255f(float v) { - return std::floor(val + 0.5f); + return astc::clamp(v, 0.0f, 255.0f); } /** * @brief SP float round-down. * - * @param val The value to round. + * @param v The value to round. * * @return The rounded value. */ -static inline float flt_rd(float val) +static inline float flt_rd(float v) { - return std::floor(val); + return std::floor(v); } /** * @brief SP float round-to-nearest and convert to integer. * - * @param val The value to round. + * @param v The value to round. * * @return The rounded value. 
*/ -static inline int flt2int_rtn(float val) +static inline int flt2int_rtn(float v) { - return (int)(val + 0.5f); + return (int)(v + 0.5f); } /** * @brief SP float round down and convert to integer. * - * @param val The value to round. + * @param v The value to round. * * @return The rounded value. */ -static inline int flt2int_rd(float val) +static inline int flt2int_rd(float v) { - return (int)(val); + return (int)(v); } /** * @brief Population bit count. * - * @param val The value to count. + * @param v The value to population count. * * @return The number of 1 bits. */ -static inline int popcount(uint64_t p) +static inline int popcount(uint64_t v) { #if ASTCENC_POPCNT >= 1 - return (int)_mm_popcnt_u64(p); + return (int)_mm_popcnt_u64(v); #else uint64_t mask1 = 0x5555555555555555ULL; uint64_t mask2 = 0x3333333333333333ULL; uint64_t mask3 = 0x0F0F0F0F0F0F0F0FULL; - p -= (p >> 1) & mask1; - p = (p & mask2) + ((p >> 2) & mask2); - p += p >> 4; - p &= mask3; - p *= 0x0101010101010101ULL; - p >>= 56; - return (int)p; + v -= (v >> 1) & mask1; + v = (v & mask2) + ((v >> 2) & mask2); + v += v >> 4; + v &= mask3; + v *= 0x0101010101010101ULL; + v >>= 56; + return (int)v; #endif } /** * @brief Fast approximation of 1.0 / sqrt(val). * - * @param val The input value. + * @param v The input value. * * @return The approximated result. */ -static inline float rsqrt(float val) +static inline float rsqrt(float v) { - return 1.0f / std::sqrt(val); + return 1.0f / std::sqrt(v); } /** * @brief Fast approximation of sqrt(val). * - * @param val The input value. + * @param v The input value. * * @return The approximated result. */ -static inline float sqrt(float val) +static inline float sqrt(float v) { - return std::sqrt(val); + return std::sqrt(v); } /** - * @brief Log base 2, linearized from 2^-14. + * @brief Extract mantissa and exponent of a float value. * - * @param val The value to log2. + * @param v The input value. + * @param[out] expo The output exponent. * - * @return The approximated result. + * @return The mantissa. 
*/ -static inline float xlog2(float val) +static inline float frexp(float v, int* expo) { - if (val >= 0.00006103515625f) - { - return astc::log2(val); - } - - // Linearized region - return -15.44269504088896340735f + val * 23637.11554992477646609062f; + if32 p; + p.f = v; + *expo = ((p.u >> 23) & 0xFF) - 126; + p.u = (p.u & 0x807fffff) | 0x3f000000; + return p.f; } /** @@ -400,220 +477,29 @@ vtype2 operator*(vtype2 p, T q) { // Scalar by vector multiplication operator template -vtype2 operator*(T p, vtype2 q){ +vtype2 operator*(T p, vtype2 q) { return vtype2 { p * q.r, p * q.g }; } -template class vtype3 -{ -public: - // Data storage - T r, g, b; - - // Default constructor - vtype3() {} - - // Initialize from 1 scalar - vtype3(T p) : r(p), g(p), b(p) {} - - // Initialize from N scalars - vtype3(T p, T q, T s) : r(p), g(q), b(s) {} - - // Initialize from another vector - vtype3(const vtype3 & p) : r(p.r), g(p.g), b(p.b) {} - - // Assignment operator - vtype3& operator=(const vtype3 &s) { - this->r = s.r; - this->g = s.g; - this->b = s.b; - return *this; - } -}; - -// Vector by vector addition -template -vtype3 operator+(vtype3 p, vtype3 q) { - return vtype3 { p.r + q.r, p.g + q.g, p.b + q.b }; -} - -// Vector by vector subtraction -template -vtype3 operator-(vtype3 p, vtype3 q) { - return vtype3 { p.r - q.r, p.g - q.g, p.b - q.b }; -} - -// Vector by vector multiplication operator -template -vtype3 operator*(vtype3 p, vtype3 q) { - return vtype3 { p.r * q.r, p.g * q.g, p.b * q.b }; -} - -// Vector by scalar multiplication operator -template -vtype3 operator*(vtype3 p, T q) { - return vtype3 { p.r * q, p.g * q, p.b * q }; -} - -// Scalar by vector multiplication operator -template -vtype3 operator*(T p, vtype3 q){ - return vtype3 { p * q.r, p * q.g, p * q.b }; -} - -template class alignas(16) vtype4 -{ -public: - // Data storage - T r, g, b, a; - - // Default constructor - vtype4() {} - - // Initialize from 1 scalar - vtype4(T p) : r(p), g(p), b(p), a(p) {} - - // Initialize from N scalars - vtype4(T p, T q, T s, T t) : r(p), g(q), b(s), a(t) {} - - // Initialize from another vector - vtype4(const vtype4 & p) : r(p.r), g(p.g), b(p.b), a(p.a) {} - - // Assignment operator - vtype4& operator=(const vtype4 &s) { - this->r = s.r; - this->g = s.g; - this->b = s.b; - this->a = s.a; - return *this; - } -}; - -// Vector by vector addition -template -vtype4 operator+(vtype4 p, vtype4 q) { - return vtype4 { p.r + q.r, p.g + q.g, p.b + q.b, p.a + q.a }; -} - -// Vector by vector subtraction -template -vtype4 operator-(vtype4 p, vtype4 q) { - return vtype4 { p.r - q.r, p.g - q.g, p.b - q.b, p.a - q.a }; -} - -// Vector by vector multiplication operator -template -vtype4 operator*(vtype4 p, vtype4 q) { - return vtype4 { p.r * q.r, p.g * q.g, p.b * q.b, p.a * q.a }; -} - -// Vector by scalar multiplication operator -template -vtype4 operator*(vtype4 p, T q) { - return vtype4 { p.r * q, p.g * q, p.b * q, p.a * q }; -} - -// Scalar by vector multiplication operator -template -vtype4 operator*(T p, vtype4 q){ - return vtype4 { p * q.r, p * q.g, p * q.b, p * q.a }; -} - typedef vtype2 float2; -typedef vtype3 float3; -typedef vtype4 float4; -typedef vtype3 int3; -typedef vtype4 int4; -typedef vtype4 uint4; static inline float dot(float2 p, float2 q) { return p.r * q.r + p.g * q.g; } -static inline float dot(float3 p, float3 q) { return p.r * q.r + p.g * q.g + p.b * q.b; } -static inline float dot(float4 p, float4 q) { -#if (ASTCENC_SSE >= 42) && (ASTCENC_ISA_INVARIANCE == 0) - __m128 pv = 
_mm_load_ps((float*)&p); - __m128 qv = _mm_load_ps((float*)&q); - __m128 t = _mm_dp_ps(pv, qv, 0xFF); - return _mm_cvtss_f32(t); -#else - return p.r * q.r + p.g * q.g + p.b * q.b + p.a * q.a; -#endif -} static inline float2 normalize(float2 p) { return p * astc::rsqrt(dot(p, p)); } -static inline float3 normalize(float3 p) { return p * astc::rsqrt(dot(p, p)); } -static inline float4 normalize(float4 p) { return p * astc::rsqrt(dot(p, p)); } - -static inline float4 sqrt(float4 p) { - float4 r; -#if ASTCENC_SSE >= 20 - __m128 pv = _mm_load_ps((float*)&p); - __m128 t = _mm_sqrt_ps(pv); - _mm_store_ps((float*)&r, t); -#else - r.r = std::sqrt(p.r); - r.g = std::sqrt(p.g); - r.b = std::sqrt(p.b); - r.a = std::sqrt(p.a); -#endif - return r; -} - -#ifndef MIN - #define MIN(x,y) ((x)<(y)?(x):(y)) -#endif - -#ifndef MAX - #define MAX(x,y) ((x)>(y)?(x):(y)) -#endif - -// TODO: need to use _mm_min/max_ps -static inline float4 min(float4 p, float4 q) { - return float4(MIN(p.r, q.r), MIN(p.g, q.g), MIN(p.b, q.b), MIN(p.a, q.a)); -} -static inline float4 max(float4 p, float4 q) { - return float4(MAX(p.r, q.r), MAX(p.g, q.g), MAX(p.b, q.b), MAX(p.a, q.a)); -} - /* ============================================================================ Softfloat library with fp32 and fp16 conversion functionality. ============================================================================ */ -typedef union if32_ -{ - uint32_t u; - int32_t s; - float f; -} if32; - uint32_t clz32(uint32_t p); -/* sized soft-float types. These are mapped to the sized integer - types of C99, instead of C's floating-point types; this is because - the library needs to maintain exact, bit-level control on all - operations on these data types. */ -typedef uint16_t sf16; -typedef uint32_t sf32; - -/* the five rounding modes that IEEE-754r defines */ -typedef enum -{ - SF_UP = 0, /* round towards positive infinity */ - SF_DOWN = 1, /* round towards negative infinity */ - SF_TOZERO = 2, /* round towards zero */ - SF_NEARESTEVEN = 3, /* round toward nearest value; if mid-between, round to even value */ - SF_NEARESTAWAY = 4 /* round toward nearest value; if mid-between, round away from zero */ -} roundmode; - /* narrowing float->float conversions */ -sf16 sf32_to_sf16(sf32, roundmode); - -/* widening float->float conversions */ -sf32 sf16_to_sf32(sf16); - -sf16 float_to_sf16(float, roundmode); - -float sf16_to_float(sf16); +uint16_t float_to_sf16(float val); +float sf16_to_float(uint16_t val); +/********************************* + Vector library +*********************************/ +#include "astcenc_vecmathlib.h" /********************************* Declaration of line types @@ -629,14 +515,14 @@ struct line2 // parametric line, 3D struct line3 { - float3 a; - float3 b; + vfloat4 a; + vfloat4 b; }; struct line4 { - float4 a; - float4 b; + vfloat4 a; + vfloat4 b; }; @@ -649,16 +535,16 @@ struct processed_line2 struct processed_line3 { - float3 amod; - float3 bs; - float3 bis; + vfloat4 amod; + vfloat4 bs; + vfloat4 bis; }; struct processed_line4 { - float4 amod; - float4 bs; - float4 bis; + vfloat4 amod; + vfloat4 bs; + vfloat4 bis; }; #endif diff --git a/libkram/astc-encoder/astcenc_mathlib_softfloat.cpp b/libkram/astc-encoder/astcenc_mathlib_softfloat.cpp index 98ebac7d..d1381fd7 100644 --- a/libkram/astc-encoder/astcenc_mathlib_softfloat.cpp +++ b/libkram/astc-encoder/astcenc_mathlib_softfloat.cpp @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 
2011-2020 Arm Limited +// Copyright 2011-2021 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. You may obtain a copy @@ -21,6 +21,13 @@ #include "astcenc_mathlib.h" +/* sized soft-float types. These are mapped to the sized integer + types of C99, instead of C's floating-point types; this is because + the library needs to maintain exact, bit-level control on all + operations on these data types. */ +typedef uint16_t sf16; +typedef uint32_t sf32; + /****************************************** helper functions and their lookup tables ******************************************/ @@ -58,7 +65,7 @@ uint32_t clz32(uint32_t inp) { #if defined(__GNUC__) && (defined(__i386) || defined(__amd64)) uint32_t bsr; - __asm__("bsrl %1, %0": "=r"(bsr):"r"(inp | 1)); + __asm__("bsrl %1, %0": "=r"(bsr):"r"(inp | 1)); return 31 - bsr; #else #if defined(__arm__) && defined(__ARMCC_VERSION) @@ -66,7 +73,7 @@ uint32_t clz32(uint32_t inp) #else #if defined(__arm__) && defined(__GNUC__) uint32_t lz; - __asm__("clz %0, %1": "=r"(lz):"r"(inp)); + __asm__("clz %0, %1": "=r"(lz):"r"(inp)); return lz; #else /* slow default version */ @@ -87,6 +94,17 @@ uint32_t clz32(uint32_t inp) #endif } +/* the five rounding modes that IEEE-754r defines */ +typedef enum +{ + SF_UP = 0, /* round towards positive infinity */ + SF_DOWN = 1, /* round towards negative infinity */ + SF_TOZERO = 2, /* round towards zero */ + SF_NEARESTEVEN = 3, /* round toward nearest value; if mid-between, round to even value */ + SF_NEARESTAWAY = 4 /* round toward nearest value; if mid-between, round away from zero */ +} roundmode; + + static uint32_t rtne_shift32(uint32_t inp, uint32_t shamt) { uint32_t vl1 = UINT32_C(1) << shamt; @@ -116,7 +134,7 @@ static uint32_t rtup_shift32(uint32_t inp, uint32_t shamt) } /* convert from FP16 to FP32. */ -sf32 sf16_to_sf32(sf16 inp) +static sf32 sf16_to_sf32(sf16 inp) { uint32_t inpx = inp; @@ -167,7 +185,7 @@ sf32 sf16_to_sf32(sf16 inp) } /* Conversion routine that converts from FP32 to FP16. It supports denormals and all rounding modes. If a NaN is given as input, it is quietened. */ -sf16 sf32_to_sf16(sf32 inp, roundmode rmode) +static sf16 sf32_to_sf16(sf32 inp, roundmode rmode) { /* for each possible sign/exponent combination, store a case index. This gives a 512-byte table */ static const uint8_t tab[512] = { @@ -369,7 +387,7 @@ sf16 sf32_to_sf16(sf32 inp, roundmode rmode) } /* convert from soft-float to native-float */ -float sf16_to_float(sf16 p) +float sf16_to_float(uint16_t p) { if32 i; i.u = sf16_to_sf32(p); @@ -377,9 +395,9 @@ float sf16_to_float(sf16 p) } /* convert from native-float to soft-float */ -sf16 float_to_sf16(float p, roundmode rm) +uint16_t float_to_sf16(float p) { if32 i; i.f = p; - return sf32_to_sf16(i.u, rm); + return sf32_to_sf16(i.u, SF_NEARESTEVEN); } diff --git a/libkram/astc-encoder/astcenc_partition_tables.cpp b/libkram/astc-encoder/astcenc_partition_tables.cpp index 20be6a11..04f7ae23 100644 --- a/libkram/astc-encoder/astcenc_partition_tables.cpp +++ b/libkram/astc-encoder/astcenc_partition_tables.cpp @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2011-2020 Arm Limited +// Copyright 2011-2021 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. 
You may obtain a copy @@ -75,14 +75,14 @@ static int compare_canonicalized_partition_tables( consider and thus improves encode performance. */ static void partition_table_zap_equal_elements( int texel_count, - partition_info* pi + partition_info* pt ) { int partition_tables_zapped = 0; uint64_t *canonicalizeds = new uint64_t[PARTITION_COUNT * 7]; for (int i = 0; i < PARTITION_COUNT; i++) { - gen_canonicalized_partition_table(texel_count, pi[i].partition_of_texel, canonicalizeds + i * 7); + gen_canonicalized_partition_table(texel_count, pt[i].partition_of_texel, canonicalizeds + i * 7); } for (int i = 0; i < PARTITION_COUNT; i++) @@ -91,7 +91,7 @@ static void partition_table_zap_equal_elements( { if (compare_canonicalized_partition_tables(canonicalizeds + 7 * i, canonicalizeds + 7 * j)) { - pi[i].partition_count = 0; + pt[i].partition_count = 0; partition_tables_zapped++; break; } @@ -275,7 +275,7 @@ static void generate_one_partition_table( for (int i = 0; i < 4; i++) { - pt->texels_per_partition[i] = counts[i]; + pt->partition_texel_count[i] = counts[i]; } if (counts[0] == 0) @@ -304,10 +304,10 @@ static void generate_one_partition_table( pt->coverage_bitmaps[i] = 0ULL; } - int texels_to_process = bsd->texelcount_for_bitmap_partitioning; + int texels_to_process = bsd->kmeans_texel_count; for (int i = 0; i < texels_to_process; i++) { - int idx = bsd->texels_for_bitmap_partitioning[i]; + int idx = bsd->kmeans_texels[i]; pt->coverage_bitmaps[pt->partition_of_texel[idx]] |= 1ULL << i; } } diff --git a/libkram/astc-encoder/astcenc_percentile_tables.cpp b/libkram/astc-encoder/astcenc_percentile_tables.cpp index f84dea03..6d55a7ac 100644 --- a/libkram/astc-encoder/astcenc_percentile_tables.cpp +++ b/libkram/astc-encoder/astcenc_percentile_tables.cpp @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2011-2020 Arm Limited +// Copyright 2011-2021 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. You may obtain a copy @@ -1108,7 +1108,7 @@ static const packed_percentile_table *get_packed_table( case 0x0A0A: return &block_pcd_10x10; case 0x0A0C: return &block_pcd_12x10; case 0x0C0C: return &block_pcd_12x12; - }; + } // Should never hit this with a valid 2D block size return nullptr; @@ -1173,7 +1173,7 @@ int is_legal_2d_block_size( case 0x0C0A: case 0x0C0C: return 1; - }; + } return 0; } diff --git a/libkram/astc-encoder/astcenc_pick_best_endpoint_format.cpp b/libkram/astc-encoder/astcenc_pick_best_endpoint_format.cpp index 21d8cf87..9ba1685f 100644 --- a/libkram/astc-encoder/astcenc_pick_best_endpoint_format.cpp +++ b/libkram/astc-encoder/astcenc_pick_best_endpoint_format.cpp @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2011-2020 Arm Limited +// Copyright 2011-2021 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. You may obtain a copy @@ -32,19 +32,19 @@ // for a given partition, compute for every (integer-component-count, quantization-level) // the color error. -static void compute_color_error_for_every_integer_count_and_quantization_level( +static void compute_color_error_for_every_integer_count_and_quant_level( int encode_hdr_rgb, // 1 = perform HDR encoding, 0 = perform LDR encoding. 
int encode_hdr_alpha, int partition_index, - const partition_info* pi, - const encoding_choice_errors * eci, // pointer to the structure for the CURRENT partition. - const endpoints * ep, - float4 error_weightings[4], + const partition_info* pt, + const encoding_choice_errors* eci, // pointer to the structure for the CURRENT partition. + const endpoints* ep, + vfloat4 error_weight, // arrays to return results back through. float best_error[21][4], int format_of_choice[21][4] ) { - int partition_size = pi->texels_per_partition[partition_index]; + int partition_size = pt->partition_texel_count[partition_index]; static const float baseline_quant_error[21] = { (65536.0f * 65536.0f / 18.0f), // 2 values, 1 step @@ -70,15 +70,13 @@ static void compute_color_error_for_every_integer_count_and_quantization_level( (65536.0f * 65536.0f / 18.0f) / (255 * 255) }; - float4 ep0 = ep->endpt0[partition_index]; - float4 ep1 = ep->endpt1[partition_index]; - - float ep1_min = MIN(MIN(ep1.r, ep1.g), ep1.b); - ep1_min = MAX(ep1_min, 0.0f); + vfloat4 ep0 = ep->endpt0[partition_index]; + vfloat4 ep1 = ep->endpt1[partition_index]; - float4 error_weight = error_weightings[partition_index]; + float ep1_min = hmin_rgb_s(ep1); + ep1_min = astc::max(ep1_min, 0.0f); - float error_weight_rgbsum = error_weight.r + error_weight.g + error_weight.b; + float error_weight_rgbsum = hadd_rgb_s(error_weight); float range_upper_limit_rgb = encode_hdr_rgb ? 61440.0f : 65535.0f; float range_upper_limit_alpha = encode_hdr_alpha ? 61440.0f : 65535.0f; @@ -86,66 +84,56 @@ static void compute_color_error_for_every_integer_count_and_quantization_level( // it is possible to get endpoint colors significantly outside [0,upper-limit] // even if the input data are safely contained in [0,upper-limit]; // we need to add an error term for this situation, - float4 ep0_range_error_high; - float4 ep1_range_error_high; - float4 ep0_range_error_low; - float4 ep1_range_error_low; - - ep0_range_error_high.r = MAX(0.0f, ep0.r - range_upper_limit_rgb); - ep0_range_error_high.g = MAX(0.0f, ep0.g - range_upper_limit_rgb); - ep0_range_error_high.b = MAX(0.0f, ep0.b - range_upper_limit_rgb); - ep0_range_error_high.a = MAX(0.0f, ep0.a - range_upper_limit_alpha); - - ep1_range_error_high.r = MAX(0.0f, ep1.r - range_upper_limit_rgb); - ep1_range_error_high.g = MAX(0.0f, ep1.g - range_upper_limit_rgb); - ep1_range_error_high.b = MAX(0.0f, ep1.b - range_upper_limit_rgb); - ep1_range_error_high.a = MAX(0.0f, ep1.a - range_upper_limit_alpha); - - ep0_range_error_low.r = MIN(0.0f, ep0.r); - ep0_range_error_low.g = MIN(0.0f, ep0.g); - ep0_range_error_low.b = MIN(0.0f, ep0.b); - ep0_range_error_low.a = MIN(0.0f, ep0.a); - - ep1_range_error_low.r = MIN(0.0f, ep1.r); - ep1_range_error_low.g = MIN(0.0f, ep1.g); - ep1_range_error_low.b = MIN(0.0f, ep1.b); - ep1_range_error_low.a = MIN(0.0f, ep1.a); - - float4 sum_range_error = + vfloat4 ep0_range_error_high; + vfloat4 ep1_range_error_high; + vfloat4 ep0_range_error_low; + vfloat4 ep1_range_error_low; + + vfloat4 offset(range_upper_limit_rgb, range_upper_limit_rgb, range_upper_limit_rgb, range_upper_limit_alpha); + ep0_range_error_high = max(ep0 - offset, 0.0f); + ep1_range_error_high = max(ep1 - offset, 0.0f); + + ep0_range_error_low = min(ep0, 0.0f); + ep1_range_error_low = min(ep1, 0.0f); + + vfloat4 sum_range_error = (ep0_range_error_low * ep0_range_error_low) + (ep1_range_error_low * ep1_range_error_low) + (ep0_range_error_high * ep0_range_error_high) + (ep1_range_error_high * ep1_range_error_high); - float 
rgb_range_error = dot(float3(sum_range_error.r, sum_range_error.g, sum_range_error.b), - float3(error_weight.r, error_weight.g, error_weight.b)) * 0.5f * partition_size; - float alpha_range_error = sum_range_error.a * error_weight.a * 0.5f * partition_size; + + float rgb_range_error = dot3_s(sum_range_error.swz<0, 1, 2>(), + error_weight.swz<0, 1, 2>()) + * 0.5f * static_cast(partition_size); + float alpha_range_error = sum_range_error.lane<3>() * error_weight.lane<3>() + * 0.5f * static_cast(partition_size); if (encode_hdr_rgb) { // collect some statistics float af, cf; - if (ep1.r > ep1.g && ep1.r > ep1.b) + if (ep1.lane<0>() > ep1.lane<1>() && ep1.lane<0>() > ep1.lane<2>()) { - af = ep1.r; - cf = ep1.r - ep0.r; + af = ep1.lane<0>(); + cf = ep1.lane<0>() - ep0.lane<0>(); } - else if (ep1.g > ep1.b) + else if (ep1.lane<1>() > ep1.lane<2>()) { - af = ep1.g; - cf = ep1.g - ep0.g; + af = ep1.lane<1>(); + cf = ep1.lane<1>() - ep0.lane<1>(); } else { - af = ep1.b; - cf = ep1.b - ep0.b; + af = ep1.lane<2>(); + cf = ep1.lane<2>() - ep0.lane<2>(); } float bf = af - ep1_min; // estimate of color-component spread in high endpoint color - float3 prd = float3(ep1.r, ep1.g, ep1.b) - float3(cf, cf, cf); - float3 pdif = prd - float3(ep0.r, ep0.g, ep0.b); + vfloat4 prd = (ep1 - vfloat4(cf)).swz<0, 1, 2>(); + vfloat4 pdif = prd - ep0.swz<0, 1, 2>(); // estimate of color-component spread in low endpoint color - float df = MAX(MAX(fabsf(pdif.r), fabsf(pdif.g)), fabsf(pdif.b)); + float df = hmax_s(abs(pdif)); int b = (int)bf; int c = (int)cf; @@ -236,15 +224,15 @@ static void compute_color_error_for_every_integer_count_and_quantization_level( rgb_mode = 7; } - static const float rgbo_error_scales[6] = { 4.0f, 4.0f, 16.0f, 64.0f, 256.0f, 1024.0f }; - static const float rgb_error_scales[9] = { 64.0f, 64.0f, 16.0f, 16.0f, 4.0f, 4.0f, 1.0f, 1.0f, 384.0f }; + static const float rgbo_error_scales[6] { 4.0f, 4.0f, 16.0f, 64.0f, 256.0f, 1024.0f }; + static const float rgb_error_scales[9] { 64.0f, 64.0f, 16.0f, 16.0f, 4.0f, 4.0f, 1.0f, 1.0f, 384.0f }; float mode7mult = rgbo_error_scales[rgbo_mode] * 0.0015f; // empirically determined .... float mode11mult = rgb_error_scales[rgb_mode] * 0.010f; // empirically determined .... - float lum_high = (ep1.r + ep1.g + ep1.b) * (1.0f / 3.0f); - float lum_low = (ep0.r + ep0.g + ep0.b) * (1.0f / 3.0f); + float lum_high = hadd_rgb_s(ep1) * (1.0f / 3.0f); + float lum_low = hadd_rgb_s(ep0) * (1.0f / 3.0f); float lumdif = lum_high - lum_low; float mode23mult = lumdif < 960 ? 4.0f : lumdif < 3968 ? 16.0f : 128.0f; @@ -268,9 +256,9 @@ static void compute_color_error_for_every_integer_count_and_quantization_level( // base_quant_error should depend on the scale-factor that would be used // during actual encode of the color value. 
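// For reference, a scalar restatement of the endpoint out-of-range penalty computed
// earlier in this function: max(ep - limit, 0) and min(ep, 0) per channel, squared,
// summed over both endpoints, weighted, and scaled by half the partition size. The
// helper below and its parameter names are hypothetical, not part of this patch.
static void range_error_sketch(const float ep0[4], const float ep1[4],
                               const float error_weight[4],
                               float upper_rgb, float upper_alpha, int partition_size,
                               float* rgb_range_error, float* alpha_range_error)
{
    float sum[4];
    for (int c = 0; c < 4; c++)
    {
        float upper = (c == 3) ? upper_alpha : upper_rgb;
        // Overshoot above the channel's upper limit, and undershoot below zero
        float h0 = ep0[c] > upper ? ep0[c] - upper : 0.0f;
        float h1 = ep1[c] > upper ? ep1[c] - upper : 0.0f;
        float l0 = ep0[c] < 0.0f ? ep0[c] : 0.0f;
        float l1 = ep1[c] < 0.0f ? ep1[c] : 0.0f;
        sum[c] = h0 * h0 + h1 * h1 + l0 * l0 + l1 * l1;
    }
    float scale = 0.5f * (float)partition_size;
    *rgb_range_error = (sum[0] * error_weight[0] +
                        sum[1] * error_weight[1] +
                        sum[2] * error_weight[2]) * scale;
    *alpha_range_error = sum[3] * error_weight[3] * scale;
}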
- float base_quant_error = baseline_quant_error[i] * partition_size * 1.0f; + float base_quant_error = baseline_quant_error[i] * static_cast(partition_size); float rgb_quantization_error = error_weight_rgbsum * base_quant_error * 2.0f; - float alpha_quantization_error = error_weight.a * base_quant_error * 2.0f; + float alpha_quantization_error = error_weight.lane<3>() * base_quant_error * 2.0f; float rgba_quantization_error = rgb_quantization_error + alpha_quantization_error; // for 8 integers, we have two encodings: one with HDR alpha and another one @@ -312,52 +300,51 @@ static void compute_color_error_for_every_integer_count_and_quantization_level( format_of_choice[i][0] = FMT_LUMINANCE; } + float base_quant_error_rgb = error_weight_rgbsum * static_cast(partition_size); + float base_quant_error_a = error_weight.lane<3>() * static_cast(partition_size); + float base_quant_error_rgba = base_quant_error_rgb + base_quant_error_a; + + float error_scale_bc_rgba = eci->can_blue_contract ? 0.625f : 1.0f; + float error_scale_oe_rgba = eci->can_offset_encode ? 0.5f : 1.0f; + + float error_scale_bc_rgb = eci->can_blue_contract ? 0.5f : 1.0f; + float error_scale_oe_rgb = eci->can_offset_encode ? 0.25f : 1.0f; + // pick among the available LDR endpoint modes for (int i = 4; i < 21; i++) { - float base_quant_error = baseline_quant_error[i] * partition_size * 1.0f; - float rgb_quantization_error = error_weight_rgbsum * base_quant_error; - float alpha_quantization_error = error_weight.a * base_quant_error; - float rgba_quantization_error = rgb_quantization_error + alpha_quantization_error; - - // for 8 integers, the available encodings are: - // full LDR RGB-Alpha - float full_ldr_rgba_error = rgba_quantization_error; - - if (eci->can_blue_contract) + // Offset encoding not possible at higher quant levels + if (i == 19) { - full_ldr_rgba_error *= 0.625f; + error_scale_oe_rgba = 1.0f; + error_scale_oe_rgb = 1.0f; } - if (eci->can_offset_encode && i <= 18) - { - full_ldr_rgba_error *= 0.5f; - } + float base_quant_error = baseline_quant_error[i]; + float quant_error_rgb = base_quant_error_rgb * base_quant_error; + float quant_error_rgba = base_quant_error_rgba * base_quant_error; - full_ldr_rgba_error += rgb_range_error + alpha_range_error; + // 8 integers can encode as RGBA+RGBA + float full_ldr_rgba_error = quant_error_rgba + * error_scale_bc_rgba + * error_scale_oe_rgba + + rgb_range_error + + alpha_range_error; best_error[i][3] = full_ldr_rgba_error; format_of_choice[i][3] = FMT_RGBA; - // for 6 integers, we have: - // - an LDR-RGB encoding - // - an RGBS + Alpha encoding (LDR) - - float full_ldr_rgb_error = rgb_quantization_error; - - if (eci->can_blue_contract) - { - full_ldr_rgb_error *= 0.5f; - } - - if (eci->can_offset_encode && i <= 18) - { - full_ldr_rgb_error *= 0.25f; - } - - full_ldr_rgb_error += eci->alpha_drop_error + rgb_range_error; + // 6 integers can encode as RGB+RGB or RGBS+AA + float full_ldr_rgb_error = quant_error_rgb + * error_scale_bc_rgb + * error_scale_oe_rgb + + rgb_range_error + + eci->alpha_drop_error; - float rgbs_alpha_error = rgba_quantization_error + eci->rgb_scale_error + rgb_range_error + alpha_range_error; + float rgbs_alpha_error = quant_error_rgba + + eci->rgb_scale_error + + rgb_range_error + + alpha_range_error; if (rgbs_alpha_error < full_ldr_rgb_error) { @@ -370,10 +357,16 @@ static void compute_color_error_for_every_integer_count_and_quantization_level( format_of_choice[i][2] = FMT_RGB; } - // for 4 integers, we have a Luminance-Alpha encoding and the RGBS 
encoding - float ldr_rgbs_error = rgb_quantization_error + eci->alpha_drop_error + eci->rgb_scale_error + rgb_range_error; + // 4 integers can encode as RGBS or LA+LA + float ldr_rgbs_error = quant_error_rgb + + rgb_range_error + + eci->alpha_drop_error + + eci->rgb_scale_error; - float lum_alpha_error = rgba_quantization_error + eci->luminance_error + rgb_range_error + alpha_range_error; + float lum_alpha_error = quant_error_rgba + + rgb_range_error + + alpha_range_error + + eci->luminance_error; if (ldr_rgbs_error < lum_alpha_error) { @@ -386,8 +379,11 @@ static void compute_color_error_for_every_integer_count_and_quantization_level( format_of_choice[i][1] = FMT_LUMINANCE_ALPHA; } - // for 2 integers, we have a Luminance-encoding and an Alpha-encoding. - float luminance_error = rgb_quantization_error + eci->alpha_drop_error + eci->luminance_error + rgb_range_error; + // 2 integers can encode as L+L + float luminance_error = quant_error_rgb + + rgb_range_error + + eci->alpha_drop_error + + eci->luminance_error; best_error[i][0] = luminance_error; format_of_choice[i][0] = FMT_LUMINANCE; @@ -400,7 +396,7 @@ static void one_partition_find_best_combination_for_bitcount( float combined_best_error[21][4], int formats_of_choice[21][4], int bits_available, - int* best_quantization_level, + int* best_quant_level, int* best_formats, float* error_of_best_combination ) { @@ -409,23 +405,23 @@ static void one_partition_find_best_combination_for_bitcount( for (int i = 0; i < 4; i++) { // compute the quantization level for a given number of integers and a given number of bits. - int quantization_level = quantization_mode_table[i + 1][bits_available]; + int quant_level = quant_mode_table[i + 1][bits_available]; - if (quantization_level == -1) + if (quant_level == -1) { continue; // used to indicate the case where we don't have enough bits to represent a given endpoint format at all. 
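// The LDR error estimates above fold the blue-contract and offset-encode bonuses into
// the multiplicative factors 0.625/0.5 (RGBA) and 0.5/0.25 (RGB), and drop the
// offset-encode bonus from quant level 19 upward. A compact scalar restatement of the
// full RGBA case; the helper name and signature are hypothetical, not part of the patch.
static float full_ldr_rgba_error_sketch(float baseline_quant_error,
                                        float base_quant_error_rgba,
                                        bool can_blue_contract,
                                        bool can_offset_encode,
                                        float rgb_range_error,
                                        float alpha_range_error)
{
    float err = baseline_quant_error * base_quant_error_rgba;
    err *= can_blue_contract ? 0.625f : 1.0f; // same scale factors as above
    err *= can_offset_encode ? 0.5f : 1.0f;   // caller clears this bonus at level >= 19
    return err + rgb_range_error + alpha_range_error;
}
// The RGB-only case above has the same shape, using the 0.5/0.25 factors and the
// alpha-drop error in place of the alpha range error.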
} - if (combined_best_error[quantization_level][i] < best_integer_count_error) + if (combined_best_error[quant_level][i] < best_integer_count_error) { - best_integer_count_error = combined_best_error[quantization_level][i]; + best_integer_count_error = combined_best_error[quant_level][i]; best_integer_count = i; } } - int ql = quantization_mode_table[best_integer_count + 1][bits_available]; + int ql = quant_mode_table[best_integer_count + 1][bits_available]; - *best_quantization_level = ql; + *best_quant_level = ql; *error_of_best_combination = best_integer_count_error; if (ql >= 0) { @@ -458,15 +454,15 @@ static void two_partitions_find_best_combination_for_every_quantization_and_inte { for (int j = 0; j < 4; j++) // integer-count for second endpoint-pair { - int low2 = MIN(i, j); - int high2 = MAX(i, j); + int low2 = astc::min(i, j); + int high2 = astc::max(i, j); if ((high2 - low2) > 1) { continue; } int intcnt = i + j; - float errorterm = MIN(best_error[0][quant][i] + best_error[1][quant][j], 1e10f); + float errorterm = astc::min(best_error[0][quant][i] + best_error[1][quant][j], 1e10f); if (errorterm <= combined_best_error[quant][intcnt]) { combined_best_error[quant][intcnt] = errorterm; @@ -483,8 +479,8 @@ static void two_partitions_find_best_combination_for_bitcount( float combined_best_error[21][7], int formats_of_choice[21][7][2], int bits_available, - int* best_quantization_level, - int* best_quantization_level_mod, + int* best_quant_level, + int* best_quant_level_mod, int* best_formats, float* error_of_best_combination ) { @@ -494,14 +490,14 @@ static void two_partitions_find_best_combination_for_bitcount( for (int integer_count = 2; integer_count <= 8; integer_count++) { // compute the quantization level for a given number of integers and a given number of bits. - int quantization_level = quantization_mode_table[integer_count][bits_available]; + int quant_level = quant_mode_table[integer_count][bits_available]; - if (quantization_level == -1) + if (quant_level == -1) { break; // used to indicate the case where we don't have enough bits to represent a given endpoint format at all. 
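// In the pair loops above, i and j are per-partition integer-count indices (0..3,
// i.e. 2/4/6/8 endpoint integers), and combinations whose counts differ by more than
// one step are skipped. This appears to mirror the ASTC restriction that the color
// endpoint modes of a multi-partition block must all fall in the same class or in two
// consecutive classes; treat that reading as an assumption rather than something this
// patch states. A tiny restatement of the filter:
static inline bool endpoint_counts_compatible_sketch(int i, int j)
{
    int low  = i < j ? i : j;
    int high = i < j ? j : i;
    return (high - low) <= 1;
}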
} - float integer_count_error = combined_best_error[quantization_level][integer_count - 2]; + float integer_count_error = combined_best_error[quant_level][integer_count - 2]; if (integer_count_error < best_integer_count_error) { @@ -510,17 +506,14 @@ static void two_partitions_find_best_combination_for_bitcount( } } - int ql = quantization_mode_table[best_integer_count][bits_available]; - int ql_mod = quantization_mode_table[best_integer_count][bits_available + 2]; + int ql = quant_mode_table[best_integer_count][bits_available]; + int ql_mod = quant_mode_table[best_integer_count][bits_available + 2]; - *best_quantization_level = ql; - *best_quantization_level_mod = ql_mod; + *best_quant_level = ql; + *best_quant_level_mod = ql_mod; *error_of_best_combination = best_integer_count_error; if (ql >= 0) { - // make sure this is postive too - assert(ql_mod >= 0 && ql_mod < 21); - for (int i = 0; i < 2; i++) { best_formats[i] = formats_of_choice[ql][best_integer_count - 2][i]; @@ -556,8 +549,8 @@ static void three_partitions_find_best_combination_for_every_quantization_and_in { for (int j = 0; j < 4; j++) // integer-count for second endpoint-pair { - int low2 = MIN(i, j); - int high2 = MAX(i, j); + int low2 = astc::min(i, j); + int high2 = astc::max(i, j); if ((high2 - low2) > 1) { continue; @@ -565,15 +558,15 @@ static void three_partitions_find_best_combination_for_every_quantization_and_in for (int k = 0; k < 4; k++) // integer-count for third endpoint-pair { - int low3 = MIN(k, low2); - int high3 = MAX(k, high2); + int low3 = astc::min(k, low2); + int high3 = astc::max(k, high2); if ((high3 - low3) > 1) { continue; } int intcnt = i + j + k; - float errorterm = MIN(best_error[0][quant][i] + best_error[1][quant][j] + best_error[2][quant][k], 1e10f); + float errorterm = astc::min(best_error[0][quant][i] + best_error[1][quant][j] + best_error[2][quant][k], 1e10f); if (errorterm <= combined_best_error[quant][intcnt]) { combined_best_error[quant][intcnt] = errorterm; @@ -592,8 +585,8 @@ static void three_partitions_find_best_combination_for_bitcount( float combined_best_error[21][10], int formats_of_choice[21][10][3], int bits_available, - int* best_quantization_level, - int* best_quantization_level_mod, + int* best_quant_level, + int* best_quant_level_mod, int* best_formats, float* error_of_best_combination ) { @@ -603,14 +596,14 @@ static void three_partitions_find_best_combination_for_bitcount( for (int integer_count = 3; integer_count <= 9; integer_count++) { // compute the quantization level for a given number of integers and a given number of bits. - int quantization_level = quantization_mode_table[integer_count][bits_available]; + int quant_level = quant_mode_table[integer_count][bits_available]; - if (quantization_level == -1) + if (quant_level == -1) { break; // used to indicate the case where we don't have enough bits to represent a given endpoint format at all. 
} - float integer_count_error = combined_best_error[quantization_level][integer_count - 3]; + float integer_count_error = combined_best_error[quant_level][integer_count - 3]; if (integer_count_error < best_integer_count_error) { @@ -619,17 +612,14 @@ static void three_partitions_find_best_combination_for_bitcount( } } - int ql = quantization_mode_table[best_integer_count][bits_available]; - int ql_mod = quantization_mode_table[best_integer_count][bits_available + 5]; + int ql = quant_mode_table[best_integer_count][bits_available]; + int ql_mod = quant_mode_table[best_integer_count][bits_available + 5]; - *best_quantization_level = ql; - *best_quantization_level_mod = ql_mod; + *best_quant_level = ql; + *best_quant_level_mod = ql_mod; *error_of_best_combination = best_integer_count_error; if (ql >= 0) { - // make sure this is postive too - assert(ql_mod >= 0 && ql_mod < 21); - for (int i = 0; i < 3; i++) { best_formats[i] = formats_of_choice[ql][best_integer_count - 3][i]; @@ -665,8 +655,8 @@ static void four_partitions_find_best_combination_for_every_quantization_and_int { for (int j = 0; j < 4; j++) // integer-count for second endpoint-pair { - int low2 = MIN(i, j); - int high2 = MAX(i, j); + int low2 = astc::min(i, j); + int high2 = astc::max(i, j); if ((high2 - low2) > 1) { continue; @@ -674,8 +664,8 @@ static void four_partitions_find_best_combination_for_every_quantization_and_int for (int k = 0; k < 4; k++) // integer-count for third endpoint-pair { - int low3 = MIN(k, low2); - int high3 = MAX(k, high2); + int low3 = astc::min(k, low2); + int high3 = astc::max(k, high2); if ((high3 - low3) > 1) { continue; @@ -683,15 +673,15 @@ static void four_partitions_find_best_combination_for_every_quantization_and_int for (int l = 0; l < 4; l++) // integer-count for fourth endpoint-pair { - int low4 = MIN(l, low3); - int high4 = MAX(l, high3); + int low4 = astc::min(l, low3); + int high4 = astc::max(l, high3); if ((high4 - low4) > 1) { continue; } int intcnt = i + j + k + l; - float errorterm = MIN(best_error[0][quant][i] + best_error[1][quant][j] + best_error[2][quant][k] + best_error[3][quant][l], 1e10f); + float errorterm = astc::min(best_error[0][quant][i] + best_error[1][quant][j] + best_error[2][quant][k] + best_error[3][quant][l], 1e10f); if (errorterm <= combined_best_error[quant][intcnt]) { combined_best_error[quant][intcnt] = errorterm; @@ -712,8 +702,8 @@ static void four_partitions_find_best_combination_for_bitcount( float combined_best_error[21][13], int formats_of_choice[21][13][4], int bits_available, - int* best_quantization_level, - int* best_quantization_level_mod, + int* best_quant_level, + int* best_quant_level_mod, int* best_formats, float* error_of_best_combination ) { @@ -723,14 +713,14 @@ static void four_partitions_find_best_combination_for_bitcount( for (int integer_count = 4; integer_count <= 9; integer_count++) { // compute the quantization level for a given number of integers and a given number of bits. - int quantization_level = quantization_mode_table[integer_count][bits_available]; + int quant_level = quant_mode_table[integer_count][bits_available]; - if (quantization_level == -1) + if (quant_level == -1) { break; // used to indicate the case where we don't have enough bits to represent a given endpoint format at all. 
} - float integer_count_error = combined_best_error[quantization_level][integer_count - 4]; + float integer_count_error = combined_best_error[quant_level][integer_count - 4]; if (integer_count_error < best_integer_count_error) { @@ -739,17 +729,14 @@ static void four_partitions_find_best_combination_for_bitcount( } } - int ql = quantization_mode_table[best_integer_count][bits_available]; - int ql_mod = quantization_mode_table[best_integer_count][bits_available + 8]; + int ql = quant_mode_table[best_integer_count][bits_available]; + int ql_mod = quant_mode_table[best_integer_count][bits_available + 8]; - *best_quantization_level = ql; - *best_quantization_level_mod = ql_mod; + *best_quant_level = ql; + *best_quant_level_mod = ql_mod; *error_of_best_combination = best_integer_count_error; if (ql >= 0) { - // make sure this is postive too - assert(ql_mod >= 0 && ql_mod < 21); - for (int i = 0; i < 4; i++) { best_formats[i] = formats_of_choice[ql][best_integer_count - 4][i]; @@ -799,8 +786,8 @@ void determine_optimal_set_of_endpoint_formats_to_use( // output data int partition_format_specifiers[TUNE_MAX_TRIAL_CANDIDATES][4], int quantized_weight[TUNE_MAX_TRIAL_CANDIDATES], - int quantization_level[TUNE_MAX_TRIAL_CANDIDATES], - int quantization_level_mod[TUNE_MAX_TRIAL_CANDIDATES] + int quant_level[TUNE_MAX_TRIAL_CANDIDATES], + int quant_level_mod[TUNE_MAX_TRIAL_CANDIDATES] ) { int partition_count = pt->partition_count; @@ -814,45 +801,43 @@ void determine_optimal_set_of_endpoint_formats_to_use( compute_encoding_choice_errors(bsd, blk, pt, ewb, separate_component, eci); // for each partition, compute the error weights to apply for that partition. - float4 error_weightings[4]; - float4 dummied_color_scalefactors[4]; // only used to receive data - compute_partition_error_color_weightings(bsd, ewb, pt, error_weightings, dummied_color_scalefactors); + partition_metrics pms[4]; + + compute_partition_error_color_weightings(*ewb, *pt, pms); float best_error[4][21][4]; int format_of_choice[4][21][4]; for (int i = 0; i < partition_count; i++) { - compute_color_error_for_every_integer_count_and_quantization_level( + compute_color_error_for_every_integer_count_and_quant_level( encode_hdr_rgb, encode_hdr_alpha, i, - pt, &(eci[i]), ep, error_weightings, best_error[i], + pt, &(eci[i]), ep, pms[i].error_weight, best_error[i], format_of_choice[i]); } alignas(ASTCENC_VECALIGN) float errors_of_best_combination[MAX_WEIGHT_MODES]; - alignas(ASTCENC_VECALIGN) int best_quantization_levels[MAX_WEIGHT_MODES]; - int best_quantization_levels_mod[MAX_WEIGHT_MODES]; + alignas(ASTCENC_VECALIGN) int best_quant_levels[MAX_WEIGHT_MODES]; + int best_quant_levels_mod[MAX_WEIGHT_MODES]; int best_ep_formats[MAX_WEIGHT_MODES][4]; #if ASTCENC_SIMD_WIDTH > 1 // have to ensure that the "overstep" of the last iteration in the vectorized // loop will contain data that will never be picked as best candidate - const int packed_mode_count = bsd->block_mode_packed_count; - const int packed_mode_count_simd_up = (packed_mode_count + ASTCENC_SIMD_WIDTH - 1) / ASTCENC_SIMD_WIDTH * ASTCENC_SIMD_WIDTH; + const int packed_mode_count = bsd->block_mode_count; + const int packed_mode_count_simd_up = round_up_to_simd_multiple_vla(packed_mode_count); for (int i = packed_mode_count; i < packed_mode_count_simd_up; ++i) { errors_of_best_combination[i] = 1e30f; - best_quantization_levels[i] = 0; - best_quantization_levels_mod[i] = 0; + best_quant_levels[i] = 0; + best_quant_levels_mod[i] = 0; } #endif // #if ASTCENC_SIMD_WIDTH > 1 // code for the case 
where the block contains 1 partition if (partition_count == 1) { - int best_quantization_level; - int best_format; float error_of_best_combination; - for (int i = 0, ni = bsd->block_mode_packed_count; i < ni; ++i) + for (int i = 0; i < bsd->block_mode_count; ++i) { if (qwt_errors[i] >= 1e29f) { @@ -862,23 +847,16 @@ void determine_optimal_set_of_endpoint_formats_to_use( one_partition_find_best_combination_for_bitcount( best_error[0], format_of_choice[0], qwt_bitcounts[i], - &best_quantization_level, &best_format, &error_of_best_combination); + best_quant_levels + i, best_ep_formats[i], &error_of_best_combination); error_of_best_combination += qwt_errors[i]; errors_of_best_combination[i] = error_of_best_combination; - best_quantization_levels[i] = best_quantization_level; - best_quantization_levels_mod[i] = best_quantization_level; - best_ep_formats[i][0] = best_format; + best_quant_levels_mod[i] = best_quant_levels[i]; } } // code for the case where the block contains 2 partitions else if (partition_count == 2) { - int best_quantization_level; - int best_quantization_level_mod; - int best_formats[2]; - float error_of_best_combination; - float combined_best_error[21][7]; int formats_of_choice[21][7][2]; @@ -886,7 +864,7 @@ void determine_optimal_set_of_endpoint_formats_to_use( best_error, format_of_choice, combined_best_error, formats_of_choice); - for (int i = 0, ni = bsd->block_mode_packed_count; i < ni; ++i) + for (int i = 0; i < bsd->block_mode_count; ++i) { if (qwt_errors[i] >= 1e29f) { @@ -894,35 +872,25 @@ void determine_optimal_set_of_endpoint_formats_to_use( continue; } + float error_of_best_combination; two_partitions_find_best_combination_for_bitcount( combined_best_error, formats_of_choice, qwt_bitcounts[i], - &best_quantization_level, &best_quantization_level_mod, - best_formats, &error_of_best_combination); - - error_of_best_combination += qwt_errors[i]; + best_quant_levels + i, best_quant_levels_mod + i, + best_ep_formats[i], &error_of_best_combination); - errors_of_best_combination[i] = error_of_best_combination; - best_quantization_levels[i] = best_quantization_level; - best_quantization_levels_mod[i] = best_quantization_level_mod; - best_ep_formats[i][0] = best_formats[0]; - best_ep_formats[i][1] = best_formats[1]; + errors_of_best_combination[i] = error_of_best_combination + qwt_errors[i]; } } // code for the case where the block contains 3 partitions else if (partition_count == 3) { - int best_quantization_level; - int best_quantization_level_mod; - int best_formats[3]; - float error_of_best_combination; - float combined_best_error[21][10]; int formats_of_choice[21][10][3]; three_partitions_find_best_combination_for_every_quantization_and_integer_count( best_error, format_of_choice, combined_best_error, formats_of_choice); - for (int i = 0, ni = bsd->block_mode_packed_count; i < ni; ++i) + for (int i = 0; i < bsd->block_mode_count; ++i) { if (qwt_errors[i] >= 1e29f) { @@ -930,36 +898,25 @@ void determine_optimal_set_of_endpoint_formats_to_use( continue; } + float error_of_best_combination; three_partitions_find_best_combination_for_bitcount( combined_best_error, formats_of_choice, qwt_bitcounts[i], - &best_quantization_level, &best_quantization_level_mod, - best_formats, &error_of_best_combination); - - error_of_best_combination += qwt_errors[i]; + best_quant_levels + i, best_quant_levels_mod + i, + best_ep_formats[i], &error_of_best_combination); - errors_of_best_combination[i] = error_of_best_combination; - best_quantization_levels[i] = best_quantization_level; - 
best_quantization_levels_mod[i] = best_quantization_level_mod; - best_ep_formats[i][0] = best_formats[0]; - best_ep_formats[i][1] = best_formats[1]; - best_ep_formats[i][2] = best_formats[2]; + errors_of_best_combination[i] = error_of_best_combination + qwt_errors[i]; } } // code for the case where the block contains 4 partitions else if (partition_count == 4) { - int best_quantization_level; - int best_quantization_level_mod; - int best_formats[4]; - float error_of_best_combination; - float combined_best_error[21][13]; int formats_of_choice[21][13][4]; four_partitions_find_best_combination_for_every_quantization_and_integer_count( best_error, format_of_choice, combined_best_error, formats_of_choice); - for (int i = 0, ni = bsd->block_mode_packed_count; i < ni; ++i) + for (int i = 0; i < bsd->block_mode_count; ++i) { if (qwt_errors[i] >= 1e29f) { @@ -967,69 +924,46 @@ void determine_optimal_set_of_endpoint_formats_to_use( continue; } + float error_of_best_combination; four_partitions_find_best_combination_for_bitcount( combined_best_error, formats_of_choice, qwt_bitcounts[i], - &best_quantization_level, &best_quantization_level_mod, - best_formats, &error_of_best_combination); + best_quant_levels + i, best_quant_levels_mod + i, + best_ep_formats[i], &error_of_best_combination); - error_of_best_combination += qwt_errors[i]; - - errors_of_best_combination[i] = error_of_best_combination; - best_quantization_levels[i] = best_quantization_level; - best_quantization_levels_mod[i] = best_quantization_level_mod; - best_ep_formats[i][0] = best_formats[0]; - best_ep_formats[i][1] = best_formats[1]; - best_ep_formats[i][2] = best_formats[2]; - best_ep_formats[i][3] = best_formats[3]; + errors_of_best_combination[i] = error_of_best_combination + qwt_errors[i]; } } - // finally, go through the results and pick the best-looking modes. + // Go through the results and pick the best candidate modes int best_error_weights[TUNE_MAX_TRIAL_CANDIDATES]; + static_assert((MAX_WEIGHT_MODES % ASTCENC_SIMD_WIDTH) == 0, + "MAX_WEIGHT_MODES should be multiple of ASTCENC_SIMD_WIDTH"); for (int i = 0; i < tune_candidate_limit; i++) { -#if 0 - // reference; scalar code - float best_ep_error = 1e30f; - int best_error_index = -1; - for (int j = 0, npack = bsd->block_mode_packed_count; j < npack; ++j) - { - if (errors_of_best_combination[j] < best_ep_error && best_quantization_levels[j] >= 5) - { - best_ep_error = errors_of_best_combination[j]; - best_error_index = j; - } - } -#else - // find best mode, SIMD N-wide way - static_assert((MAX_WEIGHT_MODES % ASTCENC_SIMD_WIDTH) == 0, "MAX_WEIGHT_MODES should be multiple of ASTCENC_SIMD_WIDTH"); vint vbest_error_index(-1); vfloat vbest_ep_error(1e30f); vint lane_ids = vint::lane_id(); - for (int j = 0, npack = bsd->block_mode_packed_count; j < npack; j += ASTCENC_SIMD_WIDTH) + for (int j = 0; j < bsd->block_mode_count; j += ASTCENC_SIMD_WIDTH) { vfloat err = vfloat(&errors_of_best_combination[j]); vmask mask1 = err < vbest_ep_error; - vmask mask2 = vint(&best_quantization_levels[j]) > vint(4); + vmask mask2 = vint(&best_quant_levels[j]) > vint(4); vmask mask = mask1 & mask2; vbest_ep_error = select(vbest_ep_error, err, mask); vbest_error_index = select(vbest_error_index, lane_ids, mask); lane_ids = lane_ids + vint(ASTCENC_SIMD_WIDTH); } - // pick final best mode from the SIMD result. - // note that if multiple SIMD lanes have "best" score, - // we want to pick one with the lowest index, i.e. what - // would happen if code was purely scalar. 
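// A minimal scalar sketch of what the vectorized candidate selection computes
// (the helper name and signature below are illustrative, not part of astcenc):
// pick the lowest-index mode whose error is smallest among modes with a quant
// level above 4, then knock that entry out so the next pass finds the runner-up.
static int pick_best_candidate_scalar(float* errors, const int* quant_levels,
                                      int mode_count)
{
    float best_error = 1e30f;
    int best_index = -1;
    for (int j = 0; j < mode_count; j++)
    {
        // Strict '<' keeps the first (lowest-index) mode on ties, matching
        // the SIMD tie-break used here.
        if (quant_levels[j] > 4 && errors[j] < best_error)
        {
            best_error = errors[j];
            best_index = j;
        }
    }
    if (best_index >= 0)
    {
        errors[best_index] = 1e30f; // exclude this mode from the next pass
    }
    return best_index;
}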
- vmask lanes_with_min_error = vbest_ep_error == hmin(vbest_ep_error); - // take smallest index from the SIMD lanes that had the best score - vbest_error_index = select(vint(0x7fffffff), vbest_error_index, lanes_with_min_error); + // Pick best mode from the SIMD result. If multiple SIMD lanes have + // the best score, pick the one with the lowest index. + vmask lanes_min_error = vbest_ep_error == hmin(vbest_ep_error); + vbest_error_index = select(vint(0x7FFFFFFF), vbest_error_index, lanes_min_error); vbest_error_index = hmin(vbest_error_index); - int best_error_index = vbest_error_index.lane(0); -#endif + int best_error_index = vbest_error_index.lane<0>(); best_error_weights[i] = best_error_index; + // Max the error for this candidate so we don't pick it again if (best_error_index >= 0) { errors_of_best_combination[best_error_index] = 1e30f; @@ -1038,21 +972,15 @@ void determine_optimal_set_of_endpoint_formats_to_use( for (int i = 0; i < tune_candidate_limit; i++) { - int weight = best_error_weights[i]; - quantized_weight[i] = weight; - if (weight >= 0) + quantized_weight[i] = best_error_weights[i]; + if (quantized_weight[i] >= 0) { - int level = best_quantization_levels[weight]; - int level_mod = best_quantization_levels_mod[weight]; - - assert(level >= 0 && level < 21); - assert(level_mod >= 0 && level_mod < 21); - - quantization_level[i] = level; - quantization_level_mod[i] = level_mod; + quant_level[i] = best_quant_levels[best_error_weights[i]]; + assert(quant_level[i] >= 0 && quant_level[i] < 21); + quant_level_mod[i] = best_quant_levels_mod[best_error_weights[i]]; for (int j = 0; j < partition_count; j++) { - partition_format_specifiers[i][j] = best_ep_formats[weight][j]; + partition_format_specifiers[i][j] = best_ep_formats[best_error_weights[i]][j]; } } } diff --git a/libkram/astc-encoder/astcenc_platform_isa_detection.cpp b/libkram/astc-encoder/astcenc_platform_isa_detection.cpp index 4ed1ee2d..3766aa51 100644 --- a/libkram/astc-encoder/astcenc_platform_isa_detection.cpp +++ b/libkram/astc-encoder/astcenc_platform_isa_detection.cpp @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2020 Arm Limited +// Copyright 2020-2021 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. You may obtain a copy @@ -15,7 +15,6 @@ // under the License. // ---------------------------------------------------------------------------- -#if (ASTCENC_SSE > 0) || (ASTCENC_AVX > 0) || (ASTCENC_POPCNT > 0) /** * @brief Platform-specific function implementations. 
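// The next hunk switches detection from SSE4.2 to SSE4.1 and adds F16C: CPUID
// leaf 1 reports SSE4.1 in ECX bit 19, POPCNT in bit 23, and F16C in bit 29.
// A rough standalone sketch of the same probe for GCC/Clang on x86-64 (the
// struct and function names here are illustrative, not part of astcenc):
#include <cpuid.h>

struct cpu_features { bool sse41; bool popcnt; bool f16c; };

static cpu_features query_cpu_features()
{
    cpu_features f {};
    unsigned int eax, ebx, ecx, edx;
    if (__get_cpuid_count(1, 0, &eax, &ebx, &ecx, &edx))
    {
        f.sse41  = (ecx >> 19) & 1;
        f.popcnt = (ecx >> 23) & 1;
        f.f16c   = (ecx >> 29) & 1;
    }
    return f;
}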
@@ -25,26 +24,18 @@ #include "astcenc_internal.h" -int cpu_supports_sse42() { - return 1; -} -int cpu_supports_popcnt() { - return 1; -} -// kram only wants avx1 for now -int cpu_supports_avx2() { - return 0; -} +#if (ASTCENC_SSE > 0) || (ASTCENC_AVX > 0) || \ + (ASTCENC_POPCNT > 0) || (ASTCENC_F16C > 0) -#if 0 -static int g_cpu_has_sse42 = -1; +static int g_cpu_has_sse41 = -1; static int g_cpu_has_avx2 = -1; static int g_cpu_has_popcnt = -1; +static int g_cpu_has_f16c = -1; /* ============================================================================ Platform code for Visual Studio ============================================================================ */ -#if defined(_MSC_VER) +#if !defined(__clang__) && defined(_MSC_VER) #include static void detect_cpu_isa() @@ -54,15 +45,18 @@ static void detect_cpu_isa() __cpuid(data, 0); int num_id = data[0]; - g_cpu_has_sse42 = 0; + g_cpu_has_sse41 = 0; g_cpu_has_popcnt = 0; + g_cpu_has_f16c = 0; if (num_id >= 1) { __cpuidex(data, 1, 0); - // SSE42 = Bank 1, ECX, bit 20 - g_cpu_has_sse42 = data[2] & (1 << 20) ? 1 : 0; + // SSE41 = Bank 1, ECX, bit 19 + g_cpu_has_sse41 = data[2] & (1 << 19) ? 1 : 0; // POPCNT = Bank 1, ECX, bit 23 g_cpu_has_popcnt = data[2] & (1 << 23) ? 1 : 0; + // F16C = Bank 1, ECX, bit 29 + g_cpu_has_f16c = data[2] & (1 << 29) ? 1 : 0; } g_cpu_has_avx2 = 0; @@ -84,14 +78,17 @@ static void detect_cpu_isa() { unsigned int data[4]; - g_cpu_has_sse42 = 0; + g_cpu_has_sse41 = 0; g_cpu_has_popcnt = 0; + g_cpu_has_f16c = 0; if (__get_cpuid_count(1, 0, &data[0], &data[1], &data[2], &data[3])) { - // SSE42 = Bank 1, ECX, bit 20 - g_cpu_has_sse42 = data[2] & (1 << 20) ? 1 : 0; + // SSE41 = Bank 1, ECX, bit 19 + g_cpu_has_sse41 = data[2] & (1 << 19) ? 1 : 0; // POPCNT = Bank 1, ECX, bit 23 g_cpu_has_popcnt = data[2] & (1 << 23) ? 1 : 0; + // F16C = Bank 1, ECX, bit 29 + g_cpu_has_f16c = data[2] & (1 << 29) ? 1 : 0; } g_cpu_has_avx2 = 0; @@ -104,14 +101,14 @@ static void detect_cpu_isa() #endif /* Public function, see header file for detailed documentation */ -int cpu_supports_sse42() +int cpu_supports_sse41() { - if (g_cpu_has_sse42 == -1) + if (g_cpu_has_sse41 == -1) { detect_cpu_isa(); } - return g_cpu_has_sse42; + return g_cpu_has_sse41; } /* Public function, see header file for detailed documentation */ @@ -125,6 +122,17 @@ int cpu_supports_popcnt() return g_cpu_has_popcnt; } +/* Public function, see header file for detailed documentation */ +int cpu_supports_f16c() +{ + if (g_cpu_has_f16c == -1) + { + detect_cpu_isa(); + } + + return g_cpu_has_f16c; +} + /* Public function, see header file for detailed documentation */ int cpu_supports_avx2() { @@ -137,4 +145,3 @@ int cpu_supports_avx2() } #endif -#endif diff --git a/libkram/astc-encoder/astcenc_quantization.cpp b/libkram/astc-encoder/astcenc_quantization.cpp index 7ce26b08..afc10160 100644 --- a/libkram/astc-encoder/astcenc_quantization.cpp +++ b/libkram/astc-encoder/astcenc_quantization.cpp @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2011-2020 Arm Limited +// Copyright 2011-2021 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. 
You may obtain a copy @@ -23,7 +23,7 @@ #if !defined(ASTCENC_DECOMPRESS_ONLY) -const uint8_t color_quantization_tables[21][256] = { +const uint8_t color_quant_tables[21][256] = { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -406,7 +406,7 @@ const uint8_t color_quantization_tables[21][256] = { #endif -const uint8_t color_unquantization_tables[21][256] = { +const uint8_t color_unquant_tables[21][256] = { { 0, 255 }, @@ -533,19 +533,18 @@ const uint8_t color_unquantization_tables[21][256] = { } }; -// quantization_mode_table[integercount/2][bits] gives -// us the quantization level for a given integer count and number of bits that -// the integer may fit into. This is needed for color decoding, -// and for the color encoding. -int quantization_mode_table[17][128]; +// The quant_mode_table[integercount/2][bits] gives us the quantization +// level for a given integer count and number of bits that the integer may fit +// into. This is needed for color decoding, and for the color encoding. +int8_t quant_mode_table[17][128]; -void build_quantization_mode_table() +void build_quant_mode_table() { for (int i = 0; i <= 16; i++) { for (int j = 0; j < 128; j++) { - quantization_mode_table[i][j] = -1; + quant_mode_table[i][j] = -1; } } @@ -553,10 +552,10 @@ void build_quantization_mode_table() { for (int j = 1; j <= 16; j++) { - int p = compute_ise_bitcount(2 * j, (quantization_method) i); + int p = get_ise_sequence_bitcount(2 * j, (quant_method)i); if (p < 128) { - quantization_mode_table[j][p] = i; + quant_mode_table[j][p] = i; } } } @@ -566,13 +565,13 @@ void build_quantization_mode_table() int largest_value_so_far = -1; for (int j = 0; j < 128; j++) { - if (quantization_mode_table[i][j] > largest_value_so_far) + if (quant_mode_table[i][j] > largest_value_so_far) { - largest_value_so_far = quantization_mode_table[i][j]; + largest_value_so_far = quant_mode_table[i][j]; } else { - quantization_mode_table[i][j] = largest_value_so_far; + quant_mode_table[i][j] = largest_value_so_far; } } } diff --git a/libkram/astc-encoder/astcenc_symbolic_physical.cpp b/libkram/astc-encoder/astcenc_symbolic_physical.cpp index a486f884..894dc933 100644 --- a/libkram/astc-encoder/astcenc_symbolic_physical.cpp +++ b/libkram/astc-encoder/astcenc_symbolic_physical.cpp @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2011-2020 Arm Limited +// Copyright 2011-2021 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. You may obtain a copy @@ -78,7 +78,7 @@ void symbolic_to_physical( // This encodes separate constant-color blocks. There is currently // no attempt to coalesce them into larger void-extents. - static const uint8_t cbytes[8] = { 0xFC, 0xFD, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; + static const uint8_t cbytes[8] { 0xFC, 0xFD, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; for (int i = 0; i < 8; i++) { pcb.data[i] = cbytes[i]; @@ -99,7 +99,7 @@ void symbolic_to_physical( // This encodes separate constant-color blocks. There is currently // no attempt to coalesce them into larger void-extents. 
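// The build_quant_mode_table() change in astcenc_quantization.cpp above fills
// quant_mode_table[pair_count][bits] with the highest quant level whose ISE
// encoding of 2*pair_count values fits the bit budget, then makes each row
// monotonic in the bit count. A generic sketch of that fill pattern, with a
// caller-supplied callback standing in for get_ise_sequence_bitcount
// (illustrative only, not code from the patch):
static void build_monotone_table(int8_t table[17][128],
                                 int (*ise_bitcount)(int value_count, int level))
{
    for (int j = 0; j <= 16; j++)
        for (int b = 0; b < 128; b++)
            table[j][b] = -1;

    // Seed exact fits; later (higher) levels overwrite earlier ones if they
    // land on the same bit count.
    for (int level = 0; level < 21; level++)
    {
        for (int j = 1; j <= 16; j++)
        {
            int bits = ise_bitcount(2 * j, level);
            if (bits < 128)
                table[j][bits] = (int8_t)level;
        }
    }

    // Carry the running maximum forward so any larger bit budget is at least
    // as good as a smaller one.
    for (int j = 0; j <= 16; j++)
    {
        int8_t best = -1;
        for (int b = 0; b < 128; b++)
        {
            if (table[j][b] > best)
                best = table[j][b];
            else
                table[j][b] = best;
        }
    }
}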
- static const uint8_t cbytes[8] = { 0xFC, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; + static const uint8_t cbytes[8] { 0xFC, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; for (int i = 0; i < 8; i++) { pcb.data[i] = cbytes[i]; @@ -124,34 +124,34 @@ void symbolic_to_physical( weightbuf[i] = 0; } - const decimation_table *const *ixtab2 = bsd.decimation_tables; + const decimation_table *const *dts = bsd.decimation_tables; - const int packed_index = bsd.block_mode_to_packed[scb.block_mode]; - assert(packed_index >= 0 && packed_index < bsd.block_mode_packed_count); - const block_mode& bm = bsd.block_modes_packed[packed_index]; + const int packed_index = bsd.block_mode_packed_index[scb.block_mode]; + assert(packed_index >= 0 && packed_index < bsd.block_mode_count); + const block_mode& bm = bsd.block_modes[packed_index]; - int weight_count = ixtab2[bm.decimation_mode]->num_weights; - int weight_quantization_method = bm.quantization_mode; + int weight_count = dts[bm.decimation_mode]->weight_count; + int weight_quant_method = bm.quant_mode; int is_dual_plane = bm.is_dual_plane; int real_weight_count = is_dual_plane ? 2 * weight_count : weight_count; - int bits_for_weights = compute_ise_bitcount(real_weight_count, - (quantization_method) weight_quantization_method); + int bits_for_weights = get_ise_sequence_bitcount(real_weight_count, + (quant_method)weight_quant_method); if (is_dual_plane) { uint8_t weights[64]; for (int i = 0; i < weight_count; i++) { - weights[2 * i] = scb.plane1_weights[i]; - weights[2 * i + 1] = scb.plane2_weights[i]; + weights[2 * i] = scb.weights[i]; + weights[2 * i + 1] = scb.weights[i + PLANE2_WEIGHTS_OFFSET]; } - encode_ise(weight_quantization_method, real_weight_count, weights, weightbuf, 0); + encode_ise(weight_quant_method, real_weight_count, weights, weightbuf, 0); } else { - encode_ise(weight_quantization_method, weight_count, scb.plane1_weights, weightbuf, 0); + encode_ise(weight_quant_method, weight_count, scb.weights, weightbuf, 0); } for (int i = 0; i < 16; i++) @@ -184,10 +184,7 @@ void symbolic_to_physical( for (int i = 0; i < partition_count; i++) { int class_of_format = scb.color_formats[i] >> 2; - if (class_of_format < low_class) - { - low_class = class_of_format; - } + low_class = astc::min(class_of_format, low_class); } if (low_class == 3) @@ -247,7 +244,7 @@ void symbolic_to_physical( } // then, encode an ISE based on them. - encode_ise(scb.color_quantization_level, valuecount_to_encode, values_to_encode, pcb.data, (scb.partition_count == 1 ? 17 : 19 + PARTITION_BITS)); + encode_ise(scb.color_quant_level, valuecount_to_encode, values_to_encode, pcb.data, (scb.partition_count == 1 ? 17 : 19 + PARTITION_BITS)); } void physical_to_symbolic( @@ -260,7 +257,7 @@ void physical_to_symbolic( scb.error_block = 0; // get hold of the decimation tables. 
- const decimation_table *const *ixtab2 = bsd.decimation_tables; + const decimation_table *const *dts = bsd.decimation_tables; // extract header fields int block_mode = read_bits(11, 0, pcb.data); @@ -327,17 +324,17 @@ void physical_to_symbolic( return; } - const int packed_index = bsd.block_mode_to_packed[block_mode]; + const int packed_index = bsd.block_mode_packed_index[block_mode]; if (packed_index < 0) { scb.error_block = 1; return; } - assert(packed_index >= 0 && packed_index < bsd.block_mode_packed_count); - const struct block_mode& bm = bsd.block_modes_packed[packed_index]; + assert(packed_index >= 0 && packed_index < bsd.block_mode_count); + const struct block_mode& bm = bsd.block_modes[packed_index]; - int weight_count = ixtab2[bm.decimation_mode]->num_weights; - int weight_quantization_method = bm.quantization_mode; + int weight_count = dts[bm.decimation_mode]->weight_count; + int weight_quant_method = bm.quant_mode; int is_dual_plane = bm.is_dual_plane; int real_weight_count = is_dual_plane ? 2 * weight_count : weight_count; @@ -352,24 +349,24 @@ void physical_to_symbolic( bswapped[i] = bitrev8(pcb.data[15 - i]); } - int bits_for_weights = compute_ise_bitcount(real_weight_count, - (quantization_method) weight_quantization_method); + int bits_for_weights = get_ise_sequence_bitcount(real_weight_count, + (quant_method)weight_quant_method); int below_weights_pos = 128 - bits_for_weights; if (is_dual_plane) { uint8_t indices[64]; - decode_ise(weight_quantization_method, real_weight_count, bswapped, indices, 0); + decode_ise(weight_quant_method, real_weight_count, bswapped, indices, 0); for (int i = 0; i < weight_count; i++) { - scb.plane1_weights[i] = indices[2 * i]; - scb.plane2_weights[i] = indices[2 * i + 1]; + scb.weights[i] = indices[2 * i]; + scb.weights[i + PLANE2_WEIGHTS_OFFSET] = indices[2 * i + 1]; } } else { - decode_ise(weight_quantization_method, weight_count, bswapped, scb.plane1_weights, 0); + decode_ise(weight_quant_method, weight_count, bswapped, scb.weights, 0); } if (is_dual_plane && partition_count == 4) @@ -443,7 +440,7 @@ void physical_to_symbolic( } // then, determine the color endpoint format to use for these integers - static const int color_bits_arr[5] = { -1, 115 - 4, 113 - 4 - PARTITION_BITS, 113 - 4 - PARTITION_BITS, 113 - 4 - PARTITION_BITS }; + static const int color_bits_arr[5] { -1, 115 - 4, 113 - 4 - PARTITION_BITS, 113 - 4 - PARTITION_BITS, 113 - 4 - PARTITION_BITS }; int color_bits = color_bits_arr[partition_count] - bits_for_weights - encoded_type_highpart_size; if (is_dual_plane) { @@ -455,16 +452,16 @@ void physical_to_symbolic( color_bits = 0; } - int color_quantization_level = quantization_mode_table[color_integer_count >> 1][color_bits]; - scb.color_quantization_level = color_quantization_level; - if (color_quantization_level < 4) + int color_quant_level = quant_mode_table[color_integer_count >> 1][color_bits]; + scb.color_quant_level = color_quant_level; + if (color_quant_level < 4) { scb.error_block = 1; } // then unpack the integer-bits uint8_t values_to_decode[32]; - decode_ise(color_quantization_level, color_integer_count, pcb.data, values_to_decode, (partition_count == 1 ? 17 : 19 + PARTITION_BITS)); + decode_ise(color_quant_level, color_integer_count, pcb.data, values_to_decode, (partition_count == 1 ? 
17 : 19 + PARTITION_BITS)); // and distribute them over the endpoint types int valuecount_to_decode = 0; diff --git a/libkram/astc-encoder/astcenc_vecmathlib.h b/libkram/astc-encoder/astcenc_vecmathlib.h index a1b90661..aed6752c 100644 --- a/libkram/astc-encoder/astcenc_vecmathlib.h +++ b/libkram/astc-encoder/astcenc_vecmathlib.h @@ -1,6 +1,7 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2019-2020 Arm Limited +// Copyright 2019-2021 Arm Limited +// Copyright 2008 Jose Fonseca // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. You may obtain a copy @@ -16,724 +17,508 @@ // ---------------------------------------------------------------------------- /* - * This module implements flexible N-wide float and integer vectors, where the - * width can be selected at compile time depending on the underlying ISA. It - * is not possible to mix different ISAs (or vector widths) in a single file - - * the ISA is statically selected when the header is first included. + * This module implements vector support for floats, ints, and vector lane + * control masks. It provides access to both explicit vector width types, and + * flexible N-wide types where N can be determined at compile time. * - * ISA support is provided for: + * The design of this module encourages use of vector length agnostic code, via + * the vint, vfloat, and vmask types. These will take on the widest SIMD vector + * with that is available at compile time. The current vector width is + * accessible for e.g. loop strides via the ASTCENC_SIMD_WIDTH constant. + * + * Explicit scalar types are acessible via the vint1, vfloat1, vmask1 types. + * These are provided primarily for prototyping and algorithm debug of VLA + * implementations. + * + * Explicit 4-wide types are accessible via the vint4, vfloat4, and vmask4 + * types. These are provided for use by VLA code, but are also expected to be + * used as a fixed-width type and will supported a reference C++ fallback for + * use on platforms without SIMD intrinsics. + * + * Explicit 8-wide types are accessible via the vint8, vfloat8, and vmask8 + * types. These are provide for use by VLA code, and are not expected to be + * used as a fixed-width type in normal code. No reference C implementation is + * provided on platforms without underlying SIMD intrinsics. + * + * With the current implementation ISA support is provided for: * * * 1-wide for scalar reference. - * * 4-wide for SSE2. - * * 4-wide for SSE4.2. - * * 8-wide for AVX2. + * * 4-wide for Armv8-A NEON. + * * 4-wide for x86-64 SSE2. + * * 4-wide for x86-64 SSE4.1. + * * 8-wide for x86-64 AVX2. 
* */ #ifndef ASTC_VECMATHLIB_H_INCLUDED #define ASTC_VECMATHLIB_H_INCLUDED -// Kram uses SSE2Neon on ARM, so needs intrinsics in use but not the include -//#if /* USE_SSE && */ (ASTCENC_SSE != 0 || ASTCENC_AVX != 0) -// #include -//#endif +#if ASTCENC_SSE != 0 || ASTCENC_AVX != 0 + #include +#elif ASTCENC_NEON != 0 + #include +#endif -// This conflicts with simd.h library definition -#if defined(_MSC_VER) +#if !defined(__clang__) && defined(_MSC_VER) #define ASTCENC_SIMD_INLINE __forceinline #elif defined(__GNUC__) && !defined(__clang__) - #define ASTCENC_SIMD_INLINE __attribute__((unused, always_inline)) inline + #define ASTCENC_SIMD_INLINE __attribute__((always_inline)) inline #else - #define ASTCENC_SIMD_INLINE __attribute__((unused, always_inline, nodebug)) inline + #define ASTCENC_SIMD_INLINE __attribute__((always_inline, nodebug)) inline #endif #if ASTCENC_AVX >= 2 - #define ASTCENC_SIMD_ISA_AVX2 -#elif ASTCENC_SSE >= 20 - #define ASTCENC_SIMD_ISA_SSE -#else - #define ASTCENC_SIMD_ISA_SCALAR -#endif + /* If we have AVX2 expose 8-wide VLA. */ + #include "astcenc_vecmathlib_sse_4.h" + #include "astcenc_vecmathlib_common_4.h" + #include "astcenc_vecmathlib_avx2_8.h" + #define ASTCENC_SIMD_WIDTH 8 -// ---------------------------------------------------------------------------- -// AVX2 8-wide implementation + using vfloat = vfloat8; + using vint = vint8; + using vmask = vmask8; -#ifdef ASTCENC_SIMD_ISA_AVX2 + constexpr auto loada = vfloat8::loada; + constexpr auto load1 = vfloat8::load1; -#define ASTCENC_SIMD_WIDTH 8 - -// N-wide float -struct vfloat -{ - ASTCENC_SIMD_INLINE vfloat() {} - // Initialize with N floats from an unaligned memory address. - // Using loada() when address is aligned might be more optimal. - ASTCENC_SIMD_INLINE explicit vfloat(const float *p) { m = _mm256_loadu_ps(p); } - // Initialize with the same given float value in all lanes. - ASTCENC_SIMD_INLINE explicit vfloat(float v) { m = _mm256_set1_ps(v); } - - ASTCENC_SIMD_INLINE explicit vfloat(__m256 v) { m = v; } - - // Get SIMD lane #i value. - ASTCENC_SIMD_INLINE float lane(int i) const - { - #ifdef _MSC_VER - return m.m256_f32[i]; - #else - union { __m256 m; float f[ASTCENC_SIMD_WIDTH]; } cvt; - cvt.m = m; - return cvt.f[i]; - #endif - } +#elif ASTCENC_SSE >= 20 + /* If we have SSE expose 4-wide VLA, and 4-wide fixed width. */ + #include "astcenc_vecmathlib_sse_4.h" + #include "astcenc_vecmathlib_common_4.h" - // Float vector with all zero values - static ASTCENC_SIMD_INLINE vfloat zero() { return vfloat(_mm256_setzero_ps()); } + #define ASTCENC_SIMD_WIDTH 4 - // Float vector with each lane having the lane index (0, 1, 2, ...) - static ASTCENC_SIMD_INLINE vfloat lane_id() { return vfloat(_mm256_set_ps(7, 6, 5, 4, 3, 2, 1, 0)); } + using vfloat = vfloat4; + using vint = vint4; + using vmask = vmask4; - __m256 m; -}; + constexpr auto loada = vfloat4::loada; + constexpr auto load1 = vfloat4::load1; -// N-wide integer (32 bit in each lane) -struct vint -{ - ASTCENC_SIMD_INLINE vint() {} - // Initialize with N ints from an unaligned memory address. - ASTCENC_SIMD_INLINE explicit vint(const int *p) { m = _mm256_loadu_si256((const __m256i*)p); } - // Initialize with the same given integer value in all lanes. - ASTCENC_SIMD_INLINE explicit vint(int v) { m = _mm256_set1_epi32(v); } +#elif ASTCENC_NEON > 0 + /* If we have NEON expose 4-wide VLA. 
*/ + #include "astcenc_vecmathlib_neon_4.h" + #include "astcenc_vecmathlib_common_4.h" - ASTCENC_SIMD_INLINE explicit vint(__m256i v) { m = v; } + #define ASTCENC_SIMD_WIDTH 4 - // Get SIMD lane #i value - ASTCENC_SIMD_INLINE int lane(int i) const - { - #ifdef _MSC_VER - return m.m256i_i32[i]; - #else - union { __m256i m; int f[ASTCENC_SIMD_WIDTH]; } cvt; - cvt.m = m; - return cvt.f[i]; - #endif - } + using vfloat = vfloat4; + using vint = vint4; + using vmask = vmask4; - // Integer vector with each lane having the lane index (0, 1, 2, ...) - static ASTCENC_SIMD_INLINE vint lane_id() { return vint(_mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0)); } + constexpr auto loada = vfloat4::loada; + constexpr auto load1 = vfloat4::load1; - __m256i m; -}; +#else + // If we have nothing expose 4-wide VLA, and 4-wide fixed width. + + // Note: We no longer expose the 1-wide scalar fallback because it is not + // invariant with the 4-wide path due to algorithms that use horizontal + // operations that accumulate a local vector sum before accumulating into + // a running sum. + // + // For 4 items adding into an accumulator using 1-wide vectors the sum is: + // + // result = ((((sum + l0) + l1) + l2) + l3) + // + // ... whereas the accumulator for a 4-wide vector sum is: + // + // result = sum + ((l0 + l2) + (l1 + l3)) + // + // In "normal maths" this is the same, but the floating point reassociation + // differences mean that these will not produce the same result. + + #include "astcenc_vecmathlib_none_4.h" + #include "astcenc_vecmathlib_common_4.h" + + #define ASTCENC_SIMD_WIDTH 4 + + using vfloat = vfloat4; + using vint = vint4; + using vmask = vmask4; + + constexpr auto loada = vfloat4::loada; + constexpr auto load1 = vfloat4::load1; +#endif -// N-wide comparison mask. vmask is a result of comparison operators, -// and an argument for select() function below. -struct vmask -{ - ASTCENC_SIMD_INLINE explicit vmask(__m256 v) { m = v; } - ASTCENC_SIMD_INLINE explicit vmask(__m256i v) { m = _mm256_castsi256_ps(v); } - __m256 m; -}; - -// Initialize with one float in all SIMD lanes, from an aligned memory address. -ASTCENC_SIMD_INLINE vfloat load1a(const float* p) { return vfloat(_mm256_broadcast_ss(p)); } -// Initialize with N floats from an aligned memory address. 
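// The reassociation note above (about dropping the 1-wide scalar fallback) in
// concrete terms: a serial accumulation and a pairwise 4-lane horizontal sum
// associate the additions differently, so they need not produce the same float.
// This is an illustrative example only, not code from the patch.
static float sum_serial(const float v[4])
{
    float s = 0.0f; // ((((0 + v0) + v1) + v2) + v3)
    for (int i = 0; i < 4; i++)
    {
        s += v[i];
    }
    return s;
}

static float sum_pairwise(const float v[4])
{
    return (v[0] + v[2]) + (v[1] + v[3]); // the 4-wide horizontal-add order
}

// With v = { 1e8f, 1.0f, -1e8f, 1.0f }, sum_serial() returns 1.0f (the first
// 1.0f is absorbed by 1e8f) while sum_pairwise() returns 2.0f, which is why
// mixing vector widths would break bit-exact invariance.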
-ASTCENC_SIMD_INLINE vfloat loada(const float* p) { return vfloat(_mm256_load_ps(p)); } - -// Per-lane float arithmetic operations -ASTCENC_SIMD_INLINE vfloat operator+ (vfloat a, vfloat b) { a.m = _mm256_add_ps(a.m, b.m); return a; } -ASTCENC_SIMD_INLINE vfloat operator- (vfloat a, vfloat b) { a.m = _mm256_sub_ps(a.m, b.m); return a; } -ASTCENC_SIMD_INLINE vfloat operator* (vfloat a, vfloat b) { a.m = _mm256_mul_ps(a.m, b.m); return a; } -ASTCENC_SIMD_INLINE vfloat operator/ (vfloat a, vfloat b) { a.m = _mm256_div_ps(a.m, b.m); return a; } - -// Per-lane float comparison operations -ASTCENC_SIMD_INLINE vmask operator==(vfloat a, vfloat b) { return vmask(_mm256_cmp_ps(a.m, b.m, _CMP_EQ_OQ)); } -ASTCENC_SIMD_INLINE vmask operator!=(vfloat a, vfloat b) { return vmask(_mm256_cmp_ps(a.m, b.m, _CMP_NEQ_OQ)); } -ASTCENC_SIMD_INLINE vmask operator< (vfloat a, vfloat b) { return vmask(_mm256_cmp_ps(a.m, b.m, _CMP_LT_OQ)); } -ASTCENC_SIMD_INLINE vmask operator> (vfloat a, vfloat b) { return vmask(_mm256_cmp_ps(a.m, b.m, _CMP_GT_OQ)); } -ASTCENC_SIMD_INLINE vmask operator<=(vfloat a, vfloat b) { return vmask(_mm256_cmp_ps(a.m, b.m, _CMP_LE_OQ)); } -ASTCENC_SIMD_INLINE vmask operator>=(vfloat a, vfloat b) { return vmask(_mm256_cmp_ps(a.m, b.m, _CMP_GE_OQ)); } - -// Logical operations on comparison mask values -ASTCENC_SIMD_INLINE vmask operator| (vmask a, vmask b) { return vmask(_mm256_or_ps(a.m, b.m)); } -ASTCENC_SIMD_INLINE vmask operator& (vmask a, vmask b) { return vmask(_mm256_and_ps(a.m, b.m)); } -ASTCENC_SIMD_INLINE vmask operator^ (vmask a, vmask b) { return vmask(_mm256_xor_ps(a.m, b.m)); } - -// Returns a 8-bit code where bit0..bit7 map to lanes -ASTCENC_SIMD_INLINE unsigned mask(vmask v) { return _mm256_movemask_ps(v.m); } -// Whether any lane in the comparison mask is set -ASTCENC_SIMD_INLINE bool any(vmask v) { return mask(v) != 0; } -// Whether all lanes in the comparison mask are set -ASTCENC_SIMD_INLINE bool all(vmask v) { return mask(v) == 0xFF; } - -// Per-lane float min & max -ASTCENC_SIMD_INLINE vfloat min(vfloat a, vfloat b) { a.m = _mm256_min_ps(a.m, b.m); return a; } -ASTCENC_SIMD_INLINE vfloat max(vfloat a, vfloat b) { a.m = _mm256_max_ps(a.m, b.m); return a; } - -// Per-lane clamp to 0..1 range -ASTCENC_SIMD_INLINE vfloat saturate(vfloat a) +/** + * @brief Round a count down to the largest multiple of 8. + * + * @param count The unrounded value. + * + * @return The rounded value. + */ +ASTCENC_SIMD_INLINE int round_down_to_simd_multiple_8(int count) { - __m256 zero = _mm256_setzero_ps(); - __m256 one = _mm256_set1_ps(1.0f); - return vfloat(_mm256_min_ps(_mm256_max_ps(a.m, zero), one)); + return count & ~(8 - 1); } -ASTCENC_SIMD_INLINE vfloat abs(vfloat x) +/** + * @brief Round a count down to the largest multiple of 4. + * + * @param count The unrounded value. + * + * @return The rounded value. + */ +ASTCENC_SIMD_INLINE int round_down_to_simd_multiple_4(int count) { - __m256 msk = _mm256_castsi256_ps(_mm256_set1_epi32(0x7fffffff)); - return vfloat(_mm256_and_ps(x.m, msk)); + return count & ~(4 - 1); } -// Round to nearest integer (nearest even for .5 cases) -ASTCENC_SIMD_INLINE vfloat round(vfloat v) +/** + * @brief Round a count down to the largest multiple of the SIMD width. + * + * Assumption that the vector width is a power of two ... + * + * @param count The unrounded value. + * + * @return The rounded value. 
+ */ +ASTCENC_SIMD_INLINE int round_down_to_simd_multiple_vla(int count) { - return vfloat(_mm256_round_ps(v.m, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)); + return count & ~(ASTCENC_SIMD_WIDTH - 1); } -// Per-lane convert to integer (truncate) -ASTCENC_SIMD_INLINE vint floatToInt(vfloat v) { return vint(_mm256_cvttps_epi32(v.m)); } - -// Reinterpret-bitcast integer vector as a float vector (this is basically a no-op on the CPU) -ASTCENC_SIMD_INLINE vfloat intAsFloat(vint v) { return vfloat(_mm256_castsi256_ps(v.m)); } -// Reinterpret-bitcast float vector as an integer vector (this is basically a no-op on the CPU) -ASTCENC_SIMD_INLINE vint floatAsInt(vfloat v) { return vint(_mm256_castps_si256(v.m)); } - -ASTCENC_SIMD_INLINE vint operator~ (vint a) { return vint(_mm256_xor_si256(a.m, _mm256_set1_epi32(-1))); } -ASTCENC_SIMD_INLINE vmask operator~ (vmask a) { return vmask(_mm256_xor_si256(_mm256_castps_si256(a.m), _mm256_set1_epi32(-1))); } - -// Per-lane arithmetic integer operations -ASTCENC_SIMD_INLINE vint operator+ (vint a, vint b) { a.m = _mm256_add_epi32(a.m, b.m); return a; } -ASTCENC_SIMD_INLINE vint operator- (vint a, vint b) { a.m = _mm256_sub_epi32(a.m, b.m); return a; } - -// Per-lane logical bit operations -ASTCENC_SIMD_INLINE vint operator| (vint a, vint b) { return vint(_mm256_or_si256(a.m, b.m)); } -ASTCENC_SIMD_INLINE vint operator& (vint a, vint b) { return vint(_mm256_and_si256(a.m, b.m)); } -ASTCENC_SIMD_INLINE vint operator^ (vint a, vint b) { return vint(_mm256_xor_si256(a.m, b.m)); } - -// Per-lane integer comparison operations -ASTCENC_SIMD_INLINE vmask operator< (vint a, vint b) { return vmask(_mm256_cmpgt_epi32(b.m, a.m)); } -ASTCENC_SIMD_INLINE vmask operator> (vint a, vint b) { return vmask(_mm256_cmpgt_epi32(a.m, b.m)); } -ASTCENC_SIMD_INLINE vmask operator==(vint a, vint b) { return vmask(_mm256_cmpeq_epi32(a.m, b.m)); } -ASTCENC_SIMD_INLINE vmask operator!=(vint a, vint b) { return ~vmask(_mm256_cmpeq_epi32(a.m, b.m)); } - -// Per-lane integer min & max -ASTCENC_SIMD_INLINE vint min(vint a, vint b) { a.m = _mm256_min_epi32(a.m, b.m); return a; } -ASTCENC_SIMD_INLINE vint max(vint a, vint b) { a.m = _mm256_max_epi32(a.m, b.m); return a; } - -// Horizontal minimum - returns vector with all lanes -// set to the minimum value of the input vector. -ASTCENC_SIMD_INLINE vfloat hmin(vfloat v) +/** + * @brief Round a count up to the largest multiple of the SIMD width. + * + * Assumption that the vector width is a power of two ... + * + * @param count The unrounded value. + * + * @return The rounded value. + */ +ASTCENC_SIMD_INLINE int round_up_to_simd_multiple_vla(int count) { - __m128 vlow = _mm256_castps256_ps128(v.m); - __m128 vhigh = _mm256_extractf128_ps(v.m, 1); - vlow = _mm_min_ps(vlow, vhigh); - - // First do an horizontal reduction. // v = [ D C | B A ] - __m128 shuf = _mm_shuffle_ps(vlow, vlow, _MM_SHUFFLE(2, 3, 0, 1)); // [ C D | A B ] - __m128 mins = _mm_min_ps(vlow, shuf); // mins = [ D+C C+D | B+A A+B ] - shuf = _mm_movehl_ps(shuf, mins); // [ C D | D+C C+D ] - mins = _mm_min_ss(mins, shuf); - - - // This is the most logical implementation, but the convenience intrinsic - // is missing on older compilers (supported in g++ 9 and clang++ 9). 
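// The rounding helpers above only work because the SIMD width is a power of
// two: masking with ~(width - 1) clears the low bits. For example with an
// 8-wide vector, round_down(13) = 13 & ~7 = 8 and round_up(13) = 16. A generic
// sketch with the width passed in explicitly (illustrative, not from the patch):
static inline int round_down_pow2(int count, int width)
{
    return count & ~(width - 1);
}

static inline int round_up_pow2(int count, int width)
{
    return (count + width - 1) & ~(width - 1);
}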
- //__m256i r = _mm256_set_m128(m, m) - __m256 r = _mm256_insertf128_ps(_mm256_castps128_ps256(mins), mins, 1); - - vfloat vmin(_mm256_permute_ps(r, 0)); - return vmin; + int multiples = (count + ASTCENC_SIMD_WIDTH - 1) / ASTCENC_SIMD_WIDTH; + return multiples * ASTCENC_SIMD_WIDTH; } -ASTCENC_SIMD_INLINE vint hmin(vint v) +/** + * @brief Return @c a with lanes negated if the @c b lane is negative. + */ +ASTCENC_SIMD_INLINE vfloat change_sign(vfloat a, vfloat b) { - __m128i m = _mm_min_epi32(_mm256_extracti128_si256(v.m, 0), _mm256_extracti128_si256(v.m, 1)); - m = _mm_min_epi32(m, _mm_shuffle_epi32(m, _MM_SHUFFLE(0,0,3,2))); - m = _mm_min_epi32(m, _mm_shuffle_epi32(m, _MM_SHUFFLE(0,0,0,1))); - m = _mm_shuffle_epi32(m, _MM_SHUFFLE(0,0,0,0)); - - // This is the most logical implementation, but the convenience intrinsic - // is missing on older compilers (supported in g++ 9 and clang++ 9). - //__m256i r = _mm256_set_m128i(m, m) - __m256i r = _mm256_insertf128_si256(_mm256_castsi128_si256(m), m, 1); - vint vmin(r); - return vmin; + vint ia = float_as_int(a); + vint ib = float_as_int(b); + vint sign_mask((int)0x80000000); + vint r = ia ^ (ib & sign_mask); + return int_as_float(r); } -// Store float vector into an aligned address. -ASTCENC_SIMD_INLINE void store(vfloat v, float* ptr) { _mm256_store_ps(ptr, v.m); } -// Store integer vector into an aligned address. -ASTCENC_SIMD_INLINE void store(vint v, int* ptr) { _mm256_store_si256((__m256i*)ptr, v.m); } - -// Store lowest N (simd width) bytes of integer vector into an unaligned address. -ASTCENC_SIMD_INLINE void store_nbytes(vint v, uint8_t* ptr) +/** + * @brief Return fast, but approximate, vector atan(x). + * + * Max error of this implementaiton is 0.004883. + */ +ASTCENC_SIMD_INLINE vfloat atan(vfloat x) { - // This is the most logical implementation, but the convenience intrinsic - // is missing on older compilers (supported in g++ 9 and clang++ 9). - // _mm_storeu_si64(ptr, _mm256_extracti128_si256(v.m, 0)) - _mm_storel_epi64((__m128i*)ptr, _mm256_extracti128_si256(v.m, 0)); + vmask c = abs(x) > vfloat(1.0f); + vfloat z = change_sign(vfloat(astc::PI_OVER_TWO), x); + vfloat y = select(x, vfloat(1.0f) / x, c); + y = y / (y * y * vfloat(0.28f) + vfloat(1.0f)); + return select(y, z - y, c); } -// SIMD "gather" - load each lane with base[indices[i]] -ASTCENC_SIMD_INLINE vfloat gatherf(const float* base, vint indices) -{ - return vfloat(_mm256_i32gather_ps(base, indices.m, 4)); -} -ASTCENC_SIMD_INLINE vint gatheri(const int* base, vint indices) +/** + * @brief Return fast, but approximate, vector atan2(x, y). + */ +ASTCENC_SIMD_INLINE vfloat atan2(vfloat y, vfloat x) { - return vint(_mm256_i32gather_epi32(base, indices.m, 4)); + vfloat z = atan(abs(y / x)); + vmask xmask = vmask(float_as_int(x).m); + return change_sign(select(z, vfloat(astc::PI) - z, xmask), y); } -// Pack low 8 bits of each lane into low 64 bits of result. -ASTCENC_SIMD_INLINE vint pack_low_bytes(vint v) +/* + * @brief Factory that returns a unit length 4 component vfloat4. 
+ */ +static ASTCENC_SIMD_INLINE vfloat4 unit4() { - __m256i shuf = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 28, 24, 20, 16, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 12, 8, 4, 0); - __m256i a = _mm256_shuffle_epi8(v.m, shuf); - __m128i a0 = _mm256_extracti128_si256(a, 0); - __m128i a1 = _mm256_extracti128_si256(a, 1); - __m128i b = _mm_unpacklo_epi32(a0, a1); - - // This is the most logical implementation, but the convenience intrinsic - // is missing on older compilers (supported in g++ 9 and clang++ 9). - //__m256i r = _mm256_set_m128i(b, b) - __m256i r = _mm256_insertf128_si256(_mm256_castsi128_si256(b), b, 1); - return vint(r); + return vfloat4(0.5f); } -// "select", i.e. highbit(cond) ? b : a -ASTCENC_SIMD_INLINE vfloat select(vfloat a, vfloat b, vmask cond) -{ - return vfloat(_mm256_blendv_ps(a.m, b.m, cond.m)); -} -ASTCENC_SIMD_INLINE vint select(vint a, vint b, vmask cond) +/** + * @brief Factory that returns a unit length 3 component vfloat4. + */ +static ASTCENC_SIMD_INLINE vfloat4 unit3() { - return vint(_mm256_blendv_epi8(a.m, b.m, _mm256_castps_si256(cond.m))); + return vfloat4(0.57735f, 0.57735f, 0.57735f, 0.0f); } -ASTCENC_SIMD_INLINE void print(vfloat a) +/** + * @brief Normalize a non-zero length vector to unit length. + */ +static ASTCENC_SIMD_INLINE vfloat4 normalize(vfloat4 a) { - alignas(ASTCENC_VECALIGN) float v[8]; - store(a, v); - KLOGD("Astcenc", "v8_f32:\n %0.4f %0.4f %0.4f %0.4f %0.4f %0.4f %0.4f %0.4f\n", - (double)v[0], (double)v[1], (double)v[2], (double)v[3], - (double)v[4], (double)v[5], (double)v[6], (double)v[7]); + vfloat4 length = dot(a, a); + return a / sqrt(length); } -ASTCENC_SIMD_INLINE void print(vint a) +/** + * @brief Normalize a vector, returning @c safe if len is zero. + */ +static ASTCENC_SIMD_INLINE vfloat4 normalize_safe(vfloat4 a, vfloat4 safe) { - alignas(ASTCENC_VECALIGN) int v[8]; - store(a, v); - KLOGD("Astcenc", "v8_i32:\n %8u %8u %8u %8u %8u %8u %8u %8u\n", - v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]); -} - -#endif // #ifdef ASTCENC_SIMD_ISA_AVX2 + vfloat4 length = dot(a, a); + if (length.lane<0>() != 0.0f) + { + return a / sqrt(length); + } + return safe; +} -// ---------------------------------------------------------------------------- -// SSE 4-wide implementation -// Uses SSE2 as baseline, optionally SSE4.x instructions based on ASTCENC_SSE value -#ifdef ASTCENC_SIMD_ISA_SSE -#define ASTCENC_SIMD_WIDTH 4 +#define POLY0(x, c0) ( c0) +#define POLY1(x, c0, c1) ((POLY0(x, c1) * x) + c0) +#define POLY2(x, c0, c1, c2) ((POLY1(x, c1, c2) * x) + c0) +#define POLY3(x, c0, c1, c2, c3) ((POLY2(x, c1, c2, c3) * x) + c0) +#define POLY4(x, c0, c1, c2, c3, c4) ((POLY3(x, c1, c2, c3, c4) * x) + c0) +#define POLY5(x, c0, c1, c2, c3, c4, c5) ((POLY4(x, c1, c2, c3, c4, c5) * x) + c0) -struct vfloat +/** + * @brief Compute an approximate exp2(x) for each lane in the vector. 
+ * + * Based on 5th degree minimax polynomials, ported from this blog + * https://jrfonseca.blogspot.com/2008/09/fast-sse2-pow-tables-or-polynomials.html + */ +static ASTCENC_SIMD_INLINE vfloat4 exp2(vfloat4 x) { - ASTCENC_SIMD_INLINE vfloat() {} - ASTCENC_SIMD_INLINE explicit vfloat(const float *p) { m = _mm_loadu_ps(p); } - ASTCENC_SIMD_INLINE explicit vfloat(float v) { m = _mm_set_ps1(v); } - ASTCENC_SIMD_INLINE explicit vfloat(__m128 v) { m = v; } - ASTCENC_SIMD_INLINE float lane(int i) const - { - #ifdef _MSC_VER - return m.m128_f32[i]; - #else - union { __m128 m; float f[ASTCENC_SIMD_WIDTH]; } cvt; - cvt.m = m; - return cvt.f[i]; - #endif - } - static ASTCENC_SIMD_INLINE vfloat zero() { return vfloat(_mm_setzero_ps()); } - static ASTCENC_SIMD_INLINE vfloat lane_id() { return vfloat(_mm_set_ps(3, 2, 1, 0)); } - __m128 m; -}; + x = clamp(-126.99999f, 129.0f, x); -struct vint -{ - ASTCENC_SIMD_INLINE vint() {} - ASTCENC_SIMD_INLINE explicit vint(const int *p) { m = _mm_load_si128((const __m128i*)p); } - ASTCENC_SIMD_INLINE explicit vint(int v) { m = _mm_set1_epi32(v); } - ASTCENC_SIMD_INLINE explicit vint(__m128i v) { m = v; } - ASTCENC_SIMD_INLINE int lane(int i) const - { - #ifdef _MSC_VER - return m.m128i_i32[i]; - #else - union { __m128i m; int f[ASTCENC_SIMD_WIDTH]; } cvt; - cvt.m = m; - return cvt.f[i]; - #endif - } - static ASTCENC_SIMD_INLINE vint lane_id() { return vint(_mm_set_epi32(3, 2, 1, 0)); } - __m128i m; -}; + vint4 ipart = float_to_int(x - 0.5f); + vfloat4 fpart = x - int_to_float(ipart); -struct vmask -{ - ASTCENC_SIMD_INLINE explicit vmask(__m128 v) { m = v; } - ASTCENC_SIMD_INLINE explicit vmask(__m128i v) { m = _mm_castsi128_ps(v); } - __m128 m; -}; - - -ASTCENC_SIMD_INLINE vfloat load1a(const float* p) { return vfloat(_mm_load_ps1(p)); } -ASTCENC_SIMD_INLINE vfloat loada(const float* p) { return vfloat(_mm_load_ps(p)); } - -ASTCENC_SIMD_INLINE vfloat operator+ (vfloat a, vfloat b) { a.m = _mm_add_ps(a.m, b.m); return a; } -ASTCENC_SIMD_INLINE vfloat operator- (vfloat a, vfloat b) { a.m = _mm_sub_ps(a.m, b.m); return a; } -ASTCENC_SIMD_INLINE vfloat operator* (vfloat a, vfloat b) { a.m = _mm_mul_ps(a.m, b.m); return a; } -ASTCENC_SIMD_INLINE vfloat operator/ (vfloat a, vfloat b) { a.m = _mm_div_ps(a.m, b.m); return a; } -ASTCENC_SIMD_INLINE vmask operator==(vfloat a, vfloat b) { return vmask(_mm_cmpeq_ps(a.m, b.m)); } -ASTCENC_SIMD_INLINE vmask operator!=(vfloat a, vfloat b) { return vmask(_mm_cmpneq_ps(a.m, b.m)); } -ASTCENC_SIMD_INLINE vmask operator< (vfloat a, vfloat b) { return vmask(_mm_cmplt_ps(a.m, b.m)); } -ASTCENC_SIMD_INLINE vmask operator> (vfloat a, vfloat b) { return vmask(_mm_cmpgt_ps(a.m, b.m)); } -ASTCENC_SIMD_INLINE vmask operator<=(vfloat a, vfloat b) { return vmask(_mm_cmple_ps(a.m, b.m)); } -ASTCENC_SIMD_INLINE vmask operator>=(vfloat a, vfloat b) { return vmask(_mm_cmpge_ps(a.m, b.m)); } -ASTCENC_SIMD_INLINE vmask operator| (vmask a, vmask b) { return vmask(_mm_or_ps(a.m, b.m)); } -ASTCENC_SIMD_INLINE vmask operator& (vmask a, vmask b) { return vmask(_mm_and_ps(a.m, b.m)); } -ASTCENC_SIMD_INLINE vmask operator^ (vmask a, vmask b) { return vmask(_mm_xor_ps(a.m, b.m)); } -// Returns a 4-bit code where bit0..bit3 is X..W -ASTCENC_SIMD_INLINE unsigned mask(vmask v) { return _mm_movemask_ps(v.m); } -ASTCENC_SIMD_INLINE bool any(vmask v) { return mask(v) != 0; } -ASTCENC_SIMD_INLINE bool all(vmask v) { return mask(v) == 0xF; } - -ASTCENC_SIMD_INLINE vfloat min(vfloat a, vfloat b) { a.m = _mm_min_ps(a.m, b.m); return a; } -ASTCENC_SIMD_INLINE vfloat 
max(vfloat a, vfloat b) { a.m = _mm_max_ps(a.m, b.m); return a; } -ASTCENC_SIMD_INLINE vfloat saturate(vfloat a) -{ - __m128 zero = _mm_setzero_ps(); - __m128 one = _mm_set1_ps(1.0f); - return vfloat(_mm_min_ps(_mm_max_ps(a.m, zero), one)); -} + // Integer contrib, using 1 << ipart + vfloat4 iexp = int_as_float(lsl<23>(ipart + 127)); -ASTCENC_SIMD_INLINE vfloat abs(vfloat x) -{ - __m128 msk = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)); - return vfloat(_mm_and_ps(x.m, msk)); + // Fractional contrib, using polynomial fit of 2^x in range [-0.5, 0.5) + vfloat4 fexp = POLY5(fpart, + 9.9999994e-1f, + 6.9315308e-1f, + 2.4015361e-1f, + 5.5826318e-2f, + 8.9893397e-3f, + 1.8775767e-3f); + + return iexp * fexp; } -ASTCENC_SIMD_INLINE vfloat round(vfloat v) +/** + * @brief Compute an approximate log2(x) for each lane in the vector. + * + * Based on 5th degree minimax polynomials, ported from this blog + * https://jrfonseca.blogspot.com/2008/09/fast-sse2-pow-tables-or-polynomials.html + */ +static ASTCENC_SIMD_INLINE vfloat4 log2(vfloat4 x) { -#if ASTCENC_SSE >= 41 - return vfloat(_mm_round_ps(v.m, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)); -#else - __m128 V = v.m; - __m128 negZero = _mm_castsi128_ps(_mm_set1_epi32(0x80000000)); - __m128 noFraction = _mm_set_ps1(8388608.0f); - __m128 absMask = _mm_castsi128_ps(_mm_set1_epi32(0x7FFFFFFF)); - __m128 sign = _mm_and_ps(V, negZero); - __m128 sMagic = _mm_or_ps(noFraction, sign); - __m128 R1 = _mm_add_ps(V, sMagic); - R1 = _mm_sub_ps(R1, sMagic); - __m128 R2 = _mm_and_ps(V, absMask); - __m128 mask = _mm_cmple_ps(R2, noFraction); - R2 = _mm_andnot_ps(mask, V); - R1 = _mm_and_ps(R1, mask); - return vfloat(_mm_xor_ps(R1, R2)); -#endif -} + vint4 exp(0x7F800000); + vint4 mant(0x007FFFFF); + vint4 one(0x3F800000); -ASTCENC_SIMD_INLINE vint floatToInt(vfloat v) { return vint(_mm_cvttps_epi32(v.m)); } - -ASTCENC_SIMD_INLINE vfloat intAsFloat(vint v) { return vfloat(_mm_castsi128_ps(v.m)); } -ASTCENC_SIMD_INLINE vint floatAsInt(vfloat v) { return vint(_mm_castps_si128(v.m)); } - -ASTCENC_SIMD_INLINE vint operator~ (vint a) { return vint(_mm_xor_si128(a.m, _mm_set1_epi32(-1))); } -ASTCENC_SIMD_INLINE vmask operator~ (vmask a) { return vmask(_mm_xor_si128(_mm_castps_si128(a.m), _mm_set1_epi32(-1))); } - -ASTCENC_SIMD_INLINE vint operator+ (vint a, vint b) { a.m = _mm_add_epi32(a.m, b.m); return a; } -ASTCENC_SIMD_INLINE vint operator- (vint a, vint b) { a.m = _mm_sub_epi32(a.m, b.m); return a; } -ASTCENC_SIMD_INLINE vint operator| (vint a, vint b) { return vint(_mm_or_si128(a.m, b.m)); } -ASTCENC_SIMD_INLINE vint operator& (vint a, vint b) { return vint(_mm_and_si128(a.m, b.m)); } -ASTCENC_SIMD_INLINE vint operator^ (vint a, vint b) { return vint(_mm_xor_si128(a.m, b.m)); } -ASTCENC_SIMD_INLINE vmask operator< (vint a, vint b) { return vmask(_mm_cmplt_epi32(a.m, b.m)); } -ASTCENC_SIMD_INLINE vmask operator> (vint a, vint b) { return vmask(_mm_cmpgt_epi32(a.m, b.m)); } -ASTCENC_SIMD_INLINE vmask operator==(vint a, vint b) { return vmask(_mm_cmpeq_epi32(a.m, b.m)); } -ASTCENC_SIMD_INLINE vmask operator!=(vint a, vint b) { return ~vmask(_mm_cmpeq_epi32(a.m, b.m)); } -ASTCENC_SIMD_INLINE vint min(vint a, vint b) { -#if ASTCENC_SSE >= 41 - a.m = _mm_min_epi32(a.m, b.m); -#else - vmask d = a < b; - a.m = _mm_or_si128(_mm_and_si128(_mm_castps_si128(d.m), a.m), _mm_andnot_si128(_mm_castps_si128(d.m), b.m)); -#endif - return a; -} + vint4 i = float_as_int(x); -ASTCENC_SIMD_INLINE vint max(vint a, vint b) { -#if ASTCENC_SSE >= 41 - a.m = _mm_max_epi32(a.m, b.m); -#else 
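// exp2() above (and two_to_the_n() further down) build an exact power of two
// by writing a biased exponent straight into the IEEE-754 exponent field - the
// lsl<23>(ipart + 127) step. A scalar sketch of that bit trick (illustrative,
// not code from the patch):
#include <cstdint>
#include <cstring>

static float two_to_the_n_scalar(int n)
{
    // Valid for the normal exponent range, roughly -126 <= n <= 127.
    uint32_t bits = (uint32_t)(n + 127) << 23;
    float result;
    std::memcpy(&result, &bits, sizeof(result)); // bit-cast, no int->float convert
    return result;
}
// e.g. two_to_the_n_scalar(10) == 1024.0f and two_to_the_n_scalar(-3) == 0.125f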
- vmask d = a > b; - a.m = _mm_or_si128(_mm_and_si128(_mm_castps_si128(d.m), a.m), _mm_andnot_si128(_mm_castps_si128(d.m), b.m)); -#endif - return a; -} + vfloat4 e = int_to_float(lsr<23>(i & exp) - 127); -#define ASTCENC_SHUFFLE4F(V, X,Y,Z,W) vfloat(_mm_shuffle_ps((V).m, (V).m, _MM_SHUFFLE(W,Z,Y,X))) -#define ASTCENC_SHUFFLE4I(V, X,Y,Z,W) vint(_mm_shuffle_epi32((V).m, _MM_SHUFFLE(W,Z,Y,X))) + vfloat4 m = int_as_float((i & mant) | one); -ASTCENC_SIMD_INLINE vfloat hmin(vfloat v) -{ - v = min(v, ASTCENC_SHUFFLE4F(v, 2, 3, 0, 0)); - v = min(v, ASTCENC_SHUFFLE4F(v, 1, 0, 0, 0)); - return ASTCENC_SHUFFLE4F(v, 0,0,0,0); -} -ASTCENC_SIMD_INLINE vint hmin(vint v) -{ - v = min(v, ASTCENC_SHUFFLE4I(v, 2, 3, 0, 0)); - v = min(v, ASTCENC_SHUFFLE4I(v, 1, 0, 0, 0)); - return ASTCENC_SHUFFLE4I(v, 0,0,0,0); -} + // Polynomial fit of log2(x)/(x - 1), for x in range [1, 2) + vfloat4 p = POLY4(m, + 2.8882704548164776201f, + -2.52074962577807006663f, + 1.48116647521213171641f, + -0.465725644288844778798f, + 0.0596515482674574969533f); -ASTCENC_SIMD_INLINE void store(vfloat v, float* ptr) { _mm_store_ps(ptr, v.m); } -ASTCENC_SIMD_INLINE void store(vint v, int* ptr) { _mm_store_si128((__m128i*)ptr, v.m); } + // Increases the polynomial degree, but ensures that log2(1) == 0 + p = p * (m - 1.0f); -ASTCENC_SIMD_INLINE void store_nbytes(vint v, uint8_t* ptr) -{ - // This is the most logical implementation, but the convenience intrinsic - // is missing on older compilers (supported in g++ 9 and clang++ 9). - // _mm_storeu_si32(ptr, v.m); - _mm_store_ss((float*)ptr, _mm_castsi128_ps(v.m)); + return p + e; } -ASTCENC_SIMD_INLINE vfloat gatherf(const float* base, vint indices) +/** + * @brief Compute an approximate pow(x, y) for each lane in the vector. + * + * Power function based on the exp2(log2(x) * y) transform. + */ +static ASTCENC_SIMD_INLINE vfloat4 pow(vfloat4 x, vfloat4 y) { - int idx[4]; - store(indices, idx); - return vfloat(_mm_set_ps(base[idx[3]], base[idx[2]], base[idx[1]], base[idx[0]])); -} + vmask4 zero_mask = y == vfloat4(0.0f); + vfloat4 estimate = exp2(log2(x) * y); -ASTCENC_SIMD_INLINE vint gatheri(const int* base, vint indices) -{ - int idx[4]; - store(indices, idx); - return vint(_mm_set_epi32(base[idx[3]], base[idx[2]], base[idx[1]], base[idx[0]])); + // Guarantee that y == 0 returns exactly 1.0f + return select(estimate, vfloat4(1.0f), zero_mask); } -// packs low 8 bits of each lane into low 32 bits of result -ASTCENC_SIMD_INLINE vint pack_low_bytes(vint v) +/** + * @brief Count the leading zeros for each lane in @c a. + * + * Valid for all data values of @c a; will return a per-lane value [0, 32]. + */ +ASTCENC_SIMD_INLINE vint4 clz(vint4 a) { - #if ASTCENC_SSE >= 41 - __m128i shuf = _mm_set_epi8(0,0,0,0, 0,0,0,0, 0,0,0,0, 12,8,4,0); - return vint(_mm_shuffle_epi8(v.m, shuf)); - #else - __m128i va = _mm_unpacklo_epi8(v.m, _mm_shuffle_epi32(v.m, _MM_SHUFFLE(1,1,1,1))); - __m128i vb = _mm_unpackhi_epi8(v.m, _mm_shuffle_epi32(v.m, _MM_SHUFFLE(3,3,3,3))); - return vint(_mm_unpacklo_epi16(va, vb)); - #endif -} + // This function is a horrible abuse of floating point exponents to convert + // the original integer value into a 2^N encoding we can recover easily. -// "select", i.e. highbit(cond) ? 
b : a -// on SSE4.1 and up this can be done easily via "blend" instruction; -// on older SSEs we have to do some hoops, see -// https://fgiesen.wordpress.com/2016/04/03/sse-mind-the-gap/ -ASTCENC_SIMD_INLINE vfloat select(vfloat a, vfloat b, vmask cond) -{ -#if ASTCENC_SSE >= 41 - a.m = _mm_blendv_ps(a.m, b.m, cond.m); -#else - __m128 d = _mm_castsi128_ps(_mm_srai_epi32(_mm_castps_si128(cond.m), 31)); - a.m = _mm_or_ps(_mm_and_ps(d, b.m), _mm_andnot_ps(d, a.m)); -#endif - return a; -} + // Convert to float without risk of rounding up by keeping only top 8 bits. + // This trick is is guranteed to keep top 8 bits and clear the 9th. + a = (~lsr<8>(a)) & a; + a = float_as_int(int_to_float(a)); -ASTCENC_SIMD_INLINE vint select(vint a, vint b, vmask cond) -{ -#if ASTCENC_SSE >= 41 - return vint(_mm_blendv_epi8(a.m, b.m, _mm_castps_si128(cond.m))); -#else - __m128i d = _mm_srai_epi32(_mm_castps_si128(cond.m), 31); - return vint(_mm_or_si128(_mm_and_si128(d, b.m), _mm_andnot_si128(d, a.m))); -#endif -} + // Extract and unbias exponent + a = vint4(127 + 31) - lsr<23>(a); -ASTCENC_SIMD_INLINE void print(vfloat a) -{ - alignas(ASTCENC_VECALIGN) float v[4]; - store(a, v); - KLOGD("Astcenc", "v4_f32:\n %0.4f %0.4f %0.4f %0.4f\n", - (double)v[0], (double)v[1], (double)v[2], (double)v[3]); + // Clamp result to a valid 32-bit range + return clamp(0, 32, a); } -ASTCENC_SIMD_INLINE void print(vint a) +/** + * @brief Return lanewise 2^a for each lane in @c a. + * + * Use of signed int mean that this is only valid for values in range [0, 31]. + */ +ASTCENC_SIMD_INLINE vint4 two_to_the_n(vint4 a) { - alignas(ASTCENC_VECALIGN) int v[4]; - store(a, v); - KLOGD("Astcenc", "v4_i32:\n %8u %8u %8u %8u\n", - v[0], v[1], v[2], v[3]); -} + // 2^30 is the largest signed number than can be represented + assert(all(a < vint4(31))); + // This function is a horrible abuse of floating point to use the exponent + // and float conversion to generate a 2^N multiple. -#endif // #ifdef ASTCENC_SIMD_ISA_SSE + // Bias the exponent + vint4 exp = a + 127; + exp = lsl<23>(exp); + // Reinterpret the bits as a float, and then convert to an int + vfloat4 f = int_as_float(exp); + return float_to_int(f); +} -// ---------------------------------------------------------------------------- -// Pure scalar, 1-wide implementation +/** + * @brief Convert unorm16 [0, 65535] to float16 in range [0, 1]. 
+ */ +ASTCENC_SIMD_INLINE vint4 unorm16_to_sf16(vint4 p) +{ + vint4 fp16_one = vint4(0x3C00); + vint4 fp16_small = lsl<8>(p); -#ifdef ASTCENC_SIMD_ISA_SCALAR + vmask4 is_one = p == vint4(0xFFFF); + vmask4 is_small = p < vint4(4); -#include -#include -#include + vint4 lz = clz(p) - 16; -#define ASTCENC_SIMD_WIDTH 1 + // TODO: Could use AVX2 _mm_sllv_epi32() instead of p * 2^ + p = p * two_to_the_n(lz + 1); + p = p & vint4(0xFFFF); -struct vfloat -{ - ASTCENC_SIMD_INLINE vfloat() {} - ASTCENC_SIMD_INLINE explicit vfloat(const float *p) { m = *p; } - ASTCENC_SIMD_INLINE explicit vfloat(float v) { m = v; } - ASTCENC_SIMD_INLINE float lane(int i) const { return m; } - static ASTCENC_SIMD_INLINE vfloat zero() { return vfloat(0.0f); } - static ASTCENC_SIMD_INLINE vfloat lane_id() { return vfloat(0.0f); } - float m; -}; - -struct vint -{ - ASTCENC_SIMD_INLINE vint() {} - ASTCENC_SIMD_INLINE explicit vint(const int *p) { m = *p; } - ASTCENC_SIMD_INLINE explicit vint(int v) { m = v; } - ASTCENC_SIMD_INLINE int lane(int i) const { return m; } - static ASTCENC_SIMD_INLINE vint lane_id() { return vint(0); } - int m; -}; - -struct vmask -{ - ASTCENC_SIMD_INLINE explicit vmask(bool v) { m = v; } - bool m; -}; - - -ASTCENC_SIMD_INLINE vfloat load1a(const float* p) { return vfloat(*p); } -ASTCENC_SIMD_INLINE vfloat loada(const float* p) { return vfloat(*p); } - -ASTCENC_SIMD_INLINE vfloat operator+ (vfloat a, vfloat b) { a.m = a.m + b.m; return a; } -ASTCENC_SIMD_INLINE vfloat operator- (vfloat a, vfloat b) { a.m = a.m - b.m; return a; } -ASTCENC_SIMD_INLINE vfloat operator* (vfloat a, vfloat b) { a.m = a.m * b.m; return a; } -ASTCENC_SIMD_INLINE vfloat operator/ (vfloat a, vfloat b) { a.m = a.m / b.m; return a; } -ASTCENC_SIMD_INLINE vmask operator==(vfloat a, vfloat b) { return vmask(a.m = a.m == b.m); } -ASTCENC_SIMD_INLINE vmask operator!=(vfloat a, vfloat b) { return vmask(a.m = a.m != b.m); } -ASTCENC_SIMD_INLINE vmask operator< (vfloat a, vfloat b) { return vmask(a.m = a.m < b.m); } -ASTCENC_SIMD_INLINE vmask operator> (vfloat a, vfloat b) { return vmask(a.m = a.m > b.m); } -ASTCENC_SIMD_INLINE vmask operator<=(vfloat a, vfloat b) { return vmask(a.m = a.m <= b.m); } -ASTCENC_SIMD_INLINE vmask operator>=(vfloat a, vfloat b) { return vmask(a.m = a.m >= b.m); } -ASTCENC_SIMD_INLINE vmask operator| (vmask a, vmask b) { return vmask(a.m || b.m); } -ASTCENC_SIMD_INLINE vmask operator& (vmask a, vmask b) { return vmask(a.m && b.m); } -ASTCENC_SIMD_INLINE vmask operator^ (vmask a, vmask b) { return vmask(a.m ^ b.m); } -ASTCENC_SIMD_INLINE unsigned mask(vmask v) { return v.m; } -ASTCENC_SIMD_INLINE bool any(vmask v) { return mask(v) != 0; } -ASTCENC_SIMD_INLINE bool all(vmask v) { return mask(v) != 0; } - -ASTCENC_SIMD_INLINE vfloat min(vfloat a, vfloat b) { a.m = a.m < b.m ? a.m : b.m; return a; } -ASTCENC_SIMD_INLINE vfloat max(vfloat a, vfloat b) { a.m = a.m > b.m ? 
a.m : b.m; return a; } -ASTCENC_SIMD_INLINE vfloat saturate(vfloat a) { return vfloat(std::min(std::max(a.m,0.0f), 1.0f)); } - -ASTCENC_SIMD_INLINE vfloat abs(vfloat x) { return vfloat(std::abs(x.m)); } - -ASTCENC_SIMD_INLINE vfloat round(vfloat v) -{ - return vfloat(std::floor(v.m + 0.5f)); -} + p = lsr<6>(p); -ASTCENC_SIMD_INLINE vint floatToInt(vfloat v) { return vint(v.m); } + p = p | lsl<10>(vint4(14) - lz); -ASTCENC_SIMD_INLINE vfloat intAsFloat(vint v) { vfloat r; memcpy(&r.m, &v.m, 4); return r; } -ASTCENC_SIMD_INLINE vint floatAsInt(vfloat v) { vint r; memcpy(&r.m, &v.m, 4); return r; } + vint4 r = select(p, fp16_one, is_one); + r = select(r, fp16_small, is_small); + return r; +} -ASTCENC_SIMD_INLINE vint operator~ (vint a) { a.m = ~a.m; return a; } -ASTCENC_SIMD_INLINE vint operator+ (vint a, vint b) { a.m = a.m + b.m; return a; } -ASTCENC_SIMD_INLINE vint operator- (vint a, vint b) { a.m = a.m - b.m; return a; } -ASTCENC_SIMD_INLINE vint operator| (vint a, vint b) { return vint(a.m | b.m); } -ASTCENC_SIMD_INLINE vint operator& (vint a, vint b) { return vint(a.m & b.m); } -ASTCENC_SIMD_INLINE vint operator^ (vint a, vint b) { return vint(a.m ^ b.m); } -ASTCENC_SIMD_INLINE vmask operator< (vint a, vint b) { return vmask(a.m = a.m < b.m); } -ASTCENC_SIMD_INLINE vmask operator> (vint a, vint b) { return vmask(a.m = a.m > b.m); } -ASTCENC_SIMD_INLINE vmask operator==(vint a, vint b) { return vmask(a.m = a.m == b.m); } -ASTCENC_SIMD_INLINE vmask operator!=(vint a, vint b) { return vmask(a.m = a.m != b.m); } -ASTCENC_SIMD_INLINE vint min(vint a, vint b) { a.m = a.m < b.m ? a.m : b.m; return a; } -ASTCENC_SIMD_INLINE vint max(vint a, vint b) { a.m = a.m > b.m ? a.m : b.m; return a; } +/** + * @brief Convert 16-bit LNS to float16. + */ +ASTCENC_SIMD_INLINE vint4 lns_to_sf16(vint4 p) +{ + vint4 mc = p & 0x7FF; + vint4 ec = lsr<11>(p); -ASTCENC_SIMD_INLINE vfloat hmin(vfloat v) { return v; } -ASTCENC_SIMD_INLINE vint hmin(vint v) { return v; } + vint4 mc_512 = mc * 3; + vmask4 mask_512 = mc < vint4(512); -ASTCENC_SIMD_INLINE void store(vfloat v, float* ptr) { *ptr = v.m; } -ASTCENC_SIMD_INLINE void store(vint v, int* ptr) { *ptr = v.m; } + vint4 mc_1536 = mc * 4 - 512; + vmask4 mask_1536 = mc < vint4(1536); -ASTCENC_SIMD_INLINE void store_nbytes(vint v, uint8_t* ptr) { *ptr = (uint8_t)v.m; } + vint4 mc_else = mc * 5 - 2048; -ASTCENC_SIMD_INLINE vfloat gatherf(const float* base, vint indices) -{ - return vfloat(base[indices.m]); -} -ASTCENC_SIMD_INLINE vint gatheri(const int* base, vint indices) -{ - return vint(base[indices.m]); + vint4 mt = mc_else; + mt = select(mt, mc_1536, mask_1536); + mt = select(mt, mc_512, mask_512); + + vint4 res = lsl<10>(ec) | lsr<3>(mt); + return min(res, vint4(0x7BFF)); } -// packs low 8 bits of each lane into low 8 bits of result (a no-op in scalar code path) -ASTCENC_SIMD_INLINE vint pack_low_bytes(vint v) +/** + * @brief Extract mantissa and exponent of a float value. + * + * @param a The input value. + * @param[out] exp The output exponent. + * + * @return The mantissa. + */ +static inline vfloat4 frexp(vfloat4 a, vint4& exp) { - return v; -} + // Interpret the bits as an integer + vint4 ai = float_as_int(a); + // Extract and unbias the exponent + exp = (lsr<23>(ai) & 0xFF) - 126; -// "select", i.e. highbit(cond) ? b : a -ASTCENC_SIMD_INLINE vfloat select(vfloat a, vfloat b, vmask cond) -{ - return cond.m ? 
b : a; + // Extract and unbias the mantissa + vint4 manti = (ai & 0x807FFFFF) | 0x3F000000; + return int_as_float(manti); } -ASTCENC_SIMD_INLINE vint select(vint a, vint b, vmask cond) + +/** + * @brief Convert float to 16-bit LNS. + */ +static inline vfloat4 float_to_lns(vfloat4 a) { - return cond.m ? b : a; -} + vint4 exp; + vfloat4 mant = frexp(a, exp); + // Do these early before we start messing about ... + vmask4 mask_underflow_nan = ~(a > vfloat4(1.0f / 67108864.0f)); + vmask4 mask_infinity = a >= vfloat4(65536.0f); -#endif // #ifdef ASTCENC_SIMD_ISA_SCALAR + // If input is smaller than 2^-14, multiply by 2^25 and don't bias. + vmask4 exp_lt_m13 = exp < vint4(-13); + vfloat4 a1a = a * 33554432.0f; + vint4 expa = vint4::zero(); -// ---------------------------------------------------------------------------- + vfloat4 a1b = (mant - 0.5f) * 4096; + vint4 expb = exp + 14; -// Return x, with each lane having its sign flipped where the corresponding y lane is negative, i.e. msb(y) ? -x : x -ASTCENC_SIMD_INLINE vfloat changesign(vfloat x, vfloat y) -{ - vint ix = floatAsInt(x); - vint iy = floatAsInt(y); - vint signMask((int)0x80000000); - vint r = ix ^ (iy & signMask); - return intAsFloat(r); + a = select(a1b, a1a, exp_lt_m13); + exp = select(expb, expa, exp_lt_m13); + + vmask4 a_lt_384 = a < vfloat4(384.0f); + vmask4 a_lt_1408 = a <= vfloat4(1408.0f); + + vfloat4 a2a = a * (4.0f / 3.0f); + vfloat4 a2b = a + 128.0f; + vfloat4 a2c = (a + 512.0f) * (4.0f / 5.0f); + + a = a2c; + a = select(a, a2b, a_lt_1408); + a = select(a, a2a, a_lt_384); + + a = a + (int_to_float(exp) * 2048.0f) + 1.0f; + + a = select(a, vfloat4(65535.0f), mask_infinity); + a = select(a, vfloat4::zero(), mask_underflow_nan); + + return a; } -// Fast atan implementation, with max error of 0.004883 -ASTCENC_SIMD_INLINE vfloat atan(vfloat x) +namespace astc { - vmask c = abs(x) > vfloat(1.0f); - vfloat z = changesign(vfloat(astc::PI_OVER_TWO), x); - vfloat y = select(x, vfloat(1.0f) / x, c); - y = y / (y * y * vfloat(0.28f) + vfloat(1.0f)); - return select(y, z - y, c); -} -ASTCENC_SIMD_INLINE vfloat atan2(vfloat y, vfloat x) +static ASTCENC_SIMD_INLINE float pow(float x, float y) { - vfloat z = atan(abs(y / x)); - vmask xmask = vmask(floatAsInt(x).m); - return changesign(select(z, vfloat(astc::PI) - z, xmask), y); + return pow(vfloat4(x), vfloat4(y)).lane<0>(); +} + } #endif // #ifndef ASTC_VECMATHLIB_H_INCLUDED diff --git a/libkram/astc-encoder/astcenc_vecmathlib_avx2_8.h b/libkram/astc-encoder/astcenc_vecmathlib_avx2_8.h new file mode 100755 index 00000000..cba1db45 --- /dev/null +++ b/libkram/astc-encoder/astcenc_vecmathlib_avx2_8.h @@ -0,0 +1,943 @@ +// SPDX-License-Identifier: Apache-2.0 +// ---------------------------------------------------------------------------- +// Copyright 2019-2021 Arm Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not +// use this file except in compliance with the License. You may obtain a copy +// of the License at: +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations +// under the License. +// ---------------------------------------------------------------------------- + +/** + * @brief 8x32-bit vectors, implemented using AVX2. 
+ * + * This module implements 8-wide 32-bit float, int, and mask vectors for x86 + * AVX2. + * + * There is a baseline level of functionality provided by all vector widths and + * implementations. This is implemented using identical function signatures, + * modulo data type, so we can use them as substitutable implementations in VLA + * code. + */ + +#ifndef ASTC_VECMATHLIB_AVX2_8_H_INCLUDED +#define ASTC_VECMATHLIB_AVX2_8_H_INCLUDED + +#ifndef ASTCENC_SIMD_INLINE + #error "Include astcenc_vecmathlib.h, do not include directly" +#endif + +#include + +// ============================================================================ +// vfloat8 data type +// ============================================================================ + +/** + * @brief Data type for 8-wide floats. + */ +struct vfloat8 +{ + /** + * @brief Construct from zero-initialized value. + */ + ASTCENC_SIMD_INLINE vfloat8() {} + + /** + * @brief Construct from 4 values loaded from an unaligned address. + * + * Consider using loada() which is better with vectors if data is aligned + * to vector length. + */ + ASTCENC_SIMD_INLINE explicit vfloat8(const float *p) + { + m = _mm256_loadu_ps(p); + } + + /** + * @brief Construct from 1 scalar value replicated across all lanes. + * + * Consider using zero() for constexpr zeros. + */ + ASTCENC_SIMD_INLINE explicit vfloat8(float a) + { + m = _mm256_set1_ps(a); + } + + /** + * @brief Construct from 8 scalar values. + * + * The value of @c a is stored to lane 0 (LSB) in the SIMD register. + */ + ASTCENC_SIMD_INLINE explicit vfloat8( + float a, float b, float c, float d, + float e, float f, float g, float h) + { + m = _mm256_set_ps(h, g, f, e, d, c, b, a); + } + + /** + * @brief Construct from an existing SIMD register. + */ + ASTCENC_SIMD_INLINE explicit vfloat8(__m256 a) { + m = a; + } + + /** + * @brief Get the scalar value of a single lane. + */ + template ASTCENC_SIMD_INLINE float lane() const + { + #if !defined(__clang__) && defined(_MSC_VER) + return m.m256_f32[l]; + #else + union { __m256 m; float f[8]; } cvt; + cvt.m = m; + return cvt.f[l]; + #endif + } + + /** + * @brief Factory that returns a vector of zeros. + */ + static ASTCENC_SIMD_INLINE vfloat8 zero() + { + return vfloat8(_mm256_setzero_ps()); + } + + /** + * @brief Factory that returns a replicated scalar loaded from memory. + */ + static ASTCENC_SIMD_INLINE vfloat8 load1(const float* p) + { + return vfloat8(_mm256_broadcast_ss(p)); + } + + /** + * @brief Factory that returns a vector loaded from 32B aligned memory. + */ + static ASTCENC_SIMD_INLINE vfloat8 loada(const float* p) + { + return vfloat8(_mm256_load_ps(p)); + } + + /** + * @brief Factory that returns a vector containing the lane IDs. + */ + static ASTCENC_SIMD_INLINE vfloat8 lane_id() + { + return vfloat8(_mm256_set_ps(7, 6, 5, 4, 3, 2, 1, 0)); + } + + /** + * @brief The vector ... + */ + __m256 m; +}; + +// ============================================================================ +// vint8 data type +// ============================================================================ + +/** + * @brief Data type for 8-wide ints. + */ +struct vint8 +{ + /** + * @brief Construct from zero-initialized value. + */ + ASTCENC_SIMD_INLINE vint8() {} + + /** + * @brief Construct from 8 values loaded from an unaligned address. + * + * Consider using loada() which is better with vectors if data is aligned + * to vector length. 
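+	 *
+	 * Illustrative usage (the buffer name is hypothetical, not from the source):
+	 *     int buf[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
+	 *     vint8 v(buf);   // unaligned load; lane 0 holds buf[0]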
+ */ + ASTCENC_SIMD_INLINE explicit vint8(const int *p) + { + m = _mm256_loadu_si256((const __m256i*)p); + } + + /** + * @brief Construct from 8 uint8_t loaded from an unaligned address. + */ + ASTCENC_SIMD_INLINE explicit vint8(const uint8_t *p) + { + // _mm_loadu_si64 would be nicer syntax, but missing on older GCC + m = _mm256_cvtepu8_epi32(_mm_cvtsi64_si128(*(const long long*)p)); + } + + /** + * @brief Construct from 1 scalar value replicated across all lanes. + * + * Consider using vfloat4::zero() for constexpr zeros. + */ + ASTCENC_SIMD_INLINE explicit vint8(int a) + { + m = _mm256_set1_epi32(a); + } + + /** + * @brief Construct from 8 scalar values. + * + * The value of @c a is stored to lane 0 (LSB) in the SIMD register. + */ + ASTCENC_SIMD_INLINE explicit vint8( + int a, int b, int c, int d, + int e, int f, int g, int h) + { + m = _mm256_set_epi32(h, g, f, e, d, c, b, a); + } + + /** + * @brief Construct from an existing SIMD register. + */ + ASTCENC_SIMD_INLINE explicit vint8(__m256i a) + { + m = a; + } + + /** + * @brief Get the scalar from a single lane. + */ + template ASTCENC_SIMD_INLINE int lane() const + { + #if !defined(__clang__) && defined(_MSC_VER) + return m.m256i_i32[l]; + #else + union { __m256i m; int f[8]; } cvt; + cvt.m = m; + return cvt.f[l]; + #endif + } + + /** + * @brief Factory that returns a vector of zeros. + */ + static ASTCENC_SIMD_INLINE vint8 zero() + { + return vint8(_mm256_setzero_si256()); + } + + /** + * @brief Factory that returns a replicated scalar loaded from memory. + */ + static ASTCENC_SIMD_INLINE vint8 load1(const int* p) + { + __m128i a = _mm_set1_epi32(*p); + return vint8(_mm256_broadcastd_epi32(a)); + } + + /** + * @brief Factory that returns a vector loaded from 32B aligned memory. + */ + static ASTCENC_SIMD_INLINE vint8 loada(const int* p) + { + return vint8(_mm256_load_si256((const __m256i*)p)); + } + + /** + * @brief Factory that returns a vector containing the lane IDs. + */ + static ASTCENC_SIMD_INLINE vint8 lane_id() + { + return vint8(_mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0)); + } + + /** + * @brief The vector ... + */ + __m256i m; +}; + +// ============================================================================ +// vmask8 data type +// ============================================================================ + +/** + * @brief Data type for 8-wide control plane masks. + */ +struct vmask8 +{ + /** + * @brief Construct from an existing SIMD register. + */ + ASTCENC_SIMD_INLINE explicit vmask8(__m256 a) + { + m = a; + } + + /** + * @brief Construct from an existing SIMD register. + */ + ASTCENC_SIMD_INLINE explicit vmask8(__m256i a) + { + m = _mm256_castsi256_ps(a); + } + + /** + * @brief The vector ... + */ + __m256 m; +}; + +// ============================================================================ +// vmask8 operators and functions +// ============================================================================ + +/** + * @brief Overload: mask union (or). + */ +ASTCENC_SIMD_INLINE vmask8 operator|(vmask8 a, vmask8 b) +{ + return vmask8(_mm256_or_ps(a.m, b.m)); +} + +/** + * @brief Overload: mask intersect (and). + */ +ASTCENC_SIMD_INLINE vmask8 operator&(vmask8 a, vmask8 b) +{ + return vmask8(_mm256_and_ps(a.m, b.m)); +} + +/** + * @brief Overload: mask difference (xor). + */ +ASTCENC_SIMD_INLINE vmask8 operator^(vmask8 a, vmask8 b) +{ + return vmask8(_mm256_xor_ps(a.m, b.m)); +} + +/** + * @brief Overload: mask invert (not). 
+ */ +ASTCENC_SIMD_INLINE vmask8 operator~(vmask8 a) +{ + return vmask8(_mm256_xor_si256(_mm256_castps_si256(a.m), _mm256_set1_epi32(-1))); +} + +/** + * @brief Return a 8-bit mask code indicating mask status. + * + * bit0 = lane 0 + */ +ASTCENC_SIMD_INLINE unsigned mask(vmask8 a) +{ + return _mm256_movemask_ps(a.m); +} + +/** + * @brief True if any lanes are enabled, false otherwise. + */ +ASTCENC_SIMD_INLINE bool any(vmask8 a) +{ + return mask(a) != 0; +} + +/** + * @brief True if any lanes are enabled, false otherwise. + */ +ASTCENC_SIMD_INLINE bool all(vmask8 a) +{ + return mask(a) == 0xFF; +} + +// ============================================================================ +// vint8 operators and functions +// ============================================================================ +/** + * @brief Overload: vector by vector addition. + */ +ASTCENC_SIMD_INLINE vint8 operator+(vint8 a, vint8 b) +{ + return vint8(_mm256_add_epi32(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector subtraction. + */ +ASTCENC_SIMD_INLINE vint8 operator-(vint8 a, vint8 b) +{ + return vint8(_mm256_sub_epi32(a.m, b.m)); +} + +/** + * @brief Overload: vector bit invert. + */ +ASTCENC_SIMD_INLINE vint8 operator~(vint8 a) +{ + return vint8(_mm256_xor_si256(a.m, _mm256_set1_epi32(-1))); +} + +/** + * @brief Overload: vector by vector bitwise or. + */ +ASTCENC_SIMD_INLINE vint8 operator|(vint8 a, vint8 b) +{ + return vint8(_mm256_or_si256(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector bitwise and. + */ +ASTCENC_SIMD_INLINE vint8 operator&(vint8 a, vint8 b) +{ + return vint8(_mm256_and_si256(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector bitwise xor. + */ +ASTCENC_SIMD_INLINE vint8 operator^(vint8 a, vint8 b) +{ + return vint8(_mm256_xor_si256(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector equality. + */ +ASTCENC_SIMD_INLINE vmask8 operator==(vint8 a, vint8 b) +{ + return vmask8(_mm256_cmpeq_epi32(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector inequality. + */ +ASTCENC_SIMD_INLINE vmask8 operator!=(vint8 a, vint8 b) +{ + return ~vmask8(_mm256_cmpeq_epi32(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector less than. + */ +ASTCENC_SIMD_INLINE vmask8 operator<(vint8 a, vint8 b) +{ + return vmask8(_mm256_cmpgt_epi32(b.m, a.m)); +} + +/** + * @brief Overload: vector by vector greater than. + */ +ASTCENC_SIMD_INLINE vmask8 operator>(vint8 a, vint8 b) +{ + return vmask8(_mm256_cmpgt_epi32(a.m, b.m)); +} + +/** + * @brief Return the min vector of two vectors. + */ +ASTCENC_SIMD_INLINE vint8 min(vint8 a, vint8 b) +{ + return vint8(_mm256_min_epi32(a.m, b.m)); +} + +/** + * @brief Return the max vector of two vectors. + */ +ASTCENC_SIMD_INLINE vint8 max(vint8 a, vint8 b) +{ + return vint8(_mm256_max_epi32(a.m, b.m)); +} + +/** + * @brief Return the horizontal minimum of a vector. + */ +ASTCENC_SIMD_INLINE vint8 hmin(vint8 a) +{ + __m128i m = _mm_min_epi32(_mm256_extracti128_si256(a.m, 0), _mm256_extracti128_si256(a.m, 1)); + m = _mm_min_epi32(m, _mm_shuffle_epi32(m, _MM_SHUFFLE(0,0,3,2))); + m = _mm_min_epi32(m, _mm_shuffle_epi32(m, _MM_SHUFFLE(0,0,0,1))); + m = _mm_shuffle_epi32(m, _MM_SHUFFLE(0,0,0,0)); + + // This is the most logical implementation, but the convenience intrinsic + // is missing on older compilers (supported in g++ 9 and clang++ 9). 
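+	// Broadcasting the 128-bit result into both halves with
+	// _mm256_insertf128_si256 below produces the same value in every lane,
+	// so it stands in for the commented-out _mm256_set_m128i.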
+	//__m256i r = _mm256_set_m128i(m, m)
+	__m256i r = _mm256_insertf128_si256(_mm256_castsi128_si256(m), m, 1);
+	vint8 vmin(r);
+	return vmin;
+}
+
+/**
+ * @brief Return the horizontal maximum of a vector.
+ */
+ASTCENC_SIMD_INLINE vint8 hmax(vint8 a)
+{
+	__m128i m = _mm_max_epi32(_mm256_extracti128_si256(a.m, 0), _mm256_extracti128_si256(a.m, 1));
+	m = _mm_max_epi32(m, _mm_shuffle_epi32(m, _MM_SHUFFLE(0,0,3,2)));
+	m = _mm_max_epi32(m, _mm_shuffle_epi32(m, _MM_SHUFFLE(0,0,0,1)));
+	m = _mm_shuffle_epi32(m, _MM_SHUFFLE(0,0,0,0));
+
+	// This is the most logical implementation, but the convenience intrinsic
+	// is missing on older compilers (supported in g++ 9 and clang++ 9).
+	//__m256i r = _mm256_set_m128i(m, m)
+	__m256i r = _mm256_insertf128_si256(_mm256_castsi128_si256(m), m, 1);
+	vint8 vmax(r);
+	return vmax;
+}
+
+/**
+ * @brief Store a vector to a 16B aligned memory address.
+ */
+ASTCENC_SIMD_INLINE void storea(vint8 a, int* p)
+{
+	_mm256_store_si256((__m256i*)p, a.m);
+}
+
+/**
+ * @brief Store lowest N (vector width) bytes into an unaligned address.
+ */
+ASTCENC_SIMD_INLINE void store_nbytes(vint8 a, uint8_t* p)
+{
+	// This is the most logical implementation, but the convenience intrinsic
+	// is missing on older compilers (supported in g++ 9 and clang++ 9).
+	// _mm_storeu_si64(ptr, _mm256_extracti128_si256(v.m, 0))
+	_mm_storel_epi64((__m128i*)p, _mm256_extracti128_si256(a.m, 0));
+}
+
+/**
+ * @brief Gather N (vector width) indices from the array.
+ */
+ASTCENC_SIMD_INLINE vint8 gatheri(const int* base, vint8 indices)
+{
+	return vint8(_mm256_i32gather_epi32(base, indices.m, 4));
+}
+
+/**
+ * @brief Pack low 8 bits of N (vector width) lanes into bottom of vector.
+ */
+ASTCENC_SIMD_INLINE vint8 pack_low_bytes(vint8 v)
+{
+	__m256i shuf = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0,
+	                               0, 0, 0, 0, 28, 24, 20, 16,
+	                               0, 0, 0, 0, 0, 0, 0, 0,
+	                               0, 0, 0, 0, 12, 8, 4, 0);
+	__m256i a = _mm256_shuffle_epi8(v.m, shuf);
+	__m128i a0 = _mm256_extracti128_si256(a, 0);
+	__m128i a1 = _mm256_extracti128_si256(a, 1);
+	__m128i b = _mm_unpacklo_epi32(a0, a1);
+
+	// This is the most logical implementation, but the convenience intrinsic
+	// is missing on older compilers (supported in g++ 9 and clang++ 9).
+	//__m256i r = _mm256_set_m128i(b, b)
+	__m256i r = _mm256_insertf128_si256(_mm256_castsi128_si256(b), b, 1);
+	return vint8(r);
+}
+
+/**
+ * @brief Return lanes from @c b if MSB of @c cond is set, else @c a.
+ */
+ASTCENC_SIMD_INLINE vint8 select(vint8 a, vint8 b, vmask8 cond)
+{
+	// Don't use _mm256_blendv_epi8 directly, as it does not give the useful
+	// select-on-float-sign-bit mask behavior. Performance is the same; these
+	// casts are free.
+	__m256 av = _mm256_castsi256_ps(a.m);
+	__m256 bv = _mm256_castsi256_ps(b.m);
+	return vint8(_mm256_castps_si256(_mm256_blendv_ps(av, bv, cond.m)));
+}
+
+/**
+ * @brief Debug function to print a vector of ints.
+ */
+ASTCENC_SIMD_INLINE void print(vint8 a)
+{
+	alignas(ASTCENC_VECALIGN) int v[8];
+	storea(a, v);
+	printf("v8_i32:\n %8d %8d %8d %8d %8d %8d %8d %8d\n",
+	       v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);
+}
+
+// ============================================================================
+// vfloat8 operators and functions
+// ============================================================================
+
+/**
+ * @brief Overload: vector by vector addition.
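+ *
+ * For example (illustrative values), vfloat8(1.0f) + vfloat8(2.0f) yields
+ * 3.0f in every lane.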
+ */ +ASTCENC_SIMD_INLINE vfloat8 operator+(vfloat8 a, vfloat8 b) +{ + return vfloat8(_mm256_add_ps(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector subtraction. + */ +ASTCENC_SIMD_INLINE vfloat8 operator-(vfloat8 a, vfloat8 b) +{ + return vfloat8(_mm256_sub_ps(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector multiplication. + */ +ASTCENC_SIMD_INLINE vfloat8 operator*(vfloat8 a, vfloat8 b) +{ + return vfloat8(_mm256_mul_ps(a.m, b.m)); +} + +/** + * @brief Overload: vector by scalar multiplication. + */ +ASTCENC_SIMD_INLINE vfloat8 operator*(vfloat8 a, float b) +{ + return vfloat8(_mm256_mul_ps(a.m, _mm256_set1_ps(b))); +} + +/** + * @brief Overload: scalar by vector multiplication. + */ +ASTCENC_SIMD_INLINE vfloat8 operator*(float a, vfloat8 b) +{ + return vfloat8(_mm256_mul_ps(_mm256_set1_ps(a), b.m)); +} + +/** + * @brief Overload: vector by vector division. + */ +ASTCENC_SIMD_INLINE vfloat8 operator/(vfloat8 a, vfloat8 b) +{ + return vfloat8(_mm256_div_ps(a.m, b.m)); +} + +/** + * @brief Overload: vector by scalar division. + */ +ASTCENC_SIMD_INLINE vfloat8 operator/(vfloat8 a, float b) +{ + return vfloat8(_mm256_div_ps(a.m, _mm256_set1_ps(b))); +} + + +/** + * @brief Overload: scalar by vector division. + */ +ASTCENC_SIMD_INLINE vfloat8 operator/(float a, vfloat8 b) +{ + return vfloat8(_mm256_div_ps(_mm256_set1_ps(a), b.m)); +} + + +/** + * @brief Overload: vector by vector equality. + */ +ASTCENC_SIMD_INLINE vmask8 operator==(vfloat8 a, vfloat8 b) +{ + return vmask8(_mm256_cmp_ps(a.m, b.m, _CMP_EQ_OQ)); +} + +/** + * @brief Overload: vector by vector inequality. + */ +ASTCENC_SIMD_INLINE vmask8 operator!=(vfloat8 a, vfloat8 b) +{ + return vmask8(_mm256_cmp_ps(a.m, b.m, _CMP_NEQ_OQ)); +} + +/** + * @brief Overload: vector by vector less than. + */ +ASTCENC_SIMD_INLINE vmask8 operator<(vfloat8 a, vfloat8 b) +{ + return vmask8(_mm256_cmp_ps(a.m, b.m, _CMP_LT_OQ)); +} + +/** + * @brief Overload: vector by vector greater than. + */ +ASTCENC_SIMD_INLINE vmask8 operator>(vfloat8 a, vfloat8 b) +{ + return vmask8(_mm256_cmp_ps(a.m, b.m, _CMP_GT_OQ)); +} + +/** + * @brief Overload: vector by vector les than or equal. + */ +ASTCENC_SIMD_INLINE vmask8 operator<=(vfloat8 a, vfloat8 b) +{ + return vmask8(_mm256_cmp_ps(a.m, b.m, _CMP_LE_OQ)); +} + +/** + * @brief Overload: vector by vector greater than or equal. + */ +ASTCENC_SIMD_INLINE vmask8 operator>=(vfloat8 a, vfloat8 b) +{ + return vmask8(_mm256_cmp_ps(a.m, b.m, _CMP_GE_OQ)); +} + +/** + * @brief Return the min vector of two vectors. + * + * If either lane value is NaN, @c b will be returned for that lane. + */ +ASTCENC_SIMD_INLINE vfloat8 min(vfloat8 a, vfloat8 b) +{ + return vfloat8(_mm256_min_ps(a.m, b.m)); +} + +/** + * @brief Return the max vector of two vectors. + * + * If either lane value is NaN, @c b will be returned for that lane. + */ +ASTCENC_SIMD_INLINE vfloat8 max(vfloat8 a, vfloat8 b) +{ + return vfloat8(_mm256_max_ps(a.m, b.m)); +} + +/** + * @brief Return the clamped value between min and max. + * + * It is assumed that neither @c min nor @c max are NaN values. If @c a is NaN + * then @c min will be returned for that lane. + */ +ASTCENC_SIMD_INLINE vfloat8 clamp(float min, float max, vfloat8 a) +{ + // Do not reorder - second operand will return if either is NaN + a.m = _mm256_max_ps(a.m, _mm256_set1_ps(min)); + a.m = _mm256_min_ps(a.m, _mm256_set1_ps(max)); + return a; +} + +/** + * @brief Return a clamped value between 0.0f and max. + * + * It is assumed that @c max is not a NaN value. 
If @c a is NaN then zero will + * be returned for that lane. + */ +ASTCENC_SIMD_INLINE vfloat8 clampz(float max, vfloat8 a) +{ + a.m = _mm256_max_ps(a.m, _mm256_setzero_ps()); + a.m = _mm256_min_ps(a.m, _mm256_set1_ps(max)); + return a; +} + +/** + * @brief Return a clamped value between 0.0f and 1.0f. + * + * If @c a is NaN then zero will be returned for that lane. + */ +ASTCENC_SIMD_INLINE vfloat8 clampzo(vfloat8 a) +{ + a.m = _mm256_max_ps(a.m, _mm256_setzero_ps()); + a.m = _mm256_min_ps(a.m, _mm256_set1_ps(1.0f)); + return a; +} + +/** + * @brief Return the absolute value of the float vector. + */ +ASTCENC_SIMD_INLINE vfloat8 abs(vfloat8 a) +{ + __m256 msk = _mm256_castsi256_ps(_mm256_set1_epi32(0x7fffffff)); + return vfloat8(_mm256_and_ps(a.m, msk)); +} + +/** + * @brief Return a float rounded to the nearest integer value. + */ +ASTCENC_SIMD_INLINE vfloat8 round(vfloat8 a) +{ + constexpr int flags = _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC; + return vfloat8(_mm256_round_ps(a.m, flags)); +} + +/** + * @brief Return the horizontal minimum of a vector. + */ +ASTCENC_SIMD_INLINE vfloat8 hmin(vfloat8 a) +{ + __m128 vlow = _mm256_castps256_ps128(a.m); + __m128 vhigh = _mm256_extractf128_ps(a.m, 1); + vlow = _mm_min_ps(vlow, vhigh); + + // First do an horizontal reduction. + __m128 shuf = _mm_shuffle_ps(vlow, vlow, _MM_SHUFFLE(2, 3, 0, 1)); + __m128 mins = _mm_min_ps(vlow, shuf); + shuf = _mm_movehl_ps(shuf, mins); + mins = _mm_min_ss(mins, shuf); + + // This is the most logical implementation, but the convenience intrinsic + // is missing on older compilers (supported in g++ 9 and clang++ 9). + //__m256i r = _mm256_set_m128(m, m) + __m256 r = _mm256_insertf128_ps(_mm256_castps128_ps256(mins), mins, 1); + + return vfloat8(_mm256_permute_ps(r, 0)); +} + +/** + * @brief Return the horizontal minimum of a vector. + */ +ASTCENC_SIMD_INLINE float hmin_s(vfloat8 a) +{ + return hmin(a).lane<0>(); +} + +/** + * @brief Return the horizontal maximum of a vector. + */ +ASTCENC_SIMD_INLINE vfloat8 hmax(vfloat8 a) +{ + __m128 vlow = _mm256_castps256_ps128(a.m); + __m128 vhigh = _mm256_extractf128_ps(a.m, 1); + vhigh = _mm_max_ps(vlow, vhigh); + + // First do an horizontal reduction. + __m128 shuf = _mm_shuffle_ps(vhigh, vhigh, _MM_SHUFFLE(2, 3, 0, 1)); + __m128 maxs = _mm_max_ps(vhigh, shuf); + shuf = _mm_movehl_ps(shuf,maxs); + maxs = _mm_max_ss(maxs, shuf); + + // This is the most logical implementation, but the convenience intrinsic + // is missing on older compilers (supported in g++ 9 and clang++ 9). + //__m256i r = _mm256_set_m128(m, m) + __m256 r = _mm256_insertf128_ps(_mm256_castps128_ps256(maxs), maxs, 1); + return vfloat8(_mm256_permute_ps(r, 0)); +} + +/** + * @brief Return the horizontal maximum of a vector. + */ +ASTCENC_SIMD_INLINE float hmax_s(vfloat8 a) +{ + return hmax(a).lane<0>(); +} + +/** + * @brief Return the horizontal sum of a vector. + */ +ASTCENC_SIMD_INLINE float hadd_s(vfloat8 a) +{ + // Two sequential 4-wide adds gives invariance with 4-wide code + vfloat4 lo(_mm256_extractf128_ps(a.m, 0)); + vfloat4 hi(_mm256_extractf128_ps(a.m, 1)); + return hadd_s(lo) + hadd_s(hi); +} + +/** + * @brief Accumulate the full horizontal sum of a vector. + */ +ASTCENC_SIMD_INLINE void haccumulate(float& accum, vfloat8 a) +{ + // Two sequential 4-wide accumulates gives invariance with 4-wide code. + // Note that this approach gives higher error in the sum; adding the two + // smaller numbers together first would be more accurate. 
+ vfloat4 lo(_mm256_extractf128_ps(a.m, 0)); + haccumulate(accum, lo); + + vfloat4 hi(_mm256_extractf128_ps(a.m, 1)); + haccumulate(accum, hi); +} + +/** + * @brief Accumulate lane-wise sums for a vector, folded 4-wide. + */ +ASTCENC_SIMD_INLINE void haccumulate(vfloat4& accum, vfloat8 a) +{ + // Two sequential 4-wide accumulates gives invariance with 4-wide code. + // Note that this approach gives higher error in the sum; adding the two + // smaller numbers together first would be more accurate. + vfloat4 lo(_mm256_extractf128_ps(a.m, 0)); + haccumulate(accum, lo); + + vfloat4 hi(_mm256_extractf128_ps(a.m, 1)); + haccumulate(accum, hi); +} + +/** + * @brief Return the sqrt of the lanes in the vector. + */ +ASTCENC_SIMD_INLINE vfloat8 sqrt(vfloat8 a) +{ + return vfloat8(_mm256_sqrt_ps(a.m)); +} + +/** + * @brief Return lanes from @c b if MSB of @c cond is set, else @c a. + */ +ASTCENC_SIMD_INLINE vfloat8 select(vfloat8 a, vfloat8 b, vmask8 cond) +{ + return vfloat8(_mm256_blendv_ps(a.m, b.m, cond.m)); +} + +/** + * @brief Load a vector of gathered results from an array; + */ +ASTCENC_SIMD_INLINE vfloat8 gatherf(const float* base, vint8 indices) +{ + return vfloat8(_mm256_i32gather_ps(base, indices.m, 4)); +} + +/** + * @brief Store a vector to an unaligned memory address. + */ +ASTCENC_SIMD_INLINE void store(vfloat8 a, float* p) +{ + _mm256_storeu_ps(p, a.m); +} + +/** + * @brief Store a vector to a 32B aligned memory address. + */ +ASTCENC_SIMD_INLINE void storea(vfloat8 a, float* p) +{ + _mm256_store_ps(p, a.m); +} + +/** + * @brief Return a integer value for a float vector, using truncation. + */ +ASTCENC_SIMD_INLINE vint8 float_to_int(vfloat8 a) +{ + return vint8(_mm256_cvttps_epi32(a.m)); +} + +/** + * @brief Return a float value as an integer bit pattern (i.e. no conversion). + * + * It is a common trick to convert floats into integer bit patterns, perform + * some bit hackery based on knowledge they are IEEE 754 layout, and then + * convert them back again. This is the first half of that flip. + */ +ASTCENC_SIMD_INLINE vint8 float_as_int(vfloat8 a) +{ + return vint8(_mm256_castps_si256(a.m)); +} + +/** + * @brief Return a integer value as a float bit pattern (i.e. no conversion). + * + * It is a common trick to convert floats into integer bit patterns, perform + * some bit hackery based on knowledge they are IEEE 754 layout, and then + * convert them back again. This is the second half of that flip. + */ +ASTCENC_SIMD_INLINE vfloat8 int_as_float(vint8 a) +{ + return vfloat8(_mm256_castsi256_ps(a.m)); +} + +/** + * @brief Debug function to print a vector of floats. + */ +ASTCENC_SIMD_INLINE void print(vfloat8 a) +{ + alignas(ASTCENC_VECALIGN) float v[8]; + storea(a, v); + printf("v8_f32:\n %0.4f %0.4f %0.4f %0.4f %0.4f %0.4f %0.4f %0.4f\n", + (double)v[0], (double)v[1], (double)v[2], (double)v[3], + (double)v[4], (double)v[5], (double)v[6], (double)v[7]); +} + +#endif // #ifndef ASTC_VECMATHLIB_AVX2_8_H_INCLUDED diff --git a/libkram/astc-encoder/astcenc_vecmathlib_common_4.h b/libkram/astc-encoder/astcenc_vecmathlib_common_4.h new file mode 100755 index 00000000..319537b6 --- /dev/null +++ b/libkram/astc-encoder/astcenc_vecmathlib_common_4.h @@ -0,0 +1,352 @@ +// SPDX-License-Identifier: Apache-2.0 +// ---------------------------------------------------------------------------- +// Copyright 2020-2021 Arm Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not +// use this file except in compliance with the License. 
You may obtain a copy +// of the License at: +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations +// under the License. +// ---------------------------------------------------------------------------- + +/** + * @brief Generic 4x32-bit vector functions. + * + * This module implements generic 4-wide vector functions that are valid for + * all instruction sets, typically implemented using lower level 4-wide + * operations that are ISA-specific. + */ + +#ifndef ASTC_VECMATHLIB_COMMON_4_H_INCLUDED +#define ASTC_VECMATHLIB_COMMON_4_H_INCLUDED + +#ifndef ASTCENC_SIMD_INLINE + #error "Include astcenc_vecmathlib.h, do not include directly" +#endif + +#include + +// ============================================================================ +// vmask4 operators and functions +// ============================================================================ + +/** + * @brief True if any lanes are enabled, false otherwise. + */ +ASTCENC_SIMD_INLINE bool any(vmask4 a) +{ + return mask(a) != 0; +} + +/** + * @brief True if all lanes are enabled, false otherwise. + */ +ASTCENC_SIMD_INLINE bool all(vmask4 a) +{ + return mask(a) == 0xF; +} + +// ============================================================================ +// vint4 operators and functions +// ============================================================================ + +/** + * @brief Overload: vector by scalar addition. + */ +ASTCENC_SIMD_INLINE vint4 operator+(vint4 a, int b) +{ + return a + vint4(b); +} + +/** + * @brief Overload: vector by scalar subtraction. + */ +ASTCENC_SIMD_INLINE vint4 operator-(vint4 a, int b) +{ + return a - vint4(b); +} + +/** + * @brief Overload: vector by scalar multiplication. + */ +ASTCENC_SIMD_INLINE vint4 operator*(vint4 a, int b) +{ + return a * vint4(b); +} + +/** + * @brief Overload: vector by scalar bitwise or. + */ +ASTCENC_SIMD_INLINE vint4 operator|(vint4 a, int b) +{ + return a | vint4(b); +} + +/** + * @brief Overload: vector by scalar bitwise and. + */ +ASTCENC_SIMD_INLINE vint4 operator&(vint4 a, int b) +{ + return a & vint4(b); +} + +/** + * @brief Overload: vector by scalar bitwise xor. + */ +ASTCENC_SIMD_INLINE vint4 operator^(vint4 a, int b) +{ + return a ^ vint4(b); +} + +/** + * @brief Return the clamped value between min and max. + */ +ASTCENC_SIMD_INLINE vint4 clamp(int minv, int maxv, vint4 a) +{ + return min(max(a, vint4(minv)), vint4(maxv)); +} + +/** + * @brief Return the horizontal sum of RGB vector lanes as a scalar. + */ +ASTCENC_SIMD_INLINE int hadd_rgb_s(vint4 a) +{ + return a.lane<0>() + a.lane<1>() + a.lane<2>(); +} + +/** + * @brief Debug function to print a vector of ints. + */ +ASTCENC_SIMD_INLINE void print(vint4 a) +{ + alignas(16) int v[4]; + storea(a, v); + printf("v4_i32:\n %8d %8d %8d %8d\n", + v[0], v[1], v[2], v[3]); +} + +// ============================================================================ +// vfloat4 operators and functions +// ============================================================================ + +ASTCENC_SIMD_INLINE vfloat4& operator+=(vfloat4& a, const vfloat4& b) +{ + a = a + b; + return a; +} + +/** + * @brief Overload: vector by scalar addition. 
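+ *
+ * Equivalent to a + vfloat4(b); for example (illustrative values),
+ * vfloat4(1, 2, 3, 4) + 1.0f gives (2, 3, 4, 5).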
+ */ +ASTCENC_SIMD_INLINE vfloat4 operator+(vfloat4 a, float b) +{ + return a + vfloat4(b); +} + +/** + * @brief Overload: vector by scalar subtraction. + */ +ASTCENC_SIMD_INLINE vfloat4 operator-(vfloat4 a, float b) +{ + return a - vfloat4(b); +} + +/** + * @brief Overload: vector by scalar multiplication. + */ +ASTCENC_SIMD_INLINE vfloat4 operator*(vfloat4 a, float b) +{ + return a * vfloat4(b); +} + +/** + * @brief Overload: scalar by vector multiplication. + */ +ASTCENC_SIMD_INLINE vfloat4 operator*(float a, vfloat4 b) +{ + return vfloat4(a) * b; +} + +/** + * @brief Overload: vector by scalar division. + */ +ASTCENC_SIMD_INLINE vfloat4 operator/(vfloat4 a, float b) +{ + return a / vfloat4(b); +} + +/** + * @brief Overload: scalar by vector division. + */ +ASTCENC_SIMD_INLINE vfloat4 operator/(float a, vfloat4 b) +{ + return vfloat4(a) / b; +} + +/** + * @brief Return the min vector of a vector and a scalar. + * + * If either lane value is NaN, @c b will be returned for that lane. + */ +ASTCENC_SIMD_INLINE vfloat4 min(vfloat4 a, float b) +{ + return min(a, vfloat4(b)); +} + +/** + * @brief Return the max vector of a vector and a scalar. + * + * If either lane value is NaN, @c b will be returned for that lane. + */ +ASTCENC_SIMD_INLINE vfloat4 max(vfloat4 a, float b) +{ + return max(a, vfloat4(b)); +} + +/** + * @brief Return the clamped value between min and max. + * + * It is assumed that neither @c min nor @c max are NaN values. If @c a is NaN + * then @c min will be returned for that lane. + */ +ASTCENC_SIMD_INLINE vfloat4 clamp(float minv, float maxv, vfloat4 a) +{ + // Do not reorder - second operand will return if either is NaN + return min(max(a, minv), maxv); +} + +/** + * @brief Return the clamped value between 0.0f and max. + * + * It is assumed that @c max is not a NaN value. If @c a is NaN then zero will + * be returned for that lane. + */ +ASTCENC_SIMD_INLINE vfloat4 clampz(float maxv, vfloat4 a) +{ + // Do not reorder - second operand will return if either is NaN + return min(max(a, vfloat4::zero()), maxv); +} + +/** + * @brief Return the clamped value between 0.0f and 1.0f. + * + * If @c a is NaN then zero will be returned for that lane. + */ +ASTCENC_SIMD_INLINE vfloat4 clampzo(vfloat4 a) +{ + // Do not reorder - second operand will return if either is NaN + return min(max(a, vfloat4::zero()), 1.0f); +} + +/** + * @brief Return the horizontal minimum of a vector. + */ +ASTCENC_SIMD_INLINE float hmin_s(vfloat4 a) +{ + return hmin(a).lane<0>(); +} + +/** + * @brief Return the horizontal min of RGB vector lanes as a scalar. + */ +ASTCENC_SIMD_INLINE float hmin_rgb_s(vfloat4 a) +{ + a.set_lane<3>(a.lane<0>()); + return hmin_s(a); +} + +/** + * @brief Return the horizontal maximum of a vector. + */ +ASTCENC_SIMD_INLINE float hmax_s(vfloat4 a) +{ + return hmax(a).lane<0>(); +} + +/** + * @brief Accumulate the full horizontal sum of a vector. + */ +ASTCENC_SIMD_INLINE void haccumulate(float& accum, vfloat4 a) +{ + accum += hadd_s(a); +} + +/** + * @brief Accumulate lane-wise sums for a vector. + */ +ASTCENC_SIMD_INLINE void haccumulate(vfloat4& accum, vfloat4 a) +{ + accum = accum + a; +} + +/** + * @brief Return the horizontal sum of RGB vector lanes as a scalar. + */ +ASTCENC_SIMD_INLINE float hadd_rgb_s(vfloat4 a) +{ + return a.lane<0>() + a.lane<1>() + a.lane<2>(); +} + +/** + * @brief Return the dot product for the full 4 lanes, returning scalar. 
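+ *
+ * For example (illustrative values), dot_s(vfloat4(1, 2, 3, 4),
+ * vfloat4(1, 1, 1, 1)) returns 10.0f.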
+ */ +ASTCENC_SIMD_INLINE float dot_s(vfloat4 a, vfloat4 b) +{ + vfloat4 m = a * b; + return hadd_s(m); +} + +/** + * @brief Return the dot product for the full 4 lanes, returning vector. + */ +ASTCENC_SIMD_INLINE vfloat4 dot(vfloat4 a, vfloat4 b) +{ + vfloat4 m = a * b; + return vfloat4(hadd_s(m)); +} + +/** + * @brief Return the dot product for the bottom 3 lanes, returning scalar. + */ +ASTCENC_SIMD_INLINE float dot3_s(vfloat4 a, vfloat4 b) +{ + vfloat4 m = a * b; + return hadd_rgb_s(m); +} + +/** + * @brief Return the dot product for the full 4 lanes, returning vector. + */ +ASTCENC_SIMD_INLINE vfloat4 dot3(vfloat4 a, vfloat4 b) +{ + vfloat4 m = a * b; + float d3 = hadd_rgb_s(m); + return vfloat4(d3, d3, d3, 0.0f); +} + +/** + * @brief Generate a reciprocal of a vector. + */ +ASTCENC_SIMD_INLINE vfloat4 recip(vfloat4 b) +{ + return 1.0f / b; +} + +/** + * @brief Debug function to print a vector of floats. + */ +ASTCENC_SIMD_INLINE void print(vfloat4 a) +{ + alignas(16) float v[4]; + storea(a, v); + printf("v4_f32:\n %0.4f %0.4f %0.4f %0.4f\n", + (double)v[0], (double)v[1], (double)v[2], (double)v[3]); +} + +#endif // #ifndef ASTC_VECMATHLIB_COMMON_4_H_INCLUDED diff --git a/libkram/astc-encoder/astcenc_vecmathlib_neon_4.h b/libkram/astc-encoder/astcenc_vecmathlib_neon_4.h new file mode 100755 index 00000000..a1163531 --- /dev/null +++ b/libkram/astc-encoder/astcenc_vecmathlib_neon_4.h @@ -0,0 +1,915 @@ +// SPDX-License-Identifier: Apache-2.0 +// ---------------------------------------------------------------------------- +// Copyright 2019-2021 Arm Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not +// use this file except in compliance with the License. You may obtain a copy +// of the License at: +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations +// under the License. +// ---------------------------------------------------------------------------- + +/** + * @brief 4x32-bit vectors, implemented using Armv8-A NEON. + * + * This module implements 4-wide 32-bit float, int, and mask vectors for + * Armv8-A NEON. + * + * There is a baseline level of functionality provided by all vector widths and + * implementations. This is implemented using identical function signatures, + * modulo data type, so we can use them as substitutable implementations in VLA + * code. + * + * The 4-wide vectors are also used as a fixed-width type, and significantly + * extend the functionality above that available to VLA code. + */ + +#ifndef ASTC_VECMATHLIB_NEON_4_H_INCLUDED +#define ASTC_VECMATHLIB_NEON_4_H_INCLUDED + +#ifndef ASTCENC_SIMD_INLINE + #error "Include astcenc_vecmathlib.h, do not include directly" +#endif + +#include + +#if defined(__arm__) + #include "astcenc_vecmathlib_neon_armv7_4.h" +#endif + +// ============================================================================ +// vfloat4 data type +// ============================================================================ + +/** + * @brief Data type for 4-wide floats. + */ +struct vfloat4 +{ + /** + * @brief Construct from zero-initialized value. + */ + ASTCENC_SIMD_INLINE vfloat4() {} + + /** + * @brief Construct from 4 values loaded from an unaligned address. 
+ * + * Consider using loada() which is better with vectors if data is aligned + * to vector length. + */ + ASTCENC_SIMD_INLINE explicit vfloat4(const float *p) + { + m = vld1q_f32(p); + } + + /** + * @brief Construct from 1 scalar value replicated across all lanes. + * + * Consider using zero() for constexpr zeros. + */ + ASTCENC_SIMD_INLINE explicit vfloat4(float a) + { + m = vdupq_n_f32(a); + } + + /** + * @brief Construct from 4 scalar values. + * + * The value of @c a is stored to lane 0 (LSB) in the SIMD register. + */ + ASTCENC_SIMD_INLINE explicit vfloat4(float a, float b, float c, float d) + { + float32x4_t v { a, b, c, d }; + m = v; + } + + /** + * @brief Construct from an existing SIMD register. + */ + ASTCENC_SIMD_INLINE explicit vfloat4(float32x4_t a) + { + m = a; + } + + /** + * @brief Get the scalar value of a single lane. + */ + template ASTCENC_SIMD_INLINE float lane() const + { + return vgetq_lane_f32(m, l); + } + + /** + * @brief Set the scalar value of a single lane. + */ + template ASTCENC_SIMD_INLINE void set_lane(float a) + { + m = vld1q_lane_f32(&a, m, l); + } + + /** + * @brief Factory that returns a vector of zeros. + */ + static ASTCENC_SIMD_INLINE vfloat4 zero() + { + return vfloat4(vdupq_n_f32(0.0f)); + } + + /** + * @brief Factory that returns a replicated scalar loaded from memory. + */ + static ASTCENC_SIMD_INLINE vfloat4 load1(const float* p) + { + return vfloat4(vdupq_n_f32(*p)); + } + + /** + * @brief Factory that returns a vector loaded from 16B aligned memory. + */ + static ASTCENC_SIMD_INLINE vfloat4 loada(const float* p) + { + return vfloat4(vld1q_f32(p)); + } + + /** + * @brief Factory that returns a vector containing the lane IDs. + */ + static ASTCENC_SIMD_INLINE vfloat4 lane_id() + { + alignas(16) float data[4] { 0.0f, 1.0f, 2.0f, 3.0f }; + return vfloat4(vld1q_f32(data)); + } + + /** + * @brief Return a swizzled float 2. + */ + template ASTCENC_SIMD_INLINE float2 swz() const + { + return float2(lane(), lane()); + } + + /** + * @brief Return a swizzled float 3. + * + * TODO: Implement using permutes. + */ + template ASTCENC_SIMD_INLINE vfloat4 swz() const + { + return vfloat4(lane(), lane(), lane(), 0.0f); + } + + /** + * @brief Return a swizzled float 4. + * + * TODO: Implement using permutes. + */ + template ASTCENC_SIMD_INLINE vfloat4 swz() const + { + return vfloat4(lane(), lane(), lane(), lane()); + } + + /** + * @brief The vector ... + */ + float32x4_t m; +}; + +// ============================================================================ +// vint4 data type +// ============================================================================ + +/** + * @brief Data type for 4-wide ints. + */ +struct vint4 +{ + /** + * @brief Construct from zero-initialized value. + */ + ASTCENC_SIMD_INLINE vint4() {} + + /** + * @brief Construct from 4 values loaded from an unaligned address. + * + * Consider using loada() which is better with vectors if data is aligned + * to vector length. + */ + ASTCENC_SIMD_INLINE explicit vint4(const int *p) + { + m = vld1q_s32(p); + } + + /** + * @brief Construct from 4 uint8_t loaded from an unaligned address. + */ + ASTCENC_SIMD_INLINE explicit vint4(const uint8_t *p) + { + uint32x2_t t8 {}; + // Cast is safe - NEON loads are allowed to be unaligned + t8 = vld1_lane_u32((const uint32_t*)p, t8, 0); + uint16x4_t t16 = vget_low_u16(vmovl_u8(vreinterpret_u8_u32(t8))); + m = vreinterpretq_s32_u32(vmovl_u16(t16)); + } + + /** + * @brief Construct from 1 scalar value replicated across all lanes. 
+ * + * Consider using vfloat4::zero() for constexpr zeros. + */ + ASTCENC_SIMD_INLINE explicit vint4(int a) + { + m = vdupq_n_s32(a); + } + + /** + * @brief Construct from 4 scalar values. + * + * The value of @c a is stored to lane 0 (LSB) in the SIMD register. + */ + ASTCENC_SIMD_INLINE explicit vint4(int a, int b, int c, int d) + { + int32x4_t v { a, b, c, d }; + m = v; + } + + /** + * @brief Construct from an existing SIMD register. + */ + ASTCENC_SIMD_INLINE explicit vint4(int32x4_t a) + { + m = a; + } + + /** + * @brief Get the scalar from a single lane. + */ + template ASTCENC_SIMD_INLINE int lane() const + { + return vgetq_lane_s32(m, l); + } + + /** + * @brief Set the scalar value of a single lane. + */ + template ASTCENC_SIMD_INLINE void set_lane(int a) + { + m = vld1q_lane_s32(&a, m, l); + } + + /** + * @brief Factory that returns a vector of zeros. + */ + static ASTCENC_SIMD_INLINE vint4 zero() + { + return vint4(0); + } + + /** + * @brief Factory that returns a replicated scalar loaded from memory. + */ + static ASTCENC_SIMD_INLINE vint4 load1(const int* p) + { + return vint4(*p); + } + + /** + * @brief Factory that returns a vector loaded from 16B aligned memory. + */ + static ASTCENC_SIMD_INLINE vint4 loada(const int* p) + { + return vint4(*p); + } + + /** + * @brief Factory that returns a vector containing the lane IDs. + */ + static ASTCENC_SIMD_INLINE vint4 lane_id() + { + alignas(ASTCENC_VECALIGN) static const int data[4] { 0, 1, 2, 3 }; + return vint4(vld1q_s32(data)); + } + + /** + * @brief The vector ... + */ + int32x4_t m; +}; + +// ============================================================================ +// vmask4 data type +// ============================================================================ + +/** + * @brief Data type for 4-wide control plane masks. + */ +struct vmask4 +{ + /** + * @brief Construct from an existing SIMD register. + */ + ASTCENC_SIMD_INLINE explicit vmask4(uint32x4_t a) + { + m = a; + } + + /** + * @brief Construct from an existing SIMD register. + */ + ASTCENC_SIMD_INLINE explicit vmask4(int32x4_t a) + { + m = vreinterpretq_u32_s32(a); + } + + /** + * @brief Construct from an existing SIMD register. + */ + ASTCENC_SIMD_INLINE explicit vmask4(bool a, bool b, bool c, bool d) + { + int32x4_t v { + a == true ? -1 : 0, + b == true ? -1 : 0, + c == true ? -1 : 0, + d == true ? -1 : 0 + }; + + m = vreinterpretq_u32_s32(v); + } + + + /** + * @brief The vector ... + */ + uint32x4_t m; +}; + +// ============================================================================ +// vmask4 operators and functions +// ============================================================================ + +/** + * @brief Overload: mask union (or). + */ +ASTCENC_SIMD_INLINE vmask4 operator|(vmask4 a, vmask4 b) +{ + return vmask4(vorrq_u32(a.m, b.m)); +} + +/** + * @brief Overload: mask intersect (and). + */ +ASTCENC_SIMD_INLINE vmask4 operator&(vmask4 a, vmask4 b) +{ + return vmask4(vandq_u32(a.m, b.m)); +} + +/** + * @brief Overload: mask difference (xor). + */ +ASTCENC_SIMD_INLINE vmask4 operator^(vmask4 a, vmask4 b) +{ + return vmask4(veorq_u32(a.m, b.m)); +} + +/** + * @brief Overload: mask invert (not). + */ +ASTCENC_SIMD_INLINE vmask4 operator~(vmask4 a) +{ + return vmask4(vmvnq_u32(a.m)); +} + +/** + * @brief Return a 4-bit mask code indicating mask status. 
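+ *
+ * For example (illustrative), a mask with only lanes 0 and 2 enabled
+ * yields the code 0x5.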
+ * + * bit0 = lane 0 + */ +ASTCENC_SIMD_INLINE unsigned int mask(vmask4 a) +{ + static const int32x4_t shift { 0, 1, 2, 3 }; + uint32x4_t tmp = vshrq_n_u32(a.m, 31); + return vaddvq_u32(vshlq_u32(tmp, shift)); +} + +// ============================================================================ +// vint4 operators and functions +// ============================================================================ + +/** + * @brief Overload: vector by vector addition. + */ +ASTCENC_SIMD_INLINE vint4 operator+(vint4 a, vint4 b) +{ + return vint4(vaddq_s32(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector subtraction. + */ +ASTCENC_SIMD_INLINE vint4 operator-(vint4 a, vint4 b) +{ + return vint4(vsubq_s32(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector multiplication. + */ +ASTCENC_SIMD_INLINE vint4 operator*(vint4 a, vint4 b) +{ + return vint4(vmulq_s32(a.m, b.m)); +} + +/** + * @brief Overload: vector bit invert. + */ +ASTCENC_SIMD_INLINE vint4 operator~(vint4 a) +{ + return vint4(vmvnq_s32(a.m)); +} + +/** + * @brief Overload: vector by vector bitwise or. + */ +ASTCENC_SIMD_INLINE vint4 operator|(vint4 a, vint4 b) +{ + return vint4(vorrq_s32(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector bitwise and. + */ +ASTCENC_SIMD_INLINE vint4 operator&(vint4 a, vint4 b) +{ + return vint4(vandq_s32(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector bitwise xor. + */ +ASTCENC_SIMD_INLINE vint4 operator^(vint4 a, vint4 b) +{ + return vint4(veorq_s32(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector equality. + */ +ASTCENC_SIMD_INLINE vmask4 operator==(vint4 a, vint4 b) +{ + return vmask4(vceqq_s32(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector inequality. + */ +ASTCENC_SIMD_INLINE vmask4 operator!=(vint4 a, vint4 b) +{ + return ~vmask4(vceqq_s32(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector less than. + */ +ASTCENC_SIMD_INLINE vmask4 operator<(vint4 a, vint4 b) +{ + return vmask4(vcltq_s32(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector greater than. + */ +ASTCENC_SIMD_INLINE vmask4 operator>(vint4 a, vint4 b) +{ + return vmask4(vcgtq_s32(a.m, b.m)); +} + +/** + * @brief Logical shift left. + */ +template ASTCENC_SIMD_INLINE vint4 lsl(vint4 a) +{ + return vint4(vshlq_s32(a.m, vdupq_n_s32(s))); +} + +/** + * @brief Logical shift right. + */ +template ASTCENC_SIMD_INLINE vint4 lsr(vint4 a) +{ + uint32x4_t ua = vreinterpretq_u32_s32(a.m); + ua = vshlq_u32(ua, vdupq_n_s32(-s)); + return vint4(vreinterpretq_s32_u32(ua)); +} + +/** + * @brief Arithmetic shift right. + */ +template ASTCENC_SIMD_INLINE vint4 asr(vint4 a) +{ + return vint4(vshlq_s32(a.m, vdupq_n_s32(-s))); +} + +/** + * @brief Return the min vector of two vectors. + */ +ASTCENC_SIMD_INLINE vint4 min(vint4 a, vint4 b) +{ + return vint4(vminq_s32(a.m, b.m)); +} + +/** + * @brief Return the max vector of two vectors. + */ +ASTCENC_SIMD_INLINE vint4 max(vint4 a, vint4 b) +{ + return vint4(vmaxq_s32(a.m, b.m)); +} + +/** + * @brief Return the horizontal minimum of a vector. + */ +ASTCENC_SIMD_INLINE vint4 hmin(vint4 a) +{ + return vint4(vminvq_s32(a.m)); +} + +/** + * @brief Return the horizontal maximum of a vector. + */ +ASTCENC_SIMD_INLINE vint4 hmax(vint4 a) +{ + return vint4(vmaxvq_s32(a.m)); +} + +/** + * @brief Return the horizontal sum of a vector. 
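+ *
+ * For example (illustrative values), hadd_s(vint4(1, 2, 3, 4)) returns 10.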
+ */ +ASTCENC_SIMD_INLINE int hadd_s(vint4 a) +{ + int32x2_t t = vadd_s32(vget_high_s32(a.m), vget_low_s32(a.m)); + return vget_lane_s32(vpadd_s32(t, t), 0); +} + +/** + * @brief Store a vector to a 16B aligned memory address. + */ +ASTCENC_SIMD_INLINE void storea(vint4 a, int* p) +{ + vst1q_s32(p, a.m); +} + +/** + * @brief Store a vector to an unaligned memory address. + */ +ASTCENC_SIMD_INLINE void store(vint4 a, int* p) +{ + vst1q_s32(p, a.m); +} + +/** + * @brief Store lowest N (vector width) bytes into an unaligned address. + */ +ASTCENC_SIMD_INLINE void store_nbytes(vint4 a, uint8_t* p) +{ + vst1q_lane_s32((int32_t*)p, a.m, 0); +} + +/** + * @brief Gather N (vector width) indices from the array. + */ +ASTCENC_SIMD_INLINE vint4 gatheri(const int* base, vint4 indices) +{ + alignas(16) int idx[4]; + storea(indices, idx); + alignas(16) int vals[4]; + vals[0] = base[idx[0]]; + vals[1] = base[idx[1]]; + vals[2] = base[idx[2]]; + vals[3] = base[idx[3]]; + return vint4(vals); +} + +/** + * @brief Pack low 8 bits of N (vector width) lanes into bottom of vector. + */ +ASTCENC_SIMD_INLINE vint4 pack_low_bytes(vint4 a) +{ + alignas(16) uint8_t shuf[16] = { + 0, 4, 8, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + }; + uint8x16_t idx = vld1q_u8(shuf); + int8x16_t av = vreinterpretq_s8_s32(a.m); + return vint4(vreinterpretq_s32_s8(vqtbl1q_s8(av, idx))); +} + +/** + * @brief Return lanes from @c b if MSB of @c cond is set, else @c a. + */ +ASTCENC_SIMD_INLINE vint4 select(vint4 a, vint4 b, vmask4 cond) +{ + static const uint32x4_t msb = vdupq_n_u32(0x80000000u); + uint32x4_t mask = vcgeq_u32(cond.m, msb); + return vint4(vbslq_s32(mask, b.m, a.m)); +} + +// ============================================================================ +// vfloat4 operators and functions +// ============================================================================ + +/** + * @brief Overload: vector by vector addition. + */ +ASTCENC_SIMD_INLINE vfloat4 operator+(vfloat4 a, vfloat4 b) +{ + return vfloat4(vaddq_f32(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector subtraction. + */ +ASTCENC_SIMD_INLINE vfloat4 operator-(vfloat4 a, vfloat4 b) +{ + return vfloat4(vsubq_f32(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector multiplication. + */ +ASTCENC_SIMD_INLINE vfloat4 operator*(vfloat4 a, vfloat4 b) +{ + return vfloat4(vmulq_f32(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector division. + */ +ASTCENC_SIMD_INLINE vfloat4 operator/(vfloat4 a, vfloat4 b) +{ + return vfloat4(vdivq_f32(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector equality. + */ +ASTCENC_SIMD_INLINE vmask4 operator==(vfloat4 a, vfloat4 b) +{ + return vmask4(vceqq_f32(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector inequality. + */ +ASTCENC_SIMD_INLINE vmask4 operator!=(vfloat4 a, vfloat4 b) +{ + return vmask4(vmvnq_u32(vceqq_f32(a.m, b.m))); +} + +/** + * @brief Overload: vector by vector less than. + */ +ASTCENC_SIMD_INLINE vmask4 operator<(vfloat4 a, vfloat4 b) +{ + return vmask4(vcltq_f32(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector greater than. + */ +ASTCENC_SIMD_INLINE vmask4 operator>(vfloat4 a, vfloat4 b) +{ + return vmask4(vcgtq_f32(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector less than or equal. + */ +ASTCENC_SIMD_INLINE vmask4 operator<=(vfloat4 a, vfloat4 b) +{ + return vmask4(vcleq_f32(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector greater than or equal. 
+ */ +ASTCENC_SIMD_INLINE vmask4 operator>=(vfloat4 a, vfloat4 b) +{ + return vmask4(vcgeq_f32(a.m, b.m)); +} + +/** + * @brief Return the min vector of two vectors. + * + * If either lane value is NaN, @c b will be returned for that lane. + */ +ASTCENC_SIMD_INLINE vfloat4 min(vfloat4 a, vfloat4 b) +{ + // Do not reorder - second operand will return if either is NaN + return vfloat4(vminnmq_f32(a.m, b.m)); +} + +/** + * @brief Return the max vector of two vectors. + * + * If either lane value is NaN, @c b will be returned for that lane. + */ +ASTCENC_SIMD_INLINE vfloat4 max(vfloat4 a, vfloat4 b) +{ + // Do not reorder - second operand will return if either is NaN + return vfloat4(vmaxnmq_f32(a.m, b.m)); +} + +/** + * @brief Return the absolute value of the float vector. + */ +ASTCENC_SIMD_INLINE vfloat4 abs(vfloat4 a) +{ + float32x4_t zero = vdupq_n_f32(0.0f); + float32x4_t inv = vsubq_f32(zero, a.m); + return vfloat4(vmaxq_f32(a.m, inv)); +} + +/** + * @brief Return a float rounded to the nearest integer value. + */ +ASTCENC_SIMD_INLINE vfloat4 round(vfloat4 a) +{ + return vfloat4(vrndnq_f32(a.m)); +} + +/** + * @brief Return the horizontal minimum of a vector. + */ +ASTCENC_SIMD_INLINE vfloat4 hmin(vfloat4 a) +{ + return vfloat4(vminvq_f32(a.m)); +} + +/** + * @brief Return the horizontal maximum of a vector. + */ +ASTCENC_SIMD_INLINE vfloat4 hmax(vfloat4 a) +{ + return vfloat4(vmaxvq_f32(a.m)); +} + +/** + * @brief Return the horizontal sum of a vector. + */ +ASTCENC_SIMD_INLINE float hadd_s(vfloat4 a) +{ + // Perform halving add to ensure invariance; we cannot use vaddqv as this + // does (0 + 1 + 2 + 3) which is not invariant with x86 (0 + 2) + (1 + 3). + float32x2_t t = vadd_f32(vget_high_f32(a.m), vget_low_f32(a.m)); + return vget_lane_f32(vpadd_f32(t, t), 0); +} + +/** + * @brief Return the sqrt of the lanes in the vector. + */ +ASTCENC_SIMD_INLINE vfloat4 sqrt(vfloat4 a) +{ + return vfloat4(vsqrtq_f32(a.m)); +} + +/** + * @brief Return lanes from @c b if MSB of @c cond is set, else @c a. + */ +ASTCENC_SIMD_INLINE vfloat4 select(vfloat4 a, vfloat4 b, vmask4 cond) +{ + static const uint32x4_t msb = vdupq_n_u32(0x80000000u); + uint32x4_t mask = vcgeq_u32(cond.m, msb); + return vfloat4(vbslq_f32(mask, b.m, a.m)); +} + +/** + * @brief Load a vector of gathered results from an array; + */ +ASTCENC_SIMD_INLINE vfloat4 gatherf(const float* base, vint4 indices) +{ + alignas(16) int idx[4]; + storea(indices, idx); + alignas(16) float vals[4]; + vals[0] = base[idx[0]]; + vals[1] = base[idx[1]]; + vals[2] = base[idx[2]]; + vals[3] = base[idx[3]]; + return vfloat4(vals); +} + +/** + * @brief Store a vector to an unaligned memory address. + */ +ASTCENC_SIMD_INLINE void store(vfloat4 a, float* p) +{ + vst1q_f32(p, a.m); +} + +/** + * @brief Store a vector to a 16B aligned memory address. + */ +ASTCENC_SIMD_INLINE void storea(vfloat4 a, float* p) +{ + vst1q_f32(p, a.m); +} + +/** + * @brief Return a integer value for a float vector, using truncation. + */ +ASTCENC_SIMD_INLINE vint4 float_to_int(vfloat4 a) +{ + return vint4(vcvtq_s32_f32(a.m)); +} + +/** + * @brief Return a integer value for a float vector, using round-to-nearest. + */ +ASTCENC_SIMD_INLINE vint4 float_to_int_rtn(vfloat4 a) +{ + a = round(a); + return vint4(vcvtq_s32_f32(a.m)); +} + +/** + * @brief Return a float value for an integer vector. + */ +ASTCENC_SIMD_INLINE vfloat4 int_to_float(vint4 a) +{ + return vfloat4(vcvtq_f32_s32(a.m)); +} + +/** + * @brief Return a float16 value for a float vector, using round-to-nearest. 
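+ *
+ * For example, float_to_float16(vfloat4(1.0f)) returns 0x3C00 in every
+ * lane, the IEEE 754 binary16 bit pattern for 1.0.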
+ */ +ASTCENC_SIMD_INLINE vint4 float_to_float16(vfloat4 a) +{ + // Generate float16 value + float16x4_t f16 = vcvt_f16_f32(a.m); + + // Convert each 16-bit float pattern to a 32-bit pattern + uint16x4_t u16 = vreinterpret_u16_f16(f16); + uint32x4_t u32 = vmovl_u16(u16); + return vint4(vreinterpretq_s32_u32(u32)); +} + +/** + * @brief Return a float16 value for a float scalar, using round-to-nearest. + */ +static inline uint16_t float_to_float16(float a) +{ + vfloat4 av(a); + return float_to_float16(av).lane<0>(); +} + +/** + * @brief Return a float value for a float16 vector. + */ +ASTCENC_SIMD_INLINE vfloat4 float16_to_float(vint4 a) +{ + // Convert each 32-bit float pattern to a 16-bit pattern + uint32x4_t u32 = vreinterpretq_u32_s32(a.m); + uint16x4_t u16 = vmovn_u32(u32); + float16x4_t f16 = vreinterpret_f16_u16(u16); + + // Generate float16 value + return vfloat4(vcvt_f32_f16(f16)); +} + +/** + * @brief Return a float value for a float16 scalar. + */ +ASTCENC_SIMD_INLINE float float16_to_float(uint16_t a) +{ + vint4 av(a); + return float16_to_float(av).lane<0>(); +} + +/** + * @brief Return a float value as an integer bit pattern (i.e. no conversion). + * + * It is a common trick to convert floats into integer bit patterns, perform + * some bit hackery based on knowledge they are IEEE 754 layout, and then + * convert them back again. This is the first half of that flip. + */ +ASTCENC_SIMD_INLINE vint4 float_as_int(vfloat4 a) +{ + return vint4(vreinterpretq_s32_f32(a.m)); +} + +/** + * @brief Return a integer value as a float bit pattern (i.e. no conversion). + * + * It is a common trick to convert floats into integer bit patterns, perform + * some bit hackery based on knowledge they are IEEE 754 layout, and then + * convert them back again. This is the second half of that flip. + */ +ASTCENC_SIMD_INLINE vfloat4 int_as_float(vint4 v) +{ + return vfloat4(vreinterpretq_f32_s32(v.m)); +} + +#endif // #ifndef ASTC_VECMATHLIB_NEON_4_H_INCLUDED diff --git a/libkram/astc-encoder/astcenc_vecmathlib_neon_armv7_4.h b/libkram/astc-encoder/astcenc_vecmathlib_neon_armv7_4.h new file mode 100644 index 00000000..7d33dc15 --- /dev/null +++ b/libkram/astc-encoder/astcenc_vecmathlib_neon_armv7_4.h @@ -0,0 +1,186 @@ +// SPDX-License-Identifier: Apache-2.0 +// ---------------------------------------------------------------------------- +// Copyright 2021 Arm Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not +// use this file except in compliance with the License. You may obtain a copy +// of the License at: +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations +// under the License. +// ---------------------------------------------------------------------------- + +/** + * @brief Intrinsics for Armv7 NEON. + * + * This module implements a few Armv7-compatible intrinsics indentical to Armv8 + * ones. Thus, astcenc can be compiled using Armv7 architecture. 
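+ *
+ * The emulations provided below cover vmaxnmq_f32, vminnmq_f32, vrndnq_f32
+ * (those three only when compiling with clang), vmaxvq_f32, vminvq_f32,
+ * vmaxvq_s32, vminvq_s32, vsqrtq_f32, vdivq_f32, vqtbl1q_s8 and vaddvq_u32,
+ * each built from Armv7-safe NEON operations or plain scalar code.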
+ */ + +#ifndef ASTC_VECMATHLIB_NEON_ARMV7_4_H_INCLUDED +#define ASTC_VECMATHLIB_NEON_ARMV7_4_H_INCLUDED + +#ifndef ASTCENC_SIMD_INLINE + #error "Include astcenc_vecmathlib.h, do not include directly" +#endif + +#include +#include + + +// arm-linux-gnueabi-gcc contains the following functions by using +// #pragma GCC target ("fpu=neon-fp-armv8"), while clang does not. +#if defined(__clang__) + +/** + * @brief Return the max vector of two vectors. + * + * If one vector element is numeric and the other is a quiet NaN, + * the result placed in the vector is the numerical value. + */ +ASTCENC_SIMD_INLINE float32x4_t vmaxnmq_f32(float32x4_t a, float32x4_t b) +{ + uint32x4_t amask = vceqq_f32(a, a); + uint32x4_t bmask = vceqq_f32(b, b); + a = vbslq_f32(amask, a, b); + b = vbslq_f32(bmask, b, a); + return vmaxq_f32(a, b); +} + +/** + * @brief Return the min vector of two vectors. + * + * If one vector element is numeric and the other is a quiet NaN, + * the result placed in the vector is the numerical value. + */ +ASTCENC_SIMD_INLINE float32x4_t vminnmq_f32(float32x4_t a, float32x4_t b) +{ + uint32x4_t amask = vceqq_f32(a, a); + uint32x4_t bmask = vceqq_f32(b, b); + a = vbslq_f32(amask, a, b); + b = vbslq_f32(bmask, b, a); + return vminq_f32(a, b); +} + +/** + * @brief Return a float rounded to the nearest integer value. + */ +ASTCENC_SIMD_INLINE float32x4_t vrndnq_f32(float32x4_t a) +{ + assert(std::fegetround() == FE_TONEAREST); + float a0 = std::nearbyintf(vgetq_lane_f32(a, 0)); + float a1 = std::nearbyintf(vgetq_lane_f32(a, 1)); + float a2 = std::nearbyintf(vgetq_lane_f32(a, 2)); + float a3 = std::nearbyintf(vgetq_lane_f32(a, 3)); + float32x4_t c { a0, a1, a2, a3 }; + return c; +} + +#endif + +/** + * @brief Return the horizontal maximum of a vector. + */ +ASTCENC_SIMD_INLINE float vmaxvq_f32(float32x4_t a) +{ + float a0 = vgetq_lane_f32(a, 0); + float a1 = vgetq_lane_f32(a, 1); + float a2 = vgetq_lane_f32(a, 2); + float a3 = vgetq_lane_f32(a, 3); + return std::max(std::max(a0, a1), std::max(a2, a3)); +} + +/** + * @brief Return the horizontal maximum of a vector. + */ +ASTCENC_SIMD_INLINE float vminvq_f32(float32x4_t a) +{ + float a0 = vgetq_lane_f32(a, 0); + float a1 = vgetq_lane_f32(a, 1); + float a2 = vgetq_lane_f32(a, 2); + float a3 = vgetq_lane_f32(a, 3); + return std::min(std::min(a0, a1), std::min(a2, a3)); +} + +/** + * @brief Return the horizontal maximum of a vector. + */ +ASTCENC_SIMD_INLINE int32_t vmaxvq_s32(int32x4_t a) +{ + int32_t a0 = vgetq_lane_s32(a, 0); + int32_t a1 = vgetq_lane_s32(a, 1); + int32_t a2 = vgetq_lane_s32(a, 2); + int32_t a3 = vgetq_lane_s32(a, 3); + return std::max(std::max(a0, a1), std::max(a2, a3)); +} + +/** + * @brief Return the horizontal maximum of a vector. + */ +ASTCENC_SIMD_INLINE int32_t vminvq_s32(int32x4_t a) +{ + int32_t a0 = vgetq_lane_s32(a, 0); + int32_t a1 = vgetq_lane_s32(a, 1); + int32_t a2 = vgetq_lane_s32(a, 2); + int32_t a3 = vgetq_lane_s32(a, 3); + return std::min(std::min(a0, a1), std::min(a2, a3)); +} + +/** + * @brief Return the sqrt of the lanes in the vector. + */ +ASTCENC_SIMD_INLINE float32x4_t vsqrtq_f32(float32x4_t a) +{ + float a0 = std::sqrt(vgetq_lane_f32(a, 0)); + float a1 = std::sqrt(vgetq_lane_f32(a, 1)); + float a2 = std::sqrt(vgetq_lane_f32(a, 2)); + float a3 = std::sqrt(vgetq_lane_f32(a, 3)); + float32x4_t c { a0, a1, a2, a3 }; + return c; +} + +/** + * @brief Vector by vector division. 
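+ *
+ * A short lane-wise example, using the same brace initialization style as
+ * the implementations in this header:
+ *
+ * @code
+ *   float32x4_t n { 1.0f, 4.0f, 9.0f, 16.0f };
+ *   float32x4_t d { 1.0f, 2.0f, 3.0f, 4.0f };
+ *   float32x4_t q = vdivq_f32(n, d);   // { 1.0f, 2.0f, 3.0f, 4.0f }
+ * @endcode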
+ */ +ASTCENC_SIMD_INLINE float32x4_t vdivq_f32(float32x4_t a, float32x4_t b) +{ + float a0 = vgetq_lane_f32(a, 0), b0 = vgetq_lane_f32(b, 0); + float a1 = vgetq_lane_f32(a, 1), b1 = vgetq_lane_f32(b, 1); + float a2 = vgetq_lane_f32(a, 2), b2 = vgetq_lane_f32(b, 2); + float a3 = vgetq_lane_f32(a, 3), b3 = vgetq_lane_f32(b, 3); + float32x4_t c { a0 / b0, a1 / b1, a2 / b2, a3 / b3 }; + return c; +} + +/** + * @brief Table vector lookup. + */ +ASTCENC_SIMD_INLINE int8x16_t vqtbl1q_s8(int8x16_t t, uint8x16_t idx) +{ + int8x8x2_t tab; + tab.val[0] = vget_low_s8(t); + tab.val[1] = vget_high_s8(t); + int8x16_t id = vreinterpretq_s8_u8(idx); + return vcombine_s8( + vtbl2_s8(tab, vget_low_s8(id)), + vtbl2_s8(tab, vget_high_s8(id))); +} + +/** + * @brief Horizontal integer addition. + */ +ASTCENC_SIMD_INLINE uint32_t vaddvq_u32(uint32x4_t a) +{ + uint32_t a0 = vgetq_lane_u32(a, 0); + uint32_t a1 = vgetq_lane_u32(a, 1); + uint32_t a2 = vgetq_lane_u32(a, 2); + uint32_t a3 = vgetq_lane_u32(a, 3); + return a0 + a1 + a2 + a3; +} + +#endif diff --git a/libkram/astc-encoder/astcenc_vecmathlib_none_4.h b/libkram/astc-encoder/astcenc_vecmathlib_none_4.h new file mode 100644 index 00000000..716d6982 --- /dev/null +++ b/libkram/astc-encoder/astcenc_vecmathlib_none_4.h @@ -0,0 +1,1025 @@ +// SPDX-License-Identifier: Apache-2.0 +// ---------------------------------------------------------------------------- +// Copyright 2019-2021 Arm Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not +// use this file except in compliance with the License. You may obtain a copy +// of the License at: +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations +// under the License. +// ---------------------------------------------------------------------------- + +/** + * @brief 4x32-bit vectors, implemented using plain C++. + * + * This module implements 4-wide 32-bit float, int, and mask vectors. This + * module provides a scalar fallback for VLA code, primarily useful for + * debugging VLA algorithms without the complexity of handling SIMD. Only the + * baseline level of functionality needed to support VLA is provided. + * + * Note that the vector conditional operators implemented by this module are + * designed to behave like SIMD conditional operators that generate lane masks. + * Rather than returning 0/1 booleans like normal C++ code they will return + * 0/-1 to give a full lane-width bitmask. + * + * Note that the documentation for this module still talks about "vectors" to + * help developers think about the implied VLA behavior when writing optimized + * paths. + */ + +#ifndef ASTC_VECMATHLIB_NONE_4_H_INCLUDED +#define ASTC_VECMATHLIB_NONE_4_H_INCLUDED + +#ifndef ASTCENC_SIMD_INLINE + #error "Include astcenc_vecmathlib.h, do not include directly" +#endif + +#include +#include +#include +#include + +// ============================================================================ +// vfloat4 data type +// ============================================================================ + +/** + * @brief Data type for 4-wide floats. + */ +struct vfloat4 +{ + /** + * @brief Construct from zero-initialized value. 
+ */ + ASTCENC_SIMD_INLINE vfloat4() {} + + /** + * @brief Construct from 4 values loaded from an unaligned address. + * + * Consider using loada() which is better with wider VLA vectors if data is + * aligned to vector length. + */ + ASTCENC_SIMD_INLINE explicit vfloat4(const float* p) + { + m[0] = p[0]; + m[1] = p[1]; + m[2] = p[2]; + m[3] = p[3]; + } + + /** + * @brief Construct from 4 scalar values replicated across all lanes. + * + * Consider using zero() for constexpr zeros. + */ + ASTCENC_SIMD_INLINE explicit vfloat4(float a) + { + m[0] = a; + m[1] = a; + m[2] = a; + m[3] = a; + } + + /** + * @brief Construct from 4 scalar values. + * + * The value of @c a is stored to lane 0 (LSB) in the SIMD register. + */ + ASTCENC_SIMD_INLINE explicit vfloat4(float a, float b, float c, float d) + { + m[0] = a; + m[1] = b; + m[2] = c; + m[3] = d; + } + + /** + * @brief Get the scalar value of a single lane. + */ + template ASTCENC_SIMD_INLINE float lane() const + { + return m[l]; + } + + /** + * @brief Set the scalar value of a single lane. + */ + template ASTCENC_SIMD_INLINE void set_lane(float a) + { + m[l] = a; + } + + /** + * @brief Factory that returns a vector of zeros. + */ + static ASTCENC_SIMD_INLINE vfloat4 zero() + { + return vfloat4(0.0f); + } + + /** + * @brief Factory that returns a replicated scalar loaded from memory. + */ + static ASTCENC_SIMD_INLINE vfloat4 load1(const float* p) + { + return vfloat4(*p); + } + + /** + * @brief Factory that returns a vector loaded from aligned memory. + */ + static ASTCENC_SIMD_INLINE vfloat4 loada(const float* p) + { + return vfloat4(p); + } + + /** + * @brief Factory that returns a vector containing the lane IDs. + */ + static ASTCENC_SIMD_INLINE vfloat4 lane_id() + { + return vfloat4(0.0f, 1.0f, 2.0f, 3.0f); + } + + /** + * @brief Return a swizzled float 2. + */ + template ASTCENC_SIMD_INLINE float2 swz() const + { + return float2(lane(), lane()); + } + + /** + * @brief Return a swizzled float 3. + */ + template ASTCENC_SIMD_INLINE vfloat4 swz() const + { + return vfloat4(lane(), lane(), lane(), 0.0f); + } + + /** + * @brief Return a swizzled float 4. + */ + template ASTCENC_SIMD_INLINE vfloat4 swz() const + { + return vfloat4(lane(), lane(), lane(), lane()); + } + + /** + * @brief The vector ... + */ + float m[4]; +}; + +// ============================================================================ +// vint4 data type +// ============================================================================ + +/** + * @brief Data type for 4-wide ints. + */ +struct vint4 +{ + /** + * @brief Construct from zero-initialized value. + */ + ASTCENC_SIMD_INLINE vint4() {} + + /** + * @brief Construct from 4 values loaded from an unaligned address. + * + * Consider using vint4::loada() which is better with wider VLA vectors + * if data is aligned. + */ + ASTCENC_SIMD_INLINE explicit vint4(const int* p) + { + m[0] = p[0]; + m[1] = p[1]; + m[2] = p[2]; + m[3] = p[3]; + } + + /** + * @brief Construct from 4 uint8_t loaded from an unaligned address. + */ + ASTCENC_SIMD_INLINE explicit vint4(const uint8_t *p) + { + m[0] = p[0]; + m[1] = p[1]; + m[2] = p[2]; + m[3] = p[3]; + } + + /** + * @brief Construct from 4 scalar values. + * + * The value of @c a is stored to lane 0 (LSB) in the SIMD register. + */ + ASTCENC_SIMD_INLINE explicit vint4(int a, int b, int c, int d) + { + m[0] = a; + m[1] = b; + m[2] = c; + m[3] = d; + } + + + /** + * @brief Construct from 4 scalar values replicated across all lanes. + * + * Consider using vint4::zero() for constexpr zeros. 
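+	 *
+	 * A minimal usage sketch:
+	 *
+	 * @code
+	 *   vint4 four(4);                          // lanes { 4, 4, 4, 4 }
+	 *   vint4 ramp = vint4::lane_id() + four;   // lanes { 4, 5, 6, 7 }
+	 * @endcode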
+ */ + ASTCENC_SIMD_INLINE explicit vint4(int a) + { + m[0] = a; + m[1] = a; + m[2] = a; + m[3] = a; + } + + /** + * @brief Get the scalar value of a single lane. + */ + template ASTCENC_SIMD_INLINE int lane() const + { + return m[l]; + } + + /** + * @brief Set the scalar value of a single lane. + */ + template ASTCENC_SIMD_INLINE void set_lane(int a) + { + m[l] = a; + } + + /** + * @brief Factory that returns a vector of zeros. + */ + static ASTCENC_SIMD_INLINE vint4 zero() + { + return vint4(0); + } + + /** + * @brief Factory that returns a replicated scalar loaded from memory. + */ + static ASTCENC_SIMD_INLINE vint4 load1(const int* p) + { + return vint4(*p); + } + + /** + * @brief Factory that returns a vector loaded from 16B aligned memory. + */ + static ASTCENC_SIMD_INLINE vint4 loada(const int* p) + { + return vint4(p); + } + + /** + * @brief Factory that returns a vector containing the lane IDs. + */ + static ASTCENC_SIMD_INLINE vint4 lane_id() + { + return vint4(0, 1, 2, 3); + } + + /** + * @brief The vector ... + */ + int m[4]; +}; + +// ============================================================================ +// vmask4 data type +// ============================================================================ + +/** + * @brief Data type for 4-wide control plane masks. + */ +struct vmask4 +{ + /** + * @brief Construct from an existing mask value. + */ + ASTCENC_SIMD_INLINE explicit vmask4(int* p) + { + m[0] = p[0]; + m[1] = p[1]; + m[2] = p[2]; + m[3] = p[3]; + } + + /** + * @brief Construct from 4 scalar values. + * + * The value of @c a is stored to lane 0 (LSB) in the SIMD register. + */ + ASTCENC_SIMD_INLINE explicit vmask4(bool a, bool b, bool c, bool d) + { + m[0] = a == false ? 0 : -1; + m[1] = b == false ? 0 : -1; + m[2] = c == false ? 0 : -1; + m[3] = d == false ? 0 : -1; + } + + /** + * @brief The vector ... + */ + int m[4]; +}; + +// ============================================================================ +// vmask4 operators and functions +// ============================================================================ + +/** + * @brief Overload: mask union (or). + */ +ASTCENC_SIMD_INLINE vmask4 operator|(vmask4 a, vmask4 b) +{ + return vmask4(a.m[0] | b.m[0], + a.m[1] | b.m[1], + a.m[2] | b.m[2], + a.m[3] | b.m[3]); +} + +/** + * @brief Overload: mask intersect (and). + */ +ASTCENC_SIMD_INLINE vmask4 operator&(vmask4 a, vmask4 b) +{ + return vmask4(a.m[0] & b.m[0], + a.m[1] & b.m[1], + a.m[2] & b.m[2], + a.m[3] & b.m[3]); +} + +/** + * @brief Overload: mask difference (xor). + */ +ASTCENC_SIMD_INLINE vmask4 operator^(vmask4 a, vmask4 b) +{ + return vmask4(a.m[0] ^ b.m[0], + a.m[1] ^ b.m[1], + a.m[2] ^ b.m[2], + a.m[3] ^ b.m[3]); +} + +/** + * @brief Overload: mask invert (not). + */ +ASTCENC_SIMD_INLINE vmask4 operator~(vmask4 a) +{ + return vmask4(~a.m[0], + ~a.m[1], + ~a.m[2], + ~a.m[3]); +} + +/** + * @brief Return a 1-bit mask code indicating mask status. + * + * bit0 = lane 0 + */ +ASTCENC_SIMD_INLINE unsigned int mask(vmask4 a) +{ + return ((a.m[0] >> 31) & 0x1) | + ((a.m[1] >> 30) & 0x2) | + ((a.m[2] >> 29) & 0x4) | + ((a.m[3] >> 28) & 0x8); +} + +// ============================================================================ +// vint4 operators and functions +// ============================================================================ + +/** + * @brief Overload: vector by vector addition. 
+ */ +ASTCENC_SIMD_INLINE vint4 operator+(vint4 a, vint4 b) +{ + return vint4(a.m[0] + b.m[0], + a.m[1] + b.m[1], + a.m[2] + b.m[2], + a.m[3] + b.m[3]); +} + +/** + * @brief Overload: vector by vector subtraction. + */ +ASTCENC_SIMD_INLINE vint4 operator-(vint4 a, vint4 b) +{ + return vint4(a.m[0] - b.m[0], + a.m[1] - b.m[1], + a.m[2] - b.m[2], + a.m[3] - b.m[3]); +} + +/** + * @brief Overload: vector by vector multiplication. + */ +ASTCENC_SIMD_INLINE vint4 operator*(vint4 a, vint4 b) +{ + return vint4(a.m[0] * b.m[0], + a.m[1] * b.m[1], + a.m[2] * b.m[2], + a.m[3] * b.m[3]); +} + +/** + * @brief Overload: vector bit invert. + */ +ASTCENC_SIMD_INLINE vint4 operator~(vint4 a) +{ + return vint4(~a.m[0], + ~a.m[1], + ~a.m[2], + ~a.m[3]); +} + +/** + * @brief Overload: vector by vector bitwise or. + */ +ASTCENC_SIMD_INLINE vint4 operator|(vint4 a, vint4 b) +{ + return vint4(a.m[0] | b.m[0], + a.m[1] | b.m[1], + a.m[2] | b.m[2], + a.m[3] | b.m[3]); +} + +/** + * @brief Overload: vector by vector bitwise and. + */ +ASTCENC_SIMD_INLINE vint4 operator&(vint4 a, vint4 b) +{ + return vint4(a.m[0] & b.m[0], + a.m[1] & b.m[1], + a.m[2] & b.m[2], + a.m[3] & b.m[3]); +} + +/** + * @brief Overload: vector by vector bitwise xor. + */ +ASTCENC_SIMD_INLINE vint4 operator^(vint4 a, vint4 b) +{ + return vint4(a.m[0] ^ b.m[0], + a.m[1] ^ b.m[1], + a.m[2] ^ b.m[2], + a.m[3] ^ b.m[3]); +} + +/** + * @brief Overload: vector by vector equality. + */ +ASTCENC_SIMD_INLINE vmask4 operator==(vint4 a, vint4 b) +{ + return vmask4(a.m[0] == b.m[0], + a.m[1] == b.m[1], + a.m[2] == b.m[2], + a.m[3] == b.m[3]); +} + +/** + * @brief Overload: vector by vector inequality. + */ +ASTCENC_SIMD_INLINE vmask4 operator!=(vint4 a, vint4 b) +{ + return vmask4(a.m[0] != b.m[0], + a.m[1] != b.m[1], + a.m[2] != b.m[2], + a.m[3] != b.m[3]); +} + +/** + * @brief Overload: vector by vector less than. + */ +ASTCENC_SIMD_INLINE vmask4 operator<(vint4 a, vint4 b) +{ + return vmask4(a.m[0] < b.m[0], + a.m[1] < b.m[1], + a.m[2] < b.m[2], + a.m[3] < b.m[3]); +} + +/** + * @brief Overload: vector by vector greater than. + */ +ASTCENC_SIMD_INLINE vmask4 operator>(vint4 a, vint4 b) +{ + return vmask4(a.m[0] > b.m[0], + a.m[1] > b.m[1], + a.m[2] > b.m[2], + a.m[3] > b.m[3]); +} + +/** + * @brief Logical shift left. + */ +template ASTCENC_SIMD_INLINE vint4 lsl(vint4 a) +{ + return vint4(a.m[0] << s, + a.m[1] << s, + a.m[2] << s, + a.m[3] << s); +} + +/** + * @brief Logical shift right. + */ +template ASTCENC_SIMD_INLINE vint4 lsr(vint4 a) +{ + return vint4((int)(((unsigned int)a.m[0]) >> s), + (int)(((unsigned int)a.m[1]) >> s), + (int)(((unsigned int)a.m[2]) >> s), + (int)(((unsigned int)a.m[3]) >> s)); +} + +/** + * @brief Arithmetic shift right. + */ +template ASTCENC_SIMD_INLINE vint4 asr(vint4 a) +{ + return vint4(a.m[0] >> s, + a.m[1] >> s, + a.m[2] >> s, + a.m[3] >> s); +} + +/** + * @brief Return the min vector of two vectors. + */ +ASTCENC_SIMD_INLINE vint4 min(vint4 a, vint4 b) +{ + return vint4(a.m[0] < b.m[0] ? a.m[0] : b.m[0], + a.m[1] < b.m[1] ? a.m[1] : b.m[1], + a.m[2] < b.m[2] ? a.m[2] : b.m[2], + a.m[3] < b.m[3] ? a.m[3] : b.m[3]); +} + +/** + * @brief Return the min vector of two vectors. + */ +ASTCENC_SIMD_INLINE vint4 max(vint4 a, vint4 b) +{ + return vint4(a.m[0] > b.m[0] ? a.m[0] : b.m[0], + a.m[1] > b.m[1] ? a.m[1] : b.m[1], + a.m[2] > b.m[2] ? a.m[2] : b.m[2], + a.m[3] > b.m[3] ? a.m[3] : b.m[3]); +} + +/** + * @brief Return the horizontal minimum of a single vector. 
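+ *
+ * A minimal usage sketch:
+ *
+ * @code
+ *   vint4 v(3, 1, 4, 1);
+ *   int lo = hmin(v).lane<0>();   // 1, and every lane of hmin(v) holds 1
+ * @endcode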
+ */ +ASTCENC_SIMD_INLINE vint4 hmin(vint4 a) +{ + int b = std::min(a.m[0], a.m[1]); + int c = std::min(a.m[2], a.m[3]); + return vint4(std::min(b, c)); +} + +/** + * @brief Return the horizontal maximum of a single vector. + */ +ASTCENC_SIMD_INLINE vint4 hmax(vint4 a) +{ + int b = std::max(a.m[0], a.m[1]); + int c = std::max(a.m[2], a.m[3]); + return vint4(std::max(b, c)); +} + +/** + * @brief Return the horizontal sum of vector lanes as a scalar. + */ +ASTCENC_SIMD_INLINE int hadd_s(vint4 a) +{ + return a.m[0] + a.m[1] + a.m[2] + a.m[3]; +} + +/** + * @brief Store a vector to an aligned memory address. + */ +ASTCENC_SIMD_INLINE void storea(vint4 a, int* p) +{ + p[0] = a.m[0]; + p[1] = a.m[1]; + p[2] = a.m[2]; + p[3] = a.m[3]; +} + +/** + * @brief Store a vector to an unaligned memory address. + */ +ASTCENC_SIMD_INLINE void store(vint4 a, int* p) +{ + p[0] = a.m[0]; + p[1] = a.m[1]; + p[2] = a.m[2]; + p[3] = a.m[3]; +} + +/** + * @brief Store lowest N (vector width) bytes into an unaligned address. + */ +ASTCENC_SIMD_INLINE void store_nbytes(vint4 a, uint8_t* p) +{ + int* pi = (int*)p; + *pi = a.m[0]; +} + +/** + * @brief Gather N (vector width) indices from the array. + */ +ASTCENC_SIMD_INLINE vint4 gatheri(const int* base, vint4 indices) +{ + return vint4(base[indices.m[0]], + base[indices.m[1]], + base[indices.m[2]], + base[indices.m[3]]); +} + +/** + * @brief Pack low 8 bits of N (vector width) lanes into bottom of vector. + */ +ASTCENC_SIMD_INLINE vint4 pack_low_bytes(vint4 a) +{ + int b0 = a.m[0] & 0xFF; + int b1 = a.m[1] & 0xFF; + int b2 = a.m[2] & 0xFF; + int b3 = a.m[3] & 0xFF; + + int b = b0 | (b1 << 8) | (b2 << 16) | (b3 << 24); + return vint4(b, 0, 0, 0); +} + +/** + * @brief Return lanes from @c b if MSB of @c cond is set, else @c a. + */ +ASTCENC_SIMD_INLINE vint4 select(vint4 a, vint4 b, vmask4 cond) +{ + return vint4((cond.m[0] & 0x80000000) ? b.m[0] : a.m[0], + (cond.m[1] & 0x80000000) ? b.m[1] : a.m[1], + (cond.m[2] & 0x80000000) ? b.m[2] : a.m[2], + (cond.m[3] & 0x80000000) ? b.m[3] : a.m[3]); +} + +// ============================================================================ +// vfloat4 operators and functions +// ============================================================================ + +/** + * @brief Overload: vector by vector addition. + */ +ASTCENC_SIMD_INLINE vfloat4 operator+(vfloat4 a, vfloat4 b) +{ + return vfloat4(a.m[0] + b.m[0], + a.m[1] + b.m[1], + a.m[2] + b.m[2], + a.m[3] + b.m[3]); +} + +/** + * @brief Overload: vector by vector subtraction. + */ +ASTCENC_SIMD_INLINE vfloat4 operator-(vfloat4 a, vfloat4 b) +{ + return vfloat4(a.m[0] - b.m[0], + a.m[1] - b.m[1], + a.m[2] - b.m[2], + a.m[3] - b.m[3]); +} + +/** + * @brief Overload: vector by vector multiplication. + */ +ASTCENC_SIMD_INLINE vfloat4 operator*(vfloat4 a, vfloat4 b) +{ + return vfloat4(a.m[0] * b.m[0], + a.m[1] * b.m[1], + a.m[2] * b.m[2], + a.m[3] * b.m[3]); +} + +/** + * @brief Overload: vector by vector division. + */ +ASTCENC_SIMD_INLINE vfloat4 operator/(vfloat4 a, vfloat4 b) +{ + return vfloat4(a.m[0] / b.m[0], + a.m[1] / b.m[1], + a.m[2] / b.m[2], + a.m[3] / b.m[3]); +} + +/** + * @brief Overload: vector by vector equality. + */ +ASTCENC_SIMD_INLINE vmask4 operator==(vfloat4 a, vfloat4 b) +{ + return vmask4(a.m[0] == b.m[0], + a.m[1] == b.m[1], + a.m[2] == b.m[2], + a.m[3] == b.m[3]); +} + +/** + * @brief Overload: vector by vector inequality. 
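+ *
+ * A short example; as noted in the file header, comparisons produce 0/-1
+ * lane masks rather than booleans:
+ *
+ * @code
+ *   vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f);
+ *   vfloat4 b(1.0f, 0.0f, 3.0f, 0.0f);
+ *   vmask4 ne = a != b;            // lanes { 0, -1, 0, -1 }
+ *   unsigned int bits = mask(ne);  // 0xA: bits 1 and 3 set
+ * @endcode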
+ */ +ASTCENC_SIMD_INLINE vmask4 operator!=(vfloat4 a, vfloat4 b) +{ + return vmask4(a.m[0] != b.m[0], + a.m[1] != b.m[1], + a.m[2] != b.m[2], + a.m[3] != b.m[3]); +} + +/** + * @brief Overload: vector by vector less than. + */ +ASTCENC_SIMD_INLINE vmask4 operator<(vfloat4 a, vfloat4 b) +{ + return vmask4(a.m[0] < b.m[0], + a.m[1] < b.m[1], + a.m[2] < b.m[2], + a.m[3] < b.m[3]); +} + +/** + * @brief Overload: vector by vector greater than. + */ +ASTCENC_SIMD_INLINE vmask4 operator>(vfloat4 a, vfloat4 b) +{ + return vmask4(a.m[0] > b.m[0], + a.m[1] > b.m[1], + a.m[2] > b.m[2], + a.m[3] > b.m[3]); +} + +/** + * @brief Overload: vector by vector less than or equal. + */ +ASTCENC_SIMD_INLINE vmask4 operator<=(vfloat4 a, vfloat4 b) +{ + return vmask4(a.m[0] <= b.m[0], + a.m[1] <= b.m[1], + a.m[2] <= b.m[2], + a.m[3] <= b.m[3]); +} + +/** + * @brief Overload: vector by vector greater than or equal. + */ +ASTCENC_SIMD_INLINE vmask4 operator>=(vfloat4 a, vfloat4 b) +{ + return vmask4(a.m[0] >= b.m[0], + a.m[1] >= b.m[1], + a.m[2] >= b.m[2], + a.m[3] >= b.m[3]); +} + +/** + * @brief Return the min vector of two vectors. + * + * If either lane value is NaN, @c b will be returned for that lane. + */ +ASTCENC_SIMD_INLINE vfloat4 min(vfloat4 a, vfloat4 b) +{ + return vfloat4(a.m[0] < b.m[0] ? a.m[0] : b.m[0], + a.m[1] < b.m[1] ? a.m[1] : b.m[1], + a.m[2] < b.m[2] ? a.m[2] : b.m[2], + a.m[3] < b.m[3] ? a.m[3] : b.m[3]); +} + +/** + * @brief Return the max vector of two vectors. + * + * If either lane value is NaN, @c b will be returned for that lane. + */ +ASTCENC_SIMD_INLINE vfloat4 max(vfloat4 a, vfloat4 b) +{ + return vfloat4(a.m[0] > b.m[0] ? a.m[0] : b.m[0], + a.m[1] > b.m[1] ? a.m[1] : b.m[1], + a.m[2] > b.m[2] ? a.m[2] : b.m[2], + a.m[3] > b.m[3] ? a.m[3] : b.m[3]); +} + +/** + * @brief Return the absolute value of the float vector. + */ +ASTCENC_SIMD_INLINE vfloat4 abs(vfloat4 a) +{ + return vfloat4(std::abs(a.m[0]), + std::abs(a.m[1]), + std::abs(a.m[2]), + std::abs(a.m[3])); +} + +/** + * @brief Return a float rounded to the nearest integer value. + */ +ASTCENC_SIMD_INLINE vfloat4 round(vfloat4 a) +{ + assert(std::fegetround() == FE_TONEAREST); + return vfloat4(std::nearbyint(a.m[0]), + std::nearbyint(a.m[1]), + std::nearbyint(a.m[2]), + std::nearbyint(a.m[3])); +} + +/** + * @brief Return the horizontal minimum of a vector. + */ +ASTCENC_SIMD_INLINE vfloat4 hmin(vfloat4 a) +{ + float tmp1 = std::min(a.m[0], a.m[1]); + float tmp2 = std::min(a.m[2], a.m[3]); + return vfloat4(std::min(tmp1, tmp2)); +} + +/** + * @brief Return the horizontal maximum of a vector. + */ +ASTCENC_SIMD_INLINE vfloat4 hmax(vfloat4 a) +{ + float tmp1 = std::max(a.m[0], a.m[1]); + float tmp2 = std::max(a.m[2], a.m[3]); + return vfloat4(std::max(tmp1, tmp2)); +} + +/** + * @brief Return the horizontal sum of a vector. + */ +ASTCENC_SIMD_INLINE float hadd_s(vfloat4 a) +{ + // Use halving add, gives invariance with SIMD versions + return (a.m[0] + a.m[2]) + (a.m[1] + a.m[3]); +} + +/** + * @brief Return the sqrt of the lanes in the vector. + */ +ASTCENC_SIMD_INLINE vfloat4 sqrt(vfloat4 a) +{ + return vfloat4(std::sqrt(a.m[0]), + std::sqrt(a.m[1]), + std::sqrt(a.m[2]), + std::sqrt(a.m[3])); +} + +/** + * @brief Return lanes from @c b if MSB of @c cond is set, else @c a. + */ +ASTCENC_SIMD_INLINE vfloat4 select(vfloat4 a, vfloat4 b, vmask4 cond) +{ + return vfloat4((cond.m[0] & 0x80000000) ? b.m[0] : a.m[0], + (cond.m[1] & 0x80000000) ? b.m[1] : a.m[1], + (cond.m[2] & 0x80000000) ? 
b.m[2] : a.m[2], + (cond.m[3] & 0x80000000) ? b.m[3] : a.m[3]); +} + +/** + * @brief Load a vector of gathered results from an array; + */ +ASTCENC_SIMD_INLINE vfloat4 gatherf(const float* base, vint4 indices) +{ + return vfloat4(base[indices.m[0]], + base[indices.m[1]], + base[indices.m[2]], + base[indices.m[3]]); +} + +/** + * @brief Store a vector to an unaligned memory address. + */ +ASTCENC_SIMD_INLINE void store(vfloat4 a, float* ptr) +{ + ptr[0] = a.m[0]; + ptr[1] = a.m[1]; + ptr[2] = a.m[2]; + ptr[3] = a.m[3]; +} + +/** + * @brief Store a vector to an aligned memory address. + */ +ASTCENC_SIMD_INLINE void storea(vfloat4 a, float* ptr) +{ + ptr[0] = a.m[0]; + ptr[1] = a.m[1]; + ptr[2] = a.m[2]; + ptr[3] = a.m[3]; +} + +/** + * @brief Return a integer value for a float vector, using truncation. + */ +ASTCENC_SIMD_INLINE vint4 float_to_int(vfloat4 a) +{ + // Casting to unsigned buys us an extra bit of precision in cases where + // we can use the integer as nasty bit hacks. + return vint4((unsigned int)a.m[0], + (unsigned int)a.m[1], + (unsigned int)a.m[2], + (unsigned int)a.m[3]); +} + +/**f + * @brief Return a integer value for a float vector, using round-to-nearest. + */ +ASTCENC_SIMD_INLINE vint4 float_to_int_rtn(vfloat4 a) +{ + return vint4((int)(a.m[0] + 0.5f), + (int)(a.m[1] + 0.5f), + (int)(a.m[2] + 0.5f), + (int)(a.m[3] + 0.5f)); +} + +/** + * @brief Return a float value for a integer vector. + */ +ASTCENC_SIMD_INLINE vfloat4 int_to_float(vint4 a) +{ + return vfloat4((float)a.m[0], + (float)a.m[1], + (float)a.m[2], + (float)a.m[3]); +} + +/** + * @brief Return a float16 value for a float vector, using round-to-nearest. + */ +ASTCENC_SIMD_INLINE vint4 float_to_float16(vfloat4 a) +{ + return vint4( + float_to_sf16(a.lane<0>()), + float_to_sf16(a.lane<1>()), + float_to_sf16(a.lane<2>()), + float_to_sf16(a.lane<3>())); +} + +/** + * @brief Return a float16 value for a float scalar, using round-to-nearest. + */ +static inline uint16_t float_to_float16(float a) +{ + return float_to_sf16(a); +} + +/** + * @brief Return a float value for a float16 vector. + */ +ASTCENC_SIMD_INLINE vfloat4 float16_to_float(vint4 a) +{ + return vfloat4( + sf16_to_float(a.lane<0>()), + sf16_to_float(a.lane<1>()), + sf16_to_float(a.lane<2>()), + sf16_to_float(a.lane<3>())); +} + +/** + * @brief Return a float value for a float16 scalar. + */ +ASTCENC_SIMD_INLINE float float16_to_float(uint16_t a) +{ + return sf16_to_float(a); +} + +/** + * @brief Return a float value as an integer bit pattern (i.e. no conversion). + * + * It is a common trick to convert floats into integer bit patterns, perform + * some bit hackery based on knowledge they are IEEE 754 layout, and then + * convert them back again. This is the first half of that flip. + */ +ASTCENC_SIMD_INLINE vint4 float_as_int(vfloat4 a) +{ + vint4 r; + memcpy(r.m, a.m, 4 * 4); + return r; +} + +/** + * @brief Return a integer value as a float bit pattern (i.e. no conversion). + * + * It is a common trick to convert floats into integer bit patterns, perform + * some bit hackery based on knowledge they are IEEE 754 layout, and then + * convert them back again. This is the second half of that flip. 
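+ *
+ * A minimal round-trip sketch:
+ *
+ * @code
+ *   vfloat4 v(1.0f, -2.0f, 0.5f, 4.0f);
+ *   vint4 bits = float_as_int(v);      // raw IEEE 754 bit patterns
+ *   vfloat4 back = int_as_float(bits); // equal to v, lane for lane
+ * @endcode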
+ */ +ASTCENC_SIMD_INLINE vfloat4 int_as_float(vint4 a) +{ + vfloat4 r; + memcpy(r.m, a.m, 4 * 4); + return r; +} + +#endif // #ifndef ASTC_VECMATHLIB_NONE_4_H_INCLUDED diff --git a/libkram/astc-encoder/astcenc_vecmathlib_sse_4.h b/libkram/astc-encoder/astcenc_vecmathlib_sse_4.h new file mode 100755 index 00000000..4bb8ea96 --- /dev/null +++ b/libkram/astc-encoder/astcenc_vecmathlib_sse_4.h @@ -0,0 +1,1008 @@ +// SPDX-License-Identifier: Apache-2.0 +// ---------------------------------------------------------------------------- +// Copyright 2019-2021 Arm Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not +// use this file except in compliance with the License. You may obtain a copy +// of the License at: +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations +// under the License. +// ---------------------------------------------------------------------------- + +/** + * @brief 4x32-bit vectors, implemented using SSE. + * + * This module implements 4-wide 32-bit float, int, and mask vectors for x86 + * SSE. The implementation requires at least SSE2, but higher levels of SSE can + * be selected at compile time to improve performance. + * + * There is a baseline level of functionality provided by all vector widths and + * implementations. This is implemented using identical function signatures, + * modulo data type, so we can use them as substitutable implementations in VLA + * code. + * + * The 4-wide vectors are also used as a fixed-width type, and significantly + * extend the functionality above that available to VLA code. + */ + +#ifndef ASTC_VECMATHLIB_SSE_4_H_INCLUDED +#define ASTC_VECMATHLIB_SSE_4_H_INCLUDED + +#ifndef ASTCENC_SIMD_INLINE + #error "Include astcenc_vecmathlib.h, do not include directly" +#endif + +#include + +// ============================================================================ +// vfloat4 data type +// ============================================================================ + +/** + * @brief Data type for 4-wide floats. + */ +struct vfloat4 +{ + /** + * @brief Construct from zero-initialized value. + */ + ASTCENC_SIMD_INLINE vfloat4() {} + + /** + * @brief Construct from 4 values loaded from an unaligned address. + * + * Consider using loada() which is better with vectors if data is aligned + * to vector length. + */ + ASTCENC_SIMD_INLINE explicit vfloat4(const float *p) + { + m = _mm_loadu_ps(p); + } + + /** + * @brief Construct from 1 scalar value replicated across all lanes. + * + * Consider using zero() for constexpr zeros. + */ + ASTCENC_SIMD_INLINE explicit vfloat4(float a) + { + m = _mm_set1_ps(a); + } + + /** + * @brief Construct from 4 scalar values. + * + * The value of @c a is stored to lane 0 (LSB) in the SIMD register. + */ + ASTCENC_SIMD_INLINE explicit vfloat4(float a, float b, float c, float d) + { + m = _mm_set_ps(d, c, b, a); + } + + /** + * @brief Construct from an existing SIMD register. + */ + ASTCENC_SIMD_INLINE explicit vfloat4(__m128 a) + { + m = a; + } + + /** + * @brief Get the scalar value of a single lane. + */ + template ASTCENC_SIMD_INLINE float lane() const + { + return _mm_cvtss_f32(_mm_shuffle_ps(m, m, l)); + } + + /** + * @brief Set the scalar value of a single lane. 
+ */ + template ASTCENC_SIMD_INLINE void set_lane(float a) + { +#if ASTCENC_SSE >= 41 + __m128 v = _mm_set1_ps(a); + m = _mm_insert_ps(m, v, l << 6 | l << 4); +#else + alignas(16) float idx[4]; + _mm_store_ps(idx, m); + idx[l] = a; + m = _mm_load_ps(idx); +#endif + } + + /** + * @brief Factory that returns a vector of zeros. + */ + static ASTCENC_SIMD_INLINE vfloat4 zero() + { + return vfloat4(_mm_setzero_ps()); + } + + /** + * @brief Factory that returns a replicated scalar loaded from memory. + */ + static ASTCENC_SIMD_INLINE vfloat4 load1(const float* p) + { + return vfloat4(_mm_load_ps1(p)); + } + + /** + * @brief Factory that returns a vector loaded from 16B aligned memory. + */ + static ASTCENC_SIMD_INLINE vfloat4 loada(const float* p) + { + return vfloat4(_mm_load_ps(p)); + } + + /** + * @brief Factory that returns a vector containing the lane IDs. + */ + static ASTCENC_SIMD_INLINE vfloat4 lane_id() + { + return vfloat4(_mm_set_ps(3, 2, 1, 0)); + } + + /** + * @brief Return a swizzled float 2. + */ + template ASTCENC_SIMD_INLINE float2 swz() const + { + return float2(lane(), lane()); + } + + /** + * @brief Return a swizzled float 3. + */ + template ASTCENC_SIMD_INLINE vfloat4 swz() const + { + vfloat4 result(_mm_shuffle_ps(m, m, l0 | l1 << 2 | l2 << 4)); + result.set_lane<3>(0.0f); + return result; + } + + /** + * @brief Return a swizzled float 4. + */ + template ASTCENC_SIMD_INLINE vfloat4 swz() const + { + return vfloat4(_mm_shuffle_ps(m, m, l0 | l1 << 2 | l2 << 4 | l3 << 6)); + } + + /** + * @brief The vector ... + */ + __m128 m; +}; + +// ============================================================================ +// vint4 data type +// ============================================================================ + +/** + * @brief Data type for 4-wide ints. + */ +struct vint4 +{ + /** + * @brief Construct from zero-initialized value. + */ + ASTCENC_SIMD_INLINE vint4() {} + + /** + * @brief Construct from 4 values loaded from an unaligned address. + * + * Consider using loada() which is better with vectors if data is aligned + * to vector length. + */ + ASTCENC_SIMD_INLINE explicit vint4(const int *p) + { + m = _mm_loadu_si128((const __m128i*)p); + } + + /** + * @brief Construct from 4 uint8_t loaded from an unaligned address. + */ + ASTCENC_SIMD_INLINE explicit vint4(const uint8_t *p) + { + // _mm_loadu_si32 would be nicer syntax, but missing on older GCC + __m128i t = _mm_cvtsi32_si128(*(const int*)p); + +#if ASTCENC_SSE >= 41 + m = _mm_cvtepu8_epi32(t); +#else + t = _mm_unpacklo_epi8(t, _mm_setzero_si128()); + m = _mm_unpacklo_epi16(t, _mm_setzero_si128()); +#endif + } + + /** + * @brief Construct from 1 scalar value replicated across all lanes. + * + * Consider using vfloat4::zero() for constexpr zeros. + */ + ASTCENC_SIMD_INLINE explicit vint4(int a) + { + m = _mm_set1_epi32(a); + } + + /** + * @brief Construct from 4 scalar values. + * + * The value of @c a is stored to lane 0 (LSB) in the SIMD register. + */ + ASTCENC_SIMD_INLINE explicit vint4(int a, int b, int c, int d) + { + m = _mm_set_epi32(d, c, b, a); + } + + /** + * @brief Construct from an existing SIMD register. + */ + ASTCENC_SIMD_INLINE explicit vint4(__m128i a) + { + m = a; + } + + /** + * @brief Get the scalar from a single lane. + */ + template ASTCENC_SIMD_INLINE int lane() const + { + return _mm_cvtsi128_si32(_mm_shuffle_epi32(m, l)); + } + + /** + * @brief Set the scalar value of a single lane. 
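+	 *
+	 * A minimal usage sketch:
+	 *
+	 * @code
+	 *   vint4 v = vint4::zero();
+	 *   v.set_lane<0>(255);        // lanes { 255, 0, 0, 0 }
+	 * @endcode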
+ */ + template ASTCENC_SIMD_INLINE void set_lane(int a) + { +#if ASTCENC_SSE >= 41 + m = _mm_insert_epi32(m, a, l); +#else + alignas(16) int idx[4]; + _mm_store_si128((__m128i*)idx, m); + idx[l] = a; + m = _mm_load_si128((const __m128i*)idx); +#endif + } + + /** + * @brief Factory that returns a vector of zeros. + */ + static ASTCENC_SIMD_INLINE vint4 zero() + { + return vint4(_mm_setzero_si128()); + } + + /** + * @brief Factory that returns a replicated scalar loaded from memory. + */ + static ASTCENC_SIMD_INLINE vint4 load1(const int* p) + { + return vint4(*p); + } + + /** + * @brief Factory that returns a vector loaded from 16B aligned memory. + */ + static ASTCENC_SIMD_INLINE vint4 loada(const int* p) + { + return vint4(_mm_load_si128((const __m128i*)p)); + } + + /** + * @brief Factory that returns a vector containing the lane IDs. + */ + static ASTCENC_SIMD_INLINE vint4 lane_id() + { + return vint4(_mm_set_epi32(3, 2, 1, 0)); + } + + /** + * @brief The vector ... + */ + __m128i m; +}; + +// ============================================================================ +// vmask4 data type +// ============================================================================ + +/** + * @brief Data type for 4-wide control plane masks. + */ +struct vmask4 +{ + /** + * @brief Construct from an existing SIMD register. + */ + ASTCENC_SIMD_INLINE explicit vmask4(__m128 a) + { + m = a; + } + + /** + * @brief Construct from an existing SIMD register. + */ + ASTCENC_SIMD_INLINE explicit vmask4(__m128i a) + { + m = _mm_castsi128_ps(a); + } + + /** + * @brief Construct from 4 scalar values. + * + * The value of @c a is stored to lane 0 (LSB) in the SIMD register. + */ + ASTCENC_SIMD_INLINE explicit vmask4(bool a, bool b, bool c, bool d) + { + vint4 mask(a == false ? 0 : -1, + b == false ? 0 : -1, + c == false ? 0 : -1, + d == false ? 0 : -1); + + m = _mm_castsi128_ps(mask.m); + } + + /** + * @brief The vector ... + */ + __m128 m; +}; + +// ============================================================================ +// vmask4 operators and functions +// ============================================================================ + +/** + * @brief Overload: mask union (or). + */ +ASTCENC_SIMD_INLINE vmask4 operator|(vmask4 a, vmask4 b) +{ + return vmask4(_mm_or_ps(a.m, b.m)); +} + +/** + * @brief Overload: mask intersect (and). + */ +ASTCENC_SIMD_INLINE vmask4 operator&(vmask4 a, vmask4 b) +{ + return vmask4(_mm_and_ps(a.m, b.m)); +} + +/** + * @brief Overload: mask difference (xor). + */ +ASTCENC_SIMD_INLINE vmask4 operator^(vmask4 a, vmask4 b) +{ + return vmask4(_mm_xor_ps(a.m, b.m)); +} + +/** + * @brief Overload: mask invert (not). + */ +ASTCENC_SIMD_INLINE vmask4 operator~(vmask4 a) +{ + return vmask4(_mm_xor_si128(_mm_castps_si128(a.m), _mm_set1_epi32(-1))); +} + +/** + * @brief Return a 4-bit mask code indicating mask status. + * + * bit0 = lane 0 + */ +ASTCENC_SIMD_INLINE unsigned int mask(vmask4 a) +{ + return _mm_movemask_ps(a.m); +} + +// ============================================================================ +// vint4 operators and functions +// ============================================================================ + +/** + * @brief Overload: vector by vector addition. + */ +ASTCENC_SIMD_INLINE vint4 operator+(vint4 a, vint4 b) +{ + return vint4(_mm_add_epi32(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector subtraction. 
+ */ +ASTCENC_SIMD_INLINE vint4 operator-(vint4 a, vint4 b) +{ + return vint4(_mm_sub_epi32(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector multiplication. + */ +ASTCENC_SIMD_INLINE vint4 operator*(vint4 a, vint4 b) +{ +#if ASTCENC_SSE >= 41 + return vint4(_mm_mullo_epi32 (a.m, b.m)); +#else + __m128i t1 = _mm_mul_epu32(a.m, b.m); + __m128i t2 = _mm_mul_epu32( + _mm_srli_si128(a.m, 4), + _mm_srli_si128(b.m, 4)); + __m128i r = _mm_unpacklo_epi32( + _mm_shuffle_epi32(t1, _MM_SHUFFLE (0, 0, 2, 0)), + _mm_shuffle_epi32(t2, _MM_SHUFFLE (0, 0, 2, 0))); + return vint4(r); +#endif +} + +/** + * @brief Overload: vector bit invert. + */ +ASTCENC_SIMD_INLINE vint4 operator~(vint4 a) +{ + return vint4(_mm_xor_si128(a.m, _mm_set1_epi32(-1))); +} + +/** + * @brief Overload: vector by vector bitwise or. + */ +ASTCENC_SIMD_INLINE vint4 operator|(vint4 a, vint4 b) +{ + return vint4(_mm_or_si128(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector bitwise and. + */ +ASTCENC_SIMD_INLINE vint4 operator&(vint4 a, vint4 b) +{ + return vint4(_mm_and_si128(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector bitwise xor. + */ +ASTCENC_SIMD_INLINE vint4 operator^(vint4 a, vint4 b) +{ + return vint4(_mm_xor_si128(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector equality. + */ +ASTCENC_SIMD_INLINE vmask4 operator==(vint4 a, vint4 b) +{ + return vmask4(_mm_cmpeq_epi32(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector inequality. + */ +ASTCENC_SIMD_INLINE vmask4 operator!=(vint4 a, vint4 b) +{ + return ~vmask4(_mm_cmpeq_epi32(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector less than. + */ +ASTCENC_SIMD_INLINE vmask4 operator<(vint4 a, vint4 b) +{ + return vmask4(_mm_cmplt_epi32(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector greater than. + */ +ASTCENC_SIMD_INLINE vmask4 operator>(vint4 a, vint4 b) +{ + return vmask4(_mm_cmpgt_epi32(a.m, b.m)); +} + +/** + * @brief Logical shift left. + */ +template ASTCENC_SIMD_INLINE vint4 lsl(vint4 a) +{ + return vint4(_mm_slli_epi32(a.m, s)); +} + +/** + * @brief Logical shift right. + */ +template ASTCENC_SIMD_INLINE vint4 lsr(vint4 a) +{ + return vint4(_mm_srli_epi32(a.m, s)); +} + +/** + * @brief Arithmetic shift right. + */ +template ASTCENC_SIMD_INLINE vint4 asr(vint4 a) +{ + return vint4(_mm_srai_epi32(a.m, s)); +} + +/** + * @brief Return the min vector of two vectors. + */ +ASTCENC_SIMD_INLINE vint4 min(vint4 a, vint4 b) +{ +#if ASTCENC_SSE >= 41 + return vint4(_mm_min_epi32(a.m, b.m)); +#else + vmask4 d = a < b; + __m128i ap = _mm_and_si128(_mm_castps_si128(d.m), a.m); + __m128i bp = _mm_andnot_si128(_mm_castps_si128(d.m), b.m); + return vint4(_mm_or_si128(ap,bp)); +#endif +} + +/** + * @brief Return the max vector of two vectors. + */ +ASTCENC_SIMD_INLINE vint4 max(vint4 a, vint4 b) +{ +#if ASTCENC_SSE >= 41 + return vint4(_mm_max_epi32(a.m, b.m)); +#else + vmask4 d = a > b; + __m128i ap = _mm_and_si128(_mm_castps_si128(d.m), a.m); + __m128i bp = _mm_andnot_si128(_mm_castps_si128(d.m), b.m); + return vint4(_mm_or_si128(ap,bp)); +#endif +} + +/** + * @brief Return the horizontal minimum of a vector. + */ +ASTCENC_SIMD_INLINE vint4 hmin(vint4 a) +{ + a = min(a, vint4(_mm_shuffle_epi32(a.m, _MM_SHUFFLE(0, 0, 3, 2)))); + a = min(a, vint4(_mm_shuffle_epi32(a.m, _MM_SHUFFLE(0, 0, 0, 1)))); + return vint4(_mm_shuffle_epi32(a.m, _MM_SHUFFLE(0, 0, 0, 0))); +} + +/* + * @brief Return the horizontal maximum of a vector. 
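+ *
+ * A minimal usage sketch:
+ *
+ * @code
+ *   vint4 v(3, 9, 2, 7);
+ *   int top = hmax(v).lane<0>();   // 9; every lane of hmax(v) is 9
+ * @endcode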
+ */ +ASTCENC_SIMD_INLINE vint4 hmax(vint4 a) +{ + a = max(a, vint4(_mm_shuffle_epi32(a.m, _MM_SHUFFLE(0, 0, 3, 2)))); + a = max(a, vint4(_mm_shuffle_epi32(a.m, _MM_SHUFFLE(0, 0, 0, 1)))); + return vint4(_mm_shuffle_epi32(a.m, _MM_SHUFFLE(0, 0, 0, 0))); +} + +/** + * @brief Return the horizontal sum of a vector as a scalar. + */ +ASTCENC_SIMD_INLINE int hadd_s(vint4 a) +{ + // Add top and bottom halves, lane 1/0 + __m128i fold = _mm_castps_si128(_mm_movehl_ps(_mm_castsi128_ps(a.m), + _mm_castsi128_ps(a.m))); + __m128i t = _mm_add_epi32(a.m, fold); + + // Add top and bottom halves, lane 0 (_mm_hadd_ps exists but slow) + t = _mm_add_epi32(t, _mm_shuffle_epi32(t, 0x55)); + + return _mm_cvtsi128_si32(t); +} + +/** + * @brief Store a vector to a 16B aligned memory address. + */ +ASTCENC_SIMD_INLINE void storea(vint4 a, int* p) +{ + _mm_store_si128((__m128i*)p, a.m); +} + +/** + * @brief Store a vector to an unaligned memory address. + */ +ASTCENC_SIMD_INLINE void store(vint4 a, int* p) +{ + // Cast due to missing intrinsics + _mm_storeu_ps((float*)p, _mm_castsi128_ps(a.m)); +} + +/** + * @brief Store lowest N (vector width) bytes into an unaligned address. + */ +ASTCENC_SIMD_INLINE void store_nbytes(vint4 a, uint8_t* p) +{ + // Cast due to missing intrinsics + _mm_store_ss((float*)p, _mm_castsi128_ps(a.m)); +} + +/** + * @brief Gather N (vector width) indices from the array. + */ +ASTCENC_SIMD_INLINE vint4 gatheri(const int* base, vint4 indices) +{ +#if ASTCENC_AVX >= 2 + return vint4(_mm_i32gather_epi32(base, indices.m, 4)); +#else + alignas(16) int idx[4]; + storea(indices, idx); + return vint4(base[idx[0]], base[idx[1]], base[idx[2]], base[idx[3]]); +#endif +} + +/** + * @brief Pack low 8 bits of N (vector width) lanes into bottom of vector. + */ +ASTCENC_SIMD_INLINE vint4 pack_low_bytes(vint4 a) +{ +#if ASTCENC_SSE >= 41 + __m128i shuf = _mm_set_epi8(0,0,0,0, 0,0,0,0, 0,0,0,0, 12,8,4,0); + return vint4(_mm_shuffle_epi8(a.m, shuf)); +#else + __m128i va = _mm_unpacklo_epi8(a.m, _mm_shuffle_epi32(a.m, _MM_SHUFFLE(1,1,1,1))); + __m128i vb = _mm_unpackhi_epi8(a.m, _mm_shuffle_epi32(a.m, _MM_SHUFFLE(3,3,3,3))); + return vint4(_mm_unpacklo_epi16(va, vb)); +#endif +} + +/** + * @brief Return lanes from @c b if MSB of @c cond is set, else @c a. + */ +ASTCENC_SIMD_INLINE vint4 select(vint4 a, vint4 b, vmask4 cond) +{ +#if ASTCENC_SSE >= 41 + // Don't use _mm_blendv_epi8 directly, as it doesn't give the select on + // float sign-bit in the mask behavior which is useful. Performance is the + // same, these casts are free. + __m128 av = _mm_castsi128_ps(a.m); + __m128 bv = _mm_castsi128_ps(b.m); + return vint4(_mm_castps_si128(_mm_blendv_ps(av, bv, cond.m))); +#else + __m128i d = _mm_srai_epi32(_mm_castps_si128(cond.m), 31); + return vint4(_mm_or_si128(_mm_and_si128(d, b.m), _mm_andnot_si128(d, a.m))); +#endif +} + +// ============================================================================ +// vfloat4 operators and functions +// ============================================================================ + +/** + * @brief Overload: vector by vector addition. + */ +ASTCENC_SIMD_INLINE vfloat4 operator+(vfloat4 a, vfloat4 b) +{ + return vfloat4(_mm_add_ps(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector subtraction. + */ +ASTCENC_SIMD_INLINE vfloat4 operator-(vfloat4 a, vfloat4 b) +{ + return vfloat4(_mm_sub_ps(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector multiplication. 
+ */ +ASTCENC_SIMD_INLINE vfloat4 operator*(vfloat4 a, vfloat4 b) +{ + return vfloat4(_mm_mul_ps(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector division. + */ +ASTCENC_SIMD_INLINE vfloat4 operator/(vfloat4 a, vfloat4 b) +{ + return vfloat4(_mm_div_ps(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector equality. + */ +ASTCENC_SIMD_INLINE vmask4 operator==(vfloat4 a, vfloat4 b) +{ + return vmask4(_mm_cmpeq_ps(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector inequality. + */ +ASTCENC_SIMD_INLINE vmask4 operator!=(vfloat4 a, vfloat4 b) +{ + return vmask4(_mm_cmpneq_ps(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector less than. + */ +ASTCENC_SIMD_INLINE vmask4 operator<(vfloat4 a, vfloat4 b) +{ + return vmask4(_mm_cmplt_ps(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector greater than. + */ +ASTCENC_SIMD_INLINE vmask4 operator>(vfloat4 a, vfloat4 b) +{ + return vmask4(_mm_cmpgt_ps(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector less than or equal. + */ +ASTCENC_SIMD_INLINE vmask4 operator<=(vfloat4 a, vfloat4 b) +{ + return vmask4(_mm_cmple_ps(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector greater than or equal. + */ +ASTCENC_SIMD_INLINE vmask4 operator>=(vfloat4 a, vfloat4 b) +{ + return vmask4(_mm_cmpge_ps(a.m, b.m)); +} + +/** + * @brief Return the min vector of two vectors. + * + * If either lane value is NaN, @c b will be returned for that lane. + */ +ASTCENC_SIMD_INLINE vfloat4 min(vfloat4 a, vfloat4 b) +{ + // Do not reorder - second operand will return if either is NaN + return vfloat4(_mm_min_ps(a.m, b.m)); +} + +/** + * @brief Return the max vector of two vectors. + * + * If either lane value is NaN, @c b will be returned for that lane. + */ +ASTCENC_SIMD_INLINE vfloat4 max(vfloat4 a, vfloat4 b) +{ + // Do not reorder - second operand will return if either is NaN + return vfloat4(_mm_max_ps(a.m, b.m)); +} + +/** + * @brief Return the absolute value of the float vector. + */ +ASTCENC_SIMD_INLINE vfloat4 abs(vfloat4 a) +{ + return vfloat4(_mm_max_ps(_mm_sub_ps(_mm_setzero_ps(), a.m), a.m)); +} + +/** + * @brief Return a float rounded to the nearest integer value. + */ +ASTCENC_SIMD_INLINE vfloat4 round(vfloat4 a) +{ +#if ASTCENC_SSE >= 41 + constexpr int flags = _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC; + return vfloat4(_mm_round_ps(a.m, flags)); +#else + __m128 v = a.m; + __m128 neg_zero = _mm_castsi128_ps(_mm_set1_epi32(0x80000000)); + __m128 no_fraction = _mm_set1_ps(8388608.0f); + __m128 abs_mask = _mm_castsi128_ps(_mm_set1_epi32(0x7FFFFFFF)); + __m128 sign = _mm_and_ps(v, neg_zero); + __m128 s_magic = _mm_or_ps(no_fraction, sign); + __m128 r1 = _mm_add_ps(v, s_magic); + r1 = _mm_sub_ps(r1, s_magic); + __m128 r2 = _mm_and_ps(v, abs_mask); + __m128 mask = _mm_cmple_ps(r2, no_fraction); + r2 = _mm_andnot_ps(mask, v); + r1 = _mm_and_ps(r1, mask); + return vfloat4(_mm_xor_ps(r1, r2)); +#endif +} + +/** + * @brief Return the horizontal minimum of a vector. + */ +ASTCENC_SIMD_INLINE vfloat4 hmin(vfloat4 a) +{ + a = min(a, vfloat4(_mm_shuffle_ps(a.m, a.m, _MM_SHUFFLE(0, 0, 3, 2)))); + a = min(a, vfloat4(_mm_shuffle_ps(a.m, a.m, _MM_SHUFFLE(0, 0, 0, 1)))); + return vfloat4(_mm_shuffle_ps(a.m, a.m, _MM_SHUFFLE(0, 0, 0, 0))); +} + +/** + * @brief Return the horizontal maximum of a vector. 
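+ *
+ * A short example, combining hmin() above with hmax() to get a value range:
+ *
+ * @code
+ *   vfloat4 v(0.25f, 4.0f, -1.0f, 2.0f);
+ *   float lo = hmin(v).lane<0>();   // -1.0f
+ *   float hi = hmax(v).lane<0>();   //  4.0f
+ * @endcode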
+ */ +ASTCENC_SIMD_INLINE vfloat4 hmax(vfloat4 a) +{ + a = max(a, vfloat4(_mm_shuffle_ps(a.m, a.m, _MM_SHUFFLE(0, 0, 3, 2)))); + a = max(a, vfloat4(_mm_shuffle_ps(a.m, a.m, _MM_SHUFFLE(0, 0, 0, 1)))); + return vfloat4(_mm_shuffle_ps(a.m, a.m, _MM_SHUFFLE(0, 0, 0, 0))); +} + +/** + * @brief Return the horizontal sum of a vector as a scalar. + */ +ASTCENC_SIMD_INLINE float hadd_s(vfloat4 a) +{ + // Add top and bottom halves, lane 1/0 + __m128 t = _mm_add_ps(a.m, _mm_movehl_ps(a.m, a.m)); + + // Add top and bottom halves, lane 0 (_mm_hadd_ps exists but slow) + t = _mm_add_ss(t, _mm_shuffle_ps(t, t, 0x55)); + + return _mm_cvtss_f32(t); +} + +/** + * @brief Return the sqrt of the lanes in the vector. + */ +ASTCENC_SIMD_INLINE vfloat4 sqrt(vfloat4 a) +{ + return vfloat4(_mm_sqrt_ps(a.m)); +} + +/** + * @brief Return lanes from @c b if MSB of @c cond is set, else @c a. + */ +ASTCENC_SIMD_INLINE vfloat4 select(vfloat4 a, vfloat4 b, vmask4 cond) +{ +#if ASTCENC_SSE >= 41 + return vfloat4(_mm_blendv_ps(a.m, b.m, cond.m)); +#else + __m128 d = _mm_castsi128_ps(_mm_srai_epi32(_mm_castps_si128(cond.m), 31)); + return vfloat4(_mm_or_ps(_mm_and_ps(d, b.m), _mm_andnot_ps(d, a.m))); +#endif +} + +/** + * @brief Load a vector of gathered results from an array; + */ +ASTCENC_SIMD_INLINE vfloat4 gatherf(const float* base, vint4 indices) +{ +#if ASTCENC_AVX >= 2 + return vfloat4(_mm_i32gather_ps(base, indices.m, 4)); +#else + alignas(16) int idx[4]; + storea(indices, idx); + return vfloat4(base[idx[0]], base[idx[1]], base[idx[2]], base[idx[3]]); +#endif +} + +/** + * @brief Store a vector to an unaligned memory address. + */ +ASTCENC_SIMD_INLINE void store(vfloat4 a, float* p) +{ + _mm_storeu_ps(p, a.m); +} + +/** + * @brief Store a vector to a 16B aligned memory address. + */ +ASTCENC_SIMD_INLINE void storea(vfloat4 a, float* p) +{ + _mm_store_ps(p, a.m); +} + +/** + * @brief Return a integer value for a float vector, using truncation. + */ +ASTCENC_SIMD_INLINE vint4 float_to_int(vfloat4 a) +{ + return vint4(_mm_cvttps_epi32(a.m)); +} + +/** + * @brief Return a integer value for a float vector, using round-to-nearest. + */ +ASTCENC_SIMD_INLINE vint4 float_to_int_rtn(vfloat4 a) +{ + a = round(a); + return vint4(_mm_cvttps_epi32(a.m)); +} + +/** + * @brief Return a float value for an integer vector. + */ +ASTCENC_SIMD_INLINE vfloat4 int_to_float(vint4 a) +{ + return vfloat4(_mm_cvtepi32_ps(a.m)); +} + +/** + * @brief Return a float16 value for a float vector, using round-to-nearest. + */ +ASTCENC_SIMD_INLINE vint4 float_to_float16(vfloat4 a) +{ +#if ASTCENC_F16C >= 1 + __m128i packedf16 = _mm_cvtps_ph(a.m, 0); + __m128i f16 = _mm_cvtepu16_epi32(packedf16); + return vint4(f16); +#else + return vint4( + float_to_sf16(a.lane<0>()), + float_to_sf16(a.lane<1>()), + float_to_sf16(a.lane<2>()), + float_to_sf16(a.lane<3>())); +#endif +} + +/** + * @brief Return a float16 value for a float scalar, using round-to-nearest. + */ +static inline uint16_t float_to_float16(float a) +{ +#if ASTCENC_F16C >= 1 + __m128i f16 = _mm_cvtps_ph(_mm_set1_ps(a), 0); + return (uint16_t)_mm_cvtsi128_si32(f16); +#else + return float_to_sf16(a); +#endif +} + +/** + * @brief Return a float value for a float16 vector. 
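+ *
+ * A minimal round-trip sketch; all of these inputs are exactly
+ * representable as float16, so the conversion back is lossless:
+ *
+ * @code
+ *   vfloat4 v(0.25f, 1.0f, 2.0f, 65504.0f);
+ *   vint4 h = float_to_float16(v);       // 16-bit patterns, one per lane
+ *   vfloat4 back = float16_to_float(h);  // equal to v, lane for lane
+ * @endcode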
+ */ +ASTCENC_SIMD_INLINE vfloat4 float16_to_float(vint4 a) +{ +#if ASTCENC_F16C >= 1 + __m128i packed = _mm_packs_epi32(a.m, a.m); + __m128 f32 = _mm_cvtph_ps(packed); + return vfloat4(f32); +#else + return vfloat4( + sf16_to_float(a.lane<0>()), + sf16_to_float(a.lane<1>()), + sf16_to_float(a.lane<2>()), + sf16_to_float(a.lane<3>())); +#endif +} + +/** + * @brief Return a float value for a float16 scalar. + */ +ASTCENC_SIMD_INLINE float float16_to_float(uint16_t a) +{ +#if ASTCENC_F16C >= 1 + __m128i packed = _mm_set1_epi16(a); + __m128 f32 = _mm_cvtph_ps(packed); + return _mm_cvtss_f32(f32); +#else + return sf16_to_float(a); +#endif +} + +/** + * @brief Return a float value as an integer bit pattern (i.e. no conversion). + * + * It is a common trick to convert floats into integer bit patterns, perform + * some bit hackery based on knowledge they are IEEE 754 layout, and then + * convert them back again. This is the first half of that flip. + */ +ASTCENC_SIMD_INLINE vint4 float_as_int(vfloat4 a) +{ + return vint4(_mm_castps_si128(a.m)); +} + +/** + * @brief Return a integer value as a float bit pattern (i.e. no conversion). + * + * It is a common trick to convert floats into integer bit patterns, perform + * some bit hackery based on knowledge they are IEEE 754 layout, and then + * convert them back again. This is the second half of that flip. + */ +ASTCENC_SIMD_INLINE vfloat4 int_as_float(vint4 v) +{ + return vfloat4(_mm_castsi128_ps(v.m)); +} + +#endif // #ifndef ASTC_VECMATHLIB_SSE_4_H_INCLUDED diff --git a/libkram/astc-encoder/astcenc_weight_align.cpp b/libkram/astc-encoder/astcenc_weight_align.cpp index e329b13d..97da89d1 100644 --- a/libkram/astc-encoder/astcenc_weight_align.cpp +++ b/libkram/astc-encoder/astcenc_weight_align.cpp @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2011-2020 Arm Limited +// Copyright 2011-2021 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. You may obtain a copy @@ -51,46 +51,19 @@ #include #include -#if ASTCENC_SIMD_WIDTH <= 4 - #define ANGULAR_STEPS 44 -#elif ASTCENC_SIMD_WIDTH == 8 - // AVX code path loops over these tables 8 elements at a time, - // so make sure to have their size a multiple of 8. - #define ANGULAR_STEPS 48 -#else - #error Unknown SIMD width -#endif -static_assert((ANGULAR_STEPS % ASTCENC_SIMD_WIDTH) == 0, "ANGULAR_STEPS should be multiple of ASTCENC_SIMD_WIDTH"); - -alignas(ASTCENC_VECALIGN) static const float angular_steppings[ANGULAR_STEPS] = { - 1.0f, 1.25f, 1.5f, 1.75f, - - 2.0f, 2.5f, 3.0f, 3.5f, - 4.0f, 4.5f, 5.0f, 5.5f, - 6.0f, 6.5f, 7.0f, 7.5f, - - 8.0f, 9.0f, 10.0f, 11.0f, - 12.0f, 13.0f, 14.0f, 15.0f, - 16.0f, 17.0f, 18.0f, 19.0f, - 20.0f, 21.0f, 22.0f, 23.0f, - 24.0f, 25.0f, 26.0f, 27.0f, - 28.0f, 29.0f, 30.0f, 31.0f, - 32.0f, 33.0f, 34.0f, 35.0f, -#if ANGULAR_STEPS >= 48 - // This is "redundant" and only used in more-than-4-wide - // SIMD code paths, to make the steps table size - // be a multiple of SIMD width. Values are replicated - // from last entry so that AVX2 and SSE code paths - // return the same results. 
- 35.0f, 35.0f, 35.0f, 35.0f, -#endif -}; - -alignas(ASTCENC_VECALIGN) static float stepsizes[ANGULAR_STEPS]; -alignas(ASTCENC_VECALIGN) static float stepsizes_sqr[ANGULAR_STEPS]; +#define ANGULAR_STEPS 40 +static_assert((ANGULAR_STEPS % ASTCENC_SIMD_WIDTH) == 0, + "ANGULAR_STEPS must be multiple of ASTCENC_SIMD_WIDTH"); static int max_angular_steps_needed_for_quant_level[13]; +// Yes, the next-to-last entry is supposed to have the value 33. This because +// the 32-weight mode leaves a double-sized hole in the middle of the weight +// space, so we are better off matching 33 weights than 32. +static const int quantization_steps_for_level[13] = { + 2, 3, 4, 5, 6, 8, 10, 12, 16, 20, 24, 33, 36 +}; + // Store a reduced sin/cos table for 64 possible weight values; this causes // slight quality loss compared to using sin() and cos() directly. Must be 2^N. #define SINCOS_STEPS 64 @@ -100,30 +73,23 @@ alignas(ASTCENC_VECALIGN) static float cos_table[SINCOS_STEPS][ANGULAR_STEPS]; void prepare_angular_tables() { - int max_angular_steps_needed_for_quant_steps[40]; + int max_angular_steps_needed_for_quant_steps[ANGULAR_STEPS + 1]; for (int i = 0; i < ANGULAR_STEPS; i++) { - stepsizes[i] = 1.0f / angular_steppings[i]; - stepsizes_sqr[i] = stepsizes[i] * stepsizes[i]; + float angle_step = (float)(i + 1); for (int j = 0; j < SINCOS_STEPS; j++) { - sin_table[j][i] = static_cast(sinf((2.0f * astc::PI / (SINCOS_STEPS - 1.0f)) * angular_steppings[i] * j)); - cos_table[j][i] = static_cast(cosf((2.0f * astc::PI / (SINCOS_STEPS - 1.0f)) * angular_steppings[i] * j)); + sin_table[j][i] = static_cast(sinf((2.0f * astc::PI / (SINCOS_STEPS - 1.0f)) * angle_step * static_cast(j))); + cos_table[j][i] = static_cast(cosf((2.0f * astc::PI / (SINCOS_STEPS - 1.0f)) * angle_step * static_cast(j))); } - int p = astc::flt2int_rd(angular_steppings[i]) + 1; - max_angular_steps_needed_for_quant_steps[p] = MIN(i + 1, ANGULAR_STEPS - 1); + max_angular_steps_needed_for_quant_steps[i + 1] = astc::min(i + 1, ANGULAR_STEPS - 1); } - // yes, the next-to-last entry is supposed to have the value 33. This because under - // ASTC, the 32-weight mode leaves a double-sized hole in the middle of the - // weight space, so we are better off matching 33 weights than 32. - static const int steps_of_level[] = { 2, 3, 4, 5, 6, 8, 10, 12, 16, 20, 24, 33, 36 }; - for (int i = 0; i < 13; i++) { - max_angular_steps_needed_for_quant_level[i] = max_angular_steps_needed_for_quant_steps[steps_of_level[i]]; + max_angular_steps_needed_for_quant_level[i] = max_angular_steps_needed_for_quant_steps[quantization_steps_for_level[i]]; } } @@ -137,10 +103,11 @@ static void compute_angular_offsets( int max_angular_steps, float* offsets ) { - alignas(ASTCENC_VECALIGN) float anglesum_x[ANGULAR_STEPS]; - alignas(ASTCENC_VECALIGN) float anglesum_y[ANGULAR_STEPS]; - std::memset(anglesum_x, 0, max_angular_steps*sizeof(anglesum_x[0])); - std::memset(anglesum_y, 0, max_angular_steps*sizeof(anglesum_y[0])); + promise(samplecount > 0); + promise(max_angular_steps > 0); + + alignas(ASTCENC_VECALIGN) float anglesum_x[ANGULAR_STEPS] { 0 }; + alignas(ASTCENC_VECALIGN) float anglesum_y[ANGULAR_STEPS] { 0 }; // compute the angle-sums. 
for (int i = 0; i < samplecount; i++) @@ -155,31 +122,35 @@ static void compute_angular_offsets( const float *cosptr = cos_table[isample]; vfloat sample_weightv(sample_weight); - for (int j = 0; j < max_angular_steps; j += ASTCENC_SIMD_WIDTH) // arrays are multiple of SIMD width (ANGULAR_STEPS), safe to overshoot max + // Arrays are multiple of SIMD width (ANGULAR_STEPS), safe to overshoot max + for (int j = 0; j < max_angular_steps; j += ASTCENC_SIMD_WIDTH) { vfloat cp = loada(&cosptr[j]); vfloat sp = loada(&sinptr[j]); vfloat ax = loada(&anglesum_x[j]) + cp * sample_weightv; vfloat ay = loada(&anglesum_y[j]) + sp * sample_weightv; - store(ax, &anglesum_x[j]); - store(ay, &anglesum_y[j]); + storea(ax, &anglesum_x[j]); + storea(ay, &anglesum_y[j]); } } // post-process the angle-sums vfloat mult = vfloat(1.0f / (2.0f * astc::PI)); - for (int i = 0; i < max_angular_steps; i += ASTCENC_SIMD_WIDTH) // arrays are multiple of SIMD width (ANGULAR_STEPS), safe to overshoot max + vfloat rcp_stepsize = vfloat::lane_id() + vfloat(1.0f); + // Arrays are multiple of SIMD width (ANGULAR_STEPS), safe to overshoot max + for (int i = 0; i < max_angular_steps; i += ASTCENC_SIMD_WIDTH) { + vfloat ssize = 1.0f / rcp_stepsize; + rcp_stepsize = rcp_stepsize + vfloat(ASTCENC_SIMD_WIDTH); vfloat angle = atan2(loada(&anglesum_y[i]), loada(&anglesum_x[i])); - vfloat ofs = angle * (loada(&stepsizes[i]) * mult); - store(ofs, &offsets[i]); + vfloat ofs = angle * ssize * mult; + storea(ofs, &offsets[i]); } } // for a given step-size and a given offset, compute the // lowest and highest weight that results from quantizing using the stepsize & offset. // also, compute the resulting error. - static void compute_lowest_and_highest_weight( int samplecount, const float *samples, @@ -193,7 +164,12 @@ static void compute_lowest_and_highest_weight( float *cut_low_weight_error, float *cut_high_weight_error ) { - // Arrays are always multiple of SIMD width (ANGULAR_STEPS), so this is safe even if overshoot max + promise(samplecount > 0); + promise(max_angular_steps > 0); + + vfloat rcp_stepsize = vfloat::lane_id() + vfloat(1.0f); + + // Arrays are ANGULAR_STEPS long, so always safe to run full vectors for (int sp = 0; sp < max_angular_steps; sp += ASTCENC_SIMD_WIDTH) { vint minidx(128); @@ -201,36 +177,34 @@ static void compute_lowest_and_highest_weight( vfloat errval = vfloat::zero(); vfloat cut_low_weight_err = vfloat::zero(); vfloat cut_high_weight_err = vfloat::zero(); - vfloat rcp_stepsize = loada(&angular_steppings[sp]); vfloat offset = loada(&offsets[sp]); vfloat scaled_offset = rcp_stepsize * offset; for (int j = 0; j < samplecount; ++j) { - vfloat wt = load1a(&sample_weights[j]); - vfloat sval = load1a(&samples[j]) * rcp_stepsize - scaled_offset; + vfloat wt = load1(&sample_weights[j]); + vfloat sval = load1(&samples[j]) * rcp_stepsize - scaled_offset; vfloat svalrte = round(sval); - vint idxv = floatToInt(svalrte); + vint idxv = float_to_int(svalrte); vfloat dif = sval - svalrte; vfloat dwt = dif * wt; errval = errval + dwt * dif; - // Reset tracker on min hit. + // Reset tracker on min hit vmask mask = idxv < minidx; minidx = select(minidx, idxv, mask); cut_low_weight_err = select(cut_low_weight_err, vfloat::zero(), mask); - // Accumulate on min hit. + // Accumulate on min hit mask = idxv == minidx; - minidx = select(minidx, idxv, mask); vfloat accum = cut_low_weight_err + wt - vfloat(2.0f) * dwt; cut_low_weight_err = select(cut_low_weight_err, accum, mask); - // Reset tracker on max hit. 
+ // Reset tracker on max hit mask = idxv > maxidx; maxidx = select(maxidx, idxv, mask); cut_high_weight_err = select(cut_high_weight_err, vfloat::zero(), mask); - // Accumulate on max hit. + // Accumulate on max hit mask = idxv == maxidx; accum = cut_high_weight_err + wt + vfloat(2.0f) * dwt; cut_high_weight_err = select(cut_high_weight_err, accum, mask); @@ -240,34 +214,35 @@ static void compute_lowest_and_highest_weight( vint span = maxidx - minidx + vint(1); span = min(span, vint(max_quantization_steps + 3)); span = max(span, vint(2)); - store(minidx, &lowest_weight[sp]); - store(span, &weight_span[sp]); + storea(minidx, &lowest_weight[sp]); + storea(span, &weight_span[sp]); // The cut_(lowest/highest)_weight_error indicate the error that // results from forcing samples that should have had the weight value // one step (up/down). - vfloat errscale = loada(&stepsizes_sqr[sp]); - store(errval * errscale, &error[sp]); - store(cut_low_weight_err * errscale, &cut_low_weight_error[sp]); - store(cut_high_weight_err * errscale, &cut_high_weight_error[sp]); + vfloat ssize = 1.0f / rcp_stepsize; + vfloat errscale = ssize * ssize; + storea(errval * errscale, &error[sp]); + storea(cut_low_weight_err * errscale, &cut_low_weight_error[sp]); + storea(cut_high_weight_err * errscale, &cut_high_weight_error[sp]); + + rcp_stepsize = rcp_stepsize + vfloat(ASTCENC_SIMD_WIDTH); } } // main function for running the angular algorithm. -static void compute_angular_endpoints_for_quantization_levels( +static void compute_angular_endpoints_for_quant_levels( int samplecount, const float* samples, const float* sample_weights, - int max_quantization_level, + int max_quant_level, float low_value[12], float high_value[12] ) { - static const int quantization_steps_for_level[13] = { 2, 3, 4, 5, 6, 8, 10, 12, 16, 20, 24, 33, 36 }; - - int max_quantization_steps = quantization_steps_for_level[max_quantization_level + 1]; + int max_quantization_steps = quantization_steps_for_level[max_quant_level + 1]; alignas(ASTCENC_VECALIGN) float angular_offsets[ANGULAR_STEPS]; - int max_angular_steps = max_angular_steps_needed_for_quant_level[max_quantization_level]; + int max_angular_steps = max_angular_steps_needed_for_quant_level[max_quant_level]; compute_angular_offsets(samplecount, samples, sample_weights, max_angular_steps, angular_offsets); alignas(ASTCENC_VECALIGN) int32_t lowest_weight[ANGULAR_STEPS]; @@ -292,6 +267,7 @@ static void compute_angular_endpoints_for_quantization_levels( cut_low_weight[i] = 0; } + promise(max_angular_steps > 0); for (int i = 0; i < max_angular_steps; i++) { int idx_span = weight_span[i]; @@ -327,7 +303,6 @@ static void compute_angular_endpoints_for_quantization_levels( best_scale[idx_span - 2] = i; cut_low_weight[idx_span - 2] = 1; } - } // if we got a better error-value for a low sample count than for a high one, @@ -342,7 +317,7 @@ static void compute_angular_endpoints_for_quantization_levels( } } - for (int i = 0; i <= max_quantization_level; i++) + for (int i = 0; i <= max_quant_level; i++) { int q = quantization_steps_for_level[i]; int bsi = best_scale[q]; @@ -350,26 +325,30 @@ static void compute_angular_endpoints_for_quantization_levels( // Did we find anything? // TODO: Can we do better than bsi = 0 here. We should at least // propagate an error (and move the printf into the CLI). 
+#if defined(NDEBUG) if (bsi < 0) { - KLOGW("Astcenc", "Unable to find encoding within specified error limit\n"); + printf("WARNING: Unable to find encoding within specified error limit\n"); bsi = 0; } +else + bsi = astc::max(0, bsi); +#endif - float stepsize = stepsizes[bsi]; + float stepsize = 1.0f / (1.0f + (float)bsi); int lwi = lowest_weight[bsi] + cut_low_weight[q]; int hwi = lwi + q - 1; float offset = angular_offsets[bsi]; - low_value[i] = offset + lwi * stepsize; - high_value[i] = offset + hwi * stepsize; + low_value[i] = offset + static_cast(lwi) * stepsize; + high_value[i] = offset + static_cast(hwi) * stepsize; } } // helper functions that will compute ideal angular-endpoints // for a given set of weights and a given block size descriptors void compute_angular_endpoints_1plane( - float mode_cutoff, + bool only_always, const block_size_descriptor* bsd, const float* decimated_quantized_weights, const float* decimated_weights, @@ -379,32 +358,29 @@ void compute_angular_endpoints_1plane( float low_values[MAX_DECIMATION_MODES][12]; float high_values[MAX_DECIMATION_MODES][12]; - for (int i = 0; i < MAX_DECIMATION_MODES; i++) + for (int i = 0; i < bsd->decimation_mode_count; i++) { - // TODO: Do this at build time and cache the result - int samplecount = bsd->decimation_mode_samples[i]; - int quant_mode = bsd->decimation_mode_maxprec_1plane[i]; - float percentile = bsd->decimation_mode_percentile[i]; - int permit_encode = bsd->permit_encode[i]; - if (permit_encode == 0 || samplecount < 1 || quant_mode < 0 || percentile > mode_cutoff) + const decimation_mode& dm = bsd->decimation_modes[i]; + if (dm.maxprec_1plane < 0 || (only_always && !dm.percentile_always) || !dm.percentile_hit) { continue; } - compute_angular_endpoints_for_quantization_levels(samplecount, + int samplecount = bsd->decimation_tables[i]->weight_count; + compute_angular_endpoints_for_quant_levels(samplecount, decimated_quantized_weights + i * MAX_WEIGHTS_PER_BLOCK, - decimated_weights + i * MAX_WEIGHTS_PER_BLOCK, quant_mode, low_values[i], high_values[i]); + decimated_weights + i * MAX_WEIGHTS_PER_BLOCK, dm.maxprec_1plane, low_values[i], high_values[i]); } - for (int i = 0, ni = bsd->block_mode_packed_count; i < ni; ++i) + for (int i = 0; i < bsd->block_mode_count; ++i) { - const block_mode& bm = bsd->block_modes_packed[i]; - if (bm.is_dual_plane != 0 || bm.percentile > mode_cutoff) + const block_mode& bm = bsd->block_modes[i]; + if (bm.is_dual_plane || (only_always && !bm.percentile_always) || !bm.percentile_hit) { continue; } - int quant_mode = bm.quantization_mode; + int quant_mode = bm.quant_mode; int decim_mode = bm.decimation_mode; low_value[i] = low_values[decim_mode][quant_mode]; @@ -413,7 +389,7 @@ void compute_angular_endpoints_1plane( } void compute_angular_endpoints_2planes( - float mode_cutoff, + bool only_always, const block_size_descriptor* bsd, const float* decimated_quantized_weights, const float* decimated_weights, @@ -427,37 +403,34 @@ void compute_angular_endpoints_2planes( float low_values2[MAX_DECIMATION_MODES][12]; float high_values2[MAX_DECIMATION_MODES][12]; - for (int i = 0; i < MAX_DECIMATION_MODES; i++) + for (int i = 0; i < bsd->decimation_mode_count; i++) { - // TODO: Do this at build time and cache the result - int samplecount = bsd->decimation_mode_samples[i]; - int quant_mode = bsd->decimation_mode_maxprec_2planes[i]; - float percentile = bsd->decimation_mode_percentile[i]; - int permit_encode = bsd->permit_encode[i]; - - if (permit_encode == 0 || samplecount < 1 || quant_mode < 0 || 
percentile > mode_cutoff) + const decimation_mode& dm = bsd->decimation_modes[i]; + if (dm.maxprec_2planes < 0 || (only_always && !dm.percentile_always) || !dm.percentile_hit) { continue; } - compute_angular_endpoints_for_quantization_levels(samplecount, - decimated_quantized_weights + 2 * i * MAX_WEIGHTS_PER_BLOCK, - decimated_weights + 2 * i * MAX_WEIGHTS_PER_BLOCK, quant_mode, low_values1[i], high_values1[i]); + int samplecount = bsd->decimation_tables[i]->weight_count; + + compute_angular_endpoints_for_quant_levels(samplecount, + decimated_quantized_weights + 2 * i * MAX_WEIGHTS_PER_BLOCK, + decimated_weights + 2 * i * MAX_WEIGHTS_PER_BLOCK, dm.maxprec_2planes, low_values1[i], high_values1[i]); - compute_angular_endpoints_for_quantization_levels(samplecount, - decimated_quantized_weights + (2 * i + 1) * MAX_WEIGHTS_PER_BLOCK, - decimated_weights + (2 * i + 1) * MAX_WEIGHTS_PER_BLOCK, quant_mode, low_values2[i], high_values2[i]); + compute_angular_endpoints_for_quant_levels(samplecount, + decimated_quantized_weights + (2 * i + 1) * MAX_WEIGHTS_PER_BLOCK, + decimated_weights + (2 * i + 1) * MAX_WEIGHTS_PER_BLOCK, dm.maxprec_2planes, low_values2[i], high_values2[i]); } - for (int i = 0, ni = bsd->block_mode_packed_count; i < ni; ++i) + for (int i = 0; i < bsd->block_mode_count; ++i) { - const block_mode& bm = bsd->block_modes_packed[i]; - if (bm.is_dual_plane != 1 || bm.percentile > mode_cutoff) + const block_mode& bm = bsd->block_modes[i]; + if ((!bm.is_dual_plane) || (only_always && !bm.percentile_always) || !bm.percentile_hit) { continue; } - int quant_mode = bm.quantization_mode; + int quant_mode = bm.quant_mode; int decim_mode = bm.decimation_mode; low_value1[i] = low_values1[decim_mode][quant_mode]; diff --git a/libkram/astc-encoder/astcenc_weight_quant_xfer_tables.cpp b/libkram/astc-encoder/astcenc_weight_quant_xfer_tables.cpp index d87bb5c2..d2191496 100644 --- a/libkram/astc-encoder/astcenc_weight_quant_xfer_tables.cpp +++ b/libkram/astc-encoder/astcenc_weight_quant_xfer_tables.cpp @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2011-2020 Arm Limited +// Copyright 2011-2021 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. You may obtain a copy diff --git a/libkram/kram/Kram.cpp b/libkram/kram/Kram.cpp index 8a82e448..10ce6f7a 100644 --- a/libkram/kram/Kram.cpp +++ b/libkram/kram/Kram.cpp @@ -75,11 +75,15 @@ bool LoadPng(const uint8_t* data, size_t dataSize, Image& sourceImage) case LCT_GREY_ALPHA: hasColor = false; break; + case LCT_MAX_OCTET_VALUE: case LCT_RGB: case LCT_RGBA: case LCT_PALETTE: // ? hasColor = true; break; + + hasColor = false; + break;; } switch (state.info_png.color.colortype) { @@ -87,6 +91,7 @@ bool LoadPng(const uint8_t* data, size_t dataSize, Image& sourceImage) case LCT_RGB: hasAlpha = false; break; + case LCT_MAX_OCTET_VALUE: case LCT_RGBA: case LCT_GREY_ALPHA: case LCT_PALETTE: // ? @@ -1254,6 +1259,7 @@ string kramInfoPNGToString(const string& srcFilename, const uint8_t* data, uint6 case LCT_GREY_ALPHA: hasColor = false; break; + case LCT_MAX_OCTET_VALUE: case LCT_RGB: case LCT_RGBA: case LCT_PALETTE: // ? @@ -1266,6 +1272,7 @@ string kramInfoPNGToString(const string& srcFilename, const uint8_t* data, uint6 case LCT_RGB: hasAlpha = false; break; + case LCT_MAX_OCTET_VALUE: case LCT_RGBA: case LCT_GREY_ALPHA: case LCT_PALETTE: // ? 
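
The colortype switches above (in LoadPng and again in kramInfoPNGToString) reduce to a small mapping from lodepng's colortype onto the hasColor/hasAlpha flags. A minimal sketch of that mapping, assuming lodepng's LodePNGColorType enum (LCT_GREY, LCT_GREY_ALPHA, LCT_RGB, LCT_RGBA, LCT_PALETTE, LCT_MAX_OCTET_VALUE); the helper name is illustrative, and LCT_PALETTE plus the LCT_MAX_OCTET_VALUE sentinel are treated pessimistically as having both color and alpha, the same way the switches do:

    static void colorFlagsForPNG(LodePNGColorType colortype, bool& hasColor, bool& hasAlpha)
    {
        // grey variants carry no color; grey and rgb carry no alpha
        hasColor = !(colortype == LCT_GREY || colortype == LCT_GREY_ALPHA);
        hasAlpha = !(colortype == LCT_GREY || colortype == LCT_RGB);
    }
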
diff --git a/libkram/kram/KramConfig.h b/libkram/kram/KramConfig.h index cbf10b64..af16acd2 100644 --- a/libkram/kram/KramConfig.h +++ b/libkram/kram/KramConfig.h @@ -311,4 +311,11 @@ inline half4 toHalf4(const float4& vv) } #endif +// this just strips args +#define macroUnusedArg(x) + +// this just strips args +#define macroUnusedVar(x) (void)x + + } // namespace simd diff --git a/libkram/kram/KramImage.cpp b/libkram/kram/KramImage.cpp index 6e71f5ed..92c915c6 100644 --- a/libkram/kram/KramImage.cpp +++ b/libkram/kram/KramImage.cpp @@ -777,7 +777,11 @@ bool Image::decodeImpl(const KTXImage& srcImage, FILE* dstFile, KTXImage& dstIma dstImageASTC.dim_z = 1; // Not using 3D blocks, not supported on iOS //dstImageASTC.dim_pad = 0; dstImageASTC.data_type = ASTCENC_TYPE_U8; - dstImageASTC.data = outputTexture.data(); + + + // encode/encode still setup on array of 2d slices, so need address of data + uint8_t* outData = outputTexture.data(); + dstImageASTC.data = (void**)&outData; int32_t srcDataLength = (int32_t)srcMipLevel.length; Int2 blockDims = srcImage.blockDims(); @@ -790,20 +794,20 @@ bool Image::decodeImpl(const KTXImage& srcImage, FILE* dstFile, KTXImage& dstIma astcenc_config config; astcenc_error error = astcenc_config_init( - profile, blockDims.x, blockDims.y, 1, ASTCENC_PRE_FAST, ASTCENC_FLG_DECOMPRESS_ONLY, config); + profile, blockDims.x, blockDims.y, 1, ASTCENC_PRE_FAST, ASTCENC_FLG_DECOMPRESS_ONLY, &config); if (error != ASTCENC_SUCCESS) { return false; } astcenc_context* codec_context = nullptr; - error = astcenc_context_alloc(config, 1, &codec_context); + error = astcenc_context_alloc(&config, 1, &codec_context); if (error != ASTCENC_SUCCESS) { return false; } // no swizzle astcenc_swizzle swizzleDecode = {ASTCENC_SWZ_R, ASTCENC_SWZ_G, ASTCENC_SWZ_B, ASTCENC_SWZ_A}; - error = astcenc_decompress_image(codec_context, srcData, srcDataLength, dstImageASTC, swizzleDecode); + error = astcenc_decompress_image(codec_context, srcData, srcDataLength, &dstImageASTC, swizzleDecode, 0); astcenc_context_free(codec_context); @@ -908,26 +912,41 @@ bool Image::resizeImage(int32_t wResize, int32_t hResize, bool resizePow2, Image return true; } +// functional ctor +inline float4 float4m(float x, float y, float z, float w) +{ + return { x, y, z, w }; +} + // TODO: to hook this up, read 16u png into pixelsFlat, then gen an 8-bit normal xy // from that. This is more like SDF where a single height is used. 
-void Image::heightToNormals(float scale) +void Image::heightToNormals(float scale, bool isWrap) { int32_t w = _width; int32_t h = _height; - // TODO: hook these up, but needs src != dst or copy - bool isWrapY = false; - bool isWrapX = false; + bool isWrapY = isWrap; + bool isWrapX = isWrap; // 2.0 is distance betwen +1 and -1 - float scaleX = scale / 2.0; - float scaleY = scale / 2.0; + // don't scale by this, want caller to be able to pass 1.0 as default scale not 2.0 + float scaleX = scale; // / 2.0; + float scaleY = scale; // / 2.0; // src/dst the same here // may need to copy a row/column of pixels for wrap const float4* srcPixels = _pixelsFloat.data(); float4* dstPixels = (float4*)_pixelsFloat.data(); + const Color* srcPixels8 = (const Color*)_pixels.data(); + Color* dstPixels8 = (Color*)_pixels.data(); + bool isFloat = _pixels.empty(); + + if (!isFloat) { + scaleX /= 255.0f; + scaleY /= 255.0f; + } + for (int32_t y = 0; y < h; ++y) { int32_t y0 = y; int32_t ym = y - 1; @@ -962,27 +981,58 @@ void Image::heightToNormals(float scale) if (xp > (w - 1)) xp = w - 1; } - // cross pattern - // height channel is in x - float cN = srcPixels[ym + x0].x; - float cS = srcPixels[yp + x0].x; - float cE = srcPixels[y0 + xp].x; - float cW = srcPixels[y0 + xm].x; - - // up is N, so this is rhcs - float dx = (cE - cW) * scaleX; - float dy = (cN - cS) * scaleY; - - float len = sqrtf(dx * dx + dy * dy + 1.0f); - - dx /= len; - dy /= len; - - // write out the result - float4& dstPixel = dstPixels[y0 + x]; + + if (isFloat) { + + // cross pattern + // height channel is in x + float cN = srcPixels[ym + x0].x; + float cS = srcPixels[yp + x0].x; + float cE = srcPixels[y0 + xp].x; + float cW = srcPixels[y0 + xm].x; + + // up is N, so this is rhcs + float dx = (cE - cW) * scaleX; + float dy = (cN - cS) * scaleY; + + float4 normal = float4m(dx, dy, 1.0f, 0.0f); + normal = normalize(normal); + + // write out the result + float4& dstPixel = dstPixels[y0 + x]; + + dstPixel.x = normal.x; + dstPixel.y = normal.y; + dstPixel.z = normal.z; // can reconstruct + + // store height in alpha + dstPixel.w = srcPixels[y0 + x0].x; + } + else { + // cross pattern + // height channel is in x + uint8_t cN = srcPixels8[4 * (ym + x0)].r; // assumes first elem (.r) is height channel + uint8_t cS = srcPixels8[4 * (yp + x0)].r; + uint8_t cE = srcPixels8[4 * (y0 + xp)].r; + uint8_t cW = srcPixels8[4 * (y0 + xm)].r; + + float dx = (cE - cW) * scaleX; + float dy = (cN - cS) * scaleY; + + float4 normal = float4m(dx, dy, 1.0f, 0.0f); + normal = normalize(normal); + normal *= 127.5f; + + Color& dstPixel8 = dstPixels8[y0 + x]; - dstPixel.x = dx; - dstPixel.y = dy; + dstPixel8.r = normal.x; + dstPixel8.g = normal.y; + dstPixel8.b = normal.z; // can reconstruct + + // store height in alpha + dstPixel8.a = srcPixels8[y0 + x0].r; + } + } } } @@ -2031,23 +2081,25 @@ bool Image::compressMipLevel(const ImageInfo& info, KTXImage& image, // flags |= ASTCENC_FLG_USE_ALPHA_WEIGHT; // convert quality to present - astcenc_preset preset = ASTCENC_PRE_FAST; - if (info.quality <= 10) { - preset = ASTCENC_PRE_FAST; - } - else if (info.quality <= 50) { - preset = ASTCENC_PRE_MEDIUM; - } - else if (info.quality < 90) { - preset = ASTCENC_PRE_THOROUGH; - } - else { - preset = ASTCENC_PRE_EXHAUSTIVE; - } + float quality = info.quality; + +// ASTCENC_PRE_FAST; +// if (info.quality <= 10) { +// preset = ASTCENC_PRE_FAST; +// } +// else if (info.quality <= 50) { +// preset = ASTCENC_PRE_MEDIUM; +// } +// else if (info.quality < 90) { +// preset = 
ASTCENC_PRE_THOROUGH; +// } +// else { +// preset = ASTCENC_PRE_EXHAUSTIVE; +// } astcenc_config config; astcenc_error error = astcenc_config_init( - profile, blockDims.x, blockDims.y, 1, preset, flags, config); + profile, blockDims.x, blockDims.y, 1, quality, flags, &config); if (error != ASTCENC_SUCCESS) { return false; } @@ -2099,11 +2151,11 @@ bool Image::compressMipLevel(const ImageInfo& info, KTXImage& image, // have 2d image // hacked the src pixel handling to only do slices, not a 3D texture if (info.isHDR) { - srcImage.data = (void*)srcPixelDataFloat4; + srcImage.data = (void**)&srcPixelDataFloat4; srcImage.data_type = ASTCENC_TYPE_F32; } else { - srcImage.data = (void*)srcPixelData; + srcImage.data = (void**)&srcPixelData; srcImage.data_type = ASTCENC_TYPE_U8; } @@ -2113,7 +2165,7 @@ bool Image::compressMipLevel(const ImageInfo& info, KTXImage& image, // could this be built once, and reused across all mips astcenc_context* codec_context = nullptr; - error = astcenc_context_alloc(config, 1, &codec_context); + error = astcenc_context_alloc(&config, 1, &codec_context); if (error != ASTCENC_SUCCESS) { return false; } @@ -2144,7 +2196,7 @@ bool Image::compressMipLevel(const ImageInfo& info, KTXImage& image, } error = astcenc_compress_image( - codec_context, srcImage, swizzleEncode, + codec_context, &srcImage, swizzleEncode, outputTexture.data.data(), mipStorageSize, 0); // threadIndex @@ -2153,7 +2205,7 @@ bool Image::compressMipLevel(const ImageInfo& info, KTXImage& image, } #else error = astcenc_compress_image( - codec_context, srcImage, swizzleEncode, + codec_context, &srcImage, swizzleEncode, outputTexture.data.data(), mipStorageSize, 0); // threadIndex #endif diff --git a/libkram/kram/KramImage.h b/libkram/kram/KramImage.h index 535a74b2..c69683e9 100644 --- a/libkram/kram/KramImage.h +++ b/libkram/kram/KramImage.h @@ -86,7 +86,7 @@ class Image { vector& tmpImage) const; // convert x field to normals - void heightToNormals(float scale); + void heightToNormals(float scale, bool isWrap = false); private: // pixel size of image diff --git a/plugin/kps/KPS.cpp b/plugin/kps/KPS.cpp index 5239a7b8..e3524f85 100755 --- a/plugin/kps/KPS.cpp +++ b/plugin/kps/KPS.cpp @@ -83,8 +83,6 @@ extern DialogFormat FormatToDialog(DDS_Format fmt); extern DDS_Format DialogToFormat(DialogFormat fmt); extern MyMTLPixelFormat FormatToPixelFormat(DDS_Format fmt); -// this just strips args -#define macroUnusedArg(x) // global needed by a bunch of Photoshop SDK routines SPBasicSuite *sSPBasic = NULL; From 58de0f6945cb0e216430e69f79cdc918a2ccd1cf Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sat, 27 Mar 2021 20:47:42 -0700 Subject: [PATCH 021/901] kramv - more hud data on block and mip pixel cleanup float4 creation. 
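
For the new block/mip readout: mip dimensions come from halving the image
bounds once per level with round-up ((d + 1) >> 1), the cursor uv is scaled by
those dims to get the mip pixel, and block formats then divide by the block
footprint. A worked example with illustrative numbers: a 100x60 image at
mipLOD 2 has mip dims 25x15, so uv (0.5, 0.5) reads mpx 12 7, and with 4x4
blocks that is bpx 3 1. The numbers are still approximate when the source was
resized (see the TODO in updateEyedropper).
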
early out if ptrs same on heightToNormals add block and mipDims to hud, and a few more letters in hud --- kramv/KramRenderer.mm | 8 +-- kramv/KramViewerMain.mm | 73 ++++++++++++++++----- libkram/bc7enc/bc7enc.cpp | 4 +- libkram/kram/KramConfig.h | 122 +++++++++++++++++++++++++++++------- libkram/kram/KramImage.cpp | 38 ++++++++--- libkram/kram/KramMipper.cpp | 4 +- libkram/kram/KramMipper.h | 4 +- libkram/kram/float4a.h | 2 +- libkram/squish/maths.h | 2 +- 9 files changed, 195 insertions(+), 62 deletions(-) diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index d7471d55..3b3b74e7 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -585,7 +585,7 @@ - (BOOL)loadTextureImpl:(const string&)fullFilename isTextureChanged:(BOOL)isTex // have one of these for each texture added to the viewer float scaleX = MAX(1, texture.width); float scaleY = MAX(1, texture.height); - _modelMatrix = float4x4(simd_make_float4(scaleX, scaleY, 1.0f, 1.0f)); + _modelMatrix = float4x4(float4m(scaleX, scaleY, 1.0f, 1.0f)); _modelMatrix = _modelMatrix * matrix4x4_translation(0.0f, 0.0f, -1.0); return YES; @@ -596,7 +596,7 @@ - (float4x4)computeImageTransform:(float)panX panY:(float)panY zoom:(float)zoom float4x4 panTransform = matrix4x4_translation(-panX, panY, 0.0); // scale - float4x4 viewMatrix = float4x4(simd_make_float4(zoom, zoom, 1.0f, 1.0f)); + float4x4 viewMatrix = float4x4(float4m(zoom, zoom, 1.0f, 1.0f)); viewMatrix = panTransform * viewMatrix; return _projectionMatrix * viewMatrix * _modelMatrix; @@ -656,7 +656,7 @@ - (void)_updateGameState float4x4 panTransform = matrix4x4_translation(-_showSettings->panX, _showSettings->panY, 0.0); // scale - _viewMatrix = float4x4(simd_make_float4(_showSettings->zoom, _showSettings->zoom, 1.0f, 1.0f)); + _viewMatrix = float4x4(float4m(_showSettings->zoom, _showSettings->zoom, 1.0f, 1.0f)); _viewMatrix = panTransform * _viewMatrix; // viewMatrix should typically be the inverse @@ -864,7 +864,7 @@ - (void)drawMain:(id)commandBuffer view:(nonnull MTKView *)vie UniformsLevel uniformsLevel; - uniformsLevel.drawOffset = simd_make_float2(0.0f); + uniformsLevel.drawOffset = float2m(0.0f); if (_showSettings->isPreview) { // upload this on each face drawn, since want to be able to draw all mips/levels at once diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index 08b78d75..af562df3 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -423,7 +423,7 @@ - (void)doZoomMath:(float)newZoom newPan:(float2&)newPan { float4x4 mInv = simd_inverse(projectionViewModelMatrix); mInv.columns[3].w = 1.0f; // fixes inverse, calls always leaves m[3][3] = 0.999 - float4 pixel = mInv * simd_make_float4(clipPoint.x, clipPoint.y, 1.0f, 1.0f); + float4 pixel = mInv * float4m(clipPoint.x, clipPoint.y, 1.0f, 1.0f); //pixel /= pixel.w; // in case perspective used // allow pan to extend to show all @@ -489,8 +489,8 @@ - (void)handleGesture:(NSGestureRecognizer *)gestureRecognizer // https://stackoverflow.com/questions/30002361/image-zoom-centered-on-mouse-position // find the cursor location with respect to the image - float4 bottomLeftCorner = simd_make_float4(-0.5, -0.5f, 0.0f, 1.0f); - float4 topRightCorner = simd_make_float4(0.5, 0.5f, 0.0f, 1.0f); + float4 bottomLeftCorner = float4m(-0.5, -0.5f, 0.0f, 1.0f); + float4 topRightCorner = float4m(0.5, 0.5f, 0.0f, 1.0f); Renderer* renderer = (Renderer*)self.delegate; float4x4 newMatrix = [renderer computeImageTransform:_showSettings->panX panY:_showSettings->panY zoom:zoom]; @@ -625,7 +625,7 @@ - 
(void)updateEyedropper { float4x4 mInv = simd_inverse(projectionViewModelMatrix); mInv.columns[3].w = 1.0f; // fixes inverse, calls always leaves m[3][3] = 0.999 - float4 pixel = mInv * simd_make_float4(clipPoint.x, clipPoint.y, 1.0f, 1.0f); + float4 pixel = mInv * float4m(clipPoint.x, clipPoint.y, 1.0f, 1.0f); //pixel /= pixel.w; // in case perspective used // that's in model space (+/0.5f, +/0.5f), so convert to texture space @@ -637,10 +637,13 @@ - (void)updateEyedropper { pixel.x *= 0.999f; pixel.y *= 0.999f; + float uvX = pixel.x; + float uvY = pixel.y; + // pixels are 0 based pixel.x *= _showSettings->imageBoundsX; pixel.y *= _showSettings->imageBoundsY; - + // TODO: finish this logic, need to account for gaps too, and then isolate to a given level and mip to sample // if (_showSettings->isShowingAllLevelsAndMips) { // pixel.x *= _showSettings->totalLevels(); @@ -700,46 +703,82 @@ - (void)updateEyedropper { int32_t x = _showSettings->textureResultX; int32_t y = _showSettings->textureResultY; + // pixel at top-level mip sprintf(text, "px:%d %d\n", x, y); + // show block num + int mipLOD = _showSettings->mipLOD; + + // TODO:: these block numbers are not accurate on Toof at 4x4 + // there is resizing going on to the dimensions + + int mipX = _showSettings->imageBoundsX; + int mipY = _showSettings->imageBoundsY; + + for (int i = 0; i < mipLOD; ++i) { + mipX = (mipX+1) >> 1; + mipY = (mipY+1) >> 1; + } + mipX = std::max(1, mipX); + mipY = std::max(1, mipY); + + mipX = (int32_t)(uvX * mipX); + mipY = (int32_t)(uvY * mipY); + + // TODO: may want to return mip in pixel readback + // don't have it right now, so don't display if preview is enabled + if (_showSettings->isPreview) + mipLOD = 0; + + auto blockDims = blockDimsOfFormat(format); + if (blockDims.x > 1) + append_sprintf(text, "bpx: %d %d\n", mipX / blockDims.x, mipY / blockDims.y); + + // TODO: on astc if we have original blocks can run analysis from astc-encoder + // about each block. + + // show the mip pixel (only if not preview and mip changed) + if (mipLOD > 0 && !_showSettings->isPreview) + append_sprintf(text, "mpx: %d %d\n", mipX, mipY); + // TODO: more criteria here, can have 2 channel PBR metal-roughness // also have 4 channel normals where zw store other data. 
bool isNormal = _showSettings->isNormal; bool isFloat = isHdr; if (isNormal) { - float x = c.x; - float y = c.y; + float nx = c.x; + float ny = c.y; // unorm -> snorm if (!isSigned) { - x = x * 2.0f - 1.0f; - y = y * 2.0f - 1.0f; + nx = nx * 2.0f - 1.0f; + ny = ny * 2.0f - 1.0f; } // this is always postive on tan-space normals // assuming we're not viewing world normals - float z = sqrt(1.0f - std::min(x * x + y * y, 1.0f)); + float nz = sqrt(1.0f - std::min(nx * nx + ny * ny, 1.0f)); // print the underlying color (some nmaps are xy in 4 channels) string tmp; - printChannels(tmp, "c: ", c, numChannels, isFloat, isSigned); + printChannels(tmp, "ln: ", c, numChannels, isFloat, isSigned); text += tmp; // print direction - float4 d = simd_make_float4(x,y,z,0.0f); + float4 d = float4m(nx,ny,nz,0.0f); isFloat = true; - printChannels(tmp, "d: ", d, 3, isFloat, isSigned); + printChannels(tmp, "dr: ", d, 3, isFloat, isSigned); text += tmp; } else { // DONE: write some print helpers based on float4 and length string tmp; - printChannels(tmp, "l: ", c, numChannels, isFloat, isSigned); + printChannels(tmp, "ln: ", c, numChannels, isFloat, isSigned); text += tmp; if (isSrgb) { - printChannels(tmp, "s: ", s, numChannels, isFloat, isSigned); + printChannels(tmp, "sr: ", s, numChannels, isFloat, isSigned); text += tmp; } } @@ -820,8 +859,8 @@ - (void)scrollWheel:(NSEvent *)event // what if zoom moves it outside? - float4 pt0 = projectionViewModelMatrix * simd_make_float4(-0.5, -0.5f, 0.0f, 1.0f); - float4 pt1 = projectionViewModelMatrix * simd_make_float4(0.5, 0.5f, 0.0f, 1.0f); + float4 pt0 = projectionViewModelMatrix * float4m(-0.5, -0.5f, 0.0f, 1.0f); + float4 pt1 = projectionViewModelMatrix * float4m(0.5, 0.5f, 0.0f, 1.0f); // for perspective //pt0 /= pt0.w; diff --git a/libkram/bc7enc/bc7enc.cpp b/libkram/bc7enc/bc7enc.cpp index a8e32f15..61b4abe6 100644 --- a/libkram/bc7enc/bc7enc.cpp +++ b/libkram/bc7enc/bc7enc.cpp @@ -48,11 +48,11 @@ using namespace simd; using vec4F = float4; static inline vec4F *vec4F_set_scalar(vec4F *pV, float x) { *pV = vec4F(x); return pV; } -static inline vec4F *vec4F_set(vec4F *pV, float x, float y, float z, float w) { *pV = simd_make_float4(x,y,z,w); return pV; } +static inline vec4F *vec4F_set(vec4F *pV, float x, float y, float z, float w) { *pV = float4m(x,y,z,w); return pV; } static inline vec4F *vec4F_saturate_in_place(vec4F *pV) { *pV = saturate(*pV); return pV; } static inline vec4F vec4F_saturate(const vec4F *pV) { vec4F res = saturate(*pV); return res; } -static inline vec4F vec4F_from_color(const color_quad_u8 *pC) { vec4F res = simd_make_float4((float)pC->r, (float)pC->g, (float)pC->b, (float)pC->a); return res; } +static inline vec4F vec4F_from_color(const color_quad_u8 *pC) { vec4F res = float4m((float)pC->r, (float)pC->g, (float)pC->b, (float)pC->a); return res; } static inline vec4F vec4F_add(const vec4F *pLHS, const vec4F *pRHS) { vec4F res = *pLHS + *pRHS; return res; } static inline vec4F vec4F_sub(const vec4F *pLHS, const vec4F *pRHS) { vec4F res = *pLHS - *pRHS; return res; } static inline float vec4F_dot(const vec4F *pLHS, const vec4F *pRHS) { return dot(*pLHS, *pRHS); } diff --git a/libkram/kram/KramConfig.h b/libkram/kram/KramConfig.h index af16acd2..e36ed0d1 100644 --- a/libkram/kram/KramConfig.h +++ b/libkram/kram/KramConfig.h @@ -107,6 +107,7 @@ #define USE_NEON 0 #endif +// not using simd/simd.h on Win or Linux, but clang would support #ifndef USE_SIMDLIB #if KRAM_MAC || KRAM_IOS #define USE_SIMDLIB 1 @@ -115,10 +116,6 @@ #endif #endif -#if 
!USE_SIMDLIB -#define simd_make_float4(x, y, z, w) float4(x, y, z, w) -#endif - // use _Float16/_fp16 vs. other #if KRAM_MAC || KRAM_IOS #define USE_FLOAT16 1 @@ -219,32 +216,84 @@ class half4 { #include "float4a.h" #endif -// D3D hobbled non-pow2 mips by only supporting round down, not round up -// So then OpenGL followed that. And then Metal followd OpenGL. -// Round up adds an extra mip level to the chain, but results in much better filtering. -// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_non_power_of_two.txt -// http://download.nvidia.com/developer/Papers/2005/NP2_Mipmapping/NP2_Mipmap_Creation.pdf -#define ROUNDMIPSDOWN 1 +namespace simd +{ -inline void mipDown(int32_t& w, int32_t& h) +#if !USE_SIMDLIB + + +// don't have float2/float3 type yet +//// use instead of simd_make_float +//inline float2 float2m(float x) +//{ +// return float2(x); +//} +// +//inline float3 float3m(float x) +//{ +// return float3(x); +//} +//inline float3 float3m(float x, float y, float z) +//{ +// return float3(x, y, z); +//} + +inline float4 float4m(float x) { -#if ROUNDMIPSDOWN - w = w / 2; - h = h / 2; + return float4(x); +} + +inline float4 float4m(float x, float y, float z, float w) +{ + return float4(x, y, z, w); +} +//inline float4 float4m(const float3& v float w) +//{ +// return float4(v, w); +//} - if (w < 1) w = 1; - if (h < 1) h = 1; #else - w = (w + 1) / 2; - h = (h + 1) / 2; -#endif + +// functional ctor +inline float4 float4m(float3 v, float w) +{ + return vector4(v, w); } -namespace simd { +inline float2 float2m(float x, float y) +{ + return { x, y }; +} +inline float3 float3m(float x, float y, float z) +{ + return { x, y, z }; +} +inline float4 float4m(float x, float y, float z, float w) +{ + return { x, y, z, w }; +} + +inline float2 float2m(float x) +{ + return float2m(x,x); +} + +inline float3 float3m(float x) +{ + return float3m(x,x,x); +} + +inline float4 float4m(float x) +{ + return float4m(x,x,x,x); +} + +#endif + inline float4 saturate(const float4& v) { - const float4 kZero = simd_make_float4(0.0f, 0.0f, 0.0f, 0.0f); - const float4 kOne = simd_make_float4(1.0f, 1.0f, 1.0f, 1.0f); + const float4 kZero = float4m(0.0f, 0.0f, 0.0f, 0.0f); + const float4 kOne = float4m(1.0f, 1.0f, 1.0f, 1.0f); return min(max(v, kZero), kOne); } @@ -255,7 +304,7 @@ inline float4 toFloat4(const half4& vv) // https://patchwork.ozlabs.org/project/gcc/patch/559BC75A.1080606@arm.com/ // https://gcc.gnu.org/onlinedocs/gcc-7.5.0/gcc/Half-Precision.html // https://developer.arm.com/documentation/dui0491/i/Using-NEON-Support/Converting-vectors - return simd_make_float4((float)vv.x, (float)vv.y, (float)vv.z, (float)vv.w); + return float4m((float)vv.x, (float)vv.y, (float)vv.z, (float)vv.w); } inline half4 toHalf4(const float4& vv) { @@ -311,11 +360,36 @@ inline half4 toHalf4(const float4& vv) } #endif +} // namespace simd + +//--------------------------------------- + // this just strips args #define macroUnusedArg(x) // this just strips args #define macroUnusedVar(x) (void)x +// GL/D3D hobbled non-pow2 mips by only supporting round down, not round up +// And then Metal followd OpenGL since it's the same hw and drivers. +// Round up adds an extra mip level to the chain, but results in much better filtering. 
+// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_non_power_of_two.txt +// http://download.nvidia.com/developer/Papers/2005/NP2_Mipmapping/NP2_Mipmap_Creation.pdf +#define ROUNDMIPSDOWN 1 + +inline void mipDown(int32_t& w, int32_t& h) +{ +#if ROUNDMIPSDOWN + w = w / 2; + h = h / 2; + + if (w < 1) w = 1; + if (h < 1) h = 1; +#else + w = (w + 1) / 2; + h = (h + 1) / 2; +#endif +} + + -} // namespace simd diff --git a/libkram/kram/KramImage.cpp b/libkram/kram/KramImage.cpp index 92c915c6..d7906f29 100644 --- a/libkram/kram/KramImage.cpp +++ b/libkram/kram/KramImage.cpp @@ -912,16 +912,13 @@ bool Image::resizeImage(int32_t wResize, int32_t hResize, bool resizePow2, Image return true; } -// functional ctor -inline float4 float4m(float x, float y, float z, float w) -{ - return { x, y, z, w }; -} - // TODO: to hook this up, read 16u png into pixelsFlat, then gen an 8-bit normal xy // from that. This is more like SDF where a single height is used. void Image::heightToNormals(float scale, bool isWrap) { + // see here + // https://developer.download.nvidia.com/CgTutorial/cg_tutorial_chapter08.html + int32_t w = _width; int32_t h = _height; @@ -946,6 +943,28 @@ void Image::heightToNormals(float scale, bool isWrap) scaleX /= 255.0f; scaleY /= 255.0f; } + + bool isSame = srcPixels8 == dstPixels8; + if (isFloat) + isSame = srcPixels == dstPixels; + + // TODO: doing this at image level doesn't support chunk conversion + // so this would only work for 2D images, and not atlas strips to a 2D array. + + // TODO: to copy 3 rows in cyclic buffer, if src == dst, and handle clamp/wrap + // by copying the first/last row. For now disallow this. + // TODO: copy two rows here, then one row in the loop this would fundamentally + // change the algorithm, since all of these lookups assume the full srcImage + // really only need prev row, and previous height in row. Larger kernel support + // to 3x3, 5x5, 7x7, 9x9. This pattern is a 3x3 area with a cross + // where only 4 cardinal samples are used. This bigger areas have bleed + // especially if this is run on a chart. This is more for like terrain height maps. 
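    // A worked example of the cross filter described above (illustrative
    // numbers, not from any test image): with float heights cN=0.55, cS=0.45,
    // cE=0.6, cW=0.4 and scale=1, dx = (cE - cW) = 0.2 and dy = (cN - cS) = 0.1,
    // so the normal is normalize(0.2, 0.1, 1.0) ~= (0.195, 0.098, 0.976),
    // a slight tilt toward +x/+y. A larger scale multiplies dx/dy and steepens
    // that tilt.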
+ + // this recomends generating a few maps, and blending between them + // https://vrayschool.com/normal-map/ + + if (isSame) + return; for (int32_t y = 0; y < h; ++y) { int32_t y0 = y; @@ -1003,8 +1022,9 @@ void Image::heightToNormals(float scale, bool isWrap) dstPixel.x = normal.x; dstPixel.y = normal.y; - dstPixel.z = normal.z; // can reconstruct + dstPixel.z = normal.z; // can reconstruct from xy + // TODO: consider storing in z, easier to see data channel, not premul // store height in alpha dstPixel.w = srcPixels[y0 + x0].x; } @@ -1027,12 +1047,12 @@ void Image::heightToNormals(float scale, bool isWrap) dstPixel8.r = normal.x; dstPixel8.g = normal.y; - dstPixel8.b = normal.z; // can reconstruct + dstPixel8.b = normal.z; // can reconstruct from xy + // TODO: consider storing in z, easier to see data channel, not premul // store height in alpha dstPixel8.a = srcPixels8[y0 + x0].r; } - } } } diff --git a/libkram/kram/KramMipper.cpp b/libkram/kram/KramMipper.cpp index f2f4d300..6f0b6701 100644 --- a/libkram/kram/KramMipper.cpp +++ b/libkram/kram/KramMipper.cpp @@ -76,7 +76,7 @@ inline float srgbToLinearFunc(float s) float4 linearToSRGB(float4 lin) { lin = saturate(lin); - return simd_make_float4( + return float4m( linearToSRGBFunc(lin.x), linearToSRGBFunc(lin.y), linearToSRGBFunc(lin.z), @@ -168,7 +168,7 @@ void Mipper::initPixelsHalfIfNeeded(ImageData& srcImage, bool doPremultiply, boo vector& halfImage) const { Color zeroColor = { 0, 0, 0, 0 }; - float4 zeroColorf = simd_make_float4(0.0, 0.0f, 0.0f, 0.f); // need a constant for this + float4 zeroColorf = float4m(0.0, 0.0f, 0.0f, 0.f); // need a constant for this half4 zeroColorh = toHalf4(zeroColorf); int32_t w = srcImage.width; diff --git a/libkram/kram/KramMipper.h b/libkram/kram/KramMipper.h index 0751d440..e5439f25 100644 --- a/libkram/kram/KramMipper.h +++ b/libkram/kram/KramMipper.h @@ -26,13 +26,13 @@ struct Color { inline float4 ColorToUnormFloat4(const Color &value) { // simd lib can't ctor these even in C++, so will make abstracting harder - float4 c = simd_make_float4((float)value.r, (float)value.g, (float)value.b, (float)value.a); + float4 c = float4m((float)value.r, (float)value.g, (float)value.b, (float)value.a); return c / 255.0f; } inline float4 ColorToSnormFloat4(const Color &value) { - float4 c = simd_make_float4((float)value.r, (float)value.g, (float)value.b, (float)value.a); + float4 c = float4m((float)value.r, (float)value.g, (float)value.b, (float)value.a); return (c - float4(128.0f)) / 255.0f; } diff --git a/libkram/kram/float4a.h b/libkram/kram/float4a.h index 5b78fe0c..ca8111e7 100644 --- a/libkram/kram/float4a.h +++ b/libkram/kram/float4a.h @@ -174,7 +174,7 @@ class float4 { float4() {} // TODO: problem is that Apple's simd::float4(val) is val,000, simd::float4(val, 0, 0, 0) is 0 (last element?) - // have to go through simd_make_float4(val, val, val, val) to get 4 values + // have to go through float4m(val, val, val, val) to get 4 values // This behavior doesn't match HLSL/GLSL and is an artifact of the comma operator messing things up. 
explicit float4(float val) { reg = _mm_set1_ps(val); } // xyzw = val explicit float4(tType val) { reg = val; } diff --git a/libkram/squish/maths.h b/libkram/squish/maths.h index a7367508..726129c7 100644 --- a/libkram/squish/maths.h +++ b/libkram/squish/maths.h @@ -361,7 +361,7 @@ using namespace simd; using Vec4 = float4; // default ctor for float4(1) sets 1,0,0,0 in simd, but impls like Vec4 expect float4(repeating: x) #define VEC4_CONST(x) Vec4(makeVec4(x,x,x,x)) -#define makeVec4(x,y,z,w) simd_make_float4(x,y,z,w) +#define makeVec4(x,y,z,w) float4m(x,y,z,w) inline bool CompareAnyLessThan(Vec4 x, Vec4 y) { return any(x < y); } inline Vec4 Min(Vec4 x, Vec4 y) { return min(x, y); } From 6250e043aeef4c564d172ee181cfd89c10e93e2f Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 28 Mar 2021 01:14:30 -0700 Subject: [PATCH 022/901] Kram - add -height, -heightScale, -wrap options Can convert any height map to a normal with these settings. Uses standard cross layout. heightScale can be negative to generate recessed vs. protruding normal map. Added in heights test case. --- libkram/kram/Kram.cpp | 39 +++++++ libkram/kram/KramConfig.h | 7 ++ libkram/kram/KramImage.cpp | 144 ------------------------- libkram/kram/KramImage.h | 4 +- libkram/kram/KramImageInfo.cpp | 174 +++++++++++++++++++++++++++++++ libkram/kram/KramImageInfo.h | 19 +++- scripts/kramTextures.py | 26 ++++- tests/src/collectorbarrelh-h.png | 3 + 8 files changed, 262 insertions(+), 154 deletions(-) create mode 100644 tests/src/collectorbarrelh-h.png diff --git a/libkram/kram/Kram.cpp b/libkram/kram/Kram.cpp index 10ce6f7a..b3a73281 100644 --- a/libkram/kram/Kram.cpp +++ b/libkram/kram/Kram.cpp @@ -999,6 +999,15 @@ void kramEncodeUsage(bool showVersion = true) "\tOnly output mips <= size px\n" "\n" + // tex to normal + "\t-height" + "\tConvert height.x to normal.xy\n" + "\t-heightScale scale" + "\tScale heights up down to adjust normal map\n" + "\t-wrap" + "\tWrap texture at edges (height only for now)\n" + "\n" + "\t-srgb" "\tsRGB for rgb/rgba formats\n" "\t-signed" @@ -1678,6 +1687,36 @@ static int32_t kramAppEncode(vector& args) // continue; // } + else if (isStringEqual(word, "-heightScale")) { + ++i; + if (i >= argc) { + KLOGE("Kram", "heightScale arg invalid"); + error = true; + break; + } + + infoArgs.isHeight = true; + infoArgs.heightScale = atof(args[i]); + + // Note: caller can negate scale, but don't allow scale 0. 
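            // e.g. scripts/kramTextures.py appends "-height -heightScale 2 -wrap" to its
            // normal-map format for *-h height sources; a negative heightScale flips the
            // result from protruding to recessed.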
+ if (infoArgs.heightScale == 0.0f) { + KLOGE("Kram", "heightScale arg cannot be 0"); + error = true; + } + continue; + } + else if (isStringEqual(word, "-height")) { + // converted to a normal map + infoArgs.isHeight = true; + continue; + } + else if (isStringEqual(word, "-wrap")) { + // whether texture is clamp or wrap + infoArgs.isWrap = true; + continue; + } + + else if (isStringEqual(word, "-e") || isStringEqual(word, "-encoder")) { ++i; diff --git a/libkram/kram/KramConfig.h b/libkram/kram/KramConfig.h index e36ed0d1..257a7737 100644 --- a/libkram/kram/KramConfig.h +++ b/libkram/kram/KramConfig.h @@ -391,5 +391,12 @@ inline void mipDown(int32_t& w, int32_t& h) #endif } +// Use this on vectors +template +inline size_t vsizeof(const std::vector& v) +{ + return sizeof(T) * v.size(); +} + diff --git a/libkram/kram/KramImage.cpp b/libkram/kram/KramImage.cpp index d7906f29..b3758174 100644 --- a/libkram/kram/KramImage.cpp +++ b/libkram/kram/KramImage.cpp @@ -912,150 +912,6 @@ bool Image::resizeImage(int32_t wResize, int32_t hResize, bool resizePow2, Image return true; } -// TODO: to hook this up, read 16u png into pixelsFlat, then gen an 8-bit normal xy -// from that. This is more like SDF where a single height is used. -void Image::heightToNormals(float scale, bool isWrap) -{ - // see here - // https://developer.download.nvidia.com/CgTutorial/cg_tutorial_chapter08.html - - int32_t w = _width; - int32_t h = _height; - - bool isWrapY = isWrap; - bool isWrapX = isWrap; - - // 2.0 is distance betwen +1 and -1 - // don't scale by this, want caller to be able to pass 1.0 as default scale not 2.0 - float scaleX = scale; // / 2.0; - float scaleY = scale; // / 2.0; - - // src/dst the same here - // may need to copy a row/column of pixels for wrap - const float4* srcPixels = _pixelsFloat.data(); - float4* dstPixels = (float4*)_pixelsFloat.data(); - - const Color* srcPixels8 = (const Color*)_pixels.data(); - Color* dstPixels8 = (Color*)_pixels.data(); - bool isFloat = _pixels.empty(); - - if (!isFloat) { - scaleX /= 255.0f; - scaleY /= 255.0f; - } - - bool isSame = srcPixels8 == dstPixels8; - if (isFloat) - isSame = srcPixels == dstPixels; - - // TODO: doing this at image level doesn't support chunk conversion - // so this would only work for 2D images, and not atlas strips to a 2D array. - - // TODO: to copy 3 rows in cyclic buffer, if src == dst, and handle clamp/wrap - // by copying the first/last row. For now disallow this. - // TODO: copy two rows here, then one row in the loop this would fundamentally - // change the algorithm, since all of these lookups assume the full srcImage - // really only need prev row, and previous height in row. Larger kernel support - // to 3x3, 5x5, 7x7, 9x9. This pattern is a 3x3 area with a cross - // where only 4 cardinal samples are used. This bigger areas have bleed - // especially if this is run on a chart. This is more for like terrain height maps. 
- - // this recomends generating a few maps, and blending between them - // https://vrayschool.com/normal-map/ - - if (isSame) - return; - - for (int32_t y = 0; y < h; ++y) { - int32_t y0 = y; - int32_t ym = y - 1; - int32_t yp = y + 1; - - if (isWrapY) { - ym = (ym + h) % h; - yp = (yp) % h; - } - else { - // clamp - if (ym < 0) ym = 0; - if (yp > (h - 1)) yp = h - 1; - } - - y0 *= w; - ym *= w; - yp *= w; - - for (int32_t x = 0; x < w; ++x) { - int32_t x0 = x; - int32_t xm = x - 1; - int32_t xp = x + 1; - - if (isWrapX) { - xm = (xm + w) % w; - xp = (xp) % w; - } - else { - // clamp - if (xm < 0) xm = 0; - if (xp > (w - 1)) xp = w - 1; - } - - - if (isFloat) { - - // cross pattern - // height channel is in x - float cN = srcPixels[ym + x0].x; - float cS = srcPixels[yp + x0].x; - float cE = srcPixels[y0 + xp].x; - float cW = srcPixels[y0 + xm].x; - - // up is N, so this is rhcs - float dx = (cE - cW) * scaleX; - float dy = (cN - cS) * scaleY; - - float4 normal = float4m(dx, dy, 1.0f, 0.0f); - normal = normalize(normal); - - // write out the result - float4& dstPixel = dstPixels[y0 + x]; - - dstPixel.x = normal.x; - dstPixel.y = normal.y; - dstPixel.z = normal.z; // can reconstruct from xy - - // TODO: consider storing in z, easier to see data channel, not premul - // store height in alpha - dstPixel.w = srcPixels[y0 + x0].x; - } - else { - // cross pattern - // height channel is in x - uint8_t cN = srcPixels8[4 * (ym + x0)].r; // assumes first elem (.r) is height channel - uint8_t cS = srcPixels8[4 * (yp + x0)].r; - uint8_t cE = srcPixels8[4 * (y0 + xp)].r; - uint8_t cW = srcPixels8[4 * (y0 + xm)].r; - - float dx = (cE - cW) * scaleX; - float dy = (cN - cS) * scaleY; - - float4 normal = float4m(dx, dy, 1.0f, 0.0f); - normal = normalize(normal); - normal *= 127.5f; - - Color& dstPixel8 = dstPixels8[y0 + x]; - - dstPixel8.r = normal.x; - dstPixel8.g = normal.y; - dstPixel8.b = normal.z; // can reconstruct from xy - - // TODO: consider storing in z, easier to see data channel, not premul - // store height in alpha - dstPixel8.a = srcPixels8[y0 + x0].r; - } - } - } -} bool Image::encode(ImageInfo& info, KTXImage& dstImage) const { diff --git a/libkram/kram/KramImage.h b/libkram/kram/KramImage.h index c69683e9..3fc97346 100644 --- a/libkram/kram/KramImage.h +++ b/libkram/kram/KramImage.h @@ -85,9 +85,7 @@ class Image { const KTXImage& image, ImageData& srcImage, vector& tmpImage) const; - // convert x field to normals - void heightToNormals(float scale, bool isWrap = false); - + private: // pixel size of image int32_t _width = 0; diff --git a/libkram/kram/KramImageInfo.cpp b/libkram/kram/KramImageInfo.cpp index 2652c0c3..71b370c9 100644 --- a/libkram/kram/KramImageInfo.cpp +++ b/libkram/kram/KramImageInfo.cpp @@ -1017,6 +1017,13 @@ void ImageInfo::initWithArgs(const ImageInfoArgs& args) quality = args.quality; + // this is for height to normal, will convert .r to normal xy + isHeight = args.isHeight; + isWrap = args.isWrap; + heightScale = args.heightScale; + if (isHeight) + isNormal = true; + // Note: difference between input srgb and output srgb, but it's mingled // here a bit @@ -1099,6 +1106,11 @@ void ImageInfo::initWithSourceImage(Image& sourceImage) hasColor = false; } + // this will only work on 2d textures, since this is all pre-chunk + if (isHeight) { + heightToNormals(w, h, srcPixelsFloat, srcPixels, heightScale, isWrap); + } + // this updates hasColor/hasAlpha if (!swizzleText.empty()) { // set any channels that are constant @@ -1182,6 +1194,168 @@ void 
ImageInfo::initWithSourceImage(Image& sourceImage) } } + + +// TODO: tread 16u png into pixelsFlat, then gen an 8-bit normal xy +// from that. This is more like SDF where a single height is used. + +void ImageInfo::heightToNormals(int32_t w, int32_t h, + float4* srcPixels, + Color* srcPixels8, + float scale, bool isWrap) +{ + // see here + // https://developer.download.nvidia.com/CgTutorial/cg_tutorial_chapter08.html + + // src/dst the same here + // may need to copy a row/column of pixels for wrap + float4* dstPixels = srcPixels; + Color* dstPixels8 = srcPixels8; + + bool isFloat = srcPixels; + + // copy the texture, or there are too many edge cases in the code below + vector srcDataCopy8; + vector srcDataCopy; + if (isFloat) { + srcDataCopy.resize(w*h); + memcpy(srcDataCopy.data(), srcPixels, vsizeof(srcDataCopy)); + srcPixels = srcDataCopy.data(); + } + else { + srcDataCopy8.resize(w*h); + memcpy(srcDataCopy8.data(), srcPixels8, vsizeof(srcDataCopy8)); + srcPixels8 = srcDataCopy8.data(); + } + + //----------------------- + + bool isWrapX = isWrap; + bool isWrapY = isWrap; + + // 2.0 is distance betwen +1 and -1 + // don't scale by this, want caller to be able to pass 1.0 as default scale not 2.0 + float scaleX = scale; // / 2.0; + float scaleY = scale; // / 2.0; + + if (!isFloat) { + scaleX /= 255.0f; + scaleY /= 255.0f; + } + + // TODO: doing this at image level doesn't support chunk conversion + // so this would only work for 2D images, and not atlas strips to a 2D array. + + // TODO: Larger kernel support to 2x2, 3x3, 5x5, 7x7, 9x9 + // This pattern is a 3x3 cross here only 4 cardinal samples are used. + // Bigger areas have bleed especially if this is run on a chart. + + // this recommends generating a few maps, and blending between them + // https://vrayschool.com/normal-map/ + + for (int32_t y = 0; y < h; ++y) { + int32_t y0 = y; + int32_t ym = y - 1; + int32_t yp = y + 1; + + if (isWrapY) { + ym = (ym + h) % h; + yp = (yp) % h; + } + else { + // clamp + if (ym < 0) ym = 0; + if (yp > (h - 1)) yp = h - 1; + } + + y0 *= w; + ym *= w; + yp *= w; + + for (int32_t x = 0; x < w; ++x) { + //int32_t x0 = x; + int32_t xm = x - 1; + int32_t xp = x + 1; + + if (isWrapX) { + xm = (xm + w) % w; + xp = (xp) % w; + } + else { + // clamp + if (xm < 0) xm = 0; + if (xp > (w - 1)) xp = w - 1; + } + + + if (isFloat) { + + // cross pattern + // height channel is in x + float cN = srcPixels[ym + x].x; + float cS = srcPixels[yp + x].x; + float cE = srcPixels[y0 + xp].x; + float cW = srcPixels[y0 + xm].x; + + // up is N, so this is rhcs + float dx = (cE - cW) * scaleX; + float dy = (cN - cS) * scaleY; + + float4 normal = float4m(dx, dy, 1.0f, 0.0f); + normal = normalize(normal); + + // convert to unorm + normal = normal * 0.5 + 0.5f; + + // write out the result + float4& dstPixel = dstPixels[y0 + x]; + + dstPixel.x = normal.x; + dstPixel.y = normal.y; + + // TODO: consider storing in z, easier to see data channel, not premul + // store height in alpha. 
Let caller provide the swizzle xyzh01 + //dstPixel.z = normal.z; // can reconstruct from xy + //dstPixel.w = srcPixels[y0 + x0].x; + + dstPixel.z = srcPixels[y0 + x].z; + dstPixel.w = srcPixels[y0 + x].w; + } + else { + // cross pattern + // height channel is in x + uint8_t cN = srcPixels8[ym + x].r; // assumes first elem (.r) is height channel + uint8_t cS = srcPixels8[yp + x].r; + uint8_t cE = srcPixels8[y0 + xp].r; + uint8_t cW = srcPixels8[y0 + xm].r; + + float dx = (cE - cW) * scaleX; + float dy = (cN - cS) * scaleY; + + float4 normal = float4m(dx, dy, 1.0f, 0.0f); + normal = normalize(normal); + + // convert to unorm + normal = normal * 127 + 128.0f; + + Color& dstPixel8 = dstPixels8[y0 + x]; + + dstPixel8.r = normal.x; + dstPixel8.g = normal.y; + + // TODO: consider storing height in z, easier to see data channel, not premul + // store height in alpha. Let caller provide the swizzle xyzh01 + //dstPixel8.b = normal.z; // can reconstruct from xy + //dstPixel8.a = srcPixels8[y0 + x0].r; + + dstPixel8.b = srcPixels8[y0 + x].b; + dstPixel8.a = srcPixels8[y0 + x].a; + } + } + } +} + + const char* encoderName(TexEncoder encoder) { switch(encoder) { diff --git a/libkram/kram/KramImageInfo.h b/libkram/kram/KramImageInfo.h index 0d9b2ae5..d4c9f862 100644 --- a/libkram/kram/KramImageInfo.h +++ b/libkram/kram/KramImageInfo.h @@ -58,7 +58,7 @@ class ImageInfoArgs { bool isPremultiplied = false; bool isPrezero = false; bool isNormal = false; // signed, but may be stored unorm and swizzled (f.e. astc/bc3nm gggr or rrrg) - + // can pick a smaller format if alpha = 1 (only for bc and etc) bool optimizeFormatForOpaque = false; @@ -68,6 +68,11 @@ class ImageInfoArgs { bool isSRGB = false; bool isHDR = false; + // for now these are only usable with normal to height + bool isHeight = false; + bool isWrap = false; + float heightScale = 1.0f; + string swizzleText; string averageChannels; @@ -91,7 +96,12 @@ class ImageInfo { const char* swizzleText); static void swizzleTextureLDR(int32_t w, int32_t h, Color* srcPixels_, const char* swizzleText); - + + // convert x field to normals + static void heightToNormals(int32_t w, int32_t h, + float4* srcPixelsFloat_, + Color* srcPixels_, + float scale, bool isWrap = false); private: // this walks pixels for hasColor and hasAlpha if not already set to false void updateImageTraitsHDR(int32_t w, int32_t h, @@ -145,6 +155,11 @@ class ImageInfo { bool useEtcenc = false; bool useExplicit = false; + // for now these are only usable with normal to height + bool isHeight = false; + bool isWrap = false; + float heightScale = 1.0f; + int32_t quality = 49; int32_t mipMinSize = 1; diff --git a/scripts/kramTextures.py b/scripts/kramTextures.py index c92e60c6..e4a52c55 100755 --- a/scripts/kramTextures.py +++ b/scripts/kramTextures.py @@ -31,6 +31,7 @@ class TextureContent(Enum): SDF = 3 MetalRoughness = 4 Mask = 5 + Height = 6 class TextureType(Enum): Unknown = 0 @@ -103,6 +104,8 @@ def textureContent(self, name): content = TextureContent.Albedo elif name.endswith("-n") or name.endswith("-normal"): content = TextureContent.Normal + elif name.endswith("-h") or name.endswith("-height"): + content = TextureContent.Height return content @@ -167,7 +170,14 @@ def processTextureKram(self, srcPath, dstDir, srcModstamp): # this only exports to ktx, post process will convert to ktx2 ext = ".ktx" - dstName = srcFilename + ext + dstName = srcFilename + + # replace -h with -n, since it will be converted to a normal + if dstName.endswith("-h"): + dstName = dstName.replace("-h", "-n") + + 
dstName += ext + dstFile = dstDir + dstName # check the modstamp of src vs. dst output, form same name at dstPath, and check os.stat() on that @@ -423,7 +433,8 @@ def processTextures(platform, container, verbose, quality, jobs, force, script, fmtSDF = "" fmtMetalRoughness = "" fmtMask = "" - + fmtHeight = "" + # note 1/2/2nm in astc need swizzles to store more efficiently # and at 4x4 aren't any smaller than explicit values # prefer etc since it's smaller and higher bit depth (11-bits) @@ -438,6 +449,7 @@ def processTextures(platform, container, verbose, quality, jobs, force, script, fmtMetalRoughness = " -f etc2rg" fmtMask = " -f etc2r" fmtSDF = " -f etc2r -signed -sdf" + fmtHeight = fmtNormal + " -height -heightScale 2 -wrap" elif platform == "android": fmtAlbedo = " -f etc2rgba -srgb -premul -optopaque" # or astc @@ -445,7 +457,8 @@ def processTextures(platform, container, verbose, quality, jobs, force, script, fmtMetalRoughness = " -f etc2rg" fmtMask = " -f etc2r" fmtSDF = " -f etc2r -signed -sdf" - + fmtHeight = fmtNormal + " -height -heightScale 2 -wrap" + elif platform == "mac": # bc1 on Toof has purple, green, yellow artifacts with bc7enc, and has more banding # and a lot of weird blocky artifacts, look into bc1 encoder. @@ -455,7 +468,8 @@ def processTextures(platform, container, verbose, quality, jobs, force, script, fmtMetalRoughness = " -f bc5" fmtMask = " -f bc4" fmtSDF = " -f bc4 -signed -sdf" - + fmtHeight = fmtNormal + " -height -heightScale 2 -wrap" + elif platform == "win": # bc1 on Toof has purple, green, yellow artifacts with bc7enc, and has more banding # and a lot of weird blocky artifacts, look into bc1 encoder @@ -464,6 +478,7 @@ def processTextures(platform, container, verbose, quality, jobs, force, script, fmtMetalRoughness = " -f bc5" fmtMask = " -f bc4" fmtSDF = " -f bc4 -signed -sdf" + fmtHeight = fmtNormal + " -height -heightScale 2 -wrap" elif platform == "any": # output to s/rgba8u, then run through ktxsc to go to BasisLZ @@ -473,6 +488,7 @@ def processTextures(platform, container, verbose, quality, jobs, force, script, fmtMetalRoughness = " -f rgba8 -swizzle r001" fmtMask = " -f rgba8 -swizzle r001" fmtSDF = " -f rgba8 -swizzle r001 -sdf" + fmtHeight = fmtNormal + " -height -heightScale 2 -wrap" else: return 1 @@ -492,7 +508,7 @@ def processTextures(platform, container, verbose, quality, jobs, force, script, if verbose: moreArgs += " -v" - formats = [fmtUnknown, fmtAlbedo, fmtNormal, fmtSDF, fmtMetalRoughness, fmtMask] + formats = [fmtUnknown, fmtAlbedo, fmtNormal, fmtSDF, fmtMetalRoughness, fmtMask, fmtHeight] formats = [fmt + moreArgs for fmt in formats] diff --git a/tests/src/collectorbarrelh-h.png b/tests/src/collectorbarrelh-h.png new file mode 100644 index 00000000..fdaabf09 --- /dev/null +++ b/tests/src/collectorbarrelh-h.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b71f2ac9ab675fa807f7757e39019b780b1d67638bb53c27980d29d9e5c6da76 +size 35466 From 36bce4d2ab1009462d7d12df6cc4d4ff356d5404 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 28 Mar 2021 01:50:34 -0700 Subject: [PATCH 023/901] kramv - fix display of signed data (f.e. normals) that have been decoded to unorm. 
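
Signed data (normal maps and other snorm content) gets decoded to an unorm
texture for display, so the eyedropper's unorm -> snorm remap has to follow the
decoded format's signedness rather than the source's. The renderer now records
decodedFormat alongside originalFormat, and updateEyedropper keys the remap off
isSignedFormat(decodedFormat), roughly:

    bool isDecodeSigned = isSignedFormat(_showSettings->decodedFormat);
    if (!isDecodeSigned) {
        nx = nx * 2.0f - 1.0f;  // unorm -> snorm before reconstructing z
        ny = ny * 2.0f - 1.0f;
    }
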
--- kramv/KramRenderer.mm | 1 + kramv/KramViewerBase.h | 1 + kramv/KramViewerMain.mm | 13 ++++++++----- libkram/kram/KramImageInfo.cpp | 2 +- 4 files changed, 11 insertions(+), 6 deletions(-) diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index 3b3b74e7..c4632b03 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -462,6 +462,7 @@ - (BOOL)loadTexture:(nonnull NSURL *)url _showSettings->imageInfo = kramInfoToString(fullFilename, isVerbose); _showSettings->originalFormat = (MyMTLPixelFormat)originalFormatMTL; + _showSettings->decodedFormat = (MyMTLPixelFormat)texture.pixelFormat; _showSettings->lastFilename = fullFilename; _showSettings->lastTimestamp = timestamp; diff --git a/kramv/KramViewerBase.h b/kramv/KramViewerBase.h index 62e9612d..d1766924 100644 --- a/kramv/KramViewerBase.h +++ b/kramv/KramViewerBase.h @@ -142,6 +142,7 @@ class ShowSettings { // format before any transcode to supported formats MyMTLPixelFormat originalFormat; + MyMTLPixelFormat decodedFormat; void advanceDebugMode(bool isShiftKeyDown); diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index af562df3..ead6f91b 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -746,12 +746,14 @@ - (void)updateEyedropper { bool isNormal = _showSettings->isNormal; bool isFloat = isHdr; + bool isDecodeSigned = isSignedFormat(_showSettings->decodedFormat); + if (isNormal) { float nx = c.x; float ny = c.y; // unorm -> snorm - if (!isSigned) { + if (!isDecodeSigned) { nx = nx * 2.0f - 1.0f; ny = ny * 2.0f - 1.0f; } @@ -762,23 +764,24 @@ - (void)updateEyedropper { // print the underlying color (some nmaps are xy in 4 channels) string tmp; - printChannels(tmp, "ln: ", c, numChannels, isFloat, isSigned); + printChannels(tmp, "ln: ", c, numChannels, isFloat, isDecodeSigned); text += tmp; // print direction float4 d = float4m(nx,ny,nz,0.0f); isFloat = true; - printChannels(tmp, "dr: ", d, 3, isFloat, isSigned); + isDecodeSigned = true; + printChannels(tmp, "dr: ", d, 3, isFloat, isDecodeSigned); text += tmp; } else { // DONE: write some print helpers based on float4 and length string tmp; - printChannels(tmp, "ln: ", c, numChannels, isFloat, isSigned); + printChannels(tmp, "ln: ", c, numChannels, isFloat, isDecodeSigned); text += tmp; if (isSrgb) { - printChannels(tmp, "sr: ", s, numChannels, isFloat, isSigned); + printChannels(tmp, "sr: ", s, numChannels, isFloat, isDecodeSigned); text += tmp; } } diff --git a/libkram/kram/KramImageInfo.cpp b/libkram/kram/KramImageInfo.cpp index 71b370c9..f0c87451 100644 --- a/libkram/kram/KramImageInfo.cpp +++ b/libkram/kram/KramImageInfo.cpp @@ -1336,7 +1336,7 @@ void ImageInfo::heightToNormals(int32_t w, int32_t h, normal = normalize(normal); // convert to unorm - normal = normal * 127 + 128.0f; + normal = normal * 127.0f + 128.0f; Color& dstPixel8 = dstPixels8[y0 + x]; From dc1f924e7042b9ce1bceab6f2fe2fd4f4b6cef23 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 28 Mar 2021 10:34:22 -0700 Subject: [PATCH 024/901] Kramv - fix signed eyedropper conversion, convert drag-drop url to file path for reload --- kramv/KramViewerMain.mm | 41 ++++++++++++++++++++++++++-------- libkram/kram/KramConfig.h | 18 ++++++++++----- libkram/kram/KramImageInfo.cpp | 3 +++ scripts/kramTextures.py | 16 ++++++++----- 4 files changed, 58 insertions(+), 20 deletions(-) diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index ead6f91b..075e24ed 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -697,8 +697,6 @@ - (void)updateEyedropper { // 
this will always be a linear color float4 c = _showSettings->textureResult; - // this saturates the value, so don't use for extended srgb - float4 s = linearToSRGB(c); int32_t x = _showSettings->textureResultX; int32_t y = _showSettings->textureResultY; @@ -747,13 +745,19 @@ - (void)updateEyedropper { bool isFloat = isHdr; bool isDecodeSigned = isSignedFormat(_showSettings->decodedFormat); + if (isSigned && !isDecodeSigned) { + c.x = c.x * 2.0f - 1.0f; + c.y = c.y * 2.0f - 1.0f; + c.z = c.y * 2.0f - 1.0f; + c.w = c.y * 2.0f - 1.0f; + } if (isNormal) { float nx = c.x; float ny = c.y; // unorm -> snorm - if (!isDecodeSigned) { + if (!isSigned) { nx = nx * 2.0f - 1.0f; ny = ny * 2.0f - 1.0f; } @@ -764,24 +768,27 @@ - (void)updateEyedropper { // print the underlying color (some nmaps are xy in 4 channels) string tmp; - printChannels(tmp, "ln: ", c, numChannels, isFloat, isDecodeSigned); + printChannels(tmp, "ln: ", c, numChannels, isFloat, isSigned); text += tmp; // print direction float4 d = float4m(nx,ny,nz,0.0f); isFloat = true; - isDecodeSigned = true; - printChannels(tmp, "dr: ", d, 3, isFloat, isDecodeSigned); + isSigned = true; + printChannels(tmp, "dr: ", d, 3, isFloat, isSigned); text += tmp; } else { // DONE: write some print helpers based on float4 and length string tmp; - printChannels(tmp, "ln: ", c, numChannels, isFloat, isDecodeSigned); + printChannels(tmp, "ln: ", c, numChannels, isFloat, isSigned); text += tmp; if (isSrgb) { - printChannels(tmp, "sr: ", s, numChannels, isFloat, isDecodeSigned); + // this saturates the value, so don't use for extended srgb + float4 s = linearToSRGB(c); + + printChannels(tmp, "sr: ", s, numChannels, isFloat, isSigned); text += tmp; } } @@ -1265,6 +1272,13 @@ - (BOOL)performDragOperation:(id)sender { // this turns it into a real path (supposedly works even with sandbox) NSURL * url = [NSURL URLWithString:urlString]; + // convert the original path and then back to a url, otherwise reload fails + // when this file is replaced. + const char* filename = url.fileSystemRepresentation; + NSString* filenameString = [NSString stringWithUTF8String:filename]; + + url = [NSURL fileURLWithPath:filenameString]; + if ([self loadTextureFromURL:url]) { return YES; } @@ -1368,7 +1382,16 @@ - (BOOL)loadTextureFromURL:(NSURL*)url { //NSLog(@"LoadTexture"); const char* filename = url.fileSystemRepresentation; - + + // Getting a url that returns nil on reload, probably some security thing + // consider storing a path instead of a url. Probably when file is replaced + // the saved image url no longer points to a valid filename. + if (filename == nullptr) + { + KLOGE("kramv", "Fix this url returning nil issue"); + return NO; + } + if (endsWithExtension(filename, ".zip")) { if (!self.imageURL || ![self.imageURL isEqualTo:url]) { BOOL isArchiveLoaded = [self loadArchive:filename]; diff --git a/libkram/kram/KramConfig.h b/libkram/kram/KramConfig.h index 257a7737..3c344385 100644 --- a/libkram/kram/KramConfig.h +++ b/libkram/kram/KramConfig.h @@ -370,28 +370,36 @@ inline half4 toHalf4(const float4& vv) // this just strips args #define macroUnusedVar(x) (void)x -// GL/D3D hobbled non-pow2 mips by only supporting round down, not round up -// And then Metal followd OpenGL since it's the same hw and drivers. -// Round up adds an extra mip level to the chain, but results in much better filtering. 
-// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_non_power_of_two.txt -// http://download.nvidia.com/developer/Papers/2005/NP2_Mipmapping/NP2_Mipmap_Creation.pdf + +//--------------------------------------- + #define ROUNDMIPSDOWN 1 inline void mipDown(int32_t& w, int32_t& h) { + // GL/D3D hobbled non-pow2 mips by only supporting round down, not round up + // And then Metal followd OpenGL since it's the same hw and drivers. + // Round up adds an extra mip level to the chain, but results in much better filtering. + // https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_non_power_of_two.txt + // http://download.nvidia.com/developer/Papers/2005/NP2_Mipmapping/NP2_Mipmap_Creation.pdf + #if ROUNDMIPSDOWN + // round-down w = w / 2; h = h / 2; if (w < 1) w = 1; if (h < 1) h = 1; #else + // round-up w = (w + 1) / 2; h = (h + 1) / 2; #endif } // Use this on vectors +#include + template inline size_t vsizeof(const std::vector& v) { diff --git a/libkram/kram/KramImageInfo.cpp b/libkram/kram/KramImageInfo.cpp index f0c87451..2cab1300 100644 --- a/libkram/kram/KramImageInfo.cpp +++ b/libkram/kram/KramImageInfo.cpp @@ -1332,6 +1332,9 @@ void ImageInfo::heightToNormals(int32_t w, int32_t h, float dx = (cE - cW) * scaleX; float dy = (cN - cS) * scaleY; + dx = -dx; + dy = -dy; + float4 normal = float4m(dx, dy, 1.0f, 0.0f); normal = normalize(normal); diff --git a/scripts/kramTextures.py b/scripts/kramTextures.py index e4a52c55..183644c0 100755 --- a/scripts/kramTextures.py +++ b/scripts/kramTextures.py @@ -440,7 +440,11 @@ def processTextures(platform, container, verbose, quality, jobs, force, script, # prefer etc since it's smaller and higher bit depth (11-bits) # note sdf and signed data will look odd in Preview. It's not - # really setup for signed data. + # really setup for signed data. + + # heightScale and wrap is really a per texture setting, but don't have + # support for that yet. 
+ fmtHeightArgs = " -height -heightScale 4 -wrap" if platform == "ios": # use astc since has more quality settings @@ -449,7 +453,7 @@ def processTextures(platform, container, verbose, quality, jobs, force, script, fmtMetalRoughness = " -f etc2rg" fmtMask = " -f etc2r" fmtSDF = " -f etc2r -signed -sdf" - fmtHeight = fmtNormal + " -height -heightScale 2 -wrap" + fmtHeight = fmtNormal + fmtHeightArgs elif platform == "android": fmtAlbedo = " -f etc2rgba -srgb -premul -optopaque" # or astc @@ -457,7 +461,7 @@ def processTextures(platform, container, verbose, quality, jobs, force, script, fmtMetalRoughness = " -f etc2rg" fmtMask = " -f etc2r" fmtSDF = " -f etc2r -signed -sdf" - fmtHeight = fmtNormal + " -height -heightScale 2 -wrap" + fmtHeight = fmtNormal + fmtHeightArgs elif platform == "mac": # bc1 on Toof has purple, green, yellow artifacts with bc7enc, and has more banding @@ -468,7 +472,7 @@ def processTextures(platform, container, verbose, quality, jobs, force, script, fmtMetalRoughness = " -f bc5" fmtMask = " -f bc4" fmtSDF = " -f bc4 -signed -sdf" - fmtHeight = fmtNormal + " -height -heightScale 2 -wrap" + fmtHeight = fmtNormal + fmtHeightArgs elif platform == "win": # bc1 on Toof has purple, green, yellow artifacts with bc7enc, and has more banding @@ -478,7 +482,7 @@ def processTextures(platform, container, verbose, quality, jobs, force, script, fmtMetalRoughness = " -f bc5" fmtMask = " -f bc4" fmtSDF = " -f bc4 -signed -sdf" - fmtHeight = fmtNormal + " -height -heightScale 2 -wrap" + fmtHeight = fmtNormal + fmtHeightArgs elif platform == "any": # output to s/rgba8u, then run through ktxsc to go to BasisLZ @@ -488,7 +492,7 @@ def processTextures(platform, container, verbose, quality, jobs, force, script, fmtMetalRoughness = " -f rgba8 -swizzle r001" fmtMask = " -f rgba8 -swizzle r001" fmtSDF = " -f rgba8 -swizzle r001 -sdf" - fmtHeight = fmtNormal + " -height -heightScale 2 -wrap" + fmtHeight = fmtNormal + fmtHeightArgs else: return 1 From 9a2577c9b3f6141b97ec78e45b8f0ff4ab17ce0e Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 28 Mar 2021 16:53:16 -0700 Subject: [PATCH 025/901] kramv - fix the "Open" and "OpenRecent" menu to work, allow ktx2 support The UTI's needed to point to an NSDocument for "Open" to not be disabled. Define a document, and then re-route the load to the view or else the call crashes on readFromData unimplemented in the default NSDocument. Also define an Export Type Identifier or KTX2 was not highlighted in the NSOpenPanel file listings. "New" is still now enabled, since I set the app up as an Editor for "KTX2" type. If they're all viewers, then no "New" option is provided. Just want that to provide empty document windows. 
--- kramv/Info.plist | 32 +++++++++++- kramv/KramViewerMain.mm | 105 ++++++++++++++++++++++++++++++++++------ 2 files changed, 122 insertions(+), 15 deletions(-) diff --git a/kramv/Info.plist b/kramv/Info.plist index 11f0adb3..c3a1e263 100644 --- a/kramv/Info.plist +++ b/kramv/Info.plist @@ -19,6 +19,8 @@ org.khronos.ktx + NSDocumentClass + KramDocument CFBundleTypeIconSystemGenerated @@ -33,6 +35,8 @@ public.png + NSDocumentClass + KramDocument CFBundleTypeIconSystemGenerated @@ -40,13 +44,15 @@ CFBundleTypeName KTX2 CFBundleTypeRole - Viewer + Editor LSHandlerRank Default LSItemContentTypes public.ktx2 + NSDocumentClass + KramDocument CFBundleTypeIconSystemGenerated @@ -61,6 +67,8 @@ public.zip-archive + NSDocumentClass + KramDocument CFBundleExecutable @@ -85,5 +93,27 @@ Main NSPrincipalClass NSApplication + UTExportedTypeDeclarations + + + UTTypeConformsTo + + public.image + + UTTypeDescription + KTX2 + UTTypeIcons + + UTTypeIdentifier + public.ktx2 + UTTypeTagSpecification + + public.filename-extension + + ktx2 + + + + diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index 075e24ed..03d128cd 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -48,6 +48,64 @@ - (BOOL)loadTextureFromURL:(NSURL*)url; @end +//------------- + +@interface KramDocument : NSDocument + +@end + + +@interface KramDocument () + +@end + +@implementation KramDocument + +- (instancetype)init { + self = [super init]; + if (self) { + // Add your subclass-specific initialization here. + } + return self; +} + ++ (BOOL)autosavesInPlace { + return NO; // YES; +} + +// call when "new" called +- (void)makeWindowControllers { + // Override to return the Storyboard file name of the document. + //NSStoryboard* storyboard = [NSStoryboard storyboardWithName:@"Main" bundle:nil]; + //NSWindowController* controller = [storyboard instantiateControllerWithIdentifier:@"Document Window Controller"]; + //[self addWindowController:controller]; +} + + +- (NSData *)dataOfType:(NSString *)typeName error:(NSError **)outError { + // Insert code here to write your document to data of the specified type. If outError != NULL, ensure that you create and set an appropriate error if you return nil. + // Alternatively, you could remove this method and override -fileWrapperOfType:error:, -writeToURL:ofType:error:, or -writeToURL:ofType:forSaveOperation:originalContentsURL:error: instead. 
+ [NSException raise:@"UnimplementedMethod" format:@"%@ is unimplemented", NSStringFromSelector(_cmd)]; + return nil; +} + + +- (BOOL)readFromURL:(NSURL *)url ofType:(NSString *)typeName error:(NSError **)outError { + +#if 0 + MyMTKView* view = self.windowControllers.firstObject.window.contentView; + return [view loadTextureFromURL:url]; +#else + NSApplication* app = [NSApplication sharedApplication]; + MyMTKView* view = app.mainWindow.contentView; + return [view loadTextureFromURL:url]; +#endif +} + + +@end + + //------------- @@ -79,6 +137,7 @@ - (BOOL)applicationShouldTerminateAfterLastWindowClosed:(NSApplication *)sender return YES; } +#if 1 - (void)application:(NSApplication *)sender openURLs:(nonnull NSArray *)urls { // see if this is called @@ -90,7 +149,19 @@ - (void)application:(NSApplication *)sender openURLs:(nonnull NSArray * NSURL *url = urls.firstObject; [view loadTextureFromURL:url]; } - +#else +- (BOOL)application:(NSApplication *)sender openFile:(nonnull NSString*)filename +{ + // see if this is called + //NSLog(@"OpenURLs"); + + // this is called from "Open In...", and also from OpenRecent documents menu + MyMTKView* view = sender.mainWindow.contentView; + + NSURL *url = [NSURL URLWithString:filename]; + return [view loadTextureFromURL:url]; +} +#endif - (IBAction)showAboutDialog:(id)sender { // calls openDocumentWithContentsOfURL above @@ -1226,9 +1297,7 @@ - (void)keyDown:(NSEvent *)theEvent } } -- (BOOL)acceptsFirstResponder { - return YES; -} + // Note: docs state that drag&drop should be handled automatically by UTI setup via openURLs // but I find these calls are needed, or it doesn't work. Maybe need to register for NSRUL @@ -1275,6 +1344,12 @@ - (BOOL)performDragOperation:(id)sender { // convert the original path and then back to a url, otherwise reload fails // when this file is replaced. const char* filename = url.fileSystemRepresentation; + if (filename == nullptr) + { + KLOGE("kramv", "Fix this drop url returning nil issue"); + return NO; + } + NSString* filenameString = [NSString stringWithUTF8String:filename]; url = [NSURL fileURLWithPath:filenameString]; @@ -1382,13 +1457,11 @@ - (BOOL)loadTextureFromURL:(NSURL*)url { //NSLog(@"LoadTexture"); const char* filename = url.fileSystemRepresentation; - - // Getting a url that returns nil on reload, probably some security thing - // consider storing a path instead of a url. Probably when file is replaced - // the saved image url no longer points to a valid filename. if (filename == nullptr) { - KLOGE("kramv", "Fix this url returning nil issue"); + // Fixed by converting dropped urls into paths then back to a url. + // When file replaced the drop url is no longer valid. + KLOGE("kramv", "Fix this load url returning nil issue"); return NO; } @@ -1456,10 +1529,10 @@ - (void)concludeDragOperation:(id)sender { -// this doesn't seem to enable New/Open File menu items, but it should +// this doesn't seem to enable New. Was able to get "Open" to highlight by setting NSDocument as class for doc types. 
// https://developer.apple.com/library/archive/documentation/Cocoa/Conceptual/EventOverview/EventArchitecture/EventArchitecture.html #if 0 - +/* // "New"" calls this - (__kindof NSDocument *)openUntitledDocumentAndDisplay:(BOOL)displayDocument error:(NSError * _Nullable *)outError @@ -1484,8 +1557,15 @@ - (IBAction)openDocument { - (IBAction)newDocument { // calls openUntitledDocumentAndDisplay above } +*/ #endif + +- (BOOL)acceptsFirstResponder { + return YES; +} + + @end //------------- @@ -1535,9 +1615,6 @@ - (void)viewDidLoad } - - - @end From 2cb3a9dd1a7a570cf65c6dd566e976c8013d7079 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Wed, 7 Apr 2021 14:44:17 -0700 Subject: [PATCH 026/901] kram - fix main to return 1 on failures, KPS CFBundlePackageType set to 8BIF KPS plugin shows up in PS but "fails to find file" on open/save. --- kram/KramMain.cpp | 9 ++++++++- plugin/kps/mac/Info.plist | 2 +- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/kram/KramMain.cpp b/kram/KramMain.cpp index dc4ef8bb..9ddbdcde 100644 --- a/kram/KramMain.cpp +++ b/kram/KramMain.cpp @@ -2,5 +2,12 @@ int main(int argc, char* argv[]) { - return kram::kramAppMain(argc, argv); + int errorCode = kram::kramAppMain(argc, argv); + + // returning -1 from main results in exit code of 255, so fix this to return 1 on failure. + if (errorCode != 0) { + exit(1); + } + + return 0; } diff --git a/plugin/kps/mac/Info.plist b/plugin/kps/mac/Info.plist index 0537f0ac..f39d6d81 100755 --- a/plugin/kps/mac/Info.plist +++ b/plugin/kps/mac/Info.plist @@ -13,7 +13,7 @@ CFBundleName $(PRODUCT_NAME) CFBundlePackageType - $(PLUGIN_TYPE) + 8BIF CFBundleSignature 8BIM From e840b55d6a10842bd235196b0431d939f444e634 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sat, 8 May 2021 13:14:19 -0700 Subject: [PATCH 027/901] Kramv - switch to 16f, RGBA16f backbuffer avoids the srgb curve, and stores the linear color and normal values. Otherwise linear values like normals were getting munged writing them. The display pipeline will still convert these to srgb, but the compute shader can sample from these and get unsnapped linear values. Display the premul values in eyedropper if alpha is present. This might be too many values, but keep for now. Fix signed conversion. Add helpers to shader, and handle 8-bit unorm to snorm conversions. Better normal reconstruct that avoids non-unit vecs, and n.z = 0 case. Will add debug mode to flag these pixels. n.xy = 00, 10, 01, 11, etc are all invalid, and the sat isn't enough. 
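
For the 8-bit path, a snorm value stored in a unorm byte puts zero at 128/255, so a plain 2*c - 1 lands slightly off; the shader diff below adds (255/127)*c - 128/127 style helpers for that, plus the premultiplied display value. A CPU-side sketch of the same two conversions (names are illustrative, not kram's API):

```cpp
#include <cstdio>

// Mirrors the helpers the shader diff below introduces: an 8-bit aware
// unorm -> snorm expansion (zero sits at 128/255), and the premultiplied
// value shown in the eyedropper (rgb scaled by alpha, alpha kept).
inline float toSnorm8(float c) { return (255.0f / 127.0f) * c - (128.0f / 127.0f); }

struct Float4 { float x, y, z, w; };

inline Float4 toPremul(Float4 c)
{
    c.x *= c.w;
    c.y *= c.w;
    c.z *= c.w;
    return c;
}

int main()
{
    printf("snorm of 128/255 = %f\n", toSnorm8(128.0f / 255.0f));  // exactly 0

    Float4 c = { 1.0f, 0.5f, 0.25f, 0.5f };
    Float4 p = toPremul(c);
    printf("premul: %f %f %f %f\n", p.x, p.y, p.z, p.w);  // 0.5 0.25 0.125 0.5
    return 0;
}
```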
--- kramv/KramRenderer.mm | 6 +- kramv/KramShaders.metal | 106 +++++++++++++++++++++++++++++++----- kramv/KramViewerMain.mm | 46 ++++++++++------ libkram/kram/KramMipper.cpp | 2 +- 4 files changed, 128 insertions(+), 32 deletions(-) diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index c4632b03..c311a6e2 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -144,7 +144,11 @@ - (void)_loadMetalWithView:(nonnull MTKView *)view /// Load Metal state objects and initialize renderer dependent view properties view.depthStencilPixelFormat = MTLPixelFormatDepth32Float_Stencil8; - view.colorPixelFormat = MTLPixelFormatBGRA8Unorm_sRGB; // TODO: adjust this to draw srgb or not, prefer RGBA + //view.colorPixelFormat = MTLPixelFormatBGRA8Unorm_sRGB; // TODO: adjust this to draw srgb or not, prefer RGBA + + // have a mix of linear color and normals, don't want srgb conversion until displayed + view.colorPixelFormat = MTLPixelFormatRGBA16Float; + view.sampleCount = 1; _mtlVertexDescriptor = [[MTLVertexDescriptor alloc] init]; diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index 97e76179..0b128133 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -129,6 +129,86 @@ vertex ColorInOut DrawVolumeVS( return out; } +float toUnorm8(float c) +{ + return (127.0 / 255.0) * c + (128 / 255.0); +} +float2 toUnorm8(float2 c) +{ + return (127.0 / 255.0) * c + float2(128 / 255.0); +} +float3 toUnorm8(float3 c) +{ + return (127.0 / 255.0) * c + float3(128 / 255.0); +} + +float toUnorm(float c) +{ + return 0.5 * c + 0.5; +} +float2 toUnorm(float2 c) +{ + return 0.5 * c + 0.5; +} +float3 toUnorm(float3 c) +{ + return 0.5 * c + 0.5; +} + +float toSnorm8(float c) +{ + return (255.0 / 127.0) * c - (128 / 127.0); +} + +float2 toSnorm8(float2 c) +{ + return (255.0 / 127.0) * c - float2(128 / 127.0); +} + +float3 toSnorm8(float3 c) +{ + return (255.0 / 127.0) * c - float3(128 / 127.0); +} + +float recip(float c) +{ + return 1.0 / c; +} +float2 recip(float2 c) +{ + return 1.0 / c; +} +float3 recip(float3 c) +{ + return 1.0 / c; +} +float4 recip(float4 c) +{ + return 1.0 / c; +} + + +// scale and reconstruct normal +float3 toNormal(float3 n) +{ + // make sure the normal doesn't exceed the unit circle + // many reconstructs skip and get a non-unit or z=0 normal + // might make optional or flag pixel with a debug mode that exeed + float len = length_squared(n.xy); + if (len > 0.99 * 0.99) + { + len *= 1.001; // so we have a non-zero z component below + n.xy *= rsqrt(len); + } + + // make sure always have non-zero z, or get Nan after it knocks out N of TBN + // since that's often pointing purely in 001 direction. + len = min(0.999, len); + n.z = sqrt(1 - len); + return n; +} + + float4 DrawPixels( ColorInOut in [[stage_in]], constant Uniforms& uniforms, @@ -142,7 +222,7 @@ float4 DrawPixels( if (uniforms.isSDF) { if (!uniforms.isSigned) { // convert to signed normal to compute z - c.r = 2.0 * c.r - 256.0 / 255.0; // 0 = 128 on unorm data on 8u + c.r = toSnorm8(c.r); // 0 = 128 on unorm data on 8u } // 0.0 is the boundary of visible vs. 
non-visible and not a true alpha @@ -157,7 +237,7 @@ float4 DrawPixels( float dist = c.r; // size of one pixel line - float onePixel = 1.0 / max(0.0001, length(float2(dfdx(dist), dfdy(dist)))); + float onePixel = recip(max(0.0001, length(float2(dfdx(dist), dfdy(dist))))); // distance to edge in pixels (scalar) float pixelDist = dist * onePixel; @@ -178,10 +258,10 @@ float4 DrawPixels( // to signed if (!uniforms.isSigned) { // convert to signed normal to compute z - c.rg = 2.0 * c.rg - float2(256.0 / 255.0); // 0 = 128 on unorm data on 8u + c.rg = toSnorm8(c.rg); } - c.z = sqrt(1 - saturate(dot(c.xy, c.xy))); // z always positive + c.rgb = toNormal(c.rgb); float3 lightDir = normalize(float3(1,1,1)); float3 lightColor = float3(1,1,1); @@ -213,7 +293,7 @@ float4 DrawPixels( else { // to unorm if (uniforms.isSigned) { - c.xyz = c.xyz * 0.5 + 0.5; + c.xyz = toUnorm(c.xyz); } // to premul, but also need to see without premul @@ -227,7 +307,7 @@ float4 DrawPixels( if (uniforms.numChannels == 1) { // toUnorm if (uniforms.isSigned) { - c.x = c.x * 0.5 + 0.5; + c.x = toUnorm(c.x); } } else if (uniforms.isNormal) { @@ -239,13 +319,13 @@ float4 DrawPixels( // to signed if (!uniforms.isSigned) { // convert to signed normal to compute z - c.rg = 2.0 * c.rg - float2(256.0 / 255.0); // 0 = 128 on unorm data + c.rg = toSnorm8(c.rg); } - c.z = sqrt(1 - saturate(dot(c.xy, c.xy))); // z always positive + c.rgb = toNormal(c.rgb); // from signed, to match other editors that don't display signed data - c.xyz = c.xyz * 0.5 + 0.5; // can sample from this + c.xyz = toUnorm(c.xyz); // can sample from this // view data as abs magnitude //c.xyz = abs(c.xyz); // bright on extrema, but no indicator of sign (use r,g viz) @@ -258,7 +338,7 @@ float4 DrawPixels( // signed 1/2 channel formats return sr,0,0, and sr,sg,0 for rgb? // May want to display those as 0 not 0.5. if (uniforms.isSigned) { - c.xyz = c.xyz * 0.5 + 0.5; + c.xyz = toUnorm(c.xyz); } // to premul, but also need to see without premul @@ -307,7 +387,7 @@ float4 DrawPixels( - if (uniforms.debugMode != ShDebugModeNone && c.a != 0.0f) { + if (uniforms.debugMode != ShDebugModeNone && c.a != 0.0) { bool isHighlighted = false; if (uniforms.debugMode == ShDebugModeTransparent) { @@ -357,7 +437,7 @@ float4 DrawPixels( // TODO: is it best to highlight the interest pixels in red // or the negation of that to see which ones aren't. if (isHighlighted) { - float3 highlightColor = float3(1.0f, 0.0f, 1.0f); + float3 highlightColor = float3(1, 0, 1); c.rgb = highlightColor; } @@ -375,7 +455,7 @@ float4 DrawPixels( // DONE: don't draw grid if too small // fwidth = abs(ddx(p)) + abs(ddy(p)) - float2 lineWidth = 1.0 / fwidth(pixels); + float2 lineWidth = recip(fwidth(pixels)); // only show grid when pixels are 8px or bigger if (max(lineWidth.x, lineWidth.y) >= 8.0) { diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index 03d128cd..59951065 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -77,7 +77,7 @@ + (BOOL)autosavesInPlace { - (void)makeWindowControllers { // Override to return the Storyboard file name of the document. 
//NSStoryboard* storyboard = [NSStoryboard storyboardWithName:@"Main" bundle:nil]; - //NSWindowController* controller = [storyboard instantiateControllerWithIdentifier:@"Document Window Controller"]; + //NSWindowController* controller = [storyboard instantiateControllerWithIdentifier:@"NameNeeded]; //[self addWindowController:controller]; } @@ -137,7 +137,6 @@ - (BOOL)applicationShouldTerminateAfterLastWindowClosed:(NSApplication *)sender return YES; } -#if 1 - (void)application:(NSApplication *)sender openURLs:(nonnull NSArray *)urls { // see if this is called @@ -149,19 +148,6 @@ - (void)application:(NSApplication *)sender openURLs:(nonnull NSArray * NSURL *url = urls.firstObject; [view loadTextureFromURL:url]; } -#else -- (BOOL)application:(NSApplication *)sender openFile:(nonnull NSString*)filename -{ - // see if this is called - //NSLog(@"OpenURLs"); - - // this is called from "Open In...", and also from OpenRecent documents menu - MyMTKView* view = sender.mainWindow.contentView; - - NSURL *url = [NSURL URLWithString:filename]; - return [view loadTextureFromURL:url]; -} -#endif - (IBAction)showAboutDialog:(id)sender { // calls openDocumentWithContentsOfURL above @@ -663,6 +649,16 @@ - (void)mouseMoved:(NSEvent*)event [self updateEyedropper]; } +inline float4 toPremul(const float4& c) +{ + // premul with a + float4 cpremul = c; + float a = c.a; + cpremul.w = 1.0f; + cpremul *= a; + return cpremul; +} + - (void)updateEyedropper { if ((!_showSettings->isHudShown)) { return; @@ -819,8 +815,8 @@ - (void)updateEyedropper { if (isSigned && !isDecodeSigned) { c.x = c.x * 2.0f - 1.0f; c.y = c.y * 2.0f - 1.0f; - c.z = c.y * 2.0f - 1.0f; - c.w = c.y * 2.0f - 1.0f; + c.z = c.z * 2.0f - 1.0f; + c.w = c.w * 2.0f - 1.0f; } if (isNormal) { @@ -862,6 +858,22 @@ - (void)updateEyedropper { printChannels(tmp, "sr: ", s, numChannels, isFloat, isSigned); text += tmp; } + + // display the premul values too + if (c.a < 1.0f) + { + printChannels(tmp, "lnp: ", toPremul(c), numChannels, isFloat, isSigned); + text += tmp; + + // TODO: do we need the premul srgb color too? + if (isSrgb) { + // this saturates the value, so don't use for extended srgb + float4 s = linearToSRGB(c); + + printChannels(tmp, "srp: ", toPremul(s), numChannels, isFloat, isSigned); + text += tmp; + } + } } [self setEyedropperText:text.c_str()]; diff --git a/libkram/kram/KramMipper.cpp b/libkram/kram/KramMipper.cpp index 6f0b6701..50b5715c 100644 --- a/libkram/kram/KramMipper.cpp +++ b/libkram/kram/KramMipper.cpp @@ -51,7 +51,7 @@ inline Color Unormfloat4ToColor(float4 value) inline Color Snormfloat4ToColor(float4 value) { Color c; - value = round(127.0f * value) + float4(127.0f); // or is it 128? TODO: validatate last ctor sets all values + value = round(127.0f * value) + float4(128.0f); c.r = (uint8_t)value.x; c.g = (uint8_t)value.y; c.b = (uint8_t)value.z; From 0d1dc780604d066f14948e48200102dc64fe31a0 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sat, 8 May 2021 15:22:38 -0700 Subject: [PATCH 028/901] kramv - debug mode for len(n.xy) > 0.99 pixels This is a fairly common issue on normal maps, so flag it to show why protections in reconstruct are important. 
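
The same check, pulled out of the shader so it can run over a texture on the CPU: flag texels whose xy, expanded to snorm, already reach the unit circle, since sqrt(1 - dot(n.xy, n.xy)) then reconstructs a zero or garbage z. The cutoff matches this commit's 0.99; a later commit raises it to 0.999. Names are illustrative, not kram's API.

```cpp
#include <cstdint>
#include <cstdio>
#include <vector>

// Sketch of the debug-mode test: count normal-map texels whose xy part is
// already at or past the unit circle after unorm -> snorm expansion.
struct Rgba8 { uint8_t r, g, b, a; };

inline float toSnorm8(uint8_t v)
{
    return (255.0f / 127.0f) * (v / 255.0f) - (128.0f / 127.0f);
}

size_t countBadNormalTexels(const std::vector<Rgba8>& pixels, float cutoff = 0.99f)
{
    size_t count = 0;
    for (const Rgba8& p : pixels) {
        float x = toSnorm8(p.r);
        float y = toSnorm8(p.g);
        if (x * x + y * y > cutoff * cutoff)
            ++count;
    }
    return count;
}

int main()
{
    std::vector<Rgba8> pixels = {
        { 128, 128, 255, 255 },  // flat normal, fine
        { 255, 128, 255, 255 },  // x = 1, on the circle, flagged
    };
    printf("flagged %zu of %zu texels\n", countBadNormalTexels(pixels), pixels.size());
    return 0;
}
```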
--- kramv/KramShaders.h | 1 + kramv/KramShaders.metal | 45 +++++++++++++++++++++++++++++----------- kramv/KramViewerBase.cpp | 3 +++ kramv/KramViewerBase.h | 1 + kramv/KramViewerMain.mm | 1 + 5 files changed, 39 insertions(+), 12 deletions(-) diff --git a/kramv/KramShaders.h b/kramv/KramShaders.h index fa8ea0fc..2799faa4 100644 --- a/kramv/KramShaders.h +++ b/kramv/KramShaders.h @@ -80,6 +80,7 @@ typedef NS_ENUM(int32_t, ShaderDebugMode) ShDebugModePosX = 5, ShDebugModePosY = 6, + ShDebugModeCircleXY = 7, ShDebugModeCount }; diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index 0b128133..4e64dc2a 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -135,11 +135,11 @@ float toUnorm8(float c) } float2 toUnorm8(float2 c) { - return (127.0 / 255.0) * c + float2(128 / 255.0); + return (127.0 / 255.0) * c + (128 / 255.0); } float3 toUnorm8(float3 c) { - return (127.0 / 255.0) * c + float3(128 / 255.0); + return (127.0 / 255.0) * c + (128 / 255.0); } float toUnorm(float c) @@ -162,12 +162,17 @@ float toSnorm8(float c) float2 toSnorm8(float2 c) { - return (255.0 / 127.0) * c - float2(128 / 127.0); + return (255.0 / 127.0) * c - (128 / 127.0); } float3 toSnorm8(float3 c) { - return (255.0 / 127.0) * c - float3(128 / 127.0); + return (255.0 / 127.0) * c - (128 / 127.0); +} + +float2 toSnorm(float2 c) +{ + return 2 * c - 1.0; } float recip(float c) @@ -194,20 +199,29 @@ float3 toNormal(float3 n) // make sure the normal doesn't exceed the unit circle // many reconstructs skip and get a non-unit or z=0 normal // might make optional or flag pixel with a debug mode that exeed - float len = length_squared(n.xy); - if (len > 0.99 * 0.99) + float len2 = length_squared(n.xy); + const float maxLen2 = 0.99 * 0.99; + + if (len2 > maxLen2) { - len *= 1.001; // so we have a non-zero z component below - n.xy *= rsqrt(len); + len2 *= 1.001; // so we have a non-zero z component below + n.xy *= rsqrt(len2); + len2 = maxLen2; } - + //len2 = min(0.99, len2); + // make sure always have non-zero z, or get Nan after it knocks out N of TBN // since that's often pointing purely in 001 direction. - len = min(0.999, len); - n.z = sqrt(1 - len); + n.z = sqrt(1 - len2); return n; } +// TODO: do more test shapes, but that affects eyedropper +// use mikktspace, gen bitan in frag shader with sign, don't normalize vb/vt +// see http://www.mikktspace.com/ + +// TODO: eliminate the toUnorm() calls below, rendering to rgba16f +// but also need to remove conversion code on cpu side expecting unorm in eyedropper float4 DrawPixels( ColorInOut in [[stage_in]], @@ -433,7 +447,14 @@ float4 DrawPixels( isHighlighted = true; } } - + else if (uniforms.debugMode == ShDebugModeCircleXY) { + // flag pixels that would throw off normal reconstruct sqrt(1-dot(n.xy,n.xy)) + // see code above in shader that helps keep that from z = 0 + float len2 = length_squared(toSnorm(c.rg)); + if (len2 > (0.99 * 0.99)) { + isHighlighted = true; + } + } // TODO: is it best to highlight the interest pixels in red // or the negation of that to see which ones aren't. 
if (isHighlighted) { diff --git a/kramv/KramViewerBase.cpp b/kramv/KramViewerBase.cpp index e1edac33..5fb3d12c 100644 --- a/kramv/KramViewerBase.cpp +++ b/kramv/KramViewerBase.cpp @@ -53,6 +53,9 @@ void ShowSettings::advanceDebugMode(bool isShiftKeyDown) { if (debugMode == DebugModePosY && !(isNormal)) { advanceDebugMode(isShiftKeyDown); } + if (debugMode == DebugModeCircleXY && !(isNormal)) { + advanceDebugMode(isShiftKeyDown); + } // TODO: have a clipping mode against a variable range too, only show pixels within that range // to help isolate problem pixels. Useful for depth, and have auto-range scaling for it and hdr. diff --git a/kramv/KramViewerBase.h b/kramv/KramViewerBase.h index d1766924..3f5a883e 100644 --- a/kramv/KramViewerBase.h +++ b/kramv/KramViewerBase.h @@ -40,6 +40,7 @@ enum DebugMode DebugModePosX = 5, DebugModePosY = 6, + DebugModeCircleXY = 7, DebugModeCount }; diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index 59951065..682ba97d 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -1072,6 +1072,7 @@ - (void)keyDown:(NSEvent *)theEvent case DebugModeHDR: text = "Debug HDR"; break; case DebugModePosX: text = "Debug +X"; break; case DebugModePosY: text = "Debug +Y"; break; + case DebugModeCircleXY: text = "Debug XY>=1"; break; default: break; } isChanged = true; From 4cad98c995757b3ece422d86ddc0d37b3549708b Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sat, 8 May 2021 16:36:56 -0700 Subject: [PATCH 029/901] kramv - clear hud text on texture load since debugMode is reset, increase nmap cutoff Change from 0.99 to 0.999. nmap.z min is 0.14 to 0.04. Open Recent document menu is only working the first time an item is picked. Need to hook some other callback since these are added into document menu list, and readFromURL isn't called second time. --- kramv/KramRenderer.mm | 2 ++ kramv/KramShaders.metal | 10 ++++-- kramv/KramViewerMain.mm | 78 ++++++++++++++++++++++++++++++++++------- 3 files changed, 74 insertions(+), 16 deletions(-) diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index c311a6e2..7014ff63 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -585,6 +585,8 @@ - (BOOL)loadTextureImpl:(const string&)fullFilename isTextureChanged:(BOOL)isTex _showSettings->zoom = _showSettings->zoomFit; + // wish could keep existing setting, but new texture might not + // be supported debugMode for new texture _showSettings->debugMode = DebugMode::DebugModeNone; // have one of these for each texture added to the viewer diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index 4e64dc2a..de4066d8 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -169,6 +169,10 @@ float3 toSnorm8(float3 c) { return (255.0 / 127.0) * c - (128 / 127.0); } +float4 toSnorm8(float4 c) +{ + return (255.0 / 127.0) * c - (128 / 127.0); +} float2 toSnorm(float2 c) { @@ -200,7 +204,7 @@ float3 toNormal(float3 n) // many reconstructs skip and get a non-unit or z=0 normal // might make optional or flag pixel with a debug mode that exeed float len2 = length_squared(n.xy); - const float maxLen2 = 0.99 * 0.99; + const float maxLen2 = 0.999 * 0.999; if (len2 > maxLen2) { @@ -208,7 +212,7 @@ float3 toNormal(float3 n) n.xy *= rsqrt(len2); len2 = maxLen2; } - //len2 = min(0.99, len2); + //len2 = min(0.999, len2); // make sure always have non-zero z, or get Nan after it knocks out N of TBN // since that's often pointing purely in 001 direction. 
@@ -451,7 +455,7 @@ float4 DrawPixels( // flag pixels that would throw off normal reconstruct sqrt(1-dot(n.xy,n.xy)) // see code above in shader that helps keep that from z = 0 float len2 = length_squared(toSnorm(c.rg)); - if (len2 > (0.99 * 0.99)) { + if (len2 > (0.999 * 0.999)) { isHighlighted = true; } } diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index 682ba97d..483d68a0 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -46,6 +46,8 @@ @interface MyMTKView : MTKView - (BOOL)loadTextureFromURL:(NSURL*)url; +- (void)setHudText:(const char*)text; + @end //------------- @@ -92,13 +94,37 @@ - (NSData *)dataOfType:(NSString *)typeName error:(NSError **)outError { - (BOOL)readFromURL:(NSURL *)url ofType:(NSString *)typeName error:(NSError **)outError { + // TODO: this recent menu only seems to work the first time + // and not in subsequent calls to the same entry. readFromUrl isn't even called. + // So don't get a chance to switch back to a recent texture. + // Maybe there's some list of documents created and so it doesn't + // think the file needs to be reloaded. + // + // Note: if I return NO from this call then a dialog pops up that image + // couldn't be loaded, but then the readFromURL is called everytime a new + // image is picked from the list. + + // called from OpenRecent documents menu + #if 0 MyMTKView* view = self.windowControllers.firstObject.window.contentView; return [view loadTextureFromURL:url]; #else NSApplication* app = [NSApplication sharedApplication]; MyMTKView* view = app.mainWindow.contentView; - return [view loadTextureFromURL:url]; + BOOL success = [view loadTextureFromURL:url]; + if (success) + { + [view setHudText:""]; + } + + // Let's see the document list +// NSDocumentController* dc = [NSDocumentController sharedDocumentController]; +// NSDocument* currentDoc = dc.currentDocument; +// +// KLOGW("kramv", "This is document count %d", (int)dc.documents.count, ); + + return success; #endif } @@ -139,10 +165,7 @@ - (BOOL)applicationShouldTerminateAfterLastWindowClosed:(NSApplication *)sender - (void)application:(NSApplication *)sender openURLs:(nonnull NSArray *)urls { - // see if this is called - //NSLog(@"OpenURLs"); - - // this is called from "Open In...", and also from OpenRecent documents menu + // this is called from "Open In..." 
MyMTKView* view = sender.mainWindow.contentView; NSURL *url = urls.firstObject; @@ -659,6 +682,26 @@ inline float4 toPremul(const float4& c) return cpremul; } +float toSnorm8(float c) +{ + return (255.0 / 127.0) * c - (128 / 127.0); +} + +float2 toSnorm8(float2 c) +{ + return (255.0 / 127.0) * c - (128 / 127.0); +} + +float3 toSnorm8(float3 c) +{ + return (255.0 / 127.0) * c - (128 / 127.0); +} +float4 toSnorm8(float4 c) +{ + return (255.0 / 127.0) * c - (128 / 127.0); +} + + - (void)updateEyedropper { if ((!_showSettings->isHudShown)) { return; @@ -813,10 +856,7 @@ - (void)updateEyedropper { bool isDecodeSigned = isSignedFormat(_showSettings->decodedFormat); if (isSigned && !isDecodeSigned) { - c.x = c.x * 2.0f - 1.0f; - c.y = c.y * 2.0f - 1.0f; - c.z = c.z * 2.0f - 1.0f; - c.w = c.w * 2.0f - 1.0f; + c = toSnorm8(c.x); } if (isNormal) { @@ -825,13 +865,20 @@ - (void)updateEyedropper { // unorm -> snorm if (!isSigned) { - nx = nx * 2.0f - 1.0f; - ny = ny * 2.0f - 1.0f; + nx = toSnorm8(nx); + ny = toSnorm8(ny); } + // Note: not clamping nx,ny to < 1 like in shader + // this is always postive on tan-space normals // assuming we're not viewing world normals - float nz = sqrt(1.0f - std::min(nx * nx + ny * ny, 1.0f)); + const float maxLen2 = 0.999 * 0.999; + float len2 = nx * nx + ny * ny; + if (len2 > maxLen2) + len2 = maxLen2; + + float nz = sqrt(1.0f - len2); // print the underlying color (some nmaps are xy in 4 channels) string tmp; @@ -1368,6 +1415,8 @@ - (BOOL)performDragOperation:(id)sender { url = [NSURL fileURLWithPath:filenameString]; if ([self loadTextureFromURL:url]) { + [self setHudText:""]; + return YES; } } @@ -1525,8 +1574,11 @@ - (BOOL)loadTextureFromURL:(NSURL*)url { self.window.title = [NSString stringWithUTF8String: title.c_str()]; + // topmost entry will be the recently opened document + // some entries may go stale if directories change, not sure who validates the list + // add to recent document menu - NSDocumentController *dc = [NSDocumentController sharedDocumentController]; + NSDocumentController* dc = [NSDocumentController sharedDocumentController]; [dc noteNewRecentDocumentURL:url]; self.imageURL = url; From d81d87fa2f9843141a226c86288654c638127a9c Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sat, 8 May 2021 17:14:36 -0700 Subject: [PATCH 030/901] kramv - add a hack to keep "Open Recent" menu calling readFromURL readFromURL stops getting called once NSDocumentController has an NSDocument associate with a URL. So keep wiping out the NSDocument list on every load. The doc controller comments are near impossible to decipher what caller is supposed to do when document changes, and this works. --- kramv/KramViewerMain.mm | 46 ++++++++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 17 deletions(-) diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index 483d68a0..ede98cbb 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -94,36 +94,46 @@ - (NSData *)dataOfType:(NSString *)typeName error:(NSError **)outError { - (BOOL)readFromURL:(NSURL *)url ofType:(NSString *)typeName error:(NSError **)outError { - // TODO: this recent menu only seems to work the first time - // and not in subsequent calls to the same entry. readFromUrl isn't even called. - // So don't get a chance to switch back to a recent texture. - // Maybe there's some list of documents created and so it doesn't - // think the file needs to be reloaded. 
- // - // Note: if I return NO from this call then a dialog pops up that image - // couldn't be loaded, but then the readFromURL is called everytime a new - // image is picked from the list. // called from OpenRecent documents menu #if 0 - MyMTKView* view = self.windowControllers.firstObject.window.contentView; - return [view loadTextureFromURL:url]; + //MyMTKView* view = self.windowControllers.firstObject.window.contentView; + //return [view loadTextureFromURL:url]; #else + NSApplication* app = [NSApplication sharedApplication]; MyMTKView* view = app.mainWindow.contentView; BOOL success = [view loadTextureFromURL:url]; if (success) { [view setHudText:""]; + + // DONE: this recent menu only seems to work the first time + // and not in subsequent calls to the same entry. readFromUrl isn't even called. + // So don't get a chance to switch back to a recent texture. + // Maybe there's some list of documents created and so it doesn't + // think the file needs to be reloaded. + // + // Note: if I return NO from this call then a dialog pops up that image + // couldn't be loaded, but then the readFromURL is called everytime a new + // image is picked from the list. + + // Clear the document list so readFromURL keeps getting called + // Can't remove currentDoc, so have to skip that + NSDocumentController* dc = [NSDocumentController sharedDocumentController]; + NSDocument* currentDoc = dc.currentDocument; + NSMutableArray* docsToRemove = [[NSMutableArray alloc] init]; + for (NSDocument* doc in dc.documents) { + if (doc != currentDoc) + [docsToRemove addObject: doc]; + } + + for (NSDocument* doc in docsToRemove) { + [dc removeDocument: doc]; + } } - // Let's see the document list -// NSDocumentController* dc = [NSDocumentController sharedDocumentController]; -// NSDocument* currentDoc = dc.currentDocument; -// -// KLOGW("kramv", "This is document count %d", (int)dc.documents.count, ); - return success; #endif } @@ -682,6 +692,8 @@ inline float4 toPremul(const float4& c) return cpremul; } +// Writing out to rgba32 for sampling, but unorm formats like ASTC and RGBA8 +// are still off and need to use the following. float toSnorm8(float c) { return (255.0 / 127.0) * c - (128 / 127.0); From 247ac8baf729982f8ad38b60c71de46c8c922a15 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sat, 8 May 2021 19:34:57 -0700 Subject: [PATCH 031/901] kramv - turn assert in KTXImage into returning false to fail the load Several of libktx's test images were hitting this assert. Need to find out why. Could be reversed mips on ktx2 vs. ktx. --- kramv/KramViewerMain.mm | 5 +++++ libkram/kram/KTXImage.cpp | 10 +++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index ede98cbb..78ffc877 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -178,6 +178,11 @@ - (void)application:(NSApplication *)sender openURLs:(nonnull NSArray * // this is called from "Open In..." MyMTKView* view = sender.mainWindow.contentView; + // TODO: if more than one url dropped, and they are albedo/nmap, then display them + // together with the single uv set. Need controls to show one or all together. + + // TODO: also do an overlapping diff if two files are dropped with same dimensions. 
+ NSURL *url = urls.firstObject; [view loadTextureFromURL:url]; } diff --git a/libkram/kram/KTXImage.cpp b/libkram/kram/KTXImage.cpp index 754a0172..99b27e8f 100644 --- a/libkram/kram/KTXImage.cpp +++ b/libkram/kram/KTXImage.cpp @@ -1363,7 +1363,15 @@ bool KTXImage::openKTX2(const uint8_t* imageData, size_t imageDataLength) // the offsets are reversed in ktx2 file level1.offset = level2.offset; - assert(level1.length == level2.length); + + if (level1.length != level2.length) + { + // This is likely due to the reversal of mips + // but many of the test images from libkx are hitting this, fix this issue. + + KLOGE("kram", "mip sizes aren't equal"); + return false; + } } } else { From c71f613889a7cc1aa17734e1310b3e70bf124e39 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sat, 8 May 2021 20:20:49 -0700 Subject: [PATCH 032/901] kram - fail early if the pixel format is unupported in the loader. --- libkram/kram/KTXImage.cpp | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/libkram/kram/KTXImage.cpp b/libkram/kram/KTXImage.cpp index 99b27e8f..7decca81 100644 --- a/libkram/kram/KTXImage.cpp +++ b/libkram/kram/KTXImage.cpp @@ -503,7 +503,9 @@ const KTXFormatInfo& formatInfo(MyMTLPixelFormat format) initFormatsIfNeeded(); const auto& it = gFormatTable->find(format); - assert(it != gFormatTable->end()); + if (it == gFormatTable->end()) { + return gFormatTable->find(MyMTLPixelFormatInvalid)->second; + } return it->second; } @@ -911,6 +913,11 @@ bool KTXImage::open(const uint8_t* imageData, size_t imageDataLength) pixelFormat = header.metalFormat(); } + if (pixelFormat == MyMTLPixelFormatInvalid) { + KLOGE("kram", "unsupported texture format glType 0x%0X", header.glFormat); + return false; + } + return initMipLevels(true, sizeof(KTXHeader) + header.bytesOfKeyValueData); } @@ -1326,6 +1333,13 @@ bool KTXImage::openKTX2(const uint8_t* imageData, size_t imageDataLength) // convert format to MyMTLPixelFormat pixelFormat = vulkanToMetalFormat(header2.vkFormat); + // kram can only load a subset of format + if (pixelFormat == MyMTLPixelFormatInvalid) + { + KLOGE("kram", "unsupported texture format VK_FORMAT %u", header2.vkFormat); + return false; + } + // Note: KTX2 also doesn't have the length field embedded the mipData // so need to be able to set skipLength to unify the mipgen if aliasing the mip data // Only reading this format, never writing it out. From 38d072559fb6f15b2a5ff4a218ffa7ad0d4f6b1d Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sat, 8 May 2021 20:57:27 -0700 Subject: [PATCH 033/901] kram - print the glBase/Internal format. For some reason, on several libktx images glFormat is 0. 
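
These loader changes share one pattern: unknown or unsupported formats map to an invalid sentinel and fail the open with a logged diagnostic instead of asserting, so a bad file can't take down the viewer. A reduced sketch of that shape (enum values and names are illustrative, only the GL constants are real):

```cpp
#include <cstdint>
#include <cstdio>

// Unknown formats resolve to an Invalid sentinel; open() logs the raw GL
// fields (useful when glFormat comes in as 0) and returns false rather than
// asserting. This is a sketch, not kram's actual format table.
enum class PixelFormat { Invalid, RGBA8 };

PixelFormat formatFromGL(uint32_t glInternalFormat)
{
    switch (glInternalFormat) {
        case 0x8058: return PixelFormat::RGBA8;   // GL_RGBA8
        default:     return PixelFormat::Invalid; // includes glInternalFormat == 0
    }
}

bool openImage(uint32_t glBaseInternalFormat, uint32_t glInternalFormat)
{
    PixelFormat fmt = formatFromGL(glInternalFormat);
    if (fmt == PixelFormat::Invalid) {
        fprintf(stderr, "unsupported texture format glBase/glInternalFormat 0x%04X 0x%04X\n",
                glBaseInternalFormat, glInternalFormat);
        return false;  // fail the load, don't assert
    }
    return true;
}

int main()
{
    openImage(0x1908 /* GL_RGBA */, 0x8058);  // accepted
    openImage(0, 0);                          // logged and rejected
    return 0;
}
```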
--- libkram/kram/KTXImage.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libkram/kram/KTXImage.cpp b/libkram/kram/KTXImage.cpp index 7decca81..f69c75b6 100644 --- a/libkram/kram/KTXImage.cpp +++ b/libkram/kram/KTXImage.cpp @@ -914,7 +914,7 @@ bool KTXImage::open(const uint8_t* imageData, size_t imageDataLength) } if (pixelFormat == MyMTLPixelFormatInvalid) { - KLOGE("kram", "unsupported texture format glType 0x%0X", header.glFormat); + KLOGE("kram", "unsupported texture format glBase/glInternalFormat 0x%04X 0x%04X", header.glBaseInternalFormat, header.glInternalFormat); return false; } From 8a6ec896fd0f8b0121d10882c1f64a76948b4eb1 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sat, 8 May 2021 23:30:21 -0700 Subject: [PATCH 034/901] kram/v - add SUPPORT_RGB flag, fix RGBA32F format RGB isn't supported by Metal, but we try our best to convert one mip level so that kramv can display the image. kram converts it to RGBA, but info will still report the RGB format. Row Alignment isn't always conducive to opening these, but will fix that later. Also not handling srgb properly. This has to first go to KramImage to load as RGBA and then back to KTXImage. Fix RGBA32F format in the table. --- libkram/kram/KTXImage.cpp | 28 ++++++++++++++++++++++++++-- libkram/kram/KTXImage.h | 14 ++++++++++++-- libkram/kram/KramConfig.h | 5 +++++ libkram/kram/KramImage.cpp | 16 ++++++++++++++-- 4 files changed, 57 insertions(+), 6 deletions(-) diff --git a/libkram/kram/KTXImage.cpp b/libkram/kram/KTXImage.cpp index f69c75b6..868c77af 100644 --- a/libkram/kram/KTXImage.cpp +++ b/libkram/kram/KTXImage.cpp @@ -159,6 +159,13 @@ enum GLFormat { GL_RG32F = 0x8230, GL_RGBA32F = 0x8814, +#if SUPPORT_RGB + GL_RGB8 = 0x8051, + GL_SRGB8 = 0x8C41, + GL_RGB16F = 0x881B, + GL_RGB32F = 0x8815 +#endif + /* These are all of the variants of ASTC, ugh. Only way to identify them is to walk blocks and it's unclear how to convert from 3D to 2D blocks or whether hw supports sliced 3D. 
@@ -299,6 +306,15 @@ enum VKFormat { // VK_FORMAT_ASTC_12x10_SRGB_BLOCK = 182, // VK_FORMAT_ASTC_12x12_UNORM_BLOCK = 183, // VK_FORMAT_ASTC_12x12_SRGB_BLOCK = 184, + +#if SUPPORT_RGB + // import only + VK_FORMAT_R8G8B8_UNORM = 23, + VK_FORMAT_R8G8B8_SRGB = 29, + VK_FORMAT_R16G16B16_SFLOAT = 90, + VK_FORMAT_R32G32B32_SFLOAT = 106, + +#endif }; // DONE: setup a format table, so can switch on it @@ -489,8 +505,16 @@ static bool initFormatsIfNeeded() KTX_FORMAT(EXPr32f, MyMTLPixelFormatR32Float, VK_FORMAT_R32_SFLOAT, GL_R32F, GL_RED, 1, 1, 4, 1, FLAG_32F) KTX_FORMAT(EXPrg32f, MyMTLPixelFormatRG32Float, VK_FORMAT_R32G32_SFLOAT, GL_RG32F, GL_RG, 1, 1, 8, 2, FLAG_32F) - KTX_FORMAT(EXPrg32f, MyMTLPixelFormatRGBA32Float, VK_FORMAT_R32G32B32A32_SFLOAT, GL_RGBA32F, GL_RGBA, 1, 1, 16, 4, FLAG_32F) - + KTX_FORMAT(EXPrgba32f, MyMTLPixelFormatRGBA32Float, VK_FORMAT_R32G32B32A32_SFLOAT, GL_RGBA32F, GL_RGBA, 1, 1, 16, 4, FLAG_32F) + +#if SUPPORT_RGB + // these are import only formats + KTX_FORMAT(EXPrgb8, MyMTLPixelFormatRGB8Unorm_internal, VK_FORMAT_R8G8B8_UNORM, GL_RGB8, GL_RGB, 1, 1, 3, 3, 0) + KTX_FORMAT(EXPsrgb8, MyMTLPixelFormatRGB8Unorm_sRGB_internal, VK_FORMAT_R8G8B8_SRGB, GL_SRGB8, GL_SRGB, 1, 1, 3, 3, FLAG_SRGB) + KTX_FORMAT(EXPrgb16f, MyMTLPixelFormatRGB16Float_internal, VK_FORMAT_R16G16B16_SFLOAT, GL_RGB16F, GL_RGB, 1, 1, 6, 3, FLAG_16F) + KTX_FORMAT(EXPrgb32f, MyMTLPixelFormatRGB32Float_internal, VK_FORMAT_R32G32B32_SFLOAT, GL_RGB32F, GL_RGB, 1, 1, 12, 3, FLAG_32F) +#endif + return true; } diff --git a/libkram/kram/KTXImage.h b/libkram/kram/KTXImage.h index 02387293..bf377602 100644 --- a/libkram/kram/KTXImage.h +++ b/libkram/kram/KTXImage.h @@ -102,11 +102,21 @@ enum MyMTLPixelFormat { // TODO: also need rgb9e5 for fallback if ASTC HDR/6H not supported // That is Unity's fallback if alpha not needed, otherwise RGBA16F. + +#if SUPPORT_RGB + // Can import files from KTX/KTX2 with RGB data, but convert right away to RGBA. + // These are not export formats. Watch alignment on these too. These + // have no MTLPixelFormat. 
+ MyMTLPixelFormatRGB8Unorm_internal = 200, + MyMTLPixelFormatRGB8Unorm_sRGB_internal = 201, + MyMTLPixelFormatRGB16Float_internal = 202, + MyMTLPixelFormatRGB32Float_internal = 203, +#endif }; enum MyMTLTextureType { - // MyMTLTextureType1D = 0, - MyMTLTextureType1DArray = 1, + // MyMTLTextureType1D = 0, // not twiddled or compressed, more like a buffer but with texture limits + MyMTLTextureType1DArray = 1, // not twiddled or compressed, more like a buffer but with texture limits MyMTLTextureType2D = 2, MyMTLTextureType2DArray = 3, // MyMTLTextureType2DMultisample = 4, diff --git a/libkram/kram/KramConfig.h b/libkram/kram/KramConfig.h index 3c344385..cdf2c205 100644 --- a/libkram/kram/KramConfig.h +++ b/libkram/kram/KramConfig.h @@ -146,6 +146,11 @@ #define COMPILE_ASTCENC 0 #endif +// rgb8/16f/32f formats only supported for import, Metal doesn't expose these formats +#ifndef SUPPORT_RGB +#define SUPPORT_RGB 1 +#endif + // includes that are usable across all files #include "KramLog.h" diff --git a/libkram/kram/KramImage.cpp b/libkram/kram/KramImage.cpp index b3758174..87915903 100644 --- a/libkram/kram/KramImage.cpp +++ b/libkram/kram/KramImage.cpp @@ -130,7 +130,13 @@ bool Image::loadImageFromKTX(const KTXImage& image) switch (image.pixelFormat) { case MyMTLPixelFormatR8Unorm: case MyMTLPixelFormatRG8Unorm: - case MyMTLPixelFormatRGBA8Unorm: { +#if SUPPORT_RGB + case MyMTLPixelFormatRGB8Unorm_sRGB_internal: // TODO: not handling srgba yet + case MyMTLPixelFormatRGB8Unorm_internal: +#endif + case MyMTLPixelFormatRGBA8Unorm_sRGB: // TODO: not handling srgba yet + case MyMTLPixelFormatRGBA8Unorm: + { const uint8_t* srcPixels = image.fileData + image.mipLevels[0].offset; @@ -171,6 +177,9 @@ bool Image::loadImageFromKTX(const KTXImage& image) case MyMTLPixelFormatR16Float: case MyMTLPixelFormatRG16Float: +#if SUPPORT_RGB + case MyMTLPixelFormatRGB16Float_internal: +#endif case MyMTLPixelFormatRGBA16Float: { int32_t numSrcChannels = blockSize / 2; // 2 = sizeof(_float16) int32_t numDstChannels = 4; @@ -220,7 +229,10 @@ bool Image::loadImageFromKTX(const KTXImage& image) case MyMTLPixelFormatR32Float: case MyMTLPixelFormatRG32Float: - case MyMTLPixelFormatRGBA32Float: { +#if SUPPORT_RGB + case MyMTLPixelFormatRGB32Float_internal: +#endif + case MyMTLPixelFormatRGBA32Float: { const float* srcPixels = (const float*)(image.fileData + image.mipLevels[0].offset); From 3dbaf8635ab27ae52aaf705dc0707a9c6ce0774e Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 9 May 2021 00:07:52 -0700 Subject: [PATCH 035/901] kramv - add atlas grid This cycles through 32, 64, 128, 256 entries, since these are common atlasing sizes. Now the d key and shift-d advance through none, pixel, block and the 4 atlas grid sizes. 
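
The keyboard handling in the diff below cycles a single index through all of the grid states. Restated as a standalone sketch (the real code keeps the index as a static in keyDown and uses a small macro):

```cpp
#include <cstdint>
#include <cstdio>

// off -> 1x1 pixel grid -> block grid -> 32/64/128/256 atlas grids, with
// shift stepping backwards. The block-grid entry is skipped when the format's
// block is 1x1 (explicit formats), since there is no block grid to show.
const int32_t kNumGrids = 7;
const uint32_t kGridSizes[kNumGrids] = { 0, 1, 2, 32, 64, 128, 256 };

int32_t advanceGrid(int32_t grid, bool decrement, int32_t blockX)
{
    grid = (grid + kNumGrids + (decrement ? -1 : 1)) % kNumGrids;

    if (grid == 2 && blockX == 1)  // no block grid for 1x1 blocks
        grid = (grid + kNumGrids + (decrement ? -1 : 1)) % kNumGrids;

    return grid;
}

int main()
{
    int32_t grid = 0;
    for (int32_t i = 0; i < 8; ++i) {
        grid = advanceGrid(grid, false, /* blockX */ 1);
        printf("grid state %d -> gridSizes %u\n", grid, kGridSizes[grid]);
    }
    return 0;
}
```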
--- kramv/KramRenderer.mm | 5 +++- kramv/KramViewerBase.h | 6 +++- kramv/KramViewerMain.mm | 61 ++++++++++++++++++++++++++++---------- libkram/kram/KramImage.cpp | 7 +++-- 4 files changed, 60 insertions(+), 19 deletions(-) diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index 7014ff63..a98896b0 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -648,12 +648,15 @@ - (void)_updateGameState uniforms.gridY = 1; } else if (_showSettings->isBlockGridShown) { - if (_showSettings->blockX > 1) { uniforms.gridX = _showSettings->blockX; uniforms.gridY = _showSettings->blockY; } } + else if (_showSettings->isAtlasGridShown) { + uniforms.gridX = _showSettings->gridSize; + uniforms.gridY = _showSettings->gridSize; + } // no debug mode when preview kicks on, make it possible to toggle back and forth more easily uniforms.debugMode = _showSettings->isPreview ? ShaderDebugMode::ShDebugModeNone : (ShaderDebugMode)_showSettings->debugMode; diff --git a/kramv/KramViewerBase.h b/kramv/KramViewerBase.h index 3f5a883e..4c82328b 100644 --- a/kramv/KramViewerBase.h +++ b/kramv/KramViewerBase.h @@ -77,6 +77,7 @@ class ShowSettings { // draw a 1x1 or blockSize grid, note ASTC has non-square grid sizes bool isPixelGridShown = false; bool isBlockGridShown = false; + bool isAtlasGridShown = false; // show all mips, faces, arrays all at once bool isShowingAllLevelsAndMips = false; @@ -107,10 +108,13 @@ class ShowSettings { int32_t imageBoundsX = 0; // px int32_t imageBoundsY = 0; // px - // size of the block, uses in block grid drawing + // size of the block, used in block grid drawing int32_t blockX = 1; int32_t blockY = 1; + // set when isGridShow is true + int32_t gridSize = 1; + // for eyedropper, lookup this pixel value, and return it to CPU int32_t textureLookupX = 0; int32_t textureLookupY = 0; diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index 78ffc877..2ccf6801 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -1232,28 +1232,59 @@ - (void)keyDown:(NSEvent *)theEvent break; // toggle pixel grid when magnified above 1 pixel, can happen from mipmap changes too - case Key::D: + case Key::D: { + static int grid = 0; + static const int kNumGrids = 7; + + #define advanceGrid(g, dec) \ + grid = (grid + kNumGrids + (dec ? -1 : 1)) % kNumGrids + // TODO: display how many blocks there are - if (isShiftKeyDown && _showSettings->blockX > 1) { - // if block size is 1, then this shouldn't toggle - _showSettings->isBlockGridShown = !_showSettings->isBlockGridShown; - _showSettings->isPixelGridShown = false; - sprintf(text, "Block Grid %dx%d %s", - _showSettings->blockX, _showSettings->blockY, - _showSettings->isBlockGridShown ? 
"On" : "Off"); + + // if block size is 1, then this shouldn't toggle + _showSettings->isBlockGridShown = false; + _showSettings->isAtlasGridShown = false; + _showSettings->isPixelGridShown = false; + + advanceGrid(grid, isShiftKeyDown); + + if (grid == 2 && _showSettings->blockX == 1) { + // skip it + advanceGrid(grid, isShiftKeyDown); + } + + static const uint32_t gridSizes[kNumGrids] = { + 0, 1, 2, + 32, 64, 128, 256 // atlas sizes + }; + + if (grid == 0) { + sprintf(text, "Grid Off"); + } + else if (grid == 1) { + _showSettings->isPixelGridShown = true; + + sprintf(text, "Pixel Grid 1x1 On"); + } + else if (grid == 2) { + _showSettings->isBlockGridShown = true; + + sprintf(text, "Block Grid %dx%d On", + _showSettings->blockX, _showSettings->blockY); } else { - - _showSettings->isPixelGridShown = !_showSettings->isPixelGridShown; - _showSettings->isBlockGridShown = false; - text = "Pixel Grid "; - text += _showSettings->isPixelGridShown ? "On" : "Off"; + _showSettings->isAtlasGridShown = true; + + _showSettings->gridSize = gridSizes[grid]; + + sprintf(text, "Atlas Grid %dx%d On", + _showSettings->gridSize, _showSettings->gridSize); } - + isChanged = true; break; - + } case Key::S: // TODO: have drawAllMips, drawAllLevels, drawAllLevelsAndMips _showSettings->isShowingAllLevelsAndMips = !_showSettings->isShowingAllLevelsAndMips; diff --git a/libkram/kram/KramImage.cpp b/libkram/kram/KramImage.cpp index 87915903..b1f4ade0 100644 --- a/libkram/kram/KramImage.cpp +++ b/libkram/kram/KramImage.cpp @@ -131,10 +131,10 @@ bool Image::loadImageFromKTX(const KTXImage& image) case MyMTLPixelFormatR8Unorm: case MyMTLPixelFormatRG8Unorm: #if SUPPORT_RGB - case MyMTLPixelFormatRGB8Unorm_sRGB_internal: // TODO: not handling srgba yet + case MyMTLPixelFormatRGB8Unorm_sRGB_internal: case MyMTLPixelFormatRGB8Unorm_internal: #endif - case MyMTLPixelFormatRGBA8Unorm_sRGB: // TODO: not handling srgba yet + case MyMTLPixelFormatRGBA8Unorm_sRGB: case MyMTLPixelFormatRGBA8Unorm: { const uint8_t* srcPixels = @@ -144,6 +144,7 @@ bool Image::loadImageFromKTX(const KTXImage& image) int32_t numDstChannels = 4; // Note: clearing unspecified channels to 0000, not 0001 + // can set swizzleText when encoding _pixels.resize(4 * _width * _height); if (numSrcChannels != 4) { memset(_pixels.data(), 0, _pixels.size()); @@ -185,6 +186,7 @@ bool Image::loadImageFromKTX(const KTXImage& image) int32_t numDstChannels = 4; // Note: clearing unspecified channels to 0000, not 0001 + // can set swizzleText when encoding _pixelsFloat.resize(_width * _height); if (numSrcChannels != 4) { memset(_pixelsFloat.data(), 0, @@ -240,6 +242,7 @@ bool Image::loadImageFromKTX(const KTXImage& image) int32_t numDstChannels = 4; // Note: clearing unspecified channels to 0000, not 0001 + // can set swizzleText when encoding _pixelsFloat.resize(_width * _height); if (numSrcChannels != 4) { memset(_pixelsFloat.data(), 0, From c18deb1ac1f1d652afed6082cfed8e6c1204c75f Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 9 May 2021 00:15:59 -0700 Subject: [PATCH 036/901] Update README.md --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 29a3d94d..7e932538 100644 --- a/README.md +++ b/README.md @@ -31,15 +31,15 @@ Compute shaders are used to display a single pixel sample from the gpu texture. In non-preview mode, point sampling in a pixel shader is used to show exact pixel values of a single mip, array, and face. Debug modes provide pixel analysis. 
KramLoader shows synchronous cpu upload to a private Metal texture, but does not yet supply the underlying KTXImage. Pinch zoom and panning tries to keep the image from onscreen, and zoom is to the cursor so navigating feels intuitive. ``` -Formats - R/RG/RGBA 8/16F/32F, BC/ETC2/ASTC +Formats - R/RG/RGBA 8/16F/32F, BC/ETC2/ASTC, RGB has limited import support Container Types - KTX, KTX2, PNG -Content Types - Albedo, Normal, SDF -Debug modes - transparent, color, gray, +x, +y +Content Types - Albedo, Normal, SDF, Height +Debug modes - transparent, color, gray, +x, +y, xy >= 1 Texture Types - 1darray (no mips), 2d, 2darray, 3d (no mips), cube, cube array / - show keyboard shortcuts O - toggle preview, disables debug mode, shows lit normals, and mips and filtering are enabled -⇧D - toggle pixel grid, must be zoomed-in to see it (block grid with ⇧) +⇧D - toggle through none, pixel grid, block grid, atlas grid (32, 64, 128, 256), must be zoomed-in to see pixel grid ⇧E - advance debug mode, this is texture content specific (reverse dir with ⇧) H - toggle hud I - show texture info in overlay From d43d8c676d7a7a99d4c0c1c88cb2387a02d09061 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 9 May 2021 00:33:01 -0700 Subject: [PATCH 037/901] kramv - finish normal handling shader --- kramv/KramShaders.metal | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index de4066d8..d1eb084b 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -201,22 +201,22 @@ float4 recip(float4 c) float3 toNormal(float3 n) { // make sure the normal doesn't exceed the unit circle - // many reconstructs skip and get a non-unit or z=0 normal - // might make optional or flag pixel with a debug mode that exeed + // many reconstructs skip and get a non-unit or n.z=0 + // this can all be done with half math too + float len2 = length_squared(n.xy); const float maxLen2 = 0.999 * 0.999; - if (len2 > maxLen2) - { - len2 *= 1.001; // so we have a non-zero z component below + if (len2 <= maxLen2) { + // textures should be corrected to always take this path + n.z = sqrt(1 - len2); + } + else { + len2 *= 1.001*1.001; // need n.xy = approx 0.999 length n.xy *= rsqrt(len2); - len2 = maxLen2; + n.z = 0.0447108; // sqrt(1-maxLen2) } - //len2 = min(0.999, len2); - // make sure always have non-zero z, or get Nan after it knocks out N of TBN - // since that's often pointing purely in 001 direction. 
- n.z = sqrt(1 - len2); return n; } From 8f676de49b8303cb657554d7404042e0217cce9f Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 9 May 2021 00:42:32 -0700 Subject: [PATCH 038/901] Update README.md --- README.md | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 7e932538..91e4ab4e 100644 --- a/README.md +++ b/README.md @@ -37,10 +37,12 @@ Content Types - Albedo, Normal, SDF, Height Debug modes - transparent, color, gray, +x, +y, xy >= 1 Texture Types - 1darray (no mips), 2d, 2darray, 3d (no mips), cube, cube array +⇧ decrement any advance listed below + / - show keyboard shortcuts O - toggle preview, disables debug mode, shows lit normals, and mips and filtering are enabled ⇧D - toggle through none, pixel grid, block grid, atlas grid (32, 64, 128, 256), must be zoomed-in to see pixel grid -⇧E - advance debug mode, this is texture content specific (reverse dir with ⇧) +⇧E - advance debug mode, this is texture content specific H - toggle hud I - show texture info in overlay W - toggle repeat filter, scales uv from [0,1] to [0,2] @@ -53,7 +55,7 @@ N - toggle signed/unsigned ⇧0 - refit the current mip image to 1x, or fit view. (at 1x with ⇧). ⇧L - reload from disk if changed, zoom to fit (at 1x with ⇧) -⇧Y advance array (reverse dir with ⇧) +⇧Y advance array ⇧F advance face ⇧M advance mip @@ -68,10 +70,10 @@ Texture processing is complex and there be dragons. Just be aware of some of th GPU - none of the encoders use the GPU, so cpu threading and multi-process is used Rescale Filtering - 1x1 point filter -Mip filtering - 2x2 box filter that's reasonable for pow2, but not ideal for non-pow2 mips, +Mip filtering - 2x2 box filter that's reasonable for pow2, and a non-linear filters for non-pow2 so there is no pixel shift done in linear space using half4 storage, in-place to save mem -1D array - no mip support due to hardware +1D array - no mip support due to hardware, no encoding 3D textures - no mip support, uses ASTC 2d slice encode used by Metal/Android, not exotic ASTC 3d format BC/ETC2/ASTC - supposedly WebGL requires pow2, and some implementation need top multiple of 4 for BC/ETC2 From 45677c0875be3ed6dcdb7e78559b74afd6cda54f Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 9 May 2021 11:25:03 -0700 Subject: [PATCH 039/901] kramv - improve shader, start moving towards more general shapes Add basis transform calls, but not yet generating them on model. Add half version of toNormal(), and simplify so it works with half. 
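For reference, the normal reconstruction this commit reworks amounts to clamping the stored xy components inside the unit circle and deriving z from them. A minimal CPU-side sketch of the same math (plain C++, not taken from the patch; the struct and function names are illustrative):

```cpp
#include <cmath>

struct float3 { float x, y, z; };

// Rebuild a unit normal from its stored xy, mirroring the shader's toNormal().
// Keeping z away from 0 avoids knocking out the N column of the TBN basis.
inline float3 reconstructNormal(float nx, float ny)
{
    float len2 = nx * nx + ny * ny;
    const float maxLen2 = 0.999f * 0.999f;

    if (len2 <= maxLen2) {
        // well-formed textures always take this path
        return { nx, ny, std::sqrt(1.0f - len2) };
    }

    // pull xy back to ~0.999 length, z becomes sqrt(1 - maxLen2)
    float scale = 0.999f / std::sqrt(len2);
    return { nx * scale, ny * scale, 0.0447108f };
}
```

Because every input and output stays within [-1, 1], the same math holds up in half precision, which is what the half3 overload in the diff below relies on.
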
--- kramv/KramShaders.metal | 380 ++++++++++++++++++++++++++++++---------- 1 file changed, 285 insertions(+), 95 deletions(-) diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index d1eb084b..5c3a1116 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -6,6 +6,283 @@ using namespace metal; +//--------------------------------- +// helpers + +float toUnorm8(float c) +{ + return (127.0 / 255.0) * c + (128.0 / 255.0); +} +float2 toUnorm8(float2 c) +{ + return (127.0 / 255.0) * c + (128.0 / 255.0); +} +float3 toUnorm8(float3 c) +{ + return (127.0 / 255.0) * c + (128.0 / 255.0); +} +float4 toUnorm8(float4 c) +{ + return (127.0 / 255.0) * c + (128.0 / 255.0); +} + +float toUnorm(float c) +{ + return 0.5 * c + 0.5; +} +float2 toUnorm(float2 c) +{ + return 0.5 * c + 0.5; +} +float3 toUnorm(float3 c) +{ + return 0.5 * c + 0.5; +} +float4 toUnorm(float4 c) +{ + return 0.5 * c + 0.5; +} + +float toSnorm8(float c) +{ + return (255.0 / 127.0) * c - (128.0 / 127.0); +} +float2 toSnorm8(float2 c) +{ + return (255.0 / 127.0) * c - (128.0 / 127.0); +} +float3 toSnorm8(float3 c) +{ + return (255.0 / 127.0) * c - (128.0 / 127.0); +} +float4 toSnorm8(float4 c) +{ + return (255.0 / 127.0) * c - (128.0 / 127.0); +} + +half2 toSnorm8(half2 c) +{ + return (255.0h / 127.0h) * c - (128.0h / 127.0h); +} + +float toSnorm(float c) +{ + return 2.0 * c - 1.0; +} +float2 toSnorm(float2 c) +{ + return 2.0 * c - 1.0; +} +float3 toSnorm(float3 c) +{ + return 2.0 * c - 1.0; +} +float4 toSnorm(float4 c) +{ + return 2.0 * c - 1.0; +} + +float recip(float c) +{ + return 1.0 / c; +} +float2 recip(float2 c) +{ + return 1.0 / c; +} +float3 recip(float3 c) +{ + return 1.0 / c; +} +float4 recip(float4 c) +{ + return 1.0 / c; +} + +half toHalf(float c) +{ + return half(c); +} +half2 toHalf(float2 c) +{ + return half2(c); +} +half3 toHalf(float3 c) +{ + return half3(c); +} +half4 toHalf(float4 c) +{ + return half4(c); +} + +float toFloat(half c) +{ + return float(c); +} +float2 toFloat(half2 c) +{ + return float2(c); +} +float3 toFloat(half3 c) +{ + return float3(c); +} +float4 toFloat(half4 c) +{ + return float4(c); +} + +//------------------------------------------- +// functions + +// reconstruct normal from xy, n.z ignored +float3 toNormal(float3 n) +{ + // make sure the normal doesn't exceed the unit circle + // many reconstructs skip and get a non-unit or n.z=0 + // this can all be done with half math too + + float len2 = length_squared(n.xy); + const float maxLen2 = 0.999 * 0.999; + + if (len2 <= maxLen2) { + // textures should be corrected to always take this path + n.z = sqrt(1.0 - len2); + } + else { + n.xy *= 0.999 * rsqrt(len2); + n.z = 0.0447108; // sqrt(1-maxLen2) + } + + return n; +} + +// reconstruct normal from xy, n.z ignored +half3 toNormal(half3 n) +{ + // make sure the normal doesn't exceed the unit circle + // many reconstructs skip and get a non-unit or n.z=0 + // this can all be done with half math too + + half len2 = length_squared(n.xy); + const half maxLen2 = 0.999h * 0.999h; + + if (len2 <= maxLen2) { + // textures should be corrected to always take this path + n.z = sqrt(1.0h - len2); + } + else { + n.xy *= 0.999h * rsqrt(len2); + n.z = 0.0447108h; // sqrt(1-maxLen2) + } + + return n; +} + +// use mikktspace, gen bitan in frag shader with sign, don't normalize vb/vt +// see http://www.mikktspace.com/ +half3 transformNormal(half4 tangent, half3 vertexNormal, + texture2d texture, sampler s, float2 uv, bool isSigned = true) +{ + // Normalize tangent/vertexNormal in vertex shader 
+ // but don't renormalize interpolated tangent, vertexNormal in fragment shader + // Reconstruct bitan in frag shader + // https://bgolus.medium.com/generating-perfect-normal-maps-for-unity-f929e673fc57 + + half4 nmap = texture.sample(s, uv); + if (!isSigned) { + nmap.xy = toSnorm8(nmap.xy); + } + half3 normal = toNormal(nmap.xyz); + + // now transform by basis and normalize from any shearing, and since interpolated basis vectors + // are not normalized + half3x3 tbn = half3x3(tangent.xyz, tangent.w * cross(vertexNormal, tangent.xyz), vertexNormal); + normal = tbn * normal; + return normalize(normal); +} + +// TODO: have more bones, or read from texture instead of uniforms +// can then do instanced skining, but vfetch lookup slower +#define maxBones 128 + +// this is for vertex shader +void skinPosAndBasis(thread float4& position, thread float3& tangent, thread float3& normal, + uint4 indices, float4 weights, float3x4 bones[maxBones]) +{ + // TODO: might do this as up to 12x vtex lookup, fetch from buffer texture + // but uniforms after setup would be faster if many bones + float3x4 bindPoseToBoneTransform = bones[indices.x]; + + if (weights[0] != 1.0) + { + // weight the bone transforms + bindPoseToBoneTransform *= weights[0]; + + // with RGB10A2U have 2 bits in weights.w to store the boneCount + // or could count non-zero weights, make sure to set w > 0 if 4 bones + // the latter is more compatible with more conent + + //int numBones = 1 + int(weights.w * 3.0); + + int numBones = int(dot(float4(weights > 0.0), float4(1))); + + // reconstruct so can store weights in RGB10A2U + if (numBones == 4) + weights.w = 1 - saturate(dot(weights.xyz, float3(1.0))); + + for (int i = 1; i < numBones; ++i) + { + bindPoseToBoneTransform += bones[indices[i]] * weights[i]; + } + } + + // 3x4 is a transpose of 4x4 transform + position.xyz = position * bindPoseToBoneTransform; + + // not dealing with non-uniform scale correction + // see scale2 handling in transformBasis, a little different with transpose of 3x4 + + tangent = (float4(tangent, 0.0) * bindPoseToBoneTransform); + normal = (float4(normal, 0.0) * bindPoseToBoneTransform); +} + +// this is for vertex shader +void transformBasis(thread float3& tangent, thread float3& normal, + float4x4 modelToWorldTfm, bool isScaled = false) +{ + tangent = (modelToWorldTfm * float4(tangent, 0.0)).xyz; + normal = (modelToWorldTfm * float4(normal, 0.0)).xyz; + + // have to apply invSquare of scale here to approximate invT + // also make sure to identify inversion off determinant before instancing so that backfacing is correct + // this is only needed if non-uniform scale present in modelToWorldTfm, could precompute + if (isScaled) + { + // compute scale squared from rows + float3 scale2 = float3( + length_squared(modelToWorldTfm[0].xyz), + length_squared(modelToWorldTfm[1].xyz), + length_squared(modelToWorldTfm[2].xyz)); + + // do a max(1e4), but really don't have scale be super small + scale2 = max(0.0001 * 0.0001, scale2); + + // apply inverse + tangent /= scale2; + normal /= scale2; + } + + // vertex shader normalize, but the fragment shader should not + tangent = normalize(tangent); + normal = normalize(normal); + + // make sure to preserve bitan sign in tangent.w +} + +//------------------------------------------- + struct Vertex { float4 position [[attribute(VertexAttributePosition)]]; @@ -41,7 +318,9 @@ ColorInOut DrawImageFunc( // this is a 2d coord always which is 0 to 1, or 0 to 2 out.texCoord.xy = in.texCoord; if (uniforms.isWrap) { - out.texCoord.xy *= 
2.0; // can make this a repeat value uniform + // can make this a repeat value uniform + float wrapAmount = 2.0; + out.texCoord.xy *= wrapAmount; } // potentially 3d coord, and may be -1 to 1 @@ -71,7 +350,7 @@ vertex ColorInOut DrawCubeVS( // convert to -1 to 1 float3 uvw = out.texCoordXYZ; - uvw.xy = uvw.xy * 2.0 - 1.0; + uvw.xy = toSnorm(uvw.xy); uvw.z = 1.0; // switch to the face @@ -129,100 +408,9 @@ vertex ColorInOut DrawVolumeVS( return out; } -float toUnorm8(float c) -{ - return (127.0 / 255.0) * c + (128 / 255.0); -} -float2 toUnorm8(float2 c) -{ - return (127.0 / 255.0) * c + (128 / 255.0); -} -float3 toUnorm8(float3 c) -{ - return (127.0 / 255.0) * c + (128 / 255.0); -} - -float toUnorm(float c) -{ - return 0.5 * c + 0.5; -} -float2 toUnorm(float2 c) -{ - return 0.5 * c + 0.5; -} -float3 toUnorm(float3 c) -{ - return 0.5 * c + 0.5; -} - -float toSnorm8(float c) -{ - return (255.0 / 127.0) * c - (128 / 127.0); -} - -float2 toSnorm8(float2 c) -{ - return (255.0 / 127.0) * c - (128 / 127.0); -} - -float3 toSnorm8(float3 c) -{ - return (255.0 / 127.0) * c - (128 / 127.0); -} -float4 toSnorm8(float4 c) -{ - return (255.0 / 127.0) * c - (128 / 127.0); -} - -float2 toSnorm(float2 c) -{ - return 2 * c - 1.0; -} - -float recip(float c) -{ - return 1.0 / c; -} -float2 recip(float2 c) -{ - return 1.0 / c; -} -float3 recip(float3 c) -{ - return 1.0 / c; -} -float4 recip(float4 c) -{ - return 1.0 / c; -} - - -// scale and reconstruct normal -float3 toNormal(float3 n) -{ - // make sure the normal doesn't exceed the unit circle - // many reconstructs skip and get a non-unit or n.z=0 - // this can all be done with half math too - - float len2 = length_squared(n.xy); - const float maxLen2 = 0.999 * 0.999; - - if (len2 <= maxLen2) { - // textures should be corrected to always take this path - n.z = sqrt(1 - len2); - } - else { - len2 *= 1.001*1.001; // need n.xy = approx 0.999 length - n.xy *= rsqrt(len2); - n.z = 0.0447108; // sqrt(1-maxLen2) - } - - return n; -} // TODO: do more test shapes, but that affects eyedropper -// use mikktspace, gen bitan in frag shader with sign, don't normalize vb/vt -// see http://www.mikktspace.com/ +// generate and pass down tangents + bitanSign in the geometry // TODO: eliminate the toUnorm() calls below, rendering to rgba16f // but also need to remove conversion code on cpu side expecting unorm in eyedropper @@ -455,7 +643,9 @@ float4 DrawPixels( // flag pixels that would throw off normal reconstruct sqrt(1-dot(n.xy,n.xy)) // see code above in shader that helps keep that from z = 0 float len2 = length_squared(toSnorm(c.rg)); - if (len2 > (0.999 * 0.999)) { + const float maxLen2 = 0.999 * 0.999; + + if (len2 > maxLen2) { isHighlighted = true; } } From 647af3d2500d1638bdf9c6fa34b1f6b66e7fa8ad Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Wed, 12 May 2021 22:54:06 -0700 Subject: [PATCH 040/901] kram - new blit loader, start of ktx2 save, srgb test also fix formatSources script for clang_format. blit encoder started to stage textures and upload those to gpu side. Needed to land this for rgb support. ktx2 needs ztd encode path, and dfd structures to write out, but fleshed out the basics in KramImage. It's commented out. also still need to honor .ktx2 as an output extension. 
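Since the commit calls out that the KTX2 path still needs a zstd encode step, here is a rough sketch of what per-mip-level supercompression could look like with zstd's one-shot API (hypothetical helper, not part of this patch; compression level 3 is an arbitrary choice):

```cpp
#include <cstdint>
#include <vector>
#include "zstd.h"

// Compress one fully-built mip level (all chunks appended) with zstd.
// The returned size is what KTX2ImageLevel::lengthCompressed would record.
bool compressMipLevel(const std::vector<uint8_t>& src,
                      std::vector<uint8_t>& dst, int level = 3)
{
    dst.resize(ZSTD_compressBound(src.size()));
    size_t written = ZSTD_compress(dst.data(), dst.size(),
                                   src.data(), src.size(), level);
    if (ZSTD_isError(written)) {
        return false;
    }
    dst.resize(written);   // actual compressed length
    return true;
}
```

One stream per level keeps the KTX2 level table simple: offsets and compressed sizes can be filled in after all levels are compressed, which is exactly what the commented-out block in KramImage.cpp below anticipates.
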
--- kramv/KramLoader.h | 10 ++ kramv/KramLoader.mm | 298 ++++++++++++++++++++++++++++++++++++- kramv/KramRenderer.mm | 4 +- kramv/KramShaders.metal | 44 ++++-- kramv/KramViewerBase.h | 5 +- kramv/KramViewerMain.mm | 9 +- libkram/kram/KTXImage.cpp | 8 - libkram/kram/KTXImage.h | 27 +++- libkram/kram/Kram.cpp | 68 +++++++++ libkram/kram/KramImage.cpp | 138 +++++++++++++++++ scripts/formatSources.sh | 2 +- 11 files changed, 577 insertions(+), 36 deletions(-) diff --git a/kramv/KramLoader.h b/kramv/KramLoader.h index 9ed5da22..2b14b16f 100644 --- a/kramv/KramLoader.h +++ b/kramv/KramLoader.h @@ -40,6 +40,16 @@ //------------------------------------- +// This loads KTX and PNG data synchronously. Will likely move to only loading KTX files, with a png -> ktx conversion. +// The underlying KTXImage is not yet returned to the caller, but would be useful for prop queries. +@interface KramBlitLoader : NSObject + +@property (retain, nonatomic, readwrite, nonnull) id device; + +@end + +//------------------------------------- + // for toLower #include diff --git a/kramv/KramLoader.mm b/kramv/KramLoader.mm index 3ef9c0cb..97cac1d5 100644 --- a/kramv/KramLoader.mm +++ b/kramv/KramLoader.mm @@ -14,6 +14,7 @@ #include #include // for max +#include #include "Kram.h" #include "KramLog.h" @@ -75,15 +76,86 @@ - (BOOL)decodeImageIfNeeded:(KTXImage&)image imageDecoded:(KTXImage&)imageDecode return YES; } - + +#if SUPPORT_RGB +inline bool isInternalRGBFormat(MyMTLPixelFormat format) { + bool isInternal = false; + switch(format) { + case MyMTLPixelFormatRGB8Unorm_internal: + case MyMTLPixelFormatRGB8Unorm_sRGB_internal: + case MyMTLPixelFormatRGB16Float_internal: + case MyMTLPixelFormatRGB32Float_internal: + isInternal = true; + break; + default: + break; + } + return isInternal; +} + +inline MyMTLPixelFormat remapInternalRGBFormat(MyMTLPixelFormat format) { + MyMTLPixelFormat remapFormat = MyMTLPixelFormatInvalid; + switch(format) { + case MyMTLPixelFormatRGB8Unorm_internal: + remapFormat = MyMTLPixelFormatRGBA8Unorm; + break; + case MyMTLPixelFormatRGB8Unorm_sRGB_internal: + remapFormat = MyMTLPixelFormatRGBA8Unorm_sRGB; + break; + case MyMTLPixelFormatRGB16Float_internal: + remapFormat = MyMTLPixelFormatRGBA32Float; + break; + case MyMTLPixelFormatRGB32Float_internal: + remapFormat = MyMTLPixelFormatRGBA32Float; + break; + default: + break; + } + return remapFormat; +} + +#endif - (nullable id)loadTextureFromData:(nonnull const uint8_t *)imageData imageDataLength:(int32_t)imageDataLength originalFormat:(nullable MTLPixelFormat*)originalFormat { KTXImage image; + if (!image.open(imageData, imageDataLength)) { return nil; } +#if SUPPORT_RGB + if (isInternalRGBFormat(image.pixelFormat)) { + // loads and converts image to RGBA version + Image rbgaImage; + if (!rbgaImage.loadImageFromKTX(image)) + return nil; + + // re-encode it as a KTXImage, even though this is just a copy + KTXImage rbgaImage2; + + ImageInfoArgs dstImageInfoArgs; + dstImageInfoArgs.pixelFormat = remapInternalRGBFormat(image.pixelFormat); + dstImageInfoArgs.doMipmaps = false; + dstImageInfoArgs.textureEncoder = kTexEncoderExplicit; + dstImageInfoArgs.swizzleText = "rgb1"; + + ImageInfo dstImageInfo; + dstImageInfo.initWithArgs(dstImageInfoArgs); + + if (!rbgaImage.encode(dstImageInfo, rbgaImage2)) { + return nil; + } + + if (originalFormat != nullptr) { + *originalFormat = (MTLPixelFormat)rbgaImage2.pixelFormat; + } + + return [self loadTextureFromImage:rbgaImage2]; + } +#endif + + if (originalFormat != nullptr) { *originalFormat = 
(MTLPixelFormat)image.pixelFormat; } @@ -236,7 +308,7 @@ - (void)setMipgenNeeded:(BOOL)enabled { // and only get box filtering in API-level filters. But would cut storage. textureDescriptor.mipmapLevelCount = MAX(1, image.header.numberOfMipmapLevels); - // only do this for viewer, this disables lossless compression + // only do this for viewer // but allows encoded textures to enable/disable their sRGB state. // Since the view isn't accurate, will probably pull this out. // Keep usageRead set by default. @@ -405,6 +477,228 @@ - (void)setMipgenNeeded:(BOOL)enabled { @end +//-------------------------- + + + + +@implementation KramBlitLoader { + // this must be created in render, and then do blits into this + id _blitEncoder; + id _buffer; + uint8_t* data; + size_t dataSize; +} + +- (nonnull instancetype)init { + self = [super init]; + + // must be aligned to pagesize() or can't use with newBufferWithBytesNoCopy + dataSize = 16*1024*1024; + posix_memalign((void**)&data, getpagesize(), dataSize); + + // allocate memory for circular staging buffer, only need to memcpy to this + // but need a rolling buffer atop to track current begin/end. + + _buffer = [_device newBufferWithBytesNoCopy:data + length:dataSize + options:MTLResourceStorageModeShared + deallocator: ^(void *macroUnusedArg(pointer), NSUInteger macroUnusedArg(length)) { + delete data; + } + ]; + return self; +} + +- (nullable id)createTexture:(KTXImage&)image { + MTLTextureDescriptor *textureDescriptor = [[MTLTextureDescriptor alloc] init]; + + // Indicate that each pixel has a blue, green, red, and alpha channel, where each channel is + // an 8-bit unsigned normalized value (i.e. 0 maps to 0.0 and 255 maps to 1.0) + textureDescriptor.textureType = (MTLTextureType)image.textureType; + textureDescriptor.pixelFormat = (MTLPixelFormat)image.pixelFormat; + + // Set the pixel dimensions of the texture + textureDescriptor.width = image.width; + textureDescriptor.height = MAX(1, image.height); + textureDescriptor.depth = MAX(1, image.depth); + + textureDescriptor.arrayLength = MAX(1, image.header.numberOfArrayElements); + + // ignoring 0 (auto mip), but might need to support for explicit formats + // must have hw filtering support for format, and 32f filtering only first appeared on A14/M1 + // and only get box filtering in API-level filters. But would cut storage. + textureDescriptor.mipmapLevelCount = MAX(1, image.header.numberOfMipmapLevels); + + // needed for blit, + textureDescriptor.storageMode = MTLStorageModePrivate; + + // only do this for viewer + // but allows encoded textures to enable/disable their sRGB state. + // Since the view isn't accurate, will probably pull this out. + // Keep usageRead set by default. + //textureDescriptor.usage = MTLTextureUsageShaderRead; + + // this was so that could toggle srgb on/off, but mips are built linear and encoded as lin or srgb + // in the encoded formats so this wouldn't accurately reflect with/without srgb. 
+ //textureDescriptor.usage |= MTLTextureUsagePixelFormatView; + + // Create the texture from the device by using the descriptor + id texture = [self.device newTextureWithDescriptor:textureDescriptor]; + if (!texture) { + KLOGE("kramv", "could not allocate texture"); + return nil; + } + + return texture; +} + +//for (int mipLevelNumber = 0; mipLevelNumber < numMips; ++mipLevelNumber) { +// +// // zstd decompress entire mip level to the staging buffer +// zstd +//} +// +//// so first memcpy and entire level(s) into the buffer +////memcpy(...); + + +// Has a synchronous upload via replaceRegion that only works for shared/managed (f.e. ktx), +// and another path for private that uses a blitEncoder and must have block aligned data (f.e. ktxa, ktx2). +// Could repack ktx data into ktxa before writing to temporary file, or when copying NSData into MTLBuffer. +- (nullable id)blitTextureFromImage:(KTXImage &)image +{ + id texture = [self createTexture:image]; + + //-------------------------------- + // upload mip levels + + // TODO: about aligning to 4k for base + length + // http://metalkit.org/2017/05/26/working-with-memory-in-metal-part-2.html + + int32_t w = image.width; + int32_t h = image.height; + + int32_t numMips = MAX(1, image.header.numberOfMipmapLevels); + int32_t numArrays = MAX(1, image.header.numberOfArrayElements); + int32_t numFaces = MAX(1, image.header.numberOfFaces); + int32_t numSlices = MAX(1, image.depth); + + Int2 blockDims = image.blockDims(); + + for (int mipLevelNumber = 0; mipLevelNumber < numMips; ++mipLevelNumber) { + // there's a 4 byte levelSize for each mipLevel + // the mipLevel.offset is immediately after this + + // this is offset to a given level + const KTXImageLevel& mipLevel = image.mipLevels[mipLevelNumber]; + + // only have face, face+array, or slice but this handles all cases + for (int array = 0; array < numArrays; ++array) { + for (int face = 0; face < numFaces; ++face) { + for (int slice = 0; slice < numSlices; ++slice) { + + int32_t bytesPerRow = 0; + + // 1D/1DArray textures set bytesPerRow to 0 + if ((MTLTextureType)image.textureType != MTLTextureType1D && + (MTLTextureType)image.textureType != MTLTextureType1DArray) + { + // for compressed, bytesPerRow needs to be multiple of block size + // so divide by the number of blocks making up the height + //int xBlocks = ((w + blockDims.x - 1) / blockDims.x); + int32_t yBlocks = ((h + blockDims.y - 1) / blockDims.y); + + // Calculate the number of bytes per row in the image. + // for compressed images this is xBlocks * blockSize + bytesPerRow = (int32_t)mipLevel.length / yBlocks; + } + + int32_t sliceOrArrayOrFace; + + if (image.header.numberOfArrayElements > 0) { + // can be 1d, 2d, or cube array + sliceOrArrayOrFace = array; + if (numFaces > 1) { + sliceOrArrayOrFace = 6 * sliceOrArrayOrFace + face; + } + } + else { + // can be 1d, 2d, or 3d + sliceOrArrayOrFace = slice; + if (numFaces > 1) { + sliceOrArrayOrFace = face; + } + } + + // this is size of one face/slice/texture, not the levels size + int32_t mipStorageSize = (int32_t)mipLevel.length; + + int32_t mipOffset = (int32_t)mipLevel.offset + sliceOrArrayOrFace * mipStorageSize; + + int32_t bufferBaseOffset = 0; // TODO: pos offset into the staging buffer + mipOffset += bufferBaseOffset; + + // using buffer to store + // offset into the level + //const uint8_t *srcBytes = image.fileData + mipOffset; + + // had blitEncoder support here + + { + // Note: this only works for managed/shared textures. 
+ // For private upload to buffer and then use blitEncoder to copy to texture. + //bool isCubemap = image.textureType == MyMTLTextureTypeCube || + // image.textureType == MyMTLTextureTypeCubeArray; + bool is3D = image.textureType == MyMTLTextureType3D; + //bool is2DArray = image.textureType == MyMTLTextureType2DArray; + //bool is1DArray = image.textureType == MyMTLTextureType1DArray; + + // cpu copy the bytes from the data object into the texture + MTLRegion region = { + { 0, 0, 0 }, // MTLOrigin + { (NSUInteger)w, (NSUInteger)h, 1 } // MTLSize + }; + + // TODO: revist how loading is done to load entire levels + // otherwise too many replaceRegion calls. Data is already packed by mip. + + if (is3D) { + region.origin.z = sliceOrArrayOrFace; + sliceOrArrayOrFace = 0; + } + + // TODO: no call on MTLBlitEncoder to copy entire level of mips like glTexImage3D + + [_blitEncoder copyFromBuffer:_buffer + sourceOffset:mipOffset + sourceBytesPerRow:bytesPerRow + sourceBytesPerImage:mipStorageSize + sourceSize:region.size + + toTexture:texture + destinationSlice:sliceOrArrayOrFace + destinationLevel:mipLevelNumber + destinationOrigin:region.origin + options:MTLBlitOptionNone + ]; + } + } + } + } + + mipDown(w, h); + } + + // this only affect managed textures + [_blitEncoder optimizeContentsForGPUAccess:texture]; + + return texture; +} + + +@end + diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index a98896b0..1f44314e 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -654,8 +654,8 @@ - (void)_updateGameState } } else if (_showSettings->isAtlasGridShown) { - uniforms.gridX = _showSettings->gridSize; - uniforms.gridY = _showSettings->gridSize; + uniforms.gridX = _showSettings->gridSizeX; + uniforms.gridY = _showSettings->gridSizeY; } // no debug mode when preview kicks on, make it possible to toggle back and forth more easily diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index 5c3a1116..54832b83 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -133,6 +133,9 @@ float4 toFloat(half4 c) return float4(c); } +// TODO: note that Metal must pass the same half3 from vertex to fragment shader +// so can't mix a float vs with half fs. 
+ //------------------------------------------- // functions @@ -213,6 +216,9 @@ void skinPosAndBasis(thread float4& position, thread float3& tangent, thread flo { // TODO: might do this as up to 12x vtex lookup, fetch from buffer texture // but uniforms after setup would be faster if many bones + // instances use same bones, but different indices/weights already + // but could draw skinned variants with vtex lookup and not have so much upload prep + float3x4 bindPoseToBoneTransform = bones[indices.x]; if (weights[0] != 1.0) @@ -226,11 +232,11 @@ void skinPosAndBasis(thread float4& position, thread float3& tangent, thread flo //int numBones = 1 + int(weights.w * 3.0); - int numBones = int(dot(float4(weights > 0.0), float4(1))); + int numBones = int(dot(float4(weights > 0.0), float4(1.0))); // reconstruct so can store weights in RGB10A2U if (numBones == 4) - weights.w = 1 - saturate(dot(weights.xyz, float3(1.0))); + weights.w = 1.0 - saturate(dot(weights.xyz, float3(1.0))); for (int i = 1; i < numBones; ++i) { @@ -248,30 +254,46 @@ void skinPosAndBasis(thread float4& position, thread float3& tangent, thread flo normal = (float4(normal, 0.0) * bindPoseToBoneTransform); } +float3x3 toFloat3x3(float4x4 m) +{ + return float3x3(m[0].xyz, m[1].xyz, m[2].xyz); +} + // this is for vertex shader void transformBasis(thread float3& tangent, thread float3& normal, float4x4 modelToWorldTfm, bool isScaled = false) { - tangent = (modelToWorldTfm * float4(tangent, 0.0)).xyz; - normal = (modelToWorldTfm * float4(normal, 0.0)).xyz; + float3x3 m = toFloat3x3(modelToWorldTfm); + + // question here of whether tangent is transformed by m or mInvT + // most apps assume m, but after averaging it can be just as off the surface as the normal + bool useInverseOnTangent = true; + if (useInverseOnTangent) + tangent = tangent * m; + else + tangent = m * tangent; + + // note this is n * R = Rt * n, for simple affine transforms Rinv = Rt, invScale then handled below + normal = normal * m; + // have to apply invSquare of scale here to approximate invT // also make sure to identify inversion off determinant before instancing so that backfacing is correct - // this is only needed if non-uniform scale present in modelToWorldTfm, could precompute + // this is only needed if non-uniform scale present in modelToWorldTfm, could precompute scale2 if (isScaled) { // compute scale squared from rows float3 scale2 = float3( - length_squared(modelToWorldTfm[0].xyz), - length_squared(modelToWorldTfm[1].xyz), - length_squared(modelToWorldTfm[2].xyz)); + length_squared(m[0].xyz), + length_squared(m[1].xyz), + length_squared(m[2].xyz)); // do a max(1e4), but really don't have scale be super small - scale2 = max(0.0001 * 0.0001, scale2); + scale2 = recip(max(0.0001 * 0.0001, scale2)); // apply inverse - tangent /= scale2; - normal /= scale2; + tangent *= scale2; + normal *= scale2; } // vertex shader normalize, but the fragment shader should not diff --git a/kramv/KramViewerBase.h b/kramv/KramViewerBase.h index 4c82328b..3f706a06 100644 --- a/kramv/KramViewerBase.h +++ b/kramv/KramViewerBase.h @@ -112,8 +112,9 @@ class ShowSettings { int32_t blockX = 1; int32_t blockY = 1; - // set when isGridShow is true - int32_t gridSize = 1; + // set when isGridShown is true + int32_t gridSizeX = 1; + int32_t gridSizeY = 1; // for eyedropper, lookup this pixel value, and return it to CPU int32_t textureLookupX = 0; diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index 2ccf6801..9f4fcdb6 100644 --- a/kramv/KramViewerMain.mm +++ 
b/kramv/KramViewerMain.mm @@ -1275,10 +1275,13 @@ - (void)keyDown:(NSEvent *)theEvent else { _showSettings->isAtlasGridShown = true; - _showSettings->gridSize = gridSizes[grid]; - + // want to be able to show altases tht have long entries derived from props + // but right now just a square grid atlas + _showSettings->gridSizeX = + _showSettings->gridSizeY = gridSizes[grid]; + sprintf(text, "Atlas Grid %dx%d On", - _showSettings->gridSize, _showSettings->gridSize); + _showSettings->gridSizeX, _showSettings->gridSizeY); } isChanged = true; diff --git a/libkram/kram/KTXImage.cpp b/libkram/kram/KTXImage.cpp index 868c77af..c9f37333 100644 --- a/libkram/kram/KTXImage.cpp +++ b/libkram/kram/KTXImage.cpp @@ -1240,14 +1240,6 @@ const char* textureTypeName(MyMTLTextureType textureType) } -// This is one entire level of mipLevels. -class KTX2ImageLevel { -public: - uint64_t offset; // numChunks * length - uint64_t lengthCompressed; // can only be read in, can't compute this, but can compute upper bound from zstd - uint64_t length; // size of a single mip -}; - //// Data Format Descriptor diff --git a/libkram/kram/KTXImage.h b/libkram/kram/KTXImage.h index bf377602..a5787056 100644 --- a/libkram/kram/KTXImage.h +++ b/libkram/kram/KTXImage.h @@ -179,19 +179,26 @@ class KTXHeader { MyMTLPixelFormat metalFormat() const; }; +// This is one entire level of mipLevels. +// In KTX, the image levels are assumed from format and size since no compression applied. +class KTXImageLevel { +public: + uint64_t offset; // numChunks * length + uint64_t length; // size of a single mip +}; + //--------------------------------------------- // Mips are reversed from KTX1 (mips are smallest first for streaming), // and this stores an array of supercompressed levels, and has dfds. class KTX2Header { public: - - uint8_t identifier[kKTXIdentifierSize] = { // same is kKTX2Identifier + uint8_t identifier[kKTXIdentifierSize] = { // same is kKTX2Identifier 0xAB, 0x4B, 0x54, 0x58, 0x20, 0x32, 0x30, 0xBB, 0x0D, 0x0A, 0x1A, 0x0A // '«', 'K', 'T', 'X', ' ', '2', '0', '»', '\r', '\n', '\x1A', '\n' }; - uint32_t vkFormat = 0; // invalid + uint32_t vkFormat = 0; // invalid format uint32_t typeSize = 1; uint32_t pixelWidth = 1; @@ -218,18 +225,24 @@ class KTX2Header { uint64_t sgdByteLength = 0; // chunks hold levelCount of all mips of the same size - // KTX2ImageChunk* chunks; // [levelCount] + // KTX2ImageLevel* chunks; // [levelCount] }; -//--------------------------------------------- - +// Unlike KTX, KTX2 writes an array of level sizes since compression may e involved. +// These correspond to an entire compressed array of chunks. +// So often an entire level mus be decompressed before a chunk can be accessed. // This is one entire level of mipLevels. -class KTXImageLevel { +class KTX2ImageLevel { public: uint64_t offset; // numChunks * length + uint64_t lengthCompressed; // can only be read in, can't compute this, but can compute upper bound from zstd uint64_t length; // size of a single mip }; +//--------------------------------------------- + + + // Since can't add anything to KTXHeader without throwing off KTXHeader size, // this holds any mutable data for reading/writing KTX images. 
class KTXImage { diff --git a/libkram/kram/Kram.cpp b/libkram/kram/Kram.cpp index b3a73281..5cf1d8a2 100644 --- a/libkram/kram/Kram.cpp +++ b/libkram/kram/Kram.cpp @@ -2303,8 +2303,76 @@ CommandType parseCommandType(const char* command) return commandType; } +void PSTest() { + static bool doTest = false; + if (doTest) { + return; + } + + // So it looks like Photoshop is doing srgb * alpha right away on PNG import. This results in dimmer colors + // when they are read on the GPU, since then the gpu does srgb to linear conversion. values2 + // is that case below. Also note that the Photoshop color picker shows only srgb intensities, not the linear. + // color value. This lets it line up with screen color pickers like Apple DCM. Apple Preview also shows + // images with the same dim colors, so it's replicating what Photoshop does. + // + // Gimp and kramv do what is in values3 resulting in brighter intensities. One question with formats like + // astc that interpolate the endpoints in srgb space off the selectors is how to encode colors. + // Almost makes sense to drop srgb when premul alpha is involved and store linear color instead. + // Figma follows that convention. + + // Here's kramv's srgb flow: + // PNG unmul alpha -> srbToLinear(rgb) * alpha -> build mips in linear -> linearToSrgb(lin.rgb) + // -> encode endpoints/colors -> BC/ASTC/ETC2 + // + // Here's Photoshop I think: + // PNG unmul alpha -> srgbToLinear(rgb * alpha) -> linarToSrgb( c ) -> toUnmul( c/alpha ) -> Png + + + Mipper mipper; + + // 1. srgb 8-bit values + uint8_t alpha = 200; + float alphaF = mipper.toAlphaFloat(alpha); + + uint8_t values1[256]; + uint8_t values2[256]; + uint8_t values3[256]; + + for (int32_t i = 0; i < 256; ++i) { + // premul and then snap back to store + values1[i] = ((uint32_t)i * (uint32_t)alpha) / 255; + } + + // now convert those values to linear color (float) + for (int32_t i = 0; i < 256; ++i) { + float value = mipper.toLinear(values1[i]); + + values2[i] = uint8_t(value * 255.1); + + //KLOGI("srgb", "[%d] = %g\n", i, value); + } + + // convert srgb to linear and then do premul + for (int32_t i = 0; i < 256; ++i) { + float value = mipper.toLinear(i); + value *= alphaF; + + values3[i] = uint8_t(value * 255.1); + } + + // log them side-by-side for comparison + KLOGI("srgb", "premul by %0.3f", 200.0/255.0); + for (int32_t i = 0; i < 256; ++i) { + KLOGI("srgb", "[%d] = %u, %u, %u", + i, values1[i], values2[i], values3[i]); + } +} + + int32_t kramAppCommand(vector& args) { + PSTest(); + // make sure next arg is a valid command type CommandType commandType = kCommandTypeUnknown; if (args.size() >= 1) { diff --git a/libkram/kram/KramImage.cpp b/libkram/kram/KramImage.cpp index b1f4ade0..c8ce1b0b 100644 --- a/libkram/kram/KramImage.cpp +++ b/libkram/kram/KramImage.cpp @@ -1223,6 +1223,144 @@ bool Image::encodeImpl(ImageInfo& info, FILE* dstFile, KTXImage& dstImage) const Mipper mipper; SDFMipper sdfMipper; +#if 0 + // TODO: can go out to KTX2 here instead + // It has two different blocks, supercompression for BasisLZ + // and a DFD block which details the block content. + // And mips are reversed. 
+ bool doWriteKTX2 = false; + if (doWriteKTX2 && dstFile) // in memory version will always be KTX1 format for nwo + { + KTX2Header header2; + + header2.vkFormat = vulkanType(info.pixelFormat); + // header2.typeSize = 1; // skip + + header2.pixelWidth = header.pixelWidth; + header2.pixelHeight = header.pixelHeight; + header2.pixelDepth = header.pixelDepth; + + if (dstImage.textureType == MyMTLTextureType1DArray) { + header2.pixelHeight = 0; + header2.pixelDepth = 0; + } + + header2.layerCount = header.numberOfArrayElements; + header2.faceCount = header.numberOfFaces; + header2.levelCount = numDstMipLevels; // header.numberOfMipmapLevels; + + // compute size of dfd + vector dfdData; + + // compute offsets and lengts of data blocks + header2.dfdByteOffset = sizeof(header2); + header2.kvdByteOffset = header2.dfdByteOffset + dfdData.size(); + header2.sgdByteOffset = header2.kvdByteOffset + propsData.size(); + + header2.dfdByteLength = dfdData.size(); + header2.kvdByteLength = propsData.size(); + header2.sgdByteLength = 0; + + // TODO: figure out dfd here + + // write the header + if (!writeDataAtOffset((const uint8_t*)&header2, sizeof(header2), 0, dstFile, dstImage)) { + return false; + } + + // write the dfd + if (!writeDataAtOffset(dfdData.data(), dfdData.size(), header2.dfdByteOffset, dstFile, dstImage)) { + return false; + } + + // write the props + if (!writeDataAtOffset(propsData.data(), propsData.size(), header2.kvdByteOffset, dstFile, dstImage)) { + return false; + } + + // skip supercompression block + + // TODO: this either writes to file or to dstImage (in-memory KTX file) + + // TODO: also need to support a few compressions + // zstd and zlib, does dfd contain the offsets of each chunk + // and the compressed sizes of mips. Know format and sizes uncompressed. + // but need to fill out the compressed size field. + + vector levels; + levels.resize(numDstMipLevels); + + size_t levelListStartOffset = header2.sgdByteOffset + header2.sgdByteLength; + size_t levelStartOffset = levelListStartOffset + levels.size() * sizeof(KTX2ImageLevel); + + size_t lastLevelOffset = levelStartOffset; + for (int32_t i = 0; i < numDstMipLevels; ++i) { + levels[i].length = numChunks * numDstMipLevels; + levels[i].lengthCompressed = levels[i].length; + levels[i].offset = lastLevelOffset + levels[i].lengthCompressed; + lastLevelOffset = levels[i].offset; + } + + // TODO: compress to a seperate zstd stream for each level + // then can continue to do mips in place, and just append the bytes to that level + // after compression. If not compressed, then code from KTX1 can be used. + bool isCompressed = false; + + if (!isCompressed) { + if (!writeDataAtOffset(levels.data(), levels.size(), levelListStartOffset, dstFile, dstImage)) { + return false; + } + } + + // TODO: here allocate a zstd encoder for each level + vector< vector > compressedLevels; + if (isCompressed) { + compressedLevels.resize(numDstMipLevels); + } + + // write the chunks of mips see code below, seeks are important since + // it's building mips on the fly. 
+ for (int32_t chunk = 0; chunk < numChunks; ++chunk) { + // TODO: actually build the mip (reuse code below for KTX) + + if (!isCompressed) + continue; + + // handle zstd compression here, and add to end of existing encoder for level + zstd_compress(level); + + // append the compressed bytes to each strea + levels[mipLevel].append(data); + } + + if (isCompressed) { + + // update the offsets and compressed sizes + lastLevelOffset = levelStartOffset; + for (int32_t i = 0; i < numDstMipLevels; ++i) { + levels[i].lengthCompressed = compressedLevels[i].size(); + levels[i].offset = lastLevelOffset + levels[i].lengthCompressed; + lastLevelOffset = levels[i].offset; + } + + // write out sizes + if (!writeDataAtOffset(levels.data(), levels.size(), levelListStartOffset, dstFile, dstImage)) { + return false; + } + + // and now seek and write out each compressed level + for (int32_t i = 0; i < numDstMipLevels; ++i) { + if (!writeDataAtOffset(compressedLevels[i].data(), compressedLevels[i].size(), levels[i].offset, dstFile, dstImage)) { + return false; + } + } + } + + return true; + } +#endif + + // ---------------------------------------------------- // write the header out KTXHeader headerCopy = header; diff --git a/scripts/formatSources.sh b/scripts/formatSources.sh index 5578718d..45547dd0 100755 --- a/scripts/formatSources.sh +++ b/scripts/formatSources.sh @@ -10,7 +10,7 @@ clang-format -style=file -i KTX*.cpp clang-format -style=file -i KTX*.h popd -pushd ../viewer +pushd ../kramv clang-format -style=file -i Kram*.cpp clang-format -style=file -i Kram*.h clang-format -style=file -i Kram*.mm From 76393eeb9100e8ef6353faf2fe7a9d866f8439d7 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Wed, 12 May 2021 22:58:17 -0700 Subject: [PATCH 041/901] kram - fix Mipper with newer calls Just to bury more non-public calls/data. --- libkram/kram/KramMipper.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/libkram/kram/KramMipper.h b/libkram/kram/KramMipper.h index e5439f25..c6f60391 100644 --- a/libkram/kram/KramMipper.h +++ b/libkram/kram/KramMipper.h @@ -57,21 +57,25 @@ class ImageData { }; class Mipper { -public: +private: float srgbToLinear[256]; float alphaToFloat[256]; +public: Mipper(); - void initTables(); - // drop by 1 mip level by box filter void mipmap(const ImageData &srcImage, ImageData &dstImage) const; void initPixelsHalfIfNeeded(ImageData &srcImage, bool doPremultiply, bool doPrezero, vector &halfImage) const; + float toLinear(uint8_t srgb) const { return srgbToLinear[srgb]; } + float toAlphaFloat(uint8_t alpha) const { return alphaToFloat[alpha]; } + private: + void initTables(); + void mipmapLevel(const ImageData &srcImage, ImageData &dstImage) const; void mipmapLevelOdd(const ImageData& srcImage, ImageData& dstImage) const; From 2c425f53f415e647d3bdb3de48adfe3e5b64eba3 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Thu, 13 May 2021 14:24:15 -0700 Subject: [PATCH 042/901] Kram - add premulrgb option to match Photoshop's and Apple Previews non-srgb compliant handling of premul srgb files. This does premul directly to the raw srgb data in the png file in 8-bits. Mips are not done in linear space either. Turn off the srgb test for this. 
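The mismatch being emulated is only about where the multiply by alpha happens relative to the sRGB transfer function. A small standalone comparison (plain C++, not from the patch; uses the standard sRGB decode and the 200/255 alpha that PSTest samples):

```cpp
#include <cmath>
#include <cstdint>
#include <cstdio>

// standard sRGB EOTF (decode to linear)
static float srgbToLinear(float s)
{
    return (s <= 0.04045f) ? s / 12.92f
                           : std::pow((s + 0.055f) / 1.055f, 2.4f);
}

int main()
{
    uint8_t srgb = 180, alpha = 200;   // arbitrary sample pixel
    float a = alpha / 255.0f;

    // -premulrgb / Photoshop-style: multiply the raw sRGB value, then decode
    float premulThenDecode = srgbToLinear((srgb / 255.0f) * a);

    // kram's default flow: decode to linear first, then multiply by alpha
    float decodeThenPremul = srgbToLinear(srgb / 255.0f) * a;

    // the first value is always the smaller (darker) of the two for a < 1
    printf("%f vs %f\n", premulThenDecode, decodeThenPremul);
    return 0;
}
```

Because the decode is convex with f(0) = 0, premultiplying before the decode always lands at or below premultiplying after it, which is why images processed that way read back dimmer on the GPU.
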
--- libkram/kram/Kram.cpp | 45 +++++++++++++++++++++++++++++++++------ libkram/kram/Kram.h | 2 +- libkram/kram/KramMipper.h | 2 ++ 3 files changed, 42 insertions(+), 7 deletions(-) diff --git a/libkram/kram/Kram.cpp b/libkram/kram/Kram.cpp index 5cf1d8a2..a3c6db07 100644 --- a/libkram/kram/Kram.cpp +++ b/libkram/kram/Kram.cpp @@ -47,7 +47,14 @@ bool LoadKtx(const uint8_t* data, size_t dataSize, Image& sourceImage) return sourceImage.loadImageFromKTX(image); } -bool LoadPng(const uint8_t* data, size_t dataSize, Image& sourceImage) +inline Color toPremul(Color c) { + c.r = ((uint32_t)c.r * (uint32_t)c.a) / 255; + c.g = ((uint32_t)c.g * (uint32_t)c.a) / 255; + c.b = ((uint32_t)c.b * (uint32_t)c.a) / 255; + return c; +} + +bool LoadPng(const uint8_t* data, size_t dataSize, bool isPremulRgb, Image& sourceImage) { uint32_t width = 0; uint32_t height = 0; @@ -107,6 +114,18 @@ bool LoadPng(const uint8_t* data, size_t dataSize, Image& sourceImage) return false; } + // apply premul srgb right away, don't use with -premul or alpha is applied twice + // this may throw off the props. Note this ignores srgb conversion. + // This is hack to look like Photoshop and Apple Preview, where they process srgb wrong + // on premul PNG data on load, and colors look much darker. + + if (hasAlpha && isPremulRgb) { + Color* colors = (Color*)pixels.data(); + for (int32_t i = 0, iEnd = width*height; i < iEnd; ++i) { + colors[i] = toPremul(colors[i]); + } + } + return sourceImage.loadImageFromPixels(pixels, width, height, hasColor, hasAlpha); } @@ -117,7 +136,7 @@ bool SetupTmpFile(FileHelper& tmpFileHelper, const char* suffix) bool SetupSourceImage(MmapHelper& mmapHelper, FileHelper& fileHelper, vector& fileBuffer, - const string& srcFilename, Image& sourceImage) + const string& srcFilename, Image& sourceImage, bool isPremulSrgb = false) { bool isKTX = endsWith(srcFilename, ".ktx") || endsWith(srcFilename, ".ktx2"); bool isPNG = endsWith(srcFilename, ".png"); @@ -143,7 +162,7 @@ bool SetupSourceImage(MmapHelper& mmapHelper, FileHelper& fileHelper, } } else if (isPNG) { - if (!LoadPng(mmapHelper.data(), mmapHelper.dataLength(), + if (!LoadPng(mmapHelper.data(), mmapHelper.dataLength(), isPremulSrgb, sourceImage)) { return false; // error } @@ -171,7 +190,7 @@ bool SetupSourceImage(MmapHelper& mmapHelper, FileHelper& fileHelper, } } else if (isPNG) { - if (!LoadPng(fileBuffer.data(), fileHelper.size(), + if (!LoadPng(fileBuffer.data(), fileHelper.size(), isPremulSrgb, sourceImage)) { return false; // error } @@ -1029,6 +1048,10 @@ void kramEncodeUsage(bool showVersion = true) "\tPremultiplied alpha to src pixels before output but only where a=0\n" "\n" + "\t-premulrgb" + "\tPremultiplied alpha to src pixels at load to emulate Photoshop, don't use with -premul\n" + "\n" + "\t-optopaque" "\tChange format from bc7/3 to bc1, or etc2rgba to rgba if opaque\n" "\n" @@ -1632,6 +1655,7 @@ static int32_t kramAppEncode(vector& args) ImageInfoArgs infoArgs; + bool isPremulRgb = false; bool error = false; for (int32_t i = 0; i < argc; ++i) { @@ -1874,6 +1898,9 @@ static int32_t kramAppEncode(vector& args) continue; } + // This means to post-multiply alpha after loading, not that incoming data in already premul + // png has the limitation that it's unmul, but tiff/exr can store premul. With 8-bit images + // really would prefer to premul them when building the texture. 
else if (isStringEqual(word, "-premul")) { infoArgs.isPremultiplied = true; continue; @@ -1882,6 +1909,12 @@ static int32_t kramAppEncode(vector& args) infoArgs.isPrezero = true; continue; } + // this means premul the data at read from srgb, this it to match photoshop + else if (isStringEqual(word, "-premulrgb")) { + isPremulRgb = true; + continue; + } + else if (isStringEqual(word, "-v") || isStringEqual(word, "-verbose")) { infoArgs.isVerbose = true; @@ -1977,7 +2010,7 @@ static int32_t kramAppEncode(vector& args) vector srcFileBuffer; bool success = SetupSourceImage(srcMmapHelper, srcFileHelper, srcFileBuffer, - srcFilename, srcImage); + srcFilename, srcImage, isPremulRgb); if (success) { success = SetupTmpFile(tmpFileHelper, ".ktx"); @@ -2305,7 +2338,7 @@ CommandType parseCommandType(const char* command) void PSTest() { static bool doTest = false; - if (doTest) { + if (!doTest) { return; } diff --git a/libkram/kram/Kram.h b/libkram/kram/Kram.h index 13273035..9a52a695 100644 --- a/libkram/kram/Kram.h +++ b/libkram/kram/Kram.h @@ -14,7 +14,7 @@ class KTXImage; // helpers to source from a png or single level of a ktx bool LoadKtx(const uint8_t* data, size_t dataSize, Image& sourceImage); -bool LoadPng(const uint8_t* data, size_t dataSize, Image& sourceImage); +bool LoadPng(const uint8_t* data, size_t dataSize, bool isPremulSrgb, Image& sourceImage); // can call these with data instead of needing a file string kramInfoPNGToString(const string& srcFilename, const uint8_t* data, uint64_t dataSize, bool isVerbose); diff --git a/libkram/kram/KramMipper.h b/libkram/kram/KramMipper.h index c6f60391..65013751 100644 --- a/libkram/kram/KramMipper.h +++ b/libkram/kram/KramMipper.h @@ -73,6 +73,8 @@ class Mipper { float toLinear(uint8_t srgb) const { return srgbToLinear[srgb]; } float toAlphaFloat(uint8_t alpha) const { return alphaToFloat[alpha]; } + uint8_t toPremul(uint8_t channelIntensity, uint8_t alpha) const { return ((uint32_t)channelIntensity * (uint32_t)alpha) / 255; } + private: void initTables(); From 4be8f27613f8058aa3bcaae43765a9be433542ee Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Thu, 13 May 2021 14:40:43 -0700 Subject: [PATCH 043/901] kram - fix build break in loader pass false for isPremulRgb to LoadPng. 
--- kramv/KramLoader.mm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kramv/KramLoader.mm b/kramv/KramLoader.mm index 97cac1d5..11421ae5 100644 --- a/kramv/KramLoader.mm +++ b/kramv/KramLoader.mm @@ -186,7 +186,7 @@ static int32_t numberOfMipmapLevels(const Image& image) { { // can only load 8u and 16u from png, no hdr formats, no premul either, no props Image sourceImage; - bool isLoaded = LoadPng(data, dataSize, sourceImage); + bool isLoaded = LoadPng(data, dataSize, false, sourceImage); if (!isLoaded) { return nil; } From f0f81cbc44b956fddbfe7699b311e067256235cc Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Thu, 13 May 2021 15:24:52 -0700 Subject: [PATCH 044/901] kram - fix bug in length() call, and warnings in lodepng --- libkram/kram/float4a.h | 2 +- libkram/lodepng/lodepng.cpp | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/libkram/kram/float4a.h b/libkram/kram/float4a.h index ca8111e7..80ba20a4 100644 --- a/libkram/kram/float4a.h +++ b/libkram/kram/float4a.h @@ -368,7 +368,7 @@ inline float length_squared(const float4& vv) } inline float length(const float4& vv) { - return sqrtf(length(vv)); + return sqrtf(length_squared(vv)); } // sse4.1 ops diff --git a/libkram/lodepng/lodepng.cpp b/libkram/lodepng/lodepng.cpp index b08b0858..58c61022 100644 --- a/libkram/lodepng/lodepng.cpp +++ b/libkram/lodepng/lodepng.cpp @@ -715,7 +715,7 @@ static unsigned HuffmanTree_makeTable(HuffmanTree* tree) { size = headsize; for(i = 0; i < headsize; ++i) { unsigned l = maxlens[i]; - if(l > FIRSTBITS) size += (1u << (l - FIRSTBITS)); + if(l > FIRSTBITS) size += (unsigned)(1u << (l - FIRSTBITS)); } tree->table_len = (unsigned char*)lodepng_malloc(size * sizeof(*tree->table_len)); tree->table_value = (unsigned short*)lodepng_malloc(size * sizeof(*tree->table_value)); @@ -734,7 +734,7 @@ static unsigned HuffmanTree_makeTable(HuffmanTree* tree) { if(l <= FIRSTBITS) continue; tree->table_len[i] = l; tree->table_value[i] = pointer; - pointer += (1u << (l - FIRSTBITS)); + pointer += (unsigned)(1u << (l - FIRSTBITS)); } lodepng_free(maxlens); @@ -5447,7 +5447,7 @@ static size_t ilog2i(size_t i) { l = ilog2(i); /* approximate i*log2(i): l is integer logarithm, ((i - (1u << l)) << 1u) linearly approximates the missing fractional part multiplied by i */ - return i * l + ((i - (1u << l)) << 1u); + return i * l + ((i - ((size_t)1 << l)) << (size_t)1); } static unsigned filter(unsigned char* out, const unsigned char* in, unsigned w, unsigned h, From ce3a6d8e56e2d0a37df0b1d62ddc2bf61a8ebbf5 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 16 May 2021 16:14:12 -0700 Subject: [PATCH 045/901] kram - update zstd and add encoder, move miniz into libkram from kramv, small fix to bc7enc --- libkram/CMakeLists.txt | 18 +- libkram/bc7enc/bc7enc.cpp | 5 +- {kramv => libkram/miniz}/miniz.cpp | 1 + {kramv => libkram/miniz}/miniz.h | 0 libkram/zstd/zstd.cpp | 40929 +++++++++++++++++++++++++++ libkram/zstd/zstd.h | 2532 ++ libkram/zstd/zstddeclib.cpp | 13875 +++++---- 7 files changed, 52085 insertions(+), 5275 deletions(-) rename {kramv => libkram/miniz}/miniz.cpp (99%) rename {kramv => libkram/miniz}/miniz.h (100%) create mode 100644 libkram/zstd/zstd.cpp create mode 100644 libkram/zstd/zstd.h diff --git a/libkram/CMakeLists.txt b/libkram/CMakeLists.txt index 0b501ee3..e5fd590b 100644 --- a/libkram/CMakeLists.txt +++ b/libkram/CMakeLists.txt @@ -86,7 +86,19 @@ file(GLOB_RECURSE libSources CONFIGURE_DEPENDS "${SOURCE_DIR}/tmpfileplus/*.cpp" "${SOURCE_DIR}/tmpfileplus/*.h" - 
"${SOURCE_DIR}/zstd/zstddeclib.cpp" + # decoder unity file + # cd zstd/build/single_file_libs + # ./combine.sh -r ../../lib -o zstddeclib.c zstddeclib-in.c + # "${SOURCE_DIR}/zstd/zstddeclib.cpp" + + # full unity file + # cd zstd/build/single_file_libs + # ./combine.sh -r ../../lib -o zstd.c zstd-in.c + "${SOURCE_DIR}/zstd/zstd.h" + "${SOURCE_DIR}/zstd/zstd.cpp" + + "${SOURCE_DIR}/miniz/miniz.h" + "${SOURCE_DIR}/miniz/miniz.cpp" ) # no objc on win or linux @@ -128,6 +140,10 @@ target_include_directories(${myTargetLib} PRIVATE "${SOURCE_DIR}/zstd/" ) +target_include_directories(${myTargetLib} PUBLIC + "${SOURCE_DIR}/miniz/" + ) + # only add sources to the library target_sources(${myTargetLib} PRIVATE ${libSources}) diff --git a/libkram/bc7enc/bc7enc.cpp b/libkram/bc7enc/bc7enc.cpp index 61b4abe6..b2403b84 100644 --- a/libkram/bc7enc/bc7enc.cpp +++ b/libkram/bc7enc/bc7enc.cpp @@ -1944,8 +1944,9 @@ static void handle_alpha_block(void *pBlock, const color_quad_u8 *pPixels, const pParams->m_pSelector_weightsx = (const vec4F *)g_bc7_weights4x; pParams->m_num_selector_weights = 16; pParams->m_comp_bits = 7; - pParams->m_has_pbits = BC7ENC_TRUE; - pParams->m_has_alpha = BC7ENC_TRUE; + pParams->m_has_pbits = BC7ENC_TRUE; + pParams->m_endpoints_share_pbit = BC7ENC_FALSE; + pParams->m_has_alpha = BC7ENC_TRUE; pParams->m_perceptual = pComp_params->m_perceptual; pParams->m_num_pixels = 16; pParams->m_pPixels = pPixels; diff --git a/kramv/miniz.cpp b/libkram/miniz/miniz.cpp similarity index 99% rename from kramv/miniz.cpp rename to libkram/miniz/miniz.cpp index e3deec32..62ea05c4 100644 --- a/kramv/miniz.cpp +++ b/libkram/miniz/miniz.cpp @@ -3229,6 +3229,7 @@ struct mz_zip_internal_state_tag #if defined(DEBUG) || defined(_DEBUG) || defined(NDEBUG) static MZ_FORCEINLINE mz_uint mz_zip_array_range_check(const mz_zip_array *pArray, mz_uint index) { + (void)pArray; MZ_ASSERT(index < pArray->m_size); return index; } diff --git a/kramv/miniz.h b/libkram/miniz/miniz.h similarity index 100% rename from kramv/miniz.h rename to libkram/miniz/miniz.h diff --git a/libkram/zstd/zstd.cpp b/libkram/zstd/zstd.cpp new file mode 100644 index 00000000..45a4c83e --- /dev/null +++ b/libkram/zstd/zstd.cpp @@ -0,0 +1,40929 @@ +/** + * \file zstd.c + * Single-file Zstandard library. + * + * Generate using: + * \code + * combine.sh -r ../../lib -o zstd.c zstd-in.c + * \endcode + */ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ +/* + * Settings to bake for the single library file. + * + * Note: It's important that none of these affects 'zstd.h' (only the + * implementation files we're amalgamating). + * + * Note: MEM_MODULE stops xxhash redefining BYTE, U16, etc., which are also + * defined in mem.h (breaking C99 compatibility). + * + * Note: the undefs for xxHash allow Zstd's implementation to coinside with with + * standalone xxHash usage (with global defines). + * + * Note: multithreading is enabled for all platforms apart from Emscripten. 
+ */ +#define DEBUGLEVEL 0 +#define MEM_MODULE +#undef XXH_NAMESPACE +#define XXH_NAMESPACE ZSTD_ +#undef XXH_PRIVATE_API +#define XXH_PRIVATE_API +#undef XXH_INLINE_ALL +#define XXH_INLINE_ALL +#define ZSTD_LEGACY_SUPPORT 0 +#ifndef __EMSCRIPTEN__ +#define ZSTD_MULTITHREAD +#endif +#define ZSTD_TRACE 0 + +/* Include zstd_deps.h first with all the options we need enabled. */ +#define ZSTD_DEPS_NEED_MALLOC +#define ZSTD_DEPS_NEED_MATH64 +/**** start inlining common/zstd_deps.h ****/ +/* + * Copyright (c) Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/* This file provides common libc dependencies that zstd requires. + * The purpose is to allow replacing this file with a custom implementation + * to compile zstd without libc support. + */ + +/* Need: + * NULL + * INT_MAX + * UINT_MAX + * ZSTD_memcpy() + * ZSTD_memset() + * ZSTD_memmove() + */ +#ifndef ZSTD_DEPS_COMMON +#define ZSTD_DEPS_COMMON + +#include +#include +#include + +#if defined(__GNUC__) && __GNUC__ >= 4 +# define ZSTD_memcpy(d,s,l) __builtin_memcpy((d),(s),(l)) +# define ZSTD_memmove(d,s,l) __builtin_memmove((d),(s),(l)) +# define ZSTD_memset(p,v,l) __builtin_memset((p),(v),(l)) +#else +# define ZSTD_memcpy(d,s,l) memcpy((d),(s),(l)) +# define ZSTD_memmove(d,s,l) memmove((d),(s),(l)) +# define ZSTD_memset(p,v,l) memset((p),(v),(l)) +#endif + +#endif /* ZSTD_DEPS_COMMON */ + +/* Need: + * ZSTD_malloc() + * ZSTD_free() + * ZSTD_calloc() + */ +#ifdef ZSTD_DEPS_NEED_MALLOC +#ifndef ZSTD_DEPS_MALLOC +#define ZSTD_DEPS_MALLOC + +#include + +#define ZSTD_malloc(s) malloc(s) +#define ZSTD_calloc(n,s) calloc((n), (s)) +#define ZSTD_free(p) free((p)) + +#endif /* ZSTD_DEPS_MALLOC */ +#endif /* ZSTD_DEPS_NEED_MALLOC */ + +/* + * Provides 64-bit math support. + * Need: + * U64 ZSTD_div64(U64 dividend, U32 divisor) + */ +#ifdef ZSTD_DEPS_NEED_MATH64 +#ifndef ZSTD_DEPS_MATH64 +#define ZSTD_DEPS_MATH64 + +#define ZSTD_div64(dividend, divisor) ((dividend) / (divisor)) + +#endif /* ZSTD_DEPS_MATH64 */ +#endif /* ZSTD_DEPS_NEED_MATH64 */ + +/* Need: + * assert() + */ +#ifdef ZSTD_DEPS_NEED_ASSERT +#ifndef ZSTD_DEPS_ASSERT +#define ZSTD_DEPS_ASSERT + +#include + +#endif /* ZSTD_DEPS_ASSERT */ +#endif /* ZSTD_DEPS_NEED_ASSERT */ + +/* Need: + * ZSTD_DEBUG_PRINT() + */ +#ifdef ZSTD_DEPS_NEED_IO +#ifndef ZSTD_DEPS_IO +#define ZSTD_DEPS_IO + +#include +#define ZSTD_DEBUG_PRINT(...) fprintf(stderr, __VA_ARGS__) + +#endif /* ZSTD_DEPS_IO */ +#endif /* ZSTD_DEPS_NEED_IO */ + +/* Only requested when is known to be present. + * Need: + * intptr_t + */ +#ifdef ZSTD_DEPS_NEED_STDINT +#ifndef ZSTD_DEPS_STDINT +#define ZSTD_DEPS_STDINT + +#include + +#endif /* ZSTD_DEPS_STDINT */ +#endif /* ZSTD_DEPS_NEED_STDINT */ +/**** ended inlining common/zstd_deps.h ****/ + +/**** start inlining common/debug.c ****/ +/* ****************************************************************** + * debug + * Part of FSE library + * Copyright (c) Yann Collet, Facebook, Inc. 
+ * + * You can contact the author at : + * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +****************************************************************** */ + + +/* + * This module only hosts one global variable + * which can be used to dynamically influence the verbosity of traces, + * such as DEBUGLOG and RAWLOG + */ + +/**** start inlining debug.h ****/ +/* ****************************************************************** + * debug + * Part of FSE library + * Copyright (c) Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +****************************************************************** */ + + +/* + * The purpose of this header is to enable debug functions. + * They regroup assert(), DEBUGLOG() and RAWLOG() for run-time, + * and DEBUG_STATIC_ASSERT() for compile-time. + * + * By default, DEBUGLEVEL==0, which means run-time debug is disabled. + * + * Level 1 enables assert() only. + * Starting level 2, traces can be generated and pushed to stderr. + * The higher the level, the more verbose the traces. + * + * It's possible to dynamically adjust level using variable g_debug_level, + * which is only declared if DEBUGLEVEL>=2, + * and is a global variable, not multi-thread protected (use with care) + */ + +#ifndef DEBUG_H_12987983217 +#define DEBUG_H_12987983217 + +#if defined (__cplusplus) +extern "C" { +#endif + + +/* static assert is triggered at compile time, leaving no runtime artefact. + * static assert only works with compile-time constants. + * Also, this variant can only be used inside a function. */ +#define DEBUG_STATIC_ASSERT(c) (void)sizeof(char[(c) ? 1 : -1]) + + +/* DEBUGLEVEL is expected to be defined externally, + * typically through compiler command line. + * Value must be a number. */ +#ifndef DEBUGLEVEL +# define DEBUGLEVEL 0 +#endif + + +/* recommended values for DEBUGLEVEL : + * 0 : release mode, no debug, all run-time checks disabled + * 1 : enables assert() only, no display + * 2 : reserved, for currently active debug path + * 3 : events once per object lifetime (CCtx, CDict, etc.) + * 4 : events once per frame + * 5 : events once per block + * 6 : events once per sequence (verbose) + * 7+: events at every position (*very* verbose) + * + * It's generally inconvenient to output traces > 5. + * In which case, it's possible to selectively trigger high verbosity levels + * by modifying g_debug_level. 
+ */ + +#if (DEBUGLEVEL>=1) +# define ZSTD_DEPS_NEED_ASSERT +/**** skipping file: zstd_deps.h ****/ +#else +# ifndef assert /* assert may be already defined, due to prior #include */ +# define assert(condition) ((void)0) /* disable assert (default) */ +# endif +#endif + +#if (DEBUGLEVEL>=2) +# define ZSTD_DEPS_NEED_IO +/**** skipping file: zstd_deps.h ****/ +extern int g_debuglevel; /* the variable is only declared, + it actually lives in debug.c, + and is shared by the whole process. + It's not thread-safe. + It's useful when enabling very verbose levels + on selective conditions (such as position in src) */ + +# define RAWLOG(l, ...) { \ + if (l<=g_debuglevel) { \ + ZSTD_DEBUG_PRINT(__VA_ARGS__); \ + } } +# define DEBUGLOG(l, ...) { \ + if (l<=g_debuglevel) { \ + ZSTD_DEBUG_PRINT(__FILE__ ": " __VA_ARGS__); \ + ZSTD_DEBUG_PRINT(" \n"); \ + } } +#else +# define RAWLOG(l, ...) {} /* disabled */ +# define DEBUGLOG(l, ...) {} /* disabled */ +#endif + + +#if defined (__cplusplus) +} +#endif + +#endif /* DEBUG_H_12987983217 */ +/**** ended inlining debug.h ****/ + +int g_debuglevel = DEBUGLEVEL; +/**** ended inlining common/debug.c ****/ +/**** start inlining common/entropy_common.c ****/ +/* ****************************************************************** + * Common functions of New Generation Entropy library + * Copyright (c) Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy + * - Public forum : https://groups.google.com/forum/#!forum/lz4c + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +****************************************************************** */ + +/* ************************************* +* Dependencies +***************************************/ +/**** start inlining mem.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef MEM_H_MODULE +#define MEM_H_MODULE + +#if defined (__cplusplus) +extern "C" { +#endif + +/*-**************************************** +* Dependencies +******************************************/ +#include /* size_t, ptrdiff_t */ +/**** start inlining compiler.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +#ifndef ZSTD_COMPILER_H +#define ZSTD_COMPILER_H + +/*-******************************************************* +* Compiler specifics +*********************************************************/ +/* force inlining */ + +#if !defined(ZSTD_NO_INLINE) +#if (defined(__GNUC__) && !defined(__STRICT_ANSI__)) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ +# define INLINE_KEYWORD inline +#else +# define INLINE_KEYWORD +#endif + +#if defined(__GNUC__) || defined(__ICCARM__) +# define FORCE_INLINE_ATTR __attribute__((always_inline)) +#elif defined(_MSC_VER) +# define FORCE_INLINE_ATTR __forceinline +#else +# define FORCE_INLINE_ATTR +#endif + +#else + +#define INLINE_KEYWORD +#define FORCE_INLINE_ATTR + +#endif + +/** + On MSVC qsort requires that functions passed into it use the __cdecl calling conversion(CC). + This explictly marks such functions as __cdecl so that the code will still compile + if a CC other than __cdecl has been made the default. +*/ +#if defined(_MSC_VER) +# define WIN_CDECL __cdecl +#else +# define WIN_CDECL +#endif + +/** + * FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant + * parameters. They must be inlined for the compiler to eliminate the constant + * branches. + */ +#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR +/** + * HINT_INLINE is used to help the compiler generate better code. It is *not* + * used for "templates", so it can be tweaked based on the compilers + * performance. + * + * gcc-4.8 and gcc-4.9 have been shown to benefit from leaving off the + * always_inline attribute. + * + * clang up to 5.0.0 (trunk) benefit tremendously from the always_inline + * attribute. + */ +#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 4 && __GNUC_MINOR__ >= 8 && __GNUC__ < 5 +# define HINT_INLINE static INLINE_KEYWORD +#else +# define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR +#endif + +/* UNUSED_ATTR tells the compiler it is okay if the function is unused. */ +#if defined(__GNUC__) +# define UNUSED_ATTR __attribute__((unused)) +#else +# define UNUSED_ATTR +#endif + +/* force no inlining */ +#ifdef _MSC_VER +# define FORCE_NOINLINE static __declspec(noinline) +#else +# if defined(__GNUC__) || defined(__ICCARM__) +# define FORCE_NOINLINE static __attribute__((__noinline__)) +# else +# define FORCE_NOINLINE static +# endif +#endif + + +/* target attribute */ +#ifndef __has_attribute + #define __has_attribute(x) 0 /* Compatibility with non-clang compilers. */ +#endif +#if defined(__GNUC__) || defined(__ICCARM__) +# define TARGET_ATTRIBUTE(target) __attribute__((__target__(target))) +#else +# define TARGET_ATTRIBUTE(target) +#endif + +/* Enable runtime BMI2 dispatch based on the CPU. + * Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default. 
+ */ +#ifndef DYNAMIC_BMI2 + #if ((defined(__clang__) && __has_attribute(__target__)) \ + || (defined(__GNUC__) \ + && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \ + && (defined(__x86_64__) || defined(_M_X86)) \ + && !defined(__BMI2__) + # define DYNAMIC_BMI2 1 + #else + # define DYNAMIC_BMI2 0 + #endif +#endif + +/* prefetch + * can be disabled, by declaring NO_PREFETCH build macro */ +#if defined(NO_PREFETCH) +# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */ +# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */ +#else +# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */ +# include /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */ +# define PREFETCH_L1(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0) +# define PREFETCH_L2(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1) +# elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) ) +# define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */) +# define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */) +# elif defined(__aarch64__) +# define PREFETCH_L1(ptr) __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr))) +# define PREFETCH_L2(ptr) __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr))) +# else +# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */ +# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */ +# endif +#endif /* NO_PREFETCH */ + +#define CACHELINE_SIZE 64 + +#define PREFETCH_AREA(p, s) { \ + const char* const _ptr = (const char*)(p); \ + size_t const _size = (size_t)(s); \ + size_t _pos; \ + for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) { \ + PREFETCH_L2(_ptr + _pos); \ + } \ +} + +/* vectorization + * older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax */ +#if !defined(__INTEL_COMPILER) && !defined(__clang__) && defined(__GNUC__) +# if (__GNUC__ == 4 && __GNUC_MINOR__ > 3) || (__GNUC__ >= 5) +# define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize"))) +# else +# define DONT_VECTORIZE _Pragma("GCC optimize(\"no-tree-vectorize\")") +# endif +#else +# define DONT_VECTORIZE +#endif + +/* Tell the compiler that a branch is likely or unlikely. + * Only use these macros if it causes the compiler to generate better code. + * If you can remove a LIKELY/UNLIKELY annotation without speed changes in gcc + * and clang, please do. + */ +#if defined(__GNUC__) +#define LIKELY(x) (__builtin_expect((x), 1)) +#define UNLIKELY(x) (__builtin_expect((x), 0)) +#else +#define LIKELY(x) (x) +#define UNLIKELY(x) (x) +#endif + +/* disable warnings */ +#ifdef _MSC_VER /* Visual Studio */ +# include /* For Visual 2005 */ +# pragma warning(disable : 4100) /* disable: C4100: unreferenced formal parameter */ +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +# pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */ +# pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */ +# pragma warning(disable : 4324) /* disable: C4324: padded structure */ +#endif + +/*Like DYNAMIC_BMI2 but for compile time determination of BMI2 support*/ +#ifndef STATIC_BMI2 +# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) +# ifdef __AVX2__ //MSVC does not have a BMI2 specific flag, but every CPU that supports AVX2 also supports BMI2 +# define STATIC_BMI2 1 +# endif +# endif +#endif + +#ifndef STATIC_BMI2 + #define STATIC_BMI2 0 +#endif + +/* compat. 
with non-clang compilers */ +#ifndef __has_builtin +# define __has_builtin(x) 0 +#endif + +/* compat. with non-clang compilers */ +#ifndef __has_feature +# define __has_feature(x) 0 +#endif + +/* detects whether we are being compiled under msan */ +#ifndef ZSTD_MEMORY_SANITIZER +# if __has_feature(memory_sanitizer) +# define ZSTD_MEMORY_SANITIZER 1 +# else +# define ZSTD_MEMORY_SANITIZER 0 +# endif +#endif + +#if ZSTD_MEMORY_SANITIZER +/* Not all platforms that support msan provide sanitizers/msan_interface.h. + * We therefore declare the functions we need ourselves, rather than trying to + * include the header file... */ +#include /* size_t */ +#define ZSTD_DEPS_NEED_STDINT +/**** skipping file: zstd_deps.h ****/ + +/* Make memory region fully initialized (without changing its contents). */ +void __msan_unpoison(const volatile void *a, size_t size); + +/* Make memory region fully uninitialized (without changing its contents). + This is a legacy interface that does not update origin information. Use + __msan_allocated_memory() instead. */ +void __msan_poison(const volatile void *a, size_t size); + +/* Returns the offset of the first (at least partially) poisoned byte in the + memory range, or -1 if the whole range is good. */ +intptr_t __msan_test_shadow(const volatile void *x, size_t size); +#endif + +/* detects whether we are being compiled under asan */ +#ifndef ZSTD_ADDRESS_SANITIZER +# if __has_feature(address_sanitizer) +# define ZSTD_ADDRESS_SANITIZER 1 +# elif defined(__SANITIZE_ADDRESS__) +# define ZSTD_ADDRESS_SANITIZER 1 +# else +# define ZSTD_ADDRESS_SANITIZER 0 +# endif +#endif + +#if ZSTD_ADDRESS_SANITIZER +/* Not all platforms that support asan provide sanitizers/asan_interface.h. + * We therefore declare the functions we need ourselves, rather than trying to + * include the header file... */ +#include /* size_t */ + +/** + * Marks a memory region ([addr, addr+size)) as unaddressable. + * + * This memory must be previously allocated by your program. Instrumented + * code is forbidden from accessing addresses in this region until it is + * unpoisoned. This function is not guaranteed to poison the entire region - + * it could poison only a subregion of [addr, addr+size) due to ASan + * alignment restrictions. + * + * \note This function is not thread-safe because no two threads can poison or + * unpoison memory in the same memory region simultaneously. + * + * \param addr Start of memory region. + * \param size Size of memory region. */ +void __asan_poison_memory_region(void const volatile *addr, size_t size); + +/** + * Marks a memory region ([addr, addr+size)) as addressable. + * + * This memory must be previously allocated by your program. Accessing + * addresses in this region is allowed until this region is poisoned again. + * This function could unpoison a super-region of [addr, addr+size) due + * to ASan alignment restrictions. + * + * \note This function is not thread-safe because no two threads can + * poison or unpoison memory in the same memory region simultaneously. + * + * \param addr Start of memory region. + * \param size Size of memory region. 
 */
+void __asan_unpoison_memory_region(void const volatile *addr, size_t size);
+#endif
+
+#endif /* ZSTD_COMPILER_H */
+/**** ended inlining compiler.h ****/
+/**** skipping file: debug.h ****/
+/**** skipping file: zstd_deps.h ****/
+
+
+/*-****************************************
+* Compiler specifics
+******************************************/
+#if defined(_MSC_VER) /* Visual Studio */
+# include <stdlib.h> /* _byteswap_ulong */
+# include <intrin.h> /* _byteswap_* */
+#endif
+#if defined(__GNUC__)
+# define MEM_STATIC static __inline __attribute__((unused))
+#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+# define MEM_STATIC static inline
+#elif defined(_MSC_VER)
+# define MEM_STATIC static __inline
+#else
+# define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
+#endif
+
+/*-**************************************************************
+* Basic Types
+*****************************************************************/
+#if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
+# if defined(_AIX)
+# include <inttypes.h>
+# else
+# include <stdint.h> /* intptr_t */
+# endif
+ typedef uint8_t BYTE;
+ typedef uint16_t U16;
+ typedef int16_t S16;
+ typedef uint32_t U32;
+ typedef int32_t S32;
+ typedef uint64_t U64;
+ typedef int64_t S64;
+#else
+# include <limits.h>
+#if CHAR_BIT != 8
+# error "this implementation requires char to be exactly 8-bit type"
+#endif
+ typedef unsigned char BYTE;
+#if USHRT_MAX != 65535
+# error "this implementation requires short to be exactly 16-bit type"
+#endif
+ typedef unsigned short U16;
+ typedef signed short S16;
+#if UINT_MAX != 4294967295
+# error "this implementation requires int to be exactly 32-bit type"
+#endif
+ typedef unsigned int U32;
+ typedef signed int S32;
+/* note : there are no limits defined for long long type in C90.
+ * limits exist in C99, however, in such case, is preferred */ + typedef unsigned long long U64; + typedef signed long long S64; +#endif + + +/*-************************************************************** +* Memory I/O API +*****************************************************************/ +/*=== Static platform detection ===*/ +MEM_STATIC unsigned MEM_32bits(void); +MEM_STATIC unsigned MEM_64bits(void); +MEM_STATIC unsigned MEM_isLittleEndian(void); + +/*=== Native unaligned read/write ===*/ +MEM_STATIC U16 MEM_read16(const void* memPtr); +MEM_STATIC U32 MEM_read32(const void* memPtr); +MEM_STATIC U64 MEM_read64(const void* memPtr); +MEM_STATIC size_t MEM_readST(const void* memPtr); + +MEM_STATIC void MEM_write16(void* memPtr, U16 value); +MEM_STATIC void MEM_write32(void* memPtr, U32 value); +MEM_STATIC void MEM_write64(void* memPtr, U64 value); + +/*=== Little endian unaligned read/write ===*/ +MEM_STATIC U16 MEM_readLE16(const void* memPtr); +MEM_STATIC U32 MEM_readLE24(const void* memPtr); +MEM_STATIC U32 MEM_readLE32(const void* memPtr); +MEM_STATIC U64 MEM_readLE64(const void* memPtr); +MEM_STATIC size_t MEM_readLEST(const void* memPtr); + +MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val); +MEM_STATIC void MEM_writeLE24(void* memPtr, U32 val); +MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32); +MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64); +MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val); + +/*=== Big endian unaligned read/write ===*/ +MEM_STATIC U32 MEM_readBE32(const void* memPtr); +MEM_STATIC U64 MEM_readBE64(const void* memPtr); +MEM_STATIC size_t MEM_readBEST(const void* memPtr); + +MEM_STATIC void MEM_writeBE32(void* memPtr, U32 val32); +MEM_STATIC void MEM_writeBE64(void* memPtr, U64 val64); +MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val); + +/*=== Byteswap ===*/ +MEM_STATIC U32 MEM_swap32(U32 in); +MEM_STATIC U64 MEM_swap64(U64 in); +MEM_STATIC size_t MEM_swapST(size_t in); + + +/*-************************************************************** +* Memory I/O Implementation +*****************************************************************/ +/* MEM_FORCE_MEMORY_ACCESS : + * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. + * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. + * The below switch allow to select different access method for improved performance. + * Method 0 (default) : use `memcpy()`. Safe and portable. + * Method 1 : `__packed` statement. It depends on compiler extension (i.e., not portable). + * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. + * Method 2 : direct access. This method is portable but violate C standard. + * It can generate buggy code on targets depending on alignment. + * In some circumstances, it's the only known way to get the most performance (i.e. GCC + ARMv6) + * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details. 
+ * Prefer these methods in priority order (0 > 1 > 2) + */ +#ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ +# if defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__) +# define MEM_FORCE_MEMORY_ACCESS 1 +# endif +#endif + +MEM_STATIC unsigned MEM_32bits(void) { return sizeof(size_t)==4; } +MEM_STATIC unsigned MEM_64bits(void) { return sizeof(size_t)==8; } + +MEM_STATIC unsigned MEM_isLittleEndian(void) +{ + const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */ + return one.c[0]; +} + +#if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2) + +/* violates C standard, by lying on structure alignment. +Only use if no other choice to achieve best performance on target platform */ +MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; } +MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; } +MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; } +MEM_STATIC size_t MEM_readST(const void* memPtr) { return *(const size_t*) memPtr; } + +MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; } +MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; } +MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; } + +#elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1) + +/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ +/* currently only defined for gcc and icc */ +#if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32)) + __pragma( pack(push, 1) ) + typedef struct { U16 v; } unalign16; + typedef struct { U32 v; } unalign32; + typedef struct { U64 v; } unalign64; + typedef struct { size_t v; } unalignArch; + __pragma( pack(pop) ) +#else + typedef struct { U16 v; } __attribute__((packed)) unalign16; + typedef struct { U32 v; } __attribute__((packed)) unalign32; + typedef struct { U64 v; } __attribute__((packed)) unalign64; + typedef struct { size_t v; } __attribute__((packed)) unalignArch; +#endif + +MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign16*)ptr)->v; } +MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign32*)ptr)->v; } +MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign64*)ptr)->v; } +MEM_STATIC size_t MEM_readST(const void* ptr) { return ((const unalignArch*)ptr)->v; } + +MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign16*)memPtr)->v = value; } +MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign32*)memPtr)->v = value; } +MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign64*)memPtr)->v = value; } + +#else + +/* default method, safe and standard. 
+ can sometimes prove slower */ + +MEM_STATIC U16 MEM_read16(const void* memPtr) +{ + U16 val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val; +} + +MEM_STATIC U32 MEM_read32(const void* memPtr) +{ + U32 val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val; +} + +MEM_STATIC U64 MEM_read64(const void* memPtr) +{ + U64 val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val; +} + +MEM_STATIC size_t MEM_readST(const void* memPtr) +{ + size_t val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val; +} + +MEM_STATIC void MEM_write16(void* memPtr, U16 value) +{ + ZSTD_memcpy(memPtr, &value, sizeof(value)); +} + +MEM_STATIC void MEM_write32(void* memPtr, U32 value) +{ + ZSTD_memcpy(memPtr, &value, sizeof(value)); +} + +MEM_STATIC void MEM_write64(void* memPtr, U64 value) +{ + ZSTD_memcpy(memPtr, &value, sizeof(value)); +} + +#endif /* MEM_FORCE_MEMORY_ACCESS */ + +MEM_STATIC U32 MEM_swap32(U32 in) +{ +#if defined(_MSC_VER) /* Visual Studio */ + return _byteswap_ulong(in); +#elif (defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)) \ + || (defined(__clang__) && __has_builtin(__builtin_bswap32)) + return __builtin_bswap32(in); +#else + return ((in << 24) & 0xff000000 ) | + ((in << 8) & 0x00ff0000 ) | + ((in >> 8) & 0x0000ff00 ) | + ((in >> 24) & 0x000000ff ); +#endif +} + +MEM_STATIC U64 MEM_swap64(U64 in) +{ +#if defined(_MSC_VER) /* Visual Studio */ + return _byteswap_uint64(in); +#elif (defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)) \ + || (defined(__clang__) && __has_builtin(__builtin_bswap64)) + return __builtin_bswap64(in); +#else + return ((in << 56) & 0xff00000000000000ULL) | + ((in << 40) & 0x00ff000000000000ULL) | + ((in << 24) & 0x0000ff0000000000ULL) | + ((in << 8) & 0x000000ff00000000ULL) | + ((in >> 8) & 0x00000000ff000000ULL) | + ((in >> 24) & 0x0000000000ff0000ULL) | + ((in >> 40) & 0x000000000000ff00ULL) | + ((in >> 56) & 0x00000000000000ffULL); +#endif +} + +MEM_STATIC size_t MEM_swapST(size_t in) +{ + if (MEM_32bits()) + return (size_t)MEM_swap32((U32)in); + else + return (size_t)MEM_swap64((U64)in); +} + +/*=== Little endian r/w ===*/ + +MEM_STATIC U16 MEM_readLE16(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_read16(memPtr); + else { + const BYTE* p = (const BYTE*)memPtr; + return (U16)(p[0] + (p[1]<<8)); + } +} + +MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val) +{ + if (MEM_isLittleEndian()) { + MEM_write16(memPtr, val); + } else { + BYTE* p = (BYTE*)memPtr; + p[0] = (BYTE)val; + p[1] = (BYTE)(val>>8); + } +} + +MEM_STATIC U32 MEM_readLE24(const void* memPtr) +{ + return (U32)MEM_readLE16(memPtr) + ((U32)(((const BYTE*)memPtr)[2]) << 16); +} + +MEM_STATIC void MEM_writeLE24(void* memPtr, U32 val) +{ + MEM_writeLE16(memPtr, (U16)val); + ((BYTE*)memPtr)[2] = (BYTE)(val>>16); +} + +MEM_STATIC U32 MEM_readLE32(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_read32(memPtr); + else + return MEM_swap32(MEM_read32(memPtr)); +} + +MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32) +{ + if (MEM_isLittleEndian()) + MEM_write32(memPtr, val32); + else + MEM_write32(memPtr, MEM_swap32(val32)); +} + +MEM_STATIC U64 MEM_readLE64(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_read64(memPtr); + else + return MEM_swap64(MEM_read64(memPtr)); +} + +MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64) +{ + if (MEM_isLittleEndian()) + MEM_write64(memPtr, val64); + else + MEM_write64(memPtr, MEM_swap64(val64)); +} + +MEM_STATIC size_t MEM_readLEST(const void* memPtr) +{ + if (MEM_32bits()) + return 
(size_t)MEM_readLE32(memPtr); + else + return (size_t)MEM_readLE64(memPtr); +} + +MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val) +{ + if (MEM_32bits()) + MEM_writeLE32(memPtr, (U32)val); + else + MEM_writeLE64(memPtr, (U64)val); +} + +/*=== Big endian r/w ===*/ + +MEM_STATIC U32 MEM_readBE32(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_swap32(MEM_read32(memPtr)); + else + return MEM_read32(memPtr); +} + +MEM_STATIC void MEM_writeBE32(void* memPtr, U32 val32) +{ + if (MEM_isLittleEndian()) + MEM_write32(memPtr, MEM_swap32(val32)); + else + MEM_write32(memPtr, val32); +} + +MEM_STATIC U64 MEM_readBE64(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_swap64(MEM_read64(memPtr)); + else + return MEM_read64(memPtr); +} + +MEM_STATIC void MEM_writeBE64(void* memPtr, U64 val64) +{ + if (MEM_isLittleEndian()) + MEM_write64(memPtr, MEM_swap64(val64)); + else + MEM_write64(memPtr, val64); +} + +MEM_STATIC size_t MEM_readBEST(const void* memPtr) +{ + if (MEM_32bits()) + return (size_t)MEM_readBE32(memPtr); + else + return (size_t)MEM_readBE64(memPtr); +} + +MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val) +{ + if (MEM_32bits()) + MEM_writeBE32(memPtr, (U32)val); + else + MEM_writeBE64(memPtr, (U64)val); +} + +/* code only tested on 32 and 64 bits systems */ +MEM_STATIC void MEM_check(void) { DEBUG_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); } + + +#if defined (__cplusplus) +} +#endif + +#endif /* MEM_H_MODULE */ +/**** ended inlining mem.h ****/ +/**** start inlining error_private.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/* Note : this module is expected to remain private, do not expose it */ + +#ifndef ERROR_H_MODULE +#define ERROR_H_MODULE + +#if defined (__cplusplus) +extern "C" { +#endif + + +/* **************************************** +* Dependencies +******************************************/ +/**** start inlining ../zstd_errors.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +#ifndef ZSTD_ERRORS_H_398273423 +#define ZSTD_ERRORS_H_398273423 + +#if defined (__cplusplus) +extern "C" { +#endif + +/*===== dependency =====*/ +#include /* size_t */ + + +/* ===== ZSTDERRORLIB_API : control library symbols visibility ===== */ +#ifndef ZSTDERRORLIB_VISIBILITY +# if defined(__GNUC__) && (__GNUC__ >= 4) +# define ZSTDERRORLIB_VISIBILITY __attribute__ ((visibility ("default"))) +# else +# define ZSTDERRORLIB_VISIBILITY +# endif +#endif +#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1) +# define ZSTDERRORLIB_API __declspec(dllexport) ZSTDERRORLIB_VISIBILITY +#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1) +# define ZSTDERRORLIB_API __declspec(dllimport) ZSTDERRORLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ +#else +# define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBILITY +#endif + +/*-********************************************* + * Error codes list + *-********************************************* + * Error codes _values_ are pinned down since v1.3.1 only. + * Therefore, don't rely on values if you may link to any version < v1.3.1. + * + * Only values < 100 are considered stable. + * + * note 1 : this API shall be used with static linking only. + * dynamic linking is not yet officially supported. + * note 2 : Prefer relying on the enum than on its value whenever possible + * This is the only supported way to use the error list < v1.3.1 + * note 3 : ZSTD_isError() is always correct, whatever the library version. + **********************************************/ +typedef enum { + ZSTD_error_no_error = 0, + ZSTD_error_GENERIC = 1, + ZSTD_error_prefix_unknown = 10, + ZSTD_error_version_unsupported = 12, + ZSTD_error_frameParameter_unsupported = 14, + ZSTD_error_frameParameter_windowTooLarge = 16, + ZSTD_error_corruption_detected = 20, + ZSTD_error_checksum_wrong = 22, + ZSTD_error_dictionary_corrupted = 30, + ZSTD_error_dictionary_wrong = 32, + ZSTD_error_dictionaryCreation_failed = 34, + ZSTD_error_parameter_unsupported = 40, + ZSTD_error_parameter_outOfBound = 42, + ZSTD_error_tableLog_tooLarge = 44, + ZSTD_error_maxSymbolValue_tooLarge = 46, + ZSTD_error_maxSymbolValue_tooSmall = 48, + ZSTD_error_stage_wrong = 60, + ZSTD_error_init_missing = 62, + ZSTD_error_memory_allocation = 64, + ZSTD_error_workSpace_tooSmall= 66, + ZSTD_error_dstSize_tooSmall = 70, + ZSTD_error_srcSize_wrong = 72, + ZSTD_error_dstBuffer_null = 74, + /* following error codes are __NOT STABLE__, they can be removed or changed in future versions */ + ZSTD_error_frameIndex_tooLarge = 100, + ZSTD_error_seekableIO = 102, + ZSTD_error_dstBuffer_wrong = 104, + ZSTD_error_srcBuffer_wrong = 105, + ZSTD_error_maxCode = 120 /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */ +} ZSTD_ErrorCode; + +/*! 
ZSTD_getErrorCode() : + convert a `size_t` function result into a `ZSTD_ErrorCode` enum type, + which can be used to compare with enum list published above */ +ZSTDERRORLIB_API ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult); +ZSTDERRORLIB_API const char* ZSTD_getErrorString(ZSTD_ErrorCode code); /**< Same as ZSTD_getErrorName, but using a `ZSTD_ErrorCode` enum argument */ + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_ERRORS_H_398273423 */ +/**** ended inlining ../zstd_errors.h ****/ +/**** skipping file: zstd_deps.h ****/ + + +/* **************************************** +* Compiler-specific +******************************************/ +#if defined(__GNUC__) +# define ERR_STATIC static __attribute__((unused)) +#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# define ERR_STATIC static inline +#elif defined(_MSC_VER) +# define ERR_STATIC static __inline +#else +# define ERR_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */ +#endif + + +/*-**************************************** +* Customization (error_public.h) +******************************************/ +typedef ZSTD_ErrorCode ERR_enum; +#define PREFIX(name) ZSTD_error_##name + + +/*-**************************************** +* Error codes handling +******************************************/ +#undef ERROR /* already defined on Visual Studio */ +#define ERROR(name) ZSTD_ERROR(name) +#define ZSTD_ERROR(name) ((size_t)-PREFIX(name)) + +ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); } + +ERR_STATIC ERR_enum ERR_getErrorCode(size_t code) { if (!ERR_isError(code)) return (ERR_enum)0; return (ERR_enum) (0-code); } + +/* check and forward error code */ +#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e +#define CHECK_F(f) { CHECK_V_F(_var_err__, f); } + + +/*-**************************************** +* Error Strings +******************************************/ + +const char* ERR_getErrorString(ERR_enum code); /* error_private.c */ + +ERR_STATIC const char* ERR_getErrorName(size_t code) +{ + return ERR_getErrorString(ERR_getErrorCode(code)); +} + +#if defined (__cplusplus) +} +#endif + +#endif /* ERROR_H_MODULE */ +/**** ended inlining error_private.h ****/ +#define FSE_STATIC_LINKING_ONLY /* FSE_MIN_TABLELOG */ +/**** start inlining fse.h ****/ +/* ****************************************************************** + * FSE : Finite State Entropy codec + * Public Prototypes declaration + * Copyright (c) Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+****************************************************************** */ + +#if defined (__cplusplus) +extern "C" { +#endif + +#ifndef FSE_H +#define FSE_H + + +/*-***************************************** +* Dependencies +******************************************/ +/**** skipping file: zstd_deps.h ****/ + + +/*-***************************************** +* FSE_PUBLIC_API : control library symbols visibility +******************************************/ +#if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4) +# define FSE_PUBLIC_API __attribute__ ((visibility ("default"))) +#elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) /* Visual expected */ +# define FSE_PUBLIC_API __declspec(dllexport) +#elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1) +# define FSE_PUBLIC_API __declspec(dllimport) /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ +#else +# define FSE_PUBLIC_API +#endif + +/*------ Version ------*/ +#define FSE_VERSION_MAJOR 0 +#define FSE_VERSION_MINOR 9 +#define FSE_VERSION_RELEASE 0 + +#define FSE_LIB_VERSION FSE_VERSION_MAJOR.FSE_VERSION_MINOR.FSE_VERSION_RELEASE +#define FSE_QUOTE(str) #str +#define FSE_EXPAND_AND_QUOTE(str) FSE_QUOTE(str) +#define FSE_VERSION_STRING FSE_EXPAND_AND_QUOTE(FSE_LIB_VERSION) + +#define FSE_VERSION_NUMBER (FSE_VERSION_MAJOR *100*100 + FSE_VERSION_MINOR *100 + FSE_VERSION_RELEASE) +FSE_PUBLIC_API unsigned FSE_versionNumber(void); /**< library version number; to be used when checking dll version */ + + +/*-**************************************** +* FSE simple functions +******************************************/ +/*! FSE_compress() : + Compress content of buffer 'src', of size 'srcSize', into destination buffer 'dst'. + 'dst' buffer must be already allocated. Compression runs faster is dstCapacity >= FSE_compressBound(srcSize). + @return : size of compressed data (<= dstCapacity). + Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!! + if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression instead. + if FSE_isError(return), compression failed (more details using FSE_getErrorName()) +*/ +FSE_PUBLIC_API size_t FSE_compress(void* dst, size_t dstCapacity, + const void* src, size_t srcSize); + +/*! FSE_decompress(): + Decompress FSE data from buffer 'cSrc', of size 'cSrcSize', + into already allocated destination buffer 'dst', of size 'dstCapacity'. + @return : size of regenerated data (<= maxDstSize), + or an error code, which can be tested using FSE_isError() . + + ** Important ** : FSE_decompress() does not decompress non-compressible nor RLE data !!! + Why ? : making this distinction requires a header. + Header management is intentionally delegated to the user layer, which can better manage special cases. +*/ +FSE_PUBLIC_API size_t FSE_decompress(void* dst, size_t dstCapacity, + const void* cSrc, size_t cSrcSize); + + +/*-***************************************** +* Tool functions +******************************************/ +FSE_PUBLIC_API size_t FSE_compressBound(size_t size); /* maximum compressed size */ + +/* Error Management */ +FSE_PUBLIC_API unsigned FSE_isError(size_t code); /* tells if a return value is an error code */ +FSE_PUBLIC_API const char* FSE_getErrorName(size_t code); /* provides error code string (useful for debugging) */ + + +/*-***************************************** +* FSE advanced functions +******************************************/ +/*! 
FSE_compress2() : + Same as FSE_compress(), but allows the selection of 'maxSymbolValue' and 'tableLog' + Both parameters can be defined as '0' to mean : use default value + @return : size of compressed data + Special values : if return == 0, srcData is not compressible => Nothing is stored within cSrc !!! + if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression. + if FSE_isError(return), it's an error code. +*/ +FSE_PUBLIC_API size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog); + + +/*-***************************************** +* FSE detailed API +******************************************/ +/*! +FSE_compress() does the following: +1. count symbol occurrence from source[] into table count[] (see hist.h) +2. normalize counters so that sum(count[]) == Power_of_2 (2^tableLog) +3. save normalized counters to memory buffer using writeNCount() +4. build encoding table 'CTable' from normalized counters +5. encode the data stream using encoding table 'CTable' + +FSE_decompress() does the following: +1. read normalized counters with readNCount() +2. build decoding table 'DTable' from normalized counters +3. decode the data stream using decoding table 'DTable' + +The following API allows targeting specific sub-functions for advanced tasks. +For example, it's possible to compress several blocks using the same 'CTable', +or to save and provide normalized distribution using external method. +*/ + +/* *** COMPRESSION *** */ + +/*! FSE_optimalTableLog(): + dynamically downsize 'tableLog' when conditions are met. + It saves CPU time, by using smaller tables, while preserving or even improving compression ratio. + @return : recommended tableLog (necessarily <= 'maxTableLog') */ +FSE_PUBLIC_API unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue); + +/*! FSE_normalizeCount(): + normalize counts so that sum(count[]) == Power_of_2 (2^tableLog) + 'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1). + useLowProbCount is a boolean parameter which trades off compressed size for + faster header decoding. When it is set to 1, the compressed data will be slightly + smaller. And when it is set to 0, FSE_readNCount() and FSE_buildDTable() will be + faster. If you are compressing a small amount of data (< 2 KB) then useLowProbCount=0 + is a good default, since header deserialization makes a big speed difference. + Otherwise, useLowProbCount=1 is a good default, since the speed difference is small. + @return : tableLog, + or an errorCode, which can be tested using FSE_isError() */ +FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog, + const unsigned* count, size_t srcSize, unsigned maxSymbolValue, unsigned useLowProbCount); + +/*! FSE_NCountWriteBound(): + Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'. + Typically useful for allocation purpose. */ +FSE_PUBLIC_API size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog); + +/*! FSE_writeNCount(): + Compactly save 'normalizedCounter' into 'buffer'. + @return : size of the compressed table, + or an errorCode, which can be tested using FSE_isError(). */ +FSE_PUBLIC_API size_t FSE_writeNCount (void* buffer, size_t bufferSize, + const short* normalizedCounter, + unsigned maxSymbolValue, unsigned tableLog); + +/*! Constructor and Destructor of FSE_CTable. 
+ Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */ +typedef unsigned FSE_CTable; /* don't allocate that. It's only meant to be more restrictive than void* */ +FSE_PUBLIC_API FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog); +FSE_PUBLIC_API void FSE_freeCTable (FSE_CTable* ct); + +/*! FSE_buildCTable(): + Builds `ct`, which must be already allocated, using FSE_createCTable(). + @return : 0, or an errorCode, which can be tested using FSE_isError() */ +FSE_PUBLIC_API size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog); + +/*! FSE_compress_usingCTable(): + Compress `src` using `ct` into `dst` which must be already allocated. + @return : size of compressed data (<= `dstCapacity`), + or 0 if compressed data could not fit into `dst`, + or an errorCode, which can be tested using FSE_isError() */ +FSE_PUBLIC_API size_t FSE_compress_usingCTable (void* dst, size_t dstCapacity, const void* src, size_t srcSize, const FSE_CTable* ct); + +/*! +Tutorial : +---------- +The first step is to count all symbols. FSE_count() does this job very fast. +Result will be saved into 'count', a table of unsigned int, which must be already allocated, and have 'maxSymbolValuePtr[0]+1' cells. +'src' is a table of bytes of size 'srcSize'. All values within 'src' MUST be <= maxSymbolValuePtr[0] +maxSymbolValuePtr[0] will be updated, with its real value (necessarily <= original value) +FSE_count() will return the number of occurrence of the most frequent symbol. +This can be used to know if there is a single symbol within 'src', and to quickly evaluate its compressibility. +If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()). + +The next step is to normalize the frequencies. +FSE_normalizeCount() will ensure that sum of frequencies is == 2 ^'tableLog'. +It also guarantees a minimum of 1 to any Symbol with frequency >= 1. +You can use 'tableLog'==0 to mean "use default tableLog value". +If you are unsure of which tableLog value to use, you can ask FSE_optimalTableLog(), +which will provide the optimal valid tableLog given sourceSize, maxSymbolValue, and a user-defined maximum (0 means "default"). + +The result of FSE_normalizeCount() will be saved into a table, +called 'normalizedCounter', which is a table of signed short. +'normalizedCounter' must be already allocated, and have at least 'maxSymbolValue+1' cells. +The return value is tableLog if everything proceeded as expected. +It is 0 if there is a single symbol within distribution. +If there is an error (ex: invalid tableLog value), the function will return an ErrorCode (which can be tested using FSE_isError()). + +'normalizedCounter' can be saved in a compact manner to a memory area using FSE_writeNCount(). +'buffer' must be already allocated. +For guaranteed success, buffer size must be at least FSE_headerBound(). +The result of the function is the number of bytes written into 'buffer'. +If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError(); ex : buffer size too small). + +'normalizedCounter' can then be used to create the compression table 'CTable'. +The space required by 'CTable' must be already allocated, using FSE_createCTable(). +You can then use FSE_buildCTable() to fill 'CTable'. +If there is an error, both functions will return an ErrorCode (which can be tested using FSE_isError()). 
+ +'CTable' can then be used to compress 'src', with FSE_compress_usingCTable(). +Similar to FSE_count(), the convention is that 'src' is assumed to be a table of char of size 'srcSize' +The function returns the size of compressed data (without header), necessarily <= `dstCapacity`. +If it returns '0', compressed data could not fit into 'dst'. +If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()). +*/ + + +/* *** DECOMPRESSION *** */ + +/*! FSE_readNCount(): + Read compactly saved 'normalizedCounter' from 'rBuffer'. + @return : size read from 'rBuffer', + or an errorCode, which can be tested using FSE_isError(). + maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */ +FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter, + unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, + const void* rBuffer, size_t rBuffSize); + +/*! FSE_readNCount_bmi2(): + * Same as FSE_readNCount() but pass bmi2=1 when your CPU supports BMI2 and 0 otherwise. + */ +FSE_PUBLIC_API size_t FSE_readNCount_bmi2(short* normalizedCounter, + unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, + const void* rBuffer, size_t rBuffSize, int bmi2); + +/*! Constructor and Destructor of FSE_DTable. + Note that its size depends on 'tableLog' */ +typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */ +FSE_PUBLIC_API FSE_DTable* FSE_createDTable(unsigned tableLog); +FSE_PUBLIC_API void FSE_freeDTable(FSE_DTable* dt); + +/*! FSE_buildDTable(): + Builds 'dt', which must be already allocated, using FSE_createDTable(). + return : 0, or an errorCode, which can be tested using FSE_isError() */ +FSE_PUBLIC_API size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog); + +/*! FSE_decompress_usingDTable(): + Decompress compressed source `cSrc` of size `cSrcSize` using `dt` + into `dst` which must be already allocated. + @return : size of regenerated data (necessarily <= `dstCapacity`), + or an errorCode, which can be tested using FSE_isError() */ +FSE_PUBLIC_API size_t FSE_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt); + +/*! +Tutorial : +---------- +(Note : these functions only decompress FSE-compressed blocks. + If block is uncompressed, use memcpy() instead + If block is a single repeated byte, use memset() instead ) + +The first step is to obtain the normalized frequencies of symbols. +This can be performed by FSE_readNCount() if it was saved using FSE_writeNCount(). +'normalizedCounter' must be already allocated, and have at least 'maxSymbolValuePtr[0]+1' cells of signed short. +In practice, that means it's necessary to know 'maxSymbolValue' beforehand, +or size the table to handle worst case situations (typically 256). +FSE_readNCount() will provide 'tableLog' and 'maxSymbolValue'. +The result of FSE_readNCount() is the number of bytes read from 'rBuffer'. +Note that 'rBufferSize' must be at least 4 bytes, even if useful information is less than that. +If there is an error, the function will return an error code, which can be tested using FSE_isError(). + +The next step is to build the decompression tables 'FSE_DTable' from 'normalizedCounter'. +This is performed by the function FSE_buildDTable(). +The space required by 'FSE_DTable' must be already allocated using FSE_createDTable(). 
+If there is an error, the function will return an error code, which can be tested using FSE_isError(). + +`FSE_DTable` can then be used to decompress `cSrc`, with FSE_decompress_usingDTable(). +`cSrcSize` must be strictly correct, otherwise decompression will fail. +FSE_decompress_usingDTable() result will tell how many bytes were regenerated (<=`dstCapacity`). +If there is an error, the function will return an error code, which can be tested using FSE_isError(). (ex: dst buffer too small) +*/ + +#endif /* FSE_H */ + +#if defined(FSE_STATIC_LINKING_ONLY) && !defined(FSE_H_FSE_STATIC_LINKING_ONLY) +#define FSE_H_FSE_STATIC_LINKING_ONLY + +/* *** Dependency *** */ +/**** start inlining bitstream.h ****/ +/* ****************************************************************** + * bitstream + * Part of FSE library + * Copyright (c) Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +****************************************************************** */ +#ifndef BITSTREAM_H_MODULE +#define BITSTREAM_H_MODULE + +#if defined (__cplusplus) +extern "C" { +#endif +/* +* This API consists of small unitary functions, which must be inlined for best performance. +* Since link-time-optimization is not available for all compilers, +* these functions are defined into a .h to be included. +*/ + +/*-**************************************** +* Dependencies +******************************************/ +/**** skipping file: mem.h ****/ +/**** skipping file: compiler.h ****/ +/**** skipping file: debug.h ****/ +/**** skipping file: error_private.h ****/ + + +/*========================================= +* Target specific +=========================================*/ +#ifndef ZSTD_NO_INTRINSICS +# if defined(__BMI__) && defined(__GNUC__) +# include /* support for bextr (experimental) */ +# elif defined(__ICCARM__) +# include +# endif +#endif + +#define STREAM_ACCUMULATOR_MIN_32 25 +#define STREAM_ACCUMULATOR_MIN_64 57 +#define STREAM_ACCUMULATOR_MIN ((U32)(MEM_32bits() ? STREAM_ACCUMULATOR_MIN_32 : STREAM_ACCUMULATOR_MIN_64)) + + +/*-****************************************** +* bitStream encoding API (write forward) +********************************************/ +/* bitStream can mix input from multiple sources. + * A critical property of these streams is that they encode and decode in **reverse** direction. + * So the first bit sequence you add will be the last to be read, like a LIFO stack. + */ +typedef struct { + size_t bitContainer; + unsigned bitPos; + char* startPtr; + char* ptr; + char* endPtr; +} BIT_CStream_t; + +MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* dstBuffer, size_t dstCapacity); +MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits); +MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC); +MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC); + +/* Start with initCStream, providing the size of buffer to write into. +* bitStream will never write outside of this buffer. +* `dstCapacity` must be >= sizeof(bitD->bitContainer), otherwise @return will be an error code. +* +* bits are first added to a local register. 
+* Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems. +* Writing data into memory is an explicit operation, performed by the flushBits function. +* Hence keep track how many bits are potentially stored into local register to avoid register overflow. +* After a flushBits, a maximum of 7 bits might still be stored into local register. +* +* Avoid storing elements of more than 24 bits if you want compatibility with 32-bits bitstream readers. +* +* Last operation is to close the bitStream. +* The function returns the final size of CStream in bytes. +* If data couldn't fit into `dstBuffer`, it will return a 0 ( == not storable) +*/ + + +/*-******************************************** +* bitStream decoding API (read backward) +**********************************************/ +typedef struct { + size_t bitContainer; + unsigned bitsConsumed; + const char* ptr; + const char* start; + const char* limitPtr; +} BIT_DStream_t; + +typedef enum { BIT_DStream_unfinished = 0, + BIT_DStream_endOfBuffer = 1, + BIT_DStream_completed = 2, + BIT_DStream_overflow = 3 } BIT_DStream_status; /* result of BIT_reloadDStream() */ + /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */ + +MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize); +MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits); +MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD); +MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD); + + +/* Start by invoking BIT_initDStream(). +* A chunk of the bitStream is then stored into a local register. +* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t). +* You can then retrieve bitFields stored into the local register, **in reverse order**. +* Local register is explicitly reloaded from memory by the BIT_reloadDStream() method. +* A reload guarantee a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished. +* Otherwise, it can be less than that, so proceed accordingly. +* Checking if DStream has reached its end can be performed with BIT_endOfDStream(). +*/ + + +/*-**************************************** +* unsafe API +******************************************/ +MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits); +/* faster, but works only if value is "clean", meaning all high bits above nbBits are 0 */ + +MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC); +/* unsafe version; does not check buffer overflow */ + +MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits); +/* faster, but works only if nbBits >= 1 */ + + + +/*-************************************************************** +* Internal functions +****************************************************************/ +MEM_STATIC unsigned BIT_highbit32 (U32 val) +{ + assert(val != 0); + { +# if defined(_MSC_VER) /* Visual */ +# if STATIC_BMI2 == 1 + return _lzcnt_u32(val) ^ 31; +# else + unsigned long r = 0; + return _BitScanReverse(&r, val) ? 
(unsigned)r : 0; +# endif +# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */ + return __builtin_clz (val) ^ 31; +# elif defined(__ICCARM__) /* IAR Intrinsic */ + return 31 - __CLZ(val); +# else /* Software version */ + static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, + 11, 14, 16, 18, 22, 25, 3, 30, + 8, 12, 20, 28, 15, 17, 24, 7, + 19, 27, 23, 6, 26, 5, 4, 31 }; + U32 v = val; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27]; +# endif + } +} + +/*===== Local Constants =====*/ +static const unsigned BIT_mask[] = { + 0, 1, 3, 7, 0xF, 0x1F, + 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, + 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF, + 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, + 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF, 0x7FFFFFF, 0xFFFFFFF, 0x1FFFFFFF, + 0x3FFFFFFF, 0x7FFFFFFF}; /* up to 31 bits */ +#define BIT_MASK_SIZE (sizeof(BIT_mask) / sizeof(BIT_mask[0])) + +/*-************************************************************** +* bitStream encoding +****************************************************************/ +/*! BIT_initCStream() : + * `dstCapacity` must be > sizeof(size_t) + * @return : 0 if success, + * otherwise an error code (can be tested using ERR_isError()) */ +MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, + void* startPtr, size_t dstCapacity) +{ + bitC->bitContainer = 0; + bitC->bitPos = 0; + bitC->startPtr = (char*)startPtr; + bitC->ptr = bitC->startPtr; + bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer); + if (dstCapacity <= sizeof(bitC->bitContainer)) return ERROR(dstSize_tooSmall); + return 0; +} + +/*! BIT_addBits() : + * can add up to 31 bits into `bitC`. + * Note : does not check for register overflow ! */ +MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, + size_t value, unsigned nbBits) +{ + DEBUG_STATIC_ASSERT(BIT_MASK_SIZE == 32); + assert(nbBits < BIT_MASK_SIZE); + assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8); + bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos; + bitC->bitPos += nbBits; +} + +/*! BIT_addBitsFast() : + * works only if `value` is _clean_, + * meaning all high bits above nbBits are 0 */ +MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, + size_t value, unsigned nbBits) +{ + assert((value>>nbBits) == 0); + assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8); + bitC->bitContainer |= value << bitC->bitPos; + bitC->bitPos += nbBits; +} + +/*! BIT_flushBitsFast() : + * assumption : bitContainer has not overflowed + * unsafe version; does not check buffer overflow */ +MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC) +{ + size_t const nbBytes = bitC->bitPos >> 3; + assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8); + assert(bitC->ptr <= bitC->endPtr); + MEM_writeLEST(bitC->ptr, bitC->bitContainer); + bitC->ptr += nbBytes; + bitC->bitPos &= 7; + bitC->bitContainer >>= nbBytes*8; +} + +/*! BIT_flushBits() : + * assumption : bitContainer has not overflowed + * safe version; check for buffer overflow, and prevents it. + * note : does not signal buffer overflow. 
+ * overflow will be revealed later on using BIT_closeCStream() */ +MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC) +{ + size_t const nbBytes = bitC->bitPos >> 3; + assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8); + assert(bitC->ptr <= bitC->endPtr); + MEM_writeLEST(bitC->ptr, bitC->bitContainer); + bitC->ptr += nbBytes; + if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr; + bitC->bitPos &= 7; + bitC->bitContainer >>= nbBytes*8; +} + +/*! BIT_closeCStream() : + * @return : size of CStream, in bytes, + * or 0 if it could not fit into dstBuffer */ +MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC) +{ + BIT_addBitsFast(bitC, 1, 1); /* endMark */ + BIT_flushBits(bitC); + if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */ + return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0); +} + + +/*-******************************************************** +* bitStream decoding +**********************************************************/ +/*! BIT_initDStream() : + * Initialize a BIT_DStream_t. + * `bitD` : a pointer to an already allocated BIT_DStream_t structure. + * `srcSize` must be the *exact* size of the bitStream, in bytes. + * @return : size of stream (== srcSize), or an errorCode if a problem is detected + */ +MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize) +{ + if (srcSize < 1) { ZSTD_memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); } + + bitD->start = (const char*)srcBuffer; + bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer); + + if (srcSize >= sizeof(bitD->bitContainer)) { /* normal case */ + bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer); + bitD->bitContainer = MEM_readLEST(bitD->ptr); + { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; + bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */ + if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ } + } else { + bitD->ptr = bitD->start; + bitD->bitContainer = *(const BYTE*)(bitD->start); + switch(srcSize) + { + case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16); + /* fall-through */ + + case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24); + /* fall-through */ + + case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32); + /* fall-through */ + + case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24; + /* fall-through */ + + case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16; + /* fall-through */ + + case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) << 8; + /* fall-through */ + + default: break; + } + { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; + bitD->bitsConsumed = lastByte ? 
8 - BIT_highbit32(lastByte) : 0; + if (lastByte == 0) return ERROR(corruption_detected); /* endMark not present */ + } + bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8; + } + + return srcSize; +} + +MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getUpperBits(size_t bitContainer, U32 const start) +{ + return bitContainer >> start; +} + +MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits) +{ + U32 const regMask = sizeof(bitContainer)*8 - 1; + /* if start > regMask, bitstream is corrupted, and result is undefined */ + assert(nbBits < BIT_MASK_SIZE); + return (bitContainer >> (start & regMask)) & BIT_mask[nbBits]; +} + +MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits) +{ +#if defined(STATIC_BMI2) && STATIC_BMI2 == 1 + return _bzhi_u64(bitContainer, nbBits); +#else + assert(nbBits < BIT_MASK_SIZE); + return bitContainer & BIT_mask[nbBits]; +#endif +} + +/*! BIT_lookBits() : + * Provides next n bits from local register. + * local register is not modified. + * On 32-bits, maxNbBits==24. + * On 64-bits, maxNbBits==56. + * @return : value extracted */ +MEM_STATIC FORCE_INLINE_ATTR size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits) +{ + /* arbitrate between double-shift and shift+mask */ +#if 1 + /* if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8, + * bitstream is likely corrupted, and result is undefined */ + return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits); +#else + /* this code path is slower on my os-x laptop */ + U32 const regMask = sizeof(bitD->bitContainer)*8 - 1; + return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask); +#endif +} + +/*! BIT_lookBitsFast() : + * unsafe version; only works if nbBits >= 1 */ +MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits) +{ + U32 const regMask = sizeof(bitD->bitContainer)*8 - 1; + assert(nbBits >= 1); + return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask); +} + +MEM_STATIC FORCE_INLINE_ATTR void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits) +{ + bitD->bitsConsumed += nbBits; +} + +/*! BIT_readBits() : + * Read (consume) next n bits from local register and update. + * Pay attention to not read more than nbBits contained into local register. + * @return : extracted value. */ +MEM_STATIC FORCE_INLINE_ATTR size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits) +{ + size_t const value = BIT_lookBits(bitD, nbBits); + BIT_skipBits(bitD, nbBits); + return value; +} + +/*! BIT_readBitsFast() : + * unsafe version; only works only if nbBits >= 1 */ +MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits) +{ + size_t const value = BIT_lookBitsFast(bitD, nbBits); + assert(nbBits >= 1); + BIT_skipBits(bitD, nbBits); + return value; +} + +/*! BIT_reloadDStreamFast() : + * Similar to BIT_reloadDStream(), but with two differences: + * 1. bitsConsumed <= sizeof(bitD->bitContainer)*8 must hold! + * 2. Returns BIT_DStream_overflow when bitD->ptr < bitD->limitPtr, at this + * point you must use BIT_reloadDStream() to reload. 
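+ *
+ *  Illustrative sketch only (not part of the API contract): consumeField() is
+ *  a hypothetical caller routine, and nbBits is assumed to stay within the
+ *  BIT_lookBits() limits stated above.
+ *
+ *      while (BIT_reloadDStreamFast(&bitD) == BIT_DStream_unfinished) {
+ *          consumeField(BIT_readBits(&bitD, nbBits));
+ *      }
+ *
+ *  Once BIT_DStream_overflow is returned, continue with the safe
+ *  BIT_reloadDStream(), and check BIT_endOfDStream(&bitD) after the last read.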
+ */ +MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD) +{ + if (UNLIKELY(bitD->ptr < bitD->limitPtr)) + return BIT_DStream_overflow; + assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8); + bitD->ptr -= bitD->bitsConsumed >> 3; + bitD->bitsConsumed &= 7; + bitD->bitContainer = MEM_readLEST(bitD->ptr); + return BIT_DStream_unfinished; +} + +/*! BIT_reloadDStream() : + * Refill `bitD` from buffer previously set in BIT_initDStream() . + * This function is safe, it guarantees it will not read beyond src buffer. + * @return : status of `BIT_DStream_t` internal register. + * when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */ +MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD) +{ + if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* overflow detected, like end of stream */ + return BIT_DStream_overflow; + + if (bitD->ptr >= bitD->limitPtr) { + return BIT_reloadDStreamFast(bitD); + } + if (bitD->ptr == bitD->start) { + if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer; + return BIT_DStream_completed; + } + /* start < ptr < limitPtr */ + { U32 nbBytes = bitD->bitsConsumed >> 3; + BIT_DStream_status result = BIT_DStream_unfinished; + if (bitD->ptr - nbBytes < bitD->start) { + nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */ + result = BIT_DStream_endOfBuffer; + } + bitD->ptr -= nbBytes; + bitD->bitsConsumed -= nbBytes*8; + bitD->bitContainer = MEM_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD->bitContainer), otherwise bitD->ptr == bitD->start */ + return result; + } +} + +/*! BIT_endOfDStream() : + * @return : 1 if DStream has _exactly_ reached its end (all bits consumed). + */ +MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream) +{ + return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8)); +} + +#if defined (__cplusplus) +} +#endif + +#endif /* BITSTREAM_H_MODULE */ +/**** ended inlining bitstream.h ****/ + + +/* ***************************************** +* Static allocation +*******************************************/ +/* FSE buffer bounds */ +#define FSE_NCOUNTBOUND 512 +#define FSE_BLOCKBOUND(size) ((size) + ((size)>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */) +#define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */ + +/* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */ +#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<((maxTableLog)-1)) + (((maxSymbolValue)+1)*2)) +#define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<<(maxTableLog))) + +/* or use the size to malloc() space directly. Pay attention to alignment restrictions though */ +#define FSE_CTABLE_SIZE(maxTableLog, maxSymbolValue) (FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(FSE_CTable)) +#define FSE_DTABLE_SIZE(maxTableLog) (FSE_DTABLE_SIZE_U32(maxTableLog) * sizeof(FSE_DTable)) + + +/* ***************************************** + * FSE advanced API + ***************************************** */ + +unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus); +/**< same as FSE_optimalTableLog(), which used `minus==2` */ + +/* FSE_compress_wksp() : + * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`). 
+ * FSE_COMPRESS_WKSP_SIZE_U32() provides the minimum size required for `workSpace` as a table of FSE_CTable. + */ +#define FSE_COMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ( FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) + ((maxTableLog > 12) ? (1 << (maxTableLog - 2)) : 1024) ) +size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); + +size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits); +/**< build a fake FSE_CTable, designed for a flat distribution, where each symbol uses nbBits */ + +size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue); +/**< build a fake FSE_CTable, designed to compress always the same symbolValue */ + +/* FSE_buildCTable_wksp() : + * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`). + * `wkspSize` must be >= `FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog)` of `unsigned`. + */ +#define FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog) (maxSymbolValue + 2 + (1ull << (tableLog - 2))) +#define FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) (sizeof(unsigned) * FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog)) +size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); + +#define FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) (sizeof(short) * (maxSymbolValue + 1) + (1ULL << maxTableLog) + 8) +#define FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ((FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) + sizeof(unsigned) - 1) / sizeof(unsigned)) +FSE_PUBLIC_API size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); +/**< Same as FSE_buildDTable(), using an externally allocated `workspace` produced with `FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxSymbolValue)` */ + +size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits); +/**< build a fake FSE_DTable, designed to read a flat distribution where each symbol uses nbBits */ + +size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue); +/**< build a fake FSE_DTable, designed to always generate the same symbolValue */ + +#define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) (FSE_DTABLE_SIZE_U32(maxTableLog) + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) + (FSE_MAX_SYMBOL_VALUE + 1) / 2 + 1) +#define FSE_DECOMPRESS_WKSP_SIZE(maxTableLog, maxSymbolValue) (FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(unsigned)) +size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize); +/**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DECOMPRESS_WKSP_SIZE_U32(maxLog, maxSymbolValue)` */ + +size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2); +/**< Same as FSE_decompress_wksp() but with dynamic BMI2 support. Pass 1 if your CPU supports BMI2 or 0 if it doesn't. 
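+ * Note (informative): within this translation unit, HUF_readStats_wksp()
+ * decodes compact Huffman weight tables through this entry point, calling it
+ * with maxLog == 6 and forwarding its own bmi2 flag; external callers are
+ * assumed to supply bmi2 from their own CPU-capability check.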
*/ + +typedef enum { + FSE_repeat_none, /**< Cannot use the previous table */ + FSE_repeat_check, /**< Can use the previous table but it must be checked */ + FSE_repeat_valid /**< Can use the previous table and it is assumed to be valid */ + } FSE_repeat; + +/* ***************************************** +* FSE symbol compression API +*******************************************/ +/*! + This API consists of small unitary functions, which highly benefit from being inlined. + Hence their body are included in next section. +*/ +typedef struct { + ptrdiff_t value; + const void* stateTable; + const void* symbolTT; + unsigned stateLog; +} FSE_CState_t; + +static void FSE_initCState(FSE_CState_t* CStatePtr, const FSE_CTable* ct); + +static void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* CStatePtr, unsigned symbol); + +static void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* CStatePtr); + +/**< +These functions are inner components of FSE_compress_usingCTable(). +They allow the creation of custom streams, mixing multiple tables and bit sources. + +A key property to keep in mind is that encoding and decoding are done **in reverse direction**. +So the first symbol you will encode is the last you will decode, like a LIFO stack. + +You will need a few variables to track your CStream. They are : + +FSE_CTable ct; // Provided by FSE_buildCTable() +BIT_CStream_t bitStream; // bitStream tracking structure +FSE_CState_t state; // State tracking structure (can have several) + + +The first thing to do is to init bitStream and state. + size_t errorCode = BIT_initCStream(&bitStream, dstBuffer, maxDstSize); + FSE_initCState(&state, ct); + +Note that BIT_initCStream() can produce an error code, so its result should be tested, using FSE_isError(); +You can then encode your input data, byte after byte. +FSE_encodeSymbol() outputs a maximum of 'tableLog' bits at a time. +Remember decoding will be done in reverse direction. + FSE_encodeByte(&bitStream, &state, symbol); + +At any time, you can also add any bit sequence. +Note : maximum allowed nbBits is 25, for compatibility with 32-bits decoders + BIT_addBits(&bitStream, bitField, nbBits); + +The above methods don't commit data to memory, they just store it into local register, for speed. +Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t). +Writing data to memory is a manual operation, performed by the flushBits function. + BIT_flushBits(&bitStream); + +Your last FSE encoding operation shall be to flush your last state value(s). + FSE_flushState(&bitStream, &state); + +Finally, you must close the bitStream. +The function returns the size of CStream in bytes. +If data couldn't fit into dstBuffer, it will return a 0 ( == not compressible) +If there is an error, it returns an errorCode (which can be tested using FSE_isError()). + size_t size = BIT_closeCStream(&bitStream); +*/ + + +/* ***************************************** +* FSE symbol decompression API +*******************************************/ +typedef struct { + size_t state; + const void* table; /* precise table may vary, depending on U16 */ +} FSE_DState_t; + + +static void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt); + +static unsigned char FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD); + +static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr); + +/**< +Let's now decompose FSE_decompress_usingDTable() into its unitary components. 
+You will decode FSE-encoded symbols from the bitStream, +and also any other bitFields you put in, **in reverse order**. + +You will need a few variables to track your bitStream. They are : + +BIT_DStream_t DStream; // Stream context +FSE_DState_t DState; // State context. Multiple ones are possible +FSE_DTable* DTablePtr; // Decoding table, provided by FSE_buildDTable() + +The first thing to do is to init the bitStream. + errorCode = BIT_initDStream(&DStream, srcBuffer, srcSize); + +You should then retrieve your initial state(s) +(in reverse flushing order if you have several ones) : + errorCode = FSE_initDState(&DState, &DStream, DTablePtr); + +You can then decode your data, symbol after symbol. +For information the maximum number of bits read by FSE_decodeSymbol() is 'tableLog'. +Keep in mind that symbols are decoded in reverse order, like a LIFO stack (last in, first out). + unsigned char symbol = FSE_decodeSymbol(&DState, &DStream); + +You can retrieve any bitfield you eventually stored into the bitStream (in reverse order) +Note : maximum allowed nbBits is 25, for 32-bits compatibility + size_t bitField = BIT_readBits(&DStream, nbBits); + +All above operations only read from local register (which size depends on size_t). +Refueling the register from memory is manually performed by the reload method. + endSignal = FSE_reloadDStream(&DStream); + +BIT_reloadDStream() result tells if there is still some more data to read from DStream. +BIT_DStream_unfinished : there is still some data left into the DStream. +BIT_DStream_endOfBuffer : Dstream reached end of buffer. Its container may no longer be completely filled. +BIT_DStream_completed : Dstream reached its exact end, corresponding in general to decompression completed. +BIT_DStream_tooFar : Dstream went too far. Decompression result is corrupted. + +When reaching end of buffer (BIT_DStream_endOfBuffer), progress slowly, notably if you decode multiple symbols per loop, +to properly detect the exact end of stream. +After each decoded symbol, check if DStream is fully consumed using this simple test : + BIT_reloadDStream(&DStream) >= BIT_DStream_completed + +When it's done, verify decompression is fully completed, by checking both DStream and the relevant states. +Checking if DStream has reached its end is performed by : + BIT_endOfDStream(&DStream); +Check also the states. There might be some symbols left there, if some high probability ones (>50%) are possible. + FSE_endOfDState(&DState); +*/ + + +/* ***************************************** +* FSE unsafe API +*******************************************/ +static unsigned char FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD); +/* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */ + + +/* ***************************************** +* Implementation of inlined functions +*******************************************/ +typedef struct { + int deltaFindState; + U32 deltaNbBits; +} FSE_symbolCompressionTransform; /* total 8 bytes */ + +MEM_STATIC void FSE_initCState(FSE_CState_t* statePtr, const FSE_CTable* ct) +{ + const void* ptr = ct; + const U16* u16ptr = (const U16*) ptr; + const U32 tableLog = MEM_read16(ptr); + statePtr->value = (ptrdiff_t)1<stateTable = u16ptr+2; + statePtr->symbolTT = ct + 1 + (tableLog ? (1<<(tableLog-1)) : 1); + statePtr->stateLog = tableLog; +} + + +/*! 
FSE_initCState2() : +* Same as FSE_initCState(), but the first symbol to include (which will be the last to be read) +* uses the smallest state value possible, saving the cost of this symbol */ +MEM_STATIC void FSE_initCState2(FSE_CState_t* statePtr, const FSE_CTable* ct, U32 symbol) +{ + FSE_initCState(statePtr, ct); + { const FSE_symbolCompressionTransform symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol]; + const U16* stateTable = (const U16*)(statePtr->stateTable); + U32 nbBitsOut = (U32)((symbolTT.deltaNbBits + (1<<15)) >> 16); + statePtr->value = (nbBitsOut << 16) - symbolTT.deltaNbBits; + statePtr->value = stateTable[(statePtr->value >> nbBitsOut) + symbolTT.deltaFindState]; + } +} + +MEM_STATIC void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* statePtr, unsigned symbol) +{ + FSE_symbolCompressionTransform const symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol]; + const U16* const stateTable = (const U16*)(statePtr->stateTable); + U32 const nbBitsOut = (U32)((statePtr->value + symbolTT.deltaNbBits) >> 16); + BIT_addBits(bitC, statePtr->value, nbBitsOut); + statePtr->value = stateTable[ (statePtr->value >> nbBitsOut) + symbolTT.deltaFindState]; +} + +MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePtr) +{ + BIT_addBits(bitC, statePtr->value, statePtr->stateLog); + BIT_flushBits(bitC); +} + + +/* FSE_getMaxNbBits() : + * Approximate maximum cost of a symbol, in bits. + * Fractional get rounded up (i.e : a symbol with a normalized frequency of 3 gives the same result as a frequency of 2) + * note 1 : assume symbolValue is valid (<= maxSymbolValue) + * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */ +MEM_STATIC U32 FSE_getMaxNbBits(const void* symbolTTPtr, U32 symbolValue) +{ + const FSE_symbolCompressionTransform* symbolTT = (const FSE_symbolCompressionTransform*) symbolTTPtr; + return (symbolTT[symbolValue].deltaNbBits + ((1<<16)-1)) >> 16; +} + +/* FSE_bitCost() : + * Approximate symbol cost, as fractional value, using fixed-point format (accuracyLog fractional bits) + * note 1 : assume symbolValue is valid (<= maxSymbolValue) + * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */ +MEM_STATIC U32 FSE_bitCost(const void* symbolTTPtr, U32 tableLog, U32 symbolValue, U32 accuracyLog) +{ + const FSE_symbolCompressionTransform* symbolTT = (const FSE_symbolCompressionTransform*) symbolTTPtr; + U32 const minNbBits = symbolTT[symbolValue].deltaNbBits >> 16; + U32 const threshold = (minNbBits+1) << 16; + assert(tableLog < 16); + assert(accuracyLog < 31-tableLog); /* ensure enough room for renormalization double shift */ + { U32 const tableSize = 1 << tableLog; + U32 const deltaFromThreshold = threshold - (symbolTT[symbolValue].deltaNbBits + tableSize); + U32 const normalizedDeltaFromThreshold = (deltaFromThreshold << accuracyLog) >> tableLog; /* linear interpolation (very approximate) */ + U32 const bitMultiplier = 1 << accuracyLog; + assert(symbolTT[symbolValue].deltaNbBits + tableSize <= threshold); + assert(normalizedDeltaFromThreshold <= bitMultiplier); + return (minNbBits+1)*bitMultiplier - normalizedDeltaFromThreshold; + } +} + + +/* ====== Decompression ====== */ + +typedef struct { + U16 tableLog; + U16 fastMode; +} FSE_DTableHeader; /* sizeof U32 */ + +typedef struct +{ + unsigned short newState; + unsigned char symbol; + unsigned char nbBits; +} FSE_decode_t; /* size == U32 */ + +MEM_STATIC void FSE_initDState(FSE_DState_t* DStatePtr, 
BIT_DStream_t* bitD, const FSE_DTable* dt) +{ + const void* ptr = dt; + const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*)ptr; + DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog); + BIT_reloadDStream(bitD); + DStatePtr->table = dt + 1; +} + +MEM_STATIC BYTE FSE_peekSymbol(const FSE_DState_t* DStatePtr) +{ + FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + return DInfo.symbol; +} + +MEM_STATIC void FSE_updateState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) +{ + FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + U32 const nbBits = DInfo.nbBits; + size_t const lowBits = BIT_readBits(bitD, nbBits); + DStatePtr->state = DInfo.newState + lowBits; +} + +MEM_STATIC BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) +{ + FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + U32 const nbBits = DInfo.nbBits; + BYTE const symbol = DInfo.symbol; + size_t const lowBits = BIT_readBits(bitD, nbBits); + + DStatePtr->state = DInfo.newState + lowBits; + return symbol; +} + +/*! FSE_decodeSymbolFast() : + unsafe, only works if no symbol has a probability > 50% */ +MEM_STATIC BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) +{ + FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + U32 const nbBits = DInfo.nbBits; + BYTE const symbol = DInfo.symbol; + size_t const lowBits = BIT_readBitsFast(bitD, nbBits); + + DStatePtr->state = DInfo.newState + lowBits; + return symbol; +} + +MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr) +{ + return DStatePtr->state == 0; +} + + + +#ifndef FSE_COMMONDEFS_ONLY + +/* ************************************************************** +* Tuning parameters +****************************************************************/ +/*!MEMORY_USAGE : +* Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.) +* Increasing memory usage improves compression ratio +* Reduced memory usage can improve speed, due to cache effect +* Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */ +#ifndef FSE_MAX_MEMORY_USAGE +# define FSE_MAX_MEMORY_USAGE 14 +#endif +#ifndef FSE_DEFAULT_MEMORY_USAGE +# define FSE_DEFAULT_MEMORY_USAGE 13 +#endif +#if (FSE_DEFAULT_MEMORY_USAGE > FSE_MAX_MEMORY_USAGE) +# error "FSE_DEFAULT_MEMORY_USAGE must be <= FSE_MAX_MEMORY_USAGE" +#endif + +/*!FSE_MAX_SYMBOL_VALUE : +* Maximum symbol value authorized. 
+* Required for proper stack allocation */ +#ifndef FSE_MAX_SYMBOL_VALUE +# define FSE_MAX_SYMBOL_VALUE 255 +#endif + +/* ************************************************************** +* template functions type & suffix +****************************************************************/ +#define FSE_FUNCTION_TYPE BYTE +#define FSE_FUNCTION_EXTENSION +#define FSE_DECODE_TYPE FSE_decode_t + + +#endif /* !FSE_COMMONDEFS_ONLY */ + + +/* *************************************************************** +* Constants +*****************************************************************/ +#define FSE_MAX_TABLELOG (FSE_MAX_MEMORY_USAGE-2) +#define FSE_MAX_TABLESIZE (1U< FSE_TABLELOG_ABSOLUTE_MAX +# error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported" +#endif + +#define FSE_TABLESTEP(tableSize) (((tableSize)>>1) + ((tableSize)>>3) + 3) + + +#endif /* FSE_STATIC_LINKING_ONLY */ + + +#if defined (__cplusplus) +} +#endif +/**** ended inlining fse.h ****/ +#define HUF_STATIC_LINKING_ONLY /* HUF_TABLELOG_ABSOLUTEMAX */ +/**** start inlining huf.h ****/ +/* ****************************************************************** + * huff0 huffman codec, + * part of Finite State Entropy library + * Copyright (c) Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +****************************************************************** */ + +#if defined (__cplusplus) +extern "C" { +#endif + +#ifndef HUF_H_298734234 +#define HUF_H_298734234 + +/* *** Dependencies *** */ +/**** skipping file: zstd_deps.h ****/ + + +/* *** library symbols visibility *** */ +/* Note : when linking with -fvisibility=hidden on gcc, or by default on Visual, + * HUF symbols remain "private" (internal symbols for library only). + * Set macro FSE_DLL_EXPORT to 1 if you want HUF symbols visible on DLL interface */ +#if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4) +# define HUF_PUBLIC_API __attribute__ ((visibility ("default"))) +#elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) /* Visual expected */ +# define HUF_PUBLIC_API __declspec(dllexport) +#elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1) +# define HUF_PUBLIC_API __declspec(dllimport) /* not required, just to generate faster code (saves a function pointer load from IAT and an indirect jump) */ +#else +# define HUF_PUBLIC_API +#endif + + +/* ========================== */ +/* *** simple functions *** */ +/* ========================== */ + +/** HUF_compress() : + * Compress content from buffer 'src', of size 'srcSize', into buffer 'dst'. + * 'dst' buffer must be already allocated. + * Compression runs faster if `dstCapacity` >= HUF_compressBound(srcSize). + * `srcSize` must be <= `HUF_BLOCKSIZE_MAX` == 128 KB. + * @return : size of compressed data (<= `dstCapacity`). + * Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!! 
+ * if HUF_isError(return), compression failed (more details using HUF_getErrorName()) + */ +HUF_PUBLIC_API size_t HUF_compress(void* dst, size_t dstCapacity, + const void* src, size_t srcSize); + +/** HUF_decompress() : + * Decompress HUF data from buffer 'cSrc', of size 'cSrcSize', + * into already allocated buffer 'dst', of minimum size 'dstSize'. + * `originalSize` : **must** be the ***exact*** size of original (uncompressed) data. + * Note : in contrast with FSE, HUF_decompress can regenerate + * RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data, + * because it knows size to regenerate (originalSize). + * @return : size of regenerated data (== originalSize), + * or an error code, which can be tested using HUF_isError() + */ +HUF_PUBLIC_API size_t HUF_decompress(void* dst, size_t originalSize, + const void* cSrc, size_t cSrcSize); + + +/* *** Tool functions *** */ +#define HUF_BLOCKSIZE_MAX (128 * 1024) /**< maximum input size for a single block compressed with HUF_compress */ +HUF_PUBLIC_API size_t HUF_compressBound(size_t size); /**< maximum compressed size (worst case) */ + +/* Error Management */ +HUF_PUBLIC_API unsigned HUF_isError(size_t code); /**< tells if a return value is an error code */ +HUF_PUBLIC_API const char* HUF_getErrorName(size_t code); /**< provides error code string (useful for debugging) */ + + +/* *** Advanced function *** */ + +/** HUF_compress2() : + * Same as HUF_compress(), but offers control over `maxSymbolValue` and `tableLog`. + * `maxSymbolValue` must be <= HUF_SYMBOLVALUE_MAX . + * `tableLog` must be `<= HUF_TABLELOG_MAX` . */ +HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned tableLog); + +/** HUF_compress4X_wksp() : + * Same as HUF_compress2(), but uses externally allocated `workSpace`. + * `workspace` must have minimum alignment of 4, and be at least as large as HUF_WORKSPACE_SIZE */ +#define HUF_WORKSPACE_SIZE ((6 << 10) + 256) +#define HUF_WORKSPACE_SIZE_U32 (HUF_WORKSPACE_SIZE / sizeof(U32)) +HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned tableLog, + void* workSpace, size_t wkspSize); + +#endif /* HUF_H_298734234 */ + +/* ****************************************************************** + * WARNING !! + * The following section contains advanced and experimental definitions + * which shall never be used in the context of a dynamic library, + * because they are not guaranteed to remain stable in the future. + * Only consider them in association with static linking. + * *****************************************************************/ +#if defined(HUF_STATIC_LINKING_ONLY) && !defined(HUF_H_HUF_STATIC_LINKING_ONLY) +#define HUF_H_HUF_STATIC_LINKING_ONLY + +/* *** Dependencies *** */ +/**** skipping file: mem.h ****/ +#define FSE_STATIC_LINKING_ONLY +/**** skipping file: fse.h ****/ + + +/* *** Constants *** */ +#define HUF_TABLELOG_MAX 12 /* max runtime value of tableLog (due to static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */ +#define HUF_TABLELOG_DEFAULT 11 /* default tableLog value when none specified */ +#define HUF_SYMBOLVALUE_MAX 255 + +#define HUF_TABLELOG_ABSOLUTEMAX 15 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */ +#if (HUF_TABLELOG_MAX > HUF_TABLELOG_ABSOLUTEMAX) +# error "HUF_TABLELOG_MAX is too large !" 
+#endif + + +/* **************************************** +* Static allocation +******************************************/ +/* HUF buffer bounds */ +#define HUF_CTABLEBOUND 129 +#define HUF_BLOCKBOUND(size) (size + (size>>8) + 8) /* only true when incompressible is pre-filtered with fast heuristic */ +#define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size)) /* Macro version, useful for static allocation */ + +/* static allocation of HUF's Compression Table */ +/* this is a private definition, just exposed for allocation and strict aliasing purpose. never EVER access its members directly */ +struct HUF_CElt_s { + U16 val; + BYTE nbBits; +}; /* typedef'd to HUF_CElt */ +typedef struct HUF_CElt_s HUF_CElt; /* consider it an incomplete type */ +#define HUF_CTABLE_SIZE_U32(maxSymbolValue) ((maxSymbolValue)+1) /* Use tables of U32, for proper alignment */ +#define HUF_CTABLE_SIZE(maxSymbolValue) (HUF_CTABLE_SIZE_U32(maxSymbolValue) * sizeof(U32)) +#define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \ + HUF_CElt name[HUF_CTABLE_SIZE_U32(maxSymbolValue)] /* no final ; */ + +/* static allocation of HUF's DTable */ +typedef U32 HUF_DTable; +#define HUF_DTABLE_SIZE(maxTableLog) (1 + (1<<(maxTableLog))) +#define HUF_CREATE_STATIC_DTABLEX1(DTable, maxTableLog) \ + HUF_DTable DTable[HUF_DTABLE_SIZE((maxTableLog)-1)] = { ((U32)((maxTableLog)-1) * 0x01000001) } +#define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \ + HUF_DTable DTable[HUF_DTABLE_SIZE(maxTableLog)] = { ((U32)(maxTableLog) * 0x01000001) } + + +/* **************************************** +* Advanced decompression functions +******************************************/ +size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ +#endif + +size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< decodes RLE and uncompressed */ +size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< considers RLE and uncompressed as errors */ +size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< considers RLE and uncompressed as errors */ +size_t HUF_decompress4X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ +size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */ +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ +size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */ +#endif + + +/* **************************************** + * HUF detailed API + * ****************************************/ + +/*! HUF_compress() does the following: + * 1. count symbol occurrence from source[] into table count[] using FSE_count() (exposed within "fse.h") + * 2. (optional) refine tableLog using HUF_optimalTableLog() + * 3. build Huffman table from count using HUF_buildCTable() + * 4. 
save Huffman table to memory buffer using HUF_writeCTable() + * 5. encode the data stream using HUF_compress4X_usingCTable() + * + * The following API allows targeting specific sub-functions for advanced tasks. + * For example, it's possible to compress several blocks using the same 'CTable', + * or to save and regenerate 'CTable' using external methods. + */ +unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue); +size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits); /* @return : maxNbBits; CTable and count can overlap. In which case, CTable will overwrite count content */ +size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog); +size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog, void* workspace, size_t workspaceSize); +size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable); +size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue); +int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue); + +typedef enum { + HUF_repeat_none, /**< Cannot use the previous table */ + HUF_repeat_check, /**< Can use the previous table but it must be checked. Note : The previous table must have been constructed by HUF_compress{1, 4}X_repeat */ + HUF_repeat_valid /**< Can use the previous table and it is assumed to be valid */ + } HUF_repeat; +/** HUF_compress4X_repeat() : + * Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none. + * If it uses hufTable it does not modify hufTable or repeat. + * If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used. + * If preferRepeat then the old table will always be used if valid. */ +size_t HUF_compress4X_repeat(void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned tableLog, + void* workSpace, size_t wkspSize, /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */ + HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2); + +/** HUF_buildCTable_wksp() : + * Same as HUF_buildCTable(), but using externally allocated scratch buffer. + * `workSpace` must be aligned on 4-bytes boundaries, and its size must be >= HUF_CTABLE_WORKSPACE_SIZE. + */ +#define HUF_CTABLE_WORKSPACE_SIZE_U32 (2*HUF_SYMBOLVALUE_MAX +1 +1) +#define HUF_CTABLE_WORKSPACE_SIZE (HUF_CTABLE_WORKSPACE_SIZE_U32 * sizeof(unsigned)) +size_t HUF_buildCTable_wksp (HUF_CElt* tree, + const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, + void* workSpace, size_t wkspSize); + +/*! HUF_readStats() : + * Read compact Huffman tree, saved by HUF_writeCTable(). + * `huffWeight` is destination buffer. + * @return : size read from `src` , or an error Code . + * Note : Needed by HUF_readCTable() and HUF_readDTableXn() . */ +size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, + U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize); + +/*! HUF_readStats_wksp() : + * Same as HUF_readStats() but takes an external workspace which must be + * 4-byte aligned and its size must be >= HUF_READ_STATS_WORKSPACE_SIZE. + * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0. 
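+ * Informative note: HUF_readStats() above is the convenience wrapper around
+ * this function; its definition below supplies a stack workspace of
+ * HUF_READ_STATS_WORKSPACE_SIZE_U32 unsigned values and passes bmi2 == 0.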
+ */ +#define HUF_READ_STATS_WORKSPACE_SIZE_U32 FSE_DECOMPRESS_WKSP_SIZE_U32(6, HUF_TABLELOG_MAX-1) +#define HUF_READ_STATS_WORKSPACE_SIZE (HUF_READ_STATS_WORKSPACE_SIZE_U32 * sizeof(unsigned)) +size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, + U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize, + void* workspace, size_t wkspSize, + int bmi2); + +/** HUF_readCTable() : + * Loading a CTable saved with HUF_writeCTable() */ +size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned *hasZeroWeights); + +/** HUF_getNbBits() : + * Read nbBits from CTable symbolTable, for symbol `symbolValue` presumed <= HUF_SYMBOLVALUE_MAX + * Note 1 : is not inlined, as HUF_CElt definition is private + * Note 2 : const void* used, so that it can provide a statically allocated table as argument (which uses type U32) */ +U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue); + +/* + * HUF_decompress() does the following: + * 1. select the decompression algorithm (X1, X2) based on pre-computed heuristics + * 2. build Huffman table from save, using HUF_readDTableX?() + * 3. decode 1 or 4 segments in parallel using HUF_decompress?X?_usingDTable() + */ + +/** HUF_selectDecoder() : + * Tells which decoder is likely to decode faster, + * based on a set of pre-computed metrics. + * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 . + * Assumption : 0 < dstSize <= 128 KB */ +U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize); + +/** + * The minimum workspace size for the `workSpace` used in + * HUF_readDTableX1_wksp() and HUF_readDTableX2_wksp(). + * + * The space used depends on HUF_TABLELOG_MAX, ranging from ~1500 bytes when + * HUF_TABLE_LOG_MAX=12 to ~1850 bytes when HUF_TABLE_LOG_MAX=15. + * Buffer overflow errors may potentially occur if code modifications result in + * a required workspace size greater than that specified in the following + * macro. 
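+ *
+ * Illustrative sketch only: dctx is assumed to be a HUF_DTable prepared with
+ * HUF_CREATE_STATIC_DTABLEX2(), and dst / cSrc are caller-owned buffers.
+ *
+ *     U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
+ *     size_t const r = HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize,
+ *                                                    cSrc, cSrcSize,
+ *                                                    workSpace, sizeof(workSpace));
+ *
+ * (test r with HUF_isError(r) before using dst)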
+ */ +#define HUF_DECOMPRESS_WORKSPACE_SIZE ((2 << 10) + (1 << 9)) +#define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32)) + +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_readDTableX1 (HUF_DTable* DTable, const void* src, size_t srcSize); +size_t HUF_readDTableX1_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize); +#endif +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize); +size_t HUF_readDTableX2_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize); +#endif + +size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_decompress4X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); +#endif +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); +#endif + + +/* ====================== */ +/* single stream variants */ +/* ====================== */ + +size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog); +size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); /**< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */ +size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable); +/** HUF_compress1X_repeat() : + * Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none. + * If it uses hufTable it does not modify hufTable or repeat. + * If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used. + * If preferRepeat then the old table will always be used if valid. 
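+ *
+ *  Illustrative call pattern only; workSpace / wkspSize are as for
+ *  HUF_compress1X_wksp(), the remaining buffers are caller-provided, and the
+ *  last two arguments are preferRepeat and bmi2:
+ *
+ *      HUF_repeat repeat = HUF_repeat_none;
+ *      HUF_CREATE_STATIC_CTABLE(hufTable, HUF_SYMBOLVALUE_MAX);
+ *      size_t const r = HUF_compress1X_repeat(dst, dstSize, src, srcSize,
+ *                           maxSymbolValue, tableLog, workSpace, wkspSize,
+ *                           hufTable, &repeat, 0, 0);
+ *
+ *  Reusing the same hufTable and repeat on subsequent blocks lets the previous
+ *  table be reused whenever *repeat != HUF_repeat_none.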
*/ +size_t HUF_compress1X_repeat(void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned tableLog, + void* workSpace, size_t wkspSize, /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */ + HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2); + +size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */ +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */ +#endif + +size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); +size_t HUF_decompress1X_DCtx_wksp (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_decompress1X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ +size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */ +#endif +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ +size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */ +#endif + +size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); /**< automatic selection of sing or double symbol decoder, based on DTable */ +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_decompress1X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); +#endif +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); +#endif + +/* BMI2 variants. + * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0. 
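+ * The flag is typically derived once from a CPU-capability query at the call
+ * site and then threaded through unchanged, which is how the DYNAMIC_BMI2
+ * dispatch wrappers later in this file use it.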
+ */ +size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2); +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2); +#endif +size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2); +size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2); +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2); +#endif + +#endif /* HUF_STATIC_LINKING_ONLY */ + +#if defined (__cplusplus) +} +#endif +/**** ended inlining huf.h ****/ + + +/*=== Version ===*/ +unsigned FSE_versionNumber(void) { return FSE_VERSION_NUMBER; } + + +/*=== Error Management ===*/ +unsigned FSE_isError(size_t code) { return ERR_isError(code); } +const char* FSE_getErrorName(size_t code) { return ERR_getErrorName(code); } + +unsigned HUF_isError(size_t code) { return ERR_isError(code); } +const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); } + + +/*-************************************************************** +* FSE NCount encoding-decoding +****************************************************************/ +static U32 FSE_ctz(U32 val) +{ + assert(val != 0); + { +# if defined(_MSC_VER) /* Visual */ + unsigned long r=0; + return _BitScanForward(&r, val) ? (unsigned)r : 0; +# elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */ + return __builtin_ctz(val); +# elif defined(__ICCARM__) /* IAR Intrinsic */ + return __CTZ(val); +# else /* Software version */ + U32 count = 0; + while ((val & 1) == 0) { + val >>= 1; + ++count; + } + return count; +# endif + } +} + +FORCE_INLINE_TEMPLATE +size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, + const void* headerBuffer, size_t hbSize) +{ + const BYTE* const istart = (const BYTE*) headerBuffer; + const BYTE* const iend = istart + hbSize; + const BYTE* ip = istart; + int nbBits; + int remaining; + int threshold; + U32 bitStream; + int bitCount; + unsigned charnum = 0; + unsigned const maxSV1 = *maxSVPtr + 1; + int previous0 = 0; + + if (hbSize < 8) { + /* This function only works when hbSize >= 8 */ + char buffer[8] = {0}; + ZSTD_memcpy(buffer, headerBuffer, hbSize); + { size_t const countSize = FSE_readNCount(normalizedCounter, maxSVPtr, tableLogPtr, + buffer, sizeof(buffer)); + if (FSE_isError(countSize)) return countSize; + if (countSize > hbSize) return ERROR(corruption_detected); + return countSize; + } } + assert(hbSize >= 8); + + /* init */ + ZSTD_memset(normalizedCounter, 0, (*maxSVPtr+1) * sizeof(normalizedCounter[0])); /* all symbols not present in NCount have a frequency of 0 */ + bitStream = MEM_readLE32(ip); + nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */ + if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge); + bitStream >>= 4; + bitCount = 4; + *tableLogPtr = nbBits; + remaining = (1<> 1; + while (repeats >= 12) { + charnum += 3 * 12; + if (LIKELY(ip <= iend-7)) { + ip += 3; + } else { + bitCount -= (int)(8 * (iend - 7 - ip)); + bitCount &= 31; + ip = iend - 4; + } + bitStream = MEM_readLE32(ip) >> bitCount; + repeats = FSE_ctz(~bitStream 
| 0x80000000) >> 1; + } + charnum += 3 * repeats; + bitStream >>= 2 * repeats; + bitCount += 2 * repeats; + + /* Add the final repeat which isn't 0b11. */ + assert((bitStream & 3) < 3); + charnum += bitStream & 3; + bitCount += 2; + + /* This is an error, but break and return an error + * at the end, because returning out of a loop makes + * it harder for the compiler to optimize. + */ + if (charnum >= maxSV1) break; + + /* We don't need to set the normalized count to 0 + * because we already memset the whole buffer to 0. + */ + + if (LIKELY(ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) { + assert((bitCount >> 3) <= 3); /* For first condition to work */ + ip += bitCount>>3; + bitCount &= 7; + } else { + bitCount -= (int)(8 * (iend - 4 - ip)); + bitCount &= 31; + ip = iend - 4; + } + bitStream = MEM_readLE32(ip) >> bitCount; + } + { + int const max = (2*threshold-1) - remaining; + int count; + + if ((bitStream & (threshold-1)) < (U32)max) { + count = bitStream & (threshold-1); + bitCount += nbBits-1; + } else { + count = bitStream & (2*threshold-1); + if (count >= threshold) count -= max; + bitCount += nbBits; + } + + count--; /* extra accuracy */ + /* When it matters (small blocks), this is a + * predictable branch, because we don't use -1. + */ + if (count >= 0) { + remaining -= count; + } else { + assert(count == -1); + remaining += count; + } + normalizedCounter[charnum++] = (short)count; + previous0 = !count; + + assert(threshold > 1); + if (remaining < threshold) { + /* This branch can be folded into the + * threshold update condition because we + * know that threshold > 1. + */ + if (remaining <= 1) break; + nbBits = BIT_highbit32(remaining) + 1; + threshold = 1 << (nbBits - 1); + } + if (charnum >= maxSV1) break; + + if (LIKELY(ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) { + ip += bitCount>>3; + bitCount &= 7; + } else { + bitCount -= (int)(8 * (iend - 4 - ip)); + bitCount &= 31; + ip = iend - 4; + } + bitStream = MEM_readLE32(ip) >> bitCount; + } } + if (remaining != 1) return ERROR(corruption_detected); + /* Only possible when there are too many zeros. */ + if (charnum > maxSV1) return ERROR(maxSymbolValue_tooSmall); + if (bitCount > 32) return ERROR(corruption_detected); + *maxSVPtr = charnum-1; + + ip += (bitCount+7)>>3; + return ip-istart; +} + +/* Avoids the FORCE_INLINE of the _body() function. 
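+ * The FORCE_INLINE_TEMPLATE body is instantiated here as a plain function for
+ * the default path, and again inside the TARGET_ATTRIBUTE("bmi2") wrapper
+ * below when DYNAMIC_BMI2 is enabled.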
*/ +static size_t FSE_readNCount_body_default( + short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, + const void* headerBuffer, size_t hbSize) +{ + return FSE_readNCount_body(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize); +} + +#if DYNAMIC_BMI2 +TARGET_ATTRIBUTE("bmi2") static size_t FSE_readNCount_body_bmi2( + short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, + const void* headerBuffer, size_t hbSize) +{ + return FSE_readNCount_body(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize); +} +#endif + +size_t FSE_readNCount_bmi2( + short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, + const void* headerBuffer, size_t hbSize, int bmi2) +{ +#if DYNAMIC_BMI2 + if (bmi2) { + return FSE_readNCount_body_bmi2(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize); + } +#endif + (void)bmi2; + return FSE_readNCount_body_default(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize); +} + +size_t FSE_readNCount( + short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, + const void* headerBuffer, size_t hbSize) +{ + return FSE_readNCount_bmi2(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize, /* bmi2 */ 0); +} + + +/*! HUF_readStats() : + Read compact Huffman tree, saved by HUF_writeCTable(). + `huffWeight` is destination buffer. + `rankStats` is assumed to be a table of at least HUF_TABLELOG_MAX U32. + @return : size read from `src` , or an error Code . + Note : Needed by HUF_readCTable() and HUF_readDTableX?() . +*/ +size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, + U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize) +{ + U32 wksp[HUF_READ_STATS_WORKSPACE_SIZE_U32]; + return HUF_readStats_wksp(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, wksp, sizeof(wksp), /* bmi2 */ 0); +} + +FORCE_INLINE_TEMPLATE size_t +HUF_readStats_body(BYTE* huffWeight, size_t hwSize, U32* rankStats, + U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize, + void* workSpace, size_t wkspSize, + int bmi2) +{ + U32 weightTotal; + const BYTE* ip = (const BYTE*) src; + size_t iSize; + size_t oSize; + + if (!srcSize) return ERROR(srcSize_wrong); + iSize = ip[0]; + /* ZSTD_memset(huffWeight, 0, hwSize); *//* is not necessary, even though some analyzer complain ... 
*/ + + if (iSize >= 128) { /* special header */ + oSize = iSize - 127; + iSize = ((oSize+1)/2); + if (iSize+1 > srcSize) return ERROR(srcSize_wrong); + if (oSize >= hwSize) return ERROR(corruption_detected); + ip += 1; + { U32 n; + for (n=0; n> 4; + huffWeight[n+1] = ip[n/2] & 15; + } } } + else { /* header compressed with FSE (normal case) */ + if (iSize+1 > srcSize) return ERROR(srcSize_wrong); + /* max (hwSize-1) values decoded, as last one is implied */ + oSize = FSE_decompress_wksp_bmi2(huffWeight, hwSize-1, ip+1, iSize, 6, workSpace, wkspSize, bmi2); + if (FSE_isError(oSize)) return oSize; + } + + /* collect weight stats */ + ZSTD_memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32)); + weightTotal = 0; + { U32 n; for (n=0; n= HUF_TABLELOG_MAX) return ERROR(corruption_detected); + rankStats[huffWeight[n]]++; + weightTotal += (1 << huffWeight[n]) >> 1; + } } + if (weightTotal == 0) return ERROR(corruption_detected); + + /* get last non-null symbol weight (implied, total must be 2^n) */ + { U32 const tableLog = BIT_highbit32(weightTotal) + 1; + if (tableLog > HUF_TABLELOG_MAX) return ERROR(corruption_detected); + *tableLogPtr = tableLog; + /* determine last weight */ + { U32 const total = 1 << tableLog; + U32 const rest = total - weightTotal; + U32 const verif = 1 << BIT_highbit32(rest); + U32 const lastWeight = BIT_highbit32(rest) + 1; + if (verif != rest) return ERROR(corruption_detected); /* last value must be a clean power of 2 */ + huffWeight[oSize] = (BYTE)lastWeight; + rankStats[lastWeight]++; + } } + + /* check tree construction validity */ + if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected); /* by construction : at least 2 elts of rank 1, must be even */ + + /* results */ + *nbSymbolsPtr = (U32)(oSize+1); + return iSize+1; +} + +/* Avoids the FORCE_INLINE of the _body() function. */ +static size_t HUF_readStats_body_default(BYTE* huffWeight, size_t hwSize, U32* rankStats, + U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize, + void* workSpace, size_t wkspSize) +{ + return HUF_readStats_body(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize, 0); +} + +#if DYNAMIC_BMI2 +static TARGET_ATTRIBUTE("bmi2") size_t HUF_readStats_body_bmi2(BYTE* huffWeight, size_t hwSize, U32* rankStats, + U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize, + void* workSpace, size_t wkspSize) +{ + return HUF_readStats_body(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize, 1); +} +#endif + +size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, U32* rankStats, + U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize, + void* workSpace, size_t wkspSize, + int bmi2) +{ +#if DYNAMIC_BMI2 + if (bmi2) { + return HUF_readStats_body_bmi2(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize); + } +#endif + (void)bmi2; + return HUF_readStats_body_default(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize); +} +/**** ended inlining common/entropy_common.c ****/ +/**** start inlining common/error_private.c ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). 
+ * You may select, at your option, one of the above-listed licenses. + */ + +/* The purpose of this file is to have a single list of error strings embedded in binary */ + +/**** skipping file: error_private.h ****/ + +const char* ERR_getErrorString(ERR_enum code) +{ +#ifdef ZSTD_STRIP_ERROR_STRINGS + (void)code; + return "Error strings stripped"; +#else + static const char* const notErrorCode = "Unspecified error code"; + switch( code ) + { + case PREFIX(no_error): return "No error detected"; + case PREFIX(GENERIC): return "Error (generic)"; + case PREFIX(prefix_unknown): return "Unknown frame descriptor"; + case PREFIX(version_unsupported): return "Version not supported"; + case PREFIX(frameParameter_unsupported): return "Unsupported frame parameter"; + case PREFIX(frameParameter_windowTooLarge): return "Frame requires too much memory for decoding"; + case PREFIX(corruption_detected): return "Corrupted block detected"; + case PREFIX(checksum_wrong): return "Restored data doesn't match checksum"; + case PREFIX(parameter_unsupported): return "Unsupported parameter"; + case PREFIX(parameter_outOfBound): return "Parameter is out of bound"; + case PREFIX(init_missing): return "Context should be init first"; + case PREFIX(memory_allocation): return "Allocation error : not enough memory"; + case PREFIX(workSpace_tooSmall): return "workSpace buffer is not large enough"; + case PREFIX(stage_wrong): return "Operation not authorized at current processing stage"; + case PREFIX(tableLog_tooLarge): return "tableLog requires too much memory : unsupported"; + case PREFIX(maxSymbolValue_tooLarge): return "Unsupported max Symbol Value : too large"; + case PREFIX(maxSymbolValue_tooSmall): return "Specified maxSymbolValue is too small"; + case PREFIX(dictionary_corrupted): return "Dictionary is corrupted"; + case PREFIX(dictionary_wrong): return "Dictionary mismatch"; + case PREFIX(dictionaryCreation_failed): return "Cannot create Dictionary from provided samples"; + case PREFIX(dstSize_tooSmall): return "Destination buffer is too small"; + case PREFIX(srcSize_wrong): return "Src size is incorrect"; + case PREFIX(dstBuffer_null): return "Operation on NULL destination buffer"; + /* following error codes are not stable and may be removed or changed in a future version */ + case PREFIX(frameIndex_tooLarge): return "Frame index is too large"; + case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking"; + case PREFIX(dstBuffer_wrong): return "Destination buffer is wrong"; + case PREFIX(srcBuffer_wrong): return "Source buffer is wrong"; + case PREFIX(maxCode): + default: return notErrorCode; + } +#endif +} +/**** ended inlining common/error_private.c ****/ +/**** start inlining common/fse_decompress.c ****/ +/* ****************************************************************** + * FSE : Finite State Entropy decoder + * Copyright (c) Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy + * - Public forum : https://groups.google.com/forum/#!forum/lz4c + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+****************************************************************** */ + + +/* ************************************************************** +* Includes +****************************************************************/ +/**** skipping file: debug.h ****/ +/**** skipping file: bitstream.h ****/ +/**** skipping file: compiler.h ****/ +#define FSE_STATIC_LINKING_ONLY +/**** skipping file: fse.h ****/ +/**** skipping file: error_private.h ****/ +#define ZSTD_DEPS_NEED_MALLOC +/**** skipping file: zstd_deps.h ****/ + + +/* ************************************************************** +* Error Management +****************************************************************/ +#define FSE_isError ERR_isError +#define FSE_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */ + + +/* ************************************************************** +* Templates +****************************************************************/ +/* + designed to be included + for type-specific functions (template emulation in C) + Objective is to write these functions only once, for improved maintenance +*/ + +/* safety checks */ +#ifndef FSE_FUNCTION_EXTENSION +# error "FSE_FUNCTION_EXTENSION must be defined" +#endif +#ifndef FSE_FUNCTION_TYPE +# error "FSE_FUNCTION_TYPE must be defined" +#endif + +/* Function names */ +#define FSE_CAT(X,Y) X##Y +#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y) +#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y) + + +/* Function templates */ +FSE_DTable* FSE_createDTable (unsigned tableLog) +{ + if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX; + return (FSE_DTable*)ZSTD_malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) ); +} + +void FSE_freeDTable (FSE_DTable* dt) +{ + ZSTD_free(dt); +} + +static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize) +{ + void* const tdPtr = dt+1; /* because *dt is unsigned, 32-bits aligned on 32-bits */ + FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (tdPtr); + U16* symbolNext = (U16*)workSpace; + BYTE* spread = (BYTE*)(symbolNext + maxSymbolValue + 1); + + U32 const maxSV1 = maxSymbolValue + 1; + U32 const tableSize = 1 << tableLog; + U32 highThreshold = tableSize-1; + + /* Sanity Checks */ + if (FSE_BUILD_DTABLE_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(maxSymbolValue_tooLarge); + if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge); + if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); + + /* Init, lay down lowprob symbols */ + { FSE_DTableHeader DTableH; + DTableH.tableLog = (U16)tableLog; + DTableH.fastMode = 1; + { S16 const largeLimit= (S16)(1 << (tableLog-1)); + U32 s; + for (s=0; s= largeLimit) DTableH.fastMode=0; + symbolNext[s] = normalizedCounter[s]; + } } } + ZSTD_memcpy(dt, &DTableH, sizeof(DTableH)); + } + + /* Spread symbols */ + if (highThreshold == tableSize - 1) { + size_t const tableMask = tableSize-1; + size_t const step = FSE_TABLESTEP(tableSize); + /* First lay down the symbols in order. + * We use a uint64_t to lay down 8 bytes at a time. This reduces branch + * misses since small blocks generally have small table logs, so nearly + * all symbols have counts <= 8. We ensure we have 8 bytes at the end of + * our buffer to handle the over-write. 
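 *
 * (Editorial sketch, not part of the upstream zstd sources: the first stage is
 * roughly the loop below, using the locals of FSE_buildDTable_internal above.
 * `sv` holds the current symbol repeated in all 8 bytes, so a single 64-bit
 * store fills 8 table cells at once; writing past `pos + n` is harmless because
 * the workspace reserves 8 spare bytes at the end.)
 *
 *     U64 sv = 0;
 *     size_t pos = 0;
 *     for (U32 s = 0; s < maxSV1; ++s, sv += 0x0101010101010101ULL) {
 *         int const n = normalizedCounter[s];   // cells owned by symbol s
 *         MEM_write64(spread + pos, sv);        // unconditional 8-byte store
 *         for (int i = 8; i < n; i += 8)        // rare: counts above 8
 *             MEM_write64(spread + pos + i, sv);
 *         pos += n;
 *     }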
+ */ + { + U64 const add = 0x0101010101010101ull; + size_t pos = 0; + U64 sv = 0; + U32 s; + for (s=0; s highThreshold) position = (position + step) & tableMask; /* lowprob area */ + } } + if (position!=0) return ERROR(GENERIC); /* position must reach all cells once, otherwise normalizedCounter is incorrect */ + } + + /* Build Decoding table */ + { U32 u; + for (u=0; utableLog = 0; + DTableH->fastMode = 0; + + cell->newState = 0; + cell->symbol = symbolValue; + cell->nbBits = 0; + + return 0; +} + + +size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits) +{ + void* ptr = dt; + FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr; + void* dPtr = dt + 1; + FSE_decode_t* const dinfo = (FSE_decode_t*)dPtr; + const unsigned tableSize = 1 << nbBits; + const unsigned tableMask = tableSize - 1; + const unsigned maxSV1 = tableMask+1; + unsigned s; + + /* Sanity checks */ + if (nbBits < 1) return ERROR(GENERIC); /* min size */ + + /* Build Decoding Table */ + DTableH->tableLog = (U16)nbBits; + DTableH->fastMode = 1; + for (s=0; s sizeof(bitD.bitContainer)*8) /* This test must be static */ + BIT_reloadDStream(&bitD); + + op[1] = FSE_GETSYMBOL(&state2); + + if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */ + { if (BIT_reloadDStream(&bitD) > BIT_DStream_unfinished) { op+=2; break; } } + + op[2] = FSE_GETSYMBOL(&state1); + + if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */ + BIT_reloadDStream(&bitD); + + op[3] = FSE_GETSYMBOL(&state2); + } + + /* tail */ + /* note : BIT_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly BIT_DStream_completed */ + while (1) { + if (op>(omax-2)) return ERROR(dstSize_tooSmall); + *op++ = FSE_GETSYMBOL(&state1); + if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) { + *op++ = FSE_GETSYMBOL(&state2); + break; + } + + if (op>(omax-2)) return ERROR(dstSize_tooSmall); + *op++ = FSE_GETSYMBOL(&state2); + if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) { + *op++ = FSE_GETSYMBOL(&state1); + break; + } } + + return op-ostart; +} + + +size_t FSE_decompress_usingDTable(void* dst, size_t originalSize, + const void* cSrc, size_t cSrcSize, + const FSE_DTable* dt) +{ + const void* ptr = dt; + const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr; + const U32 fastMode = DTableH->fastMode; + + /* select fast mode (static) */ + if (fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1); + return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0); +} + + +size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize) +{ + return FSE_decompress_wksp_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, /* bmi2 */ 0); +} + +typedef struct { + short ncount[FSE_MAX_SYMBOL_VALUE + 1]; + FSE_DTable dtable[1]; /* Dynamically sized */ +} FSE_DecompressWksp; + + +FORCE_INLINE_TEMPLATE size_t FSE_decompress_wksp_body( + void* dst, size_t dstCapacity, + const void* cSrc, size_t cSrcSize, + unsigned maxLog, void* workSpace, size_t wkspSize, + int bmi2) +{ + const BYTE* const istart = (const BYTE*)cSrc; + const BYTE* ip = istart; + unsigned tableLog; + unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE; + FSE_DecompressWksp* const wksp = (FSE_DecompressWksp*)workSpace; + + DEBUG_STATIC_ASSERT((FSE_MAX_SYMBOL_VALUE + 1) % 2 == 0); + if (wkspSize < sizeof(*wksp)) return ERROR(GENERIC); + + /* normal FSE decoding mode */ + { + size_t const 
NCountLength = FSE_readNCount_bmi2(wksp->ncount, &maxSymbolValue, &tableLog, istart, cSrcSize, bmi2); + if (FSE_isError(NCountLength)) return NCountLength; + if (tableLog > maxLog) return ERROR(tableLog_tooLarge); + assert(NCountLength <= cSrcSize); + ip += NCountLength; + cSrcSize -= NCountLength; + } + + if (FSE_DECOMPRESS_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(tableLog_tooLarge); + workSpace = wksp->dtable + FSE_DTABLE_SIZE_U32(tableLog); + wkspSize -= sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog); + + CHECK_F( FSE_buildDTable_internal(wksp->dtable, wksp->ncount, maxSymbolValue, tableLog, workSpace, wkspSize) ); + + { + const void* ptr = wksp->dtable; + const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr; + const U32 fastMode = DTableH->fastMode; + + /* select fast mode (static) */ + if (fastMode) return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 1); + return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 0); + } +} + +/* Avoids the FORCE_INLINE of the _body() function. */ +static size_t FSE_decompress_wksp_body_default(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize) +{ + return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 0); +} + +#if DYNAMIC_BMI2 +TARGET_ATTRIBUTE("bmi2") static size_t FSE_decompress_wksp_body_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize) +{ + return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 1); +} +#endif + +size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2) +{ +#if DYNAMIC_BMI2 + if (bmi2) { + return FSE_decompress_wksp_body_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize); + } +#endif + (void)bmi2; + return FSE_decompress_wksp_body_default(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize); +} + + +typedef FSE_DTable DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)]; + +#ifndef ZSTD_NO_UNUSED_FUNCTIONS +size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) { + U32 wksp[FSE_BUILD_DTABLE_WKSP_SIZE_U32(FSE_TABLELOG_ABSOLUTE_MAX, FSE_MAX_SYMBOL_VALUE)]; + return FSE_buildDTable_wksp(dt, normalizedCounter, maxSymbolValue, tableLog, wksp, sizeof(wksp)); +} + +size_t FSE_decompress(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize) +{ + /* Static analyzer seems unable to understand this table will be properly initialized later */ + U32 wksp[FSE_DECOMPRESS_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)]; + return FSE_decompress_wksp(dst, dstCapacity, cSrc, cSrcSize, FSE_MAX_TABLELOG, wksp, sizeof(wksp)); +} +#endif + + +#endif /* FSE_COMMONDEFS_ONLY */ +/**** ended inlining common/fse_decompress.c ****/ +/**** start inlining common/threading.c ****/ +/** + * Copyright (c) 2016 Tino Reichardt + * All rights reserved. + * + * You can contact the author at: + * - zstdmt source repository: https://github.com/mcmilk/zstdmt + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +/** + * This file will hold wrapper for systems, which do not support pthreads + */ + +/**** start inlining threading.h ****/ +/** + * Copyright (c) 2016 Tino Reichardt + * All rights reserved. + * + * You can contact the author at: + * - zstdmt source repository: https://github.com/mcmilk/zstdmt + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef THREADING_H_938743 +#define THREADING_H_938743 + +/**** skipping file: debug.h ****/ + +#if defined (__cplusplus) +extern "C" { +#endif + +#if defined(ZSTD_MULTITHREAD) && defined(_WIN32) + +/** + * Windows minimalist Pthread Wrapper, based on : + * http://www.cse.wustl.edu/~schmidt/win32-cv-1.html + */ +#ifdef WINVER +# undef WINVER +#endif +#define WINVER 0x0600 + +#ifdef _WIN32_WINNT +# undef _WIN32_WINNT +#endif +#define _WIN32_WINNT 0x0600 + +#ifndef WIN32_LEAN_AND_MEAN +# define WIN32_LEAN_AND_MEAN +#endif + +#undef ERROR /* reported already defined on VS 2015 (Rich Geldreich) */ +#include +#undef ERROR +#define ERROR(name) ZSTD_ERROR(name) + + +/* mutex */ +#define ZSTD_pthread_mutex_t CRITICAL_SECTION +#define ZSTD_pthread_mutex_init(a, b) ((void)(b), InitializeCriticalSection((a)), 0) +#define ZSTD_pthread_mutex_destroy(a) DeleteCriticalSection((a)) +#define ZSTD_pthread_mutex_lock(a) EnterCriticalSection((a)) +#define ZSTD_pthread_mutex_unlock(a) LeaveCriticalSection((a)) + +/* condition variable */ +#define ZSTD_pthread_cond_t CONDITION_VARIABLE +#define ZSTD_pthread_cond_init(a, b) ((void)(b), InitializeConditionVariable((a)), 0) +#define ZSTD_pthread_cond_destroy(a) ((void)(a)) +#define ZSTD_pthread_cond_wait(a, b) SleepConditionVariableCS((a), (b), INFINITE) +#define ZSTD_pthread_cond_signal(a) WakeConditionVariable((a)) +#define ZSTD_pthread_cond_broadcast(a) WakeAllConditionVariable((a)) + +/* ZSTD_pthread_create() and ZSTD_pthread_join() */ +typedef struct { + HANDLE handle; + void* (*start_routine)(void*); + void* arg; +} ZSTD_pthread_t; + +int ZSTD_pthread_create(ZSTD_pthread_t* thread, const void* unused, + void* (*start_routine) (void*), void* arg); + +int ZSTD_pthread_join(ZSTD_pthread_t thread, void** value_ptr); + +/** + * add here more wrappers as required + */ + + +#elif defined(ZSTD_MULTITHREAD) /* posix assumed ; need a better detection method */ +/* === POSIX Systems === */ +# include + +#if DEBUGLEVEL < 1 + +#define ZSTD_pthread_mutex_t pthread_mutex_t +#define ZSTD_pthread_mutex_init(a, b) pthread_mutex_init((a), (b)) +#define ZSTD_pthread_mutex_destroy(a) pthread_mutex_destroy((a)) +#define ZSTD_pthread_mutex_lock(a) pthread_mutex_lock((a)) +#define ZSTD_pthread_mutex_unlock(a) pthread_mutex_unlock((a)) + +#define ZSTD_pthread_cond_t pthread_cond_t +#define ZSTD_pthread_cond_init(a, b) pthread_cond_init((a), (b)) +#define ZSTD_pthread_cond_destroy(a) pthread_cond_destroy((a)) +#define ZSTD_pthread_cond_wait(a, b) pthread_cond_wait((a), (b)) +#define ZSTD_pthread_cond_signal(a) pthread_cond_signal((a)) +#define ZSTD_pthread_cond_broadcast(a) pthread_cond_broadcast((a)) + +#define ZSTD_pthread_t pthread_t +#define ZSTD_pthread_create(a, b, c, d) pthread_create((a), (b), (c), (d)) +#define ZSTD_pthread_join(a, b) pthread_join((a),(b)) + +#else /* DEBUGLEVEL >= 1 */ + +/* Debug implementation of threading. 
+ * In this implementation we use pointers for mutexes and condition variables. + * This way, if we forget to init/destroy them the program will crash or ASAN + * will report leaks. + */ + +#define ZSTD_pthread_mutex_t pthread_mutex_t* +int ZSTD_pthread_mutex_init(ZSTD_pthread_mutex_t* mutex, pthread_mutexattr_t const* attr); +int ZSTD_pthread_mutex_destroy(ZSTD_pthread_mutex_t* mutex); +#define ZSTD_pthread_mutex_lock(a) pthread_mutex_lock(*(a)) +#define ZSTD_pthread_mutex_unlock(a) pthread_mutex_unlock(*(a)) + +#define ZSTD_pthread_cond_t pthread_cond_t* +int ZSTD_pthread_cond_init(ZSTD_pthread_cond_t* cond, pthread_condattr_t const* attr); +int ZSTD_pthread_cond_destroy(ZSTD_pthread_cond_t* cond); +#define ZSTD_pthread_cond_wait(a, b) pthread_cond_wait(*(a), *(b)) +#define ZSTD_pthread_cond_signal(a) pthread_cond_signal(*(a)) +#define ZSTD_pthread_cond_broadcast(a) pthread_cond_broadcast(*(a)) + +#define ZSTD_pthread_t pthread_t +#define ZSTD_pthread_create(a, b, c, d) pthread_create((a), (b), (c), (d)) +#define ZSTD_pthread_join(a, b) pthread_join((a),(b)) + +#endif + +#else /* ZSTD_MULTITHREAD not defined */ +/* No multithreading support */ + +typedef int ZSTD_pthread_mutex_t; +#define ZSTD_pthread_mutex_init(a, b) ((void)(a), (void)(b), 0) +#define ZSTD_pthread_mutex_destroy(a) ((void)(a)) +#define ZSTD_pthread_mutex_lock(a) ((void)(a)) +#define ZSTD_pthread_mutex_unlock(a) ((void)(a)) + +typedef int ZSTD_pthread_cond_t; +#define ZSTD_pthread_cond_init(a, b) ((void)(a), (void)(b), 0) +#define ZSTD_pthread_cond_destroy(a) ((void)(a)) +#define ZSTD_pthread_cond_wait(a, b) ((void)(a), (void)(b)) +#define ZSTD_pthread_cond_signal(a) ((void)(a)) +#define ZSTD_pthread_cond_broadcast(a) ((void)(a)) + +/* do not use ZSTD_pthread_t */ + +#endif /* ZSTD_MULTITHREAD */ + +#if defined (__cplusplus) +} +#endif + +#endif /* THREADING_H_938743 */ +/**** ended inlining threading.h ****/ + +/* create fake symbol to avoid empty translation unit warning */ +int g_ZSTD_threading_useless_symbol; + +#if defined(ZSTD_MULTITHREAD) && defined(_WIN32) + +/** + * Windows minimalist Pthread Wrapper, based on : + * http://www.cse.wustl.edu/~schmidt/win32-cv-1.html + */ + + +/* === Dependencies === */ +#include +#include + + +/* === Implementation === */ + +static unsigned __stdcall worker(void *arg) +{ + ZSTD_pthread_t* const thread = (ZSTD_pthread_t*) arg; + thread->arg = thread->start_routine(thread->arg); + return 0; +} + +int ZSTD_pthread_create(ZSTD_pthread_t* thread, const void* unused, + void* (*start_routine) (void*), void* arg) +{ + (void)unused; + thread->arg = arg; + thread->start_routine = start_routine; + thread->handle = (HANDLE) _beginthreadex(NULL, 0, worker, thread, 0, NULL); + + if (!thread->handle) + return errno; + else + return 0; +} + +int ZSTD_pthread_join(ZSTD_pthread_t thread, void **value_ptr) +{ + DWORD result; + + if (!thread.handle) return 0; + + result = WaitForSingleObject(thread.handle, INFINITE); + switch (result) { + case WAIT_OBJECT_0: + if (value_ptr) *value_ptr = thread.arg; + return 0; + case WAIT_ABANDONED: + return EINVAL; + default: + return GetLastError(); + } +} + +#endif /* ZSTD_MULTITHREAD */ + +#if defined(ZSTD_MULTITHREAD) && DEBUGLEVEL >= 1 && !defined(_WIN32) + +#define ZSTD_DEPS_NEED_MALLOC +/**** skipping file: zstd_deps.h ****/ + +int ZSTD_pthread_mutex_init(ZSTD_pthread_mutex_t* mutex, pthread_mutexattr_t const* attr) +{ + *mutex = (pthread_mutex_t*)ZSTD_malloc(sizeof(pthread_mutex_t)); + if (!*mutex) + return 1; + return pthread_mutex_init(*mutex, attr); +} + 
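/* Editorial example (not part of the upstream sources): whichever of the three
 * branches above is selected (Win32 CRITICAL_SECTION, real pthreads, or the
 * single-threaded no-ops), call sites use the same ZSTD_pthread_* spelling.
 * A minimal sketch with hypothetical names, kept in #if 0 so it never compiles:
 */
#if 0
static ZSTD_pthread_mutex_t g_example_mutex;   /* illustration only */
static int g_example_counter = 0;

static int example_init(void)
{
    return ZSTD_pthread_mutex_init(&g_example_mutex, NULL);   /* 0 on success in every configuration */
}

static void example_locked_increment(void)
{
    ZSTD_pthread_mutex_lock(&g_example_mutex);
    g_example_counter++;
    ZSTD_pthread_mutex_unlock(&g_example_mutex);
}

static void example_teardown(void)
{
    ZSTD_pthread_mutex_destroy(&g_example_mutex);   /* the DEBUGLEVEL>=1 build frees the heap-allocated mutex here */
}
#endif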
+int ZSTD_pthread_mutex_destroy(ZSTD_pthread_mutex_t* mutex) +{ + if (!*mutex) + return 0; + { + int const ret = pthread_mutex_destroy(*mutex); + ZSTD_free(*mutex); + return ret; + } +} + +int ZSTD_pthread_cond_init(ZSTD_pthread_cond_t* cond, pthread_condattr_t const* attr) +{ + *cond = (pthread_cond_t*)ZSTD_malloc(sizeof(pthread_cond_t)); + if (!*cond) + return 1; + return pthread_cond_init(*cond, attr); +} + +int ZSTD_pthread_cond_destroy(ZSTD_pthread_cond_t* cond) +{ + if (!*cond) + return 0; + { + int const ret = pthread_cond_destroy(*cond); + ZSTD_free(*cond); + return ret; + } +} + +#endif +/**** ended inlining common/threading.c ****/ +/**** start inlining common/pool.c ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + +/* ====== Dependencies ======= */ +/**** skipping file: zstd_deps.h ****/ +/**** skipping file: debug.h ****/ +/**** start inlining zstd_internal.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_CCOMMON_H_MODULE +#define ZSTD_CCOMMON_H_MODULE + +/* this module contains definitions which must be identical + * across compression, decompression and dictBuilder. + * It also contains a few functions useful to at least 2 of them + * and which benefit from being inlined */ + +/*-************************************* +* Dependencies +***************************************/ +#if !defined(ZSTD_NO_INTRINSICS) && defined(__ARM_NEON) +#include +#endif +/**** skipping file: compiler.h ****/ +/**** skipping file: mem.h ****/ +/**** skipping file: debug.h ****/ +/**** skipping file: error_private.h ****/ +#define ZSTD_STATIC_LINKING_ONLY +/**** start inlining ../zstd.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ +#if defined (__cplusplus) +extern "C" { +#endif + +#ifndef ZSTD_H_235446 +#define ZSTD_H_235446 + +/* ====== Dependency ======*/ +#include /* INT_MAX */ +#include /* size_t */ + + +/* ===== ZSTDLIB_API : control library symbols visibility ===== */ +#ifndef ZSTDLIB_VISIBILITY +# if defined(__GNUC__) && (__GNUC__ >= 4) +# define ZSTDLIB_VISIBILITY __attribute__ ((visibility ("default"))) +# else +# define ZSTDLIB_VISIBILITY +# endif +#endif +#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1) +# define ZSTDLIB_API __declspec(dllexport) ZSTDLIB_VISIBILITY +#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1) +# define ZSTDLIB_API __declspec(dllimport) ZSTDLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ +#else +# define ZSTDLIB_API ZSTDLIB_VISIBILITY +#endif + + +/******************************************************************************* + Introduction + + zstd, short for Zstandard, is a fast lossless compression algorithm, targeting + real-time compression scenarios at zlib-level and better compression ratios. + The zstd compression library provides in-memory compression and decompression + functions. + + The library supports regular compression levels from 1 up to ZSTD_maxCLevel(), + which is currently 22. Levels >= 20, labeled `--ultra`, should be used with + caution, as they require more memory. The library also offers negative + compression levels, which extend the range of speed vs. ratio preferences. + The lower the level, the faster the speed (at the cost of compression). + + Compression can be done in: + - a single step (described as Simple API) + - a single step, reusing a context (described as Explicit context) + - unbounded multiple steps (described as Streaming compression) + + The compression ratio achievable on small data can be highly improved using + a dictionary. Dictionary compression can be performed in: + - a single step (described as Simple dictionary API) + - a single step, reusing a dictionary (described as Bulk-processing + dictionary API) + + Advanced experimental functions can be accessed using + `#define ZSTD_STATIC_LINKING_ONLY` before including zstd.h. + + Advanced experimental APIs should never be used with a dynamically-linked + library. They are not "stable"; their definitions or signatures may change in + the future. Only static linking is allowed. +*******************************************************************************/ + +/*------ Version ------*/ +#define ZSTD_VERSION_MAJOR 1 +#define ZSTD_VERSION_MINOR 5 +#define ZSTD_VERSION_RELEASE 0 +#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE) + +/*! ZSTD_versionNumber() : + * Return runtime library version, the value is (MAJOR*100*100 + MINOR*100 + RELEASE). */ +ZSTDLIB_API unsigned ZSTD_versionNumber(void); + +#define ZSTD_LIB_VERSION ZSTD_VERSION_MAJOR.ZSTD_VERSION_MINOR.ZSTD_VERSION_RELEASE +#define ZSTD_QUOTE(str) #str +#define ZSTD_EXPAND_AND_QUOTE(str) ZSTD_QUOTE(str) +#define ZSTD_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LIB_VERSION) + +/*! ZSTD_versionString() : + * Return runtime library version, like "1.4.5". Requires v1.3.0+. 
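 *
 * Editorial example (not part of the upstream header), assuming <stdio.h>:
 * comparing the version these declarations were built against with the
 * library actually linked at run time:
 *
 *     if (ZSTD_versionNumber() != ZSTD_VERSION_NUMBER) {
 *         fprintf(stderr, "zstd mismatch: compiled %u, running %u (%s)\n",
 *                 (unsigned)ZSTD_VERSION_NUMBER,
 *                 ZSTD_versionNumber(), ZSTD_versionString());
 *     }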
*/ +ZSTDLIB_API const char* ZSTD_versionString(void); + +/* ************************************* + * Default constant + ***************************************/ +#ifndef ZSTD_CLEVEL_DEFAULT +# define ZSTD_CLEVEL_DEFAULT 3 +#endif + +/* ************************************* + * Constants + ***************************************/ + +/* All magic numbers are supposed read/written to/from files/memory using little-endian convention */ +#define ZSTD_MAGICNUMBER 0xFD2FB528 /* valid since v0.8.0 */ +#define ZSTD_MAGIC_DICTIONARY 0xEC30A437 /* valid since v0.7.0 */ +#define ZSTD_MAGIC_SKIPPABLE_START 0x184D2A50 /* all 16 values, from 0x184D2A50 to 0x184D2A5F, signal the beginning of a skippable frame */ +#define ZSTD_MAGIC_SKIPPABLE_MASK 0xFFFFFFF0 + +#define ZSTD_BLOCKSIZELOG_MAX 17 +#define ZSTD_BLOCKSIZE_MAX (1<= `ZSTD_compressBound(srcSize)`. + * @return : compressed size written into `dst` (<= `dstCapacity), + * or an error code if it fails (which can be tested using ZSTD_isError()). */ +ZSTDLIB_API size_t ZSTD_compress( void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + int compressionLevel); + +/*! ZSTD_decompress() : + * `compressedSize` : must be the _exact_ size of some number of compressed and/or skippable frames. + * `dstCapacity` is an upper bound of originalSize to regenerate. + * If user cannot imply a maximum upper bound, it's better to use streaming mode to decompress data. + * @return : the number of bytes decompressed into `dst` (<= `dstCapacity`), + * or an errorCode if it fails (which can be tested using ZSTD_isError()). */ +ZSTDLIB_API size_t ZSTD_decompress( void* dst, size_t dstCapacity, + const void* src, size_t compressedSize); + +/*! ZSTD_getFrameContentSize() : requires v1.3.0+ + * `src` should point to the start of a ZSTD encoded frame. + * `srcSize` must be at least as large as the frame header. + * hint : any size >= `ZSTD_frameHeaderSize_max` is large enough. + * @return : - decompressed size of `src` frame content, if known + * - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined + * - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small) + * note 1 : a 0 return value means the frame is valid but "empty". + * note 2 : decompressed size is an optional field, it may not be present, typically in streaming mode. + * When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size. + * In which case, it's necessary to use streaming mode to decompress data. + * Optionally, application can rely on some implicit limit, + * as ZSTD_decompress() only needs an upper bound of decompressed size. + * (For example, data could be necessarily cut into blocks <= 16 KB). + * note 3 : decompressed size is always present when compression is completed using single-pass functions, + * such as ZSTD_compress(), ZSTD_compressCCtx() ZSTD_compress_usingDict() or ZSTD_compress_usingCDict(). + * note 4 : decompressed size can be very large (64-bits value), + * potentially larger than what local system can handle as a single memory segment. + * In which case, it's necessary to use streaming mode to decompress data. + * note 5 : If source is untrusted, decompressed size could be wrong or intentionally modified. + * Always ensure return value fits within application's authorized limits. + * Each application can set its own limits. 
+ * note 6 : This function replaces ZSTD_getDecompressedSize() */ +#define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1) +#define ZSTD_CONTENTSIZE_ERROR (0ULL - 2) +ZSTDLIB_API unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize); + +/*! ZSTD_getDecompressedSize() : + * NOTE: This function is now obsolete, in favor of ZSTD_getFrameContentSize(). + * Both functions work the same way, but ZSTD_getDecompressedSize() blends + * "empty", "unknown" and "error" results to the same return value (0), + * while ZSTD_getFrameContentSize() gives them separate return values. + * @return : decompressed size of `src` frame content _if known and not empty_, 0 otherwise. */ +ZSTDLIB_API unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize); + +/*! ZSTD_findFrameCompressedSize() : Requires v1.4.0+ + * `src` should point to the start of a ZSTD frame or skippable frame. + * `srcSize` must be >= first frame size + * @return : the compressed size of the first frame starting at `src`, + * suitable to pass as `srcSize` to `ZSTD_decompress` or similar, + * or an error code if input is invalid */ +ZSTDLIB_API size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize); + + +/*====== Helper functions ======*/ +#define ZSTD_COMPRESSBOUND(srcSize) ((srcSize) + ((srcSize)>>8) + (((srcSize) < (128<<10)) ? (((128<<10) - (srcSize)) >> 11) /* margin, from 64 to 0 */ : 0)) /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */ +ZSTDLIB_API size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case single-pass scenario */ +ZSTDLIB_API unsigned ZSTD_isError(size_t code); /*!< tells if a `size_t` function result is an error code */ +ZSTDLIB_API const char* ZSTD_getErrorName(size_t code); /*!< provides readable string from an error code */ +ZSTDLIB_API int ZSTD_minCLevel(void); /*!< minimum negative compression level allowed, requires v1.4.0+ */ +ZSTDLIB_API int ZSTD_maxCLevel(void); /*!< maximum compression level available */ +ZSTDLIB_API int ZSTD_defaultCLevel(void); /*!< default compression level, specified by ZSTD_CLEVEL_DEFAULT, requires v1.5.0+ */ + + +/*************************************** +* Explicit context +***************************************/ +/*= Compression context + * When compressing many times, + * it is recommended to allocate a context just once, + * and re-use it for each successive compression operation. + * This will make workload friendlier for system's memory. + * Note : re-using context is just a speed / resource optimization. + * It doesn't change the compression ratio, which remains identical. + * Note 2 : In multi-threaded environments, + * use one different context per thread for parallel execution. + */ +typedef struct ZSTD_CCtx_s ZSTD_CCtx; +ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void); +ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx); /* accept NULL pointer */ + +/*! ZSTD_compressCCtx() : + * Same as ZSTD_compress(), using an explicit ZSTD_CCtx. + * Important : in order to behave similarly to `ZSTD_compress()`, + * this function compresses at requested compression level, + * __ignoring any other parameter__ . + * If any advanced parameter was set using the advanced API, + * they will all be reset. Only `compressionLevel` remains. 
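 *
 * Editorial sketch (not upstream): one-shot compression with a reused context.
 * `src`, `srcSize`, the allocation checks, and <stdlib.h>/<stdio.h> are assumed
 * to be provided by the caller:
 *
 *     ZSTD_CCtx* const cctx = ZSTD_createCCtx();
 *     size_t const bound = ZSTD_compressBound(srcSize);
 *     void* const dst = malloc(bound);
 *     size_t const csize = ZSTD_compressCCtx(cctx, dst, bound,
 *                                            src, srcSize, ZSTD_CLEVEL_DEFAULT);
 *     if (ZSTD_isError(csize)) fprintf(stderr, "%s\n", ZSTD_getErrorName(csize));
 *     ZSTD_freeCCtx(cctx);   // or keep it around for the next buffer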
+ */ +ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + int compressionLevel); + +/*= Decompression context + * When decompressing many times, + * it is recommended to allocate a context only once, + * and re-use it for each successive compression operation. + * This will make workload friendlier for system's memory. + * Use one context per thread for parallel execution. */ +typedef struct ZSTD_DCtx_s ZSTD_DCtx; +ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx(void); +ZSTDLIB_API size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx); /* accept NULL pointer */ + +/*! ZSTD_decompressDCtx() : + * Same as ZSTD_decompress(), + * requires an allocated ZSTD_DCtx. + * Compatible with sticky parameters. + */ +ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize); + + +/********************************************* +* Advanced compression API (Requires v1.4.0+) +**********************************************/ + +/* API design : + * Parameters are pushed one by one into an existing context, + * using ZSTD_CCtx_set*() functions. + * Pushed parameters are sticky : they are valid for next compressed frame, and any subsequent frame. + * "sticky" parameters are applicable to `ZSTD_compress2()` and `ZSTD_compressStream*()` ! + * __They do not apply to "simple" one-shot variants such as ZSTD_compressCCtx()__ . + * + * It's possible to reset all parameters to "default" using ZSTD_CCtx_reset(). + * + * This API supercedes all other "advanced" API entry points in the experimental section. + * In the future, we expect to remove from experimental API entry points which are redundant with this API. + */ + + +/* Compression strategies, listed from fastest to strongest */ +typedef enum { ZSTD_fast=1, + ZSTD_dfast=2, + ZSTD_greedy=3, + ZSTD_lazy=4, + ZSTD_lazy2=5, + ZSTD_btlazy2=6, + ZSTD_btopt=7, + ZSTD_btultra=8, + ZSTD_btultra2=9 + /* note : new strategies _might_ be added in the future. + Only the order (from fast to strong) is guaranteed */ +} ZSTD_strategy; + +typedef enum { + + /* compression parameters + * Note: When compressing with a ZSTD_CDict these parameters are superseded + * by the parameters used to construct the ZSTD_CDict. + * See ZSTD_CCtx_refCDict() for more info (superseded-by-cdict). */ + ZSTD_c_compressionLevel=100, /* Set compression parameters according to pre-defined cLevel table. + * Note that exact compression parameters are dynamically determined, + * depending on both compression level and srcSize (when known). + * Default level is ZSTD_CLEVEL_DEFAULT==3. + * Special: value 0 means default, which is controlled by ZSTD_CLEVEL_DEFAULT. + * Note 1 : it's possible to pass a negative compression level. + * Note 2 : setting a level does not automatically set all other compression parameters + * to default. Setting this will however eventually dynamically impact the compression + * parameters which have not been manually set. The manually set + * ones will 'stick'. */ + /* Advanced compression parameters : + * It's possible to pin down compression parameters to some specific values. + * In which case, these values are no longer dynamically selected by the compressor */ + ZSTD_c_windowLog=101, /* Maximum allowed back-reference distance, expressed as power of 2. + * This will set a memory budget for streaming decompression, + * with larger values requiring more memory + * and typically compressing more. + * Must be clamped between ZSTD_WINDOWLOG_MIN and ZSTD_WINDOWLOG_MAX. 
+ * Special: value 0 means "use default windowLog". + * Note: Using a windowLog greater than ZSTD_WINDOWLOG_LIMIT_DEFAULT + * requires explicitly allowing such size at streaming decompression stage. */ + ZSTD_c_hashLog=102, /* Size of the initial probe table, as a power of 2. + * Resulting memory usage is (1 << (hashLog+2)). + * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX. + * Larger tables improve compression ratio of strategies <= dFast, + * and improve speed of strategies > dFast. + * Special: value 0 means "use default hashLog". */ + ZSTD_c_chainLog=103, /* Size of the multi-probe search table, as a power of 2. + * Resulting memory usage is (1 << (chainLog+2)). + * Must be clamped between ZSTD_CHAINLOG_MIN and ZSTD_CHAINLOG_MAX. + * Larger tables result in better and slower compression. + * This parameter is useless for "fast" strategy. + * It's still useful when using "dfast" strategy, + * in which case it defines a secondary probe table. + * Special: value 0 means "use default chainLog". */ + ZSTD_c_searchLog=104, /* Number of search attempts, as a power of 2. + * More attempts result in better and slower compression. + * This parameter is useless for "fast" and "dFast" strategies. + * Special: value 0 means "use default searchLog". */ + ZSTD_c_minMatch=105, /* Minimum size of searched matches. + * Note that Zstandard can still find matches of smaller size, + * it just tweaks its search algorithm to look for this size and larger. + * Larger values increase compression and decompression speed, but decrease ratio. + * Must be clamped between ZSTD_MINMATCH_MIN and ZSTD_MINMATCH_MAX. + * Note that currently, for all strategies < btopt, effective minimum is 4. + * , for all strategies > fast, effective maximum is 6. + * Special: value 0 means "use default minMatchLength". */ + ZSTD_c_targetLength=106, /* Impact of this field depends on strategy. + * For strategies btopt, btultra & btultra2: + * Length of Match considered "good enough" to stop search. + * Larger values make compression stronger, and slower. + * For strategy fast: + * Distance between match sampling. + * Larger values make compression faster, and weaker. + * Special: value 0 means "use default targetLength". */ + ZSTD_c_strategy=107, /* See ZSTD_strategy enum definition. + * The higher the value of selected strategy, the more complex it is, + * resulting in stronger and slower compression. + * Special: value 0 means "use default strategy". */ + /* LDM mode parameters */ + ZSTD_c_enableLongDistanceMatching=160, /* Enable long distance matching. + * This parameter is designed to improve compression ratio + * for large inputs, by finding large matches at long distance. + * It increases memory usage and window size. + * Note: enabling this parameter increases default ZSTD_c_windowLog to 128 MB + * except when expressly set to a different value. + * Note: will be enabled by default if ZSTD_c_windowLog >= 128 MB and + * compression strategy >= ZSTD_btopt (== compression level 16+) */ + ZSTD_c_ldmHashLog=161, /* Size of the table for long distance matching, as a power of 2. + * Larger values increase memory usage and compression ratio, + * but decrease compression speed. + * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX + * default: windowlog - 7. + * Special: value 0 means "automatically determine hashlog". */ + ZSTD_c_ldmMinMatch=162, /* Minimum match size for long distance matcher. + * Larger/too small values usually decrease compression ratio. 
+ * Must be clamped between ZSTD_LDM_MINMATCH_MIN and ZSTD_LDM_MINMATCH_MAX. + * Special: value 0 means "use default value" (default: 64). */ + ZSTD_c_ldmBucketSizeLog=163, /* Log size of each bucket in the LDM hash table for collision resolution. + * Larger values improve collision resolution but decrease compression speed. + * The maximum value is ZSTD_LDM_BUCKETSIZELOG_MAX. + * Special: value 0 means "use default value" (default: 3). */ + ZSTD_c_ldmHashRateLog=164, /* Frequency of inserting/looking up entries into the LDM hash table. + * Must be clamped between 0 and (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN). + * Default is MAX(0, (windowLog - ldmHashLog)), optimizing hash table usage. + * Larger values improve compression speed. + * Deviating far from default value will likely result in a compression ratio decrease. + * Special: value 0 means "automatically determine hashRateLog". */ + + /* frame parameters */ + ZSTD_c_contentSizeFlag=200, /* Content size will be written into frame header _whenever known_ (default:1) + * Content size must be known at the beginning of compression. + * This is automatically the case when using ZSTD_compress2(), + * For streaming scenarios, content size must be provided with ZSTD_CCtx_setPledgedSrcSize() */ + ZSTD_c_checksumFlag=201, /* A 32-bits checksum of content is written at end of frame (default:0) */ + ZSTD_c_dictIDFlag=202, /* When applicable, dictionary's ID is written into frame header (default:1) */ + + /* multi-threading parameters */ + /* These parameters are only active if multi-threading is enabled (compiled with build macro ZSTD_MULTITHREAD). + * Otherwise, trying to set any other value than default (0) will be a no-op and return an error. + * In a situation where it's unknown if the linked library supports multi-threading or not, + * setting ZSTD_c_nbWorkers to any value >= 1 and consulting the return value provides a quick way to check this property. + */ + ZSTD_c_nbWorkers=400, /* Select how many threads will be spawned to compress in parallel. + * When nbWorkers >= 1, triggers asynchronous mode when invoking ZSTD_compressStream*() : + * ZSTD_compressStream*() consumes input and flush output if possible, but immediately gives back control to caller, + * while compression is performed in parallel, within worker thread(s). + * (note : a strong exception to this rule is when first invocation of ZSTD_compressStream2() sets ZSTD_e_end : + * in which case, ZSTD_compressStream2() delegates to ZSTD_compress2(), which is always a blocking call). + * More workers improve speed, but also increase memory usage. + * Default value is `0`, aka "single-threaded mode" : no worker is spawned, + * compression is performed inside Caller's thread, and all invocations are blocking */ + ZSTD_c_jobSize=401, /* Size of a compression job. This value is enforced only when nbWorkers >= 1. + * Each compression job is completed in parallel, so this value can indirectly impact the nb of active threads. + * 0 means default, which is dynamically determined based on compression parameters. + * Job size must be a minimum of overlap size, or ZSTDMT_JOBSIZE_MIN (= 512 KB), whichever is largest. + * The minimum size is automatically and transparently enforced. */ + ZSTD_c_overlapLog=402, /* Control the overlap size, as a fraction of window size. + * The overlap size is an amount of data reloaded from previous job at the beginning of a new job. + * It helps preserve compression ratio, while each job is compressed in parallel. 
+ * This value is enforced only when nbWorkers >= 1. + * Larger values increase compression ratio, but decrease speed. + * Possible values range from 0 to 9 : + * - 0 means "default" : value will be determined by the library, depending on strategy + * - 1 means "no overlap" + * - 9 means "full overlap", using a full window size. + * Each intermediate rank increases/decreases load size by a factor 2 : + * 9: full window; 8: w/2; 7: w/4; 6: w/8; 5:w/16; 4: w/32; 3:w/64; 2:w/128; 1:no overlap; 0:default + * default value varies between 6 and 9, depending on strategy */ + + /* note : additional experimental parameters are also available + * within the experimental section of the API. + * At the time of this writing, they include : + * ZSTD_c_rsyncable + * ZSTD_c_format + * ZSTD_c_forceMaxWindow + * ZSTD_c_forceAttachDict + * ZSTD_c_literalCompressionMode + * ZSTD_c_targetCBlockSize + * ZSTD_c_srcSizeHint + * ZSTD_c_enableDedicatedDictSearch + * ZSTD_c_stableInBuffer + * ZSTD_c_stableOutBuffer + * ZSTD_c_blockDelimiters + * ZSTD_c_validateSequences + * ZSTD_c_splitBlocks + * ZSTD_c_useRowMatchFinder + * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. + * note : never ever use experimentalParam? names directly; + * also, the enums values themselves are unstable and can still change. + */ + ZSTD_c_experimentalParam1=500, + ZSTD_c_experimentalParam2=10, + ZSTD_c_experimentalParam3=1000, + ZSTD_c_experimentalParam4=1001, + ZSTD_c_experimentalParam5=1002, + ZSTD_c_experimentalParam6=1003, + ZSTD_c_experimentalParam7=1004, + ZSTD_c_experimentalParam8=1005, + ZSTD_c_experimentalParam9=1006, + ZSTD_c_experimentalParam10=1007, + ZSTD_c_experimentalParam11=1008, + ZSTD_c_experimentalParam12=1009, + ZSTD_c_experimentalParam13=1010, + ZSTD_c_experimentalParam14=1011, + ZSTD_c_experimentalParam15=1012 +} ZSTD_cParameter; + +typedef struct { + size_t error; + int lowerBound; + int upperBound; +} ZSTD_bounds; + +/*! ZSTD_cParam_getBounds() : + * All parameters must belong to an interval with lower and upper bounds, + * otherwise they will either trigger an error or be automatically clamped. + * @return : a structure, ZSTD_bounds, which contains + * - an error status field, which must be tested using ZSTD_isError() + * - lower and upper bounds, both inclusive + */ +ZSTDLIB_API ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter cParam); + +/*! ZSTD_CCtx_setParameter() : + * Set one compression parameter, selected by enum ZSTD_cParameter. + * All parameters have valid bounds. Bounds can be queried using ZSTD_cParam_getBounds(). + * Providing a value beyond bound will either clamp it, or trigger an error (depending on parameter). + * Setting a parameter is generally only possible during frame initialization (before starting compression). + * Exception : when using multi-threading mode (nbWorkers >= 1), + * the following parameters can be updated _during_ compression (within same frame): + * => compressionLevel, hashLog, chainLog, searchLog, minMatch, targetLength and strategy. + * new parameters will be active for next job only (after a flush()). + * @return : an error code (which can be tested using ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value); + +/*! ZSTD_CCtx_setPledgedSrcSize() : + * Total input data size to be compressed as a single frame. + * Value will be written in frame header, unless if explicitly forbidden using ZSTD_c_contentSizeFlag. 
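 *
 * Editorial sketch (not upstream) of the typical call sequence, with the
 * pledged size and other sticky parameters pushed before compressing
 * (`cctx`, `src`, `srcSize`, `dst`, `dstCapacity` assumed):
 *
 *     ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only);
 *     ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 19);
 *     ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1);
 *     ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 4);   // returns an error if built without ZSTD_MULTITHREAD
 *     ZSTD_CCtx_setPledgedSrcSize(cctx, (unsigned long long)srcSize);
 *     size_t const csize = ZSTD_compress2(cctx, dst, dstCapacity, src, srcSize);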
+ * This value will also be controlled at end of frame, and trigger an error if not respected. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Note 1 : pledgedSrcSize==0 actually means zero, aka an empty frame. + * In order to mean "unknown content size", pass constant ZSTD_CONTENTSIZE_UNKNOWN. + * ZSTD_CONTENTSIZE_UNKNOWN is default value for any new frame. + * Note 2 : pledgedSrcSize is only valid once, for the next frame. + * It's discarded at the end of the frame, and replaced by ZSTD_CONTENTSIZE_UNKNOWN. + * Note 3 : Whenever all input data is provided and consumed in a single round, + * for example with ZSTD_compress2(), + * or invoking immediately ZSTD_compressStream2(,,,ZSTD_e_end), + * this value is automatically overridden by srcSize instead. + */ +ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize); + +typedef enum { + ZSTD_reset_session_only = 1, + ZSTD_reset_parameters = 2, + ZSTD_reset_session_and_parameters = 3 +} ZSTD_ResetDirective; + +/*! ZSTD_CCtx_reset() : + * There are 2 different things that can be reset, independently or jointly : + * - The session : will stop compressing current frame, and make CCtx ready to start a new one. + * Useful after an error, or to interrupt any ongoing compression. + * Any internal data not yet flushed is cancelled. + * Compression parameters and dictionary remain unchanged. + * They will be used to compress next frame. + * Resetting session never fails. + * - The parameters : changes all parameters back to "default". + * This removes any reference to any dictionary too. + * Parameters can only be changed between 2 sessions (i.e. no compression is currently ongoing) + * otherwise the reset fails, and function returns an error value (which can be tested using ZSTD_isError()) + * - Both : similar to resetting the session, followed by resetting parameters. + */ +ZSTDLIB_API size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset); + +/*! ZSTD_compress2() : + * Behave the same as ZSTD_compressCCtx(), but compression parameters are set using the advanced API. + * ZSTD_compress2() always starts a new frame. + * Should cctx hold data from a previously unfinished frame, everything about it is forgotten. + * - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*() + * - The function is always blocking, returns when compression is completed. + * Hint : compression runs faster if `dstCapacity` >= `ZSTD_compressBound(srcSize)`. + * @return : compressed size written into `dst` (<= `dstCapacity), + * or an error code if it fails (which can be tested using ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_compress2( ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize); + + +/*********************************************** +* Advanced decompression API (Requires v1.4.0+) +************************************************/ + +/* The advanced API pushes parameters one by one into an existing DCtx context. + * Parameters are sticky, and remain valid for all following frames + * using the same DCtx context. + * It's possible to reset parameters to default values using ZSTD_DCtx_reset(). + * Note : This API is compatible with existing ZSTD_decompressDCtx() and ZSTD_decompressStream(). + * Therefore, no new decompression function is necessary. 
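 *
 * Editorial sketch (not upstream): capping the window size a DCtx will accept,
 * using the sticky parameter declared just below (`dst`/`src` buffers assumed):
 *
 *     ZSTD_DCtx* const dctx = ZSTD_createDCtx();
 *     ZSTD_DCtx_setParameter(dctx, ZSTD_d_windowLogMax, 27);    // refuse frames needing >128 MB windows
 *     size_t const rsize = ZSTD_decompressDCtx(dctx, dst, dstCapacity, src, srcSize);
 *     // ZSTD_isError(rsize) would report e.g. frameParameter_windowTooLarge here
 *     ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters); // back to defaults before reuse
 *     ZSTD_freeDCtx(dctx);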
+ */ + +typedef enum { + + ZSTD_d_windowLogMax=100, /* Select a size limit (in power of 2) beyond which + * the streaming API will refuse to allocate memory buffer + * in order to protect the host from unreasonable memory requirements. + * This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode. + * By default, a decompression context accepts window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT). + * Special: value 0 means "use default maximum windowLog". */ + + /* note : additional experimental parameters are also available + * within the experimental section of the API. + * At the time of this writing, they include : + * ZSTD_d_format + * ZSTD_d_stableOutBuffer + * ZSTD_d_forceIgnoreChecksum + * ZSTD_d_refMultipleDDicts + * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. + * note : never ever use experimentalParam? names directly + */ + ZSTD_d_experimentalParam1=1000, + ZSTD_d_experimentalParam2=1001, + ZSTD_d_experimentalParam3=1002, + ZSTD_d_experimentalParam4=1003 + +} ZSTD_dParameter; + +/*! ZSTD_dParam_getBounds() : + * All parameters must belong to an interval with lower and upper bounds, + * otherwise they will either trigger an error or be automatically clamped. + * @return : a structure, ZSTD_bounds, which contains + * - an error status field, which must be tested using ZSTD_isError() + * - both lower and upper bounds, inclusive + */ +ZSTDLIB_API ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam); + +/*! ZSTD_DCtx_setParameter() : + * Set one compression parameter, selected by enum ZSTD_dParameter. + * All parameters have valid bounds. Bounds can be queried using ZSTD_dParam_getBounds(). + * Providing a value beyond bound will either clamp it, or trigger an error (depending on parameter). + * Setting a parameter is only possible during frame initialization (before starting decompression). + * @return : 0, or an error code (which can be tested using ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int value); + +/*! ZSTD_DCtx_reset() : + * Return a DCtx to clean state. + * Session and parameters can be reset jointly or separately. + * Parameters can only be reset when no active frame is being decompressed. + * @return : 0, or an error code, which can be tested with ZSTD_isError() + */ +ZSTDLIB_API size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset); + + +/**************************** +* Streaming +****************************/ + +typedef struct ZSTD_inBuffer_s { + const void* src; /**< start of input buffer */ + size_t size; /**< size of input buffer */ + size_t pos; /**< position where reading stopped. Will be updated. Necessarily 0 <= pos <= size */ +} ZSTD_inBuffer; + +typedef struct ZSTD_outBuffer_s { + void* dst; /**< start of output buffer */ + size_t size; /**< size of output buffer */ + size_t pos; /**< position where writing stopped. Will be updated. Necessarily 0 <= pos <= size */ +} ZSTD_outBuffer; + + + +/*-*********************************************************************** +* Streaming compression - HowTo +* +* A ZSTD_CStream object is required to track streaming operation. +* Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources. +* ZSTD_CStream objects can be reused multiple times on consecutive compression operations. +* It is recommended to re-use ZSTD_CStream since it will play nicer with system's memory, by re-using already allocated memory. 
+* +* For parallel execution, use one separate ZSTD_CStream per thread. +* +* note : since v1.3.0, ZSTD_CStream and ZSTD_CCtx are the same thing. +* +* Parameters are sticky : when starting a new compression on the same context, +* it will re-use the same sticky parameters as previous compression session. +* When in doubt, it's recommended to fully initialize the context before usage. +* Use ZSTD_CCtx_reset() to reset the context and ZSTD_CCtx_setParameter(), +* ZSTD_CCtx_setPledgedSrcSize(), or ZSTD_CCtx_loadDictionary() and friends to +* set more specific parameters, the pledged source size, or load a dictionary. +* +* Use ZSTD_compressStream2() with ZSTD_e_continue as many times as necessary to +* consume input stream. The function will automatically update both `pos` +* fields within `input` and `output`. +* Note that the function may not consume the entire input, for example, because +* the output buffer is already full, in which case `input.pos < input.size`. +* The caller must check if input has been entirely consumed. +* If not, the caller must make some room to receive more compressed data, +* and then present again remaining input data. +* note: ZSTD_e_continue is guaranteed to make some forward progress when called, +* but doesn't guarantee maximal forward progress. This is especially relevant +* when compressing with multiple threads. The call won't block if it can +* consume some input, but if it can't it will wait for some, but not all, +* output to be flushed. +* @return : provides a minimum amount of data remaining to be flushed from internal buffers +* or an error code, which can be tested using ZSTD_isError(). +* +* At any moment, it's possible to flush whatever data might remain stuck within internal buffer, +* using ZSTD_compressStream2() with ZSTD_e_flush. `output->pos` will be updated. +* Note that, if `output->size` is too small, a single invocation with ZSTD_e_flush might not be enough (return code > 0). +* In which case, make some room to receive more compressed data, and call again ZSTD_compressStream2() with ZSTD_e_flush. +* You must continue calling ZSTD_compressStream2() with ZSTD_e_flush until it returns 0, at which point you can change the +* operation. +* note: ZSTD_e_flush will flush as much output as possible, meaning when compressing with multiple threads, it will +* block until the flush is complete or the output buffer is full. +* @return : 0 if internal buffers are entirely flushed, +* >0 if some data still present within internal buffer (the value is minimal estimation of remaining size), +* or an error code, which can be tested using ZSTD_isError(). +* +* Calling ZSTD_compressStream2() with ZSTD_e_end instructs to finish a frame. +* It will perform a flush and write frame epilogue. +* The epilogue is required for decoders to consider a frame completed. +* flush operation is the same, and follows same rules as calling ZSTD_compressStream2() with ZSTD_e_flush. +* You must continue calling ZSTD_compressStream2() with ZSTD_e_end until it returns 0, at which point you are free to +* start a new frame. +* note: ZSTD_e_end will flush as much output as possible, meaning when compressing with multiple threads, it will +* block until the flush is complete or the output buffer is full. +* @return : 0 if frame fully completed and fully flushed, +* >0 if some data still present within internal buffer (the value is minimal estimation of remaining size), +* or an error code, which can be tested using ZSTD_isError(). 
+* +* *******************************************************************/ + +typedef ZSTD_CCtx ZSTD_CStream; /**< CCtx and CStream are now effectively same object (>= v1.3.0) */ + /* Continue to distinguish them for compatibility with older versions <= v1.2.0 */ +/*===== ZSTD_CStream management functions =====*/ +ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream(void); +ZSTDLIB_API size_t ZSTD_freeCStream(ZSTD_CStream* zcs); /* accept NULL pointer */ + +/*===== Streaming compression functions =====*/ +typedef enum { + ZSTD_e_continue=0, /* collect more data, encoder decides when to output compressed result, for optimal compression ratio */ + ZSTD_e_flush=1, /* flush any data provided so far, + * it creates (at least) one new block, that can be decoded immediately on reception; + * frame will continue: any future data can still reference previously compressed data, improving compression. + * note : multithreaded compression will block to flush as much output as possible. */ + ZSTD_e_end=2 /* flush any remaining data _and_ close current frame. + * note that frame is only closed after compressed data is fully flushed (return value == 0). + * After that point, any additional data starts a new frame. + * note : each frame is independent (does not reference any content from previous frame). + : note : multithreaded compression will block to flush as much output as possible. */ +} ZSTD_EndDirective; + +/*! ZSTD_compressStream2() : Requires v1.4.0+ + * Behaves about the same as ZSTD_compressStream, with additional control on end directive. + * - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*() + * - Compression parameters cannot be changed once compression is started (save a list of exceptions in multi-threading mode) + * - output->pos must be <= dstCapacity, input->pos must be <= srcSize + * - output->pos and input->pos will be updated. They are guaranteed to remain below their respective limit. + * - endOp must be a valid directive + * - When nbWorkers==0 (default), function is blocking : it completes its job before returning to caller. + * - When nbWorkers>=1, function is non-blocking : it copies a portion of input, distributes jobs to internal worker threads, flush to output whatever is available, + * and then immediately returns, just indicating that there is some data remaining to be flushed. + * The function nonetheless guarantees forward progress : it will return only after it reads or write at least 1+ byte. + * - Exception : if the first call requests a ZSTD_e_end directive and provides enough dstCapacity, the function delegates to ZSTD_compress2() which is always blocking. + * - @return provides a minimum amount of data remaining to be flushed from internal buffers + * or an error code, which can be tested using ZSTD_isError(). + * if @return != 0, flush is not fully completed, there is still some data left within internal buffers. + * This is useful for ZSTD_e_flush, since in this case more flushes are necessary to empty all buffers. + * For ZSTD_e_end, @return == 0 when internal buffers are fully flushed and frame is completed. + * - after a ZSTD_e_end directive, if internal buffer is not fully flushed (@return != 0), + * only ZSTD_e_end or ZSTD_e_flush operations are allowed. + * Before starting a new compression job, or changing compression parameters, + * it is required to fully flush internal buffers. 
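+ *
+ * For example, finishing a frame can be written as a small drain loop (sketch only;
+ * outBuf/outCap and the handling of the produced bytes are placeholders) :
+ *
+ *     size_t remaining;
+ *     do {
+ *         ZSTD_outBuffer output = { outBuf, outCap, 0 };
+ *         remaining = ZSTD_compressStream2(cctx, &output, &input, ZSTD_e_end);
+ *         if (ZSTD_isError(remaining)) break;
+ *         writeOutput(outBuf, output.pos);
+ *     } while (remaining != 0);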
+ */ +ZSTDLIB_API size_t ZSTD_compressStream2( ZSTD_CCtx* cctx, + ZSTD_outBuffer* output, + ZSTD_inBuffer* input, + ZSTD_EndDirective endOp); + + +/* These buffer sizes are softly recommended. + * They are not required : ZSTD_compressStream*() happily accepts any buffer size, for both input and output. + * Respecting the recommended size just makes it a bit easier for ZSTD_compressStream*(), + * reducing the amount of memory shuffling and buffering, resulting in minor performance savings. + * + * However, note that these recommendations are from the perspective of a C caller program. + * If the streaming interface is invoked from some other language, + * especially managed ones such as Java or Go, through a foreign function interface such as jni or cgo, + * a major performance rule is to reduce crossing such interface to an absolute minimum. + * It's not rare that performance ends being spent more into the interface, rather than compression itself. + * In which cases, prefer using large buffers, as large as practical, + * for both input and output, to reduce the nb of roundtrips. + */ +ZSTDLIB_API size_t ZSTD_CStreamInSize(void); /**< recommended size for input buffer */ +ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block. */ + + +/* ***************************************************************************** + * This following is a legacy streaming API, available since v1.0+ . + * It can be replaced by ZSTD_CCtx_reset() and ZSTD_compressStream2(). + * It is redundant, but remains fully supported. + * Streaming in combination with advanced parameters and dictionary compression + * can only be used through the new API. + ******************************************************************************/ + +/*! + * Equivalent to: + * + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any) + * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); + */ +ZSTDLIB_API size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel); +/*! + * Alternative for ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue). + * NOTE: The return value is different. ZSTD_compressStream() returns a hint for + * the next read size (if non-zero and not an error). ZSTD_compressStream2() + * returns the minimum nb of bytes left to flush (if non-zero and not an error). + */ +ZSTDLIB_API size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input); +/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_flush). */ +ZSTDLIB_API size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); +/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_end). */ +ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); + + +/*-*************************************************************************** +* Streaming decompression - HowTo +* +* A ZSTD_DStream object is required to track streaming operations. +* Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources. +* ZSTD_DStream objects can be re-used multiple times. +* +* Use ZSTD_initDStream() to start a new decompression operation. +* @return : recommended first input size +* Alternatively, use advanced API to set specific properties. +* +* Use ZSTD_decompressStream() repetitively to consume your input. +* The function will update both `pos` fields. 
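+*
+* For example, a minimal decompression loop could look like this sketch
+* (readInput()/writeOutput(), inBuf/outBuf and error handling are placeholders) :
+*
+*     ZSTD_DStream* const dstream = ZSTD_createDStream();
+*     ZSTD_initDStream(dstream);
+*     size_t const inCap  = ZSTD_DStreamInSize();
+*     size_t const outCap = ZSTD_DStreamOutSize();
+*     size_t readSize;
+*     while ((readSize = readInput(inBuf, inCap)) != 0) {
+*         ZSTD_inBuffer input = { inBuf, readSize, 0 };
+*         while (input.pos < input.size) {
+*             ZSTD_outBuffer output = { outBuf, outCap, 0 };
+*             size_t const ret = ZSTD_decompressStream(dstream, &output, &input);
+*             if (ZSTD_isError(ret)) break;
+*             writeOutput(outBuf, output.pos);
+*         }
+*     }
+*     ZSTD_freeDStream(dstream);
+*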
+* If `input.pos < input.size`, some input has not been consumed. +* It's up to the caller to present again remaining data. +* The function tries to flush all data decoded immediately, respecting output buffer size. +* If `output.pos < output.size`, decoder has flushed everything it could. +* But if `output.pos == output.size`, there might be some data left within internal buffers., +* In which case, call ZSTD_decompressStream() again to flush whatever remains in the buffer. +* Note : with no additional input provided, amount of data flushed is necessarily <= ZSTD_BLOCKSIZE_MAX. +* @return : 0 when a frame is completely decoded and fully flushed, +* or an error code, which can be tested using ZSTD_isError(), +* or any other value > 0, which means there is still some decoding or flushing to do to complete current frame : +* the return value is a suggested next input size (just a hint for better latency) +* that will never request more than the remaining frame size. +* *******************************************************************************/ + +typedef ZSTD_DCtx ZSTD_DStream; /**< DCtx and DStream are now effectively same object (>= v1.3.0) */ + /* For compatibility with versions <= v1.2.0, prefer differentiating them. */ +/*===== ZSTD_DStream management functions =====*/ +ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream(void); +ZSTDLIB_API size_t ZSTD_freeDStream(ZSTD_DStream* zds); /* accept NULL pointer */ + +/*===== Streaming decompression functions =====*/ + +/* This function is redundant with the advanced API and equivalent to: + * + * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); + * ZSTD_DCtx_refDDict(zds, NULL); + */ +ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds); + +ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input); + +ZSTDLIB_API size_t ZSTD_DStreamInSize(void); /*!< recommended size for input buffer */ +ZSTDLIB_API size_t ZSTD_DStreamOutSize(void); /*!< recommended size for output buffer. Guarantee to successfully flush at least one complete block in all circumstances. */ + + +/************************** +* Simple dictionary API +***************************/ +/*! ZSTD_compress_usingDict() : + * Compression at an explicit compression level using a Dictionary. + * A dictionary can be any arbitrary data segment (also called a prefix), + * or a buffer with specified information (see zdict.h). + * Note : This function loads the dictionary, resulting in significant startup delay. + * It's intended for a dictionary used only once. + * Note 2 : When `dict == NULL || dictSize < 8` no dictionary is used. */ +ZSTDLIB_API size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + int compressionLevel); + +/*! ZSTD_decompress_usingDict() : + * Decompression using a known Dictionary. + * Dictionary must be identical to the one used during compression. + * Note : This function loads the dictionary, resulting in significant startup delay. + * It's intended for a dictionary used only once. + * Note : When `dict == NULL || dictSize < 8` no dictionary is used. */ +ZSTDLIB_API size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize); + + +/*********************************** + * Bulk processing dictionary API + **********************************/ +typedef struct ZSTD_CDict_s ZSTD_CDict; + +/*! 
ZSTD_createCDict() : + * When compressing multiple messages or blocks using the same dictionary, + * it's recommended to digest the dictionary only once, since it's a costly operation. + * ZSTD_createCDict() will create a state from digesting a dictionary. + * The resulting state can be used for future compression operations with very limited startup cost. + * ZSTD_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only. + * @dictBuffer can be released after ZSTD_CDict creation, because its content is copied within CDict. + * Note 1 : Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate @dictBuffer content. + * Note 2 : A ZSTD_CDict can be created from an empty @dictBuffer, + * in which case the only thing that it transports is the @compressionLevel. + * This can be useful in a pipeline featuring ZSTD_compress_usingCDict() exclusively, + * expecting a ZSTD_CDict parameter with any data, including those without a known dictionary. */ +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize, + int compressionLevel); + +/*! ZSTD_freeCDict() : + * Function frees memory allocated by ZSTD_createCDict(). + * If a NULL pointer is passed, no operation is performed. */ +ZSTDLIB_API size_t ZSTD_freeCDict(ZSTD_CDict* CDict); + +/*! ZSTD_compress_usingCDict() : + * Compression using a digested Dictionary. + * Recommended when same dictionary is used multiple times. + * Note : compression level is _decided at dictionary creation time_, + * and frame parameters are hardcoded (dictID=yes, contentSize=yes, checksum=no) */ +ZSTDLIB_API size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict); + + +typedef struct ZSTD_DDict_s ZSTD_DDict; + +/*! ZSTD_createDDict() : + * Create a digested dictionary, ready to start decompression operation without startup delay. + * dictBuffer can be released after DDict creation, as its content is copied inside DDict. */ +ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dictBuffer, size_t dictSize); + +/*! ZSTD_freeDDict() : + * Function frees memory allocated with ZSTD_createDDict() + * If a NULL pointer is passed, no operation is performed. */ +ZSTDLIB_API size_t ZSTD_freeDDict(ZSTD_DDict* ddict); + +/*! ZSTD_decompress_usingDDict() : + * Decompression using a digested Dictionary. + * Recommended when same dictionary is used multiple times. */ +ZSTDLIB_API size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_DDict* ddict); + + +/******************************** + * Dictionary helper functions + *******************************/ + +/*! ZSTD_getDictID_fromDict() : Requires v1.4.0+ + * Provides the dictID stored within dictionary. + * if @return == 0, the dictionary is not conformant with Zstandard specification. + * It can still be loaded, but as a content-only dictionary. */ +ZSTDLIB_API unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize); + +/*! ZSTD_getDictID_fromCDict() : Requires v1.5.0+ + * Provides the dictID of the dictionary loaded into `cdict`. + * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. + * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ +ZSTDLIB_API unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict); + +/*! 
ZSTD_getDictID_fromDDict() : Requires v1.4.0+ + * Provides the dictID of the dictionary loaded into `ddict`. + * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. + * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ +ZSTDLIB_API unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict); + +/*! ZSTD_getDictID_fromFrame() : Requires v1.4.0+ + * Provides the dictID required to decompressed the frame stored within `src`. + * If @return == 0, the dictID could not be decoded. + * This could for one of the following reasons : + * - The frame does not require a dictionary to be decoded (most common case). + * - The frame was built with dictID intentionally removed. Whatever dictionary is necessary is a hidden information. + * Note : this use case also happens when using a non-conformant dictionary. + * - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`). + * - This is not a Zstandard frame. + * When identifying the exact failure cause, it's possible to use ZSTD_getFrameHeader(), which will provide a more precise error code. */ +ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize); + + +/******************************************************************************* + * Advanced dictionary and prefix API (Requires v1.4.0+) + * + * This API allows dictionaries to be used with ZSTD_compress2(), + * ZSTD_compressStream2(), and ZSTD_decompress(). Dictionaries are sticky, and + * only reset with the context is reset with ZSTD_reset_parameters or + * ZSTD_reset_session_and_parameters. Prefixes are single-use. + ******************************************************************************/ + + +/*! ZSTD_CCtx_loadDictionary() : Requires v1.4.0+ + * Create an internal CDict from `dict` buffer. + * Decompression will have to use same dictionary. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Special: Loading a NULL (or 0-size) dictionary invalidates previous dictionary, + * meaning "return to no-dictionary mode". + * Note 1 : Dictionary is sticky, it will be used for all future compressed frames. + * To return to "no-dictionary" situation, load a NULL dictionary (or reset parameters). + * Note 2 : Loading a dictionary involves building tables. + * It's also a CPU consuming operation, with non-negligible impact on latency. + * Tables are dependent on compression parameters, and for this reason, + * compression parameters can no longer be changed after loading a dictionary. + * Note 3 :`dict` content will be copied internally. + * Use experimental ZSTD_CCtx_loadDictionary_byReference() to reference content instead. + * In such a case, dictionary buffer must outlive its users. + * Note 4 : Use ZSTD_CCtx_loadDictionary_advanced() + * to precisely select how dictionary content must be interpreted. */ +ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize); + +/*! ZSTD_CCtx_refCDict() : Requires v1.4.0+ + * Reference a prepared dictionary, to be used for all next compressed frames. + * Note that compression parameters are enforced from within CDict, + * and supersede any compression parameter previously set within CCtx. + * The parameters ignored are labelled as "superseded-by-cdict" in the ZSTD_cParameter enum docs. + * The ignored parameters will be used again if the CCtx is returned to no-dictionary mode. 
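+ *
+ * For illustration, a typical create-once / compress-many pattern might be the following sketch
+ * (dictBuf, dictSize, dst, dstCapacity, src, srcSize are assumed to exist; error checks omitted) :
+ *
+ *     ZSTD_CDict* const cdict = ZSTD_createCDict(dictBuf, dictSize, 3);
+ *     ZSTD_CCtx*  const cctx  = ZSTD_createCCtx();
+ *     ZSTD_CCtx_refCDict(cctx, cdict);
+ *     size_t const cSize = ZSTD_compress2(cctx, dst, dstCapacity, src, srcSize);
+ *     ... more frames with the same cctx/cdict, then ZSTD_freeCCtx() and ZSTD_freeCDict() ...
+ *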
+ * The dictionary will remain valid for future compressed frames using same CCtx. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Special : Referencing a NULL CDict means "return to no-dictionary mode". + * Note 1 : Currently, only one dictionary can be managed. + * Referencing a new dictionary effectively "discards" any previous one. + * Note 2 : CDict is just referenced, its lifetime must outlive its usage within CCtx. */ +ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); + +/*! ZSTD_CCtx_refPrefix() : Requires v1.4.0+ + * Reference a prefix (single-usage dictionary) for next compressed frame. + * A prefix is **only used once**. Tables are discarded at end of frame (ZSTD_e_end). + * Decompression will need same prefix to properly regenerate data. + * Compressing with a prefix is similar in outcome as performing a diff and compressing it, + * but performs much faster, especially during decompression (compression speed is tunable with compression level). + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Special: Adding any prefix (including NULL) invalidates any previous prefix or dictionary + * Note 1 : Prefix buffer is referenced. It **must** outlive compression. + * Its content must remain unmodified during compression. + * Note 2 : If the intention is to diff some large src data blob with some prior version of itself, + * ensure that the window size is large enough to contain the entire source. + * See ZSTD_c_windowLog. + * Note 3 : Referencing a prefix involves building tables, which are dependent on compression parameters. + * It's a CPU consuming operation, with non-negligible impact on latency. + * If there is a need to use the same prefix multiple times, consider loadDictionary instead. + * Note 4 : By default, the prefix is interpreted as raw content (ZSTD_dct_rawContent). + * Use experimental ZSTD_CCtx_refPrefix_advanced() to alter dictionary interpretation. */ +ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, + const void* prefix, size_t prefixSize); + +/*! ZSTD_DCtx_loadDictionary() : Requires v1.4.0+ + * Create an internal DDict from dict buffer, + * to be used to decompress next frames. + * The dictionary remains valid for all future frames, until explicitly invalidated. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Special : Adding a NULL (or 0-size) dictionary invalidates any previous dictionary, + * meaning "return to no-dictionary mode". + * Note 1 : Loading a dictionary involves building tables, + * which has a non-negligible impact on CPU usage and latency. + * It's recommended to "load once, use many times", to amortize the cost + * Note 2 :`dict` content will be copied internally, so `dict` can be released after loading. + * Use ZSTD_DCtx_loadDictionary_byReference() to reference dictionary content instead. + * Note 3 : Use ZSTD_DCtx_loadDictionary_advanced() to take control of + * how dictionary content is loaded and interpreted. + */ +ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); + +/*! ZSTD_DCtx_refDDict() : Requires v1.4.0+ + * Reference a prepared dictionary, to be used to decompress next frames. + * The dictionary remains active for decompression of future frames using same DCtx. 
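+ * The decompression side mirrors this; a sketch (dictBuf/dictSize, the compressed input
+ * and the destination buffer are placeholders; error checks omitted) :
+ *
+ *     ZSTD_DDict* const ddict = ZSTD_createDDict(dictBuf, dictSize);
+ *     ZSTD_DCtx*  const dctx  = ZSTD_createDCtx();
+ *     ZSTD_DCtx_refDDict(dctx, ddict);
+ *     size_t const dSize = ZSTD_decompressDCtx(dctx, dst, dstCapacity, src, cSize);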
+ * + * If called with ZSTD_d_refMultipleDDicts enabled, repeated calls of this function + * will store the DDict references in a table, and the DDict used for decompression + * will be determined at decompression time, as per the dict ID in the frame. + * The memory for the table is allocated on the first call to refDDict, and can be + * freed with ZSTD_freeDCtx(). + * + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Note 1 : Currently, only one dictionary can be managed. + * Referencing a new dictionary effectively "discards" any previous one. + * Special: referencing a NULL DDict means "return to no-dictionary mode". + * Note 2 : DDict is just referenced, its lifetime must outlive its usage from DCtx. + */ +ZSTDLIB_API size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); + +/*! ZSTD_DCtx_refPrefix() : Requires v1.4.0+ + * Reference a prefix (single-usage dictionary) to decompress next frame. + * This is the reverse operation of ZSTD_CCtx_refPrefix(), + * and must use the same prefix as the one used during compression. + * Prefix is **only used once**. Reference is discarded at end of frame. + * End of frame is reached when ZSTD_decompressStream() returns 0. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Note 1 : Adding any prefix (including NULL) invalidates any previously set prefix or dictionary + * Note 2 : Prefix buffer is referenced. It **must** outlive decompression. + * Prefix buffer must remain unmodified up to the end of frame, + * reached when ZSTD_decompressStream() returns 0. + * Note 3 : By default, the prefix is treated as raw content (ZSTD_dct_rawContent). + * Use ZSTD_CCtx_refPrefix_advanced() to alter dictMode (Experimental section) + * Note 4 : Referencing a raw content prefix has almost no cpu nor memory cost. + * A full dictionary is more costly, as it requires building tables. + */ +ZSTDLIB_API size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, + const void* prefix, size_t prefixSize); + +/* === Memory management === */ + +/*! ZSTD_sizeof_*() : Requires v1.4.0+ + * These functions give the _current_ memory usage of selected object. + * Note that object memory usage can evolve (increase or decrease) over time. */ +ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx); +ZSTDLIB_API size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx); +ZSTDLIB_API size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs); +ZSTDLIB_API size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds); +ZSTDLIB_API size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict); +ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict); + +#endif /* ZSTD_H_235446 */ + + +/* ************************************************************************************** + * ADVANCED AND EXPERIMENTAL FUNCTIONS + **************************************************************************************** + * The definitions in the following section are considered experimental. + * They are provided for advanced scenarios. + * They should never be used with a dynamic library, as prototypes may change in the future. + * Use them only in association with static linking. 
+ * ***************************************************************************************/
+
+#if defined(ZSTD_STATIC_LINKING_ONLY) && !defined(ZSTD_H_ZSTD_STATIC_LINKING_ONLY)
+#define ZSTD_H_ZSTD_STATIC_LINKING_ONLY
+
+/* Deprecation warnings :
+ * Should these warnings be a problem, it is generally possible to disable them,
+ * typically with -Wno-deprecated-declarations for gcc or _CRT_SECURE_NO_WARNINGS in Visual.
+ * Otherwise, it's also possible to define ZSTD_DISABLE_DEPRECATE_WARNINGS.
+ */
+#ifdef ZSTD_DISABLE_DEPRECATE_WARNINGS
+# define ZSTD_DEPRECATED(message) ZSTDLIB_API /* disable deprecation warnings */
+#else
+# if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
+# define ZSTD_DEPRECATED(message) [[deprecated(message)]] ZSTDLIB_API
+# elif (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5))) || defined(__clang__)
+# define ZSTD_DEPRECATED(message) ZSTDLIB_API __attribute__((deprecated(message)))
+# elif defined(__GNUC__) && (__GNUC__ >= 3)
+# define ZSTD_DEPRECATED(message) ZSTDLIB_API __attribute__((deprecated))
+# elif defined(_MSC_VER)
+# define ZSTD_DEPRECATED(message) ZSTDLIB_API __declspec(deprecated(message))
+# else
+# pragma message("WARNING: You need to implement ZSTD_DEPRECATED for this compiler")
+# define ZSTD_DEPRECATED(message) ZSTDLIB_API
+# endif
+#endif /* ZSTD_DISABLE_DEPRECATE_WARNINGS */
+
+/****************************************************************************************
+ * experimental API (static linking only)
+ ****************************************************************************************
+ * The following symbols and constants
+ * are not planned to join "stable API" status in the near future.
+ * They can still change in future versions.
+ * Some of them are planned to remain in the static_only section indefinitely.
+ * Some of them might be removed in the future (especially when redundant with existing stable functions)
+ * ***************************************************************************************/
+
+#define ZSTD_FRAMEHEADERSIZE_PREFIX(format) ((format) == ZSTD_f_zstd1 ? 5 : 1) /* minimum input size required to query frame header size */
+#define ZSTD_FRAMEHEADERSIZE_MIN(format) ((format) == ZSTD_f_zstd1 ? 6 : 2)
+#define ZSTD_FRAMEHEADERSIZE_MAX 18 /* can be useful for static allocation */
+#define ZSTD_SKIPPABLEHEADERSIZE 8
+
+/* compression parameter bounds */
+#define ZSTD_WINDOWLOG_MAX_32 30
+#define ZSTD_WINDOWLOG_MAX_64 31
+#define ZSTD_WINDOWLOG_MAX ((int)(sizeof(size_t) == 4 ? ZSTD_WINDOWLOG_MAX_32 : ZSTD_WINDOWLOG_MAX_64))
+#define ZSTD_WINDOWLOG_MIN 10
+#define ZSTD_HASHLOG_MAX ((ZSTD_WINDOWLOG_MAX < 30) ? ZSTD_WINDOWLOG_MAX : 30)
+#define ZSTD_HASHLOG_MIN 6
+#define ZSTD_CHAINLOG_MAX_32 29
+#define ZSTD_CHAINLOG_MAX_64 30
+#define ZSTD_CHAINLOG_MAX ((int)(sizeof(size_t) == 4 ? 
ZSTD_CHAINLOG_MAX_32 : ZSTD_CHAINLOG_MAX_64))
+#define ZSTD_CHAINLOG_MIN ZSTD_HASHLOG_MIN
+#define ZSTD_SEARCHLOG_MAX (ZSTD_WINDOWLOG_MAX-1)
+#define ZSTD_SEARCHLOG_MIN 1
+#define ZSTD_MINMATCH_MAX 7 /* only for ZSTD_fast, other strategies are limited to 6 */
+#define ZSTD_MINMATCH_MIN 3 /* only for ZSTD_btopt+, faster strategies are limited to 4 */
+#define ZSTD_TARGETLENGTH_MAX ZSTD_BLOCKSIZE_MAX
+#define ZSTD_TARGETLENGTH_MIN 0 /* note : comparing this constant to an unsigned results in a tautological test */
+#define ZSTD_STRATEGY_MIN ZSTD_fast
+#define ZSTD_STRATEGY_MAX ZSTD_btultra2
+
+
+#define ZSTD_OVERLAPLOG_MIN 0
+#define ZSTD_OVERLAPLOG_MAX 9
+
+#define ZSTD_WINDOWLOG_LIMIT_DEFAULT 27 /* by default, the streaming decoder will refuse any frame
+ * requiring larger than (1<<ZSTD_WINDOWLOG_LIMIT_DEFAULT) window size,
+ * to preserve host's memory from unreasonable requirements.
+ * This limit can be overridden using ZSTD_DCtx_setParameter(,ZSTD_d_windowLogMax,).
+ * The limit does not apply for one-pass decoders (such as ZSTD_decompress()), since no additional memory is allocated */
+
+/* LDM parameter bounds */
+#define ZSTD_LDM_HASHLOG_MIN ZSTD_HASHLOG_MIN
+#define ZSTD_LDM_HASHLOG_MAX ZSTD_HASHLOG_MAX
+#define ZSTD_LDM_MINMATCH_MIN 4
+#define ZSTD_LDM_MINMATCH_MAX 4096
+#define ZSTD_LDM_BUCKETSIZELOG_MIN 1
+#define ZSTD_LDM_BUCKETSIZELOG_MAX 8
+#define ZSTD_LDM_HASHRATELOG_MIN 0
+#define ZSTD_LDM_HASHRATELOG_MAX (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN)
+
+/* Advanced parameter bounds */
+#define ZSTD_TARGETCBLOCKSIZE_MIN 64
+#define ZSTD_TARGETCBLOCKSIZE_MAX ZSTD_BLOCKSIZE_MAX
+#define ZSTD_SRCSIZEHINT_MIN 0
+#define ZSTD_SRCSIZEHINT_MAX INT_MAX
+
+/* internal */
+#define ZSTD_HASHLOG3_MAX 17
+
+
+/* --- Advanced types --- */
+
+typedef struct ZSTD_CCtx_params_s ZSTD_CCtx_params;
+
+typedef struct {
+ unsigned int offset; /* The offset of the match. (NOT the same as the offset code)
+ * If offset == 0 and matchLength == 0, this sequence represents the last
+ * literals in the block of litLength size.
+ */
+
+ unsigned int litLength; /* Literal length of the sequence. */
+ unsigned int matchLength; /* Match length of the sequence. */
+
+ unsigned int rep; /* Represents which repeat offset is represented by the field 'offset'.
+ * Ranges from [0, 3].
+ *
+ * Repeat offsets are essentially previous offsets from previous sequences sorted in
+ * recency order. For more detail, see doc/zstd_compression_format.md
+ *
+ * If rep == 0, then 'offset' does not contain a repeat offset.
+ * If rep > 0:
+ * If litLength != 0:
+ * rep == 1 --> offset == repeat_offset_1
+ * rep == 2 --> offset == repeat_offset_2
+ * rep == 3 --> offset == repeat_offset_3
+ * If litLength == 0:
+ * rep == 1 --> offset == repeat_offset_2
+ * rep == 2 --> offset == repeat_offset_3
+ * rep == 3 --> offset == repeat_offset_1 - 1
+ *
+ * Note: This field is optional. ZSTD_generateSequences() will calculate the value of
+ * 'rep', but repeat offsets do not necessarily need to be calculated from an external
+ * sequence provider's perspective. For example, ZSTD_compressSequences() does not
+ * use this 'rep' field at all (as of now).
+ */
+} ZSTD_Sequence;
+
+typedef struct {
+ unsigned windowLog; /**< largest match distance : larger == more compression, more memory needed during decompression */
+ unsigned chainLog; /**< fully searched segment : larger == more compression, slower, more memory (useless for fast) */
+ unsigned hashLog; /**< dispatch table : larger == faster, more memory */
+ unsigned searchLog; /**< nb of searches : larger == more compression, slower */
+ unsigned minMatch; /**< match length searched : larger == faster decompression, sometimes less compression */
+ unsigned targetLength; /**< acceptable match size for optimal parser (only) : larger == more compression, slower */
+ ZSTD_strategy strategy; /**< see ZSTD_strategy definition above */
+} ZSTD_compressionParameters;
+
+typedef struct {
+ int contentSizeFlag; /**< 1: content size will be in frame header (when known) */
+ int checksumFlag; /**< 1: generate a 32-bits checksum using XXH64 algorithm at end of frame, for error detection */
+ int noDictIDFlag; /**< 1: no dictID will be saved into frame header (dictID is only useful for dictionary compression) */
+} ZSTD_frameParameters;
+
+typedef struct {
+ ZSTD_compressionParameters cParams;
+ ZSTD_frameParameters fParams;
+} ZSTD_parameters;
+
+typedef enum {
+ ZSTD_dct_auto = 0, /* dictionary is "full" when starting with ZSTD_MAGIC_DICTIONARY, otherwise it is "rawContent" */
+ ZSTD_dct_rawContent = 1, /* ensures dictionary is always loaded as rawContent, even if it starts with ZSTD_MAGIC_DICTIONARY */
+ ZSTD_dct_fullDict = 2 /* refuses to load a dictionary if it does not respect Zstandard's specification, starting with ZSTD_MAGIC_DICTIONARY */
+} ZSTD_dictContentType_e;
+
+typedef enum {
+ ZSTD_dlm_byCopy = 0, /**< Copy dictionary content internally */
+ ZSTD_dlm_byRef = 1 /**< Reference dictionary content -- the dictionary buffer must outlive its users. */
+} ZSTD_dictLoadMethod_e;
+
+typedef enum {
+ ZSTD_f_zstd1 = 0, /* zstd frame format, specified in zstd_compression_format.md (default) */
+ ZSTD_f_zstd1_magicless = 1 /* Variant of zstd frame format, without initial 4-bytes magic number. 
+ * Useful to save 4 bytes per generated frame. + * Decoder cannot recognise automatically this format, requiring this instruction. */ +} ZSTD_format_e; + +typedef enum { + /* Note: this enum controls ZSTD_d_forceIgnoreChecksum */ + ZSTD_d_validateChecksum = 0, + ZSTD_d_ignoreChecksum = 1 +} ZSTD_forceIgnoreChecksum_e; + +typedef enum { + /* Note: this enum controls ZSTD_d_refMultipleDDicts */ + ZSTD_rmd_refSingleDDict = 0, + ZSTD_rmd_refMultipleDDicts = 1 +} ZSTD_refMultipleDDicts_e; + +typedef enum { + /* Note: this enum and the behavior it controls are effectively internal + * implementation details of the compressor. They are expected to continue + * to evolve and should be considered only in the context of extremely + * advanced performance tuning. + * + * Zstd currently supports the use of a CDict in three ways: + * + * - The contents of the CDict can be copied into the working context. This + * means that the compression can search both the dictionary and input + * while operating on a single set of internal tables. This makes + * the compression faster per-byte of input. However, the initial copy of + * the CDict's tables incurs a fixed cost at the beginning of the + * compression. For small compressions (< 8 KB), that copy can dominate + * the cost of the compression. + * + * - The CDict's tables can be used in-place. In this model, compression is + * slower per input byte, because the compressor has to search two sets of + * tables. However, this model incurs no start-up cost (as long as the + * working context's tables can be reused). For small inputs, this can be + * faster than copying the CDict's tables. + * + * - The CDict's tables are not used at all, and instead we use the working + * context alone to reload the dictionary and use params based on the source + * size. See ZSTD_compress_insertDictionary() and ZSTD_compress_usingDict(). + * This method is effective when the dictionary sizes are very small relative + * to the input size, and the input size is fairly large to begin with. + * + * Zstd has a simple internal heuristic that selects which strategy to use + * at the beginning of a compression. However, if experimentation shows that + * Zstd is making poor choices, it is possible to override that choice with + * this enum. + */ + ZSTD_dictDefaultAttach = 0, /* Use the default heuristic. */ + ZSTD_dictForceAttach = 1, /* Never copy the dictionary. */ + ZSTD_dictForceCopy = 2, /* Always copy the dictionary. */ + ZSTD_dictForceLoad = 3 /* Always reload the dictionary */ +} ZSTD_dictAttachPref_e; + +typedef enum { + ZSTD_lcm_auto = 0, /**< Automatically determine the compression mode based on the compression level. + * Negative compression levels will be uncompressed, and positive compression + * levels will be compressed. */ + ZSTD_lcm_huffman = 1, /**< Always attempt Huffman compression. Uncompressed literals will still be + * emitted if Huffman compression is not profitable. */ + ZSTD_lcm_uncompressed = 2 /**< Always emit uncompressed literals. */ +} ZSTD_literalCompressionMode_e; + +typedef enum { + ZSTD_urm_auto = 0, /* Automatically determine whether or not we use row matchfinder */ + ZSTD_urm_disableRowMatchFinder = 1, /* Never use row matchfinder */ + ZSTD_urm_enableRowMatchFinder = 2 /* Always use row matchfinder when applicable */ +} ZSTD_useRowMatchFinderMode_e; + +/*************************************** +* Frame size functions +***************************************/ + +/*! 
ZSTD_findDecompressedSize() : + * `src` should point to the start of a series of ZSTD encoded and/or skippable frames + * `srcSize` must be the _exact_ size of this series + * (i.e. there should be a frame boundary at `src + srcSize`) + * @return : - decompressed size of all data in all successive frames + * - if the decompressed size cannot be determined: ZSTD_CONTENTSIZE_UNKNOWN + * - if an error occurred: ZSTD_CONTENTSIZE_ERROR + * + * note 1 : decompressed size is an optional field, that may not be present, especially in streaming mode. + * When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size. + * In which case, it's necessary to use streaming mode to decompress data. + * note 2 : decompressed size is always present when compression is done with ZSTD_compress() + * note 3 : decompressed size can be very large (64-bits value), + * potentially larger than what local system can handle as a single memory segment. + * In which case, it's necessary to use streaming mode to decompress data. + * note 4 : If source is untrusted, decompressed size could be wrong or intentionally modified. + * Always ensure result fits within application's authorized limits. + * Each application can set its own limits. + * note 5 : ZSTD_findDecompressedSize handles multiple frames, and so it must traverse the input to + * read each contained frame header. This is fast as most of the data is skipped, + * however it does mean that all frame data must be present and valid. */ +ZSTDLIB_API unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize); + +/*! ZSTD_decompressBound() : + * `src` should point to the start of a series of ZSTD encoded and/or skippable frames + * `srcSize` must be the _exact_ size of this series + * (i.e. there should be a frame boundary at `src + srcSize`) + * @return : - upper-bound for the decompressed size of all data in all successive frames + * - if an error occurred: ZSTD_CONTENTSIZE_ERROR + * + * note 1 : an error can occur if `src` contains an invalid or incorrectly formatted frame. + * note 2 : the upper-bound is exact when the decompressed size field is available in every ZSTD encoded frame of `src`. + * in this case, `ZSTD_findDecompressedSize` and `ZSTD_decompressBound` return the same value. + * note 3 : when the decompressed size field isn't available, the upper-bound for that frame is calculated by: + * upper-bound = # blocks * min(128 KB, Window_Size) + */ +ZSTDLIB_API unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize); + +/*! ZSTD_frameHeaderSize() : + * srcSize must be >= ZSTD_FRAMEHEADERSIZE_PREFIX. + * @return : size of the Frame Header, + * or an error code (if srcSize is too small) */ +ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize); + +typedef enum { + ZSTD_sf_noBlockDelimiters = 0, /* Representation of ZSTD_Sequence has no block delimiters, sequences only */ + ZSTD_sf_explicitBlockDelimiters = 1 /* Representation of ZSTD_Sequence contains explicit block delimiters */ +} ZSTD_sequenceFormat_e; + +/*! ZSTD_generateSequences() : + * Generate sequences using ZSTD_compress2, given a source buffer. + * + * Each block will end with a dummy sequence + * with offset == 0, matchLength == 0, and litLength == length of last literals. + * litLength may be == 0, and if so, then the sequence of (of: 0 ml: 0 ll: 0) + * simply acts as a block delimiter. + * + * zc can be used to insert custom compression params. 
+ * This function invokes ZSTD_compress2 + * + * The output of this function can be fed into ZSTD_compressSequences() with CCtx + * setting of ZSTD_c_blockDelimiters as ZSTD_sf_explicitBlockDelimiters + * @return : number of sequences generated + */ + +ZSTDLIB_API size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, + size_t outSeqsSize, const void* src, size_t srcSize); + +/*! ZSTD_mergeBlockDelimiters() : + * Given an array of ZSTD_Sequence, remove all sequences that represent block delimiters/last literals + * by merging them into into the literals of the next sequence. + * + * As such, the final generated result has no explicit representation of block boundaries, + * and the final last literals segment is not represented in the sequences. + * + * The output of this function can be fed into ZSTD_compressSequences() with CCtx + * setting of ZSTD_c_blockDelimiters as ZSTD_sf_noBlockDelimiters + * @return : number of sequences left after merging + */ +ZSTDLIB_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize); + +/*! ZSTD_compressSequences() : + * Compress an array of ZSTD_Sequence, generated from the original source buffer, into dst. + * If a dictionary is included, then the cctx should reference the dict. (see: ZSTD_CCtx_refCDict(), ZSTD_CCtx_loadDictionary(), etc.) + * The entire source is compressed into a single frame. + * + * The compression behavior changes based on cctx params. In particular: + * If ZSTD_c_blockDelimiters == ZSTD_sf_noBlockDelimiters, the array of ZSTD_Sequence is expected to contain + * no block delimiters (defined in ZSTD_Sequence). Block boundaries are roughly determined based on + * the block size derived from the cctx, and sequences may be split. This is the default setting. + * + * If ZSTD_c_blockDelimiters == ZSTD_sf_explicitBlockDelimiters, the array of ZSTD_Sequence is expected to contain + * block delimiters (defined in ZSTD_Sequence). Behavior is undefined if no block delimiters are provided. + * + * If ZSTD_c_validateSequences == 0, this function will blindly accept the sequences provided. Invalid sequences cause undefined + * behavior. If ZSTD_c_validateSequences == 1, then if sequence is invalid (see doc/zstd_compression_format.md for + * specifics regarding offset/matchlength requirements) then the function will bail out and return an error. + * + * In addition to the two adjustable experimental params, there are other important cctx params. + * - ZSTD_c_minMatch MUST be set as less than or equal to the smallest match generated by the match finder. It has a minimum value of ZSTD_MINMATCH_MIN. + * - ZSTD_c_compressionLevel accordingly adjusts the strength of the entropy coder, as it would in typical compression. + * - ZSTD_c_windowLog affects offset validation: this function will return an error at higher debug levels if a provided offset + * is larger than what the spec allows for a given window log and dictionary (if present). See: doc/zstd_compression_format.md + * + * Note: Repcodes are, as of now, always re-calculated within this function, so ZSTD_Sequence::rep is unused. + * Note 2: Once we integrate ability to ingest repcodes, the explicit block delims mode must respect those repcodes exactly, + * and cannot emit an RLE block that disagrees with the repcode history + * @return : final compressed size or a ZSTD error. 
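+ *
+ * A possible end-to-end sketch (outSeqs is a caller-allocated ZSTD_Sequence array;
+ * sizes and error checks are omitted) :
+ *
+ *     size_t nbSeqs = ZSTD_generateSequences(cctx, outSeqs, outSeqsCapacity, src, srcSize);
+ *     nbSeqs = ZSTD_mergeBlockDelimiters(outSeqs, nbSeqs);
+ *     ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockDelimiters, ZSTD_sf_noBlockDelimiters);
+ *     size_t const cSize = ZSTD_compressSequences(cctx, dst, dstCapacity, outSeqs, nbSeqs, src, srcSize);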
+ */ +ZSTDLIB_API size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstSize, + const ZSTD_Sequence* inSeqs, size_t inSeqsSize, + const void* src, size_t srcSize); + + +/*! ZSTD_writeSkippableFrame() : + * Generates a zstd skippable frame containing data given by src, and writes it to dst buffer. + * + * Skippable frames begin with a a 4-byte magic number. There are 16 possible choices of magic number, + * ranging from ZSTD_MAGIC_SKIPPABLE_START to ZSTD_MAGIC_SKIPPABLE_START+15. + * As such, the parameter magicVariant controls the exact skippable frame magic number variant used, so + * the magic number used will be ZSTD_MAGIC_SKIPPABLE_START + magicVariant. + * + * Returns an error if destination buffer is not large enough, if the source size is not representable + * with a 4-byte unsigned int, or if the parameter magicVariant is greater than 15 (and therefore invalid). + * + * @return : number of bytes written or a ZSTD error. + */ +ZSTDLIB_API size_t ZSTD_writeSkippableFrame(void* dst, size_t dstCapacity, + const void* src, size_t srcSize, unsigned magicVariant); + + +/*************************************** +* Memory management +***************************************/ + +/*! ZSTD_estimate*() : + * These functions make it possible to estimate memory usage + * of a future {D,C}Ctx, before its creation. + * + * ZSTD_estimateCCtxSize() will provide a memory budget large enough + * for any compression level up to selected one. + * Note : Unlike ZSTD_estimateCStreamSize*(), this estimate + * does not include space for a window buffer. + * Therefore, the estimation is only guaranteed for single-shot compressions, not streaming. + * The estimate will assume the input may be arbitrarily large, + * which is the worst case. + * + * When srcSize can be bound by a known and rather "small" value, + * this fact can be used to provide a tighter estimation + * because the CCtx compression context will need less memory. + * This tighter estimation can be provided by more advanced functions + * ZSTD_estimateCCtxSize_usingCParams(), which can be used in tandem with ZSTD_getCParams(), + * and ZSTD_estimateCCtxSize_usingCCtxParams(), which can be used in tandem with ZSTD_CCtxParams_setParameter(). + * Both can be used to estimate memory using custom compression parameters and arbitrary srcSize limits. + * + * Note 2 : only single-threaded compression is supported. + * ZSTD_estimateCCtxSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1. + */ +ZSTDLIB_API size_t ZSTD_estimateCCtxSize(int compressionLevel); +ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams); +ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params); +ZSTDLIB_API size_t ZSTD_estimateDCtxSize(void); + +/*! ZSTD_estimateCStreamSize() : + * ZSTD_estimateCStreamSize() will provide a budget large enough for any compression level up to selected one. + * It will also consider src size to be arbitrarily "large", which is worst case. + * If srcSize is known to always be small, ZSTD_estimateCStreamSize_usingCParams() can provide a tighter estimation. + * ZSTD_estimateCStreamSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel. + * ZSTD_estimateCStreamSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParams_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1. 
+ * Note : CStream size estimation is only correct for single-threaded compression. + * ZSTD_DStream memory budget depends on window Size. + * This information can be passed manually, using ZSTD_estimateDStreamSize, + * or deducted from a valid frame Header, using ZSTD_estimateDStreamSize_fromFrame(); + * Note : if streaming is init with function ZSTD_init?Stream_usingDict(), + * an internal ?Dict will be created, which additional size is not estimated here. + * In this case, get total size by adding ZSTD_estimate?DictSize */ +ZSTDLIB_API size_t ZSTD_estimateCStreamSize(int compressionLevel); +ZSTDLIB_API size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams); +ZSTDLIB_API size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params); +ZSTDLIB_API size_t ZSTD_estimateDStreamSize(size_t windowSize); +ZSTDLIB_API size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize); + +/*! ZSTD_estimate?DictSize() : + * ZSTD_estimateCDictSize() will bet that src size is relatively "small", and content is copied, like ZSTD_createCDict(). + * ZSTD_estimateCDictSize_advanced() makes it possible to control compression parameters precisely, like ZSTD_createCDict_advanced(). + * Note : dictionaries created by reference (`ZSTD_dlm_byRef`) are logically smaller. + */ +ZSTDLIB_API size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel); +ZSTDLIB_API size_t ZSTD_estimateCDictSize_advanced(size_t dictSize, ZSTD_compressionParameters cParams, ZSTD_dictLoadMethod_e dictLoadMethod); +ZSTDLIB_API size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod); + +/*! ZSTD_initStatic*() : + * Initialize an object using a pre-allocated fixed-size buffer. + * workspace: The memory area to emplace the object into. + * Provided pointer *must be 8-bytes aligned*. + * Buffer must outlive object. + * workspaceSize: Use ZSTD_estimate*Size() to determine + * how large workspace must be to support target scenario. + * @return : pointer to object (same address as workspace, just different type), + * or NULL if error (size too small, incorrect alignment, etc.) + * Note : zstd will never resize nor malloc() when using a static buffer. + * If the object requires more memory than available, + * zstd will just error out (typically ZSTD_error_memory_allocation). + * Note 2 : there is no corresponding "free" function. + * Since workspace is allocated externally, it must be freed externally too. + * Note 3 : cParams : use ZSTD_getCParams() to convert a compression level + * into its associated cParams. + * Limitation 1 : currently not compatible with internal dictionary creation, triggered by + * ZSTD_CCtx_loadDictionary(), ZSTD_initCStream_usingDict() or ZSTD_initDStream_usingDict(). + * Limitation 2 : static cctx currently not compatible with multi-threading. + * Limitation 3 : static dctx is incompatible with legacy support. 
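+ *
+ * A minimal static-allocation sketch (compression level 3 chosen arbitrarily; error checks omitted) :
+ *
+ *     size_t const wkspSize = ZSTD_estimateCCtxSize(3);
+ *     void*  const wksp     = malloc(wkspSize);
+ *     ZSTD_CCtx* const cctx = ZSTD_initStaticCCtx(wksp, wkspSize);
+ *     size_t const cSize = ZSTD_compressCCtx(cctx, dst, dstCapacity, src, srcSize, 3);
+ *     free(wksp);
+ *
+ * There is no ZSTD_freeCCtx() counterpart in this mode : releasing the workspace releases the context.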
+ */ +ZSTDLIB_API ZSTD_CCtx* ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize); +ZSTDLIB_API ZSTD_CStream* ZSTD_initStaticCStream(void* workspace, size_t workspaceSize); /**< same as ZSTD_initStaticCCtx() */ + +ZSTDLIB_API ZSTD_DCtx* ZSTD_initStaticDCtx(void* workspace, size_t workspaceSize); +ZSTDLIB_API ZSTD_DStream* ZSTD_initStaticDStream(void* workspace, size_t workspaceSize); /**< same as ZSTD_initStaticDCtx() */ + +ZSTDLIB_API const ZSTD_CDict* ZSTD_initStaticCDict( + void* workspace, size_t workspaceSize, + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_compressionParameters cParams); + +ZSTDLIB_API const ZSTD_DDict* ZSTD_initStaticDDict( + void* workspace, size_t workspaceSize, + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType); + + +/*! Custom memory allocation : + * These prototypes make it possible to pass your own allocation/free functions. + * ZSTD_customMem is provided at creation time, using ZSTD_create*_advanced() variants listed below. + * All allocation/free operations will be completed using these custom variants instead of regular ones. + */ +typedef void* (*ZSTD_allocFunction) (void* opaque, size_t size); +typedef void (*ZSTD_freeFunction) (void* opaque, void* address); +typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; void* opaque; } ZSTD_customMem; +static +#ifdef __GNUC__ +__attribute__((__unused__)) +#endif +ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL }; /**< this constant defers to stdlib's functions */ + +ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem); +ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem); +ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem); +ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem); + +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_compressionParameters cParams, + ZSTD_customMem customMem); + +/* ! Thread pool : + * These prototypes make it possible to share a thread pool among multiple compression contexts. + * This can limit resources for applications with multiple threads where each one uses + * a threaded compression mode (via ZSTD_c_nbWorkers parameter). + * ZSTD_createThreadPool creates a new thread pool with a given number of threads. + * Note that the lifetime of such pool must exist while being used. + * ZSTD_CCtx_refThreadPool assigns a thread pool to a context (use NULL argument value + * to use an internal thread pool). + * ZSTD_freeThreadPool frees a thread pool, accepts NULL pointer. + */ +typedef struct POOL_ctx_s ZSTD_threadPool; +ZSTDLIB_API ZSTD_threadPool* ZSTD_createThreadPool(size_t numThreads); +ZSTDLIB_API void ZSTD_freeThreadPool (ZSTD_threadPool* pool); /* accept NULL pointer */ +ZSTDLIB_API size_t ZSTD_CCtx_refThreadPool(ZSTD_CCtx* cctx, ZSTD_threadPool* pool); + + +/* + * This API is temporary and is expected to change or disappear in the future! 
+ */ +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced2( + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + const ZSTD_CCtx_params* cctxParams, + ZSTD_customMem customMem); + +ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced( + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_customMem customMem); + + +/*************************************** +* Advanced compression functions +***************************************/ + +/*! ZSTD_createCDict_byReference() : + * Create a digested dictionary for compression + * Dictionary content is just referenced, not duplicated. + * As a consequence, `dictBuffer` **must** outlive CDict, + * and its content must remain unmodified throughout the lifetime of CDict. + * note: equivalent to ZSTD_createCDict_advanced(), with dictLoadMethod==ZSTD_dlm_byRef */ +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel); + +/*! ZSTD_getCParams() : + * @return ZSTD_compressionParameters structure for a selected compression level and estimated srcSize. + * `estimatedSrcSize` value is optional, select 0 if not known */ +ZSTDLIB_API ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize); + +/*! ZSTD_getParams() : + * same as ZSTD_getCParams(), but @return a full `ZSTD_parameters` object instead of sub-component `ZSTD_compressionParameters`. + * All fields of `ZSTD_frameParameters` are set to default : contentSize=1, checksum=0, noDictID=0 */ +ZSTDLIB_API ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize); + +/*! ZSTD_checkCParams() : + * Ensure param values remain within authorized range. + * @return 0 on success, or an error code (can be checked with ZSTD_isError()) */ +ZSTDLIB_API size_t ZSTD_checkCParams(ZSTD_compressionParameters params); + +/*! ZSTD_adjustCParams() : + * optimize params for a given `srcSize` and `dictSize`. + * `srcSize` can be unknown, in which case use ZSTD_CONTENTSIZE_UNKNOWN. + * `dictSize` must be `0` when there is no dictionary. + * cPar can be invalid : all parameters will be clamped within valid range in the @return struct. + * This function never fails (wide contract) */ +ZSTDLIB_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize); + +/*! ZSTD_compress_advanced() : + * Note : this function is now DEPRECATED. + * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_setParameter() and other parameter setters. + * This prototype will generate compilation warnings. */ +ZSTD_DEPRECATED("use ZSTD_compress2") +size_t ZSTD_compress_advanced(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + ZSTD_parameters params); + +/*! ZSTD_compress_usingCDict_advanced() : + * Note : this function is now DEPRECATED. + * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_loadDictionary() and other parameter setters. + * This prototype will generate compilation warnings. */ +ZSTD_DEPRECATED("use ZSTD_compress2 with ZSTD_CCtx_loadDictionary") +size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict, + ZSTD_frameParameters fParams); + + +/*! 
ZSTD_CCtx_loadDictionary_byReference() : + * Same as ZSTD_CCtx_loadDictionary(), but dictionary content is referenced, instead of being copied into CCtx. + * It saves some memory, but also requires that `dict` outlives its usage within `cctx` */ +ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_byReference(ZSTD_CCtx* cctx, const void* dict, size_t dictSize); + +/*! ZSTD_CCtx_loadDictionary_advanced() : + * Same as ZSTD_CCtx_loadDictionary(), but gives finer control over + * how to load the dictionary (by copy ? by reference ?) + * and how to interpret it (automatic ? force raw mode ? full mode only ?) */ +ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType); + +/*! ZSTD_CCtx_refPrefix_advanced() : + * Same as ZSTD_CCtx_refPrefix(), but gives finer control over + * how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) */ +ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType); + +/* === experimental parameters === */ +/* these parameters can be used with ZSTD_setParameter() + * they are not guaranteed to remain supported in the future */ + + /* Enables rsyncable mode, + * which makes compressed files more rsync friendly + * by adding periodic synchronization points to the compressed data. + * The target average block size is ZSTD_c_jobSize / 2. + * It's possible to modify the job size to increase or decrease + * the granularity of the synchronization point. + * Once the jobSize is smaller than the window size, + * it will result in compression ratio degradation. + * NOTE 1: rsyncable mode only works when multithreading is enabled. + * NOTE 2: rsyncable performs poorly in combination with long range mode, + * since it will decrease the effectiveness of synchronization points, + * though mileage may vary. + * NOTE 3: Rsyncable mode limits maximum compression speed to ~400 MB/s. + * If the selected compression level is already running significantly slower, + * the overall speed won't be significantly impacted. + */ + #define ZSTD_c_rsyncable ZSTD_c_experimentalParam1 + +/* Select a compression format. + * The value must be of type ZSTD_format_e. + * See ZSTD_format_e enum definition for details */ +#define ZSTD_c_format ZSTD_c_experimentalParam2 + +/* Force back-reference distances to remain < windowSize, + * even when referencing into Dictionary content (default:0) */ +#define ZSTD_c_forceMaxWindow ZSTD_c_experimentalParam3 + +/* Controls whether the contents of a CDict + * are used in place, or copied into the working context. + * Accepts values from the ZSTD_dictAttachPref_e enum. + * See the comments on that enum for an explanation of the feature. */ +#define ZSTD_c_forceAttachDict ZSTD_c_experimentalParam4 + +/* Controls how the literals are compressed (default is auto). + * The value must be of type ZSTD_literalCompressionMode_e. + * See ZSTD_literalCompressionMode_e enum definition for details. + */ +#define ZSTD_c_literalCompressionMode ZSTD_c_experimentalParam5 + +/* Tries to fit compressed block size to be around targetCBlockSize. + * No target when targetCBlockSize == 0. + * There is no guarantee on compressed block size (default:0) */ +#define ZSTD_c_targetCBlockSize ZSTD_c_experimentalParam6 + +/* User's best guess of source size. + * Hint is not valid when srcSizeHint == 0. 
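+ * For example (sketch, expectedSrcSize being the caller's own estimate) :
+ *     ZSTD_CCtx_setParameter(cctx, ZSTD_c_srcSizeHint, (int)expectedSrcSize);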
+ * There is no guarantee that hint is close to actual source size, + * but compression ratio may regress significantly if guess considerably underestimates */ +#define ZSTD_c_srcSizeHint ZSTD_c_experimentalParam7 + +/* Controls whether the new and experimental "dedicated dictionary search + * structure" can be used. This feature is still rough around the edges, be + * prepared for surprising behavior! + * + * How to use it: + * + * When using a CDict, whether to use this feature or not is controlled at + * CDict creation, and it must be set in a CCtxParams set passed into that + * construction (via ZSTD_createCDict_advanced2()). A compression will then + * use the feature or not based on how the CDict was constructed; the value of + * this param, set in the CCtx, will have no effect. + * + * However, when a dictionary buffer is passed into a CCtx, such as via + * ZSTD_CCtx_loadDictionary(), this param can be set on the CCtx to control + * whether the CDict that is created internally can use the feature or not. + * + * What it does: + * + * Normally, the internal data structures of the CDict are analogous to what + * would be stored in a CCtx after compressing the contents of a dictionary. + * To an approximation, a compression using a dictionary can then use those + * data structures to simply continue what is effectively a streaming + * compression where the simulated compression of the dictionary left off. + * Which is to say, the search structures in the CDict are normally the same + * format as in the CCtx. + * + * It is possible to do better, since the CDict is not like a CCtx: the search + * structures are written once during CDict creation, and then are only read + * after that, while the search structures in the CCtx are both read and + * written as the compression goes along. This means we can choose a search + * structure for the dictionary that is read-optimized. + * + * This feature enables the use of that different structure. + * + * Note that some of the members of the ZSTD_compressionParameters struct have + * different semantics and constraints in the dedicated search structure. It is + * highly recommended that you simply set a compression level in the CCtxParams + * you pass into the CDict creation call, and avoid messing with the cParams + * directly. + * + * Effects: + * + * This will only have any effect when the selected ZSTD_strategy + * implementation supports this feature. Currently, that's limited to + * ZSTD_greedy, ZSTD_lazy, and ZSTD_lazy2. + * + * Note that this means that the CDict tables can no longer be copied into the + * CCtx, so the dict attachment mode ZSTD_dictForceCopy will no longer be + * useable. The dictionary can only be attached or reloaded. + * + * In general, you should expect compression to be faster--sometimes very much + * so--and CDict creation to be slightly slower. Eventually, we will probably + * make this mode the default. + */ +#define ZSTD_c_enableDedicatedDictSearch ZSTD_c_experimentalParam8 + +/* ZSTD_c_stableInBuffer + * Experimental parameter. + * Default is 0 == disabled. Set to 1 to enable. + * + * Tells the compressor that the ZSTD_inBuffer will ALWAYS be the same + * between calls, except for the modifications that zstd makes to pos (the + * caller must not modify pos). This is checked by the compressor, and + * compression will fail if it ever changes. This means the only flush + * mode that makes sense is ZSTD_e_end, so zstd will error if ZSTD_e_end + * is not used. 
The data in the ZSTD_inBuffer in the range [src, src + pos)
+ * MUST not be modified during compression or you will get data corruption.
+ *
+ * When this flag is enabled zstd won't allocate an input window buffer,
+ * because the user guarantees it can reference the ZSTD_inBuffer until
+ * the frame is complete. But, it will still allocate an output buffer
+ * large enough to fit a block (see ZSTD_c_stableOutBuffer). This will also
+ * avoid the memcpy() from the input buffer to the input window buffer.
+ *
+ * NOTE: ZSTD_compressStream2() will error if ZSTD_e_end is not used.
+ * That means this flag cannot be used with ZSTD_compressStream().
+ *
+ * NOTE: So long as the ZSTD_inBuffer always points to valid memory, using
+ * this flag is ALWAYS memory safe, and will never access out-of-bounds
+ * memory. However, compression WILL fail if you violate the preconditions.
+ *
+ * WARNING: The data in the ZSTD_inBuffer in the range [src, src + pos) MUST
+ * not be modified during compression or you will get data corruption. This
+ * is because zstd needs to reference data in the ZSTD_inBuffer to find
+ * matches. Normally zstd maintains its own window buffer for this purpose,
+ * but passing this flag tells zstd to use the user provided buffer.
+ */
+#define ZSTD_c_stableInBuffer ZSTD_c_experimentalParam9
+
+/* ZSTD_c_stableOutBuffer
+ * Experimental parameter.
+ * Default is 0 == disabled. Set to 1 to enable.
+ *
+ * Tells the compressor that the ZSTD_outBuffer will not be resized between
+ * calls. Specifically: (out.size - out.pos) will never grow. This gives the
+ * compressor the freedom to say: If the compressed data doesn't fit in the
+ * output buffer then return ZSTD_error_dstSizeTooSmall. This allows us to
+ * always compress directly into the output buffer, instead of compressing
+ * into an internal buffer and copying to the output buffer.
+ *
+ * When this flag is enabled zstd won't allocate an output buffer, because
+ * it can write directly to the ZSTD_outBuffer. It will still allocate the
+ * input window buffer (see ZSTD_c_stableInBuffer).
+ *
+ * Zstd will check that (out.size - out.pos) never grows and return an error
+ * if it does. While not strictly necessary, this should prevent surprises.
+ */
+#define ZSTD_c_stableOutBuffer ZSTD_c_experimentalParam10
+
+/* ZSTD_c_blockDelimiters
+ * Default is 0 == ZSTD_sf_noBlockDelimiters.
+ *
+ * For use with sequence compression API: ZSTD_compressSequences().
+ *
+ * Designates whether or not the given array of ZSTD_Sequence contains block delimiters
+ * and last literals, which are defined as sequences with offset == 0 and matchLength == 0.
+ * See the definition of ZSTD_Sequence for more specifics.
+ */
+#define ZSTD_c_blockDelimiters ZSTD_c_experimentalParam11
+
+/* ZSTD_c_validateSequences
+ * Default is 0 == disabled. Set to 1 to enable sequence validation.
+ *
+ * For use with sequence compression API: ZSTD_compressSequences().
+ * Designates whether or not we validate sequences provided to ZSTD_compressSequences()
+ * during function execution.
+ *
+ * Without validation, providing a sequence that does not conform to the zstd spec will cause
+ * undefined behavior, and may produce a corrupted block.
+ *
+ * With validation enabled, if a sequence is invalid (see doc/zstd_compression_format.md for
+ * specifics regarding offset/matchlength requirements) then the function will bail out and
+ * return an error.
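+ *
+ * Illustrative sketch only (argument names here are assumptions; see the
+ * ZSTD_compressSequences() prototype elsewhere in this header for the
+ * authoritative signature, and note that error handling is omitted) :
+ *     ZSTD_CCtx_setParameter(cctx, ZSTD_c_validateSequences, 1);
+ *     size_t const res = ZSTD_compressSequences(cctx, dst, dstCapacity,
+ *                                               seqs, nbSeqs, src, srcSize);
+ *     if (ZSTD_isError(res)) { the invalid sequence was rejected safely, not UB }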
+ * + */ +#define ZSTD_c_validateSequences ZSTD_c_experimentalParam12 + +/* ZSTD_c_splitBlocks + * Default is 0 == disabled. Set to 1 to enable block splitting. + * + * Will attempt to split blocks in order to improve compression ratio at the cost of speed. + */ +#define ZSTD_c_splitBlocks ZSTD_c_experimentalParam13 + +/* ZSTD_c_useRowMatchFinder + * Default is ZSTD_urm_auto. + * Controlled with ZSTD_useRowMatchFinderMode_e enum. + * + * By default, in ZSTD_urm_auto, when finalizing the compression parameters, the library + * will decide at runtime whether to use the row-based matchfinder based on support for SIMD + * instructions as well as the windowLog. + * + * Set to ZSTD_urm_disableRowMatchFinder to never use row-based matchfinder. + * Set to ZSTD_urm_enableRowMatchFinder to force usage of row-based matchfinder. + */ +#define ZSTD_c_useRowMatchFinder ZSTD_c_experimentalParam14 + +/* ZSTD_c_deterministicRefPrefix + * Default is 0 == disabled. Set to 1 to enable. + * + * Zstd produces different results for prefix compression when the prefix is + * directly adjacent to the data about to be compressed vs. when it isn't. + * This is because zstd detects that the two buffers are contiguous and it can + * use a more efficient match finding algorithm. However, this produces different + * results than when the two buffers are non-contiguous. This flag forces zstd + * to always load the prefix in non-contiguous mode, even if it happens to be + * adjacent to the data, to guarantee determinism. + * + * If you really care about determinism when using a dictionary or prefix, + * like when doing delta compression, you should select this option. It comes + * at a speed penalty of about ~2.5% if the dictionary and data happened to be + * contiguous, and is free if they weren't contiguous. We don't expect that + * intentionally making the dictionary and data contiguous will be worth the + * cost to memcpy() the data. + */ +#define ZSTD_c_deterministicRefPrefix ZSTD_c_experimentalParam15 + +/*! ZSTD_CCtx_getParameter() : + * Get the requested compression parameter value, selected by enum ZSTD_cParameter, + * and store it into int* value. + * @return : 0, or an error code (which can be tested with ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_CCtx_getParameter(const ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value); + + +/*! ZSTD_CCtx_params : + * Quick howto : + * - ZSTD_createCCtxParams() : Create a ZSTD_CCtx_params structure + * - ZSTD_CCtxParams_setParameter() : Push parameters one by one into + * an existing ZSTD_CCtx_params structure. + * This is similar to + * ZSTD_CCtx_setParameter(). + * - ZSTD_CCtx_setParametersUsingCCtxParams() : Apply parameters to + * an existing CCtx. + * These parameters will be applied to + * all subsequent frames. + * - ZSTD_compressStream2() : Do compression using the CCtx. + * - ZSTD_freeCCtxParams() : Free the memory, accept NULL pointer. + * + * This can be used with ZSTD_estimateCCtxSize_advanced_usingCCtxParams() + * for static allocation of CCtx for single-threaded compression. + */ +ZSTDLIB_API ZSTD_CCtx_params* ZSTD_createCCtxParams(void); +ZSTDLIB_API size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params); /* accept NULL pointer */ + +/*! ZSTD_CCtxParams_reset() : + * Reset params to default values. + */ +ZSTDLIB_API size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params); + +/*! ZSTD_CCtxParams_init() : + * Initializes the compression parameters of cctxParams according to + * compression level. All other parameters are reset to their default values. 
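+ *
+ * A minimal usage sketch of the parameter-object workflow outlined above
+ * (error checks omitted; `cctx`, `output` and `input` are assumed to already exist) :
+ *     ZSTD_CCtx_params* const params = ZSTD_createCCtxParams();
+ *     ZSTD_CCtxParams_init(params, 3);                  // start from level 3 defaults
+ *     ZSTD_CCtxParams_setParameter(params, ZSTD_c_checksumFlag, 1);
+ *     ZSTD_CCtx_setParametersUsingCCtxParams(cctx, params);
+ *     ZSTD_compressStream2(cctx, &output, &input, ZSTD_e_end);
+ *     ZSTD_freeCCtxParams(params);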
+ */ +ZSTDLIB_API size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel); + +/*! ZSTD_CCtxParams_init_advanced() : + * Initializes the compression and frame parameters of cctxParams according to + * params. All other parameters are reset to their default values. + */ +ZSTDLIB_API size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params); + +/*! ZSTD_CCtxParams_setParameter() : Requires v1.4.0+ + * Similar to ZSTD_CCtx_setParameter. + * Set one compression parameter, selected by enum ZSTD_cParameter. + * Parameters must be applied to a ZSTD_CCtx using + * ZSTD_CCtx_setParametersUsingCCtxParams(). + * @result : a code representing success or failure (which can be tested with + * ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value); + +/*! ZSTD_CCtxParams_getParameter() : + * Similar to ZSTD_CCtx_getParameter. + * Get the requested value of one compression parameter, selected by enum ZSTD_cParameter. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_CCtxParams_getParameter(const ZSTD_CCtx_params* params, ZSTD_cParameter param, int* value); + +/*! ZSTD_CCtx_setParametersUsingCCtxParams() : + * Apply a set of ZSTD_CCtx_params to the compression context. + * This can be done even after compression is started, + * if nbWorkers==0, this will have no impact until a new compression is started. + * if nbWorkers>=1, new parameters will be picked up at next job, + * with a few restrictions (windowLog, pledgedSrcSize, nbWorkers, jobSize, and overlapLog are not updated). + */ +ZSTDLIB_API size_t ZSTD_CCtx_setParametersUsingCCtxParams( + ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params); + +/*! ZSTD_compressStream2_simpleArgs() : + * Same as ZSTD_compressStream2(), + * but using only integral types as arguments. + * This variant might be helpful for binders from dynamic languages + * which have troubles handling structures containing memory pointers. + */ +ZSTDLIB_API size_t ZSTD_compressStream2_simpleArgs ( + ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, size_t* dstPos, + const void* src, size_t srcSize, size_t* srcPos, + ZSTD_EndDirective endOp); + + +/*************************************** +* Advanced decompression functions +***************************************/ + +/*! ZSTD_isFrame() : + * Tells if the content of `buffer` starts with a valid Frame Identifier. + * Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0. + * Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled. + * Note 3 : Skippable Frame Identifiers are considered valid. */ +ZSTDLIB_API unsigned ZSTD_isFrame(const void* buffer, size_t size); + +/*! ZSTD_createDDict_byReference() : + * Create a digested dictionary, ready to start decompression operation without startup delay. + * Dictionary content is referenced, and therefore stays in dictBuffer. + * It is important that dictBuffer outlives DDict, + * it must remain read accessible throughout the lifetime of DDict */ +ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize); + +/*! ZSTD_DCtx_loadDictionary_byReference() : + * Same as ZSTD_DCtx_loadDictionary(), + * but references `dict` content instead of copying it into `dctx`. 
+ * This saves memory if `dict` remains around., + * However, it's imperative that `dict` remains accessible (and unmodified) while being used, so it must outlive decompression. */ +ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); + +/*! ZSTD_DCtx_loadDictionary_advanced() : + * Same as ZSTD_DCtx_loadDictionary(), + * but gives direct control over + * how to load the dictionary (by copy ? by reference ?) + * and how to interpret it (automatic ? force raw mode ? full mode only ?). */ +ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType); + +/*! ZSTD_DCtx_refPrefix_advanced() : + * Same as ZSTD_DCtx_refPrefix(), but gives finer control over + * how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) */ +ZSTDLIB_API size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType); + +/*! ZSTD_DCtx_setMaxWindowSize() : + * Refuses allocating internal buffers for frames requiring a window size larger than provided limit. + * This protects a decoder context from reserving too much memory for itself (potential attack scenario). + * This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode. + * By default, a decompression context accepts all window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT) + * @return : 0, or an error code (which can be tested using ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize); + +/*! ZSTD_DCtx_getParameter() : + * Get the requested decompression parameter value, selected by enum ZSTD_dParameter, + * and store it into int* value. + * @return : 0, or an error code (which can be tested with ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int* value); + +/* ZSTD_d_format + * experimental parameter, + * allowing selection between ZSTD_format_e input compression formats + */ +#define ZSTD_d_format ZSTD_d_experimentalParam1 +/* ZSTD_d_stableOutBuffer + * Experimental parameter. + * Default is 0 == disabled. Set to 1 to enable. + * + * Tells the decompressor that the ZSTD_outBuffer will ALWAYS be the same + * between calls, except for the modifications that zstd makes to pos (the + * caller must not modify pos). This is checked by the decompressor, and + * decompression will fail if it ever changes. Therefore the ZSTD_outBuffer + * MUST be large enough to fit the entire decompressed frame. This will be + * checked when the frame content size is known. The data in the ZSTD_outBuffer + * in the range [dst, dst + pos) MUST not be modified during decompression + * or you will get data corruption. + * + * When this flags is enabled zstd won't allocate an output buffer, because + * it can write directly to the ZSTD_outBuffer, but it will still allocate + * an input buffer large enough to fit any compressed block. This will also + * avoid the memcpy() from the internal output buffer to the ZSTD_outBuffer. + * If you need to avoid the input buffer allocation use the buffer-less + * streaming API. + * + * NOTE: So long as the ZSTD_outBuffer always points to valid memory, using + * this flag is ALWAYS memory safe, and will never access out-of-bounds + * memory. However, decompression WILL fail if you violate the preconditions. 
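+ *
+ * A possible usage sketch (assumes the frame content size is known up front so
+ * that `dstBuf`/`dstCapacity` can hold the whole frame; names are illustrative
+ * and error checks are omitted) :
+ *     ZSTD_DCtx_setParameter(dctx, ZSTD_d_stableOutBuffer, 1);
+ *     ZSTD_outBuffer out = { dstBuf, dstCapacity, 0 };
+ *     ZSTD_inBuffer  in  = { srcBuf, srcSize, 0 };
+ *     size_t ret;
+ *     do { ret = ZSTD_decompressStream(dctx, &out, &in); }
+ *     while (ret != 0 && !ZSTD_isError(ret));           // reuse the same `out` each call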
+ * + * WARNING: The data in the ZSTD_outBuffer in the range [dst, dst + pos) MUST + * not be modified during decompression or you will get data corruption. This + * is because zstd needs to reference data in the ZSTD_outBuffer to regenerate + * matches. Normally zstd maintains its own buffer for this purpose, but passing + * this flag tells zstd to use the user provided buffer. + */ +#define ZSTD_d_stableOutBuffer ZSTD_d_experimentalParam2 + +/* ZSTD_d_forceIgnoreChecksum + * Experimental parameter. + * Default is 0 == disabled. Set to 1 to enable + * + * Tells the decompressor to skip checksum validation during decompression, regardless + * of whether checksumming was specified during compression. This offers some + * slight performance benefits, and may be useful for debugging. + * Param has values of type ZSTD_forceIgnoreChecksum_e + */ +#define ZSTD_d_forceIgnoreChecksum ZSTD_d_experimentalParam3 + +/* ZSTD_d_refMultipleDDicts + * Experimental parameter. + * Default is 0 == disabled. Set to 1 to enable + * + * If enabled and dctx is allocated on the heap, then additional memory will be allocated + * to store references to multiple ZSTD_DDict. That is, multiple calls of ZSTD_refDDict() + * using a given ZSTD_DCtx, rather than overwriting the previous DDict reference, will instead + * store all references. At decompression time, the appropriate dictID is selected + * from the set of DDicts based on the dictID in the frame. + * + * Usage is simply calling ZSTD_refDDict() on multiple dict buffers. + * + * Param has values of byte ZSTD_refMultipleDDicts_e + * + * WARNING: Enabling this parameter and calling ZSTD_DCtx_refDDict(), will trigger memory + * allocation for the hash table. ZSTD_freeDCtx() also frees this memory. + * Memory is allocated as per ZSTD_DCtx::customMem. + * + * Although this function allocates memory for the table, the user is still responsible for + * memory management of the underlying ZSTD_DDict* themselves. + */ +#define ZSTD_d_refMultipleDDicts ZSTD_d_experimentalParam4 + + +/*! ZSTD_DCtx_setFormat() : + * This function is REDUNDANT. Prefer ZSTD_DCtx_setParameter(). + * Instruct the decoder context about what kind of data to decode next. + * This instruction is mandatory to decode data without a fully-formed header, + * such ZSTD_f_zstd1_magicless for example. + * @return : 0, or an error code (which can be tested using ZSTD_isError()). */ +ZSTD_DEPRECATED("use ZSTD_DCtx_setParameter() instead") +size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format); + +/*! ZSTD_decompressStream_simpleArgs() : + * Same as ZSTD_decompressStream(), + * but using only integral types as arguments. + * This can be helpful for binders from dynamic languages + * which have troubles handling structures containing memory pointers. + */ +ZSTDLIB_API size_t ZSTD_decompressStream_simpleArgs ( + ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, size_t* dstPos, + const void* src, size_t srcSize, size_t* srcPos); + + +/******************************************************************** +* Advanced streaming functions +* Warning : most of these functions are now redundant with the Advanced API. +* Once Advanced API reaches "stable" status, +* redundant functions will be deprecated, and then at some point removed. +********************************************************************/ + +/*===== Advanced Streaming compression functions =====*/ + +/*! 
ZSTD_initCStream_srcSize() : + * This function is DEPRECATED, and equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any) + * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); + * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); + * + * pledgedSrcSize must be correct. If it is not known at init time, use + * ZSTD_CONTENTSIZE_UNKNOWN. Note that, for compatibility with older programs, + * "0" also disables frame content size field. It may be enabled in the future. + * This prototype will generate compilation warnings. + */ +ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions") +size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, + int compressionLevel, + unsigned long long pledgedSrcSize); + +/*! ZSTD_initCStream_usingDict() : + * This function is DEPRECATED, and is equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); + * ZSTD_CCtx_loadDictionary(zcs, dict, dictSize); + * + * Creates of an internal CDict (incompatible with static CCtx), except if + * dict == NULL or dictSize < 8, in which case no dict is used. + * Note: dict is loaded with ZSTD_dct_auto (treated as a full zstd dictionary if + * it begins with ZSTD_MAGIC_DICTIONARY, else as raw content) and ZSTD_dlm_byCopy. + * This prototype will generate compilation warnings. + */ +ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions") +size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, + const void* dict, size_t dictSize, + int compressionLevel); + +/*! ZSTD_initCStream_advanced() : + * This function is DEPRECATED, and is approximately equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * // Pseudocode: Set each zstd parameter and leave the rest as-is. + * for ((param, value) : params) { + * ZSTD_CCtx_setParameter(zcs, param, value); + * } + * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); + * ZSTD_CCtx_loadDictionary(zcs, dict, dictSize); + * + * dict is loaded with ZSTD_dct_auto and ZSTD_dlm_byCopy. + * pledgedSrcSize must be correct. + * If srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN. + * This prototype will generate compilation warnings. + */ +ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions") +size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, + const void* dict, size_t dictSize, + ZSTD_parameters params, + unsigned long long pledgedSrcSize); + +/*! ZSTD_initCStream_usingCDict() : + * This function is DEPRECATED, and equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_refCDict(zcs, cdict); + * + * note : cdict will just be referenced, and must outlive compression session + * This prototype will generate compilation warnings. + */ +ZSTD_DEPRECATED("use ZSTD_CCtx_reset and ZSTD_CCtx_refCDict, see zstd.h for detailed instructions") +size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict); + +/*! ZSTD_initCStream_usingCDict_advanced() : + * This function is DEPRECATED, and is approximately equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * // Pseudocode: Set each zstd frame parameter and leave the rest as-is. 
+ * for ((fParam, value) : fParams) { + * ZSTD_CCtx_setParameter(zcs, fParam, value); + * } + * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); + * ZSTD_CCtx_refCDict(zcs, cdict); + * + * same as ZSTD_initCStream_usingCDict(), with control over frame parameters. + * pledgedSrcSize must be correct. If srcSize is not known at init time, use + * value ZSTD_CONTENTSIZE_UNKNOWN. + * This prototype will generate compilation warnings. + */ +ZSTD_DEPRECATED("use ZSTD_CCtx_reset and ZSTD_CCtx_refCDict, see zstd.h for detailed instructions") +size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, + const ZSTD_CDict* cdict, + ZSTD_frameParameters fParams, + unsigned long long pledgedSrcSize); + +/*! ZSTD_resetCStream() : + * This function is DEPRECATED, and is equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); + * Note: ZSTD_resetCStream() interprets pledgedSrcSize == 0 as ZSTD_CONTENTSIZE_UNKNOWN, but + * ZSTD_CCtx_setPledgedSrcSize() does not do the same, so ZSTD_CONTENTSIZE_UNKNOWN must be + * explicitly specified. + * + * start a new frame, using same parameters from previous frame. + * This is typically useful to skip dictionary loading stage, since it will re-use it in-place. + * Note that zcs must be init at least once before using ZSTD_resetCStream(). + * If pledgedSrcSize is not known at reset time, use macro ZSTD_CONTENTSIZE_UNKNOWN. + * If pledgedSrcSize > 0, its value must be correct, as it will be written in header, and controlled at the end. + * For the time being, pledgedSrcSize==0 is interpreted as "srcSize unknown" for compatibility with older programs, + * but it will change to mean "empty" in future version, so use macro ZSTD_CONTENTSIZE_UNKNOWN instead. + * @return : 0, or an error code (which can be tested using ZSTD_isError()) + * This prototype will generate compilation warnings. + */ +ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions") +size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize); + + +typedef struct { + unsigned long long ingested; /* nb input bytes read and buffered */ + unsigned long long consumed; /* nb input bytes actually compressed */ + unsigned long long produced; /* nb of compressed bytes generated and buffered */ + unsigned long long flushed; /* nb of compressed bytes flushed : not provided; can be tracked from caller side */ + unsigned currentJobID; /* MT only : latest started job nb */ + unsigned nbActiveWorkers; /* MT only : nb of workers actively compressing at probe time */ +} ZSTD_frameProgression; + +/* ZSTD_getFrameProgression() : + * tells how much data has been ingested (read from input) + * consumed (input actually compressed) and produced (output) for current frame. + * Note : (ingested - consumed) is amount of input data buffered internally, not yet compressed. + * Aggregates progression inside active worker threads. + */ +ZSTDLIB_API ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx); + +/*! ZSTD_toFlushNow() : + * Tell how many bytes are ready to be flushed immediately. + * Useful for multithreading scenarios (nbWorkers >= 1). + * Probe the oldest active job, defined as oldest job not yet entirely flushed, + * and check its output buffer. + * @return : amount of data stored in oldest job and ready to be flushed immediately. 
+ * if @return == 0, it means either : + * + there is no active job (could be checked with ZSTD_frameProgression()), or + * + oldest job is still actively compressing data, + * but everything it has produced has also been flushed so far, + * therefore flush speed is limited by production speed of oldest job + * irrespective of the speed of concurrent (and newer) jobs. + */ +ZSTDLIB_API size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx); + + +/*===== Advanced Streaming decompression functions =====*/ + +/*! + * This function is deprecated, and is equivalent to: + * + * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); + * ZSTD_DCtx_loadDictionary(zds, dict, dictSize); + * + * note: no dictionary will be used if dict == NULL or dictSize < 8 + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); + +/*! + * This function is deprecated, and is equivalent to: + * + * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); + * ZSTD_DCtx_refDDict(zds, ddict); + * + * note : ddict is referenced, it must outlive decompression session + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict); + +/*! + * This function is deprecated, and is equivalent to: + * + * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); + * + * re-use decompression parameters from previous init; saves dictionary loading + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); + + +/********************************************************************* +* Buffer-less and synchronous inner streaming functions +* +* This is an advanced API, giving full control over buffer management, for users which need direct control over memory. +* But it's also a complex one, with several restrictions, documented below. +* Prefer normal streaming API for an easier experience. +********************************************************************* */ + +/** + Buffer-less streaming compression (synchronous mode) + + A ZSTD_CCtx object is required to track streaming operations. + Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage resource. + ZSTD_CCtx object can be re-used multiple times within successive compression operations. + + Start by initializing a context. + Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression. + It's also possible to duplicate a reference context which has already been initialized, using ZSTD_copyCCtx() + + Then, consume your input using ZSTD_compressContinue(). + There are some important considerations to keep in mind when using this advanced function : + - ZSTD_compressContinue() has no internal buffer. It uses externally provided buffers only. + - Interface is synchronous : input is consumed entirely and produces 1+ compressed blocks. + - Caller must ensure there is enough space in `dst` to store compressed data under worst case scenario. + Worst case evaluation is provided by ZSTD_compressBound(). + ZSTD_compressContinue() doesn't guarantee recover after a failed compression. + - ZSTD_compressContinue() presumes prior input ***is still accessible and unmodified*** (up to maximum distance size, see WindowLog). 
+ It remembers all previous contiguous blocks, plus one separated memory segment (which can itself consists of multiple contiguous blocks) + - ZSTD_compressContinue() detects that prior input has been overwritten when `src` buffer overlaps. + In which case, it will "discard" the relevant memory section from its history. + + Finish a frame with ZSTD_compressEnd(), which will write the last block(s) and optional checksum. + It's possible to use srcSize==0, in which case, it will write a final empty block to end the frame. + Without last block mark, frames are considered unfinished (hence corrupted) by compliant decoders. + + `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress again. +*/ + +/*===== Buffer-less streaming compression functions =====*/ +ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel); +ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel); +ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); /**< note: fails if cdict==NULL */ +ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); /**< note: if pledgedSrcSize is not known, use ZSTD_CONTENTSIZE_UNKNOWN */ + +ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + +/* The ZSTD_compressBegin_advanced() and ZSTD_compressBegin_usingCDict_advanced() are now DEPRECATED and will generate a compiler warning */ +ZSTD_DEPRECATED("use advanced API to access custom parameters") +size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize : If srcSize is not known at init time, use ZSTD_CONTENTSIZE_UNKNOWN */ +ZSTD_DEPRECATED("use advanced API to access custom parameters") +size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize); /* compression parameters are already set within cdict. pledgedSrcSize must be correct. If srcSize is not known, use macro ZSTD_CONTENTSIZE_UNKNOWN */ +/** + Buffer-less streaming decompression (synchronous mode) + + A ZSTD_DCtx object is required to track streaming operations. + Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it. + A ZSTD_DCtx object can be re-used multiple times. + + First typical operation is to retrieve frame parameters, using ZSTD_getFrameHeader(). + Frame header is extracted from the beginning of compressed frame, so providing only the frame's beginning is enough. + Data fragment must be large enough to ensure successful decoding. + `ZSTD_frameHeaderSize_max` bytes is guaranteed to always be large enough. + @result : 0 : successful decoding, the `ZSTD_frameHeader` structure is correctly filled. + >0 : `srcSize` is too small, please provide at least @result bytes on next attempt. + errorCode, which can be tested using ZSTD_isError(). + + It fills a ZSTD_frameHeader structure with important information to correctly decode the frame, + such as the dictionary ID, content size, or maximum back-reference distance (`windowSize`). + Note that these values could be wrong, either because of data corruption, or because a 3rd party deliberately spoofs false information. 
+ As a consequence, check that values remain within valid application range. + For example, do not allocate memory blindly, check that `windowSize` is within expectation. + Each application can set its own limits, depending on local restrictions. + For extended interoperability, it is recommended to support `windowSize` of at least 8 MB. + + ZSTD_decompressContinue() needs previous data blocks during decompression, up to `windowSize` bytes. + ZSTD_decompressContinue() is very sensitive to contiguity, + if 2 blocks don't follow each other, make sure that either the compressor breaks contiguity at the same place, + or that previous contiguous segment is large enough to properly handle maximum back-reference distance. + There are multiple ways to guarantee this condition. + + The most memory efficient way is to use a round buffer of sufficient size. + Sufficient size is determined by invoking ZSTD_decodingBufferSize_min(), + which can @return an error code if required value is too large for current system (in 32-bits mode). + In a round buffer methodology, ZSTD_decompressContinue() decompresses each block next to previous one, + up to the moment there is not enough room left in the buffer to guarantee decoding another full block, + which maximum size is provided in `ZSTD_frameHeader` structure, field `blockSizeMax`. + At which point, decoding can resume from the beginning of the buffer. + Note that already decoded data stored in the buffer should be flushed before being overwritten. + + There are alternatives possible, for example using two or more buffers of size `windowSize` each, though they consume more memory. + + Finally, if you control the compression process, you can also ignore all buffer size rules, + as long as the encoder and decoder progress in "lock-step", + aka use exactly the same buffer sizes, break contiguity at the same place, etc. + + Once buffers are setup, start decompression, with ZSTD_decompressBegin(). + If decompression requires a dictionary, use ZSTD_decompressBegin_usingDict() or ZSTD_decompressBegin_usingDDict(). + + Then use ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue() alternatively. + ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTD_decompressContinue(). + ZSTD_decompressContinue() requires this _exact_ amount of bytes, or it will fail. + + @result of ZSTD_decompressContinue() is the number of bytes regenerated within 'dst' (necessarily <= dstCapacity). + It can be zero : it just means ZSTD_decompressContinue() has decoded some metadata item. + It can also be an error code, which can be tested with ZSTD_isError(). + + A frame is fully decoded when ZSTD_nextSrcSizeToDecompress() returns zero. + Context can then be reset to start a new decompression. + + Note : it's possible to know if next input to present is a header or a block, using ZSTD_nextInputType(). + This information is not required to properly decode a frame. + + == Special case : skippable frames == + + Skippable frames allow integration of user-defined data into a flow of concatenated frames. + Skippable frames will be ignored (skipped) by decompressor. + The format of skippable frames is as follows : + a) Skippable frame ID - 4 Bytes, Little endian format, any value from 0x184D2A50 to 0x184D2A5F + b) Frame Size - 4 Bytes, Little endian format, unsigned 32-bits + c) Frame Content - any content (User Data) of length equal to Frame Size + For skippable frames ZSTD_getFrameHeader() returns zfhPtr->frameType==ZSTD_skippableFrame. 
+ For skippable frames ZSTD_decompressContinue() always returns 0 : it only skips the content. +*/ + +/*===== Buffer-less streaming decompression functions =====*/ +typedef enum { ZSTD_frame, ZSTD_skippableFrame } ZSTD_frameType_e; +typedef struct { + unsigned long long frameContentSize; /* if == ZSTD_CONTENTSIZE_UNKNOWN, it means this field is not available. 0 means "empty" */ + unsigned long long windowSize; /* can be very large, up to <= frameContentSize */ + unsigned blockSizeMax; + ZSTD_frameType_e frameType; /* if == ZSTD_skippableFrame, frameContentSize is the size of skippable content */ + unsigned headerSize; + unsigned dictID; + unsigned checksumFlag; +} ZSTD_frameHeader; + +/*! ZSTD_getFrameHeader() : + * decode Frame Header, or requires larger `srcSize`. + * @return : 0, `zfhPtr` is correctly filled, + * >0, `srcSize` is too small, value is wanted `srcSize` amount, + * or an error code, which can be tested using ZSTD_isError() */ +ZSTDLIB_API size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize); /**< doesn't consume input */ +/*! ZSTD_getFrameHeader_advanced() : + * same as ZSTD_getFrameHeader(), + * with added capability to select a format (like ZSTD_f_zstd1_magicless) */ +ZSTDLIB_API size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format); +ZSTDLIB_API size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize); /**< when frame content size is not known, pass in frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN */ + +ZSTDLIB_API size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx); +ZSTDLIB_API size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); +ZSTDLIB_API size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); + +ZSTDLIB_API size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx); +ZSTDLIB_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + +/* misc */ +ZSTDLIB_API void ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx); +typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e; +ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx); + + + + +/* ============================ */ +/** Block level API */ +/* ============================ */ + +/*! + Block functions produce and decode raw zstd blocks, without frame metadata. + Frame metadata cost is typically ~12 bytes, which can be non-negligible for very small blocks (< 100 bytes). + But users will have to take in charge needed metadata to regenerate data, such as compressed and content sizes. + + A few rules to respect : + - Compressing and decompressing require a context structure + + Use ZSTD_createCCtx() and ZSTD_createDCtx() + - It is necessary to init context before starting + + compression : any ZSTD_compressBegin*() variant, including with dictionary + + decompression : any ZSTD_decompressBegin*() variant, including with dictionary + + copyCCtx() and copyDCtx() can be used too + - Block size is limited, it must be <= ZSTD_getBlockSize() <= ZSTD_BLOCKSIZE_MAX == 128 KB + + If input is larger than a block size, it's necessary to split input data into multiple blocks + + For inputs larger than a single block, consider using regular ZSTD_compress() instead. 
+ Frame metadata is not that costly, and quickly becomes negligible as source size grows larger than a block. + - When a block is considered not compressible enough, ZSTD_compressBlock() result will be 0 (zero) ! + ===> In which case, nothing is produced into `dst` ! + + User __must__ test for such outcome and deal directly with uncompressed data + + A block cannot be declared incompressible if ZSTD_compressBlock() return value was != 0. + Doing so would mess up with statistics history, leading to potential data corruption. + + ZSTD_decompressBlock() _doesn't accept uncompressed data as input_ !! + + In case of multiple successive blocks, should some of them be uncompressed, + decoder must be informed of their existence in order to follow proper history. + Use ZSTD_insertBlock() for such a case. +*/ + +/*===== Raw zstd block functions =====*/ +ZSTDLIB_API size_t ZSTD_getBlockSize (const ZSTD_CCtx* cctx); +ZSTDLIB_API size_t ZSTD_compressBlock (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +ZSTDLIB_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +ZSTDLIB_API size_t ZSTD_insertBlock (ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize); /**< insert uncompressed block into `dctx` history. Useful for multi-blocks decompression. */ + + +#endif /* ZSTD_H_ZSTD_STATIC_LINKING_ONLY */ + +#if defined (__cplusplus) +} +#endif +/**** ended inlining ../zstd.h ****/ +#define FSE_STATIC_LINKING_ONLY +/**** skipping file: fse.h ****/ +#define HUF_STATIC_LINKING_ONLY +/**** skipping file: huf.h ****/ +#ifndef XXH_STATIC_LINKING_ONLY +# define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */ +#endif +/**** start inlining xxhash.h ****/ +/* + * xxHash - Extremely Fast Hash algorithm + * Header File + * Copyright (c) Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - xxHash source repository : https://github.com/Cyan4973/xxHash + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +*/ + +/* Notice extracted from xxHash homepage : + +xxHash is an extremely fast Hash algorithm, running at RAM speed limits. +It also successfully passes all tests from the SMHasher suite. + +Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz) + +Name Speed Q.Score Author +xxHash 5.4 GB/s 10 +CrapWow 3.2 GB/s 2 Andrew +MumurHash 3a 2.7 GB/s 10 Austin Appleby +SpookyHash 2.0 GB/s 10 Bob Jenkins +SBox 1.4 GB/s 9 Bret Mulvey +Lookup3 1.2 GB/s 9 Bob Jenkins +SuperFastHash 1.2 GB/s 1 Paul Hsieh +CityHash64 1.05 GB/s 10 Pike & Alakuijala +FNV 0.55 GB/s 5 Fowler, Noll, Vo +CRC32 0.43 GB/s 9 +MD5-32 0.33 GB/s 10 Ronald L. Rivest +SHA1-32 0.28 GB/s 10 + +Q.Score is a measure of quality of the hash function. +It depends on successfully passing SMHasher test set. +10 is a perfect score. + +A 64-bits version, named XXH64, is available since r35. +It offers much better speed, but for 64-bits applications only. 
+Name Speed on 64 bits Speed on 32 bits +XXH64 13.8 GB/s 1.9 GB/s +XXH32 6.8 GB/s 6.0 GB/s +*/ + +#if defined (__cplusplus) +extern "C" { +#endif + +#ifndef XXHASH_H_5627135585666179 +#define XXHASH_H_5627135585666179 1 + + +/* **************************** +* Definitions +******************************/ +/**** skipping file: zstd_deps.h ****/ +typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode; + + +/* **************************** +* API modifier +******************************/ +/** XXH_PRIVATE_API +* This is useful if you want to include xxhash functions in `static` mode +* in order to inline them, and remove their symbol from the public list. +* Methodology : +* #define XXH_PRIVATE_API +* #include "xxhash.h" +* `xxhash.c` is automatically included. +* It's not useful to compile and link it as a separate module anymore. +*/ +#ifdef XXH_PRIVATE_API +# ifndef XXH_STATIC_LINKING_ONLY +# define XXH_STATIC_LINKING_ONLY +# endif +# if defined(__GNUC__) +# define XXH_PUBLIC_API static __inline __attribute__((unused)) +# elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# define XXH_PUBLIC_API static inline +# elif defined(_MSC_VER) +# define XXH_PUBLIC_API static __inline +# else +# define XXH_PUBLIC_API static /* this version may generate warnings for unused static functions; disable the relevant warning */ +# endif +#else +# define XXH_PUBLIC_API /* do nothing */ +#endif /* XXH_PRIVATE_API */ + +/*!XXH_NAMESPACE, aka Namespace Emulation : + +If you want to include _and expose_ xxHash functions from within your own library, +but also want to avoid symbol collisions with another library which also includes xxHash, + +you can use XXH_NAMESPACE, to automatically prefix any public symbol from xxhash library +with the value of XXH_NAMESPACE (so avoid to keep it NULL and avoid numeric values). + +Note that no change is required within the calling program as long as it includes `xxhash.h` : +regular symbol name will be automatically translated by this header. 
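+
+For example (a sketch; the `MYLIB_` prefix is an arbitrary placeholder) :
+    #define XXH_NAMESPACE MYLIB_
+    #include "xxhash.h"
+After this, a call such as XXH64(data, size, 0) still compiles unchanged,
+but the exported symbol becomes MYLIB_XXH64.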
+*/ +#ifdef XXH_NAMESPACE +# define XXH_CAT(A,B) A##B +# define XXH_NAME2(A,B) XXH_CAT(A,B) +# define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32) +# define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64) +# define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber) +# define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState) +# define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState) +# define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState) +# define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState) +# define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset) +# define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset) +# define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update) +# define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update) +# define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest) +# define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest) +# define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState) +# define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState) +# define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash) +# define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash) +# define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical) +# define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical) +#endif + + +/* ************************************* +* Version +***************************************/ +#define XXH_VERSION_MAJOR 0 +#define XXH_VERSION_MINOR 6 +#define XXH_VERSION_RELEASE 2 +#define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE) +XXH_PUBLIC_API unsigned XXH_versionNumber (void); + + +/* **************************** +* Simple Hash Functions +******************************/ +typedef unsigned int XXH32_hash_t; +typedef unsigned long long XXH64_hash_t; + +XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, unsigned int seed); +XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, unsigned long long seed); + +/*! +XXH32() : + Calculate the 32-bits hash of sequence "length" bytes stored at memory address "input". + The memory between input & input+length must be valid (allocated and read-accessible). + "seed" can be used to alter the result predictably. + Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s +XXH64() : + Calculate the 64-bits hash of sequence of length "len" stored at memory address "input". + "seed" can be used to alter the result predictably. + This function runs 2x faster on 64-bits systems, but slower on 32-bits systems (see benchmark). +*/ + + +/* **************************** +* Streaming Hash Functions +******************************/ +typedef struct XXH32_state_s XXH32_state_t; /* incomplete type */ +typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */ + +/*! 
State allocation, compatible with dynamic libraries */ + +XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void); +XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr); + +XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void); +XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr); + + +/* hash streaming */ + +XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, unsigned int seed); +XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length); +XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr); + +XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, unsigned long long seed); +XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length); +XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* statePtr); + +/* +These functions generate the xxHash of an input provided in multiple segments. +Note that, for small input, they are slower than single-call functions, due to state management. +For small input, prefer `XXH32()` and `XXH64()` . + +XXH state must first be allocated, using XXH*_createState() . + +Start a new hash by initializing state with a seed, using XXH*_reset(). + +Then, feed the hash state by calling XXH*_update() as many times as necessary. +Obviously, input must be allocated and read accessible. +The function returns an error code, with 0 meaning OK, and any other value meaning there is an error. + +Finally, a hash value can be produced anytime, by using XXH*_digest(). +This function returns the nn-bits hash as an int or long long. + +It's still possible to continue inserting input into the hash state after a digest, +and generate some new hashes later on, by calling again XXH*_digest(). + +When done, free XXH state space if it was allocated dynamically. +*/ + + +/* ************************** +* Utils +****************************/ +#if !(defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) /* ! C99 */ +# define restrict /* disable restrict */ +#endif + +XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* restrict dst_state, const XXH32_state_t* restrict src_state); +XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* restrict dst_state, const XXH64_state_t* restrict src_state); + + +/* ************************** +* Canonical representation +****************************/ +/* Default result type for XXH functions are primitive unsigned 32 and 64 bits. +* The canonical representation uses human-readable write convention, aka big-endian (large digits first). +* These functions allow transformation of hash result into and from its canonical format. +* This way, hash values can be written into a file / memory, and remain comparable on different systems and programs. +*/ +typedef struct { unsigned char digest[4]; } XXH32_canonical_t; +typedef struct { unsigned char digest[8]; } XXH64_canonical_t; + +XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash); +XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash); + +XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src); +XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src); + +#endif /* XXHASH_H_5627135585666179 */ + + + +/* ================================================================================================ + This section contains definitions which are not guaranteed to remain stable. 
+ They may change in future versions, becoming incompatible with a different version of the library. + They shall only be used with static linking. + Never use these definitions in association with dynamic linking ! +=================================================================================================== */ +#if defined(XXH_STATIC_LINKING_ONLY) && !defined(XXH_STATIC_H_3543687687345) +#define XXH_STATIC_H_3543687687345 + +/* These definitions are only meant to allow allocation of XXH state + statically, on stack, or in a struct for example. + Do not use members directly. */ + + struct XXH32_state_s { + unsigned total_len_32; + unsigned large_len; + unsigned v1; + unsigned v2; + unsigned v3; + unsigned v4; + unsigned mem32[4]; /* buffer defined as U32 for alignment */ + unsigned memsize; + unsigned reserved; /* never read nor write, will be removed in a future version */ + }; /* typedef'd to XXH32_state_t */ + + struct XXH64_state_s { + unsigned long long total_len; + unsigned long long v1; + unsigned long long v2; + unsigned long long v3; + unsigned long long v4; + unsigned long long mem64[4]; /* buffer defined as U64 for alignment */ + unsigned memsize; + unsigned reserved[2]; /* never read nor write, will be removed in a future version */ + }; /* typedef'd to XXH64_state_t */ + + +# ifdef XXH_PRIVATE_API +/**** start inlining xxhash.c ****/ +/* + * xxHash - Fast Hash algorithm + * Copyright (c) Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - xxHash homepage: http://www.xxhash.com + * - xxHash source repository : https://github.com/Cyan4973/xxHash + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +*/ + + +/* ************************************* +* Tuning parameters +***************************************/ +/*!XXH_FORCE_MEMORY_ACCESS : + * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. + * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. + * The below switch allow to select different access method for improved performance. + * Method 0 (default) : use `memcpy()`. Safe and portable. + * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). + * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. + * Method 2 : direct access. This method doesn't depend on compiler but violate C standard. + * It can generate buggy code on targets which do not support unaligned memory accesses. + * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) + * See http://stackoverflow.com/a/32095106/646947 for details. 
+ * Prefer these methods in priority order (0 > 1 > 2) + */ +#ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ +# if (defined(__INTEL_COMPILER) && !defined(WIN32)) || \ + (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) || \ + defined(__ICCARM__) +# define XXH_FORCE_MEMORY_ACCESS 1 +# endif +#endif + +/*!XXH_ACCEPT_NULL_INPUT_POINTER : + * If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer. + * When this option is enabled, xxHash output for null input pointers will be the same as a null-length input. + * By default, this option is disabled. To enable it, uncomment below define : + */ +/* #define XXH_ACCEPT_NULL_INPUT_POINTER 1 */ + +/*!XXH_FORCE_NATIVE_FORMAT : + * By default, xxHash library provides endian-independent Hash values, based on little-endian convention. + * Results are therefore identical for little-endian and big-endian CPU. + * This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format. + * Should endian-independence be of no importance for your application, you may set the #define below to 1, + * to improve speed for Big-endian CPU. + * This option has no impact on Little_Endian CPU. + */ +#ifndef XXH_FORCE_NATIVE_FORMAT /* can be defined externally */ +# define XXH_FORCE_NATIVE_FORMAT 0 +#endif + +/*!XXH_FORCE_ALIGN_CHECK : + * This is a minor performance trick, only useful with lots of very small keys. + * It means : check for aligned/unaligned input. + * The check costs one initial branch per hash; set to 0 when the input data + * is guaranteed to be aligned. + */ +#ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */ +# if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64) +# define XXH_FORCE_ALIGN_CHECK 0 +# else +# define XXH_FORCE_ALIGN_CHECK 1 +# endif +#endif + + +/* ************************************* +* Includes & Memory related functions +***************************************/ +/* Modify the local functions below should you wish to use some other memory routines */ +/* for ZSTD_malloc(), ZSTD_free() */ +#define ZSTD_DEPS_NEED_MALLOC +/**** skipping file: zstd_deps.h ****/ +static void* XXH_malloc(size_t s) { return ZSTD_malloc(s); } +static void XXH_free (void* p) { ZSTD_free(p); } +static void* XXH_memcpy(void* dest, const void* src, size_t size) { return ZSTD_memcpy(dest,src,size); } + +#ifndef XXH_STATIC_LINKING_ONLY +# define XXH_STATIC_LINKING_ONLY +#endif +/**** skipping file: xxhash.h ****/ + + +/* ************************************* +* Compiler Specific Options +***************************************/ +/**** skipping file: compiler.h ****/ + + +/* ************************************* +* Basic Types +***************************************/ +/**** skipping file: mem.h ****/ + +#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2)) + +/* Force direct memory access. 
Only works on CPU which support unaligned memory access in hardware */ +static U32 XXH_read32(const void* memPtr) { return *(const U32*) memPtr; } +static U64 XXH_read64(const void* memPtr) { return *(const U64*) memPtr; } + +#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1)) + +/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ +/* currently only defined for gcc and icc */ +typedef union { U32 u32; U64 u64; } __attribute__((packed)) unalign; + +static U32 XXH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } +static U64 XXH_read64(const void* ptr) { return ((const unalign*)ptr)->u64; } + +#else + +/* portable and safe solution. Generally efficient. + * see : http://stackoverflow.com/a/32095106/646947 + */ + +static U32 XXH_read32(const void* memPtr) +{ + U32 val; + ZSTD_memcpy(&val, memPtr, sizeof(val)); + return val; +} + +static U64 XXH_read64(const void* memPtr) +{ + U64 val; + ZSTD_memcpy(&val, memPtr, sizeof(val)); + return val; +} + +#endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */ + + +/* **************************************** +* Compiler-specific Functions and Macros +******************************************/ +#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) + +/* Note : although _rotl exists for minGW (GCC under windows), performance seems poor */ +#if defined(_MSC_VER) +# define XXH_rotl32(x,r) _rotl(x,r) +# define XXH_rotl64(x,r) _rotl64(x,r) +#else +#if defined(__ICCARM__) +# include +# define XXH_rotl32(x,r) __ROR(x,(32 - r)) +#else +# define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r))) +#endif +# define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r))) +#endif + +#if defined(_MSC_VER) /* Visual Studio */ +# define XXH_swap32 _byteswap_ulong +# define XXH_swap64 _byteswap_uint64 +#elif GCC_VERSION >= 403 +# define XXH_swap32 __builtin_bswap32 +# define XXH_swap64 __builtin_bswap64 +#else +static U32 XXH_swap32 (U32 x) +{ + return ((x << 24) & 0xff000000 ) | + ((x << 8) & 0x00ff0000 ) | + ((x >> 8) & 0x0000ff00 ) | + ((x >> 24) & 0x000000ff ); +} +static U64 XXH_swap64 (U64 x) +{ + return ((x << 56) & 0xff00000000000000ULL) | + ((x << 40) & 0x00ff000000000000ULL) | + ((x << 24) & 0x0000ff0000000000ULL) | + ((x << 8) & 0x000000ff00000000ULL) | + ((x >> 8) & 0x00000000ff000000ULL) | + ((x >> 24) & 0x0000000000ff0000ULL) | + ((x >> 40) & 0x000000000000ff00ULL) | + ((x >> 56) & 0x00000000000000ffULL); +} +#endif + + +/* ************************************* +* Architecture Macros +***************************************/ +typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess; + +/* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line */ +#ifndef XXH_CPU_LITTLE_ENDIAN + static const int g_one = 1; +# define XXH_CPU_LITTLE_ENDIAN (*(const char*)(&g_one)) +#endif + + +/* *************************** +* Memory reads +*****************************/ +typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment; + +FORCE_INLINE_TEMPLATE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align) +{ + if (align==XXH_unaligned) + return endian==XXH_littleEndian ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr)); + else + return endian==XXH_littleEndian ? *(const U32*)ptr : XXH_swap32(*(const U32*)ptr); +} + +FORCE_INLINE_TEMPLATE U32 XXH_readLE32(const void* ptr, XXH_endianess endian) +{ + return XXH_readLE32_align(ptr, endian, XXH_unaligned); +} + +static U32 XXH_readBE32(const void* ptr) +{ + return XXH_CPU_LITTLE_ENDIAN ? 
XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr); +} + +FORCE_INLINE_TEMPLATE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align) +{ + if (align==XXH_unaligned) + return endian==XXH_littleEndian ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr)); + else + return endian==XXH_littleEndian ? *(const U64*)ptr : XXH_swap64(*(const U64*)ptr); +} + +FORCE_INLINE_TEMPLATE U64 XXH_readLE64(const void* ptr, XXH_endianess endian) +{ + return XXH_readLE64_align(ptr, endian, XXH_unaligned); +} + +static U64 XXH_readBE64(const void* ptr) +{ + return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr); +} + + +/* ************************************* +* Macros +***************************************/ +#define XXH_STATIC_ASSERT(c) { enum { XXH_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ + + +/* ************************************* +* Constants +***************************************/ +static const U32 PRIME32_1 = 2654435761U; +static const U32 PRIME32_2 = 2246822519U; +static const U32 PRIME32_3 = 3266489917U; +static const U32 PRIME32_4 = 668265263U; +static const U32 PRIME32_5 = 374761393U; + +static const U64 PRIME64_1 = 11400714785074694791ULL; +static const U64 PRIME64_2 = 14029467366897019727ULL; +static const U64 PRIME64_3 = 1609587929392839161ULL; +static const U64 PRIME64_4 = 9650029242287828579ULL; +static const U64 PRIME64_5 = 2870177450012600261ULL; + +XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; } + + +/* ************************** +* Utils +****************************/ +XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* restrict dstState, const XXH32_state_t* restrict srcState) +{ + ZSTD_memcpy(dstState, srcState, sizeof(*dstState)); +} + +XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* restrict dstState, const XXH64_state_t* restrict srcState) +{ + ZSTD_memcpy(dstState, srcState, sizeof(*dstState)); +} + + +/* *************************** +* Simple Hash Functions +*****************************/ + +static U32 XXH32_round(U32 seed, U32 input) +{ + seed += input * PRIME32_2; + seed = XXH_rotl32(seed, 13); + seed *= PRIME32_1; + return seed; +} + +FORCE_INLINE_TEMPLATE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_endianess endian, XXH_alignment align) +{ + const BYTE* p = (const BYTE*)input; + const BYTE* bEnd = p + len; + U32 h32; +#define XXH_get32bits(p) XXH_readLE32_align(p, endian, align) + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (p==NULL) { + len=0; + bEnd=p=(const BYTE*)(size_t)16; + } +#endif + + if (len>=16) { + const BYTE* const limit = bEnd - 16; + U32 v1 = seed + PRIME32_1 + PRIME32_2; + U32 v2 = seed + PRIME32_2; + U32 v3 = seed + 0; + U32 v4 = seed - PRIME32_1; + + do { + v1 = XXH32_round(v1, XXH_get32bits(p)); p+=4; + v2 = XXH32_round(v2, XXH_get32bits(p)); p+=4; + v3 = XXH32_round(v3, XXH_get32bits(p)); p+=4; + v4 = XXH32_round(v4, XXH_get32bits(p)); p+=4; + } while (p<=limit); + + h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18); + } else { + h32 = seed + PRIME32_5; + } + + h32 += (U32) len; + + while (p+4<=bEnd) { + h32 += XXH_get32bits(p) * PRIME32_3; + h32 = XXH_rotl32(h32, 17) * PRIME32_4 ; + p+=4; + } + + while (p> 15; + h32 *= PRIME32_2; + h32 ^= h32 >> 13; + h32 *= PRIME32_3; + h32 ^= h32 >> 16; + + return h32; +} + + +XXH_PUBLIC_API unsigned int XXH32 (const void* input, size_t len, unsigned int seed) +{ +#if 0 + /* Simple version, good for code maintenance, but unfortunately slow for small 
inputs */ + XXH32_CREATESTATE_STATIC(state); + XXH32_reset(state, seed); + XXH32_update(state, input, len); + return XXH32_digest(state); +#else + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if (XXH_FORCE_ALIGN_CHECK) { + if ((((size_t)input) & 3) == 0) { /* Input is 4-bytes aligned, leverage the speed benefit */ + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); + else + return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); + } } + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned); + else + return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); +#endif +} + + +static U64 XXH64_round(U64 acc, U64 input) +{ + acc += input * PRIME64_2; + acc = XXH_rotl64(acc, 31); + acc *= PRIME64_1; + return acc; +} + +static U64 XXH64_mergeRound(U64 acc, U64 val) +{ + val = XXH64_round(0, val); + acc ^= val; + acc = acc * PRIME64_1 + PRIME64_4; + return acc; +} + +FORCE_INLINE_TEMPLATE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align) +{ + const BYTE* p = (const BYTE*)input; + const BYTE* const bEnd = p + len; + U64 h64; +#define XXH_get64bits(p) XXH_readLE64_align(p, endian, align) + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (p==NULL) { + len=0; + bEnd=p=(const BYTE*)(size_t)32; + } +#endif + + if (len>=32) { + const BYTE* const limit = bEnd - 32; + U64 v1 = seed + PRIME64_1 + PRIME64_2; + U64 v2 = seed + PRIME64_2; + U64 v3 = seed + 0; + U64 v4 = seed - PRIME64_1; + + do { + v1 = XXH64_round(v1, XXH_get64bits(p)); p+=8; + v2 = XXH64_round(v2, XXH_get64bits(p)); p+=8; + v3 = XXH64_round(v3, XXH_get64bits(p)); p+=8; + v4 = XXH64_round(v4, XXH_get64bits(p)); p+=8; + } while (p<=limit); + + h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); + h64 = XXH64_mergeRound(h64, v1); + h64 = XXH64_mergeRound(h64, v2); + h64 = XXH64_mergeRound(h64, v3); + h64 = XXH64_mergeRound(h64, v4); + + } else { + h64 = seed + PRIME64_5; + } + + h64 += (U64) len; + + while (p+8<=bEnd) { + U64 const k1 = XXH64_round(0, XXH_get64bits(p)); + h64 ^= k1; + h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; + p+=8; + } + + if (p+4<=bEnd) { + h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1; + h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; + p+=4; + } + + while (p> 33; + h64 *= PRIME64_2; + h64 ^= h64 >> 29; + h64 *= PRIME64_3; + h64 ^= h64 >> 32; + + return h64; +} + + +XXH_PUBLIC_API unsigned long long XXH64 (const void* input, size_t len, unsigned long long seed) +{ +#if 0 + /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ + XXH64_CREATESTATE_STATIC(state); + XXH64_reset(state, seed); + XXH64_update(state, input, len); + return XXH64_digest(state); +#else + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if (XXH_FORCE_ALIGN_CHECK) { + if ((((size_t)input) & 7)==0) { /* Input is aligned, let's leverage the speed advantage */ + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); + else + return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); + } } + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned); + else + return 
XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); +#endif +} + + +/* ************************************************** +* Advanced Hash Functions +****************************************************/ + +XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void) +{ + return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t)); +} +XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr) +{ + XXH_free(statePtr); + return XXH_OK; +} + +XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void) +{ + return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t)); +} +XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr) +{ + XXH_free(statePtr); + return XXH_OK; +} + + +/*** Hash feed ***/ + +XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, unsigned int seed) +{ + XXH32_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ + ZSTD_memset(&state, 0, sizeof(state)-4); /* do not write into reserved, for future removal */ + state.v1 = seed + PRIME32_1 + PRIME32_2; + state.v2 = seed + PRIME32_2; + state.v3 = seed + 0; + state.v4 = seed - PRIME32_1; + ZSTD_memcpy(statePtr, &state, sizeof(state)); + return XXH_OK; +} + + +XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, unsigned long long seed) +{ + XXH64_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ + ZSTD_memset(&state, 0, sizeof(state)-8); /* do not write into reserved, for future removal */ + state.v1 = seed + PRIME64_1 + PRIME64_2; + state.v2 = seed + PRIME64_2; + state.v3 = seed + 0; + state.v4 = seed - PRIME64_1; + ZSTD_memcpy(statePtr, &state, sizeof(state)); + return XXH_OK; +} + + +FORCE_INLINE_TEMPLATE XXH_errorcode XXH32_update_endian (XXH32_state_t* state, const void* input, size_t len, XXH_endianess endian) +{ + const BYTE* p = (const BYTE*)input; + const BYTE* const bEnd = p + len; + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (input==NULL) return XXH_ERROR; +#endif + + state->total_len_32 += (unsigned)len; + state->large_len |= (len>=16) | (state->total_len_32>=16); + + if (state->memsize + len < 16) { /* fill in tmp buffer */ + XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len); + state->memsize += (unsigned)len; + return XXH_OK; + } + + if (state->memsize) { /* some data left from previous update */ + XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize); + { const U32* p32 = state->mem32; + state->v1 = XXH32_round(state->v1, XXH_readLE32(p32, endian)); p32++; + state->v2 = XXH32_round(state->v2, XXH_readLE32(p32, endian)); p32++; + state->v3 = XXH32_round(state->v3, XXH_readLE32(p32, endian)); p32++; + state->v4 = XXH32_round(state->v4, XXH_readLE32(p32, endian)); p32++; + } + p += 16-state->memsize; + state->memsize = 0; + } + + if (p <= bEnd-16) { + const BYTE* const limit = bEnd - 16; + U32 v1 = state->v1; + U32 v2 = state->v2; + U32 v3 = state->v3; + U32 v4 = state->v4; + + do { + v1 = XXH32_round(v1, XXH_readLE32(p, endian)); p+=4; + v2 = XXH32_round(v2, XXH_readLE32(p, endian)); p+=4; + v3 = XXH32_round(v3, XXH_readLE32(p, endian)); p+=4; + v4 = XXH32_round(v4, XXH_readLE32(p, endian)); p+=4; + } while (p<=limit); + + state->v1 = v1; + state->v2 = v2; + state->v3 = v3; + state->v4 = v4; + } + + if (p < bEnd) { + XXH_memcpy(state->mem32, p, (size_t)(bEnd-p)); + state->memsize = (unsigned)(bEnd-p); + } + + return XXH_OK; +} + +XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t len) +{ + XXH_endianess 
endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_update_endian(state_in, input, len, XXH_littleEndian); + else + return XXH32_update_endian(state_in, input, len, XXH_bigEndian); +} + + + +FORCE_INLINE_TEMPLATE U32 XXH32_digest_endian (const XXH32_state_t* state, XXH_endianess endian) +{ + const BYTE * p = (const BYTE*)state->mem32; + const BYTE* const bEnd = (const BYTE*)(state->mem32) + state->memsize; + U32 h32; + + if (state->large_len) { + h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18); + } else { + h32 = state->v3 /* == seed */ + PRIME32_5; + } + + h32 += state->total_len_32; + + while (p+4<=bEnd) { + h32 += XXH_readLE32(p, endian) * PRIME32_3; + h32 = XXH_rotl32(h32, 17) * PRIME32_4; + p+=4; + } + + while (p> 15; + h32 *= PRIME32_2; + h32 ^= h32 >> 13; + h32 *= PRIME32_3; + h32 ^= h32 >> 16; + + return h32; +} + + +XXH_PUBLIC_API unsigned int XXH32_digest (const XXH32_state_t* state_in) +{ + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_digest_endian(state_in, XXH_littleEndian); + else + return XXH32_digest_endian(state_in, XXH_bigEndian); +} + + + +/* **** XXH64 **** */ + +FORCE_INLINE_TEMPLATE XXH_errorcode XXH64_update_endian (XXH64_state_t* state, const void* input, size_t len, XXH_endianess endian) +{ + const BYTE* p = (const BYTE*)input; + const BYTE* const bEnd = p + len; + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (input==NULL) return XXH_ERROR; +#endif + + state->total_len += len; + + if (state->memsize + len < 32) { /* fill in tmp buffer */ + if (input != NULL) { + XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len); + } + state->memsize += (U32)len; + return XXH_OK; + } + + if (state->memsize) { /* tmp buffer is full */ + XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize); + state->v1 = XXH64_round(state->v1, XXH_readLE64(state->mem64+0, endian)); + state->v2 = XXH64_round(state->v2, XXH_readLE64(state->mem64+1, endian)); + state->v3 = XXH64_round(state->v3, XXH_readLE64(state->mem64+2, endian)); + state->v4 = XXH64_round(state->v4, XXH_readLE64(state->mem64+3, endian)); + p += 32-state->memsize; + state->memsize = 0; + } + + if (p+32 <= bEnd) { + const BYTE* const limit = bEnd - 32; + U64 v1 = state->v1; + U64 v2 = state->v2; + U64 v3 = state->v3; + U64 v4 = state->v4; + + do { + v1 = XXH64_round(v1, XXH_readLE64(p, endian)); p+=8; + v2 = XXH64_round(v2, XXH_readLE64(p, endian)); p+=8; + v3 = XXH64_round(v3, XXH_readLE64(p, endian)); p+=8; + v4 = XXH64_round(v4, XXH_readLE64(p, endian)); p+=8; + } while (p<=limit); + + state->v1 = v1; + state->v2 = v2; + state->v3 = v3; + state->v4 = v4; + } + + if (p < bEnd) { + XXH_memcpy(state->mem64, p, (size_t)(bEnd-p)); + state->memsize = (unsigned)(bEnd-p); + } + + return XXH_OK; +} + +XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t len) +{ + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_update_endian(state_in, input, len, XXH_littleEndian); + else + return XXH64_update_endian(state_in, input, len, XXH_bigEndian); +} + + + +FORCE_INLINE_TEMPLATE U64 XXH64_digest_endian (const XXH64_state_t* state, XXH_endianess endian) +{ + const BYTE * p = (const BYTE*)state->mem64; + 
const BYTE* const bEnd = (const BYTE*)state->mem64 + state->memsize; + U64 h64; + + if (state->total_len >= 32) { + U64 const v1 = state->v1; + U64 const v2 = state->v2; + U64 const v3 = state->v3; + U64 const v4 = state->v4; + + h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); + h64 = XXH64_mergeRound(h64, v1); + h64 = XXH64_mergeRound(h64, v2); + h64 = XXH64_mergeRound(h64, v3); + h64 = XXH64_mergeRound(h64, v4); + } else { + h64 = state->v3 + PRIME64_5; + } + + h64 += (U64) state->total_len; + + while (p+8<=bEnd) { + U64 const k1 = XXH64_round(0, XXH_readLE64(p, endian)); + h64 ^= k1; + h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; + p+=8; + } + + if (p+4<=bEnd) { + h64 ^= (U64)(XXH_readLE32(p, endian)) * PRIME64_1; + h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; + p+=4; + } + + while (p> 33; + h64 *= PRIME64_2; + h64 ^= h64 >> 29; + h64 *= PRIME64_3; + h64 ^= h64 >> 32; + + return h64; +} + + +XXH_PUBLIC_API unsigned long long XXH64_digest (const XXH64_state_t* state_in) +{ + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_digest_endian(state_in, XXH_littleEndian); + else + return XXH64_digest_endian(state_in, XXH_bigEndian); +} + + +/* ************************** +* Canonical representation +****************************/ + +/*! Default XXH result types are basic unsigned 32 and 64 bits. +* The canonical representation follows human-readable write convention, aka big-endian (large digits first). +* These functions allow transformation of hash result into and from its canonical format. +* This way, hash values can be written into a file or buffer, and remain comparable across different systems and programs. +*/ + +XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash) +{ + XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t)); + if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash); + ZSTD_memcpy(dst, &hash, sizeof(*dst)); +} + +XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash) +{ + XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t)); + if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash); + ZSTD_memcpy(dst, &hash, sizeof(*dst)); +} + +XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src) +{ + return XXH_readBE32(src); +} + +XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src) +{ + return XXH_readBE64(src); +} +/**** ended inlining xxhash.c ****/ +# endif + +#endif /* XXH_STATIC_LINKING_ONLY && XXH_STATIC_H_3543687687345 */ + + +#if defined (__cplusplus) +} +#endif +/**** ended inlining xxhash.h ****/ +#ifndef ZSTD_NO_TRACE +/**** start inlining zstd_trace.h ****/ +/* + * Copyright (c) Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +#ifndef ZSTD_TRACE_H +#define ZSTD_TRACE_H + +#if defined (__cplusplus) +extern "C" { +#endif + +#include + +/* weak symbol support */ +#if !defined(ZSTD_HAVE_WEAK_SYMBOLS) && defined(__GNUC__) && \ + !defined(__APPLE__) && !defined(_WIN32) && !defined(__MINGW32__) && \ + !defined(__CYGWIN__) +# define ZSTD_HAVE_WEAK_SYMBOLS 1 +#else +# define ZSTD_HAVE_WEAK_SYMBOLS 0 +#endif +#if ZSTD_HAVE_WEAK_SYMBOLS +# define ZSTD_WEAK_ATTR __attribute__((__weak__)) +#else +# define ZSTD_WEAK_ATTR +#endif + +/* Only enable tracing when weak symbols are available. */ +#ifndef ZSTD_TRACE +# define ZSTD_TRACE ZSTD_HAVE_WEAK_SYMBOLS +#endif + +#if ZSTD_TRACE + +struct ZSTD_CCtx_s; +struct ZSTD_DCtx_s; +struct ZSTD_CCtx_params_s; + +typedef struct { + /** + * ZSTD_VERSION_NUMBER + * + * This is guaranteed to be the first member of ZSTD_trace. + * Otherwise, this struct is not stable between versions. If + * the version number does not match your expectation, you + * should not interpret the rest of the struct. + */ + unsigned version; + /** + * Non-zero if streaming (de)compression is used. + */ + unsigned streaming; + /** + * The dictionary ID. + */ + unsigned dictionaryID; + /** + * Is the dictionary cold? + * Only set on decompression. + */ + unsigned dictionaryIsCold; + /** + * The dictionary size or zero if no dictionary. + */ + size_t dictionarySize; + /** + * The uncompressed size of the data. + */ + size_t uncompressedSize; + /** + * The compressed size of the data. + */ + size_t compressedSize; + /** + * The fully resolved CCtx parameters (NULL on decompression). + */ + struct ZSTD_CCtx_params_s const* params; + /** + * The ZSTD_CCtx pointer (NULL on decompression). + */ + struct ZSTD_CCtx_s const* cctx; + /** + * The ZSTD_DCtx pointer (NULL on compression). + */ + struct ZSTD_DCtx_s const* dctx; +} ZSTD_Trace; + +/** + * A tracing context. It must be 0 when tracing is disabled. + * Otherwise, any non-zero value returned by a tracing begin() + * function is presented to any subsequent calls to end(). + * + * Any non-zero value is treated as tracing is enabled and not + * interpreted by the library. + * + * Two possible uses are: + * * A timestamp for when the begin() function was called. + * * A unique key identifying the (de)compression, like the + * address of the [dc]ctx pointer if you need to track + * more information than just a timestamp. + */ +typedef unsigned long long ZSTD_TraceCtx; + +/** + * Trace the beginning of a compression call. + * @param cctx The dctx pointer for the compression. + * It can be used as a key to map begin() to end(). + * @returns Non-zero if tracing is enabled. The return value is + * passed to ZSTD_trace_compress_end(). + */ +ZSTD_WEAK_ATTR ZSTD_TraceCtx ZSTD_trace_compress_begin( + struct ZSTD_CCtx_s const* cctx); + +/** + * Trace the end of a compression call. + * @param ctx The return value of ZSTD_trace_compress_begin(). + * @param trace The zstd tracing info. + */ +ZSTD_WEAK_ATTR void ZSTD_trace_compress_end( + ZSTD_TraceCtx ctx, + ZSTD_Trace const* trace); + +/** + * Trace the beginning of a decompression call. + * @param dctx The dctx pointer for the decompression. + * It can be used as a key to map begin() to end(). + * @returns Non-zero if tracing is enabled. The return value is + * passed to ZSTD_trace_compress_end(). + */ +ZSTD_WEAK_ATTR ZSTD_TraceCtx ZSTD_trace_decompress_begin( + struct ZSTD_DCtx_s const* dctx); + +/** + * Trace the end of a decompression call. + * @param ctx The return value of ZSTD_trace_decompress_begin(). 
+ * @param trace The zstd tracing info. + */ +ZSTD_WEAK_ATTR void ZSTD_trace_decompress_end( + ZSTD_TraceCtx ctx, + ZSTD_Trace const* trace); + +#endif /* ZSTD_TRACE */ + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_TRACE_H */ +/**** ended inlining zstd_trace.h ****/ +#else +# define ZSTD_TRACE 0 +#endif + +#if defined (__cplusplus) +extern "C" { +#endif + +/* ---- static assert (debug) --- */ +#define ZSTD_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) +#define ZSTD_isError ERR_isError /* for inlining */ +#define FSE_isError ERR_isError +#define HUF_isError ERR_isError + + +/*-************************************* +* shared macros +***************************************/ +#undef MIN +#undef MAX +#define MIN(a,b) ((a)<(b) ? (a) : (b)) +#define MAX(a,b) ((a)>(b) ? (a) : (b)) + +/** + * Ignore: this is an internal helper. + * + * This is a helper function to help force C99-correctness during compilation. + * Under strict compilation modes, variadic macro arguments can't be empty. + * However, variadic function arguments can be. Using a function therefore lets + * us statically check that at least one (string) argument was passed, + * independent of the compilation flags. + */ +static INLINE_KEYWORD UNUSED_ATTR +void _force_has_format_string(const char *format, ...) { + (void)format; +} + +/** + * Ignore: this is an internal helper. + * + * We want to force this function invocation to be syntactically correct, but + * we don't want to force runtime evaluation of its arguments. + */ +#define _FORCE_HAS_FORMAT_STRING(...) \ + if (0) { \ + _force_has_format_string(__VA_ARGS__); \ + } + +/** + * Return the specified error if the condition evaluates to true. + * + * In debug modes, prints additional information. + * In order to do that (particularly, printing the conditional that failed), + * this can't just wrap RETURN_ERROR(). + */ +#define RETURN_ERROR_IF(cond, err, ...) \ + if (cond) { \ + RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", \ + __FILE__, __LINE__, ZSTD_QUOTE(cond), ZSTD_QUOTE(ERROR(err))); \ + _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ + RAWLOG(3, ": " __VA_ARGS__); \ + RAWLOG(3, "\n"); \ + return ERROR(err); \ + } + +/** + * Unconditionally return the specified error. + * + * In debug modes, prints additional information. + */ +#define RETURN_ERROR(err, ...) \ + do { \ + RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \ + __FILE__, __LINE__, ZSTD_QUOTE(ERROR(err))); \ + _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ + RAWLOG(3, ": " __VA_ARGS__); \ + RAWLOG(3, "\n"); \ + return ERROR(err); \ + } while(0); + +/** + * If the provided expression evaluates to an error code, returns that error code. + * + * In debug modes, prints additional information. + */ +#define FORWARD_IF_ERROR(err, ...) 
\ + do { \ + size_t const err_code = (err); \ + if (ERR_isError(err_code)) { \ + RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", \ + __FILE__, __LINE__, ZSTD_QUOTE(err), ERR_getErrorName(err_code)); \ + _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ + RAWLOG(3, ": " __VA_ARGS__); \ + RAWLOG(3, "\n"); \ + return err_code; \ + } \ + } while(0); + + +/*-************************************* +* Common constants +***************************************/ +#define ZSTD_OPT_NUM (1<<12) + +#define ZSTD_REP_NUM 3 /* number of repcodes */ +#define ZSTD_REP_MOVE (ZSTD_REP_NUM-1) +static UNUSED_ATTR const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 }; + +#define KB *(1 <<10) +#define MB *(1 <<20) +#define GB *(1U<<30) + +#define BIT7 128 +#define BIT6 64 +#define BIT5 32 +#define BIT4 16 +#define BIT1 2 +#define BIT0 1 + +#define ZSTD_WINDOWLOG_ABSOLUTEMIN 10 +static UNUSED_ATTR const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 }; +static UNUSED_ATTR const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 }; + +#define ZSTD_FRAMEIDSIZE 4 /* magic number size */ + +#define ZSTD_BLOCKHEADERSIZE 3 /* C standard doesn't allow `static const` variable to be init using another `static const` variable */ +static UNUSED_ATTR const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE; +typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e; + +#define ZSTD_FRAMECHECKSUMSIZE 4 + +#define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */ +#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */) /* for a non-null block */ + +#define HufLog 12 +typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e; + +#define LONGNBSEQ 0x7F00 + +#define MINMATCH 3 + +#define Litbits 8 +#define MaxLit ((1<= 8 || (ovtype == ZSTD_no_overlap && diff <= -WILDCOPY_VECLEN)); + + if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) { + /* Handle short offset copies. */ + do { + COPY8(op, ip) + } while (op < oend); + } else { + assert(diff >= WILDCOPY_VECLEN || diff <= -WILDCOPY_VECLEN); + /* Separate out the first COPY16() call because the copy length is + * almost certain to be short, so the branches have different + * probabilities. Since it is almost certain to be short, only do + * one COPY16() in the first call. Then, do two calls per loop since + * at that point it is more likely to have a high trip count. + */ +#ifdef __aarch64__ + do { + COPY16(op, ip); + } + while (op < oend); +#else + ZSTD_copy16(op, ip); + if (16 >= length) return; + op += 16; + ip += 16; + do { + COPY16(op, ip); + COPY16(op, ip); + } + while (op < oend); +#endif + } +} + +MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + size_t const length = MIN(dstCapacity, srcSize); + if (length > 0) { + ZSTD_memcpy(dst, src, length); + } + return length; +} + +/* define "workspace is too large" as this number of times larger than needed */ +#define ZSTD_WORKSPACETOOLARGE_FACTOR 3 + +/* when workspace is continuously too large + * during at least this number of times, + * context's memory usage is considered wasteful, + * because it's sized to handle a worst case scenario which rarely happens. + * In which case, resize it down to free some memory */ +#define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128 + +/* Controls whether the input/output buffer is buffered or stable. 
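+ * In the stable mode the caller guarantees that the memory referenced by the
+ * ZSTD_inBuffer/ZSTD_outBuffer remains valid and unmodified between calls, so
+ * the context may read and write it directly instead of copying the data
+ * through its internal buffers.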
*/ +typedef enum { + ZSTD_bm_buffered = 0, /* Buffer the input/output */ + ZSTD_bm_stable = 1 /* ZSTD_inBuffer/ZSTD_outBuffer is stable */ +} ZSTD_bufferMode_e; + + +/*-******************************************* +* Private declarations +*********************************************/ +typedef struct seqDef_s { + U32 offset; /* offset == rawOffset + ZSTD_REP_NUM, or equivalently, offCode + 1 */ + U16 litLength; + U16 matchLength; +} seqDef; + +/* Controls whether seqStore has a single "long" litLength or matchLength. See seqStore_t. */ +typedef enum { + ZSTD_llt_none = 0, /* no longLengthType */ + ZSTD_llt_literalLength = 1, /* represents a long literal */ + ZSTD_llt_matchLength = 2 /* represents a long match */ +} ZSTD_longLengthType_e; + +typedef struct { + seqDef* sequencesStart; + seqDef* sequences; /* ptr to end of sequences */ + BYTE* litStart; + BYTE* lit; /* ptr to end of literals */ + BYTE* llCode; + BYTE* mlCode; + BYTE* ofCode; + size_t maxNbSeq; + size_t maxNbLit; + + /* longLengthPos and longLengthType to allow us to represent either a single litLength or matchLength + * in the seqStore that has a value larger than U16 (if it exists). To do so, we increment + * the existing value of the litLength or matchLength by 0x10000. + */ + ZSTD_longLengthType_e longLengthType; + U32 longLengthPos; /* Index of the sequence to apply long length modification to */ +} seqStore_t; + +typedef struct { + U32 litLength; + U32 matchLength; +} ZSTD_sequenceLength; + +/** + * Returns the ZSTD_sequenceLength for the given sequences. It handles the decoding of long sequences + * indicated by longLengthPos and longLengthType, and adds MINMATCH back to matchLength. + */ +MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore, seqDef const* seq) +{ + ZSTD_sequenceLength seqLen; + seqLen.litLength = seq->litLength; + seqLen.matchLength = seq->matchLength + MINMATCH; + if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) { + if (seqStore->longLengthType == ZSTD_llt_literalLength) { + seqLen.litLength += 0xFFFF; + } + if (seqStore->longLengthType == ZSTD_llt_matchLength) { + seqLen.matchLength += 0xFFFF; + } + } + return seqLen; +} + +/** + * Contains the compressed frame size and an upper-bound for the decompressed frame size. + * Note: before using `compressedSize`, check for errors using ZSTD_isError(). + * similarly, before using `decompressedBound`, check for errors using: + * `decompressedBound != ZSTD_CONTENTSIZE_ERROR` + */ +typedef struct { + size_t compressedSize; + unsigned long long decompressedBound; +} ZSTD_frameSizeInfo; /* decompress & legacy */ + +const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); /* compress & dictBuilder */ +void ZSTD_seqToCodes(const seqStore_t* seqStorePtr); /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */ + +/* custom memory allocation functions */ +void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem); +void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem); +void ZSTD_customFree(void* ptr, ZSTD_customMem customMem); + + +MEM_STATIC U32 ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus */ +{ + assert(val != 0); + { +# if defined(_MSC_VER) /* Visual */ +# if STATIC_BMI2 == 1 + return _lzcnt_u32(val)^31; +# else + unsigned long r=0; + return _BitScanReverse(&r, val) ? 
(unsigned)r : 0; +# endif +# elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */ + return __builtin_clz (val) ^ 31; +# elif defined(__ICCARM__) /* IAR Intrinsic */ + return 31 - __CLZ(val); +# else /* Software version */ + static const U32 DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; + U32 v = val; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + return DeBruijnClz[(v * 0x07C4ACDDU) >> 27]; +# endif + } +} + + +/* ZSTD_invalidateRepCodes() : + * ensures next compression will not use repcodes from previous block. + * Note : only works with regular variant; + * do not use with extDict variant ! */ +void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx); /* zstdmt, adaptive_compression (shouldn't get this definition from here) */ + + +typedef struct { + blockType_e blockType; + U32 lastBlock; + U32 origSize; +} blockProperties_t; /* declared here for decompress and fullbench */ + +/*! ZSTD_getcBlockSize() : + * Provides the size of compressed block from block header `src` */ +/* Used by: decompress, fullbench (does not get its definition from here) */ +size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, + blockProperties_t* bpPtr); + +/*! ZSTD_decodeSeqHeaders() : + * decode sequence header from src */ +/* Used by: decompress, fullbench (does not get its definition from here) */ +size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, + const void* src, size_t srcSize); + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_CCOMMON_H_MODULE */ +/**** ended inlining zstd_internal.h ****/ +/**** start inlining pool.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef POOL_H +#define POOL_H + +#if defined (__cplusplus) +extern "C" { +#endif + + +/**** skipping file: zstd_deps.h ****/ +#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_customMem */ +/**** skipping file: ../zstd.h ****/ + +typedef struct POOL_ctx_s POOL_ctx; + +/*! POOL_create() : + * Create a thread pool with at most `numThreads` threads. + * `numThreads` must be at least 1. + * The maximum number of queued jobs before blocking is `queueSize`. + * @return : POOL_ctx pointer on success, else NULL. +*/ +POOL_ctx* POOL_create(size_t numThreads, size_t queueSize); + +POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, + ZSTD_customMem customMem); + +/*! POOL_free() : + * Free a thread pool returned by POOL_create(). + */ +void POOL_free(POOL_ctx* ctx); + +/*! POOL_resize() : + * Expands or shrinks pool's number of threads. + * This is more efficient than releasing + creating a new context, + * since it tries to preserve and re-use existing threads. + * `numThreads` must be at least 1. + * @return : 0 when resize was successful, + * !0 (typically 1) if there is an error. + * note : only numThreads can be resized, queueSize remains unchanged. + */ +int POOL_resize(POOL_ctx* ctx, size_t numThreads); + +/*! POOL_sizeof() : + * @return threadpool memory usage + * note : compatible with NULL (returns 0 in this case) + */ +size_t POOL_sizeof(POOL_ctx* ctx); + +/*! POOL_function : + * The function type that can be added to a thread pool. 
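+ * As an illustrative caller-side sketch (hypothetical names, not part of this
+ * header), a job with this signature is created and submitted as follows:
+ *     static void incrementJob(void* opaque) { *(int*)opaque += 1; }
+ *
+ *     int counter = 0;
+ *     POOL_ctx* const pool = POOL_create(2, 4);     // 2 threads, queue of 4 jobs
+ *     if (pool != NULL) {
+ *         POOL_add(pool, incrementJob, &counter);   // may block until queue has room
+ *         POOL_free(pool);                          // drains queued jobs, joins threads
+ *     }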
+ */ +typedef void (*POOL_function)(void*); + +/*! POOL_add() : + * Add the job `function(opaque)` to the thread pool. `ctx` must be valid. + * Possibly blocks until there is room in the queue. + * Note : The function may be executed asynchronously, + * therefore, `opaque` must live until function has been completed. + */ +void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque); + + +/*! POOL_tryAdd() : + * Add the job `function(opaque)` to thread pool _if_ a worker is available. + * Returns immediately even if not (does not block). + * @return : 1 if successful, 0 if not. + */ +int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque); + + +#if defined (__cplusplus) +} +#endif + +#endif +/**** ended inlining pool.h ****/ + +/* ====== Compiler specifics ====== */ +#if defined(_MSC_VER) +# pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */ +#endif + + +#ifdef ZSTD_MULTITHREAD + +/**** skipping file: threading.h ****/ + +/* A job is a function and an opaque argument */ +typedef struct POOL_job_s { + POOL_function function; + void *opaque; +} POOL_job; + +struct POOL_ctx_s { + ZSTD_customMem customMem; + /* Keep track of the threads */ + ZSTD_pthread_t* threads; + size_t threadCapacity; + size_t threadLimit; + + /* The queue is a circular buffer */ + POOL_job *queue; + size_t queueHead; + size_t queueTail; + size_t queueSize; + + /* The number of threads working on jobs */ + size_t numThreadsBusy; + /* Indicates if the queue is empty */ + int queueEmpty; + + /* The mutex protects the queue */ + ZSTD_pthread_mutex_t queueMutex; + /* Condition variable for pushers to wait on when the queue is full */ + ZSTD_pthread_cond_t queuePushCond; + /* Condition variables for poppers to wait on when the queue is empty */ + ZSTD_pthread_cond_t queuePopCond; + /* Indicates if the queue is shutting down */ + int shutdown; +}; + +/* POOL_thread() : + * Work thread for the thread pool. + * Waits for jobs and executes them. + * @returns : NULL on failure else non-null. 
+ */ +static void* POOL_thread(void* opaque) { + POOL_ctx* const ctx = (POOL_ctx*)opaque; + if (!ctx) { return NULL; } + for (;;) { + /* Lock the mutex and wait for a non-empty queue or until shutdown */ + ZSTD_pthread_mutex_lock(&ctx->queueMutex); + + while ( ctx->queueEmpty + || (ctx->numThreadsBusy >= ctx->threadLimit) ) { + if (ctx->shutdown) { + /* even if !queueEmpty, (possible if numThreadsBusy >= threadLimit), + * a few threads will be shutdown while !queueEmpty, + * but enough threads will remain active to finish the queue */ + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + return opaque; + } + ZSTD_pthread_cond_wait(&ctx->queuePopCond, &ctx->queueMutex); + } + /* Pop a job off the queue */ + { POOL_job const job = ctx->queue[ctx->queueHead]; + ctx->queueHead = (ctx->queueHead + 1) % ctx->queueSize; + ctx->numThreadsBusy++; + ctx->queueEmpty = ctx->queueHead == ctx->queueTail; + /* Unlock the mutex, signal a pusher, and run the job */ + ZSTD_pthread_cond_signal(&ctx->queuePushCond); + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + + job.function(job.opaque); + + /* If the intended queue size was 0, signal after finishing job */ + ZSTD_pthread_mutex_lock(&ctx->queueMutex); + ctx->numThreadsBusy--; + if (ctx->queueSize == 1) { + ZSTD_pthread_cond_signal(&ctx->queuePushCond); + } + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + } + } /* for (;;) */ + assert(0); /* Unreachable */ +} + +POOL_ctx* ZSTD_createThreadPool(size_t numThreads) { + return POOL_create (numThreads, 0); +} + +POOL_ctx* POOL_create(size_t numThreads, size_t queueSize) { + return POOL_create_advanced(numThreads, queueSize, ZSTD_defaultCMem); +} + +POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, + ZSTD_customMem customMem) { + POOL_ctx* ctx; + /* Check parameters */ + if (!numThreads) { return NULL; } + /* Allocate the context and zero initialize */ + ctx = (POOL_ctx*)ZSTD_customCalloc(sizeof(POOL_ctx), customMem); + if (!ctx) { return NULL; } + /* Initialize the job queue. + * It needs one extra space since one space is wasted to differentiate + * empty and full queues. + */ + ctx->queueSize = queueSize + 1; + ctx->queue = (POOL_job*)ZSTD_customMalloc(ctx->queueSize * sizeof(POOL_job), customMem); + ctx->queueHead = 0; + ctx->queueTail = 0; + ctx->numThreadsBusy = 0; + ctx->queueEmpty = 1; + { + int error = 0; + error |= ZSTD_pthread_mutex_init(&ctx->queueMutex, NULL); + error |= ZSTD_pthread_cond_init(&ctx->queuePushCond, NULL); + error |= ZSTD_pthread_cond_init(&ctx->queuePopCond, NULL); + if (error) { POOL_free(ctx); return NULL; } + } + ctx->shutdown = 0; + /* Allocate space for the thread handles */ + ctx->threads = (ZSTD_pthread_t*)ZSTD_customMalloc(numThreads * sizeof(ZSTD_pthread_t), customMem); + ctx->threadCapacity = 0; + ctx->customMem = customMem; + /* Check for errors */ + if (!ctx->threads || !ctx->queue) { POOL_free(ctx); return NULL; } + /* Initialize the threads */ + { size_t i; + for (i = 0; i < numThreads; ++i) { + if (ZSTD_pthread_create(&ctx->threads[i], NULL, &POOL_thread, ctx)) { + ctx->threadCapacity = i; + POOL_free(ctx); + return NULL; + } } + ctx->threadCapacity = numThreads; + ctx->threadLimit = numThreads; + } + return ctx; +} + +/*! POOL_join() : + Shutdown the queue, wake any sleeping threads, and join all of the threads. 
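+ Note that jobs still sitting in the queue at shutdown are picked up and
+ completed by the remaining workers before the threads are joined (see the
+ shutdown handling in POOL_thread() above), so no queued work is dropped.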
+*/ +static void POOL_join(POOL_ctx* ctx) { + /* Shut down the queue */ + ZSTD_pthread_mutex_lock(&ctx->queueMutex); + ctx->shutdown = 1; + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + /* Wake up sleeping threads */ + ZSTD_pthread_cond_broadcast(&ctx->queuePushCond); + ZSTD_pthread_cond_broadcast(&ctx->queuePopCond); + /* Join all of the threads */ + { size_t i; + for (i = 0; i < ctx->threadCapacity; ++i) { + ZSTD_pthread_join(ctx->threads[i], NULL); /* note : could fail */ + } } +} + +void POOL_free(POOL_ctx *ctx) { + if (!ctx) { return; } + POOL_join(ctx); + ZSTD_pthread_mutex_destroy(&ctx->queueMutex); + ZSTD_pthread_cond_destroy(&ctx->queuePushCond); + ZSTD_pthread_cond_destroy(&ctx->queuePopCond); + ZSTD_customFree(ctx->queue, ctx->customMem); + ZSTD_customFree(ctx->threads, ctx->customMem); + ZSTD_customFree(ctx, ctx->customMem); +} + +void ZSTD_freeThreadPool (ZSTD_threadPool* pool) { + POOL_free (pool); +} + +size_t POOL_sizeof(POOL_ctx *ctx) { + if (ctx==NULL) return 0; /* supports sizeof NULL */ + return sizeof(*ctx) + + ctx->queueSize * sizeof(POOL_job) + + ctx->threadCapacity * sizeof(ZSTD_pthread_t); +} + + +/* @return : 0 on success, 1 on error */ +static int POOL_resize_internal(POOL_ctx* ctx, size_t numThreads) +{ + if (numThreads <= ctx->threadCapacity) { + if (!numThreads) return 1; + ctx->threadLimit = numThreads; + return 0; + } + /* numThreads > threadCapacity */ + { ZSTD_pthread_t* const threadPool = (ZSTD_pthread_t*)ZSTD_customMalloc(numThreads * sizeof(ZSTD_pthread_t), ctx->customMem); + if (!threadPool) return 1; + /* replace existing thread pool */ + ZSTD_memcpy(threadPool, ctx->threads, ctx->threadCapacity * sizeof(*threadPool)); + ZSTD_customFree(ctx->threads, ctx->customMem); + ctx->threads = threadPool; + /* Initialize additional threads */ + { size_t threadId; + for (threadId = ctx->threadCapacity; threadId < numThreads; ++threadId) { + if (ZSTD_pthread_create(&threadPool[threadId], NULL, &POOL_thread, ctx)) { + ctx->threadCapacity = threadId; + return 1; + } } + } } + /* successfully expanded */ + ctx->threadCapacity = numThreads; + ctx->threadLimit = numThreads; + return 0; +} + +/* @return : 0 on success, 1 on error */ +int POOL_resize(POOL_ctx* ctx, size_t numThreads) +{ + int result; + if (ctx==NULL) return 1; + ZSTD_pthread_mutex_lock(&ctx->queueMutex); + result = POOL_resize_internal(ctx, numThreads); + ZSTD_pthread_cond_broadcast(&ctx->queuePopCond); + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + return result; +} + +/** + * Returns 1 if the queue is full and 0 otherwise. + * + * When queueSize is 1 (pool was created with an intended queueSize of 0), + * then a queue is empty if there is a thread free _and_ no job is waiting. 
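+ * In that degenerate case the one-slot queue cannot distinguish "full" from
+ * "has a pending job", so fullness is derived from numThreadsBusy and
+ * queueEmpty instead of from the head/tail indices.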
+ */ +static int isQueueFull(POOL_ctx const* ctx) { + if (ctx->queueSize > 1) { + return ctx->queueHead == ((ctx->queueTail + 1) % ctx->queueSize); + } else { + return (ctx->numThreadsBusy == ctx->threadLimit) || + !ctx->queueEmpty; + } +} + + +static void POOL_add_internal(POOL_ctx* ctx, POOL_function function, void *opaque) +{ + POOL_job const job = {function, opaque}; + assert(ctx != NULL); + if (ctx->shutdown) return; + + ctx->queueEmpty = 0; + ctx->queue[ctx->queueTail] = job; + ctx->queueTail = (ctx->queueTail + 1) % ctx->queueSize; + ZSTD_pthread_cond_signal(&ctx->queuePopCond); +} + +void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque) +{ + assert(ctx != NULL); + ZSTD_pthread_mutex_lock(&ctx->queueMutex); + /* Wait until there is space in the queue for the new job */ + while (isQueueFull(ctx) && (!ctx->shutdown)) { + ZSTD_pthread_cond_wait(&ctx->queuePushCond, &ctx->queueMutex); + } + POOL_add_internal(ctx, function, opaque); + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); +} + + +int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque) +{ + assert(ctx != NULL); + ZSTD_pthread_mutex_lock(&ctx->queueMutex); + if (isQueueFull(ctx)) { + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + return 0; + } + POOL_add_internal(ctx, function, opaque); + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + return 1; +} + + +#else /* ZSTD_MULTITHREAD not defined */ + +/* ========================== */ +/* No multi-threading support */ +/* ========================== */ + + +/* We don't need any data, but if it is empty, malloc() might return NULL. */ +struct POOL_ctx_s { + int dummy; +}; +static POOL_ctx g_poolCtx; + +POOL_ctx* POOL_create(size_t numThreads, size_t queueSize) { + return POOL_create_advanced(numThreads, queueSize, ZSTD_defaultCMem); +} + +POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, ZSTD_customMem customMem) { + (void)numThreads; + (void)queueSize; + (void)customMem; + return &g_poolCtx; +} + +void POOL_free(POOL_ctx* ctx) { + assert(!ctx || ctx == &g_poolCtx); + (void)ctx; +} + +int POOL_resize(POOL_ctx* ctx, size_t numThreads) { + (void)ctx; (void)numThreads; + return 0; +} + +void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque) { + (void)ctx; + function(opaque); +} + +int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque) { + (void)ctx; + function(opaque); + return 1; +} + +size_t POOL_sizeof(POOL_ctx* ctx) { + if (ctx==NULL) return 0; /* supports sizeof NULL */ + assert(ctx == &g_poolCtx); + return sizeof(*ctx); +} + +#endif /* ZSTD_MULTITHREAD */ +/**** ended inlining common/pool.c ****/ +/**** start inlining common/zstd_common.c ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + + + +/*-************************************* +* Dependencies +***************************************/ +#define ZSTD_DEPS_NEED_MALLOC +/**** skipping file: zstd_deps.h ****/ +/**** skipping file: error_private.h ****/ +/**** skipping file: zstd_internal.h ****/ + + +/*-**************************************** +* Version +******************************************/ +unsigned ZSTD_versionNumber(void) { return ZSTD_VERSION_NUMBER; } + +const char* ZSTD_versionString(void) { return ZSTD_VERSION_STRING; } + + +/*-**************************************** +* ZSTD Error Management +******************************************/ +#undef ZSTD_isError /* defined within zstd_internal.h */ +/*! ZSTD_isError() : + * tells if a return value is an error code + * symbol is required for external callers */ +unsigned ZSTD_isError(size_t code) { return ERR_isError(code); } + +/*! ZSTD_getErrorName() : + * provides error code string from function result (useful for debugging) */ +const char* ZSTD_getErrorName(size_t code) { return ERR_getErrorName(code); } + +/*! ZSTD_getError() : + * convert a `size_t` function result into a proper ZSTD_errorCode enum */ +ZSTD_ErrorCode ZSTD_getErrorCode(size_t code) { return ERR_getErrorCode(code); } + +/*! ZSTD_getErrorString() : + * provides error code string from enum */ +const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorString(code); } + + + +/*=************************************************************** +* Custom allocator +****************************************************************/ +void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem) +{ + if (customMem.customAlloc) + return customMem.customAlloc(customMem.opaque, size); + return ZSTD_malloc(size); +} + +void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem) +{ + if (customMem.customAlloc) { + /* calloc implemented as malloc+memset; + * not as efficient as calloc, but next best guess for custom malloc */ + void* const ptr = customMem.customAlloc(customMem.opaque, size); + ZSTD_memset(ptr, 0, size); + return ptr; + } + return ZSTD_calloc(1, size); +} + +void ZSTD_customFree(void* ptr, ZSTD_customMem customMem) +{ + if (ptr!=NULL) { + if (customMem.customFree) + customMem.customFree(customMem.opaque, ptr); + else + ZSTD_free(ptr); + } +} +/**** ended inlining common/zstd_common.c ****/ + +/**** start inlining compress/fse_compress.c ****/ +/* ****************************************************************** + * FSE : Finite State Entropy encoder + * Copyright (c) Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy + * - Public forum : https://groups.google.com/forum/#!forum/lz4c + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+****************************************************************** */ + +/* ************************************************************** +* Includes +****************************************************************/ +/**** skipping file: ../common/compiler.h ****/ +/**** skipping file: ../common/mem.h ****/ +/**** skipping file: ../common/debug.h ****/ +/**** start inlining hist.h ****/ +/* ****************************************************************** + * hist : Histogram functions + * part of Finite State Entropy project + * Copyright (c) Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy + * - Public forum : https://groups.google.com/forum/#!forum/lz4c + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +****************************************************************** */ + +/* --- dependencies --- */ +/**** skipping file: ../common/zstd_deps.h ****/ + + +/* --- simple histogram functions --- */ + +/*! HIST_count(): + * Provides the precise count of each byte within a table 'count'. + * 'count' is a table of unsigned int, of minimum size (*maxSymbolValuePtr+1). + * Updates *maxSymbolValuePtr with actual largest symbol value detected. + * @return : count of the most frequent symbol (which isn't identified). + * or an error code, which can be tested using HIST_isError(). + * note : if return == srcSize, there is only one symbol. + */ +size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize); + +unsigned HIST_isError(size_t code); /**< tells if a return value is an error code */ + + +/* --- advanced histogram functions --- */ + +#define HIST_WKSP_SIZE_U32 1024 +#define HIST_WKSP_SIZE (HIST_WKSP_SIZE_U32 * sizeof(unsigned)) +/** HIST_count_wksp() : + * Same as HIST_count(), but using an externally provided scratch buffer. + * Benefit is this function will use very little stack space. + * `workSpace` is a writable buffer which must be 4-bytes aligned, + * `workSpaceSize` must be >= HIST_WKSP_SIZE + */ +size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize, + void* workSpace, size_t workSpaceSize); + +/** HIST_countFast() : + * same as HIST_count(), but blindly trusts that all byte values within src are <= *maxSymbolValuePtr. + * This function is unsafe, and will segfault if any value within `src` is `> *maxSymbolValuePtr` + */ +size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize); + +/** HIST_countFast_wksp() : + * Same as HIST_countFast(), but using an externally provided scratch buffer. + * `workSpace` is a writable buffer which must be 4-bytes aligned, + * `workSpaceSize` must be >= HIST_WKSP_SIZE + */ +size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize, + void* workSpace, size_t workSpaceSize); + +/*! HIST_count_simple() : + * Same as HIST_countFast(), this function is unsafe, + * and will segfault if any value within `src` is `> *maxSymbolValuePtr`. + * It is also a bit slower for large inputs. + * However, it does not need any additional memory (not even on stack). + * @return : count of the most frequent symbol. 
+ * Note this function doesn't produce any error (i.e. it must succeed). + */ +unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize); +/**** ended inlining hist.h ****/ +/**** skipping file: ../common/bitstream.h ****/ +#define FSE_STATIC_LINKING_ONLY +/**** skipping file: ../common/fse.h ****/ +/**** skipping file: ../common/error_private.h ****/ +#define ZSTD_DEPS_NEED_MALLOC +#define ZSTD_DEPS_NEED_MATH64 +/**** skipping file: ../common/zstd_deps.h ****/ + + +/* ************************************************************** +* Error Management +****************************************************************/ +#define FSE_isError ERR_isError + + +/* ************************************************************** +* Templates +****************************************************************/ +/* + designed to be included + for type-specific functions (template emulation in C) + Objective is to write these functions only once, for improved maintenance +*/ + +/* safety checks */ +#ifndef FSE_FUNCTION_EXTENSION +# error "FSE_FUNCTION_EXTENSION must be defined" +#endif +#ifndef FSE_FUNCTION_TYPE +# error "FSE_FUNCTION_TYPE must be defined" +#endif + +/* Function names */ +#define FSE_CAT(X,Y) X##Y +#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y) +#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y) + + +/* Function templates */ + +/* FSE_buildCTable_wksp() : + * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`). + * wkspSize should be sized to handle worst case situation, which is `1<>1 : 1) ; + FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT); + U32 const step = FSE_TABLESTEP(tableSize); + + U32* cumul = (U32*)workSpace; + FSE_FUNCTION_TYPE* tableSymbol = (FSE_FUNCTION_TYPE*)(cumul + (maxSymbolValue + 2)); + + U32 highThreshold = tableSize-1; + + if ((size_t)workSpace & 3) return ERROR(GENERIC); /* Must be 4 byte aligned */ + if (FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) > wkspSize) return ERROR(tableLog_tooLarge); + /* CTable header */ + tableU16[-2] = (U16) tableLog; + tableU16[-1] = (U16) maxSymbolValue; + assert(tableLog < 16); /* required for threshold strategy to work */ + + /* For explanations on how to distribute symbol values over the table : + * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */ + + #ifdef __clang_analyzer__ + ZSTD_memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize); /* useless initialization, just to keep scan-build happy */ + #endif + + /* symbol start positions */ + { U32 u; + cumul[0] = 0; + for (u=1; u <= maxSymbolValue+1; u++) { + if (normalizedCounter[u-1]==-1) { /* Low proba symbol */ + cumul[u] = cumul[u-1] + 1; + tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1); + } else { + cumul[u] = cumul[u-1] + normalizedCounter[u-1]; + } } + cumul[maxSymbolValue+1] = tableSize+1; + } + + /* Spread symbols */ + { U32 position = 0; + U32 symbol; + for (symbol=0; symbol<=maxSymbolValue; symbol++) { + int nbOccurrences; + int const freq = normalizedCounter[symbol]; + for (nbOccurrences=0; nbOccurrences highThreshold) + position = (position + step) & tableMask; /* Low proba area */ + } } + + assert(position==0); /* Must have initialized all positions */ + } + + /* Build table */ + { U32 u; for (u=0; u> 3) + 3; + return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? 
use default */ +} + +static size_t +FSE_writeNCount_generic (void* header, size_t headerBufferSize, + const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, + unsigned writeIsSafe) +{ + BYTE* const ostart = (BYTE*) header; + BYTE* out = ostart; + BYTE* const oend = ostart + headerBufferSize; + int nbBits; + const int tableSize = 1 << tableLog; + int remaining; + int threshold; + U32 bitStream = 0; + int bitCount = 0; + unsigned symbol = 0; + unsigned const alphabetSize = maxSymbolValue + 1; + int previousIs0 = 0; + + /* Table Size */ + bitStream += (tableLog-FSE_MIN_TABLELOG) << bitCount; + bitCount += 4; + + /* Init */ + remaining = tableSize+1; /* +1 for extra accuracy */ + threshold = tableSize; + nbBits = tableLog+1; + + while ((symbol < alphabetSize) && (remaining>1)) { /* stops at 1 */ + if (previousIs0) { + unsigned start = symbol; + while ((symbol < alphabetSize) && !normalizedCounter[symbol]) symbol++; + if (symbol == alphabetSize) break; /* incorrect distribution */ + while (symbol >= start+24) { + start+=24; + bitStream += 0xFFFFU << bitCount; + if ((!writeIsSafe) && (out > oend-2)) + return ERROR(dstSize_tooSmall); /* Buffer overflow */ + out[0] = (BYTE) bitStream; + out[1] = (BYTE)(bitStream>>8); + out+=2; + bitStream>>=16; + } + while (symbol >= start+3) { + start+=3; + bitStream += 3 << bitCount; + bitCount += 2; + } + bitStream += (symbol-start) << bitCount; + bitCount += 2; + if (bitCount>16) { + if ((!writeIsSafe) && (out > oend - 2)) + return ERROR(dstSize_tooSmall); /* Buffer overflow */ + out[0] = (BYTE)bitStream; + out[1] = (BYTE)(bitStream>>8); + out += 2; + bitStream >>= 16; + bitCount -= 16; + } } + { int count = normalizedCounter[symbol++]; + int const max = (2*threshold-1) - remaining; + remaining -= count < 0 ? -count : count; + count++; /* +1 for extra accuracy */ + if (count>=threshold) + count += max; /* [0..max[ [max..threshold[ (...) 
[threshold+max 2*threshold[ */ + bitStream += count << bitCount; + bitCount += nbBits; + bitCount -= (count>=1; } + } + if (bitCount>16) { + if ((!writeIsSafe) && (out > oend - 2)) + return ERROR(dstSize_tooSmall); /* Buffer overflow */ + out[0] = (BYTE)bitStream; + out[1] = (BYTE)(bitStream>>8); + out += 2; + bitStream >>= 16; + bitCount -= 16; + } } + + if (remaining != 1) + return ERROR(GENERIC); /* incorrect normalized distribution */ + assert(symbol <= alphabetSize); + + /* flush remaining bitStream */ + if ((!writeIsSafe) && (out > oend - 2)) + return ERROR(dstSize_tooSmall); /* Buffer overflow */ + out[0] = (BYTE)bitStream; + out[1] = (BYTE)(bitStream>>8); + out+= (bitCount+7) /8; + + return (out-ostart); +} + + +size_t FSE_writeNCount (void* buffer, size_t bufferSize, + const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) +{ + if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported */ + if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported */ + + if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog)) + return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0); + + return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1 /* write in buffer is safe */); +} + + +/*-************************************************************** +* FSE Compression Code +****************************************************************/ + +FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog) +{ + size_t size; + if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX; + size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32); + return (FSE_CTable*)ZSTD_malloc(size); +} + +void FSE_freeCTable (FSE_CTable* ct) { ZSTD_free(ct); } + +/* provides the minimum logSize to safely represent a distribution */ +static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue) +{ + U32 minBitsSrc = BIT_highbit32((U32)(srcSize)) + 1; + U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2; + U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols; + assert(srcSize > 1); /* Not supported, RLE should be used instead */ + return minBits; +} + +unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus) +{ + U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus; + U32 tableLog = maxTableLog; + U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue); + assert(srcSize > 1); /* Not supported, RLE should be used instead */ + if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG; + if (maxBitsSrc < tableLog) tableLog = maxBitsSrc; /* Accuracy can be reduced */ + if (minBits > tableLog) tableLog = minBits; /* Need a minimum to safely represent all symbol values */ + if (tableLog < FSE_MIN_TABLELOG) tableLog = FSE_MIN_TABLELOG; + if (tableLog > FSE_MAX_TABLELOG) tableLog = FSE_MAX_TABLELOG; + return tableLog; +} + +unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue) +{ + return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 2); +} + +/* Secondary normalization method. + To be used when primary method fails. 
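+   For orientation, callers reach this fallback through FSE_normalizeCount();
+   a typical table-description sequence looks like the following illustrative
+   sketch (error checks omitted, buffer names are the caller's):
+     unsigned tableLog = FSE_optimalTableLog(0, srcSize, maxSymbolValue);
+     short norm[FSE_MAX_SYMBOL_VALUE+1];
+     FSE_normalizeCount(norm, tableLog, count, srcSize, maxSymbolValue, srcSize >= 2048);
+     size_t hSize = FSE_writeNCount(dst, dstCapacity, norm, maxSymbolValue, tableLog);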
*/ + +static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue, short lowProbCount) +{ + short const NOT_YET_ASSIGNED = -2; + U32 s; + U32 distributed = 0; + U32 ToDistribute; + + /* Init */ + U32 const lowThreshold = (U32)(total >> tableLog); + U32 lowOne = (U32)((total * 3) >> (tableLog + 1)); + + for (s=0; s<=maxSymbolValue; s++) { + if (count[s] == 0) { + norm[s]=0; + continue; + } + if (count[s] <= lowThreshold) { + norm[s] = lowProbCount; + distributed++; + total -= count[s]; + continue; + } + if (count[s] <= lowOne) { + norm[s] = 1; + distributed++; + total -= count[s]; + continue; + } + + norm[s]=NOT_YET_ASSIGNED; + } + ToDistribute = (1 << tableLog) - distributed; + + if (ToDistribute == 0) + return 0; + + if ((total / ToDistribute) > lowOne) { + /* risk of rounding to zero */ + lowOne = (U32)((total * 3) / (ToDistribute * 2)); + for (s=0; s<=maxSymbolValue; s++) { + if ((norm[s] == NOT_YET_ASSIGNED) && (count[s] <= lowOne)) { + norm[s] = 1; + distributed++; + total -= count[s]; + continue; + } } + ToDistribute = (1 << tableLog) - distributed; + } + + if (distributed == maxSymbolValue+1) { + /* all values are pretty poor; + probably incompressible data (should have already been detected); + find max, then give all remaining points to max */ + U32 maxV = 0, maxC = 0; + for (s=0; s<=maxSymbolValue; s++) + if (count[s] > maxC) { maxV=s; maxC=count[s]; } + norm[maxV] += (short)ToDistribute; + return 0; + } + + if (total == 0) { + /* all of the symbols were low enough for the lowOne or lowThreshold */ + for (s=0; ToDistribute > 0; s = (s+1)%(maxSymbolValue+1)) + if (norm[s] > 0) { ToDistribute--; norm[s]++; } + return 0; + } + + { U64 const vStepLog = 62 - tableLog; + U64 const mid = (1ULL << (vStepLog-1)) - 1; + U64 const rStep = ZSTD_div64((((U64)1<> vStepLog); + U32 const sEnd = (U32)(end >> vStepLog); + U32 const weight = sEnd - sStart; + if (weight < 1) + return ERROR(GENERIC); + norm[s] = (short)weight; + tmpTotal = end; + } } } + + return 0; +} + +size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog, + const unsigned* count, size_t total, + unsigned maxSymbolValue, unsigned useLowProbCount) +{ + /* Sanity checks */ + if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG; + if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported size */ + if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported size */ + if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return ERROR(GENERIC); /* Too small tableLog, compression potentially impossible */ + + { static U32 const rtbTable[] = { 0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 }; + short const lowProbCount = useLowProbCount ? -1 : 1; + U64 const scale = 62 - tableLog; + U64 const step = ZSTD_div64((U64)1<<62, (U32)total); /* <== here, one division ! 
*/ + U64 const vStep = 1ULL<<(scale-20); + int stillToDistribute = 1<> tableLog); + + for (s=0; s<=maxSymbolValue; s++) { + if (count[s] == total) return 0; /* rle special case */ + if (count[s] == 0) { normalizedCounter[s]=0; continue; } + if (count[s] <= lowThreshold) { + normalizedCounter[s] = lowProbCount; + stillToDistribute--; + } else { + short proba = (short)((count[s]*step) >> scale); + if (proba<8) { + U64 restToBeat = vStep * rtbTable[proba]; + proba += (count[s]*step) - ((U64)proba< restToBeat; + } + if (proba > largestP) { largestP=proba; largest=s; } + normalizedCounter[s] = proba; + stillToDistribute -= proba; + } } + if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) { + /* corner case, need another normalization method */ + size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue, lowProbCount); + if (FSE_isError(errorCode)) return errorCode; + } + else normalizedCounter[largest] += (short)stillToDistribute; + } + +#if 0 + { /* Print Table (debug) */ + U32 s; + U32 nTotal = 0; + for (s=0; s<=maxSymbolValue; s++) + RAWLOG(2, "%3i: %4i \n", s, normalizedCounter[s]); + for (s=0; s<=maxSymbolValue; s++) + nTotal += abs(normalizedCounter[s]); + if (nTotal != (1U<>1); /* assumption : tableLog >= 1 */ + FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT); + unsigned s; + + /* Sanity checks */ + if (nbBits < 1) return ERROR(GENERIC); /* min size */ + + /* header */ + tableU16[-2] = (U16) nbBits; + tableU16[-1] = (U16) maxSymbolValue; + + /* Build table */ + for (s=0; s FSE_MAX_TABLELOG*4+7 ) && (srcSize & 2)) { /* test bit 2 */ + FSE_encodeSymbol(&bitC, &CState2, *--ip); + FSE_encodeSymbol(&bitC, &CState1, *--ip); + FSE_FLUSHBITS(&bitC); + } + + /* 2 or 4 encoding per loop */ + while ( ip>istart ) { + + FSE_encodeSymbol(&bitC, &CState2, *--ip); + + if (sizeof(bitC.bitContainer)*8 < FSE_MAX_TABLELOG*2+7 ) /* this test must be static */ + FSE_FLUSHBITS(&bitC); + + FSE_encodeSymbol(&bitC, &CState1, *--ip); + + if (sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) { /* this test must be static */ + FSE_encodeSymbol(&bitC, &CState2, *--ip); + FSE_encodeSymbol(&bitC, &CState1, *--ip); + } + + FSE_FLUSHBITS(&bitC); + } + + FSE_flushCState(&bitC, &CState2); + FSE_flushCState(&bitC, &CState1); + return BIT_closeCStream(&bitC); +} + +size_t FSE_compress_usingCTable (void* dst, size_t dstSize, + const void* src, size_t srcSize, + const FSE_CTable* ct) +{ + unsigned const fast = (dstSize >= FSE_BLOCKBOUND(srcSize)); + + if (fast) + return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 1); + else + return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 0); +} + + +size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); } + +#ifndef ZSTD_NO_UNUSED_FUNCTIONS +/* FSE_compress_wksp() : + * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`). 
+ * `wkspSize` size must be `(1< not compressible */ + if (maxCount < (srcSize >> 7)) return 0; /* Heuristic : not compressible enough */ + } + + tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue); + CHECK_F( FSE_normalizeCount(norm, tableLog, count, srcSize, maxSymbolValue, /* useLowProbCount */ srcSize >= 2048) ); + + /* Write table description header */ + { CHECK_V_F(nc_err, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) ); + op += nc_err; + } + + /* Compress */ + CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, scratchBufferSize) ); + { CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, src, srcSize, CTable) ); + if (cSize == 0) return 0; /* not enough space for compressed data */ + op += cSize; + } + + /* check compressibility */ + if ( (size_t)(op-ostart) >= srcSize-1 ) return 0; + + return op-ostart; +} + +typedef struct { + FSE_CTable CTable_max[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)]; + union { + U32 hist_wksp[HIST_WKSP_SIZE_U32]; + BYTE scratchBuffer[1 << FSE_MAX_TABLELOG]; + } workspace; +} fseWkspMax_t; + +size_t FSE_compress2 (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog) +{ + fseWkspMax_t scratchBuffer; + DEBUG_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_COMPRESS_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)); /* compilation failures here means scratchBuffer is not large enough */ + if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); + return FSE_compress_wksp(dst, dstCapacity, src, srcSize, maxSymbolValue, tableLog, &scratchBuffer, sizeof(scratchBuffer)); +} + +size_t FSE_compress (void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + return FSE_compress2(dst, dstCapacity, src, srcSize, FSE_MAX_SYMBOL_VALUE, FSE_DEFAULT_TABLELOG); +} +#endif + +#endif /* FSE_COMMONDEFS_ONLY */ +/**** ended inlining compress/fse_compress.c ****/ +/**** start inlining compress/hist.c ****/ +/* ****************************************************************** + * hist : Histogram functions + * part of Finite State Entropy project + * Copyright (c) Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy + * - Public forum : https://groups.google.com/forum/#!forum/lz4c + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+****************************************************************** */ + +/* --- dependencies --- */ +/**** skipping file: ../common/mem.h ****/ +/**** skipping file: ../common/debug.h ****/ +/**** skipping file: ../common/error_private.h ****/ +/**** skipping file: hist.h ****/ + + +/* --- Error management --- */ +unsigned HIST_isError(size_t code) { return ERR_isError(code); } + +/*-************************************************************** + * Histogram functions + ****************************************************************/ +unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize) +{ + const BYTE* ip = (const BYTE*)src; + const BYTE* const end = ip + srcSize; + unsigned maxSymbolValue = *maxSymbolValuePtr; + unsigned largestCount=0; + + ZSTD_memset(count, 0, (maxSymbolValue+1) * sizeof(*count)); + if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; } + + while (ip largestCount) largestCount = count[s]; + } + + return largestCount; +} + +typedef enum { trustInput, checkMaxSymbolValue } HIST_checkInput_e; + +/* HIST_count_parallel_wksp() : + * store histogram into 4 intermediate tables, recombined at the end. + * this design makes better use of OoO cpus, + * and is noticeably faster when some values are heavily repeated. + * But it needs some additional workspace for intermediate tables. + * `workSpace` must be a U32 table of size >= HIST_WKSP_SIZE_U32. + * @return : largest histogram frequency, + * or an error code (notably when histogram's alphabet is larger than *maxSymbolValuePtr) */ +static size_t HIST_count_parallel_wksp( + unsigned* count, unsigned* maxSymbolValuePtr, + const void* source, size_t sourceSize, + HIST_checkInput_e check, + U32* const workSpace) +{ + const BYTE* ip = (const BYTE*)source; + const BYTE* const iend = ip+sourceSize; + size_t const countSize = (*maxSymbolValuePtr + 1) * sizeof(*count); + unsigned max=0; + U32* const Counting1 = workSpace; + U32* const Counting2 = Counting1 + 256; + U32* const Counting3 = Counting2 + 256; + U32* const Counting4 = Counting3 + 256; + + /* safety checks */ + assert(*maxSymbolValuePtr <= 255); + if (!sourceSize) { + ZSTD_memset(count, 0, countSize); + *maxSymbolValuePtr = 0; + return 0; + } + ZSTD_memset(workSpace, 0, 4*256*sizeof(unsigned)); + + /* by stripes of 16 bytes */ + { U32 cached = MEM_read32(ip); ip += 4; + while (ip < iend-15) { + U32 c = cached; cached = MEM_read32(ip); ip += 4; + Counting1[(BYTE) c ]++; + Counting2[(BYTE)(c>>8) ]++; + Counting3[(BYTE)(c>>16)]++; + Counting4[ c>>24 ]++; + c = cached; cached = MEM_read32(ip); ip += 4; + Counting1[(BYTE) c ]++; + Counting2[(BYTE)(c>>8) ]++; + Counting3[(BYTE)(c>>16)]++; + Counting4[ c>>24 ]++; + c = cached; cached = MEM_read32(ip); ip += 4; + Counting1[(BYTE) c ]++; + Counting2[(BYTE)(c>>8) ]++; + Counting3[(BYTE)(c>>16)]++; + Counting4[ c>>24 ]++; + c = cached; cached = MEM_read32(ip); ip += 4; + Counting1[(BYTE) c ]++; + Counting2[(BYTE)(c>>8) ]++; + Counting3[(BYTE)(c>>16)]++; + Counting4[ c>>24 ]++; + } + ip-=4; + } + + /* finish last symbols */ + while (ip max) max = Counting1[s]; + } } + + { unsigned maxSymbolValue = 255; + while (!Counting1[maxSymbolValue]) maxSymbolValue--; + if (check && maxSymbolValue > *maxSymbolValuePtr) return ERROR(maxSymbolValue_tooSmall); + *maxSymbolValuePtr = maxSymbolValue; + ZSTD_memmove(count, Counting1, countSize); /* in case count & Counting1 are overlapping */ + } + return (size_t)max; +} + +/* HIST_countFast_wksp() : + * Same as HIST_countFast(), but using an 
externally provided scratch buffer. + * `workSpace` is a writable buffer which must be 4-bytes aligned, + * `workSpaceSize` must be >= HIST_WKSP_SIZE + */ +size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, + const void* source, size_t sourceSize, + void* workSpace, size_t workSpaceSize) +{ + if (sourceSize < 1500) /* heuristic threshold */ + return HIST_count_simple(count, maxSymbolValuePtr, source, sourceSize); + if ((size_t)workSpace & 3) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */ + if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall); + return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, trustInput, (U32*)workSpace); +} + +/* HIST_count_wksp() : + * Same as HIST_count(), but using an externally provided scratch buffer. + * `workSpace` size must be table of >= HIST_WKSP_SIZE_U32 unsigned */ +size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr, + const void* source, size_t sourceSize, + void* workSpace, size_t workSpaceSize) +{ + if ((size_t)workSpace & 3) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */ + if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall); + if (*maxSymbolValuePtr < 255) + return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, checkMaxSymbolValue, (U32*)workSpace); + *maxSymbolValuePtr = 255; + return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace, workSpaceSize); +} + +#ifndef ZSTD_NO_UNUSED_FUNCTIONS +/* fast variant (unsafe : won't check if src contains values beyond count[] limit) */ +size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr, + const void* source, size_t sourceSize) +{ + unsigned tmpCounters[HIST_WKSP_SIZE_U32]; + return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters, sizeof(tmpCounters)); +} + +size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize) +{ + unsigned tmpCounters[HIST_WKSP_SIZE_U32]; + return HIST_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters, sizeof(tmpCounters)); +} +#endif +/**** ended inlining compress/hist.c ****/ +/**** start inlining compress/huf_compress.c ****/ +/* ****************************************************************** + * Huffman encoder, part of New Generation Entropy library + * Copyright (c) Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy + * - Public forum : https://groups.google.com/forum/#!forum/lz4c + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+****************************************************************** */ + +/* ************************************************************** +* Compiler specifics +****************************************************************/ +#ifdef _MSC_VER /* Visual Studio */ +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +#endif + + +/* ************************************************************** +* Includes +****************************************************************/ +/**** skipping file: ../common/zstd_deps.h ****/ +/**** skipping file: ../common/compiler.h ****/ +/**** skipping file: ../common/bitstream.h ****/ +/**** skipping file: hist.h ****/ +#define FSE_STATIC_LINKING_ONLY /* FSE_optimalTableLog_internal */ +/**** skipping file: ../common/fse.h ****/ +#define HUF_STATIC_LINKING_ONLY +/**** skipping file: ../common/huf.h ****/ +/**** skipping file: ../common/error_private.h ****/ + + +/* ************************************************************** +* Error Management +****************************************************************/ +#define HUF_isError ERR_isError +#define HUF_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */ + + +/* ************************************************************** +* Utils +****************************************************************/ +unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue) +{ + return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1); +} + + +/* ******************************************************* +* HUF : Huffman block compression +*********************************************************/ +/* HUF_compressWeights() : + * Same as FSE_compress(), but dedicated to huff0's weights compression. + * The use case needs much less stack memory. + * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX. 
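+ * As a concrete illustration of those weights (this is how HUF_writeCTable_wksp()
+ * below derives them, not an additional API): a symbol coded on nbBits bits is
+ * stored as weight = huffLog + 1 - nbBits, and an unused symbol gets weight 0.
+ * With huffLog = 11, a 4-bit symbol therefore becomes weight 11 + 1 - 4 = 8,
+ * which fits the <= HUF_TABLELOG_MAX bound stated above.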
+ */ +#define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6 + +typedef struct { + FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)]; + U32 scratchBuffer[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(HUF_TABLELOG_MAX, MAX_FSE_TABLELOG_FOR_HUFF_HEADER)]; + unsigned count[HUF_TABLELOG_MAX+1]; + S16 norm[HUF_TABLELOG_MAX+1]; +} HUF_CompressWeightsWksp; + +static size_t HUF_compressWeights(void* dst, size_t dstSize, const void* weightTable, size_t wtSize, void* workspace, size_t workspaceSize) +{ + BYTE* const ostart = (BYTE*) dst; + BYTE* op = ostart; + BYTE* const oend = ostart + dstSize; + + unsigned maxSymbolValue = HUF_TABLELOG_MAX; + U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER; + HUF_CompressWeightsWksp* wksp = (HUF_CompressWeightsWksp*)workspace; + + if (workspaceSize < sizeof(HUF_CompressWeightsWksp)) return ERROR(GENERIC); + + /* init conditions */ + if (wtSize <= 1) return 0; /* Not compressible */ + + /* Scan input and build symbol stats */ + { unsigned const maxCount = HIST_count_simple(wksp->count, &maxSymbolValue, weightTable, wtSize); /* never fails */ + if (maxCount == wtSize) return 1; /* only a single symbol in src : rle */ + if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */ + } + + tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue); + CHECK_F( FSE_normalizeCount(wksp->norm, tableLog, wksp->count, wtSize, maxSymbolValue, /* useLowProbCount */ 0) ); + + /* Write table description header */ + { CHECK_V_F(hSize, FSE_writeNCount(op, (size_t)(oend-op), wksp->norm, maxSymbolValue, tableLog) ); + op += hSize; + } + + /* Compress */ + CHECK_F( FSE_buildCTable_wksp(wksp->CTable, wksp->norm, maxSymbolValue, tableLog, wksp->scratchBuffer, sizeof(wksp->scratchBuffer)) ); + { CHECK_V_F(cSize, FSE_compress_usingCTable(op, (size_t)(oend - op), weightTable, wtSize, wksp->CTable) ); + if (cSize == 0) return 0; /* not enough space for compressed data */ + op += cSize; + } + + return (size_t)(op-ostart); +} + + +typedef struct { + HUF_CompressWeightsWksp wksp; + BYTE bitsToWeight[HUF_TABLELOG_MAX + 1]; /* precomputed conversion table */ + BYTE huffWeight[HUF_SYMBOLVALUE_MAX]; +} HUF_WriteCTableWksp; + +size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize, + const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog, + void* workspace, size_t workspaceSize) +{ + BYTE* op = (BYTE*)dst; + U32 n; + HUF_WriteCTableWksp* wksp = (HUF_WriteCTableWksp*)workspace; + + /* check conditions */ + if (workspaceSize < sizeof(HUF_WriteCTableWksp)) return ERROR(GENERIC); + if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge); + + /* convert to weight */ + wksp->bitsToWeight[0] = 0; + for (n=1; nbitsToWeight[n] = (BYTE)(huffLog + 1 - n); + for (n=0; nhuffWeight[n] = wksp->bitsToWeight[CTable[n].nbBits]; + + /* attempt weights compression by FSE */ + { CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, wksp->huffWeight, maxSymbolValue, &wksp->wksp, sizeof(wksp->wksp)) ); + if ((hSize>1) & (hSize < maxSymbolValue/2)) { /* FSE compressed */ + op[0] = (BYTE)hSize; + return hSize+1; + } } + + /* write raw values as 4-bits (max : 15) */ + if (maxSymbolValue > (256-128)) return ERROR(GENERIC); /* should not happen : likely means source cannot be compressed */ + if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall); /* not enough space within dst buffer */ + op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1)); + wksp->huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't 
cause msan issue in final combination */ + for (n=0; nhuffWeight[n] << 4) + wksp->huffWeight[n+1]); + return ((maxSymbolValue+1)/2) + 1; +} + +/*! HUF_writeCTable() : + `CTable` : Huffman tree to save, using huf representation. + @return : size of saved CTable */ +size_t HUF_writeCTable (void* dst, size_t maxDstSize, + const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog) +{ + HUF_WriteCTableWksp wksp; + return HUF_writeCTable_wksp(dst, maxDstSize, CTable, maxSymbolValue, huffLog, &wksp, sizeof(wksp)); +} + + +size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* hasZeroWeights) +{ + BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; /* init not required, even though some static analyzer may complain */ + U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; /* large enough for values from 0 to 16 */ + U32 tableLog = 0; + U32 nbSymbols = 0; + + /* get symbol weights */ + CHECK_V_F(readSize, HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize)); + *hasZeroWeights = (rankVal[0] > 0); + + /* check result */ + if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); + if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall); + + /* Prepare base value per rank */ + { U32 n, nextRankStart = 0; + for (n=1; n<=tableLog; n++) { + U32 curr = nextRankStart; + nextRankStart += (rankVal[n] << (n-1)); + rankVal[n] = curr; + } } + + /* fill nbBits */ + { U32 n; for (n=0; nn=tableLog+1 */ + U16 valPerRank[HUF_TABLELOG_MAX+2] = {0}; + { U32 n; for (n=0; n0; n--) { /* start at n=tablelog <-> w=1 */ + valPerRank[n] = min; /* get starting value within each rank */ + min += nbPerRank[n]; + min >>= 1; + } } + /* assign value within rank, symbol order */ + { U32 n; for (n=0; n maxNbBits to be maxNbBits. Then it adjusts + * the tree to so that it is a valid canonical Huffman tree. + * + * @pre The sum of the ranks of each symbol == 2^largestBits, + * where largestBits == huffNode[lastNonNull].nbBits. + * @post The sum of the ranks of each symbol == 2^largestBits, + * where largestBits is the return value <= maxNbBits. + * + * @param huffNode The Huffman tree modified in place to enforce maxNbBits. + * @param lastNonNull The symbol with the lowest count in the Huffman tree. + * @param maxNbBits The maximum allowed number of bits, which the Huffman tree + * may not respect. After this function the Huffman tree will + * respect maxNbBits. + * @return The maximum number of bits of the Huffman tree after adjustment, + * necessarily no more than maxNbBits. + */ +static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits) +{ + const U32 largestBits = huffNode[lastNonNull].nbBits; + /* early exit : no elt > maxNbBits, so the tree is already valid. */ + if (largestBits <= maxNbBits) return largestBits; + + /* there are several too large elements (at least >= 2) */ + { int totalCost = 0; + const U32 baseCost = 1 << (largestBits - maxNbBits); + int n = (int)lastNonNull; + + /* Adjust any ranks > maxNbBits to maxNbBits. + * Compute totalCost, which is how far the sum of the ranks is + * we are over 2^largestBits after adjust the offending ranks. 
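+ * Illustrative numbers (not taken from any particular input): with
+ * largestBits = 13 and maxNbBits = 11, baseCost = 1 << (13 - 11) = 4; a node
+ * currently using 12 bits then adds 4 - (1 << (13 - 12)) = 2 to totalCost,
+ * and a node using 13 bits adds 4 - 1 = 3.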
+ */ + while (huffNode[n].nbBits > maxNbBits) { + totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits)); + huffNode[n].nbBits = (BYTE)maxNbBits; + n--; + } + /* n stops at huffNode[n].nbBits <= maxNbBits */ + assert(huffNode[n].nbBits <= maxNbBits); + /* n end at index of smallest symbol using < maxNbBits */ + while (huffNode[n].nbBits == maxNbBits) --n; + + /* renorm totalCost from 2^largestBits to 2^maxNbBits + * note : totalCost is necessarily a multiple of baseCost */ + assert((totalCost & (baseCost - 1)) == 0); + totalCost >>= (largestBits - maxNbBits); + assert(totalCost > 0); + + /* repay normalized cost */ + { U32 const noSymbol = 0xF0F0F0F0; + U32 rankLast[HUF_TABLELOG_MAX+2]; + + /* Get pos of last (smallest = lowest cum. count) symbol per rank */ + ZSTD_memset(rankLast, 0xF0, sizeof(rankLast)); + { U32 currentNbBits = maxNbBits; + int pos; + for (pos=n ; pos >= 0; pos--) { + if (huffNode[pos].nbBits >= currentNbBits) continue; + currentNbBits = huffNode[pos].nbBits; /* < maxNbBits */ + rankLast[maxNbBits-currentNbBits] = (U32)pos; + } } + + while (totalCost > 0) { + /* Try to reduce the next power of 2 above totalCost because we + * gain back half the rank. + */ + U32 nBitsToDecrease = BIT_highbit32((U32)totalCost) + 1; + for ( ; nBitsToDecrease > 1; nBitsToDecrease--) { + U32 const highPos = rankLast[nBitsToDecrease]; + U32 const lowPos = rankLast[nBitsToDecrease-1]; + if (highPos == noSymbol) continue; + /* Decrease highPos if no symbols of lowPos or if it is + * not cheaper to remove 2 lowPos than highPos. + */ + if (lowPos == noSymbol) break; + { U32 const highTotal = huffNode[highPos].count; + U32 const lowTotal = 2 * huffNode[lowPos].count; + if (highTotal <= lowTotal) break; + } } + /* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */ + assert(rankLast[nBitsToDecrease] != noSymbol || nBitsToDecrease == 1); + /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */ + while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol)) + nBitsToDecrease++; + assert(rankLast[nBitsToDecrease] != noSymbol); + /* Increase the number of bits to gain back half the rank cost. */ + totalCost -= 1 << (nBitsToDecrease-1); + huffNode[rankLast[nBitsToDecrease]].nbBits++; + + /* Fix up the new rank. + * If the new rank was empty, this symbol is now its smallest. + * Otherwise, this symbol will be the largest in the new rank so no adjustment. + */ + if (rankLast[nBitsToDecrease-1] == noSymbol) + rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease]; + /* Fix up the old rank. + * If the symbol was at position 0, meaning it was the highest weight symbol in the tree, + * it must be the only symbol in its rank, so the old rank now has no symbols. + * Otherwise, since the Huffman nodes are sorted by count, the previous position is now + * the smallest node in the rank. If the previous position belongs to a different rank, + * then the rank is now empty. + */ + if (rankLast[nBitsToDecrease] == 0) /* special case, reached largest symbol */ + rankLast[nBitsToDecrease] = noSymbol; + else { + rankLast[nBitsToDecrease]--; + if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease) + rankLast[nBitsToDecrease] = noSymbol; /* this rank is now empty */ + } + } /* while (totalCost > 0) */ + + /* If we've removed too much weight, then we have to add it back. + * To avoid overshooting again, we only adjust the smallest rank. 
+ * We take the largest nodes from the lowest rank 0 and move them + * to rank 1. There's guaranteed to be enough rank 0 symbols because + * TODO. + */ + while (totalCost < 0) { /* Sometimes, cost correction overshoot */ + /* special case : no rank 1 symbol (using maxNbBits-1); + * let's create one from largest rank 0 (using maxNbBits). + */ + if (rankLast[1] == noSymbol) { + while (huffNode[n].nbBits == maxNbBits) n--; + huffNode[n+1].nbBits--; + assert(n >= 0); + rankLast[1] = (U32)(n+1); + totalCost++; + continue; + } + huffNode[ rankLast[1] + 1 ].nbBits--; + rankLast[1]++; + totalCost ++; + } + } /* repay normalized cost */ + } /* there are several too large elements (at least >= 2) */ + + return maxNbBits; +} + +typedef struct { + U32 base; + U32 curr; +} rankPos; + +typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32]; + +#define RANK_POSITION_TABLE_SIZE 32 + +typedef struct { + huffNodeTable huffNodeTbl; + rankPos rankPosition[RANK_POSITION_TABLE_SIZE]; +} HUF_buildCTable_wksp_tables; + +/** + * HUF_sort(): + * Sorts the symbols [0, maxSymbolValue] by count[symbol] in decreasing order. + * + * @param[out] huffNode Sorted symbols by decreasing count. Only members `.count` and `.byte` are filled. + * Must have (maxSymbolValue + 1) entries. + * @param[in] count Histogram of the symbols. + * @param[in] maxSymbolValue Maximum symbol value. + * @param rankPosition This is a scratch workspace. Must have RANK_POSITION_TABLE_SIZE entries. + */ +static void HUF_sort(nodeElt* huffNode, const unsigned* count, U32 maxSymbolValue, rankPos* rankPosition) +{ + int n; + int const maxSymbolValue1 = (int)maxSymbolValue + 1; + + /* Compute base and set curr to base. + * For symbol s let lowerRank = BIT_highbit32(count[n]+1) and rank = lowerRank + 1. + * Then 2^lowerRank <= count[n]+1 <= 2^rank. + * We attribute each symbol to lowerRank's base value, because we want to know where + * each rank begins in the output, so for rank R we want to count ranks R+1 and above. + */ + ZSTD_memset(rankPosition, 0, sizeof(*rankPosition) * RANK_POSITION_TABLE_SIZE); + for (n = 0; n < maxSymbolValue1; ++n) { + U32 lowerRank = BIT_highbit32(count[n] + 1); + rankPosition[lowerRank].base++; + } + assert(rankPosition[RANK_POSITION_TABLE_SIZE - 1].base == 0); + for (n = RANK_POSITION_TABLE_SIZE - 1; n > 0; --n) { + rankPosition[n-1].base += rankPosition[n].base; + rankPosition[n-1].curr = rankPosition[n-1].base; + } + /* Sort */ + for (n = 0; n < maxSymbolValue1; ++n) { + U32 const c = count[n]; + U32 const r = BIT_highbit32(c+1) + 1; + U32 pos = rankPosition[r].curr++; + /* Insert into the correct position in the rank. + * We have at most 256 symbols, so this insertion should be fine. + */ + while ((pos > rankPosition[r].base) && (c > huffNode[pos-1].count)) { + huffNode[pos] = huffNode[pos-1]; + pos--; + } + huffNode[pos].count = c; + huffNode[pos].byte = (BYTE)n; + } +} + + +/** HUF_buildCTable_wksp() : + * Same as HUF_buildCTable(), but using externally allocated scratch buffer. + * `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as sizeof(HUF_buildCTable_wksp_tables). + */ +#define STARTNODE (HUF_SYMBOLVALUE_MAX+1) + +/* HUF_buildTree(): + * Takes the huffNode array sorted by HUF_sort() and builds an unlimited-depth Huffman tree. + * + * @param huffNode The array sorted by HUF_sort(). Builds the Huffman tree in this array. + * @param maxSymbolValue The maximum symbol value. + * @return The smallest node in the Huffman tree (by count). 
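+ *
+ * For orientation, HUF_buildCTable_wksp() below composes these helpers roughly
+ * as follows (variable names are the ones local to that function):
+ *   HUF_sort(huffNode, count, maxSymbolValue, rankPosition);
+ *   nonNullRank = HUF_buildTree(huffNode, maxSymbolValue);
+ *   maxNbBits   = HUF_setMaxHeight(huffNode, nonNullRank, maxNbBits);
+ *   HUF_buildCTableFromTree(tree, huffNode, nonNullRank, maxSymbolValue, maxNbBits);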
+ */ +static int HUF_buildTree(nodeElt* huffNode, U32 maxSymbolValue) +{ + nodeElt* const huffNode0 = huffNode - 1; + int nonNullRank; + int lowS, lowN; + int nodeNb = STARTNODE; + int n, nodeRoot; + /* init for parents */ + nonNullRank = (int)maxSymbolValue; + while(huffNode[nonNullRank].count == 0) nonNullRank--; + lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb; + huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count; + huffNode[lowS].parent = huffNode[lowS-1].parent = (U16)nodeNb; + nodeNb++; lowS-=2; + for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30); + huffNode0[0].count = (U32)(1U<<31); /* fake entry, strong barrier */ + + /* create parents */ + while (nodeNb <= nodeRoot) { + int const n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++; + int const n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++; + huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count; + huffNode[n1].parent = huffNode[n2].parent = (U16)nodeNb; + nodeNb++; + } + + /* distribute weights (unlimited tree height) */ + huffNode[nodeRoot].nbBits = 0; + for (n=nodeRoot-1; n>=STARTNODE; n--) + huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1; + for (n=0; n<=nonNullRank; n++) + huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1; + + return nonNullRank; +} + +/** + * HUF_buildCTableFromTree(): + * Build the CTable given the Huffman tree in huffNode. + * + * @param[out] CTable The output Huffman CTable. + * @param huffNode The Huffman tree. + * @param nonNullRank The last and smallest node in the Huffman tree. + * @param maxSymbolValue The maximum symbol value. + * @param maxNbBits The exact maximum number of bits used in the Huffman tree. + */ +static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, int nonNullRank, U32 maxSymbolValue, U32 maxNbBits) +{ + /* fill result into ctable (val, nbBits) */ + int n; + U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0}; + U16 valPerRank[HUF_TABLELOG_MAX+1] = {0}; + int const alphabetSize = (int)(maxSymbolValue + 1); + for (n=0; n<=nonNullRank; n++) + nbPerRank[huffNode[n].nbBits]++; + /* determine starting value per rank */ + { U16 min = 0; + for (n=(int)maxNbBits; n>0; n--) { + valPerRank[n] = min; /* get starting value within each rank */ + min += nbPerRank[n]; + min >>= 1; + } } + for (n=0; nhuffNodeTbl; + nodeElt* const huffNode = huffNode0+1; + int nonNullRank; + + /* safety checks */ + if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */ + if (wkspSize < sizeof(HUF_buildCTable_wksp_tables)) + return ERROR(workSpace_tooSmall); + if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT; + if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) + return ERROR(maxSymbolValue_tooLarge); + ZSTD_memset(huffNode0, 0, sizeof(huffNodeTable)); + + /* sort, decreasing order */ + HUF_sort(huffNode, count, maxSymbolValue, wksp_tables->rankPosition); + + /* build tree */ + nonNullRank = HUF_buildTree(huffNode, maxSymbolValue); + + /* enforce maxTableLog */ + maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits); + if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC); /* check fit into table */ + + HUF_buildCTableFromTree(tree, huffNode, nonNullRank, maxSymbolValue, maxNbBits); + + return maxNbBits; +} + +size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) +{ + size_t nbBits = 0; + int s; + for (s = 0; s <= (int)maxSymbolValue; ++s) { + nbBits += 
CTable[s].nbBits * count[s]; + } + return nbBits >> 3; +} + +int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) { + int bad = 0; + int s; + for (s = 0; s <= (int)maxSymbolValue; ++s) { + bad |= (count[s] != 0) & (CTable[s].nbBits == 0); + } + return !bad; +} + +size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); } + +FORCE_INLINE_TEMPLATE void +HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable) +{ + BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits); +} + +#define HUF_FLUSHBITS(s) BIT_flushBits(s) + +#define HUF_FLUSHBITS_1(stream) \ + if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*2+7) HUF_FLUSHBITS(stream) + +#define HUF_FLUSHBITS_2(stream) \ + if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream) + +FORCE_INLINE_TEMPLATE size_t +HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize, + const void* src, size_t srcSize, + const HUF_CElt* CTable) +{ + const BYTE* ip = (const BYTE*) src; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstSize; + BYTE* op = ostart; + size_t n; + BIT_CStream_t bitC; + + /* init */ + if (dstSize < 8) return 0; /* not enough space to compress */ + { size_t const initErr = BIT_initCStream(&bitC, op, (size_t)(oend-op)); + if (HUF_isError(initErr)) return 0; } + + n = srcSize & ~3; /* join to mod 4 */ + switch (srcSize & 3) + { + case 3 : HUF_encodeSymbol(&bitC, ip[n+ 2], CTable); + HUF_FLUSHBITS_2(&bitC); + /* fall-through */ + case 2 : HUF_encodeSymbol(&bitC, ip[n+ 1], CTable); + HUF_FLUSHBITS_1(&bitC); + /* fall-through */ + case 1 : HUF_encodeSymbol(&bitC, ip[n+ 0], CTable); + HUF_FLUSHBITS(&bitC); + /* fall-through */ + case 0 : /* fall-through */ + default: break; + } + + for (; n>0; n-=4) { /* note : n&3==0 at this stage */ + HUF_encodeSymbol(&bitC, ip[n- 1], CTable); + HUF_FLUSHBITS_1(&bitC); + HUF_encodeSymbol(&bitC, ip[n- 2], CTable); + HUF_FLUSHBITS_2(&bitC); + HUF_encodeSymbol(&bitC, ip[n- 3], CTable); + HUF_FLUSHBITS_1(&bitC); + HUF_encodeSymbol(&bitC, ip[n- 4], CTable); + HUF_FLUSHBITS(&bitC); + } + + return BIT_closeCStream(&bitC); +} + +#if DYNAMIC_BMI2 + +static TARGET_ATTRIBUTE("bmi2") size_t +HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize, + const void* src, size_t srcSize, + const HUF_CElt* CTable) +{ + return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable); +} + +static size_t +HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize, + const void* src, size_t srcSize, + const HUF_CElt* CTable) +{ + return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable); +} + +static size_t +HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize, + const void* src, size_t srcSize, + const HUF_CElt* CTable, const int bmi2) +{ + if (bmi2) { + return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable); + } + return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable); +} + +#else + +static size_t +HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize, + const void* src, size_t srcSize, + const HUF_CElt* CTable, const int bmi2) +{ + (void)bmi2; + return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable); +} + +#endif + +size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) +{ + return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, 
CTable, /* bmi2 */ 0); +} + + +static size_t +HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize, + const void* src, size_t srcSize, + const HUF_CElt* CTable, int bmi2) +{ + size_t const segmentSize = (srcSize+3)/4; /* first 3 segments */ + const BYTE* ip = (const BYTE*) src; + const BYTE* const iend = ip + srcSize; + BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = ostart + dstSize; + BYTE* op = ostart; + + if (dstSize < 6 + 1 + 1 + 1 + 8) return 0; /* minimum space to compress successfully */ + if (srcSize < 12) return 0; /* no saving possible : too small input */ + op += 6; /* jumpTable */ + + assert(op <= oend); + { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) ); + if (cSize==0) return 0; + assert(cSize <= 65535); + MEM_writeLE16(ostart, (U16)cSize); + op += cSize; + } + + ip += segmentSize; + assert(op <= oend); + { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) ); + if (cSize==0) return 0; + assert(cSize <= 65535); + MEM_writeLE16(ostart+2, (U16)cSize); + op += cSize; + } + + ip += segmentSize; + assert(op <= oend); + { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) ); + if (cSize==0) return 0; + assert(cSize <= 65535); + MEM_writeLE16(ostart+4, (U16)cSize); + op += cSize; + } + + ip += segmentSize; + assert(op <= oend); + assert(ip <= iend); + { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, bmi2) ); + if (cSize==0) return 0; + op += cSize; + } + + return (size_t)(op-ostart); +} + +size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) +{ + return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0); +} + +typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e; + +static size_t HUF_compressCTable_internal( + BYTE* const ostart, BYTE* op, BYTE* const oend, + const void* src, size_t srcSize, + HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int bmi2) +{ + size_t const cSize = (nbStreams==HUF_singleStream) ? 
+ HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2) : + HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2); + if (HUF_isError(cSize)) { return cSize; } + if (cSize==0) { return 0; } /* uncompressible */ + op += cSize; + /* check compressibility */ + assert(op >= ostart); + if ((size_t)(op-ostart) >= srcSize-1) { return 0; } + return (size_t)(op-ostart); +} + +typedef struct { + unsigned count[HUF_SYMBOLVALUE_MAX + 1]; + HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1]; + union { + HUF_buildCTable_wksp_tables buildCTable_wksp; + HUF_WriteCTableWksp writeCTable_wksp; + } wksps; +} HUF_compress_tables_t; + +/* HUF_compress_internal() : + * `workSpace_align4` must be aligned on 4-bytes boundaries, + * and occupies the same space as a table of HUF_WORKSPACE_SIZE_U32 unsigned */ +static size_t +HUF_compress_internal (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog, + HUF_nbStreams_e nbStreams, + void* workSpace_align4, size_t wkspSize, + HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat, + const int bmi2) +{ + HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace_align4; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstSize; + BYTE* op = ostart; + + HUF_STATIC_ASSERT(sizeof(*table) <= HUF_WORKSPACE_SIZE); + assert(((size_t)workSpace_align4 & 3) == 0); /* must be aligned on 4-bytes boundaries */ + + /* checks & inits */ + if (wkspSize < HUF_WORKSPACE_SIZE) return ERROR(workSpace_tooSmall); + if (!srcSize) return 0; /* Uncompressed */ + if (!dstSize) return 0; /* cannot fit anything within dst budget */ + if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); /* current block size limit */ + if (huffLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); + if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge); + if (!maxSymbolValue) maxSymbolValue = HUF_SYMBOLVALUE_MAX; + if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT; + + /* Heuristic : If old table is valid, use it for small inputs */ + if (preferRepeat && repeat && *repeat == HUF_repeat_valid) { + return HUF_compressCTable_internal(ostart, op, oend, + src, srcSize, + nbStreams, oldHufTable, bmi2); + } + + /* Scan input and build symbol stats */ + { CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, workSpace_align4, wkspSize) ); + if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */ + if (largest <= (srcSize >> 7)+4) return 0; /* heuristic : probably not compressible enough */ + } + + /* Check validity of previous table */ + if ( repeat + && *repeat == HUF_repeat_check + && !HUF_validateCTable(oldHufTable, table->count, maxSymbolValue)) { + *repeat = HUF_repeat_none; + } + /* Heuristic : use existing table for small inputs */ + if (preferRepeat && repeat && *repeat != HUF_repeat_none) { + return HUF_compressCTable_internal(ostart, op, oend, + src, srcSize, + nbStreams, oldHufTable, bmi2); + } + + /* Build Huffman Tree */ + huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue); + { size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count, + maxSymbolValue, huffLog, + &table->wksps.buildCTable_wksp, sizeof(table->wksps.buildCTable_wksp)); + CHECK_F(maxBits); + huffLog = (U32)maxBits; + /* Zero unused symbols in CTable, so we can check it for validity */ + ZSTD_memset(table->CTable + (maxSymbolValue + 1), 0, + sizeof(table->CTable) - 
((maxSymbolValue + 1) * sizeof(HUF_CElt))); + } + + /* Write table description header */ + { CHECK_V_F(hSize, HUF_writeCTable_wksp(op, dstSize, table->CTable, maxSymbolValue, huffLog, + &table->wksps.writeCTable_wksp, sizeof(table->wksps.writeCTable_wksp)) ); + /* Check if using previous huffman table is beneficial */ + if (repeat && *repeat != HUF_repeat_none) { + size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue); + size_t const newSize = HUF_estimateCompressedSize(table->CTable, table->count, maxSymbolValue); + if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) { + return HUF_compressCTable_internal(ostart, op, oend, + src, srcSize, + nbStreams, oldHufTable, bmi2); + } } + + /* Use the new huffman table */ + if (hSize + 12ul >= srcSize) { return 0; } + op += hSize; + if (repeat) { *repeat = HUF_repeat_none; } + if (oldHufTable) + ZSTD_memcpy(oldHufTable, table->CTable, sizeof(table->CTable)); /* Save new table */ + } + return HUF_compressCTable_internal(ostart, op, oend, + src, srcSize, + nbStreams, table->CTable, bmi2); +} + + +size_t HUF_compress1X_wksp (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog, + void* workSpace, size_t wkspSize) +{ + return HUF_compress_internal(dst, dstSize, src, srcSize, + maxSymbolValue, huffLog, HUF_singleStream, + workSpace, wkspSize, + NULL, NULL, 0, 0 /*bmi2*/); +} + +size_t HUF_compress1X_repeat (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog, + void* workSpace, size_t wkspSize, + HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2) +{ + return HUF_compress_internal(dst, dstSize, src, srcSize, + maxSymbolValue, huffLog, HUF_singleStream, + workSpace, wkspSize, hufTable, + repeat, preferRepeat, bmi2); +} + +/* HUF_compress4X_repeat(): + * compress input using 4 streams. + * provide workspace to generate compression tables */ +size_t HUF_compress4X_wksp (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog, + void* workSpace, size_t wkspSize) +{ + return HUF_compress_internal(dst, dstSize, src, srcSize, + maxSymbolValue, huffLog, HUF_fourStreams, + workSpace, wkspSize, + NULL, NULL, 0, 0 /*bmi2*/); +} + +/* HUF_compress4X_repeat(): + * compress input using 4 streams. 
+ * re-use an existing huffman compression table */ +size_t HUF_compress4X_repeat (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog, + void* workSpace, size_t wkspSize, + HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2) +{ + return HUF_compress_internal(dst, dstSize, src, srcSize, + maxSymbolValue, huffLog, HUF_fourStreams, + workSpace, wkspSize, + hufTable, repeat, preferRepeat, bmi2); +} + +#ifndef ZSTD_NO_UNUSED_FUNCTIONS +/** HUF_buildCTable() : + * @return : maxNbBits + * Note : count is used before tree is written, so they can safely overlap + */ +size_t HUF_buildCTable (HUF_CElt* tree, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits) +{ + HUF_buildCTable_wksp_tables workspace; + return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, &workspace, sizeof(workspace)); +} + +size_t HUF_compress1X (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog) +{ + unsigned workSpace[HUF_WORKSPACE_SIZE_U32]; + return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace)); +} + +size_t HUF_compress2 (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog) +{ + unsigned workSpace[HUF_WORKSPACE_SIZE_U32]; + return HUF_compress4X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace)); +} + +size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize) +{ + return HUF_compress2(dst, maxDstSize, src, srcSize, 255, HUF_TABLELOG_DEFAULT); +} +#endif +/**** ended inlining compress/huf_compress.c ****/ +/**** start inlining compress/zstd_compress_literals.c ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + /*-************************************* + * Dependencies + ***************************************/ +/**** start inlining zstd_compress_literals.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_COMPRESS_LITERALS_H +#define ZSTD_COMPRESS_LITERALS_H + +/**** start inlining zstd_compress_internal.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/* This header contains definitions + * that shall **only** be used by modules within lib/compress. 
+ */ + +#ifndef ZSTD_COMPRESS_H +#define ZSTD_COMPRESS_H + +/*-************************************* +* Dependencies +***************************************/ +/**** skipping file: ../common/zstd_internal.h ****/ +/**** start inlining zstd_cwksp.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_CWKSP_H +#define ZSTD_CWKSP_H + +/*-************************************* +* Dependencies +***************************************/ +/**** skipping file: ../common/zstd_internal.h ****/ + +#if defined (__cplusplus) +extern "C" { +#endif + +/*-************************************* +* Constants +***************************************/ + +/* Since the workspace is effectively its own little malloc implementation / + * arena, when we run under ASAN, we should similarly insert redzones between + * each internal element of the workspace, so ASAN will catch overruns that + * reach outside an object but that stay inside the workspace. + * + * This defines the size of that redzone. + */ +#ifndef ZSTD_CWKSP_ASAN_REDZONE_SIZE +#define ZSTD_CWKSP_ASAN_REDZONE_SIZE 128 +#endif + + +/* Set our tables and aligneds to align by 64 bytes */ +#define ZSTD_CWKSP_ALIGNMENT_BYTES 64 + +/*-************************************* +* Structures +***************************************/ +typedef enum { + ZSTD_cwksp_alloc_objects, + ZSTD_cwksp_alloc_buffers, + ZSTD_cwksp_alloc_aligned +} ZSTD_cwksp_alloc_phase_e; + +/** + * Used to describe whether the workspace is statically allocated (and will not + * necessarily ever be freed), or if it's dynamically allocated and we can + * expect a well-formed caller to free this. + */ +typedef enum { + ZSTD_cwksp_dynamic_alloc, + ZSTD_cwksp_static_alloc +} ZSTD_cwksp_static_alloc_e; + +/** + * Zstd fits all its internal datastructures into a single continuous buffer, + * so that it only needs to perform a single OS allocation (or so that a buffer + * can be provided to it and it can perform no allocations at all). This buffer + * is called the workspace. + * + * Several optimizations complicate that process of allocating memory ranges + * from this workspace for each internal datastructure: + * + * - These different internal datastructures have different setup requirements: + * + * - The static objects need to be cleared once and can then be trivially + * reused for each compression. + * + * - Various buffers don't need to be initialized at all--they are always + * written into before they're read. + * + * - The matchstate tables have a unique requirement that they don't need + * their memory to be totally cleared, but they do need the memory to have + * some bound, i.e., a guarantee that all values in the memory they've been + * allocated is less than some maximum value (which is the starting value + * for the indices that they will then use for compression). When this + * guarantee is provided to them, they can use the memory without any setup + * work. When it can't, they have to clear the area. + * + * - These buffers also have different alignment requirements. + * + * - We would like to reuse the objects in the workspace for multiple + * compressions without having to perform any expensive reallocation or + * reinitialization work. 
+ * + * - We would like to be able to efficiently reuse the workspace across + * multiple compressions **even when the compression parameters change** and + * we need to resize some of the objects (where possible). + * + * To attempt to manage this buffer, given these constraints, the ZSTD_cwksp + * abstraction was created. It works as follows: + * + * Workspace Layout: + * + * [ ... workspace ... ] + * [objects][tables ... ->] free space [<- ... aligned][<- ... buffers] + * + * The various objects that live in the workspace are divided into the + * following categories, and are allocated separately: + * + * - Static objects: this is optionally the enclosing ZSTD_CCtx or ZSTD_CDict, + * so that literally everything fits in a single buffer. Note: if present, + * this must be the first object in the workspace, since ZSTD_customFree{CCtx, + * CDict}() rely on a pointer comparison to see whether one or two frees are + * required. + * + * - Fixed size objects: these are fixed-size, fixed-count objects that are + * nonetheless "dynamically" allocated in the workspace so that we can + * control how they're initialized separately from the broader ZSTD_CCtx. + * Examples: + * - Entropy Workspace + * - 2 x ZSTD_compressedBlockState_t + * - CDict dictionary contents + * + * - Tables: these are any of several different datastructures (hash tables, + * chain tables, binary trees) that all respect a common format: they are + * uint32_t arrays, all of whose values are between 0 and (nextSrc - base). + * Their sizes depend on the cparams. These tables are 64-byte aligned. + * + * - Aligned: these buffers are used for various purposes that require 4 byte + * alignment, but don't require any initialization before they're used. These + * buffers are each aligned to 64 bytes. + * + * - Buffers: these buffers are used for various purposes that don't require + * any alignment or initialization before they're used. This means they can + * be moved around at no cost for a new compression. + * + * Allocating Memory: + * + * The various types of objects must be allocated in order, so they can be + * correctly packed into the workspace buffer. That order is: + * + * 1. Objects + * 2. Buffers + * 3. Aligned/Tables + * + * Attempts to reserve objects of different types out of order will fail. + */ +typedef struct { + void* workspace; + void* workspaceEnd; + + void* objectEnd; + void* tableEnd; + void* tableValidEnd; + void* allocStart; + + BYTE allocFailed; + int workspaceOversizedDuration; + ZSTD_cwksp_alloc_phase_e phase; + ZSTD_cwksp_static_alloc_e isStatic; +} ZSTD_cwksp; + +/*-************************************* +* Functions +***************************************/ + +MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws); + +MEM_STATIC void ZSTD_cwksp_assert_internal_consistency(ZSTD_cwksp* ws) { + (void)ws; + assert(ws->workspace <= ws->objectEnd); + assert(ws->objectEnd <= ws->tableEnd); + assert(ws->objectEnd <= ws->tableValidEnd); + assert(ws->tableEnd <= ws->allocStart); + assert(ws->tableValidEnd <= ws->allocStart); + assert(ws->allocStart <= ws->workspaceEnd); +} + +/** + * Align must be a power of 2. + */ +MEM_STATIC size_t ZSTD_cwksp_align(size_t size, size_t const align) { + size_t const mask = align - 1; + assert((align & mask) == 0); + return (size + mask) & ~mask; +} + +/** + * Use this to determine how much space in the workspace we will consume to + * allocate this object. 
(Normally it should be exactly the size of the object, + * but under special conditions, like ASAN, where we pad each object, it might + * be larger.) + * + * Since tables aren't currently redzoned, you don't need to call through this + * to figure out how much space you need for the matchState tables. Everything + * else is though. + * + * Do not use for sizing aligned buffers. Instead, use ZSTD_cwksp_aligned_alloc_size(). + */ +MEM_STATIC size_t ZSTD_cwksp_alloc_size(size_t size) { + if (size == 0) + return 0; +#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) + return size + 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE; +#else + return size; +#endif +} + +/** + * Returns an adjusted alloc size that is the nearest larger multiple of 64 bytes. + * Used to determine the number of bytes required for a given "aligned". + */ +MEM_STATIC size_t ZSTD_cwksp_aligned_alloc_size(size_t size) { + return ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(size, ZSTD_CWKSP_ALIGNMENT_BYTES)); +} + +/** + * Returns the amount of additional space the cwksp must allocate + * for internal purposes (currently only alignment). + */ +MEM_STATIC size_t ZSTD_cwksp_slack_space_required(void) { + /* For alignment, the wksp will always allocate an additional n_1=[1, 64] bytes + * to align the beginning of tables section, as well as another n_2=[0, 63] bytes + * to align the beginning of the aligned secion. + * + * n_1 + n_2 == 64 bytes if the cwksp is freshly allocated, due to tables and + * aligneds being sized in multiples of 64 bytes. + */ + size_t const slackSpace = ZSTD_CWKSP_ALIGNMENT_BYTES; + return slackSpace; +} + + +/** + * Return the number of additional bytes required to align a pointer to the given number of bytes. + * alignBytes must be a power of two. + */ +MEM_STATIC size_t ZSTD_cwksp_bytes_to_align_ptr(void* ptr, const size_t alignBytes) { + size_t const alignBytesMask = alignBytes - 1; + size_t const bytes = (alignBytes - ((size_t)ptr & (alignBytesMask))) & alignBytesMask; + assert((alignBytes & alignBytesMask) == 0); + assert(bytes != ZSTD_CWKSP_ALIGNMENT_BYTES); + return bytes; +} + +/** + * Internal function. Do not use directly. + * Reserves the given number of bytes within the aligned/buffer segment of the wksp, which + * counts from the end of the wksp. (as opposed to the object/table segment) + * + * Returns a pointer to the beginning of that space. + */ +MEM_STATIC void* ZSTD_cwksp_reserve_internal_buffer_space(ZSTD_cwksp* ws, size_t const bytes) { + void* const alloc = (BYTE*)ws->allocStart - bytes; + void* const bottom = ws->tableEnd; + DEBUGLOG(5, "cwksp: reserving %p %zd bytes, %zd bytes remaining", + alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes); + ZSTD_cwksp_assert_internal_consistency(ws); + assert(alloc >= bottom); + if (alloc < bottom) { + DEBUGLOG(4, "cwksp: alloc failed!"); + ws->allocFailed = 1; + return NULL; + } + if (alloc < ws->tableValidEnd) { + ws->tableValidEnd = alloc; + } + ws->allocStart = alloc; + return alloc; +} + +/** + * Moves the cwksp to the next phase, and does any necessary allocations. 
+ * Returns a 0 on success, or zstd error + */ +MEM_STATIC size_t ZSTD_cwksp_internal_advance_phase( + ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase) { + assert(phase >= ws->phase); + if (phase > ws->phase) { + /* Going from allocating objects to allocating buffers */ + if (ws->phase < ZSTD_cwksp_alloc_buffers && + phase >= ZSTD_cwksp_alloc_buffers) { + ws->tableValidEnd = ws->objectEnd; + } + + /* Going from allocating buffers to allocating aligneds/tables */ + if (ws->phase < ZSTD_cwksp_alloc_aligned && + phase >= ZSTD_cwksp_alloc_aligned) { + { /* Align the start of the "aligned" to 64 bytes. Use [1, 64] bytes. */ + size_t const bytesToAlign = + ZSTD_CWKSP_ALIGNMENT_BYTES - ZSTD_cwksp_bytes_to_align_ptr(ws->allocStart, ZSTD_CWKSP_ALIGNMENT_BYTES); + DEBUGLOG(5, "reserving aligned alignment addtl space: %zu", bytesToAlign); + ZSTD_STATIC_ASSERT((ZSTD_CWKSP_ALIGNMENT_BYTES & (ZSTD_CWKSP_ALIGNMENT_BYTES - 1)) == 0); /* power of 2 */ + RETURN_ERROR_IF(!ZSTD_cwksp_reserve_internal_buffer_space(ws, bytesToAlign), + memory_allocation, "aligned phase - alignment initial allocation failed!"); + } + { /* Align the start of the tables to 64 bytes. Use [0, 63] bytes */ + void* const alloc = ws->objectEnd; + size_t const bytesToAlign = ZSTD_cwksp_bytes_to_align_ptr(alloc, ZSTD_CWKSP_ALIGNMENT_BYTES); + void* const end = (BYTE*)alloc + bytesToAlign; + DEBUGLOG(5, "reserving table alignment addtl space: %zu", bytesToAlign); + RETURN_ERROR_IF(end > ws->workspaceEnd, memory_allocation, + "table phase - alignment initial allocation failed!"); + ws->objectEnd = end; + ws->tableEnd = end; + ws->tableValidEnd = end; + } + } + ws->phase = phase; + ZSTD_cwksp_assert_internal_consistency(ws); + } + return 0; +} + +/** + * Returns whether this object/buffer/etc was allocated in this workspace. + */ +MEM_STATIC int ZSTD_cwksp_owns_buffer(const ZSTD_cwksp* ws, const void* ptr) { + return (ptr != NULL) && (ws->workspace <= ptr) && (ptr <= ws->workspaceEnd); +} + +/** + * Internal function. Do not use directly. + */ +MEM_STATIC void* ZSTD_cwksp_reserve_internal( + ZSTD_cwksp* ws, size_t bytes, ZSTD_cwksp_alloc_phase_e phase) { + void* alloc; + if (ZSTD_isError(ZSTD_cwksp_internal_advance_phase(ws, phase)) || bytes == 0) { + return NULL; + } + +#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) + /* over-reserve space */ + bytes += 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE; +#endif + + alloc = ZSTD_cwksp_reserve_internal_buffer_space(ws, bytes); + +#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) + /* Move alloc so there's ZSTD_CWKSP_ASAN_REDZONE_SIZE unused space on + * either size. */ + if (alloc) { + alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE; + if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) { + __asan_unpoison_memory_region(alloc, bytes); + } + } +#endif + + return alloc; +} + +/** + * Reserves and returns unaligned memory. + */ +MEM_STATIC BYTE* ZSTD_cwksp_reserve_buffer(ZSTD_cwksp* ws, size_t bytes) { + return (BYTE*)ZSTD_cwksp_reserve_internal(ws, bytes, ZSTD_cwksp_alloc_buffers); +} + +/** + * Reserves and returns memory sized on and aligned on ZSTD_CWKSP_ALIGNMENT_BYTES (64 bytes). + */ +MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes) { + void* ptr = ZSTD_cwksp_reserve_internal(ws, ZSTD_cwksp_align(bytes, ZSTD_CWKSP_ALIGNMENT_BYTES), + ZSTD_cwksp_alloc_aligned); + assert(((size_t)ptr & (ZSTD_CWKSP_ALIGNMENT_BYTES-1))== 0); + return ptr; +} + +/** + * Aligned on 64 bytes. 
These buffers have the special property that + * their values remain constrained, allowing us to re-use them without + * memset()-ing them. + */ +MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes) { + const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned; + void* alloc; + void* end; + void* top; + + if (ZSTD_isError(ZSTD_cwksp_internal_advance_phase(ws, phase))) { + return NULL; + } + alloc = ws->tableEnd; + end = (BYTE *)alloc + bytes; + top = ws->allocStart; + + DEBUGLOG(5, "cwksp: reserving %p table %zd bytes, %zd bytes remaining", + alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes); + assert((bytes & (sizeof(U32)-1)) == 0); + ZSTD_cwksp_assert_internal_consistency(ws); + assert(end <= top); + if (end > top) { + DEBUGLOG(4, "cwksp: table alloc failed!"); + ws->allocFailed = 1; + return NULL; + } + ws->tableEnd = end; + +#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) + if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) { + __asan_unpoison_memory_region(alloc, bytes); + } +#endif + + assert((bytes & (ZSTD_CWKSP_ALIGNMENT_BYTES-1)) == 0); + assert(((size_t)alloc & (ZSTD_CWKSP_ALIGNMENT_BYTES-1))== 0); + return alloc; +} + +/** + * Aligned on sizeof(void*). + */ +MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes) { + size_t roundedBytes = ZSTD_cwksp_align(bytes, sizeof(void*)); + void* alloc = ws->objectEnd; + void* end = (BYTE*)alloc + roundedBytes; + +#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) + /* over-reserve space */ + end = (BYTE *)end + 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE; +#endif + + DEBUGLOG(5, + "cwksp: reserving %p object %zd bytes (rounded to %zd), %zd bytes remaining", + alloc, bytes, roundedBytes, ZSTD_cwksp_available_space(ws) - roundedBytes); + assert(((size_t)alloc & (sizeof(void*)-1)) == 0); + assert((bytes & (sizeof(void*)-1)) == 0); + ZSTD_cwksp_assert_internal_consistency(ws); + /* we must be in the first phase, no advance is possible */ + if (ws->phase != ZSTD_cwksp_alloc_objects || end > ws->workspaceEnd) { + DEBUGLOG(4, "cwksp: object alloc failed!"); + ws->allocFailed = 1; + return NULL; + } + ws->objectEnd = end; + ws->tableEnd = end; + ws->tableValidEnd = end; + +#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) + /* Move alloc so there's ZSTD_CWKSP_ASAN_REDZONE_SIZE unused space on + * either size. */ + alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE; + if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) { + __asan_unpoison_memory_region(alloc, bytes); + } +#endif + + return alloc; +} + +MEM_STATIC void ZSTD_cwksp_mark_tables_dirty(ZSTD_cwksp* ws) { + DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_dirty"); + +#if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE) + /* To validate that the table re-use logic is sound, and that we don't + * access table space that we haven't cleaned, we re-"poison" the table + * space every time we mark it dirty. 
*/ + { + size_t size = (BYTE*)ws->tableValidEnd - (BYTE*)ws->objectEnd; + assert(__msan_test_shadow(ws->objectEnd, size) == -1); + __msan_poison(ws->objectEnd, size); + } +#endif + + assert(ws->tableValidEnd >= ws->objectEnd); + assert(ws->tableValidEnd <= ws->allocStart); + ws->tableValidEnd = ws->objectEnd; + ZSTD_cwksp_assert_internal_consistency(ws); +} + +MEM_STATIC void ZSTD_cwksp_mark_tables_clean(ZSTD_cwksp* ws) { + DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_clean"); + assert(ws->tableValidEnd >= ws->objectEnd); + assert(ws->tableValidEnd <= ws->allocStart); + if (ws->tableValidEnd < ws->tableEnd) { + ws->tableValidEnd = ws->tableEnd; + } + ZSTD_cwksp_assert_internal_consistency(ws); +} + +/** + * Zero the part of the allocated tables not already marked clean. + */ +MEM_STATIC void ZSTD_cwksp_clean_tables(ZSTD_cwksp* ws) { + DEBUGLOG(4, "cwksp: ZSTD_cwksp_clean_tables"); + assert(ws->tableValidEnd >= ws->objectEnd); + assert(ws->tableValidEnd <= ws->allocStart); + if (ws->tableValidEnd < ws->tableEnd) { + ZSTD_memset(ws->tableValidEnd, 0, (BYTE*)ws->tableEnd - (BYTE*)ws->tableValidEnd); + } + ZSTD_cwksp_mark_tables_clean(ws); +} + +/** + * Invalidates table allocations. + * All other allocations remain valid. + */ +MEM_STATIC void ZSTD_cwksp_clear_tables(ZSTD_cwksp* ws) { + DEBUGLOG(4, "cwksp: clearing tables!"); + +#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) + /* We don't do this when the workspace is statically allocated, because + * when that is the case, we have no capability to hook into the end of the + * workspace's lifecycle to unpoison the memory. + */ + if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) { + size_t size = (BYTE*)ws->tableValidEnd - (BYTE*)ws->objectEnd; + __asan_poison_memory_region(ws->objectEnd, size); + } +#endif + + ws->tableEnd = ws->objectEnd; + ZSTD_cwksp_assert_internal_consistency(ws); +} + +/** + * Invalidates all buffer, aligned, and table allocations. + * Object allocations remain valid. + */ +MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) { + DEBUGLOG(4, "cwksp: clearing!"); + +#if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE) + /* To validate that the context re-use logic is sound, and that we don't + * access stuff that this compression hasn't initialized, we re-"poison" + * the workspace (or at least the non-static, non-table parts of it) + * every time we start a new compression. */ + { + size_t size = (BYTE*)ws->workspaceEnd - (BYTE*)ws->tableValidEnd; + __msan_poison(ws->tableValidEnd, size); + } +#endif + +#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) + /* We don't do this when the workspace is statically allocated, because + * when that is the case, we have no capability to hook into the end of the + * workspace's lifecycle to unpoison the memory. + */ + if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) { + size_t size = (BYTE*)ws->workspaceEnd - (BYTE*)ws->objectEnd; + __asan_poison_memory_region(ws->objectEnd, size); + } +#endif + + ws->tableEnd = ws->objectEnd; + ws->allocStart = ws->workspaceEnd; + ws->allocFailed = 0; + if (ws->phase > ZSTD_cwksp_alloc_buffers) { + ws->phase = ZSTD_cwksp_alloc_buffers; + } + ZSTD_cwksp_assert_internal_consistency(ws); +} + +/** + * The provided workspace takes ownership of the buffer [start, start+size). + * Any existing values in the workspace are ignored (the previously managed + * buffer, if present, must be separately freed). 
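+ *
+ * A minimal usage sketch (illustrative only, not part of upstream zstd; the
+ * buffer, sizes and reservation amounts are hypothetical, and the reservations
+ * follow the required Objects -> Buffers -> Aligned/Tables order):
+ *
+ *   ZSTD_cwksp ws;
+ *   ZSTD_cwksp_init(&ws, buffer, bufferSize, ZSTD_cwksp_static_alloc);  (buffer must be pointer-aligned)
+ *   void* obj   = ZSTD_cwksp_reserve_object(&ws, 64);
+ *   BYTE* tmp   = ZSTD_cwksp_reserve_buffer(&ws, 4096);
+ *   U32*  table = (U32*)ZSTD_cwksp_reserve_table(&ws, 1024 * sizeof(U32));
+ *   if (ZSTD_cwksp_reserve_failed(&ws)) { ... }   <- buffer was too small
+ *   ZSTD_cwksp_clear(&ws);                        <- drops buffers/aligneds/tables, keeps objects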
+ */ +MEM_STATIC void ZSTD_cwksp_init(ZSTD_cwksp* ws, void* start, size_t size, ZSTD_cwksp_static_alloc_e isStatic) { + DEBUGLOG(4, "cwksp: init'ing workspace with %zd bytes", size); + assert(((size_t)start & (sizeof(void*)-1)) == 0); /* ensure correct alignment */ + ws->workspace = start; + ws->workspaceEnd = (BYTE*)start + size; + ws->objectEnd = ws->workspace; + ws->tableValidEnd = ws->objectEnd; + ws->phase = ZSTD_cwksp_alloc_objects; + ws->isStatic = isStatic; + ZSTD_cwksp_clear(ws); + ws->workspaceOversizedDuration = 0; + ZSTD_cwksp_assert_internal_consistency(ws); +} + +MEM_STATIC size_t ZSTD_cwksp_create(ZSTD_cwksp* ws, size_t size, ZSTD_customMem customMem) { + void* workspace = ZSTD_customMalloc(size, customMem); + DEBUGLOG(4, "cwksp: creating new workspace with %zd bytes", size); + RETURN_ERROR_IF(workspace == NULL, memory_allocation, "NULL pointer!"); + ZSTD_cwksp_init(ws, workspace, size, ZSTD_cwksp_dynamic_alloc); + return 0; +} + +MEM_STATIC void ZSTD_cwksp_free(ZSTD_cwksp* ws, ZSTD_customMem customMem) { + void *ptr = ws->workspace; + DEBUGLOG(4, "cwksp: freeing workspace"); + ZSTD_memset(ws, 0, sizeof(ZSTD_cwksp)); + ZSTD_customFree(ptr, customMem); +} + +/** + * Moves the management of a workspace from one cwksp to another. The src cwksp + * is left in an invalid state (src must be re-init()'ed before it's used again). + */ +MEM_STATIC void ZSTD_cwksp_move(ZSTD_cwksp* dst, ZSTD_cwksp* src) { + *dst = *src; + ZSTD_memset(src, 0, sizeof(ZSTD_cwksp)); +} + +MEM_STATIC size_t ZSTD_cwksp_sizeof(const ZSTD_cwksp* ws) { + return (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->workspace); +} + +MEM_STATIC size_t ZSTD_cwksp_used(const ZSTD_cwksp* ws) { + return (size_t)((BYTE*)ws->tableEnd - (BYTE*)ws->workspace) + + (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->allocStart); +} + +MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) { + return ws->allocFailed; +} + +/*-************************************* +* Functions Checking Free Space +***************************************/ + +/* ZSTD_alignmentSpaceWithinBounds() : + * Returns if the estimated space needed for a wksp is within an acceptable limit of the + * actual amount of space used. + */ +MEM_STATIC int ZSTD_cwksp_estimated_space_within_bounds(const ZSTD_cwksp* const ws, + size_t const estimatedSpace, int resizedWorkspace) { + if (resizedWorkspace) { + /* Resized/newly allocated wksp should have exact bounds */ + return ZSTD_cwksp_used(ws) == estimatedSpace; + } else { + /* Due to alignment, when reusing a workspace, we can actually consume 63 fewer or more bytes + * than estimatedSpace. See the comments in zstd_cwksp.h for details. 
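+         * For example (illustrative numbers only): with estimatedSpace == 4096,
+         * any ZSTD_cwksp_used(ws) value in [4033, 4159] is accepted by the
+         * check below.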
+ */ + return (ZSTD_cwksp_used(ws) >= estimatedSpace - 63) && (ZSTD_cwksp_used(ws) <= estimatedSpace + 63); + } +} + + +MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws) { + return (size_t)((BYTE*)ws->allocStart - (BYTE*)ws->tableEnd); +} + +MEM_STATIC int ZSTD_cwksp_check_available(ZSTD_cwksp* ws, size_t additionalNeededSpace) { + return ZSTD_cwksp_available_space(ws) >= additionalNeededSpace; +} + +MEM_STATIC int ZSTD_cwksp_check_too_large(ZSTD_cwksp* ws, size_t additionalNeededSpace) { + return ZSTD_cwksp_check_available( + ws, additionalNeededSpace * ZSTD_WORKSPACETOOLARGE_FACTOR); +} + +MEM_STATIC int ZSTD_cwksp_check_wasteful(ZSTD_cwksp* ws, size_t additionalNeededSpace) { + return ZSTD_cwksp_check_too_large(ws, additionalNeededSpace) + && ws->workspaceOversizedDuration > ZSTD_WORKSPACETOOLARGE_MAXDURATION; +} + +MEM_STATIC void ZSTD_cwksp_bump_oversized_duration( + ZSTD_cwksp* ws, size_t additionalNeededSpace) { + if (ZSTD_cwksp_check_too_large(ws, additionalNeededSpace)) { + ws->workspaceOversizedDuration++; + } else { + ws->workspaceOversizedDuration = 0; + } +} + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_CWKSP_H */ +/**** ended inlining zstd_cwksp.h ****/ +#ifdef ZSTD_MULTITHREAD +/**** start inlining zstdmt_compress.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + #ifndef ZSTDMT_COMPRESS_H + #define ZSTDMT_COMPRESS_H + + #if defined (__cplusplus) + extern "C" { + #endif + + +/* Note : This is an internal API. + * These APIs used to be exposed with ZSTDLIB_API, + * because it used to be the only way to invoke MT compression. + * Now, you must use ZSTD_compress2 and ZSTD_compressStream2() instead. + * + * This API requires ZSTD_MULTITHREAD to be defined during compilation, + * otherwise ZSTDMT_createCCtx*() will fail. + */ + +/* === Dependencies === */ +/**** skipping file: ../common/zstd_deps.h ****/ +#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_parameters */ +/**** skipping file: ../zstd.h ****/ + + +/* === Constants === */ +#ifndef ZSTDMT_NBWORKERS_MAX /* a different value can be selected at compile time */ +# define ZSTDMT_NBWORKERS_MAX ((sizeof(void*)==4) /*32-bit*/ ? 64 : 256) +#endif +#ifndef ZSTDMT_JOBSIZE_MIN /* a different value can be selected at compile time */ +# define ZSTDMT_JOBSIZE_MIN (512 KB) +#endif +#define ZSTDMT_JOBLOG_MAX (MEM_32bits() ? 29 : 30) +#define ZSTDMT_JOBSIZE_MAX (MEM_32bits() ? (512 MB) : (1024 MB)) + + +/* ======================================================== + * === Private interface, for use by ZSTD_compress.c === + * === Not exposed in libzstd. Never invoke directly === + * ======================================================== */ + +/* === Memory management === */ +typedef struct ZSTDMT_CCtx_s ZSTDMT_CCtx; +/* Requires ZSTD_MULTITHREAD to be defined during compilation, otherwise it will return NULL. */ +ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers, + ZSTD_customMem cMem, + ZSTD_threadPool *pool); +size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx); + +size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx); + +/* === Streaming functions === */ + +size_t ZSTDMT_nextInputSizeHint(const ZSTDMT_CCtx* mtctx); + +/*! ZSTDMT_initCStream_internal() : + * Private use only. 
Init streaming operation. + * expects params to be valid. + * must receive dict, or cdict, or none, but not both. + * @return : 0, or an error code */ +size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs, + const void* dict, size_t dictSize, ZSTD_dictContentType_e dictContentType, + const ZSTD_CDict* cdict, + ZSTD_CCtx_params params, unsigned long long pledgedSrcSize); + +/*! ZSTDMT_compressStream_generic() : + * Combines ZSTDMT_compressStream() with optional ZSTDMT_flushStream() or ZSTDMT_endStream() + * depending on flush directive. + * @return : minimum amount of data still to be flushed + * 0 if fully flushed + * or an error code + * note : needs to be init using any ZSTD_initCStream*() variant */ +size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx, + ZSTD_outBuffer* output, + ZSTD_inBuffer* input, + ZSTD_EndDirective endOp); + + /*! ZSTDMT_toFlushNow() + * Tell how many bytes are ready to be flushed immediately. + * Probe the oldest active job (not yet entirely flushed) and check its output buffer. + * If return 0, it means there is no active job, + * or, it means oldest job is still active, but everything produced has been flushed so far, + * therefore flushing is limited by speed of oldest job. */ +size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx); + +/*! ZSTDMT_updateCParams_whileCompressing() : + * Updates only a selected set of compression parameters, to remain compatible with current frame. + * New parameters will be applied to next compression job. */ +void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_params* cctxParams); + +/*! ZSTDMT_getFrameProgression(): + * tells how much data has been consumed (input) and produced (output) for current frame. + * able to count progression inside worker threads. + */ +ZSTD_frameProgression ZSTDMT_getFrameProgression(ZSTDMT_CCtx* mtctx); + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTDMT_COMPRESS_H */ +/**** ended inlining zstdmt_compress.h ****/ +#endif + +#if defined (__cplusplus) +extern "C" { +#endif + +/*-************************************* +* Constants +***************************************/ +#define kSearchStrength 8 +#define HASH_READ_SIZE 8 +#define ZSTD_DUBT_UNSORTED_MARK 1 /* For btlazy2 strategy, index ZSTD_DUBT_UNSORTED_MARK==1 means "unsorted". + It could be confused for a real successor at index "1", if sorted as larger than its predecessor. + It's not a big deal though : candidate will just be sorted again. + Additionally, candidate position 1 will be lost. + But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss. + The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy. 
+ This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */ + + +/*-************************************* +* Context memory management +***************************************/ +typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e; +typedef enum { zcss_init=0, zcss_load, zcss_flush } ZSTD_cStreamStage; + +typedef struct ZSTD_prefixDict_s { + const void* dict; + size_t dictSize; + ZSTD_dictContentType_e dictContentType; +} ZSTD_prefixDict; + +typedef struct { + void* dictBuffer; + void const* dict; + size_t dictSize; + ZSTD_dictContentType_e dictContentType; + ZSTD_CDict* cdict; +} ZSTD_localDict; + +typedef struct { + HUF_CElt CTable[HUF_CTABLE_SIZE_U32(255)]; + HUF_repeat repeatMode; +} ZSTD_hufCTables_t; + +typedef struct { + FSE_CTable offcodeCTable[FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)]; + FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)]; + FSE_CTable litlengthCTable[FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)]; + FSE_repeat offcode_repeatMode; + FSE_repeat matchlength_repeatMode; + FSE_repeat litlength_repeatMode; +} ZSTD_fseCTables_t; + +typedef struct { + ZSTD_hufCTables_t huf; + ZSTD_fseCTables_t fse; +} ZSTD_entropyCTables_t; + +/*********************************************** +* Entropy buffer statistics structs and funcs * +***********************************************/ +/** ZSTD_hufCTablesMetadata_t : + * Stores Literals Block Type for a super-block in hType, and + * huffman tree description in hufDesBuffer. + * hufDesSize refers to the size of huffman tree description in bytes. + * This metadata is populated in ZSTD_buildBlockEntropyStats_literals() */ +typedef struct { + symbolEncodingType_e hType; + BYTE hufDesBuffer[ZSTD_MAX_HUF_HEADER_SIZE]; + size_t hufDesSize; +} ZSTD_hufCTablesMetadata_t; + +/** ZSTD_fseCTablesMetadata_t : + * Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and + * fse tables in fseTablesBuffer. + * fseTablesSize refers to the size of fse tables in bytes. + * This metadata is populated in ZSTD_buildBlockEntropyStats_sequences() */ +typedef struct { + symbolEncodingType_e llType; + symbolEncodingType_e ofType; + symbolEncodingType_e mlType; + BYTE fseTablesBuffer[ZSTD_MAX_FSE_HEADERS_SIZE]; + size_t fseTablesSize; + size_t lastCountSize; /* This is to account for bug in 1.3.4. More detail in ZSTD_entropyCompressSeqStore_internal() */ +} ZSTD_fseCTablesMetadata_t; + +typedef struct { + ZSTD_hufCTablesMetadata_t hufMetadata; + ZSTD_fseCTablesMetadata_t fseMetadata; +} ZSTD_entropyCTablesMetadata_t; + +/** ZSTD_buildBlockEntropyStats() : + * Builds entropy for the block. + * @return : 0 on success or error code */ +size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr, + const ZSTD_entropyCTables_t* prevEntropy, + ZSTD_entropyCTables_t* nextEntropy, + const ZSTD_CCtx_params* cctxParams, + ZSTD_entropyCTablesMetadata_t* entropyMetadata, + void* workspace, size_t wkspSize); + +/********************************* +* Compression internals structs * +*********************************/ + +typedef struct { + U32 off; /* Offset code (offset + ZSTD_REP_MOVE) for the match */ + U32 len; /* Raw length of match */ +} ZSTD_match_t; + +typedef struct { + U32 offset; /* Offset of sequence */ + U32 litLength; /* Length of literals prior to match */ + U32 matchLength; /* Raw length of match */ +} rawSeq; + +typedef struct { + rawSeq* seq; /* The start of the sequences */ + size_t pos; /* The index in seq where reading stopped. pos <= size. 
*/ + size_t posInSequence; /* The position within the sequence at seq[pos] where reading + stopped. posInSequence <= seq[pos].litLength + seq[pos].matchLength */ + size_t size; /* The number of sequences. <= capacity. */ + size_t capacity; /* The capacity starting from `seq` pointer */ +} rawSeqStore_t; + +UNUSED_ATTR static const rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0}; + +typedef struct { + int price; + U32 off; + U32 mlen; + U32 litlen; + U32 rep[ZSTD_REP_NUM]; +} ZSTD_optimal_t; + +typedef enum { zop_dynamic=0, zop_predef } ZSTD_OptPrice_e; + +typedef struct { + /* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */ + unsigned* litFreq; /* table of literals statistics, of size 256 */ + unsigned* litLengthFreq; /* table of litLength statistics, of size (MaxLL+1) */ + unsigned* matchLengthFreq; /* table of matchLength statistics, of size (MaxML+1) */ + unsigned* offCodeFreq; /* table of offCode statistics, of size (MaxOff+1) */ + ZSTD_match_t* matchTable; /* list of found matches, of size ZSTD_OPT_NUM+1 */ + ZSTD_optimal_t* priceTable; /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+1 */ + + U32 litSum; /* nb of literals */ + U32 litLengthSum; /* nb of litLength codes */ + U32 matchLengthSum; /* nb of matchLength codes */ + U32 offCodeSum; /* nb of offset codes */ + U32 litSumBasePrice; /* to compare to log2(litfreq) */ + U32 litLengthSumBasePrice; /* to compare to log2(llfreq) */ + U32 matchLengthSumBasePrice;/* to compare to log2(mlfreq) */ + U32 offCodeSumBasePrice; /* to compare to log2(offreq) */ + ZSTD_OptPrice_e priceType; /* prices can be determined dynamically, or follow a pre-defined cost structure */ + const ZSTD_entropyCTables_t* symbolCosts; /* pre-calculated dictionary statistics */ + ZSTD_literalCompressionMode_e literalCompressionMode; +} optState_t; + +typedef struct { + ZSTD_entropyCTables_t entropy; + U32 rep[ZSTD_REP_NUM]; +} ZSTD_compressedBlockState_t; + +typedef struct { + BYTE const* nextSrc; /* next block here to continue on current prefix */ + BYTE const* base; /* All regular indexes relative to this position */ + BYTE const* dictBase; /* extDict indexes relative to this position */ + U32 dictLimit; /* below that point, need extDict */ + U32 lowLimit; /* below that point, no more valid data */ + U32 nbOverflowCorrections; /* Number of times overflow correction has run since + * ZSTD_window_init(). Useful for debugging coredumps + * and for ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY. + */ +} ZSTD_window_t; + +typedef struct ZSTD_matchState_t ZSTD_matchState_t; + +#define ZSTD_ROW_HASH_CACHE_SIZE 8 /* Size of prefetching hash cache for row-based matchfinder */ + +struct ZSTD_matchState_t { + ZSTD_window_t window; /* State for window round buffer management */ + U32 loadedDictEnd; /* index of end of dictionary, within context's referential. + * When loadedDictEnd != 0, a dictionary is in use, and still valid. + * This relies on a mechanism to set loadedDictEnd=0 when dictionary is no longer within distance. + * Such mechanism is provided within ZSTD_window_enforceMaxDist() and ZSTD_checkDictValidity(). + * When dict referential is copied into active context (i.e. not attached), + * loadedDictEnd == dictSize, since referential starts from zero. 
+ */ + U32 nextToUpdate; /* index from which to continue table update */ + U32 hashLog3; /* dispatch table for matches of len==3 : larger == faster, more memory */ + + U32 rowHashLog; /* For row-based matchfinder: Hashlog based on nb of rows in the hashTable.*/ + U16* tagTable; /* For row-based matchFinder: A row-based table containing the hashes and head index. */ + U32 hashCache[ZSTD_ROW_HASH_CACHE_SIZE]; /* For row-based matchFinder: a cache of hashes to improve speed */ + + U32* hashTable; + U32* hashTable3; + U32* chainTable; + + U32 forceNonContiguous; /* Non-zero if we should force non-contiguous load for the next window update. */ + + int dedicatedDictSearch; /* Indicates whether this matchState is using the + * dedicated dictionary search structure. + */ + optState_t opt; /* optimal parser state */ + const ZSTD_matchState_t* dictMatchState; + ZSTD_compressionParameters cParams; + const rawSeqStore_t* ldmSeqStore; +}; + +typedef struct { + ZSTD_compressedBlockState_t* prevCBlock; + ZSTD_compressedBlockState_t* nextCBlock; + ZSTD_matchState_t matchState; +} ZSTD_blockState_t; + +typedef struct { + U32 offset; + U32 checksum; +} ldmEntry_t; + +typedef struct { + BYTE const* split; + U32 hash; + U32 checksum; + ldmEntry_t* bucket; +} ldmMatchCandidate_t; + +#define LDM_BATCH_SIZE 64 + +typedef struct { + ZSTD_window_t window; /* State for the window round buffer management */ + ldmEntry_t* hashTable; + U32 loadedDictEnd; + BYTE* bucketOffsets; /* Next position in bucket to insert entry */ + size_t splitIndices[LDM_BATCH_SIZE]; + ldmMatchCandidate_t matchCandidates[LDM_BATCH_SIZE]; +} ldmState_t; + +typedef struct { + U32 enableLdm; /* 1 if enable long distance matching */ + U32 hashLog; /* Log size of hashTable */ + U32 bucketSizeLog; /* Log bucket size for collision resolution, at most 8 */ + U32 minMatchLength; /* Minimum match length */ + U32 hashRateLog; /* Log number of entries to skip */ + U32 windowLog; /* Window log for the LDM */ +} ldmParams_t; + +typedef struct { + int collectSequences; + ZSTD_Sequence* seqStart; + size_t seqIndex; + size_t maxSequences; +} SeqCollector; + +struct ZSTD_CCtx_params_s { + ZSTD_format_e format; + ZSTD_compressionParameters cParams; + ZSTD_frameParameters fParams; + + int compressionLevel; + int forceWindow; /* force back-references to respect limit of + * 1< 63) ? ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength]; +} + +/* ZSTD_MLcode() : + * note : mlBase = matchLength - MINMATCH; + * because it's the format it's stored in seqStore->sequences */ +MEM_STATIC U32 ZSTD_MLcode(U32 mlBase) +{ + static const BYTE ML_Code[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37, + 38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39, + 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, + 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 }; + static const U32 ML_deltaCode = 36; + return (mlBase > 127) ? 
ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase]; +} + +typedef struct repcodes_s { + U32 rep[3]; +} repcodes_t; + +MEM_STATIC repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0) +{ + repcodes_t newReps; + if (offset >= ZSTD_REP_NUM) { /* full offset */ + newReps.rep[2] = rep[1]; + newReps.rep[1] = rep[0]; + newReps.rep[0] = offset - ZSTD_REP_MOVE; + } else { /* repcode */ + U32 const repCode = offset + ll0; + if (repCode > 0) { /* note : if repCode==0, no change */ + U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode]; + newReps.rep[2] = (repCode >= 2) ? rep[1] : rep[2]; + newReps.rep[1] = rep[0]; + newReps.rep[0] = currentOffset; + } else { /* repCode == 0 */ + ZSTD_memcpy(&newReps, rep, sizeof(newReps)); + } + } + return newReps; +} + +/* ZSTD_cParam_withinBounds: + * @return 1 if value is within cParam bounds, + * 0 otherwise */ +MEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value) +{ + ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam); + if (ZSTD_isError(bounds.error)) return 0; + if (value < bounds.lowerBound) return 0; + if (value > bounds.upperBound) return 0; + return 1; +} + +/* ZSTD_noCompressBlock() : + * Writes uncompressed block to dst buffer from given src. + * Returns the size of the block */ +MEM_STATIC size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock) +{ + U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3); + RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity, + dstSize_tooSmall, "dst buf too small for uncompressed block"); + MEM_writeLE24(dst, cBlockHeader24); + ZSTD_memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize); + return ZSTD_blockHeaderSize + srcSize; +} + +MEM_STATIC size_t ZSTD_rleCompressBlock (void* dst, size_t dstCapacity, BYTE src, size_t srcSize, U32 lastBlock) +{ + BYTE* const op = (BYTE*)dst; + U32 const cBlockHeader = lastBlock + (((U32)bt_rle)<<1) + (U32)(srcSize << 3); + RETURN_ERROR_IF(dstCapacity < 4, dstSize_tooSmall, ""); + MEM_writeLE24(op, cBlockHeader); + op[3] = src; + return 4; +} + + +/* ZSTD_minGain() : + * minimum compression required + * to generate a compress block or a compressed literals section. + * note : use same formula for both situations */ +MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat) +{ + U32 const minlog = (strat>=ZSTD_btultra) ? (U32)(strat) - 1 : 6; + ZSTD_STATIC_ASSERT(ZSTD_btultra == 8); + assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat)); + return (srcSize >> minlog) + 2; +} + +MEM_STATIC int ZSTD_disableLiteralsCompression(const ZSTD_CCtx_params* cctxParams) +{ + switch (cctxParams->literalCompressionMode) { + case ZSTD_lcm_huffman: + return 0; + case ZSTD_lcm_uncompressed: + return 1; + default: + assert(0 /* impossible: pre-validated */); + /* fall-through */ + case ZSTD_lcm_auto: + return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0); + } +} + +/*! ZSTD_safecopyLiterals() : + * memcpy() function that won't read beyond more than WILDCOPY_OVERLENGTH bytes past ilimit_w. + * Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single + * large copies. 
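+ *
+ *  Illustrative behaviour (annotation added for clarity, not upstream text):
+ *  when ip <= ilimit_w, the bulk of the run is copied with ZSTD_wildcopy() up
+ *  to ilimit_w, and only the remaining tail up to iend is copied byte by byte.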
+ */ +static void ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w) { + assert(iend > ilimit_w); + if (ip <= ilimit_w) { + ZSTD_wildcopy(op, ip, ilimit_w - ip, ZSTD_no_overlap); + op += ilimit_w - ip; + ip = ilimit_w; + } + while (ip < iend) *op++ = *ip++; +} + +/*! ZSTD_storeSeq() : + * Store a sequence (litlen, litPtr, offCode and mlBase) into seqStore_t. + * `offCode` : distance to match + ZSTD_REP_MOVE (values <= ZSTD_REP_MOVE are repCodes). + * `mlBase` : matchLength - MINMATCH + * Allowed to overread literals up to litLimit. +*/ +HINT_INLINE UNUSED_ATTR +void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, const BYTE* litLimit, U32 offCode, size_t mlBase) +{ + BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH; + BYTE const* const litEnd = literals + litLength; +#if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6) + static const BYTE* g_start = NULL; + if (g_start==NULL) g_start = (const BYTE*)literals; /* note : index only works for compression within a single segment */ + { U32 const pos = (U32)((const BYTE*)literals - g_start); + DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u", + pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offCode); + } +#endif + assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq); + /* copy Literals */ + assert(seqStorePtr->maxNbLit <= 128 KB); + assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit); + assert(literals + litLength <= litLimit); + if (litEnd <= litLimit_w) { + /* Common case we can use wildcopy. + * First copy 16 bytes, because literals are likely short. + */ + assert(WILDCOPY_OVERLENGTH >= 16); + ZSTD_copy16(seqStorePtr->lit, literals); + if (litLength > 16) { + ZSTD_wildcopy(seqStorePtr->lit+16, literals+16, (ptrdiff_t)litLength-16, ZSTD_no_overlap); + } + } else { + ZSTD_safecopyLiterals(seqStorePtr->lit, literals, litEnd, litLimit_w); + } + seqStorePtr->lit += litLength; + + /* literal Length */ + if (litLength>0xFFFF) { + assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */ + seqStorePtr->longLengthType = ZSTD_llt_literalLength; + seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); + } + seqStorePtr->sequences[0].litLength = (U16)litLength; + + /* match offset */ + seqStorePtr->sequences[0].offset = offCode + 1; + + /* match Length */ + if (mlBase>0xFFFF) { + assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */ + seqStorePtr->longLengthType = ZSTD_llt_matchLength; + seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); + } + seqStorePtr->sequences[0].matchLength = (U16)mlBase; + + seqStorePtr->sequences++; +} + + +/*-************************************* +* Match length counter +***************************************/ +static unsigned ZSTD_NbCommonBytes (size_t val) +{ + if (MEM_isLittleEndian()) { + if (MEM_64bits()) { +# if defined(_MSC_VER) && defined(_WIN64) +# if STATIC_BMI2 + return _tzcnt_u64(val) >> 3; +# else + unsigned long r = 0; + return _BitScanForward64( &r, (U64)val ) ? 
(unsigned)(r >> 3) : 0; +# endif +# elif defined(__GNUC__) && (__GNUC__ >= 4) + return (__builtin_ctzll((U64)val) >> 3); +# else + static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, + 0, 3, 1, 3, 1, 4, 2, 7, + 0, 2, 3, 6, 1, 5, 3, 5, + 1, 3, 4, 4, 2, 5, 6, 7, + 7, 0, 1, 2, 3, 3, 4, 6, + 2, 6, 5, 5, 3, 4, 5, 6, + 7, 1, 2, 4, 6, 4, 4, 5, + 7, 2, 6, 5, 7, 6, 7, 7 }; + return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; +# endif + } else { /* 32 bits */ +# if defined(_MSC_VER) + unsigned long r=0; + return _BitScanForward( &r, (U32)val ) ? (unsigned)(r >> 3) : 0; +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_ctz((U32)val) >> 3); +# else + static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, + 3, 2, 2, 1, 3, 2, 0, 1, + 3, 3, 1, 2, 2, 2, 2, 0, + 3, 1, 2, 0, 1, 0, 1, 1 }; + return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; +# endif + } + } else { /* Big Endian CPU */ + if (MEM_64bits()) { +# if defined(_MSC_VER) && defined(_WIN64) +# if STATIC_BMI2 + return _lzcnt_u64(val) >> 3; +# else + unsigned long r = 0; + return _BitScanReverse64(&r, (U64)val) ? (unsigned)(r >> 3) : 0; +# endif +# elif defined(__GNUC__) && (__GNUC__ >= 4) + return (__builtin_clzll(val) >> 3); +# else + unsigned r; + const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */ + if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; } + if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } + r += (!val); + return r; +# endif + } else { /* 32 bits */ +# if defined(_MSC_VER) + unsigned long r = 0; + return _BitScanReverse( &r, (unsigned long)val ) ? (unsigned)(r >> 3) : 0; +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_clz((U32)val) >> 3); +# else + unsigned r; + if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } + r += (!val); + return r; +# endif + } } +} + + +MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit) +{ + const BYTE* const pStart = pIn; + const BYTE* const pInLoopLimit = pInLimit - (sizeof(size_t)-1); + + if (pIn < pInLoopLimit) { + { size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn); + if (diff) return ZSTD_NbCommonBytes(diff); } + pIn+=sizeof(size_t); pMatch+=sizeof(size_t); + while (pIn < pInLoopLimit) { + size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn); + if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; } + pIn += ZSTD_NbCommonBytes(diff); + return (size_t)(pIn - pStart); + } } + if (MEM_64bits() && (pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; } + if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn+=2; pMatch+=2; } + if ((pIn> (32-h) ; } +MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */ + +static const U32 prime4bytes = 2654435761U; +static U32 ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; } +static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_read32(ptr), h); } + +static const U64 prime5bytes = 889523592379ULL; +static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u << (64-40)) * prime5bytes) >> (64-h)) ; } +static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h); } + +static const U64 prime6bytes = 227718039650203ULL; +static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; } +static size_t 
ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); } + +static const U64 prime7bytes = 58295818150454627ULL; +static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u << (64-56)) * prime7bytes) >> (64-h)) ; } +static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h); } + +static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL; +static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; } +static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); } + +MEM_STATIC FORCE_INLINE_ATTR +size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls) +{ + switch(mls) + { + default: + case 4: return ZSTD_hash4Ptr(p, hBits); + case 5: return ZSTD_hash5Ptr(p, hBits); + case 6: return ZSTD_hash6Ptr(p, hBits); + case 7: return ZSTD_hash7Ptr(p, hBits); + case 8: return ZSTD_hash8Ptr(p, hBits); + } +} + +/** ZSTD_ipow() : + * Return base^exponent. + */ +static U64 ZSTD_ipow(U64 base, U64 exponent) +{ + U64 power = 1; + while (exponent) { + if (exponent & 1) power *= base; + exponent >>= 1; + base *= base; + } + return power; +} + +#define ZSTD_ROLL_HASH_CHAR_OFFSET 10 + +/** ZSTD_rollingHash_append() : + * Add the buffer to the hash value. + */ +static U64 ZSTD_rollingHash_append(U64 hash, void const* buf, size_t size) +{ + BYTE const* istart = (BYTE const*)buf; + size_t pos; + for (pos = 0; pos < size; ++pos) { + hash *= prime8bytes; + hash += istart[pos] + ZSTD_ROLL_HASH_CHAR_OFFSET; + } + return hash; +} + +/** ZSTD_rollingHash_compute() : + * Compute the rolling hash value of the buffer. + */ +MEM_STATIC U64 ZSTD_rollingHash_compute(void const* buf, size_t size) +{ + return ZSTD_rollingHash_append(0, buf, size); +} + +/** ZSTD_rollingHash_primePower() : + * Compute the primePower to be passed to ZSTD_rollingHash_rotate() for a hash + * over a window of length bytes. + */ +MEM_STATIC U64 ZSTD_rollingHash_primePower(U32 length) +{ + return ZSTD_ipow(prime8bytes, length - 1); +} + +/** ZSTD_rollingHash_rotate() : + * Rotate the rolling hash by one byte. + */ +MEM_STATIC U64 ZSTD_rollingHash_rotate(U64 hash, BYTE toRemove, BYTE toAdd, U64 primePower) +{ + hash -= (toRemove + ZSTD_ROLL_HASH_CHAR_OFFSET) * primePower; + hash *= prime8bytes; + hash += toAdd + ZSTD_ROLL_HASH_CHAR_OFFSET; + return hash; +} + +/*-************************************* +* Round buffer management +***************************************/ +#if (ZSTD_WINDOWLOG_MAX_64 > 31) +# error "ZSTD_WINDOWLOG_MAX is too large : would overflow ZSTD_CURRENT_MAX" +#endif +/* Max current allowed */ +#define ZSTD_CURRENT_MAX ((3U << 29) + (1U << ZSTD_WINDOWLOG_MAX)) +/* Maximum chunk size before overflow correction needs to be called again */ +#define ZSTD_CHUNKSIZE_MAX \ + ( ((U32)-1) /* Maximum ending current index */ \ + - ZSTD_CURRENT_MAX) /* Maximum beginning lowLimit */ + +/** + * ZSTD_window_clear(): + * Clears the window containing the history by simply setting it to empty. + */ +MEM_STATIC void ZSTD_window_clear(ZSTD_window_t* window) +{ + size_t const endT = (size_t)(window->nextSrc - window->base); + U32 const end = (U32)endT; + + window->lowLimit = end; + window->dictLimit = end; +} + +MEM_STATIC U32 ZSTD_window_isEmpty(ZSTD_window_t const window) +{ + return window.dictLimit == 1 && + window.lowLimit == 1 && + (window.nextSrc - window.base) == 1; +} + +/** + * ZSTD_window_hasExtDict(): + * Returns non-zero if the window has a non-empty extDict. 
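+ * (Concretely, per the definition below: lowLimit < dictLimit, which happens
+ * after a non-contiguous ZSTD_window_update() has turned the old prefix into
+ * the extDict.)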
+ */ +MEM_STATIC U32 ZSTD_window_hasExtDict(ZSTD_window_t const window) +{ + return window.lowLimit < window.dictLimit; +} + +/** + * ZSTD_matchState_dictMode(): + * Inspects the provided matchState and figures out what dictMode should be + * passed to the compressor. + */ +MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms) +{ + return ZSTD_window_hasExtDict(ms->window) ? + ZSTD_extDict : + ms->dictMatchState != NULL ? + (ms->dictMatchState->dedicatedDictSearch ? ZSTD_dedicatedDictSearch : ZSTD_dictMatchState) : + ZSTD_noDict; +} + +/* Defining this macro to non-zero tells zstd to run the overflow correction + * code much more frequently. This is very inefficient, and should only be + * used for tests and fuzzers. + */ +#ifndef ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY +# ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION +# define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 1 +# else +# define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 0 +# endif +#endif + +/** + * ZSTD_window_canOverflowCorrect(): + * Returns non-zero if the indices are large enough for overflow correction + * to work correctly without impacting compression ratio. + */ +MEM_STATIC U32 ZSTD_window_canOverflowCorrect(ZSTD_window_t const window, + U32 cycleLog, + U32 maxDist, + U32 loadedDictEnd, + void const* src) +{ + U32 const cycleSize = 1u << cycleLog; + U32 const curr = (U32)((BYTE const*)src - window.base); + U32 const minIndexToOverflowCorrect = cycleSize + MAX(maxDist, cycleSize); + + /* Adjust the min index to backoff the overflow correction frequency, + * so we don't waste too much CPU in overflow correction. If this + * computation overflows we don't really care, we just need to make + * sure it is at least minIndexToOverflowCorrect. + */ + U32 const adjustment = window.nbOverflowCorrections + 1; + U32 const adjustedIndex = MAX(minIndexToOverflowCorrect * adjustment, + minIndexToOverflowCorrect); + U32 const indexLargeEnough = curr > adjustedIndex; + + /* Only overflow correct early if the dictionary is invalidated already, + * so we don't hurt compression ratio. + */ + U32 const dictionaryInvalidated = curr > maxDist + loadedDictEnd; + + return indexLargeEnough && dictionaryInvalidated; +} + +/** + * ZSTD_window_needOverflowCorrection(): + * Returns non-zero if the indices are getting too large and need overflow + * protection. + */ +MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window, + U32 cycleLog, + U32 maxDist, + U32 loadedDictEnd, + void const* src, + void const* srcEnd) +{ + U32 const curr = (U32)((BYTE const*)srcEnd - window.base); + if (ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) { + if (ZSTD_window_canOverflowCorrect(window, cycleLog, maxDist, loadedDictEnd, src)) { + return 1; + } + } + return curr > ZSTD_CURRENT_MAX; +} + +/** + * ZSTD_window_correctOverflow(): + * Reduces the indices to protect from index overflow. + * Returns the correction made to the indices, which must be applied to every + * stored index. + * + * The least significant cycleLog bits of the indices must remain the same, + * which may be 0. Every index up to maxDist in the past must be valid. + */ +MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog, + U32 maxDist, void const* src) +{ + /* preemptive overflow correction: + * 1. correction is large enough: + * lowLimit > (3<<29) ==> current > 3<<29 + 1< (3<<29 + 1< (3<<29) - (1< (3<<29) - (1<<30) (NOTE: chainLog <= 30) + * > 1<<29 + * + * 2. 
(ip+ZSTD_CHUNKSIZE_MAX - cctx->base) doesn't overflow: + * After correction, current is less than (1<base < 1<<32. + * 3. (cctx->lowLimit + 1< 3<<29 + 1<base); + U32 const currentCycle0 = curr & cycleMask; + /* Exclude zero so that newCurrent - maxDist >= 1. */ + U32 const currentCycle1 = currentCycle0 == 0 ? cycleSize : currentCycle0; + U32 const newCurrent = currentCycle1 + MAX(maxDist, cycleSize); + U32 const correction = curr - newCurrent; + /* maxDist must be a power of two so that: + * (newCurrent & cycleMask) == (curr & cycleMask) + * This is required to not corrupt the chains / binary tree. + */ + assert((maxDist & (maxDist - 1)) == 0); + assert((curr & cycleMask) == (newCurrent & cycleMask)); + assert(curr > newCurrent); + if (!ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) { + /* Loose bound, should be around 1<<29 (see above) */ + assert(correction > 1<<28); + } + + window->base += correction; + window->dictBase += correction; + if (window->lowLimit <= correction) window->lowLimit = 1; + else window->lowLimit -= correction; + if (window->dictLimit <= correction) window->dictLimit = 1; + else window->dictLimit -= correction; + + /* Ensure we can still reference the full window. */ + assert(newCurrent >= maxDist); + assert(newCurrent - maxDist >= 1); + /* Ensure that lowLimit and dictLimit didn't underflow. */ + assert(window->lowLimit <= newCurrent); + assert(window->dictLimit <= newCurrent); + + ++window->nbOverflowCorrections; + + DEBUGLOG(4, "Correction of 0x%x bytes to lowLimit=0x%x", correction, + window->lowLimit); + return correction; +} + +/** + * ZSTD_window_enforceMaxDist(): + * Updates lowLimit so that: + * (srcEnd - base) - lowLimit == maxDist + loadedDictEnd + * + * It ensures index is valid as long as index >= lowLimit. + * This must be called before a block compression call. + * + * loadedDictEnd is only defined if a dictionary is in use for current compression. + * As the name implies, loadedDictEnd represents the index at end of dictionary. + * The value lies within context's referential, it can be directly compared to blockEndIdx. + * + * If loadedDictEndPtr is NULL, no dictionary is in use, and we use loadedDictEnd == 0. + * If loadedDictEndPtr is not NULL, we set it to zero after updating lowLimit. + * This is because dictionaries are allowed to be referenced fully + * as long as the last byte of the dictionary is in the window. + * Once input has progressed beyond window size, dictionary cannot be referenced anymore. + * + * In normal dict mode, the dictionary lies between lowLimit and dictLimit. + * In dictMatchState mode, lowLimit and dictLimit are the same, + * and the dictionary is below them. + * forceWindow and dictMatchState are therefore incompatible. + */ +MEM_STATIC void +ZSTD_window_enforceMaxDist(ZSTD_window_t* window, + const void* blockEnd, + U32 maxDist, + U32* loadedDictEndPtr, + const ZSTD_matchState_t** dictMatchStatePtr) +{ + U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base); + U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0; + DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u", + (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd); + + /* - When there is no dictionary : loadedDictEnd == 0. + In which case, the test (blockEndIdx > maxDist) is merely to avoid + overflowing next operation `newLowLimit = blockEndIdx - maxDist`. + - When there is a standard dictionary : + Index referential is copied from the dictionary, + which means it starts from 0. 
+ In which case, loadedDictEnd == dictSize, + and it makes sense to compare `blockEndIdx > maxDist + dictSize` + since `blockEndIdx` also starts from zero. + - When there is an attached dictionary : + loadedDictEnd is expressed within the referential of the context, + so it can be directly compared against blockEndIdx. + */ + if (blockEndIdx > maxDist + loadedDictEnd) { + U32 const newLowLimit = blockEndIdx - maxDist; + if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit; + if (window->dictLimit < window->lowLimit) { + DEBUGLOG(5, "Update dictLimit to match lowLimit, from %u to %u", + (unsigned)window->dictLimit, (unsigned)window->lowLimit); + window->dictLimit = window->lowLimit; + } + /* On reaching window size, dictionaries are invalidated */ + if (loadedDictEndPtr) *loadedDictEndPtr = 0; + if (dictMatchStatePtr) *dictMatchStatePtr = NULL; + } +} + +/* Similar to ZSTD_window_enforceMaxDist(), + * but only invalidates dictionary + * when input progresses beyond window size. + * assumption : loadedDictEndPtr and dictMatchStatePtr are valid (non NULL) + * loadedDictEnd uses same referential as window->base + * maxDist is the window size */ +MEM_STATIC void +ZSTD_checkDictValidity(const ZSTD_window_t* window, + const void* blockEnd, + U32 maxDist, + U32* loadedDictEndPtr, + const ZSTD_matchState_t** dictMatchStatePtr) +{ + assert(loadedDictEndPtr != NULL); + assert(dictMatchStatePtr != NULL); + { U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base); + U32 const loadedDictEnd = *loadedDictEndPtr; + DEBUGLOG(5, "ZSTD_checkDictValidity: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u", + (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd); + assert(blockEndIdx >= loadedDictEnd); + + if (blockEndIdx > loadedDictEnd + maxDist) { + /* On reaching window size, dictionaries are invalidated. + * For simplification, if window size is reached anywhere within next block, + * the dictionary is invalidated for the full block. + */ + DEBUGLOG(6, "invalidating dictionary for current block (distance > windowSize)"); + *loadedDictEndPtr = 0; + *dictMatchStatePtr = NULL; + } else { + if (*loadedDictEndPtr != 0) { + DEBUGLOG(6, "dictionary considered valid for current block"); + } } } +} + +MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) { + ZSTD_memset(window, 0, sizeof(*window)); + window->base = (BYTE const*)""; + window->dictBase = (BYTE const*)""; + window->dictLimit = 1; /* start from 1, so that 1st position is valid */ + window->lowLimit = 1; /* it ensures first and later CCtx usages compress the same */ + window->nextSrc = window->base + 1; /* see issue #1241 */ + window->nbOverflowCorrections = 0; +} + +/** + * ZSTD_window_update(): + * Updates the window by appending [src, src + srcSize) to the window. + * If it is not contiguous, the current prefix becomes the extDict, and we + * forget about the extDict. Handles overlap of the prefix and extDict. + * Returns non-zero if the segment is contiguous. 
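+ *
+ * Illustrative call sequence (hypothetical buffers, not upstream documentation):
+ *   ZSTD_window_update(&window, buf,        len1, 0);   first segment
+ *   ZSTD_window_update(&window, buf + len1, len2, 0);   matches window.nextSrc: contiguous, returns 1
+ *   ZSTD_window_update(&window, otherBuf,   len3, 0);   different buffer: old prefix becomes extDict, returns 0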
+ */ +MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window, + void const* src, size_t srcSize, + int forceNonContiguous) +{ + BYTE const* const ip = (BYTE const*)src; + U32 contiguous = 1; + DEBUGLOG(5, "ZSTD_window_update"); + if (srcSize == 0) + return contiguous; + assert(window->base != NULL); + assert(window->dictBase != NULL); + /* Check if blocks follow each other */ + if (src != window->nextSrc || forceNonContiguous) { + /* not contiguous */ + size_t const distanceFromBase = (size_t)(window->nextSrc - window->base); + DEBUGLOG(5, "Non contiguous blocks, new segment starts at %u", window->dictLimit); + window->lowLimit = window->dictLimit; + assert(distanceFromBase == (size_t)(U32)distanceFromBase); /* should never overflow */ + window->dictLimit = (U32)distanceFromBase; + window->dictBase = window->base; + window->base = ip - distanceFromBase; + /* ms->nextToUpdate = window->dictLimit; */ + if (window->dictLimit - window->lowLimit < HASH_READ_SIZE) window->lowLimit = window->dictLimit; /* too small extDict */ + contiguous = 0; + } + window->nextSrc = ip + srcSize; + /* if input and dictionary overlap : reduce dictionary (area presumed modified by input) */ + if ( (ip+srcSize > window->dictBase + window->lowLimit) + & (ip < window->dictBase + window->dictLimit)) { + ptrdiff_t const highInputIdx = (ip + srcSize) - window->dictBase; + U32 const lowLimitMax = (highInputIdx > (ptrdiff_t)window->dictLimit) ? window->dictLimit : (U32)highInputIdx; + window->lowLimit = lowLimitMax; + DEBUGLOG(5, "Overlapping extDict and input : new lowLimit = %u", window->lowLimit); + } + return contiguous; +} + +/** + * Returns the lowest allowed match index. It may either be in the ext-dict or the prefix. + */ +MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog) +{ + U32 const maxDistance = 1U << windowLog; + U32 const lowestValid = ms->window.lowLimit; + U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid; + U32 const isDictionary = (ms->loadedDictEnd != 0); + /* When using a dictionary the entire dictionary is valid if a single byte of the dictionary + * is within the window. We invalidate the dictionary (and set loadedDictEnd to 0) when it isn't + * valid for the entire block. So this check is sufficient to find the lowest valid match index. + */ + U32 const matchLowest = isDictionary ? lowestValid : withinWindow; + return matchLowest; +} + +/** + * Returns the lowest allowed match index in the prefix. + */ +MEM_STATIC U32 ZSTD_getLowestPrefixIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog) +{ + U32 const maxDistance = 1U << windowLog; + U32 const lowestValid = ms->window.dictLimit; + U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid; + U32 const isDictionary = (ms->loadedDictEnd != 0); + /* When computing the lowest prefix index we need to take the dictionary into account to handle + * the edge case where the dictionary and the source are contiguous in memory. + */ + U32 const matchLowest = isDictionary ? 
lowestValid : withinWindow; + return matchLowest; +} + + + +/* debug functions */ +#if (DEBUGLEVEL>=2) + +MEM_STATIC double ZSTD_fWeight(U32 rawStat) +{ + U32 const fp_accuracy = 8; + U32 const fp_multiplier = (1 << fp_accuracy); + U32 const newStat = rawStat + 1; + U32 const hb = ZSTD_highbit32(newStat); + U32 const BWeight = hb * fp_multiplier; + U32 const FWeight = (newStat << fp_accuracy) >> hb; + U32 const weight = BWeight + FWeight; + assert(hb + fp_accuracy < 31); + return (double)weight / fp_multiplier; +} + +/* display a table content, + * listing each element, its frequency, and its predicted bit cost */ +MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max) +{ + unsigned u, sum; + for (u=0, sum=0; u<=max; u++) sum += table[u]; + DEBUGLOG(2, "total nb elts: %u", sum); + for (u=0; u<=max; u++) { + DEBUGLOG(2, "%2u: %5u (%.2f)", + u, table[u], ZSTD_fWeight(sum) - ZSTD_fWeight(table[u]) ); + } +} + +#endif + + +#if defined (__cplusplus) +} +#endif + +/* =============================================================== + * Shared internal declarations + * These prototypes may be called from sources not in lib/compress + * =============================================================== */ + +/* ZSTD_loadCEntropy() : + * dict : must point at beginning of a valid zstd dictionary. + * return : size of dictionary header (size of magic number + dict ID + entropy tables) + * assumptions : magic number supposed already checked + * and dictSize >= 8 */ +size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace, + const void* const dict, size_t dictSize); + +void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs); + +/* ============================================================== + * Private declarations + * These prototypes shall only be called from within lib/compress + * ============================================================== */ + +/* ZSTD_getCParamsFromCCtxParams() : + * cParams are built depending on compressionLevel, src size hints, + * LDM and manually set compression parameters. + * Note: srcSizeHint == 0 means 0! + */ +ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams( + const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode); + +/*! ZSTD_initCStream_internal() : + * Private use only. Init streaming operation. + * expects params to be valid. + * must receive dict, or cdict, or none, but not both. + * @return : 0, or an error code */ +size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs, + const void* dict, size_t dictSize, + const ZSTD_CDict* cdict, + const ZSTD_CCtx_params* params, unsigned long long pledgedSrcSize); + +void ZSTD_resetSeqStore(seqStore_t* ssPtr); + +/*! ZSTD_getCParamsFromCDict() : + * as the name implies */ +ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict); + +/* ZSTD_compressBegin_advanced_internal() : + * Private use only. To be called from zstdmt_compress.c. */ +size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx, + const void* dict, size_t dictSize, + ZSTD_dictContentType_e dictContentType, + ZSTD_dictTableLoadMethod_e dtlm, + const ZSTD_CDict* cdict, + const ZSTD_CCtx_params* params, + unsigned long long pledgedSrcSize); + +/* ZSTD_compress_advanced_internal() : + * Private use only. To be called from zstdmt_compress.c. 
 */
+size_t ZSTD_compress_advanced_internal(ZSTD_CCtx* cctx,
+                                       void* dst, size_t dstCapacity,
+                                       const void* src, size_t srcSize,
+                                       const void* dict,size_t dictSize,
+                                       const ZSTD_CCtx_params* params);
+
+
+/* ZSTD_writeLastEmptyBlock() :
+ * output an empty Block with end-of-frame mark to complete a frame
+ * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h))
+ *           or an error code if `dstCapacity` is too small (<ZSTD_blockHeaderSize)
+ */
+size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity);
+
+/* ZSTD_referenceExternalSequences() :
+ * Must be called before starting a compression operation.
+ * seqs must parse a prefix of the source.
+ * This cannot be used when long range matching is enabled.
+ * Zstd will use these sequences, and pass the literals to a secondary block
+ * compressor.
+ * @return : An error code on failure.
+ * NOTE: seqs are not verified! Invalid sequences can cause out-of-bounds memory
+ * access and data corruption.
+ */
+size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq);
+
+/** ZSTD_cycleLog() :
+ *  condition for correct operation : hashLog > 1 */
+U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat);
+
+/** ZSTD_CCtx_trace() :
+ *  Trace the end of a compression call.
+ */
+void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize);
+
+#endif /* ZSTD_COMPRESS_H */
+/**** ended inlining zstd_compress_internal.h ****/
+
+
+size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+
+size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+
+size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
+                              ZSTD_hufCTables_t* nextHuf,
+                              ZSTD_strategy strategy, int disableLiteralCompression,
+                              void* dst, size_t dstCapacity,
+                              const void* src, size_t srcSize,
+                              void* entropyWorkspace, size_t entropyWorkspaceSize,
+                              const int bmi2);
+
+#endif /* ZSTD_COMPRESS_LITERALS_H */
+/**** ended inlining zstd_compress_literals.h ****/
+
+size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{
+    BYTE* const ostart = (BYTE*)dst;
+    U32   const flSize = 1 + (srcSize>31) + (srcSize>4095);
+
+    RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall, "");
+
+    switch(flSize)
+    {
+        case 1: /* 2 - 1 - 5 */
+            ostart[0] = (BYTE)((U32)set_basic + (srcSize<<3));
+            break;
+        case 2: /* 2 - 2 - 12 */
+            MEM_writeLE16(ostart, (U16)((U32)set_basic + (1<<2) + (srcSize<<4)));
+            break;
+        case 3: /* 2 - 2 - 20 */
+            MEM_writeLE32(ostart, (U32)((U32)set_basic + (3<<2) + (srcSize<<4)));
+            break;
+        default:   /* not necessary : flSize is {1,2,3} */
+            assert(0);
+    }
+
+    ZSTD_memcpy(ostart + flSize, src, srcSize);
+    DEBUGLOG(5, "Raw literals: %u -> %u", (U32)srcSize, (U32)(srcSize + flSize));
+    return srcSize + flSize;
+}
+
+size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{
+    BYTE* const ostart = (BYTE*)dst;
+    U32   const flSize = 1 + (srcSize>31) + (srcSize>4095);
+
+    (void)dstCapacity;  /* dstCapacity already guaranteed to be >=4, hence large enough */
+
+    switch(flSize)
+    {
+        case 1: /* 2 - 1 - 5 */
+            ostart[0] = (BYTE)((U32)set_rle + (srcSize<<3));
+            break;
+        case 2: /* 2 - 2 - 12 */
+            MEM_writeLE16(ostart, (U16)((U32)set_rle + (1<<2) + (srcSize<<4)));
+            break;
+        case 3: /* 2 - 2 - 20 */
+            MEM_writeLE32(ostart, (U32)((U32)set_rle + (3<<2) + (srcSize<<4)));
+            break;
+        default:   /* not necessary : flSize is {1,2,3} */
+            assert(0);
+    }
+
+    ostart[flSize] = *(const BYTE*)src;
+    DEBUGLOG(5, "RLE literals: %u -> %u", (U32)srcSize, (U32)flSize + 1);
+    return flSize+1;
+}
+
+size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
+                              ZSTD_hufCTables_t* nextHuf,
+                              ZSTD_strategy strategy, int disableLiteralCompression,
+                              void* dst, size_t dstCapacity,
+                              const void* src, size_t srcSize,
+                              void* entropyWorkspace, size_t entropyWorkspaceSize,
+                              const int bmi2)
+{
+    size_t const minGain = ZSTD_minGain(srcSize, strategy);
+    size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
+    BYTE*  const ostart = (BYTE*)dst;
+    U32 singleStream = srcSize < 256;
+    symbolEncodingType_e hType = set_compressed;
+    size_t cLitSize;
+
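+    /* Illustrative sketch of the raw-literals header built above (assumed numbers,
+     * not upstream zstd text): srcSize = 1000 gives flSize = 1 + (1000>31) + (1000>4095) = 2,
+     * so the "2 - 2 - 12" layout is used and the header value is
+     *     set_basic + (1<<2) + (1000<<4) = 0 + 4 + 16000 = 16004 = 0x3E84,
+     * written little-endian as bytes 0x84 0x3E, followed by the 1000 raw bytes. */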
DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i srcSize=%u)", + disableLiteralCompression, (U32)srcSize); + + /* Prepare nextEntropy assuming reusing the existing table */ + ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + + if (disableLiteralCompression) + return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); + + /* small ? don't even attempt compression (speed opt) */ +# define COMPRESS_LITERALS_SIZE_MIN 63 + { size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN; + if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); + } + + RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression"); + { HUF_repeat repeat = prevHuf->repeatMode; + int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0; + if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1; + cLitSize = singleStream ? + HUF_compress1X_repeat( + ostart+lhSize, dstCapacity-lhSize, src, srcSize, + HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize, + (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2) : + HUF_compress4X_repeat( + ostart+lhSize, dstCapacity-lhSize, src, srcSize, + HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize, + (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2); + if (repeat != HUF_repeat_none) { + /* reused the existing table */ + DEBUGLOG(5, "Reusing previous huffman table"); + hType = set_repeat; + } + } + + if ((cLitSize==0) || (cLitSize >= srcSize - minGain) || ERR_isError(cLitSize)) { + ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); + } + if (cLitSize==1) { + ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize); + } + + if (hType == set_compressed) { + /* using a newly constructed table */ + nextHuf->repeatMode = HUF_repeat_check; + } + + /* Build header */ + switch(lhSize) + { + case 3: /* 2 - 2 - 10 - 10 */ + { U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14); + MEM_writeLE24(ostart, lhc); + break; + } + case 4: /* 2 - 2 - 14 - 14 */ + { U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18); + MEM_writeLE32(ostart, lhc); + break; + } + case 5: /* 2 - 2 - 18 - 18 */ + { U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22); + MEM_writeLE32(ostart, lhc); + ostart[4] = (BYTE)(cLitSize >> 10); + break; + } + default: /* not possible : lhSize is {3,4,5} */ + assert(0); + } + DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)srcSize, (U32)(lhSize+cLitSize)); + return lhSize+cLitSize; +} +/**** ended inlining compress/zstd_compress_literals.c ****/ +/**** start inlining compress/zstd_compress_sequences.c ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + /*-************************************* + * Dependencies + ***************************************/ +/**** start inlining zstd_compress_sequences.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. 
+ * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_COMPRESS_SEQUENCES_H +#define ZSTD_COMPRESS_SEQUENCES_H + +/**** skipping file: ../common/fse.h ****/ +/**** skipping file: ../common/zstd_internal.h ****/ + +typedef enum { + ZSTD_defaultDisallowed = 0, + ZSTD_defaultAllowed = 1 +} ZSTD_defaultPolicy_e; + +symbolEncodingType_e +ZSTD_selectEncodingType( + FSE_repeat* repeatMode, unsigned const* count, unsigned const max, + size_t const mostFrequent, size_t nbSeq, unsigned const FSELog, + FSE_CTable const* prevCTable, + short const* defaultNorm, U32 defaultNormLog, + ZSTD_defaultPolicy_e const isDefaultAllowed, + ZSTD_strategy const strategy); + +size_t +ZSTD_buildCTable(void* dst, size_t dstCapacity, + FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type, + unsigned* count, U32 max, + const BYTE* codeTable, size_t nbSeq, + const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax, + const FSE_CTable* prevCTable, size_t prevCTableSize, + void* entropyWorkspace, size_t entropyWorkspaceSize); + +size_t ZSTD_encodeSequences( + void* dst, size_t dstCapacity, + FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, + FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, + FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, + seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2); + +size_t ZSTD_fseBitCost( + FSE_CTable const* ctable, + unsigned const* count, + unsigned const max); + +size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog, + unsigned const* count, unsigned const max); +#endif /* ZSTD_COMPRESS_SEQUENCES_H */ +/**** ended inlining zstd_compress_sequences.h ****/ + +/** + * -log2(x / 256) lookup table for x in [0, 256). 
+ * If x == 0: Return 0 + * Else: Return floor(-log2(x / 256) * 256) + */ +static unsigned const kInverseProbabilityLog256[256] = { + 0, 2048, 1792, 1642, 1536, 1453, 1386, 1329, 1280, 1236, 1197, 1162, + 1130, 1100, 1073, 1047, 1024, 1001, 980, 960, 941, 923, 906, 889, + 874, 859, 844, 830, 817, 804, 791, 779, 768, 756, 745, 734, + 724, 714, 704, 694, 685, 676, 667, 658, 650, 642, 633, 626, + 618, 610, 603, 595, 588, 581, 574, 567, 561, 554, 548, 542, + 535, 529, 523, 517, 512, 506, 500, 495, 489, 484, 478, 473, + 468, 463, 458, 453, 448, 443, 438, 434, 429, 424, 420, 415, + 411, 407, 402, 398, 394, 390, 386, 382, 377, 373, 370, 366, + 362, 358, 354, 350, 347, 343, 339, 336, 332, 329, 325, 322, + 318, 315, 311, 308, 305, 302, 298, 295, 292, 289, 286, 282, + 279, 276, 273, 270, 267, 264, 261, 258, 256, 253, 250, 247, + 244, 241, 239, 236, 233, 230, 228, 225, 222, 220, 217, 215, + 212, 209, 207, 204, 202, 199, 197, 194, 192, 190, 187, 185, + 182, 180, 178, 175, 173, 171, 168, 166, 164, 162, 159, 157, + 155, 153, 151, 149, 146, 144, 142, 140, 138, 136, 134, 132, + 130, 128, 126, 123, 121, 119, 117, 115, 114, 112, 110, 108, + 106, 104, 102, 100, 98, 96, 94, 93, 91, 89, 87, 85, + 83, 82, 80, 78, 76, 74, 73, 71, 69, 67, 66, 64, + 62, 61, 59, 57, 55, 54, 52, 50, 49, 47, 46, 44, + 42, 41, 39, 37, 36, 34, 33, 31, 30, 28, 26, 25, + 23, 22, 20, 19, 17, 16, 14, 13, 11, 10, 8, 7, + 5, 4, 2, 1, +}; + +static unsigned ZSTD_getFSEMaxSymbolValue(FSE_CTable const* ctable) { + void const* ptr = ctable; + U16 const* u16ptr = (U16 const*)ptr; + U32 const maxSymbolValue = MEM_read16(u16ptr + 1); + return maxSymbolValue; +} + +/** + * Returns true if we should use ncount=-1 else we should + * use ncount=1 for low probability symbols instead. + */ +static unsigned ZSTD_useLowProbCount(size_t const nbSeq) +{ + /* Heuristic: This should cover most blocks <= 16K and + * start to fade out after 16K to about 32K depending on + * comprssibility. + */ + return nbSeq >= 2048; +} + +/** + * Returns the cost in bytes of encoding the normalized count header. + * Returns an error if any of the helper functions return an error. + */ +static size_t ZSTD_NCountCost(unsigned const* count, unsigned const max, + size_t const nbSeq, unsigned const FSELog) +{ + BYTE wksp[FSE_NCOUNTBOUND]; + S16 norm[MaxSeq + 1]; + const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max); + FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq, max, ZSTD_useLowProbCount(nbSeq)), ""); + return FSE_writeNCount(wksp, sizeof(wksp), norm, max, tableLog); +} + +/** + * Returns the cost in bits of encoding the distribution described by count + * using the entropy bound. + */ +static size_t ZSTD_entropyCost(unsigned const* count, unsigned const max, size_t const total) +{ + unsigned cost = 0; + unsigned s; + + assert(total > 0); + for (s = 0; s <= max; ++s) { + unsigned norm = (unsigned)((256 * count[s]) / total); + if (count[s] != 0 && norm == 0) + norm = 1; + assert(count[s] < total); + cost += count[s] * kInverseProbabilityLog256[norm]; + } + return cost >> 8; +} + +/** + * Returns the cost in bits of encoding the distribution in count using ctable. + * Returns an error if ctable cannot represent all the symbols in count. 
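+ *
+ * Illustrative sketch (assumed numbers, not upstream zstd text): the cost helpers
+ * above work in 8-bit fixed point via kInverseProbabilityLog256. A symbol seen
+ * 64 times out of a total of 256 normalizes to 64, the table gives
+ * kInverseProbabilityLog256[64] == 512 (i.e. 2.0 bits), so it contributes
+ * (64 * 512) >> 8 == 128 bits to ZSTD_entropyCost().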
+ */ +size_t ZSTD_fseBitCost( + FSE_CTable const* ctable, + unsigned const* count, + unsigned const max) +{ + unsigned const kAccuracyLog = 8; + size_t cost = 0; + unsigned s; + FSE_CState_t cstate; + FSE_initCState(&cstate, ctable); + if (ZSTD_getFSEMaxSymbolValue(ctable) < max) { + DEBUGLOG(5, "Repeat FSE_CTable has maxSymbolValue %u < %u", + ZSTD_getFSEMaxSymbolValue(ctable), max); + return ERROR(GENERIC); + } + for (s = 0; s <= max; ++s) { + unsigned const tableLog = cstate.stateLog; + unsigned const badCost = (tableLog + 1) << kAccuracyLog; + unsigned const bitCost = FSE_bitCost(cstate.symbolTT, tableLog, s, kAccuracyLog); + if (count[s] == 0) + continue; + if (bitCost >= badCost) { + DEBUGLOG(5, "Repeat FSE_CTable has Prob[%u] == 0", s); + return ERROR(GENERIC); + } + cost += (size_t)count[s] * bitCost; + } + return cost >> kAccuracyLog; +} + +/** + * Returns the cost in bits of encoding the distribution in count using the + * table described by norm. The max symbol support by norm is assumed >= max. + * norm must be valid for every symbol with non-zero probability in count. + */ +size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog, + unsigned const* count, unsigned const max) +{ + unsigned const shift = 8 - accuracyLog; + size_t cost = 0; + unsigned s; + assert(accuracyLog <= 8); + for (s = 0; s <= max; ++s) { + unsigned const normAcc = (norm[s] != -1) ? (unsigned)norm[s] : 1; + unsigned const norm256 = normAcc << shift; + assert(norm256 > 0); + assert(norm256 < 256); + cost += count[s] * kInverseProbabilityLog256[norm256]; + } + return cost >> 8; +} + +symbolEncodingType_e +ZSTD_selectEncodingType( + FSE_repeat* repeatMode, unsigned const* count, unsigned const max, + size_t const mostFrequent, size_t nbSeq, unsigned const FSELog, + FSE_CTable const* prevCTable, + short const* defaultNorm, U32 defaultNormLog, + ZSTD_defaultPolicy_e const isDefaultAllowed, + ZSTD_strategy const strategy) +{ + ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0); + if (mostFrequent == nbSeq) { + *repeatMode = FSE_repeat_none; + if (isDefaultAllowed && nbSeq <= 2) { + /* Prefer set_basic over set_rle when there are 2 or less symbols, + * since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol. + * If basic encoding isn't possible, always choose RLE. + */ + DEBUGLOG(5, "Selected set_basic"); + return set_basic; + } + DEBUGLOG(5, "Selected set_rle"); + return set_rle; + } + if (strategy < ZSTD_lazy) { + if (isDefaultAllowed) { + size_t const staticFse_nbSeq_max = 1000; + size_t const mult = 10 - strategy; + size_t const baseLog = 3; + size_t const dynamicFse_nbSeq_min = (((size_t)1 << defaultNormLog) * mult) >> baseLog; /* 28-36 for offset, 56-72 for lengths */ + assert(defaultNormLog >= 5 && defaultNormLog <= 6); /* xx_DEFAULTNORMLOG */ + assert(mult <= 9 && mult >= 7); + if ( (*repeatMode == FSE_repeat_valid) + && (nbSeq < staticFse_nbSeq_max) ) { + DEBUGLOG(5, "Selected set_repeat"); + return set_repeat; + } + if ( (nbSeq < dynamicFse_nbSeq_min) + || (mostFrequent < (nbSeq >> (defaultNormLog-1))) ) { + DEBUGLOG(5, "Selected set_basic"); + /* The format allows default tables to be repeated, but it isn't useful. + * When using simple heuristics to select encoding type, we don't want + * to confuse these tables with dictionaries. When running more careful + * analysis, we don't need to waste time checking both repeating tables + * and default tables. 
+ */ + *repeatMode = FSE_repeat_none; + return set_basic; + } + } + } else { + size_t const basicCost = isDefaultAllowed ? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, count, max) : ERROR(GENERIC); + size_t const repeatCost = *repeatMode != FSE_repeat_none ? ZSTD_fseBitCost(prevCTable, count, max) : ERROR(GENERIC); + size_t const NCountCost = ZSTD_NCountCost(count, max, nbSeq, FSELog); + size_t const compressedCost = (NCountCost << 3) + ZSTD_entropyCost(count, max, nbSeq); + + if (isDefaultAllowed) { + assert(!ZSTD_isError(basicCost)); + assert(!(*repeatMode == FSE_repeat_valid && ZSTD_isError(repeatCost))); + } + assert(!ZSTD_isError(NCountCost)); + assert(compressedCost < ERROR(maxCode)); + DEBUGLOG(5, "Estimated bit costs: basic=%u\trepeat=%u\tcompressed=%u", + (unsigned)basicCost, (unsigned)repeatCost, (unsigned)compressedCost); + if (basicCost <= repeatCost && basicCost <= compressedCost) { + DEBUGLOG(5, "Selected set_basic"); + assert(isDefaultAllowed); + *repeatMode = FSE_repeat_none; + return set_basic; + } + if (repeatCost <= compressedCost) { + DEBUGLOG(5, "Selected set_repeat"); + assert(!ZSTD_isError(repeatCost)); + return set_repeat; + } + assert(compressedCost < basicCost && compressedCost < repeatCost); + } + DEBUGLOG(5, "Selected set_compressed"); + *repeatMode = FSE_repeat_check; + return set_compressed; +} + +typedef struct { + S16 norm[MaxSeq + 1]; + U32 wksp[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(MaxSeq, MaxFSELog)]; +} ZSTD_BuildCTableWksp; + +size_t +ZSTD_buildCTable(void* dst, size_t dstCapacity, + FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type, + unsigned* count, U32 max, + const BYTE* codeTable, size_t nbSeq, + const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax, + const FSE_CTable* prevCTable, size_t prevCTableSize, + void* entropyWorkspace, size_t entropyWorkspaceSize) +{ + BYTE* op = (BYTE*)dst; + const BYTE* const oend = op + dstCapacity; + DEBUGLOG(6, "ZSTD_buildCTable (dstCapacity=%u)", (unsigned)dstCapacity); + + switch (type) { + case set_rle: + FORWARD_IF_ERROR(FSE_buildCTable_rle(nextCTable, (BYTE)max), ""); + RETURN_ERROR_IF(dstCapacity==0, dstSize_tooSmall, "not enough space"); + *op = codeTable[0]; + return 1; + case set_repeat: + ZSTD_memcpy(nextCTable, prevCTable, prevCTableSize); + return 0; + case set_basic: + FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, entropyWorkspace, entropyWorkspaceSize), ""); /* note : could be pre-calculated */ + return 0; + case set_compressed: { + ZSTD_BuildCTableWksp* wksp = (ZSTD_BuildCTableWksp*)entropyWorkspace; + size_t nbSeq_1 = nbSeq; + const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max); + if (count[codeTable[nbSeq-1]] > 1) { + count[codeTable[nbSeq-1]]--; + nbSeq_1--; + } + assert(nbSeq_1 > 1); + assert(entropyWorkspaceSize >= sizeof(ZSTD_BuildCTableWksp)); + (void)entropyWorkspaceSize; + FORWARD_IF_ERROR(FSE_normalizeCount(wksp->norm, tableLog, count, nbSeq_1, max, ZSTD_useLowProbCount(nbSeq_1)), ""); + { size_t const NCountSize = FSE_writeNCount(op, oend - op, wksp->norm, max, tableLog); /* overflow protected */ + FORWARD_IF_ERROR(NCountSize, "FSE_writeNCount failed"); + FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, wksp->norm, max, tableLog, wksp->wksp, sizeof(wksp->wksp)), ""); + return NCountSize; + } + } + default: assert(0); RETURN_ERROR(GENERIC, "impossible to reach"); + } +} + +FORCE_INLINE_TEMPLATE size_t +ZSTD_encodeSequences_body( + void* dst, size_t dstCapacity, + FSE_CTable const* CTable_MatchLength, BYTE const* 
mlCodeTable,
+    FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
+    FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
+    seqDef const* sequences, size_t nbSeq, int longOffsets)
+{
+    BIT_CStream_t blockStream;
+    FSE_CState_t  stateMatchLength;
+    FSE_CState_t  stateOffsetBits;
+    FSE_CState_t  stateLitLength;
+
+    RETURN_ERROR_IF(
+        ERR_isError(BIT_initCStream(&blockStream, dst, dstCapacity)),
+        dstSize_tooSmall, "not enough space remaining");
+    DEBUGLOG(6, "available space for bitstream : %i (dstCapacity=%u)",
+                (int)(blockStream.endPtr - blockStream.startPtr),
+                (unsigned)dstCapacity);
+
+    /* first symbols */
+    FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]);
+    FSE_initCState2(&stateOffsetBits,  CTable_OffsetBits,  ofCodeTable[nbSeq-1]);
+    FSE_initCState2(&stateLitLength,   CTable_LitLength,   llCodeTable[nbSeq-1]);
+    BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]);
+    if (MEM_32bits()) BIT_flushBits(&blockStream);
+    BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]);
+    if (MEM_32bits()) BIT_flushBits(&blockStream);
+    if (longOffsets) {
+        U32 const ofBits = ofCodeTable[nbSeq-1];
+        unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
+        if (extraBits) {
+            BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits);
+            BIT_flushBits(&blockStream);
+        }
+        BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits,
+                    ofBits - extraBits);
+    } else {
+        BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]);
+    }
+    BIT_flushBits(&blockStream);
+
+    { size_t n;
+      for (n=nbSeq-2 ; n<nbSeq ; n--) {      /* intentional underflow */
+          BYTE const llCode = llCodeTable[n];
+          BYTE const ofCode = ofCodeTable[n];
+          BYTE const mlCode = mlCodeTable[n];
+          U32  const llBits = LL_bits[llCode];
+          U32  const ofBits = ofCode;
+          U32  const mlBits = ML_bits[mlCode];
+          DEBUGLOG(6, "encoding: litlen:%2u - matchlen:%2u - offCode:%7u",
+                      (unsigned)sequences[n].litLength,
+                      (unsigned)sequences[n].matchLength + MINMATCH,
+                      (unsigned)sequences[n].offset);
+                                                                          /* 32b*/  /* 64b*/
+                                                                          /* (7)*/  /* (7)*/
+          FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode);       /* 15 */  /* 15 */
+          FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode);      /* 24 */  /* 24 */
+          if (MEM_32bits()) BIT_flushBits(&blockStream);                  /* (7)*/
+          FSE_encodeSymbol(&blockStream, &stateLitLength, llCode);        /* 16 */  /* 33 */
+          if (MEM_32bits() || (ofBits+mlBits+llBits >= 64-7-(LLFSELog+MLFSELog+OffFSELog)))
+              BIT_flushBits(&blockStream);                                /* (7)*/
+          BIT_addBits(&blockStream, sequences[n].litLength, llBits);
+          if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
+          BIT_addBits(&blockStream, sequences[n].matchLength, mlBits);
+          if (MEM_32bits() || (ofBits+mlBits+llBits > 56)) BIT_flushBits(&blockStream);
+          if (longOffsets) {
+              unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
+              if (extraBits) {
+                  BIT_addBits(&blockStream, sequences[n].offset, extraBits);
+                  BIT_flushBits(&blockStream);                            /* (7)*/
+              }
+              BIT_addBits(&blockStream, sequences[n].offset >> extraBits,
+                          ofBits - extraBits);                            /* 31 */
+          } else {
+              BIT_addBits(&blockStream, sequences[n].offset, ofBits);     /* 31 */
+          }
+          BIT_flushBits(&blockStream);                                    /* (7)*/
+          DEBUGLOG(7, "remaining space : %i", (int)(blockStream.endPtr - blockStream.ptr));
+    } }
+
+    DEBUGLOG(6, "ZSTD_encodeSequences: flushing ML state with %u bits", stateMatchLength.stateLog);
+    FSE_flushCState(&blockStream, &stateMatchLength);
+    DEBUGLOG(6, "ZSTD_encodeSequences: flushing Off state with %u bits", stateOffsetBits.stateLog);
+    FSE_flushCState(&blockStream, &stateOffsetBits);
+    DEBUGLOG(6, "ZSTD_encodeSequences: flushing LL state with %u bits", stateLitLength.stateLog);
+    FSE_flushCState(&blockStream, &stateLitLength);
+
+    { size_t const streamSize = BIT_closeCStream(&blockStream);
+      RETURN_ERROR_IF(streamSize==0, dstSize_tooSmall, "not enough space");
+      return streamSize;
+    }
+}
+
+static size_t
+ZSTD_encodeSequences_default(
+            void* dst, size_t dstCapacity,
+            FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
+            FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
+            FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
+            seqDef const* sequences, size_t nbSeq, int longOffsets)
+{
+    return ZSTD_encodeSequences_body(dst, dstCapacity,
+
CTable_MatchLength, mlCodeTable, + CTable_OffsetBits, ofCodeTable, + CTable_LitLength, llCodeTable, + sequences, nbSeq, longOffsets); +} + + +#if DYNAMIC_BMI2 + +static TARGET_ATTRIBUTE("bmi2") size_t +ZSTD_encodeSequences_bmi2( + void* dst, size_t dstCapacity, + FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, + FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, + FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, + seqDef const* sequences, size_t nbSeq, int longOffsets) +{ + return ZSTD_encodeSequences_body(dst, dstCapacity, + CTable_MatchLength, mlCodeTable, + CTable_OffsetBits, ofCodeTable, + CTable_LitLength, llCodeTable, + sequences, nbSeq, longOffsets); +} + +#endif + +size_t ZSTD_encodeSequences( + void* dst, size_t dstCapacity, + FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, + FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, + FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, + seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2) +{ + DEBUGLOG(5, "ZSTD_encodeSequences: dstCapacity = %u", (unsigned)dstCapacity); +#if DYNAMIC_BMI2 + if (bmi2) { + return ZSTD_encodeSequences_bmi2(dst, dstCapacity, + CTable_MatchLength, mlCodeTable, + CTable_OffsetBits, ofCodeTable, + CTable_LitLength, llCodeTable, + sequences, nbSeq, longOffsets); + } +#endif + (void)bmi2; + return ZSTD_encodeSequences_default(dst, dstCapacity, + CTable_MatchLength, mlCodeTable, + CTable_OffsetBits, ofCodeTable, + CTable_LitLength, llCodeTable, + sequences, nbSeq, longOffsets); +} +/**** ended inlining compress/zstd_compress_sequences.c ****/ +/**** start inlining compress/zstd_compress_superblock.c ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + /*-************************************* + * Dependencies + ***************************************/ +/**** start inlining zstd_compress_superblock.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_COMPRESS_ADVANCED_H +#define ZSTD_COMPRESS_ADVANCED_H + +/*-************************************* +* Dependencies +***************************************/ + +/**** skipping file: ../zstd.h ****/ + +/*-************************************* +* Target Compressed Block Size +***************************************/ + +/* ZSTD_compressSuperBlock() : + * Used to compress a super block when targetCBlockSize is being used. + * The given block will be compressed into multiple sub blocks that are around targetCBlockSize. 
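+ *  Illustrative sketch (assumed numbers, not upstream zstd text): with
+ *  targetCBlockSize = 1300, a 100 KB block is emitted as roughly 80 compressed
+ *  sub-blocks; the first sub-block that succeeds carries the Huffman/FSE table
+ *  descriptions, the following ones reuse them via repeat mode, and each pays
+ *  only its own 3-byte block header.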
*/ +size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + void const* src, size_t srcSize, + unsigned lastBlock); + +#endif /* ZSTD_COMPRESS_ADVANCED_H */ +/**** ended inlining zstd_compress_superblock.h ****/ + +/**** skipping file: ../common/zstd_internal.h ****/ +/**** skipping file: hist.h ****/ +/**** skipping file: zstd_compress_internal.h ****/ +/**** skipping file: zstd_compress_sequences.h ****/ +/**** skipping file: zstd_compress_literals.h ****/ + +/** ZSTD_compressSubBlock_literal() : + * Compresses literals section for a sub-block. + * When we have to write the Huffman table we will sometimes choose a header + * size larger than necessary. This is because we have to pick the header size + * before we know the table size + compressed size, so we have a bound on the + * table size. If we guessed incorrectly, we fall back to uncompressed literals. + * + * We write the header when writeEntropy=1 and set entropyWritten=1 when we succeeded + * in writing the header, otherwise it is set to 0. + * + * hufMetadata->hType has literals block type info. + * If it is set_basic, all sub-blocks literals section will be Raw_Literals_Block. + * If it is set_rle, all sub-blocks literals section will be RLE_Literals_Block. + * If it is set_compressed, first sub-block's literals section will be Compressed_Literals_Block + * If it is set_compressed, first sub-block's literals section will be Treeless_Literals_Block + * and the following sub-blocks' literals sections will be Treeless_Literals_Block. + * @return : compressed size of literals section of a sub-block + * Or 0 if it unable to compress. + * Or error code */ +static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable, + const ZSTD_hufCTablesMetadata_t* hufMetadata, + const BYTE* literals, size_t litSize, + void* dst, size_t dstSize, + const int bmi2, int writeEntropy, int* entropyWritten) +{ + size_t const header = writeEntropy ? 200 : 0; + size_t const lhSize = 3 + (litSize >= (1 KB - header)) + (litSize >= (16 KB - header)); + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstSize; + BYTE* op = ostart + lhSize; + U32 const singleStream = lhSize == 3; + symbolEncodingType_e hType = writeEntropy ? hufMetadata->hType : set_repeat; + size_t cLitSize = 0; + + (void)bmi2; /* TODO bmi2... */ + + DEBUGLOG(5, "ZSTD_compressSubBlock_literal (litSize=%zu, lhSize=%zu, writeEntropy=%d)", litSize, lhSize, writeEntropy); + + *entropyWritten = 0; + if (litSize == 0 || hufMetadata->hType == set_basic) { + DEBUGLOG(5, "ZSTD_compressSubBlock_literal using raw literal"); + return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize); + } else if (hufMetadata->hType == set_rle) { + DEBUGLOG(5, "ZSTD_compressSubBlock_literal using rle literal"); + return ZSTD_compressRleLiteralsBlock(dst, dstSize, literals, litSize); + } + + assert(litSize > 0); + assert(hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat); + + if (writeEntropy && hufMetadata->hType == set_compressed) { + ZSTD_memcpy(op, hufMetadata->hufDesBuffer, hufMetadata->hufDesSize); + op += hufMetadata->hufDesSize; + cLitSize += hufMetadata->hufDesSize; + DEBUGLOG(5, "ZSTD_compressSubBlock_literal (hSize=%zu)", hufMetadata->hufDesSize); + } + + /* TODO bmi2 */ + { const size_t cSize = singleStream ? 
HUF_compress1X_usingCTable(op, oend-op, literals, litSize, hufTable) + : HUF_compress4X_usingCTable(op, oend-op, literals, litSize, hufTable); + op += cSize; + cLitSize += cSize; + if (cSize == 0 || ERR_isError(cSize)) { + DEBUGLOG(5, "Failed to write entropy tables %s", ZSTD_getErrorName(cSize)); + return 0; + } + /* If we expand and we aren't writing a header then emit uncompressed */ + if (!writeEntropy && cLitSize >= litSize) { + DEBUGLOG(5, "ZSTD_compressSubBlock_literal using raw literal because uncompressible"); + return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize); + } + /* If we are writing headers then allow expansion that doesn't change our header size. */ + if (lhSize < (size_t)(3 + (cLitSize >= 1 KB) + (cLitSize >= 16 KB))) { + assert(cLitSize > litSize); + DEBUGLOG(5, "Literals expanded beyond allowed header size"); + return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize); + } + DEBUGLOG(5, "ZSTD_compressSubBlock_literal (cSize=%zu)", cSize); + } + + /* Build header */ + switch(lhSize) + { + case 3: /* 2 - 2 - 10 - 10 */ + { U32 const lhc = hType + ((!singleStream) << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<14); + MEM_writeLE24(ostart, lhc); + break; + } + case 4: /* 2 - 2 - 14 - 14 */ + { U32 const lhc = hType + (2 << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<18); + MEM_writeLE32(ostart, lhc); + break; + } + case 5: /* 2 - 2 - 18 - 18 */ + { U32 const lhc = hType + (3 << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<22); + MEM_writeLE32(ostart, lhc); + ostart[4] = (BYTE)(cLitSize >> 10); + break; + } + default: /* not possible : lhSize is {3,4,5} */ + assert(0); + } + *entropyWritten = 1; + DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)litSize, (U32)(op-ostart)); + return op-ostart; +} + +static size_t ZSTD_seqDecompressedSize(seqStore_t const* seqStore, const seqDef* sequences, size_t nbSeq, size_t litSize, int lastSequence) { + const seqDef* const sstart = sequences; + const seqDef* const send = sequences + nbSeq; + const seqDef* sp = sstart; + size_t matchLengthSum = 0; + size_t litLengthSum = 0; + while (send-sp > 0) { + ZSTD_sequenceLength const seqLen = ZSTD_getSequenceLength(seqStore, sp); + litLengthSum += seqLen.litLength; + matchLengthSum += seqLen.matchLength; + sp++; + } + assert(litLengthSum <= litSize); + if (!lastSequence) { + assert(litLengthSum == litSize); + } + return matchLengthSum + litSize; +} + +/** ZSTD_compressSubBlock_sequences() : + * Compresses sequences section for a sub-block. + * fseMetadata->llType, fseMetadata->ofType, and fseMetadata->mlType have + * symbol compression modes for the super-block. + * The first successfully compressed block will have these in its header. + * We set entropyWritten=1 when we succeed in compressing the sequences. + * The following sub-blocks will always have repeat mode. + * @return : compressed size of sequences section of a sub-block + * Or 0 if it is unable to compress + * Or error code. 
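+ *  Illustrative sketch (assumed numbers, not upstream zstd text): for nbSeq = 200
+ *  the header written below is two bytes, 0x80 and 0xC8 ((200>>8)+0x80 and 200&0xFF),
+ *  and when a later sub-block reuses the tables its seqHead byte is
+ *  (set_repeat<<6) + (set_repeat<<4) + (set_repeat<<2) == 0xFC.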
*/ +static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables, + const ZSTD_fseCTablesMetadata_t* fseMetadata, + const seqDef* sequences, size_t nbSeq, + const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode, + const ZSTD_CCtx_params* cctxParams, + void* dst, size_t dstCapacity, + const int bmi2, int writeEntropy, int* entropyWritten) +{ + const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstCapacity; + BYTE* op = ostart; + BYTE* seqHead; + + DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (nbSeq=%zu, writeEntropy=%d, longOffsets=%d)", nbSeq, writeEntropy, longOffsets); + + *entropyWritten = 0; + /* Sequences Header */ + RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/, + dstSize_tooSmall, ""); + if (nbSeq < 0x7F) + *op++ = (BYTE)nbSeq; + else if (nbSeq < LONGNBSEQ) + op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2; + else + op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3; + if (nbSeq==0) { + return op - ostart; + } + + /* seqHead : flags for FSE encoding type */ + seqHead = op++; + + DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (seqHeadSize=%u)", (unsigned)(op-ostart)); + + if (writeEntropy) { + const U32 LLtype = fseMetadata->llType; + const U32 Offtype = fseMetadata->ofType; + const U32 MLtype = fseMetadata->mlType; + DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (fseTablesSize=%zu)", fseMetadata->fseTablesSize); + *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2)); + ZSTD_memcpy(op, fseMetadata->fseTablesBuffer, fseMetadata->fseTablesSize); + op += fseMetadata->fseTablesSize; + } else { + const U32 repeat = set_repeat; + *seqHead = (BYTE)((repeat<<6) + (repeat<<4) + (repeat<<2)); + } + + { size_t const bitstreamSize = ZSTD_encodeSequences( + op, oend - op, + fseTables->matchlengthCTable, mlCode, + fseTables->offcodeCTable, ofCode, + fseTables->litlengthCTable, llCode, + sequences, nbSeq, + longOffsets, bmi2); + FORWARD_IF_ERROR(bitstreamSize, "ZSTD_encodeSequences failed"); + op += bitstreamSize; + /* zstd versions <= 1.3.4 mistakenly report corruption when + * FSE_readNCount() receives a buffer < 4 bytes. + * Fixed by https://github.com/facebook/zstd/pull/1146. + * This can happen when the last set_compressed table present is 2 + * bytes and the bitstream is only one byte. + * In this exceedingly rare case, we will simply emit an uncompressed + * block, since it isn't worth optimizing. + */ +#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + if (writeEntropy && fseMetadata->lastCountSize && fseMetadata->lastCountSize + bitstreamSize < 4) { + /* NCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */ + assert(fseMetadata->lastCountSize + bitstreamSize == 3); + DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by " + "emitting an uncompressed block."); + return 0; + } +#endif + DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (bitstreamSize=%zu)", bitstreamSize); + } + + /* zstd versions <= 1.4.0 mistakenly report error when + * sequences section body size is less than 3 bytes. + * Fixed by https://github.com/facebook/zstd/pull/1664. + * This can happen when the previous sequences section block is compressed + * with rle mode and the current block's sequences section is compressed + * with repeat mode where sequences section body size can be 1 byte. 
+ */ +#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + if (op-seqHead < 4) { + DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.4.0 by emitting " + "an uncompressed block when sequences are < 4 bytes"); + return 0; + } +#endif + + *entropyWritten = 1; + return op - ostart; +} + +/** ZSTD_compressSubBlock() : + * Compresses a single sub-block. + * @return : compressed size of the sub-block + * Or 0 if it failed to compress. */ +static size_t ZSTD_compressSubBlock(const ZSTD_entropyCTables_t* entropy, + const ZSTD_entropyCTablesMetadata_t* entropyMetadata, + const seqDef* sequences, size_t nbSeq, + const BYTE* literals, size_t litSize, + const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode, + const ZSTD_CCtx_params* cctxParams, + void* dst, size_t dstCapacity, + const int bmi2, + int writeLitEntropy, int writeSeqEntropy, + int* litEntropyWritten, int* seqEntropyWritten, + U32 lastBlock) +{ + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstCapacity; + BYTE* op = ostart + ZSTD_blockHeaderSize; + DEBUGLOG(5, "ZSTD_compressSubBlock (litSize=%zu, nbSeq=%zu, writeLitEntropy=%d, writeSeqEntropy=%d, lastBlock=%d)", + litSize, nbSeq, writeLitEntropy, writeSeqEntropy, lastBlock); + { size_t cLitSize = ZSTD_compressSubBlock_literal((const HUF_CElt*)entropy->huf.CTable, + &entropyMetadata->hufMetadata, literals, litSize, + op, oend-op, bmi2, writeLitEntropy, litEntropyWritten); + FORWARD_IF_ERROR(cLitSize, "ZSTD_compressSubBlock_literal failed"); + if (cLitSize == 0) return 0; + op += cLitSize; + } + { size_t cSeqSize = ZSTD_compressSubBlock_sequences(&entropy->fse, + &entropyMetadata->fseMetadata, + sequences, nbSeq, + llCode, mlCode, ofCode, + cctxParams, + op, oend-op, + bmi2, writeSeqEntropy, seqEntropyWritten); + FORWARD_IF_ERROR(cSeqSize, "ZSTD_compressSubBlock_sequences failed"); + if (cSeqSize == 0) return 0; + op += cSeqSize; + } + /* Write block header */ + { size_t cSize = (op-ostart)-ZSTD_blockHeaderSize; + U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); + MEM_writeLE24(ostart, cBlockHeader24); + } + return op-ostart; +} + +static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t litSize, + const ZSTD_hufCTables_t* huf, + const ZSTD_hufCTablesMetadata_t* hufMetadata, + void* workspace, size_t wkspSize, + int writeEntropy) +{ + unsigned* const countWksp = (unsigned*)workspace; + unsigned maxSymbolValue = 255; + size_t literalSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */ + + if (hufMetadata->hType == set_basic) return litSize; + else if (hufMetadata->hType == set_rle) return 1; + else if (hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat) { + size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)literals, litSize, workspace, wkspSize); + if (ZSTD_isError(largest)) return litSize; + { size_t cLitSizeEstimate = HUF_estimateCompressedSize((const HUF_CElt*)huf->CTable, countWksp, maxSymbolValue); + if (writeEntropy) cLitSizeEstimate += hufMetadata->hufDesSize; + return cLitSizeEstimate + literalSectionHeaderSize; + } } + assert(0); /* impossible */ + return 0; +} + +static size_t ZSTD_estimateSubBlockSize_symbolType(symbolEncodingType_e type, + const BYTE* codeTable, unsigned maxCode, + size_t nbSeq, const FSE_CTable* fseCTable, + const U32* additionalBits, + short const* defaultNorm, U32 defaultNormLog, U32 defaultMax, + void* workspace, size_t wkspSize) +{ + unsigned* const countWksp = (unsigned*)workspace; + const BYTE* ctp = codeTable; + 
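+    /* Illustrative note (not upstream zstd text): the loop below adds each code's
+     * entropy cost plus its additional bits, e.g. an offset code of 10 contributes
+     * its FSE/entropy cost plus 10 raw offset bits, since for offsets the code
+     * value is also the number of additional bits. */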
const BYTE* const ctStart = ctp; + const BYTE* const ctEnd = ctStart + nbSeq; + size_t cSymbolTypeSizeEstimateInBits = 0; + unsigned max = maxCode; + + HIST_countFast_wksp(countWksp, &max, codeTable, nbSeq, workspace, wkspSize); /* can't fail */ + if (type == set_basic) { + /* We selected this encoding type, so it must be valid. */ + assert(max <= defaultMax); + cSymbolTypeSizeEstimateInBits = max <= defaultMax + ? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, countWksp, max) + : ERROR(GENERIC); + } else if (type == set_rle) { + cSymbolTypeSizeEstimateInBits = 0; + } else if (type == set_compressed || type == set_repeat) { + cSymbolTypeSizeEstimateInBits = ZSTD_fseBitCost(fseCTable, countWksp, max); + } + if (ZSTD_isError(cSymbolTypeSizeEstimateInBits)) return nbSeq * 10; + while (ctp < ctEnd) { + if (additionalBits) cSymbolTypeSizeEstimateInBits += additionalBits[*ctp]; + else cSymbolTypeSizeEstimateInBits += *ctp; /* for offset, offset code is also the number of additional bits */ + ctp++; + } + return cSymbolTypeSizeEstimateInBits / 8; +} + +static size_t ZSTD_estimateSubBlockSize_sequences(const BYTE* ofCodeTable, + const BYTE* llCodeTable, + const BYTE* mlCodeTable, + size_t nbSeq, + const ZSTD_fseCTables_t* fseTables, + const ZSTD_fseCTablesMetadata_t* fseMetadata, + void* workspace, size_t wkspSize, + int writeEntropy) +{ + size_t const sequencesSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */ + size_t cSeqSizeEstimate = 0; + if (nbSeq == 0) return sequencesSectionHeaderSize; + cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, MaxOff, + nbSeq, fseTables->offcodeCTable, NULL, + OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, + workspace, wkspSize); + cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->llType, llCodeTable, MaxLL, + nbSeq, fseTables->litlengthCTable, LL_bits, + LL_defaultNorm, LL_defaultNormLog, MaxLL, + workspace, wkspSize); + cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->mlType, mlCodeTable, MaxML, + nbSeq, fseTables->matchlengthCTable, ML_bits, + ML_defaultNorm, ML_defaultNormLog, MaxML, + workspace, wkspSize); + if (writeEntropy) cSeqSizeEstimate += fseMetadata->fseTablesSize; + return cSeqSizeEstimate + sequencesSectionHeaderSize; +} + +static size_t ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize, + const BYTE* ofCodeTable, + const BYTE* llCodeTable, + const BYTE* mlCodeTable, + size_t nbSeq, + const ZSTD_entropyCTables_t* entropy, + const ZSTD_entropyCTablesMetadata_t* entropyMetadata, + void* workspace, size_t wkspSize, + int writeLitEntropy, int writeSeqEntropy) { + size_t cSizeEstimate = 0; + cSizeEstimate += ZSTD_estimateSubBlockSize_literal(literals, litSize, + &entropy->huf, &entropyMetadata->hufMetadata, + workspace, wkspSize, writeLitEntropy); + cSizeEstimate += ZSTD_estimateSubBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable, + nbSeq, &entropy->fse, &entropyMetadata->fseMetadata, + workspace, wkspSize, writeSeqEntropy); + return cSizeEstimate + ZSTD_blockHeaderSize; +} + +static int ZSTD_needSequenceEntropyTables(ZSTD_fseCTablesMetadata_t const* fseMetadata) +{ + if (fseMetadata->llType == set_compressed || fseMetadata->llType == set_rle) + return 1; + if (fseMetadata->mlType == set_compressed || fseMetadata->mlType == set_rle) + return 1; + if (fseMetadata->ofType == set_compressed || fseMetadata->ofType == set_rle) + return 1; + return 0; +} + +/** ZSTD_compressSubBlock_multi() : + * Breaks super-block into multiple sub-blocks 
and compresses them. + * Entropy will be written to the first block. + * The following blocks will use repeat mode to compress. + * All sub-blocks are compressed blocks (no raw or rle blocks). + * @return : compressed size of the super block (which is multiple ZSTD blocks) + * Or 0 if it failed to compress. */ +static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr, + const ZSTD_compressedBlockState_t* prevCBlock, + ZSTD_compressedBlockState_t* nextCBlock, + const ZSTD_entropyCTablesMetadata_t* entropyMetadata, + const ZSTD_CCtx_params* cctxParams, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const int bmi2, U32 lastBlock, + void* workspace, size_t wkspSize) +{ + const seqDef* const sstart = seqStorePtr->sequencesStart; + const seqDef* const send = seqStorePtr->sequences; + const seqDef* sp = sstart; + const BYTE* const lstart = seqStorePtr->litStart; + const BYTE* const lend = seqStorePtr->lit; + const BYTE* lp = lstart; + BYTE const* ip = (BYTE const*)src; + BYTE const* const iend = ip + srcSize; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstCapacity; + BYTE* op = ostart; + const BYTE* llCodePtr = seqStorePtr->llCode; + const BYTE* mlCodePtr = seqStorePtr->mlCode; + const BYTE* ofCodePtr = seqStorePtr->ofCode; + size_t targetCBlockSize = cctxParams->targetCBlockSize; + size_t litSize, seqCount; + int writeLitEntropy = entropyMetadata->hufMetadata.hType == set_compressed; + int writeSeqEntropy = 1; + int lastSequence = 0; + + DEBUGLOG(5, "ZSTD_compressSubBlock_multi (litSize=%u, nbSeq=%u)", + (unsigned)(lend-lp), (unsigned)(send-sstart)); + + litSize = 0; + seqCount = 0; + do { + size_t cBlockSizeEstimate = 0; + if (sstart == send) { + lastSequence = 1; + } else { + const seqDef* const sequence = sp + seqCount; + lastSequence = sequence == send - 1; + litSize += ZSTD_getSequenceLength(seqStorePtr, sequence).litLength; + seqCount++; + } + if (lastSequence) { + assert(lp <= lend); + assert(litSize <= (size_t)(lend - lp)); + litSize = (size_t)(lend - lp); + } + /* I think there is an optimization opportunity here. + * Calling ZSTD_estimateSubBlockSize for every sequence can be wasteful + * since it recalculates estimate from scratch. + * For example, it would recount literal distribution and symbol codes everytime. 
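+         *
+         * Illustrative summary (not upstream text): sequences are accumulated into
+         * litSize/seqCount until the running estimate first exceeds targetCBlockSize
+         * (or the last sequence is reached); only then is ZSTD_compressSubBlock()
+         * called, and the sub-block is committed (counters reset) only when it
+         * actually saved space, i.e. cSize < decompressedSize.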
+ */ + cBlockSizeEstimate = ZSTD_estimateSubBlockSize(lp, litSize, ofCodePtr, llCodePtr, mlCodePtr, seqCount, + &nextCBlock->entropy, entropyMetadata, + workspace, wkspSize, writeLitEntropy, writeSeqEntropy); + if (cBlockSizeEstimate > targetCBlockSize || lastSequence) { + int litEntropyWritten = 0; + int seqEntropyWritten = 0; + const size_t decompressedSize = ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, lastSequence); + const size_t cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata, + sp, seqCount, + lp, litSize, + llCodePtr, mlCodePtr, ofCodePtr, + cctxParams, + op, oend-op, + bmi2, writeLitEntropy, writeSeqEntropy, + &litEntropyWritten, &seqEntropyWritten, + lastBlock && lastSequence); + FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed"); + if (cSize > 0 && cSize < decompressedSize) { + DEBUGLOG(5, "Committed the sub-block"); + assert(ip + decompressedSize <= iend); + ip += decompressedSize; + sp += seqCount; + lp += litSize; + op += cSize; + llCodePtr += seqCount; + mlCodePtr += seqCount; + ofCodePtr += seqCount; + litSize = 0; + seqCount = 0; + /* Entropy only needs to be written once */ + if (litEntropyWritten) { + writeLitEntropy = 0; + } + if (seqEntropyWritten) { + writeSeqEntropy = 0; + } + } + } + } while (!lastSequence); + if (writeLitEntropy) { + DEBUGLOG(5, "ZSTD_compressSubBlock_multi has literal entropy tables unwritten"); + ZSTD_memcpy(&nextCBlock->entropy.huf, &prevCBlock->entropy.huf, sizeof(prevCBlock->entropy.huf)); + } + if (writeSeqEntropy && ZSTD_needSequenceEntropyTables(&entropyMetadata->fseMetadata)) { + /* If we haven't written our entropy tables, then we've violated our contract and + * must emit an uncompressed block. + */ + DEBUGLOG(5, "ZSTD_compressSubBlock_multi has sequence entropy tables unwritten"); + return 0; + } + if (ip < iend) { + size_t const cSize = ZSTD_noCompressBlock(op, oend - op, ip, iend - ip, lastBlock); + DEBUGLOG(5, "ZSTD_compressSubBlock_multi last sub-block uncompressed, %zu bytes", (size_t)(iend - ip)); + FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed"); + assert(cSize != 0); + op += cSize; + /* We have to regenerate the repcodes because we've skipped some sequences */ + if (sp < send) { + seqDef const* seq; + repcodes_t rep; + ZSTD_memcpy(&rep, prevCBlock->rep, sizeof(rep)); + for (seq = sstart; seq < sp; ++seq) { + rep = ZSTD_updateRep(rep.rep, seq->offset - 1, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0); + } + ZSTD_memcpy(nextCBlock->rep, &rep, sizeof(rep)); + } + } + DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed"); + return op-ostart; +} + +size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + void const* src, size_t srcSize, + unsigned lastBlock) { + ZSTD_entropyCTablesMetadata_t entropyMetadata; + + FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(&zc->seqStore, + &zc->blockState.prevCBlock->entropy, + &zc->blockState.nextCBlock->entropy, + &zc->appliedParams, + &entropyMetadata, + zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */), ""); + + return ZSTD_compressSubBlock_multi(&zc->seqStore, + zc->blockState.prevCBlock, + zc->blockState.nextCBlock, + &entropyMetadata, + &zc->appliedParams, + dst, dstCapacity, + src, srcSize, + zc->bmi2, lastBlock, + zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */); +} +/**** ended inlining compress/zstd_compress_superblock.c ****/ +/**** start inlining compress/zstd_compress.c ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. 
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/*-*************************************
+* Dependencies
+***************************************/
+/**** skipping file: ../common/zstd_deps.h ****/
+/**** start inlining ../common/cpu.h ****/
+/*
+ * Copyright (c) Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_COMMON_CPU_H
+#define ZSTD_COMMON_CPU_H
+
+/**
+ * Implementation taken from folly/CpuId.h
+ * https://github.com/facebook/folly/blob/master/folly/CpuId.h
+ */
+
+/**** skipping file: mem.h ****/
+
+#ifdef _MSC_VER
+#include <intrin.h>
+#endif
+
+typedef struct {
+    U32 f1c;
+    U32 f1d;
+    U32 f7b;
+    U32 f7c;
+} ZSTD_cpuid_t;
+
+MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) {
+    U32 f1c = 0;
+    U32 f1d = 0;
+    U32 f7b = 0;
+    U32 f7c = 0;
+#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
+    int reg[4];
+    __cpuid((int*)reg, 0);
+    {
+        int const n = reg[0];
+        if (n >= 1) {
+            __cpuid((int*)reg, 1);
+            f1c = (U32)reg[2];
+            f1d = (U32)reg[3];
+        }
+        if (n >= 7) {
+            __cpuidex((int*)reg, 7, 0);
+            f7b = (U32)reg[1];
+            f7c = (U32)reg[2];
+        }
+    }
+#elif defined(__i386__) && defined(__PIC__) && !defined(__clang__) && defined(__GNUC__)
+    /* The following block like the normal cpuid branch below, but gcc
+     * reserves ebx for use of its pic register so we must specially
+     * handle the save and restore to avoid clobbering the register
+     */
+    U32 n;
+    __asm__(
+        "pushl %%ebx\n\t"
+        "cpuid\n\t"
+        "popl %%ebx\n\t"
+        : "=a"(n)
+        : "a"(0)
+        : "ecx", "edx");
+    if (n >= 1) {
+        U32 f1a;
+        __asm__(
+            "pushl %%ebx\n\t"
+            "cpuid\n\t"
+            "popl %%ebx\n\t"
+            : "=a"(f1a), "=c"(f1c), "=d"(f1d)
+            : "a"(1));
+    }
+    if (n >= 7) {
+        __asm__(
+            "pushl %%ebx\n\t"
+            "cpuid\n\t"
+            "movl %%ebx, %%eax\n\t"
+            "popl %%ebx"
+            : "=a"(f7b), "=c"(f7c)
+            : "a"(7), "c"(0)
+            : "edx");
+    }
+#elif defined(__x86_64__) || defined(_M_X64) || defined(__i386__)
+    U32 n;
+    __asm__("cpuid" : "=a"(n) : "a"(0) : "ebx", "ecx", "edx");
+    if (n >= 1) {
+        U32 f1a;
+        __asm__("cpuid" : "=a"(f1a), "=c"(f1c), "=d"(f1d) : "a"(1) : "ebx");
+    }
+    if (n >= 7) {
+        U32 f7a;
+        __asm__("cpuid"
+                : "=a"(f7a), "=b"(f7b), "=c"(f7c)
+                : "a"(7), "c"(0)
+                : "edx");
+    }
+#endif
+    {
+        ZSTD_cpuid_t cpuid;
+        cpuid.f1c = f1c;
+        cpuid.f1d = f1d;
+        cpuid.f7b = f7b;
+        cpuid.f7c = f7c;
+        return cpuid;
+    }
+}
+
+#define X(name, r, bit)                                                        \
+    MEM_STATIC int ZSTD_cpuid_##name(ZSTD_cpuid_t const cpuid) {               \
+        return ((cpuid.r) & (1U << bit)) != 0;                                 \
+    }
+
+/* cpuid(1): Processor Info and Feature Bits.
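+ *
+ * Illustrative expansion (not upstream text): C(sse3, 0) below expands, via X(), to
+ *     MEM_STATIC int ZSTD_cpuid_sse3(ZSTD_cpuid_t const cpuid) {
+ *         return ((cpuid.f1c) & (1U << 0)) != 0;
+ *     }
+ * which is how predicates such as ZSTD_cpuid_bmi2(), used later in this file for
+ * the DYNAMIC_BMI2 dispatch, are generated.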
*/ +#define C(name, bit) X(name, f1c, bit) + C(sse3, 0) + C(pclmuldq, 1) + C(dtes64, 2) + C(monitor, 3) + C(dscpl, 4) + C(vmx, 5) + C(smx, 6) + C(eist, 7) + C(tm2, 8) + C(ssse3, 9) + C(cnxtid, 10) + C(fma, 12) + C(cx16, 13) + C(xtpr, 14) + C(pdcm, 15) + C(pcid, 17) + C(dca, 18) + C(sse41, 19) + C(sse42, 20) + C(x2apic, 21) + C(movbe, 22) + C(popcnt, 23) + C(tscdeadline, 24) + C(aes, 25) + C(xsave, 26) + C(osxsave, 27) + C(avx, 28) + C(f16c, 29) + C(rdrand, 30) +#undef C +#define D(name, bit) X(name, f1d, bit) + D(fpu, 0) + D(vme, 1) + D(de, 2) + D(pse, 3) + D(tsc, 4) + D(msr, 5) + D(pae, 6) + D(mce, 7) + D(cx8, 8) + D(apic, 9) + D(sep, 11) + D(mtrr, 12) + D(pge, 13) + D(mca, 14) + D(cmov, 15) + D(pat, 16) + D(pse36, 17) + D(psn, 18) + D(clfsh, 19) + D(ds, 21) + D(acpi, 22) + D(mmx, 23) + D(fxsr, 24) + D(sse, 25) + D(sse2, 26) + D(ss, 27) + D(htt, 28) + D(tm, 29) + D(pbe, 31) +#undef D + +/* cpuid(7): Extended Features. */ +#define B(name, bit) X(name, f7b, bit) + B(bmi1, 3) + B(hle, 4) + B(avx2, 5) + B(smep, 7) + B(bmi2, 8) + B(erms, 9) + B(invpcid, 10) + B(rtm, 11) + B(mpx, 14) + B(avx512f, 16) + B(avx512dq, 17) + B(rdseed, 18) + B(adx, 19) + B(smap, 20) + B(avx512ifma, 21) + B(pcommit, 22) + B(clflushopt, 23) + B(clwb, 24) + B(avx512pf, 26) + B(avx512er, 27) + B(avx512cd, 28) + B(sha, 29) + B(avx512bw, 30) + B(avx512vl, 31) +#undef B +#define C(name, bit) X(name, f7c, bit) + C(prefetchwt1, 0) + C(avx512vbmi, 1) +#undef C + +#undef X + +#endif /* ZSTD_COMMON_CPU_H */ +/**** ended inlining ../common/cpu.h ****/ +/**** skipping file: ../common/mem.h ****/ +/**** skipping file: hist.h ****/ +#define FSE_STATIC_LINKING_ONLY /* FSE_encodeSymbol */ +/**** skipping file: ../common/fse.h ****/ +#define HUF_STATIC_LINKING_ONLY +/**** skipping file: ../common/huf.h ****/ +/**** skipping file: zstd_compress_internal.h ****/ +/**** skipping file: zstd_compress_sequences.h ****/ +/**** skipping file: zstd_compress_literals.h ****/ +/**** start inlining zstd_fast.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_FAST_H +#define ZSTD_FAST_H + +#if defined (__cplusplus) +extern "C" { +#endif + +/**** skipping file: ../common/mem.h ****/ +/**** skipping file: zstd_compress_internal.h ****/ + +void ZSTD_fillHashTable(ZSTD_matchState_t* ms, + void const* end, ZSTD_dictTableLoadMethod_e dtlm); +size_t ZSTD_compressBlock_fast( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_fast_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_fast_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_FAST_H */ +/**** ended inlining zstd_fast.h ****/ +/**** start inlining zstd_double_fast.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. 
+ * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_DOUBLE_FAST_H +#define ZSTD_DOUBLE_FAST_H + +#if defined (__cplusplus) +extern "C" { +#endif + +/**** skipping file: ../common/mem.h ****/ +/**** skipping file: zstd_compress_internal.h ****/ + +void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms, + void const* end, ZSTD_dictTableLoadMethod_e dtlm); +size_t ZSTD_compressBlock_doubleFast( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_doubleFast_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_doubleFast_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_DOUBLE_FAST_H */ +/**** ended inlining zstd_double_fast.h ****/ +/**** start inlining zstd_lazy.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_LAZY_H +#define ZSTD_LAZY_H + +#if defined (__cplusplus) +extern "C" { +#endif + +/**** skipping file: zstd_compress_internal.h ****/ + +/** + * Dedicated Dictionary Search Structure bucket log. In the + * ZSTD_dedicatedDictSearch mode, the hashTable has + * 2 ** ZSTD_LAZY_DDSS_BUCKET_LOG entries in each bucket, rather than just + * one. + */ +#define ZSTD_LAZY_DDSS_BUCKET_LOG 2 + +U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip); +void ZSTD_row_update(ZSTD_matchState_t* const ms, const BYTE* ip); + +void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip); + +void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue); /*! used in ZSTD_reduceIndex(). 
preemptively increase value of ZSTD_DUBT_UNSORTED_MARK */ + +size_t ZSTD_compressBlock_btlazy2( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy2( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_greedy( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy2_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_greedy_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); + +size_t ZSTD_compressBlock_btlazy2_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy2_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_greedy_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy2_dictMatchState_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy_dictMatchState_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_greedy_dictMatchState_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); + +size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy_dedicatedDictSearch( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_greedy_dedicatedDictSearch( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); + +size_t ZSTD_compressBlock_greedy_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy2_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* 
src, size_t srcSize); +size_t ZSTD_compressBlock_greedy_extDict_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy_extDict_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy2_extDict_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_btlazy2_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_LAZY_H */ +/**** ended inlining zstd_lazy.h ****/ +/**** start inlining zstd_opt.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_OPT_H +#define ZSTD_OPT_H + +#if defined (__cplusplus) +extern "C" { +#endif + +/**** skipping file: zstd_compress_internal.h ****/ + +/* used in ZSTD_loadDictionaryContent() */ +void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend); + +size_t ZSTD_compressBlock_btopt( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_btultra( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_btultra2( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); + + +size_t ZSTD_compressBlock_btopt_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_btultra_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); + +size_t ZSTD_compressBlock_btopt_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_btultra_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); + + /* note : no btultra2 variant for extDict nor dictMatchState, + * because btultra2 is not meant to work with dictionaries + * and is only specific for the first block (no prefix) */ + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_OPT_H */ +/**** ended inlining zstd_opt.h ****/ +/**** start inlining zstd_ldm.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +#ifndef ZSTD_LDM_H +#define ZSTD_LDM_H + +#if defined (__cplusplus) +extern "C" { +#endif + +/**** skipping file: zstd_compress_internal.h ****/ +/**** skipping file: ../zstd.h ****/ + +/*-************************************* +* Long distance matching +***************************************/ + +#define ZSTD_LDM_DEFAULT_WINDOW_LOG ZSTD_WINDOWLOG_LIMIT_DEFAULT + +void ZSTD_ldm_fillHashTable( + ldmState_t* state, const BYTE* ip, + const BYTE* iend, ldmParams_t const* params); + +/** + * ZSTD_ldm_generateSequences(): + * + * Generates the sequences using the long distance match finder. + * Generates long range matching sequences in `sequences`, which parse a prefix + * of the source. `sequences` must be large enough to store every sequence, + * which can be checked with `ZSTD_ldm_getMaxNbSeq()`. + * @returns 0 or an error code. + * + * NOTE: The user must have called ZSTD_window_update() for all of the input + * they have, even if they pass it to ZSTD_ldm_generateSequences() in chunks. + * NOTE: This function returns an error if it runs out of space to store + * sequences. + */ +size_t ZSTD_ldm_generateSequences( + ldmState_t* ldms, rawSeqStore_t* sequences, + ldmParams_t const* params, void const* src, size_t srcSize); + +/** + * ZSTD_ldm_blockCompress(): + * + * Compresses a block using the predefined sequences, along with a secondary + * block compressor. The literals section of every sequence is passed to the + * secondary block compressor, and those sequences are interspersed with the + * predefined sequences. Returns the length of the last literals. + * Updates `rawSeqStore.pos` to indicate how many sequences have been consumed. + * `rawSeqStore.seq` may also be updated to split the last sequence between two + * blocks. + * @return The length of the last literals. + * + * NOTE: The source must be at most the maximum block size, but the predefined + * sequences can be any size, and may be longer than the block. In the case that + * they are longer than the block, the last sequences may need to be split into + * two. We handle that case correctly, and update `rawSeqStore` appropriately. + * NOTE: This function does not return any errors. + */ +size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_useRowMatchFinderMode_e useRowMatchFinder, + void const* src, size_t srcSize); + +/** + * ZSTD_ldm_skipSequences(): + * + * Skip past `srcSize` bytes worth of sequences in `rawSeqStore`. + * Avoids emitting matches less than `minMatch` bytes. + * Must be called for data that is not passed to ZSTD_ldm_blockCompress(). + */ +void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, + U32 const minMatch); + +/* ZSTD_ldm_skipRawSeqStoreBytes(): + * Moves forward in rawSeqStore by nbBytes, updating fields 'pos' and 'posInSequence'. + * Not to be used in conjunction with ZSTD_ldm_skipSequences(). + * Must be called for data with is not passed to ZSTD_ldm_blockCompress(). + */ +void ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes); + +/** ZSTD_ldm_getTableSize() : + * Estimate the space needed for long distance matching tables or 0 if LDM is + * disabled. + */ +size_t ZSTD_ldm_getTableSize(ldmParams_t params); + +/** ZSTD_ldm_getSeqSpace() : + * Return an upper bound on the number of sequences that can be produced by + * the long distance matcher, or 0 if LDM is disabled. 
+ */ +size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize); + +/** ZSTD_ldm_adjustParameters() : + * If the params->hashRateLog is not set, set it to its default value based on + * windowLog and params->hashLog. + * + * Ensures that params->bucketSizeLog is <= params->hashLog (setting it to + * params->hashLog if it is not). + * + * Ensures that the minMatchLength >= targetLength during optimal parsing. + */ +void ZSTD_ldm_adjustParameters(ldmParams_t* params, + ZSTD_compressionParameters const* cParams); + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_FAST_H */ +/**** ended inlining zstd_ldm.h ****/ +/**** skipping file: zstd_compress_superblock.h ****/ + +/* *************************************************************** +* Tuning parameters +*****************************************************************/ +/*! + * COMPRESS_HEAPMODE : + * Select how default decompression function ZSTD_compress() allocates its context, + * on stack (0, default), or into heap (1). + * Note that functions with explicit context such as ZSTD_compressCCtx() are unaffected. + */ +#ifndef ZSTD_COMPRESS_HEAPMODE +# define ZSTD_COMPRESS_HEAPMODE 0 +#endif + + +/*-************************************* +* Helper functions +***************************************/ +/* ZSTD_compressBound() + * Note that the result from this function is only compatible with the "normal" + * full-block strategy. + * When there are a lot of small blocks due to frequent flush in streaming mode + * the overhead of headers can make the compressed data to be larger than the + * return value of ZSTD_compressBound(). + */ +size_t ZSTD_compressBound(size_t srcSize) { + return ZSTD_COMPRESSBOUND(srcSize); +} + + +/*-************************************* +* Context memory management +***************************************/ +struct ZSTD_CDict_s { + const void* dictContent; + size_t dictContentSize; + ZSTD_dictContentType_e dictContentType; /* The dictContentType the CDict was created with */ + U32* entropyWorkspace; /* entropy workspace of HUF_WORKSPACE_SIZE bytes */ + ZSTD_cwksp workspace; + ZSTD_matchState_t matchState; + ZSTD_compressedBlockState_t cBlockState; + ZSTD_customMem customMem; + U32 dictID; + int compressionLevel; /* 0 indicates that advanced API was used to select CDict params */ + ZSTD_useRowMatchFinderMode_e useRowMatchFinder; /* Indicates whether the CDict was created with params that would use + * row-based matchfinder. Unless the cdict is reloaded, we will use + * the same greedy/lazy matchfinder at compression time. 
+ */ +}; /* typedef'd to ZSTD_CDict within "zstd.h" */ + +ZSTD_CCtx* ZSTD_createCCtx(void) +{ + return ZSTD_createCCtx_advanced(ZSTD_defaultCMem); +} + +static void ZSTD_initCCtx(ZSTD_CCtx* cctx, ZSTD_customMem memManager) +{ + assert(cctx != NULL); + ZSTD_memset(cctx, 0, sizeof(*cctx)); + cctx->customMem = memManager; + cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid()); + { size_t const err = ZSTD_CCtx_reset(cctx, ZSTD_reset_parameters); + assert(!ZSTD_isError(err)); + (void)err; + } +} + +ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem) +{ + ZSTD_STATIC_ASSERT(zcss_init==0); + ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN==(0ULL - 1)); + if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL; + { ZSTD_CCtx* const cctx = (ZSTD_CCtx*)ZSTD_customMalloc(sizeof(ZSTD_CCtx), customMem); + if (!cctx) return NULL; + ZSTD_initCCtx(cctx, customMem); + return cctx; + } +} + +ZSTD_CCtx* ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize) +{ + ZSTD_cwksp ws; + ZSTD_CCtx* cctx; + if (workspaceSize <= sizeof(ZSTD_CCtx)) return NULL; /* minimum size */ + if ((size_t)workspace & 7) return NULL; /* must be 8-aligned */ + ZSTD_cwksp_init(&ws, workspace, workspaceSize, ZSTD_cwksp_static_alloc); + + cctx = (ZSTD_CCtx*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CCtx)); + if (cctx == NULL) return NULL; + + ZSTD_memset(cctx, 0, sizeof(ZSTD_CCtx)); + ZSTD_cwksp_move(&cctx->workspace, &ws); + cctx->staticSize = workspaceSize; + + /* statically sized space. entropyWorkspace never moves (but prev/next block swap places) */ + if (!ZSTD_cwksp_check_available(&cctx->workspace, ENTROPY_WORKSPACE_SIZE + 2 * sizeof(ZSTD_compressedBlockState_t))) return NULL; + cctx->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t)); + cctx->blockState.nextCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t)); + cctx->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cctx->workspace, ENTROPY_WORKSPACE_SIZE); + cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid()); + return cctx; +} + +/** + * Clears and frees all of the dictionaries in the CCtx. + */ +static void ZSTD_clearAllDicts(ZSTD_CCtx* cctx) +{ + ZSTD_customFree(cctx->localDict.dictBuffer, cctx->customMem); + ZSTD_freeCDict(cctx->localDict.cdict); + ZSTD_memset(&cctx->localDict, 0, sizeof(cctx->localDict)); + ZSTD_memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict)); + cctx->cdict = NULL; +} + +static size_t ZSTD_sizeof_localDict(ZSTD_localDict dict) +{ + size_t const bufferSize = dict.dictBuffer != NULL ? 
dict.dictSize : 0; + size_t const cdictSize = ZSTD_sizeof_CDict(dict.cdict); + return bufferSize + cdictSize; +} + +static void ZSTD_freeCCtxContent(ZSTD_CCtx* cctx) +{ + assert(cctx != NULL); + assert(cctx->staticSize == 0); + ZSTD_clearAllDicts(cctx); +#ifdef ZSTD_MULTITHREAD + ZSTDMT_freeCCtx(cctx->mtctx); cctx->mtctx = NULL; +#endif + ZSTD_cwksp_free(&cctx->workspace, cctx->customMem); +} + +size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx) +{ + if (cctx==NULL) return 0; /* support free on NULL */ + RETURN_ERROR_IF(cctx->staticSize, memory_allocation, + "not compatible with static CCtx"); + { + int cctxInWorkspace = ZSTD_cwksp_owns_buffer(&cctx->workspace, cctx); + ZSTD_freeCCtxContent(cctx); + if (!cctxInWorkspace) { + ZSTD_customFree(cctx, cctx->customMem); + } + } + return 0; +} + + +static size_t ZSTD_sizeof_mtctx(const ZSTD_CCtx* cctx) +{ +#ifdef ZSTD_MULTITHREAD + return ZSTDMT_sizeof_CCtx(cctx->mtctx); +#else + (void)cctx; + return 0; +#endif +} + + +size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx) +{ + if (cctx==NULL) return 0; /* support sizeof on NULL */ + /* cctx may be in the workspace */ + return (cctx->workspace.workspace == cctx ? 0 : sizeof(*cctx)) + + ZSTD_cwksp_sizeof(&cctx->workspace) + + ZSTD_sizeof_localDict(cctx->localDict) + + ZSTD_sizeof_mtctx(cctx); +} + +size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs) +{ + return ZSTD_sizeof_CCtx(zcs); /* same object */ +} + +/* private API call, for dictBuilder only */ +const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) { return &(ctx->seqStore); } + +/* Returns true if the strategy supports using a row based matchfinder */ +static int ZSTD_rowMatchFinderSupported(const ZSTD_strategy strategy) { + return (strategy >= ZSTD_greedy && strategy <= ZSTD_lazy2); +} + +/* Returns true if the strategy and useRowMatchFinder mode indicate that we will use the row based matchfinder + * for this compression. + */ +static int ZSTD_rowMatchFinderUsed(const ZSTD_strategy strategy, const ZSTD_useRowMatchFinderMode_e mode) { + assert(mode != ZSTD_urm_auto); + return ZSTD_rowMatchFinderSupported(strategy) && (mode == ZSTD_urm_enableRowMatchFinder); +} + +/* Returns row matchfinder usage enum given an initial mode and cParams */ +static ZSTD_useRowMatchFinderMode_e ZSTD_resolveRowMatchFinderMode(ZSTD_useRowMatchFinderMode_e mode, + const ZSTD_compressionParameters* const cParams) { +#if !defined(ZSTD_NO_INTRINSICS) && (defined(__SSE2__) || defined(__ARM_NEON)) + int const kHasSIMD128 = 1; +#else + int const kHasSIMD128 = 0; +#endif + if (mode != ZSTD_urm_auto) return mode; /* if requested enabled, but no SIMD, we still will use row matchfinder */ + mode = ZSTD_urm_disableRowMatchFinder; + if (!ZSTD_rowMatchFinderSupported(cParams->strategy)) return mode; + if (kHasSIMD128) { + if (cParams->windowLog > 14) mode = ZSTD_urm_enableRowMatchFinder; + } else { + if (cParams->windowLog > 17) mode = ZSTD_urm_enableRowMatchFinder; + } + return mode; +} + +/* Returns 1 if the arguments indicate that we should allocate a chainTable, 0 otherwise */ +static int ZSTD_allocateChainTable(const ZSTD_strategy strategy, + const ZSTD_useRowMatchFinderMode_e useRowMatchFinder, + const U32 forDDSDict) { + assert(useRowMatchFinder != ZSTD_urm_auto); + /* We always should allocate a chaintable if we are allocating a matchstate for a DDS dictionary matchstate. + * We do not allocate a chaintable if we are using ZSTD_fast, or are using the row-based matchfinder. 
+ */ + return forDDSDict || ((strategy != ZSTD_fast) && !ZSTD_rowMatchFinderUsed(strategy, useRowMatchFinder)); +} + +/* Returns 1 if compression parameters are such that we should + * enable long distance matching (wlog >= 27, strategy >= btopt). + * Returns 0 otherwise. + */ +static U32 ZSTD_CParams_shouldEnableLdm(const ZSTD_compressionParameters* const cParams) { + return cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 27; +} + +/* Returns 1 if compression parameters are such that we should + * enable blockSplitter (wlog >= 17, strategy >= btopt). + * Returns 0 otherwise. + */ +static U32 ZSTD_CParams_useBlockSplitter(const ZSTD_compressionParameters* const cParams) { + return cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 17; +} + +static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams( + ZSTD_compressionParameters cParams) +{ + ZSTD_CCtx_params cctxParams; + /* should not matter, as all cParams are presumed properly defined */ + ZSTD_CCtxParams_init(&cctxParams, ZSTD_CLEVEL_DEFAULT); + cctxParams.cParams = cParams; + + /* Adjust advanced params according to cParams */ + if (ZSTD_CParams_shouldEnableLdm(&cParams)) { + DEBUGLOG(4, "ZSTD_makeCCtxParamsFromCParams(): Including LDM into cctx params"); + cctxParams.ldmParams.enableLdm = 1; + /* LDM is enabled by default for optimal parser and window size >= 128MB */ + ZSTD_ldm_adjustParameters(&cctxParams.ldmParams, &cParams); + assert(cctxParams.ldmParams.hashLog >= cctxParams.ldmParams.bucketSizeLog); + assert(cctxParams.ldmParams.hashRateLog < 32); + } + + if (ZSTD_CParams_useBlockSplitter(&cParams)) { + DEBUGLOG(4, "ZSTD_makeCCtxParamsFromCParams(): Including block splitting into cctx params"); + cctxParams.splitBlocks = 1; + } + + cctxParams.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams.useRowMatchFinder, &cParams); + assert(!ZSTD_checkCParams(cParams)); + return cctxParams; +} + +static ZSTD_CCtx_params* ZSTD_createCCtxParams_advanced( + ZSTD_customMem customMem) +{ + ZSTD_CCtx_params* params; + if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL; + params = (ZSTD_CCtx_params*)ZSTD_customCalloc( + sizeof(ZSTD_CCtx_params), customMem); + if (!params) { return NULL; } + ZSTD_CCtxParams_init(params, ZSTD_CLEVEL_DEFAULT); + params->customMem = customMem; + return params; +} + +ZSTD_CCtx_params* ZSTD_createCCtxParams(void) +{ + return ZSTD_createCCtxParams_advanced(ZSTD_defaultCMem); +} + +size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params) +{ + if (params == NULL) { return 0; } + ZSTD_customFree(params, params->customMem); + return 0; +} + +size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params) +{ + return ZSTD_CCtxParams_init(params, ZSTD_CLEVEL_DEFAULT); +} + +size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel) { + RETURN_ERROR_IF(!cctxParams, GENERIC, "NULL pointer!"); + ZSTD_memset(cctxParams, 0, sizeof(*cctxParams)); + cctxParams->compressionLevel = compressionLevel; + cctxParams->fParams.contentSizeFlag = 1; + return 0; +} + +#define ZSTD_NO_CLEVEL 0 + +/** + * Initializes the cctxParams from params and compressionLevel. + * @param compressionLevel If params are derived from a compression level then that compression level, otherwise ZSTD_NO_CLEVEL. 
+ */ +static void ZSTD_CCtxParams_init_internal(ZSTD_CCtx_params* cctxParams, ZSTD_parameters const* params, int compressionLevel) +{ + assert(!ZSTD_checkCParams(params->cParams)); + ZSTD_memset(cctxParams, 0, sizeof(*cctxParams)); + cctxParams->cParams = params->cParams; + cctxParams->fParams = params->fParams; + /* Should not matter, as all cParams are presumed properly defined. + * But, set it for tracing anyway. + */ + cctxParams->compressionLevel = compressionLevel; + cctxParams->useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams->useRowMatchFinder, &params->cParams); + DEBUGLOG(4, "ZSTD_CCtxParams_init_internal: useRowMatchFinder=%d", cctxParams->useRowMatchFinder); +} + +size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params) +{ + RETURN_ERROR_IF(!cctxParams, GENERIC, "NULL pointer!"); + FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) , ""); + ZSTD_CCtxParams_init_internal(cctxParams, &params, ZSTD_NO_CLEVEL); + return 0; +} + +/** + * Sets cctxParams' cParams and fParams from params, but otherwise leaves them alone. + * @param param Validated zstd parameters. + */ +static void ZSTD_CCtxParams_setZstdParams( + ZSTD_CCtx_params* cctxParams, const ZSTD_parameters* params) +{ + assert(!ZSTD_checkCParams(params->cParams)); + cctxParams->cParams = params->cParams; + cctxParams->fParams = params->fParams; + /* Should not matter, as all cParams are presumed properly defined. + * But, set it for tracing anyway. + */ + cctxParams->compressionLevel = ZSTD_NO_CLEVEL; +} + +ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param) +{ + ZSTD_bounds bounds = { 0, 0, 0 }; + + switch(param) + { + case ZSTD_c_compressionLevel: + bounds.lowerBound = ZSTD_minCLevel(); + bounds.upperBound = ZSTD_maxCLevel(); + return bounds; + + case ZSTD_c_windowLog: + bounds.lowerBound = ZSTD_WINDOWLOG_MIN; + bounds.upperBound = ZSTD_WINDOWLOG_MAX; + return bounds; + + case ZSTD_c_hashLog: + bounds.lowerBound = ZSTD_HASHLOG_MIN; + bounds.upperBound = ZSTD_HASHLOG_MAX; + return bounds; + + case ZSTD_c_chainLog: + bounds.lowerBound = ZSTD_CHAINLOG_MIN; + bounds.upperBound = ZSTD_CHAINLOG_MAX; + return bounds; + + case ZSTD_c_searchLog: + bounds.lowerBound = ZSTD_SEARCHLOG_MIN; + bounds.upperBound = ZSTD_SEARCHLOG_MAX; + return bounds; + + case ZSTD_c_minMatch: + bounds.lowerBound = ZSTD_MINMATCH_MIN; + bounds.upperBound = ZSTD_MINMATCH_MAX; + return bounds; + + case ZSTD_c_targetLength: + bounds.lowerBound = ZSTD_TARGETLENGTH_MIN; + bounds.upperBound = ZSTD_TARGETLENGTH_MAX; + return bounds; + + case ZSTD_c_strategy: + bounds.lowerBound = ZSTD_STRATEGY_MIN; + bounds.upperBound = ZSTD_STRATEGY_MAX; + return bounds; + + case ZSTD_c_contentSizeFlag: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + + case ZSTD_c_checksumFlag: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + + case ZSTD_c_dictIDFlag: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + + case ZSTD_c_nbWorkers: + bounds.lowerBound = 0; +#ifdef ZSTD_MULTITHREAD + bounds.upperBound = ZSTDMT_NBWORKERS_MAX; +#else + bounds.upperBound = 0; +#endif + return bounds; + + case ZSTD_c_jobSize: + bounds.lowerBound = 0; +#ifdef ZSTD_MULTITHREAD + bounds.upperBound = ZSTDMT_JOBSIZE_MAX; +#else + bounds.upperBound = 0; +#endif + return bounds; + + case ZSTD_c_overlapLog: +#ifdef ZSTD_MULTITHREAD + bounds.lowerBound = ZSTD_OVERLAPLOG_MIN; + bounds.upperBound = ZSTD_OVERLAPLOG_MAX; +#else + bounds.lowerBound = 0; + bounds.upperBound = 0; +#endif + return bounds; + + case
ZSTD_c_enableDedicatedDictSearch: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + + case ZSTD_c_enableLongDistanceMatching: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + + case ZSTD_c_ldmHashLog: + bounds.lowerBound = ZSTD_LDM_HASHLOG_MIN; + bounds.upperBound = ZSTD_LDM_HASHLOG_MAX; + return bounds; + + case ZSTD_c_ldmMinMatch: + bounds.lowerBound = ZSTD_LDM_MINMATCH_MIN; + bounds.upperBound = ZSTD_LDM_MINMATCH_MAX; + return bounds; + + case ZSTD_c_ldmBucketSizeLog: + bounds.lowerBound = ZSTD_LDM_BUCKETSIZELOG_MIN; + bounds.upperBound = ZSTD_LDM_BUCKETSIZELOG_MAX; + return bounds; + + case ZSTD_c_ldmHashRateLog: + bounds.lowerBound = ZSTD_LDM_HASHRATELOG_MIN; + bounds.upperBound = ZSTD_LDM_HASHRATELOG_MAX; + return bounds; + + /* experimental parameters */ + case ZSTD_c_rsyncable: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + + case ZSTD_c_forceMaxWindow : + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + + case ZSTD_c_format: + ZSTD_STATIC_ASSERT(ZSTD_f_zstd1 < ZSTD_f_zstd1_magicless); + bounds.lowerBound = ZSTD_f_zstd1; + bounds.upperBound = ZSTD_f_zstd1_magicless; /* note : how to ensure at compile time that this is the highest value enum ? */ + return bounds; + + case ZSTD_c_forceAttachDict: + ZSTD_STATIC_ASSERT(ZSTD_dictDefaultAttach < ZSTD_dictForceLoad); + bounds.lowerBound = ZSTD_dictDefaultAttach; + bounds.upperBound = ZSTD_dictForceLoad; /* note : how to ensure at compile time that this is the highest value enum ? */ + return bounds; + + case ZSTD_c_literalCompressionMode: + ZSTD_STATIC_ASSERT(ZSTD_lcm_auto < ZSTD_lcm_huffman && ZSTD_lcm_huffman < ZSTD_lcm_uncompressed); + bounds.lowerBound = ZSTD_lcm_auto; + bounds.upperBound = ZSTD_lcm_uncompressed; + return bounds; + + case ZSTD_c_targetCBlockSize: + bounds.lowerBound = ZSTD_TARGETCBLOCKSIZE_MIN; + bounds.upperBound = ZSTD_TARGETCBLOCKSIZE_MAX; + return bounds; + + case ZSTD_c_srcSizeHint: + bounds.lowerBound = ZSTD_SRCSIZEHINT_MIN; + bounds.upperBound = ZSTD_SRCSIZEHINT_MAX; + return bounds; + + case ZSTD_c_stableInBuffer: + case ZSTD_c_stableOutBuffer: + bounds.lowerBound = (int)ZSTD_bm_buffered; + bounds.upperBound = (int)ZSTD_bm_stable; + return bounds; + + case ZSTD_c_blockDelimiters: + bounds.lowerBound = (int)ZSTD_sf_noBlockDelimiters; + bounds.upperBound = (int)ZSTD_sf_explicitBlockDelimiters; + return bounds; + + case ZSTD_c_validateSequences: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + + case ZSTD_c_splitBlocks: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + + case ZSTD_c_useRowMatchFinder: + bounds.lowerBound = (int)ZSTD_urm_auto; + bounds.upperBound = (int)ZSTD_urm_enableRowMatchFinder; + return bounds; + + case ZSTD_c_deterministicRefPrefix: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + + default: + bounds.error = ERROR(parameter_unsupported); + return bounds; + } +} + +/* ZSTD_cParam_clampBounds: + * Clamps the value into the bounded range. 
+ */ +static size_t ZSTD_cParam_clampBounds(ZSTD_cParameter cParam, int* value) +{ + ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam); + if (ZSTD_isError(bounds.error)) return bounds.error; + if (*value < bounds.lowerBound) *value = bounds.lowerBound; + if (*value > bounds.upperBound) *value = bounds.upperBound; + return 0; +} + +#define BOUNDCHECK(cParam, val) { \ + RETURN_ERROR_IF(!ZSTD_cParam_withinBounds(cParam,val), \ + parameter_outOfBound, "Param out of bounds"); \ +} + + +static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param) +{ + switch(param) + { + case ZSTD_c_compressionLevel: + case ZSTD_c_hashLog: + case ZSTD_c_chainLog: + case ZSTD_c_searchLog: + case ZSTD_c_minMatch: + case ZSTD_c_targetLength: + case ZSTD_c_strategy: + return 1; + + case ZSTD_c_format: + case ZSTD_c_windowLog: + case ZSTD_c_contentSizeFlag: + case ZSTD_c_checksumFlag: + case ZSTD_c_dictIDFlag: + case ZSTD_c_forceMaxWindow : + case ZSTD_c_nbWorkers: + case ZSTD_c_jobSize: + case ZSTD_c_overlapLog: + case ZSTD_c_rsyncable: + case ZSTD_c_enableDedicatedDictSearch: + case ZSTD_c_enableLongDistanceMatching: + case ZSTD_c_ldmHashLog: + case ZSTD_c_ldmMinMatch: + case ZSTD_c_ldmBucketSizeLog: + case ZSTD_c_ldmHashRateLog: + case ZSTD_c_forceAttachDict: + case ZSTD_c_literalCompressionMode: + case ZSTD_c_targetCBlockSize: + case ZSTD_c_srcSizeHint: + case ZSTD_c_stableInBuffer: + case ZSTD_c_stableOutBuffer: + case ZSTD_c_blockDelimiters: + case ZSTD_c_validateSequences: + case ZSTD_c_splitBlocks: + case ZSTD_c_useRowMatchFinder: + case ZSTD_c_deterministicRefPrefix: + default: + return 0; + } +} + +size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value) +{ + DEBUGLOG(4, "ZSTD_CCtx_setParameter (%i, %i)", (int)param, value); + if (cctx->streamStage != zcss_init) { + if (ZSTD_isUpdateAuthorized(param)) { + cctx->cParamsChanged = 1; + } else { + RETURN_ERROR(stage_wrong, "can only set params in ctx init stage"); + } } + + switch(param) + { + case ZSTD_c_nbWorkers: + RETURN_ERROR_IF((value!=0) && cctx->staticSize, parameter_unsupported, + "MT not compatible with static alloc"); + break; + + case ZSTD_c_compressionLevel: + case ZSTD_c_windowLog: + case ZSTD_c_hashLog: + case ZSTD_c_chainLog: + case ZSTD_c_searchLog: + case ZSTD_c_minMatch: + case ZSTD_c_targetLength: + case ZSTD_c_strategy: + case ZSTD_c_ldmHashRateLog: + case ZSTD_c_format: + case ZSTD_c_contentSizeFlag: + case ZSTD_c_checksumFlag: + case ZSTD_c_dictIDFlag: + case ZSTD_c_forceMaxWindow: + case ZSTD_c_forceAttachDict: + case ZSTD_c_literalCompressionMode: + case ZSTD_c_jobSize: + case ZSTD_c_overlapLog: + case ZSTD_c_rsyncable: + case ZSTD_c_enableDedicatedDictSearch: + case ZSTD_c_enableLongDistanceMatching: + case ZSTD_c_ldmHashLog: + case ZSTD_c_ldmMinMatch: + case ZSTD_c_ldmBucketSizeLog: + case ZSTD_c_targetCBlockSize: + case ZSTD_c_srcSizeHint: + case ZSTD_c_stableInBuffer: + case ZSTD_c_stableOutBuffer: + case ZSTD_c_blockDelimiters: + case ZSTD_c_validateSequences: + case ZSTD_c_splitBlocks: + case ZSTD_c_useRowMatchFinder: + case ZSTD_c_deterministicRefPrefix: + break; + + default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); + } + return ZSTD_CCtxParams_setParameter(&cctx->requestedParams, param, value); +} + +size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams, + ZSTD_cParameter param, int value) +{ + DEBUGLOG(4, "ZSTD_CCtxParams_setParameter (%i, %i)", (int)param, value); + switch(param) + { + case ZSTD_c_format : + BOUNDCHECK(ZSTD_c_format, value); + CCtxParams->format = 
(ZSTD_format_e)value; + return (size_t)CCtxParams->format; + + case ZSTD_c_compressionLevel : { + FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), ""); + if (value == 0) + CCtxParams->compressionLevel = ZSTD_CLEVEL_DEFAULT; /* 0 == default */ + else + CCtxParams->compressionLevel = value; + if (CCtxParams->compressionLevel >= 0) return (size_t)CCtxParams->compressionLevel; + return 0; /* return type (size_t) cannot represent negative values */ + } + + case ZSTD_c_windowLog : + if (value!=0) /* 0 => use default */ + BOUNDCHECK(ZSTD_c_windowLog, value); + CCtxParams->cParams.windowLog = (U32)value; + return CCtxParams->cParams.windowLog; + + case ZSTD_c_hashLog : + if (value!=0) /* 0 => use default */ + BOUNDCHECK(ZSTD_c_hashLog, value); + CCtxParams->cParams.hashLog = (U32)value; + return CCtxParams->cParams.hashLog; + + case ZSTD_c_chainLog : + if (value!=0) /* 0 => use default */ + BOUNDCHECK(ZSTD_c_chainLog, value); + CCtxParams->cParams.chainLog = (U32)value; + return CCtxParams->cParams.chainLog; + + case ZSTD_c_searchLog : + if (value!=0) /* 0 => use default */ + BOUNDCHECK(ZSTD_c_searchLog, value); + CCtxParams->cParams.searchLog = (U32)value; + return (size_t)value; + + case ZSTD_c_minMatch : + if (value!=0) /* 0 => use default */ + BOUNDCHECK(ZSTD_c_minMatch, value); + CCtxParams->cParams.minMatch = value; + return CCtxParams->cParams.minMatch; + + case ZSTD_c_targetLength : + BOUNDCHECK(ZSTD_c_targetLength, value); + CCtxParams->cParams.targetLength = value; + return CCtxParams->cParams.targetLength; + + case ZSTD_c_strategy : + if (value!=0) /* 0 => use default */ + BOUNDCHECK(ZSTD_c_strategy, value); + CCtxParams->cParams.strategy = (ZSTD_strategy)value; + return (size_t)CCtxParams->cParams.strategy; + + case ZSTD_c_contentSizeFlag : + /* Content size written in frame header _when known_ (default:1) */ + DEBUGLOG(4, "set content size flag = %u", (value!=0)); + CCtxParams->fParams.contentSizeFlag = value != 0; + return CCtxParams->fParams.contentSizeFlag; + + case ZSTD_c_checksumFlag : + /* A 32-bits content checksum will be calculated and written at end of frame (default:0) */ + CCtxParams->fParams.checksumFlag = value != 0; + return CCtxParams->fParams.checksumFlag; + + case ZSTD_c_dictIDFlag : /* When applicable, dictionary's dictID is provided in frame header (default:1) */ + DEBUGLOG(4, "set dictIDFlag = %u", (value!=0)); + CCtxParams->fParams.noDictIDFlag = !value; + return !CCtxParams->fParams.noDictIDFlag; + + case ZSTD_c_forceMaxWindow : + CCtxParams->forceWindow = (value != 0); + return CCtxParams->forceWindow; + + case ZSTD_c_forceAttachDict : { + const ZSTD_dictAttachPref_e pref = (ZSTD_dictAttachPref_e)value; + BOUNDCHECK(ZSTD_c_forceAttachDict, pref); + CCtxParams->attachDictPref = pref; + return CCtxParams->attachDictPref; + } + + case ZSTD_c_literalCompressionMode : { + const ZSTD_literalCompressionMode_e lcm = (ZSTD_literalCompressionMode_e)value; + BOUNDCHECK(ZSTD_c_literalCompressionMode, lcm); + CCtxParams->literalCompressionMode = lcm; + return CCtxParams->literalCompressionMode; + } + + case ZSTD_c_nbWorkers : +#ifndef ZSTD_MULTITHREAD + RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading"); + return 0; +#else + FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), ""); + CCtxParams->nbWorkers = value; + return CCtxParams->nbWorkers; +#endif + + case ZSTD_c_jobSize : +#ifndef ZSTD_MULTITHREAD + RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading"); + return 0; +#else + /* Adjust to the 
minimum non-default value. */ + if (value != 0 && value < ZSTDMT_JOBSIZE_MIN) + value = ZSTDMT_JOBSIZE_MIN; + FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), ""); + assert(value >= 0); + CCtxParams->jobSize = value; + return CCtxParams->jobSize; +#endif + + case ZSTD_c_overlapLog : +#ifndef ZSTD_MULTITHREAD + RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading"); + return 0; +#else + FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value), ""); + CCtxParams->overlapLog = value; + return CCtxParams->overlapLog; +#endif + + case ZSTD_c_rsyncable : +#ifndef ZSTD_MULTITHREAD + RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading"); + return 0; +#else + FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value), ""); + CCtxParams->rsyncable = value; + return CCtxParams->rsyncable; +#endif + + case ZSTD_c_enableDedicatedDictSearch : + CCtxParams->enableDedicatedDictSearch = (value!=0); + return CCtxParams->enableDedicatedDictSearch; + + case ZSTD_c_enableLongDistanceMatching : + CCtxParams->ldmParams.enableLdm = (value!=0); + return CCtxParams->ldmParams.enableLdm; + + case ZSTD_c_ldmHashLog : + if (value!=0) /* 0 ==> auto */ + BOUNDCHECK(ZSTD_c_ldmHashLog, value); + CCtxParams->ldmParams.hashLog = value; + return CCtxParams->ldmParams.hashLog; + + case ZSTD_c_ldmMinMatch : + if (value!=0) /* 0 ==> default */ + BOUNDCHECK(ZSTD_c_ldmMinMatch, value); + CCtxParams->ldmParams.minMatchLength = value; + return CCtxParams->ldmParams.minMatchLength; + + case ZSTD_c_ldmBucketSizeLog : + if (value!=0) /* 0 ==> default */ + BOUNDCHECK(ZSTD_c_ldmBucketSizeLog, value); + CCtxParams->ldmParams.bucketSizeLog = value; + return CCtxParams->ldmParams.bucketSizeLog; + + case ZSTD_c_ldmHashRateLog : + RETURN_ERROR_IF(value > ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN, + parameter_outOfBound, "Param out of bounds!"); + CCtxParams->ldmParams.hashRateLog = value; + return CCtxParams->ldmParams.hashRateLog; + + case ZSTD_c_targetCBlockSize : + if (value!=0) /* 0 ==> default */ + BOUNDCHECK(ZSTD_c_targetCBlockSize, value); + CCtxParams->targetCBlockSize = value; + return CCtxParams->targetCBlockSize; + + case ZSTD_c_srcSizeHint : + if (value!=0) /* 0 ==> default */ + BOUNDCHECK(ZSTD_c_srcSizeHint, value); + CCtxParams->srcSizeHint = value; + return CCtxParams->srcSizeHint; + + case ZSTD_c_stableInBuffer: + BOUNDCHECK(ZSTD_c_stableInBuffer, value); + CCtxParams->inBufferMode = (ZSTD_bufferMode_e)value; + return CCtxParams->inBufferMode; + + case ZSTD_c_stableOutBuffer: + BOUNDCHECK(ZSTD_c_stableOutBuffer, value); + CCtxParams->outBufferMode = (ZSTD_bufferMode_e)value; + return CCtxParams->outBufferMode; + + case ZSTD_c_blockDelimiters: + BOUNDCHECK(ZSTD_c_blockDelimiters, value); + CCtxParams->blockDelimiters = (ZSTD_sequenceFormat_e)value; + return CCtxParams->blockDelimiters; + + case ZSTD_c_validateSequences: + BOUNDCHECK(ZSTD_c_validateSequences, value); + CCtxParams->validateSequences = value; + return CCtxParams->validateSequences; + + case ZSTD_c_splitBlocks: + BOUNDCHECK(ZSTD_c_splitBlocks, value); + CCtxParams->splitBlocks = value; + return CCtxParams->splitBlocks; + + case ZSTD_c_useRowMatchFinder: + BOUNDCHECK(ZSTD_c_useRowMatchFinder, value); + CCtxParams->useRowMatchFinder = (ZSTD_useRowMatchFinderMode_e)value; + return CCtxParams->useRowMatchFinder; + + case ZSTD_c_deterministicRefPrefix: + BOUNDCHECK(ZSTD_c_deterministicRefPrefix, value); + CCtxParams->deterministicRefPrefix = !!value; + return 
CCtxParams->deterministicRefPrefix; + + default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); + } +} + +size_t ZSTD_CCtx_getParameter(ZSTD_CCtx const* cctx, ZSTD_cParameter param, int* value) +{ + return ZSTD_CCtxParams_getParameter(&cctx->requestedParams, param, value); +} + +size_t ZSTD_CCtxParams_getParameter( + ZSTD_CCtx_params const* CCtxParams, ZSTD_cParameter param, int* value) +{ + switch(param) + { + case ZSTD_c_format : + *value = CCtxParams->format; + break; + case ZSTD_c_compressionLevel : + *value = CCtxParams->compressionLevel; + break; + case ZSTD_c_windowLog : + *value = (int)CCtxParams->cParams.windowLog; + break; + case ZSTD_c_hashLog : + *value = (int)CCtxParams->cParams.hashLog; + break; + case ZSTD_c_chainLog : + *value = (int)CCtxParams->cParams.chainLog; + break; + case ZSTD_c_searchLog : + *value = CCtxParams->cParams.searchLog; + break; + case ZSTD_c_minMatch : + *value = CCtxParams->cParams.minMatch; + break; + case ZSTD_c_targetLength : + *value = CCtxParams->cParams.targetLength; + break; + case ZSTD_c_strategy : + *value = (unsigned)CCtxParams->cParams.strategy; + break; + case ZSTD_c_contentSizeFlag : + *value = CCtxParams->fParams.contentSizeFlag; + break; + case ZSTD_c_checksumFlag : + *value = CCtxParams->fParams.checksumFlag; + break; + case ZSTD_c_dictIDFlag : + *value = !CCtxParams->fParams.noDictIDFlag; + break; + case ZSTD_c_forceMaxWindow : + *value = CCtxParams->forceWindow; + break; + case ZSTD_c_forceAttachDict : + *value = CCtxParams->attachDictPref; + break; + case ZSTD_c_literalCompressionMode : + *value = CCtxParams->literalCompressionMode; + break; + case ZSTD_c_nbWorkers : +#ifndef ZSTD_MULTITHREAD + assert(CCtxParams->nbWorkers == 0); +#endif + *value = CCtxParams->nbWorkers; + break; + case ZSTD_c_jobSize : +#ifndef ZSTD_MULTITHREAD + RETURN_ERROR(parameter_unsupported, "not compiled with multithreading"); +#else + assert(CCtxParams->jobSize <= INT_MAX); + *value = (int)CCtxParams->jobSize; + break; +#endif + case ZSTD_c_overlapLog : +#ifndef ZSTD_MULTITHREAD + RETURN_ERROR(parameter_unsupported, "not compiled with multithreading"); +#else + *value = CCtxParams->overlapLog; + break; +#endif + case ZSTD_c_rsyncable : +#ifndef ZSTD_MULTITHREAD + RETURN_ERROR(parameter_unsupported, "not compiled with multithreading"); +#else + *value = CCtxParams->rsyncable; + break; +#endif + case ZSTD_c_enableDedicatedDictSearch : + *value = CCtxParams->enableDedicatedDictSearch; + break; + case ZSTD_c_enableLongDistanceMatching : + *value = CCtxParams->ldmParams.enableLdm; + break; + case ZSTD_c_ldmHashLog : + *value = CCtxParams->ldmParams.hashLog; + break; + case ZSTD_c_ldmMinMatch : + *value = CCtxParams->ldmParams.minMatchLength; + break; + case ZSTD_c_ldmBucketSizeLog : + *value = CCtxParams->ldmParams.bucketSizeLog; + break; + case ZSTD_c_ldmHashRateLog : + *value = CCtxParams->ldmParams.hashRateLog; + break; + case ZSTD_c_targetCBlockSize : + *value = (int)CCtxParams->targetCBlockSize; + break; + case ZSTD_c_srcSizeHint : + *value = (int)CCtxParams->srcSizeHint; + break; + case ZSTD_c_stableInBuffer : + *value = (int)CCtxParams->inBufferMode; + break; + case ZSTD_c_stableOutBuffer : + *value = (int)CCtxParams->outBufferMode; + break; + case ZSTD_c_blockDelimiters : + *value = (int)CCtxParams->blockDelimiters; + break; + case ZSTD_c_validateSequences : + *value = (int)CCtxParams->validateSequences; + break; + case ZSTD_c_splitBlocks : + *value = (int)CCtxParams->splitBlocks; + break; + case ZSTD_c_useRowMatchFinder : + *value = 
(int)CCtxParams->useRowMatchFinder; + break; + case ZSTD_c_deterministicRefPrefix: + *value = (int)CCtxParams->deterministicRefPrefix; + break; + default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); + } + return 0; +} + +/** ZSTD_CCtx_setParametersUsingCCtxParams() : + * just applies `params` into `cctx` + * no action is performed, parameters are merely stored. + * If ZSTDMT is enabled, parameters are pushed to cctx->mtctx. + * This is possible even if a compression is ongoing. + * In which case, new parameters will be applied on the fly, starting with next compression job. + */ +size_t ZSTD_CCtx_setParametersUsingCCtxParams( + ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params) +{ + DEBUGLOG(4, "ZSTD_CCtx_setParametersUsingCCtxParams"); + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, + "The context is in the wrong stage!"); + RETURN_ERROR_IF(cctx->cdict, stage_wrong, + "Can't override parameters with cdict attached (some must " + "be inherited from the cdict)."); + + cctx->requestedParams = *params; + return 0; +} + +ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize) +{ + DEBUGLOG(4, "ZSTD_CCtx_setPledgedSrcSize to %u bytes", (U32)pledgedSrcSize); + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, + "Can't set pledgedSrcSize when not in init stage."); + cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1; + return 0; +} + +static ZSTD_compressionParameters ZSTD_dedicatedDictSearch_getCParams( + int const compressionLevel, + size_t const dictSize); +static int ZSTD_dedicatedDictSearch_isSupported( + const ZSTD_compressionParameters* cParams); +static void ZSTD_dedicatedDictSearch_revertCParams( + ZSTD_compressionParameters* cParams); + +/** + * Initializes the local dict using the requested parameters. + * NOTE: This does not use the pledged src size, because it may be used for more + * than one compression. + */ +static size_t ZSTD_initLocalDict(ZSTD_CCtx* cctx) +{ + ZSTD_localDict* const dl = &cctx->localDict; + if (dl->dict == NULL) { + /* No local dictionary. */ + assert(dl->dictBuffer == NULL); + assert(dl->cdict == NULL); + assert(dl->dictSize == 0); + return 0; + } + if (dl->cdict != NULL) { + assert(cctx->cdict == dl->cdict); + /* Local dictionary already initialized. 
*/ + return 0; + } + assert(dl->dictSize > 0); + assert(cctx->cdict == NULL); + assert(cctx->prefixDict.dict == NULL); + + dl->cdict = ZSTD_createCDict_advanced2( + dl->dict, + dl->dictSize, + ZSTD_dlm_byRef, + dl->dictContentType, + &cctx->requestedParams, + cctx->customMem); + RETURN_ERROR_IF(!dl->cdict, memory_allocation, "ZSTD_createCDict_advanced failed"); + cctx->cdict = dl->cdict; + return 0; +} + +size_t ZSTD_CCtx_loadDictionary_advanced( + ZSTD_CCtx* cctx, const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType) +{ + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, + "Can't load a dictionary when ctx is not in init stage."); + DEBUGLOG(4, "ZSTD_CCtx_loadDictionary_advanced (size: %u)", (U32)dictSize); + ZSTD_clearAllDicts(cctx); /* in case one already exists */ + if (dict == NULL || dictSize == 0) /* no dictionary mode */ + return 0; + if (dictLoadMethod == ZSTD_dlm_byRef) { + cctx->localDict.dict = dict; + } else { + void* dictBuffer; + RETURN_ERROR_IF(cctx->staticSize, memory_allocation, + "no malloc for static CCtx"); + dictBuffer = ZSTD_customMalloc(dictSize, cctx->customMem); + RETURN_ERROR_IF(!dictBuffer, memory_allocation, "NULL pointer!"); + ZSTD_memcpy(dictBuffer, dict, dictSize); + cctx->localDict.dictBuffer = dictBuffer; + cctx->localDict.dict = dictBuffer; + } + cctx->localDict.dictSize = dictSize; + cctx->localDict.dictContentType = dictContentType; + return 0; +} + +ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_byReference( + ZSTD_CCtx* cctx, const void* dict, size_t dictSize) +{ + return ZSTD_CCtx_loadDictionary_advanced( + cctx, dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto); +} + +ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize) +{ + return ZSTD_CCtx_loadDictionary_advanced( + cctx, dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto); +} + + +size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict) +{ + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, + "Can't ref a dict when ctx not in init stage."); + /* Free the existing local cdict (if any) to save memory. */ + ZSTD_clearAllDicts(cctx); + cctx->cdict = cdict; + return 0; +} + +size_t ZSTD_CCtx_refThreadPool(ZSTD_CCtx* cctx, ZSTD_threadPool* pool) +{ + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, + "Can't ref a pool when ctx not in init stage."); + cctx->pool = pool; + return 0; +} + +size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize) +{ + return ZSTD_CCtx_refPrefix_advanced(cctx, prefix, prefixSize, ZSTD_dct_rawContent); +} + +size_t ZSTD_CCtx_refPrefix_advanced( + ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType) +{ + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, + "Can't ref a prefix when ctx not in init stage."); + ZSTD_clearAllDicts(cctx); + if (prefix != NULL && prefixSize > 0) { + cctx->prefixDict.dict = prefix; + cctx->prefixDict.dictSize = prefixSize; + cctx->prefixDict.dictContentType = dictContentType; + } + return 0; +} + +/*! 
ZSTD_CCtx_reset() : + * Also dumps dictionary */ +size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset) +{ + if ( (reset == ZSTD_reset_session_only) + || (reset == ZSTD_reset_session_and_parameters) ) { + cctx->streamStage = zcss_init; + cctx->pledgedSrcSizePlusOne = 0; + } + if ( (reset == ZSTD_reset_parameters) + || (reset == ZSTD_reset_session_and_parameters) ) { + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, + "Can't reset parameters only when not in init stage."); + ZSTD_clearAllDicts(cctx); + return ZSTD_CCtxParams_reset(&cctx->requestedParams); + } + return 0; +} + + +/** ZSTD_checkCParams() : + control CParam values remain within authorized range. + @return : 0, or an error code if one value is beyond authorized range */ +size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams) +{ + BOUNDCHECK(ZSTD_c_windowLog, (int)cParams.windowLog); + BOUNDCHECK(ZSTD_c_chainLog, (int)cParams.chainLog); + BOUNDCHECK(ZSTD_c_hashLog, (int)cParams.hashLog); + BOUNDCHECK(ZSTD_c_searchLog, (int)cParams.searchLog); + BOUNDCHECK(ZSTD_c_minMatch, (int)cParams.minMatch); + BOUNDCHECK(ZSTD_c_targetLength,(int)cParams.targetLength); + BOUNDCHECK(ZSTD_c_strategy, cParams.strategy); + return 0; +} + +/** ZSTD_clampCParams() : + * make CParam values within valid range. + * @return : valid CParams */ +static ZSTD_compressionParameters +ZSTD_clampCParams(ZSTD_compressionParameters cParams) +{ +# define CLAMP_TYPE(cParam, val, type) { \ + ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam); \ + if ((int)val<bounds.lowerBound) val=(type)bounds.lowerBound; \ + else if ((int)val>bounds.upperBound) val=(type)bounds.upperBound; \ + } +# define CLAMP(cParam, val) CLAMP_TYPE(cParam, val, unsigned) + CLAMP(ZSTD_c_windowLog, cParams.windowLog); + CLAMP(ZSTD_c_chainLog, cParams.chainLog); + CLAMP(ZSTD_c_hashLog, cParams.hashLog); + CLAMP(ZSTD_c_searchLog, cParams.searchLog); + CLAMP(ZSTD_c_minMatch, cParams.minMatch); + CLAMP(ZSTD_c_targetLength,cParams.targetLength); + CLAMP_TYPE(ZSTD_c_strategy,cParams.strategy, ZSTD_strategy); + return cParams; +} + +/** ZSTD_cycleLog() : + * condition for correct operation : hashLog > 1 */ +U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat) +{ + U32 const btScale = ((U32)strat >= (U32)ZSTD_btlazy2); + return hashLog - btScale; +} + +/** ZSTD_dictAndWindowLog() : + * Returns an adjusted window log that is large enough to fit the source and the dictionary. + * The zstd format says that the entire dictionary is valid if one byte of the dictionary + * is within the window. So the hashLog and chainLog should be large enough to reference both + * the dictionary and the window. So we must use this adjusted dictAndWindowLog when downsizing + * the hashLog and windowLog. + * NOTE: srcSize must not be ZSTD_CONTENTSIZE_UNKNOWN. + */ +static U32 ZSTD_dictAndWindowLog(U32 windowLog, U64 srcSize, U64 dictSize) +{ + const U64 maxWindowSize = 1ULL << ZSTD_WINDOWLOG_MAX; + /* No dictionary ==> No change */ + if (dictSize == 0) { + return windowLog; + } + assert(windowLog <= ZSTD_WINDOWLOG_MAX); + assert(srcSize != ZSTD_CONTENTSIZE_UNKNOWN); /* Handled in ZSTD_adjustCParams_internal() */ + { + U64 const windowSize = 1ULL << windowLog; + U64 const dictAndWindowSize = dictSize + windowSize; + /* If the window size is already large enough to fit both the source and the dictionary + * then just use the window size. Otherwise adjust so that it fits the dictionary and + * the window.
+ */ + if (windowSize >= dictSize + srcSize) { + return windowLog; /* Window size large enough already */ + } else if (dictAndWindowSize >= maxWindowSize) { + return ZSTD_WINDOWLOG_MAX; /* Larger than max window log */ + } else { + return ZSTD_highbit32((U32)dictAndWindowSize - 1) + 1; + } + } +} + +/** ZSTD_adjustCParams_internal() : + * optimize `cPar` for a specified input (`srcSize` and `dictSize`). + * mostly downsize to reduce memory consumption and initialization latency. + * `srcSize` can be ZSTD_CONTENTSIZE_UNKNOWN when not known. + * `mode` is the mode for parameter adjustment. See docs for `ZSTD_cParamMode_e`. + * note : `srcSize==0` means 0! + * condition : cPar is presumed validated (can be checked using ZSTD_checkCParams()). */ +static ZSTD_compressionParameters +ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar, + unsigned long long srcSize, + size_t dictSize, + ZSTD_cParamMode_e mode) +{ + const U64 minSrcSize = 513; /* (1<<9) + 1 */ + const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1); + assert(ZSTD_checkCParams(cPar)==0); + + switch (mode) { + case ZSTD_cpm_unknown: + case ZSTD_cpm_noAttachDict: + /* If we don't know the source size, don't make any + * assumptions about it. We will already have selected + * smaller parameters if a dictionary is in use. + */ + break; + case ZSTD_cpm_createCDict: + /* Assume a small source size when creating a dictionary + * with an unkown source size. + */ + if (dictSize && srcSize == ZSTD_CONTENTSIZE_UNKNOWN) + srcSize = minSrcSize; + break; + case ZSTD_cpm_attachDict: + /* Dictionary has its own dedicated parameters which have + * already been selected. We are selecting parameters + * for only the source. + */ + dictSize = 0; + break; + default: + assert(0); + break; + } + + /* resize windowLog if input is small enough, to use less memory */ + if ( (srcSize < maxWindowResize) + && (dictSize < maxWindowResize) ) { + U32 const tSize = (U32)(srcSize + dictSize); + static U32 const hashSizeMin = 1 << ZSTD_HASHLOG_MIN; + U32 const srcLog = (tSize < hashSizeMin) ? 
ZSTD_HASHLOG_MIN : + ZSTD_highbit32(tSize-1) + 1; + if (cPar.windowLog > srcLog) cPar.windowLog = srcLog; + } + if (srcSize != ZSTD_CONTENTSIZE_UNKNOWN) { + U32 const dictAndWindowLog = ZSTD_dictAndWindowLog(cPar.windowLog, (U64)srcSize, (U64)dictSize); + U32 const cycleLog = ZSTD_cycleLog(cPar.chainLog, cPar.strategy); + if (cPar.hashLog > dictAndWindowLog+1) cPar.hashLog = dictAndWindowLog+1; + if (cycleLog > dictAndWindowLog) + cPar.chainLog -= (cycleLog - dictAndWindowLog); + } + + if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN) + cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN; /* minimum wlog required for valid frame header */ + + return cPar; +} + +ZSTD_compressionParameters +ZSTD_adjustCParams(ZSTD_compressionParameters cPar, + unsigned long long srcSize, + size_t dictSize) +{ + cPar = ZSTD_clampCParams(cPar); /* resulting cPar is necessarily valid (all parameters within range) */ + if (srcSize == 0) srcSize = ZSTD_CONTENTSIZE_UNKNOWN; + return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize, ZSTD_cpm_unknown); +} + +static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode); +static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode); + +static void ZSTD_overrideCParams( + ZSTD_compressionParameters* cParams, + const ZSTD_compressionParameters* overrides) +{ + if (overrides->windowLog) cParams->windowLog = overrides->windowLog; + if (overrides->hashLog) cParams->hashLog = overrides->hashLog; + if (overrides->chainLog) cParams->chainLog = overrides->chainLog; + if (overrides->searchLog) cParams->searchLog = overrides->searchLog; + if (overrides->minMatch) cParams->minMatch = overrides->minMatch; + if (overrides->targetLength) cParams->targetLength = overrides->targetLength; + if (overrides->strategy) cParams->strategy = overrides->strategy; +} + +ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams( + const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode) +{ + ZSTD_compressionParameters cParams; + if (srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN && CCtxParams->srcSizeHint > 0) { + srcSizeHint = CCtxParams->srcSizeHint; + } + cParams = ZSTD_getCParams_internal(CCtxParams->compressionLevel, srcSizeHint, dictSize, mode); + if (CCtxParams->ldmParams.enableLdm) cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG; + ZSTD_overrideCParams(&cParams, &CCtxParams->cParams); + assert(!ZSTD_checkCParams(cParams)); + /* srcSizeHint == 0 means 0 */ + return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize, mode); +} + +static size_t +ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams, + const ZSTD_useRowMatchFinderMode_e useRowMatchFinder, + const U32 enableDedicatedDictSearch, + const U32 forCCtx) +{ + /* chain table size should be 0 for fast or row-hash strategies */ + size_t const chainSize = ZSTD_allocateChainTable(cParams->strategy, useRowMatchFinder, enableDedicatedDictSearch && !forCCtx) + ? ((size_t)1 << cParams->chainLog) + : 0; + size_t const hSize = ((size_t)1) << cParams->hashLog; + U32 const hashLog3 = (forCCtx && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0; + size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0; + /* We don't use ZSTD_cwksp_alloc_size() here because the tables aren't + * surrounded by redzones in ASAN. 
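+ * As a rough illustration (made-up parameters): with chainLog = 16, hashLog = 17 and no
+ * hashLog3 table, tableSpace below is ((1<<16) + (1<<17)) * sizeof(U32) = 768 KiB,
+ * before adding the optional optimal-parser and row-match-finder allocations.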
*/ + size_t const tableSpace = chainSize * sizeof(U32) + + hSize * sizeof(U32) + + h3Size * sizeof(U32); + size_t const optPotentialSpace = + ZSTD_cwksp_aligned_alloc_size((MaxML+1) * sizeof(U32)) + + ZSTD_cwksp_aligned_alloc_size((MaxLL+1) * sizeof(U32)) + + ZSTD_cwksp_aligned_alloc_size((MaxOff+1) * sizeof(U32)) + + ZSTD_cwksp_aligned_alloc_size((1<strategy, useRowMatchFinder) + ? ZSTD_cwksp_aligned_alloc_size(hSize*sizeof(U16)) + : 0; + size_t const optSpace = (forCCtx && (cParams->strategy >= ZSTD_btopt)) + ? optPotentialSpace + : 0; + size_t const slackSpace = ZSTD_cwksp_slack_space_required(); + + /* tables are guaranteed to be sized in multiples of 64 bytes (or 16 uint32_t) */ + ZSTD_STATIC_ASSERT(ZSTD_HASHLOG_MIN >= 4 && ZSTD_WINDOWLOG_MIN >= 4 && ZSTD_CHAINLOG_MIN >= 4); + assert(useRowMatchFinder != ZSTD_urm_auto); + + DEBUGLOG(4, "chainSize: %u - hSize: %u - h3Size: %u", + (U32)chainSize, (U32)hSize, (U32)h3Size); + return tableSpace + optSpace + slackSpace + lazyAdditionalSpace; +} + +static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal( + const ZSTD_compressionParameters* cParams, + const ldmParams_t* ldmParams, + const int isStatic, + const ZSTD_useRowMatchFinderMode_e useRowMatchFinder, + const size_t buffInSize, + const size_t buffOutSize, + const U64 pledgedSrcSize) +{ + size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << cParams->windowLog), pledgedSrcSize)); + size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize); + U32 const divider = (cParams->minMatch==3) ? 3 : 4; + size_t const maxNbSeq = blockSize / divider; + size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize) + + ZSTD_cwksp_aligned_alloc_size(maxNbSeq * sizeof(seqDef)) + + 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE)); + size_t const entropySpace = ZSTD_cwksp_alloc_size(ENTROPY_WORKSPACE_SIZE); + size_t const blockStateSpace = 2 * ZSTD_cwksp_alloc_size(sizeof(ZSTD_compressedBlockState_t)); + size_t const matchStateSize = ZSTD_sizeof_matchState(cParams, useRowMatchFinder, /* enableDedicatedDictSearch */ 0, /* forCCtx */ 1); + + size_t const ldmSpace = ZSTD_ldm_getTableSize(*ldmParams); + size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(*ldmParams, blockSize); + size_t const ldmSeqSpace = ldmParams->enableLdm ? + ZSTD_cwksp_aligned_alloc_size(maxNbLdmSeq * sizeof(rawSeq)) : 0; + + + size_t const bufferSpace = ZSTD_cwksp_alloc_size(buffInSize) + + ZSTD_cwksp_alloc_size(buffOutSize); + + size_t const cctxSpace = isStatic ? ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx)) : 0; + + size_t const neededSpace = + cctxSpace + + entropySpace + + blockStateSpace + + ldmSpace + + ldmSeqSpace + + matchStateSize + + tokenSpace + + bufferSpace; + + DEBUGLOG(5, "estimate workspace : %u", (U32)neededSpace); + return neededSpace; +} + +size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params) +{ + ZSTD_compressionParameters const cParams = + ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict); + ZSTD_useRowMatchFinderMode_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params->useRowMatchFinder, + &cParams); + + RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only."); + /* estimateCCtxSize is for one-shot compression. So no buffers should + * be needed. However, we still allocate two 0-sized buffers, which can + * take space under ASAN. 
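+ * Concretely, the figure returned below is the sum computed in
+ * ZSTD_estimateCCtxSize_usingCCtxParams_internal(): cctx object + entropy workspace +
+ * block state + LDM tables/sequences + match-state tables + token space, with both
+ * stream buffers sized to 0 for this one-shot estimate.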
+ */
+    return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
+        &cParams, &params->ldmParams, 1, useRowMatchFinder, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN);
+}
+
+size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams)
+{
+    ZSTD_CCtx_params initialParams = ZSTD_makeCCtxParamsFromCParams(cParams);
+    if (ZSTD_rowMatchFinderSupported(cParams.strategy)) {
+        /* Pick bigger of not using and using row-based matchfinder for greedy and lazy strategies */
+        size_t noRowCCtxSize;
+        size_t rowCCtxSize;
+        initialParams.useRowMatchFinder = ZSTD_urm_disableRowMatchFinder;
+        noRowCCtxSize = ZSTD_estimateCCtxSize_usingCCtxParams(&initialParams);
+        initialParams.useRowMatchFinder = ZSTD_urm_enableRowMatchFinder;
+        rowCCtxSize = ZSTD_estimateCCtxSize_usingCCtxParams(&initialParams);
+        return MAX(noRowCCtxSize, rowCCtxSize);
+    } else {
+        return ZSTD_estimateCCtxSize_usingCCtxParams(&initialParams);
+    }
+}
+
+static size_t ZSTD_estimateCCtxSize_internal(int compressionLevel)
+{
+    int tier = 0;
+    size_t largestSize = 0;
+    static const unsigned long long srcSizeTiers[4] = {16 KB, 128 KB, 256 KB, ZSTD_CONTENTSIZE_UNKNOWN};
+    for (; tier < 4; ++tier) {
+        /* Choose the set of cParams for a given level across all srcSizes that give the largest cctxSize */
+        ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, srcSizeTiers[tier], 0, ZSTD_cpm_noAttachDict);
+        largestSize = MAX(ZSTD_estimateCCtxSize_usingCParams(cParams), largestSize);
+    }
+    return largestSize;
+}
+
+size_t ZSTD_estimateCCtxSize(int compressionLevel)
+{
+    int level;
+    size_t memBudget = 0;
+    for (level=MIN(compressionLevel, 1); level<=compressionLevel; level++) {
+        /* Ensure monotonically increasing memory usage as compression level increases */
+        size_t const newMB = ZSTD_estimateCCtxSize_internal(level);
+        if (newMB > memBudget) memBudget = newMB;
+    }
+    return memBudget;
+}
+
+size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params)
+{
+    RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only.");
+    {   ZSTD_compressionParameters const cParams =
+                ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);
+        size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog);
+        size_t const inBuffSize = (params->inBufferMode == ZSTD_bm_buffered)
+                ? ((size_t)1 << cParams.windowLog) + blockSize
+                : 0;
+        size_t const outBuffSize = (params->outBufferMode == ZSTD_bm_buffered)
+                ?
ZSTD_compressBound(blockSize) + 1 + : 0; + ZSTD_useRowMatchFinderMode_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params->useRowMatchFinder, ¶ms->cParams); + + return ZSTD_estimateCCtxSize_usingCCtxParams_internal( + &cParams, ¶ms->ldmParams, 1, useRowMatchFinder, inBuffSize, outBuffSize, + ZSTD_CONTENTSIZE_UNKNOWN); + } +} + +size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams) +{ + ZSTD_CCtx_params initialParams = ZSTD_makeCCtxParamsFromCParams(cParams); + if (ZSTD_rowMatchFinderSupported(cParams.strategy)) { + /* Pick bigger of not using and using row-based matchfinder for greedy and lazy strategies */ + size_t noRowCCtxSize; + size_t rowCCtxSize; + initialParams.useRowMatchFinder = ZSTD_urm_disableRowMatchFinder; + noRowCCtxSize = ZSTD_estimateCStreamSize_usingCCtxParams(&initialParams); + initialParams.useRowMatchFinder = ZSTD_urm_enableRowMatchFinder; + rowCCtxSize = ZSTD_estimateCStreamSize_usingCCtxParams(&initialParams); + return MAX(noRowCCtxSize, rowCCtxSize); + } else { + return ZSTD_estimateCStreamSize_usingCCtxParams(&initialParams); + } +} + +static size_t ZSTD_estimateCStreamSize_internal(int compressionLevel) +{ + ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict); + return ZSTD_estimateCStreamSize_usingCParams(cParams); +} + +size_t ZSTD_estimateCStreamSize(int compressionLevel) +{ + int level; + size_t memBudget = 0; + for (level=MIN(compressionLevel, 1); level<=compressionLevel; level++) { + size_t const newMB = ZSTD_estimateCStreamSize_internal(level); + if (newMB > memBudget) memBudget = newMB; + } + return memBudget; +} + +/* ZSTD_getFrameProgression(): + * tells how much data has been consumed (input) and produced (output) for current frame. + * able to count progression inside worker threads (non-blocking mode). + */ +ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx) +{ +#ifdef ZSTD_MULTITHREAD + if (cctx->appliedParams.nbWorkers > 0) { + return ZSTDMT_getFrameProgression(cctx->mtctx); + } +#endif + { ZSTD_frameProgression fp; + size_t const buffered = (cctx->inBuff == NULL) ? 0 : + cctx->inBuffPos - cctx->inToCompress; + if (buffered) assert(cctx->inBuffPos >= cctx->inToCompress); + assert(buffered <= ZSTD_BLOCKSIZE_MAX); + fp.ingested = cctx->consumedSrcSize + buffered; + fp.consumed = cctx->consumedSrcSize; + fp.produced = cctx->producedCSize; + fp.flushed = cctx->producedCSize; /* simplified; some data might still be left within streaming output buffer */ + fp.currentJobID = 0; + fp.nbActiveWorkers = 0; + return fp; +} } + +/*! ZSTD_toFlushNow() + * Only useful for multithreading scenarios currently (nbWorkers >= 1). 
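+ * With nbWorkers == 0 it currently just reports 0 (an over-simplification noted in the
+ * code below); with workers it forwards to ZSTDMT_toFlushNow(). A streaming caller can
+ * poll it between ZSTD_compressStream2() calls to judge whether another flush is
+ * worthwhile (usage sketch, not a formal guarantee).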
+ */ +size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx) +{ +#ifdef ZSTD_MULTITHREAD + if (cctx->appliedParams.nbWorkers > 0) { + return ZSTDMT_toFlushNow(cctx->mtctx); + } +#endif + (void)cctx; + return 0; /* over-simplification; could also check if context is currently running in streaming mode, and in which case, report how many bytes are left to be flushed within output buffer */ +} + +static void ZSTD_assertEqualCParams(ZSTD_compressionParameters cParams1, + ZSTD_compressionParameters cParams2) +{ + (void)cParams1; + (void)cParams2; + assert(cParams1.windowLog == cParams2.windowLog); + assert(cParams1.chainLog == cParams2.chainLog); + assert(cParams1.hashLog == cParams2.hashLog); + assert(cParams1.searchLog == cParams2.searchLog); + assert(cParams1.minMatch == cParams2.minMatch); + assert(cParams1.targetLength == cParams2.targetLength); + assert(cParams1.strategy == cParams2.strategy); +} + +void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs) +{ + int i; + for (i = 0; i < ZSTD_REP_NUM; ++i) + bs->rep[i] = repStartValue[i]; + bs->entropy.huf.repeatMode = HUF_repeat_none; + bs->entropy.fse.offcode_repeatMode = FSE_repeat_none; + bs->entropy.fse.matchlength_repeatMode = FSE_repeat_none; + bs->entropy.fse.litlength_repeatMode = FSE_repeat_none; +} + +/*! ZSTD_invalidateMatchState() + * Invalidate all the matches in the match finder tables. + * Requires nextSrc and base to be set (can be NULL). + */ +static void ZSTD_invalidateMatchState(ZSTD_matchState_t* ms) +{ + ZSTD_window_clear(&ms->window); + + ms->nextToUpdate = ms->window.dictLimit; + ms->loadedDictEnd = 0; + ms->opt.litLengthSum = 0; /* force reset of btopt stats */ + ms->dictMatchState = NULL; +} + +/** + * Controls, for this matchState reset, whether the tables need to be cleared / + * prepared for the coming compression (ZSTDcrp_makeClean), or whether the + * tables can be left unclean (ZSTDcrp_leaveDirty), because we know that a + * subsequent operation will overwrite the table space anyways (e.g., copying + * the matchState contents in from a CDict). + */ +typedef enum { + ZSTDcrp_makeClean, + ZSTDcrp_leaveDirty +} ZSTD_compResetPolicy_e; + +/** + * Controls, for this matchState reset, whether indexing can continue where it + * left off (ZSTDirp_continue), or whether it needs to be restarted from zero + * (ZSTDirp_reset). + */ +typedef enum { + ZSTDirp_continue, + ZSTDirp_reset +} ZSTD_indexResetPolicy_e; + +typedef enum { + ZSTD_resetTarget_CDict, + ZSTD_resetTarget_CCtx +} ZSTD_resetTarget_e; + + +static size_t +ZSTD_reset_matchState(ZSTD_matchState_t* ms, + ZSTD_cwksp* ws, + const ZSTD_compressionParameters* cParams, + const ZSTD_useRowMatchFinderMode_e useRowMatchFinder, + const ZSTD_compResetPolicy_e crp, + const ZSTD_indexResetPolicy_e forceResetIndex, + const ZSTD_resetTarget_e forWho) +{ + /* disable chain table allocation for fast or row-based strategies */ + size_t const chainSize = ZSTD_allocateChainTable(cParams->strategy, useRowMatchFinder, + ms->dedicatedDictSearch && (forWho == ZSTD_resetTarget_CDict)) + ? ((size_t)1 << cParams->chainLog) + : 0; + size_t const hSize = ((size_t)1) << cParams->hashLog; + U32 const hashLog3 = ((forWho == ZSTD_resetTarget_CCtx) && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0; + size_t const h3Size = hashLog3 ? 
((size_t)1) << hashLog3 : 0; + + DEBUGLOG(4, "reset indices : %u", forceResetIndex == ZSTDirp_reset); + assert(useRowMatchFinder != ZSTD_urm_auto); + if (forceResetIndex == ZSTDirp_reset) { + ZSTD_window_init(&ms->window); + ZSTD_cwksp_mark_tables_dirty(ws); + } + + ms->hashLog3 = hashLog3; + + ZSTD_invalidateMatchState(ms); + + assert(!ZSTD_cwksp_reserve_failed(ws)); /* check that allocation hasn't already failed */ + + ZSTD_cwksp_clear_tables(ws); + + DEBUGLOG(5, "reserving table space"); + /* table Space */ + ms->hashTable = (U32*)ZSTD_cwksp_reserve_table(ws, hSize * sizeof(U32)); + ms->chainTable = (U32*)ZSTD_cwksp_reserve_table(ws, chainSize * sizeof(U32)); + ms->hashTable3 = (U32*)ZSTD_cwksp_reserve_table(ws, h3Size * sizeof(U32)); + RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation, + "failed a workspace allocation in ZSTD_reset_matchState"); + + DEBUGLOG(4, "reset table : %u", crp!=ZSTDcrp_leaveDirty); + if (crp!=ZSTDcrp_leaveDirty) { + /* reset tables only */ + ZSTD_cwksp_clean_tables(ws); + } + + /* opt parser space */ + if ((forWho == ZSTD_resetTarget_CCtx) && (cParams->strategy >= ZSTD_btopt)) { + DEBUGLOG(4, "reserving optimal parser space"); + ms->opt.litFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (1<opt.litLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxLL+1) * sizeof(unsigned)); + ms->opt.matchLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxML+1) * sizeof(unsigned)); + ms->opt.offCodeFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxOff+1) * sizeof(unsigned)); + ms->opt.matchTable = (ZSTD_match_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t)); + ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t)); + } + + if (ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder)) { + { /* Row match finder needs an additional table of hashes ("tags") */ + size_t const tagTableSize = hSize*sizeof(U16); + ms->tagTable = (U16*)ZSTD_cwksp_reserve_aligned(ws, tagTableSize); + if (ms->tagTable) ZSTD_memset(ms->tagTable, 0, tagTableSize); + } + { /* Switch to 32-entry rows if searchLog is 5 (or more) */ + U32 const rowLog = cParams->searchLog < 5 ? 4 : 5; + assert(cParams->hashLog > rowLog); + ms->rowHashLog = cParams->hashLog - rowLog; + } + } + + ms->cParams = *cParams; + + RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation, + "failed a workspace allocation in ZSTD_reset_matchState"); + return 0; +} + +/* ZSTD_indexTooCloseToMax() : + * minor optimization : prefer memset() rather than reduceIndex() + * which is measurably slow in some circumstances (reported for Visual Studio). + * Works when re-using a context for a lot of smallish inputs : + * if all inputs are smaller than ZSTD_INDEXOVERFLOW_MARGIN, + * memset() will be triggered before reduceIndex(). + */ +#define ZSTD_INDEXOVERFLOW_MARGIN (16 MB) +static int ZSTD_indexTooCloseToMax(ZSTD_window_t w) +{ + return (size_t)(w.nextSrc - w.base) > (ZSTD_CURRENT_MAX - ZSTD_INDEXOVERFLOW_MARGIN); +} + +/** ZSTD_dictTooBig(): + * When dictionaries are larger than ZSTD_CHUNKSIZE_MAX they can't be loaded in + * one go generically. So we ensure that in that case we reset the tables to zero, + * so that we can load as much of the dictionary as possible. + */ +static int ZSTD_dictTooBig(size_t const loadedDictSize) +{ + return loadedDictSize > ZSTD_CHUNKSIZE_MAX; +} + +/*! ZSTD_resetCCtx_internal() : + * @param loadedDictSize The size of the dictionary to be loaded + * into the context, if any. 
If no dictionary is used, or the + * dictionary is being attached / copied, then pass 0. + * note : `params` are assumed fully validated at this stage. + */ +static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, + ZSTD_CCtx_params const* params, + U64 const pledgedSrcSize, + size_t const loadedDictSize, + ZSTD_compResetPolicy_e const crp, + ZSTD_buffered_policy_e const zbuff) +{ + ZSTD_cwksp* const ws = &zc->workspace; + DEBUGLOG(4, "ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u, useRowMatchFinder=%d", + (U32)pledgedSrcSize, params->cParams.windowLog, (int)params->useRowMatchFinder); + assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams))); + + zc->isFirstBlock = 1; + + /* Set applied params early so we can modify them for LDM, + * and point params at the applied params. + */ + zc->appliedParams = *params; + params = &zc->appliedParams; + + assert(params->useRowMatchFinder != ZSTD_urm_auto); + if (params->ldmParams.enableLdm) { + /* Adjust long distance matching parameters */ + ZSTD_ldm_adjustParameters(&zc->appliedParams.ldmParams, ¶ms->cParams); + assert(params->ldmParams.hashLog >= params->ldmParams.bucketSizeLog); + assert(params->ldmParams.hashRateLog < 32); + } + + { size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params->cParams.windowLog), pledgedSrcSize)); + size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize); + U32 const divider = (params->cParams.minMatch==3) ? 3 : 4; + size_t const maxNbSeq = blockSize / divider; + size_t const buffOutSize = (zbuff == ZSTDb_buffered && params->outBufferMode == ZSTD_bm_buffered) + ? ZSTD_compressBound(blockSize) + 1 + : 0; + size_t const buffInSize = (zbuff == ZSTDb_buffered && params->inBufferMode == ZSTD_bm_buffered) + ? windowSize + blockSize + : 0; + size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(params->ldmParams, blockSize); + + int const indexTooClose = ZSTD_indexTooCloseToMax(zc->blockState.matchState.window); + int const dictTooBig = ZSTD_dictTooBig(loadedDictSize); + ZSTD_indexResetPolicy_e needsIndexReset = + (indexTooClose || dictTooBig || !zc->initialized) ? ZSTDirp_reset : ZSTDirp_continue; + + size_t const neededSpace = + ZSTD_estimateCCtxSize_usingCCtxParams_internal( + ¶ms->cParams, ¶ms->ldmParams, zc->staticSize != 0, params->useRowMatchFinder, + buffInSize, buffOutSize, pledgedSrcSize); + int resizeWorkspace; + + FORWARD_IF_ERROR(neededSpace, "cctx size estimate failed!"); + + if (!zc->staticSize) ZSTD_cwksp_bump_oversized_duration(ws, 0); + + { /* Check if workspace is large enough, alloc a new one if needed */ + int const workspaceTooSmall = ZSTD_cwksp_sizeof(ws) < neededSpace; + int const workspaceWasteful = ZSTD_cwksp_check_wasteful(ws, neededSpace); + resizeWorkspace = workspaceTooSmall || workspaceWasteful; + DEBUGLOG(4, "Need %zu B workspace", neededSpace); + DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize); + + if (resizeWorkspace) { + DEBUGLOG(4, "Resize workspaceSize from %zuKB to %zuKB", + ZSTD_cwksp_sizeof(ws) >> 10, + neededSpace >> 10); + + RETURN_ERROR_IF(zc->staticSize, memory_allocation, "static cctx : no resize"); + + needsIndexReset = ZSTDirp_reset; + + ZSTD_cwksp_free(ws, zc->customMem); + FORWARD_IF_ERROR(ZSTD_cwksp_create(ws, neededSpace, zc->customMem), ""); + + DEBUGLOG(5, "reserving object space"); + /* Statically sized space. 
+ * entropyWorkspace never moves, + * though prev/next block swap places */ + assert(ZSTD_cwksp_check_available(ws, 2 * sizeof(ZSTD_compressedBlockState_t))); + zc->blockState.prevCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t)); + RETURN_ERROR_IF(zc->blockState.prevCBlock == NULL, memory_allocation, "couldn't allocate prevCBlock"); + zc->blockState.nextCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t)); + RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate nextCBlock"); + zc->entropyWorkspace = (U32*) ZSTD_cwksp_reserve_object(ws, ENTROPY_WORKSPACE_SIZE); + RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate entropyWorkspace"); + } } + + ZSTD_cwksp_clear(ws); + + /* init params */ + zc->blockState.matchState.cParams = params->cParams; + zc->pledgedSrcSizePlusOne = pledgedSrcSize+1; + zc->consumedSrcSize = 0; + zc->producedCSize = 0; + if (pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN) + zc->appliedParams.fParams.contentSizeFlag = 0; + DEBUGLOG(4, "pledged content size : %u ; flag : %u", + (unsigned)pledgedSrcSize, zc->appliedParams.fParams.contentSizeFlag); + zc->blockSize = blockSize; + + XXH64_reset(&zc->xxhState, 0); + zc->stage = ZSTDcs_init; + zc->dictID = 0; + zc->dictContentSize = 0; + + ZSTD_reset_compressedBlockState(zc->blockState.prevCBlock); + + /* ZSTD_wildcopy() is used to copy into the literals buffer, + * so we have to oversize the buffer by WILDCOPY_OVERLENGTH bytes. + */ + zc->seqStore.litStart = ZSTD_cwksp_reserve_buffer(ws, blockSize + WILDCOPY_OVERLENGTH); + zc->seqStore.maxNbLit = blockSize; + + /* buffers */ + zc->bufferedPolicy = zbuff; + zc->inBuffSize = buffInSize; + zc->inBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffInSize); + zc->outBuffSize = buffOutSize; + zc->outBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffOutSize); + + /* ldm bucketOffsets table */ + if (params->ldmParams.enableLdm) { + /* TODO: avoid memset? */ + size_t const numBuckets = + ((size_t)1) << (params->ldmParams.hashLog - + params->ldmParams.bucketSizeLog); + zc->ldmState.bucketOffsets = ZSTD_cwksp_reserve_buffer(ws, numBuckets); + ZSTD_memset(zc->ldmState.bucketOffsets, 0, numBuckets); + } + + /* sequences storage */ + ZSTD_referenceExternalSequences(zc, NULL, 0); + zc->seqStore.maxNbSeq = maxNbSeq; + zc->seqStore.llCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE)); + zc->seqStore.mlCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE)); + zc->seqStore.ofCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE)); + zc->seqStore.sequencesStart = (seqDef*)ZSTD_cwksp_reserve_aligned(ws, maxNbSeq * sizeof(seqDef)); + + FORWARD_IF_ERROR(ZSTD_reset_matchState( + &zc->blockState.matchState, + ws, + ¶ms->cParams, + params->useRowMatchFinder, + crp, + needsIndexReset, + ZSTD_resetTarget_CCtx), ""); + + /* ldm hash table */ + if (params->ldmParams.enableLdm) { + /* TODO: avoid memset? 
*/ + size_t const ldmHSize = ((size_t)1) << params->ldmParams.hashLog; + zc->ldmState.hashTable = (ldmEntry_t*)ZSTD_cwksp_reserve_aligned(ws, ldmHSize * sizeof(ldmEntry_t)); + ZSTD_memset(zc->ldmState.hashTable, 0, ldmHSize * sizeof(ldmEntry_t)); + zc->ldmSequences = (rawSeq*)ZSTD_cwksp_reserve_aligned(ws, maxNbLdmSeq * sizeof(rawSeq)); + zc->maxNbLdmSequences = maxNbLdmSeq; + + ZSTD_window_init(&zc->ldmState.window); + zc->ldmState.loadedDictEnd = 0; + } + + assert(ZSTD_cwksp_estimated_space_within_bounds(ws, neededSpace, resizeWorkspace)); + DEBUGLOG(3, "wksp: finished allocating, %zd bytes remain available", ZSTD_cwksp_available_space(ws)); + + zc->initialized = 1; + + return 0; + } +} + +/* ZSTD_invalidateRepCodes() : + * ensures next compression will not use repcodes from previous block. + * Note : only works with regular variant; + * do not use with extDict variant ! */ +void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx) { + int i; + for (i=0; iblockState.prevCBlock->rep[i] = 0; + assert(!ZSTD_window_hasExtDict(cctx->blockState.matchState.window)); +} + +/* These are the approximate sizes for each strategy past which copying the + * dictionary tables into the working context is faster than using them + * in-place. + */ +static const size_t attachDictSizeCutoffs[ZSTD_STRATEGY_MAX+1] = { + 8 KB, /* unused */ + 8 KB, /* ZSTD_fast */ + 16 KB, /* ZSTD_dfast */ + 32 KB, /* ZSTD_greedy */ + 32 KB, /* ZSTD_lazy */ + 32 KB, /* ZSTD_lazy2 */ + 32 KB, /* ZSTD_btlazy2 */ + 32 KB, /* ZSTD_btopt */ + 8 KB, /* ZSTD_btultra */ + 8 KB /* ZSTD_btultra2 */ +}; + +static int ZSTD_shouldAttachDict(const ZSTD_CDict* cdict, + const ZSTD_CCtx_params* params, + U64 pledgedSrcSize) +{ + size_t cutoff = attachDictSizeCutoffs[cdict->matchState.cParams.strategy]; + int const dedicatedDictSearch = cdict->matchState.dedicatedDictSearch; + return dedicatedDictSearch + || ( ( pledgedSrcSize <= cutoff + || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN + || params->attachDictPref == ZSTD_dictForceAttach ) + && params->attachDictPref != ZSTD_dictForceCopy + && !params->forceWindow ); /* dictMatchState isn't correctly + * handled in _enforceMaxDist */ +} + +static size_t +ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx, + const ZSTD_CDict* cdict, + ZSTD_CCtx_params params, + U64 pledgedSrcSize, + ZSTD_buffered_policy_e zbuff) +{ + DEBUGLOG(4, "ZSTD_resetCCtx_byAttachingCDict() pledgedSrcSize=%llu", + (unsigned long long)pledgedSrcSize); + { + ZSTD_compressionParameters adjusted_cdict_cParams = cdict->matchState.cParams; + unsigned const windowLog = params.cParams.windowLog; + assert(windowLog != 0); + /* Resize working context table params for input only, since the dict + * has its own tables. */ + /* pledgedSrcSize == 0 means 0! 
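+ * (In attach mode the cdict keeps its own tables, so the cParams below are adjusted
+ * with the dictionary size treated as 0 via ZSTD_cpm_attachDict; e.g. a 10 KB pledged
+ * source with a ZSTD_lazy cdict stays under the 32 KB attach cutoff from
+ * attachDictSizeCutoffs. Illustrative numbers only.)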
*/ + + if (cdict->matchState.dedicatedDictSearch) { + ZSTD_dedicatedDictSearch_revertCParams(&adjusted_cdict_cParams); + } + + params.cParams = ZSTD_adjustCParams_internal(adjusted_cdict_cParams, pledgedSrcSize, + cdict->dictContentSize, ZSTD_cpm_attachDict); + params.cParams.windowLog = windowLog; + params.useRowMatchFinder = cdict->useRowMatchFinder; /* cdict overrides */ + FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, ¶ms, pledgedSrcSize, + /* loadedDictSize */ 0, + ZSTDcrp_makeClean, zbuff), ""); + assert(cctx->appliedParams.cParams.strategy == adjusted_cdict_cParams.strategy); + } + + { const U32 cdictEnd = (U32)( cdict->matchState.window.nextSrc + - cdict->matchState.window.base); + const U32 cdictLen = cdictEnd - cdict->matchState.window.dictLimit; + if (cdictLen == 0) { + /* don't even attach dictionaries with no contents */ + DEBUGLOG(4, "skipping attaching empty dictionary"); + } else { + DEBUGLOG(4, "attaching dictionary into context"); + cctx->blockState.matchState.dictMatchState = &cdict->matchState; + + /* prep working match state so dict matches never have negative indices + * when they are translated to the working context's index space. */ + if (cctx->blockState.matchState.window.dictLimit < cdictEnd) { + cctx->blockState.matchState.window.nextSrc = + cctx->blockState.matchState.window.base + cdictEnd; + ZSTD_window_clear(&cctx->blockState.matchState.window); + } + /* loadedDictEnd is expressed within the referential of the active context */ + cctx->blockState.matchState.loadedDictEnd = cctx->blockState.matchState.window.dictLimit; + } } + + cctx->dictID = cdict->dictID; + cctx->dictContentSize = cdict->dictContentSize; + + /* copy block state */ + ZSTD_memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState)); + + return 0; +} + +static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx, + const ZSTD_CDict* cdict, + ZSTD_CCtx_params params, + U64 pledgedSrcSize, + ZSTD_buffered_policy_e zbuff) +{ + const ZSTD_compressionParameters *cdict_cParams = &cdict->matchState.cParams; + + assert(!cdict->matchState.dedicatedDictSearch); + DEBUGLOG(4, "ZSTD_resetCCtx_byCopyingCDict() pledgedSrcSize=%llu", + (unsigned long long)pledgedSrcSize); + + { unsigned const windowLog = params.cParams.windowLog; + assert(windowLog != 0); + /* Copy only compression parameters related to tables. */ + params.cParams = *cdict_cParams; + params.cParams.windowLog = windowLog; + params.useRowMatchFinder = cdict->useRowMatchFinder; + FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, ¶ms, pledgedSrcSize, + /* loadedDictSize */ 0, + ZSTDcrp_leaveDirty, zbuff), ""); + assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy); + assert(cctx->appliedParams.cParams.hashLog == cdict_cParams->hashLog); + assert(cctx->appliedParams.cParams.chainLog == cdict_cParams->chainLog); + } + + ZSTD_cwksp_mark_tables_dirty(&cctx->workspace); + assert(params.useRowMatchFinder != ZSTD_urm_auto); + + /* copy tables */ + { size_t const chainSize = ZSTD_allocateChainTable(cdict_cParams->strategy, cdict->useRowMatchFinder, 0 /* DDS guaranteed disabled */) + ? 
((size_t)1 << cdict_cParams->chainLog) + : 0; + size_t const hSize = (size_t)1 << cdict_cParams->hashLog; + + ZSTD_memcpy(cctx->blockState.matchState.hashTable, + cdict->matchState.hashTable, + hSize * sizeof(U32)); + /* Do not copy cdict's chainTable if cctx has parameters such that it would not use chainTable */ + if (ZSTD_allocateChainTable(cctx->appliedParams.cParams.strategy, cctx->appliedParams.useRowMatchFinder, 0 /* forDDSDict */)) { + ZSTD_memcpy(cctx->blockState.matchState.chainTable, + cdict->matchState.chainTable, + chainSize * sizeof(U32)); + } + /* copy tag table */ + if (ZSTD_rowMatchFinderUsed(cdict_cParams->strategy, cdict->useRowMatchFinder)) { + size_t const tagTableSize = hSize*sizeof(U16); + ZSTD_memcpy(cctx->blockState.matchState.tagTable, + cdict->matchState.tagTable, + tagTableSize); + } + } + + /* Zero the hashTable3, since the cdict never fills it */ + { int const h3log = cctx->blockState.matchState.hashLog3; + size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0; + assert(cdict->matchState.hashLog3 == 0); + ZSTD_memset(cctx->blockState.matchState.hashTable3, 0, h3Size * sizeof(U32)); + } + + ZSTD_cwksp_mark_tables_clean(&cctx->workspace); + + /* copy dictionary offsets */ + { ZSTD_matchState_t const* srcMatchState = &cdict->matchState; + ZSTD_matchState_t* dstMatchState = &cctx->blockState.matchState; + dstMatchState->window = srcMatchState->window; + dstMatchState->nextToUpdate = srcMatchState->nextToUpdate; + dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd; + } + + cctx->dictID = cdict->dictID; + cctx->dictContentSize = cdict->dictContentSize; + + /* copy block state */ + ZSTD_memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState)); + + return 0; +} + +/* We have a choice between copying the dictionary context into the working + * context, or referencing the dictionary context from the working context + * in-place. We decide here which strategy to use. */ +static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx, + const ZSTD_CDict* cdict, + const ZSTD_CCtx_params* params, + U64 pledgedSrcSize, + ZSTD_buffered_policy_e zbuff) +{ + + DEBUGLOG(4, "ZSTD_resetCCtx_usingCDict (pledgedSrcSize=%u)", + (unsigned)pledgedSrcSize); + + if (ZSTD_shouldAttachDict(cdict, params, pledgedSrcSize)) { + return ZSTD_resetCCtx_byAttachingCDict( + cctx, cdict, *params, pledgedSrcSize, zbuff); + } else { + return ZSTD_resetCCtx_byCopyingCDict( + cctx, cdict, *params, pledgedSrcSize, zbuff); + } +} + +/*! ZSTD_copyCCtx_internal() : + * Duplicate an existing context `srcCCtx` into another one `dstCCtx`. + * Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()). + * The "context", in this case, refers to the hash and chain tables, + * entropy tables, and dictionary references. + * `windowLog` value is enforced if != 0, otherwise value is copied from srcCCtx. + * @return : 0, or an error code */ +static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx, + const ZSTD_CCtx* srcCCtx, + ZSTD_frameParameters fParams, + U64 pledgedSrcSize, + ZSTD_buffered_policy_e zbuff) +{ + RETURN_ERROR_IF(srcCCtx->stage!=ZSTDcs_init, stage_wrong, + "Can't copy a ctx that's not in init stage."); + DEBUGLOG(5, "ZSTD_copyCCtx_internal"); + ZSTD_memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem)); + { ZSTD_CCtx_params params = dstCCtx->requestedParams; + /* Copy only compression parameters related to tables. 
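+ * (i.e. the cParams that determine hash/chain/hashLog3 table geometry; the copies of
+ * those tables themselves follow right after the reset, sized as hSize*sizeof(U32),
+ * chainSize*sizeof(U32) and h3Size*sizeof(U32) respectively.)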
*/ + params.cParams = srcCCtx->appliedParams.cParams; + assert(srcCCtx->appliedParams.useRowMatchFinder != ZSTD_urm_auto); + params.useRowMatchFinder = srcCCtx->appliedParams.useRowMatchFinder; + params.fParams = fParams; + ZSTD_resetCCtx_internal(dstCCtx, ¶ms, pledgedSrcSize, + /* loadedDictSize */ 0, + ZSTDcrp_leaveDirty, zbuff); + assert(dstCCtx->appliedParams.cParams.windowLog == srcCCtx->appliedParams.cParams.windowLog); + assert(dstCCtx->appliedParams.cParams.strategy == srcCCtx->appliedParams.cParams.strategy); + assert(dstCCtx->appliedParams.cParams.hashLog == srcCCtx->appliedParams.cParams.hashLog); + assert(dstCCtx->appliedParams.cParams.chainLog == srcCCtx->appliedParams.cParams.chainLog); + assert(dstCCtx->blockState.matchState.hashLog3 == srcCCtx->blockState.matchState.hashLog3); + } + + ZSTD_cwksp_mark_tables_dirty(&dstCCtx->workspace); + + /* copy tables */ + { size_t const chainSize = ZSTD_allocateChainTable(srcCCtx->appliedParams.cParams.strategy, + srcCCtx->appliedParams.useRowMatchFinder, + 0 /* forDDSDict */) + ? ((size_t)1 << srcCCtx->appliedParams.cParams.chainLog) + : 0; + size_t const hSize = (size_t)1 << srcCCtx->appliedParams.cParams.hashLog; + int const h3log = srcCCtx->blockState.matchState.hashLog3; + size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0; + + ZSTD_memcpy(dstCCtx->blockState.matchState.hashTable, + srcCCtx->blockState.matchState.hashTable, + hSize * sizeof(U32)); + ZSTD_memcpy(dstCCtx->blockState.matchState.chainTable, + srcCCtx->blockState.matchState.chainTable, + chainSize * sizeof(U32)); + ZSTD_memcpy(dstCCtx->blockState.matchState.hashTable3, + srcCCtx->blockState.matchState.hashTable3, + h3Size * sizeof(U32)); + } + + ZSTD_cwksp_mark_tables_clean(&dstCCtx->workspace); + + /* copy dictionary offsets */ + { + const ZSTD_matchState_t* srcMatchState = &srcCCtx->blockState.matchState; + ZSTD_matchState_t* dstMatchState = &dstCCtx->blockState.matchState; + dstMatchState->window = srcMatchState->window; + dstMatchState->nextToUpdate = srcMatchState->nextToUpdate; + dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd; + } + dstCCtx->dictID = srcCCtx->dictID; + dstCCtx->dictContentSize = srcCCtx->dictContentSize; + + /* copy block state */ + ZSTD_memcpy(dstCCtx->blockState.prevCBlock, srcCCtx->blockState.prevCBlock, sizeof(*srcCCtx->blockState.prevCBlock)); + + return 0; +} + +/*! ZSTD_copyCCtx() : + * Duplicate an existing context `srcCCtx` into another one `dstCCtx`. + * Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()). + * pledgedSrcSize==0 means "unknown". +* @return : 0, or an error code */ +size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long long pledgedSrcSize) +{ + ZSTD_frameParameters fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ }; + ZSTD_buffered_policy_e const zbuff = srcCCtx->bufferedPolicy; + ZSTD_STATIC_ASSERT((U32)ZSTDb_buffered==1); + if (pledgedSrcSize==0) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN; + fParams.contentSizeFlag = (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN); + + return ZSTD_copyCCtx_internal(dstCCtx, srcCCtx, + fParams, pledgedSrcSize, + zbuff); +} + + +#define ZSTD_ROWSIZE 16 +/*! ZSTD_reduceTable() : + * reduce table indexes by `reducerValue`, or squash to zero. + * PreserveMark preserves "unsorted mark" for btlazy2 strategy. + * It must be set to a clear 0/1 value, to remove branch during inlining. 
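+ * In effect each index i becomes (i < reducerValue) ? 0 : i - reducerValue, e.g. with
+ * reducerValue = 1000 an index of 250 squashes to 0 and an index of 4000 becomes 3000
+ * (illustrative values); only btlazy2's unsorted mark is exempted when PreserveMark is set.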
+ * Presume table size is a multiple of ZSTD_ROWSIZE + * to help auto-vectorization */ +FORCE_INLINE_TEMPLATE void +ZSTD_reduceTable_internal (U32* const table, U32 const size, U32 const reducerValue, int const preserveMark) +{ + int const nbRows = (int)size / ZSTD_ROWSIZE; + int cellNb = 0; + int rowNb; + assert((size & (ZSTD_ROWSIZE-1)) == 0); /* multiple of ZSTD_ROWSIZE */ + assert(size < (1U<<31)); /* can be casted to int */ + +#if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE) + /* To validate that the table re-use logic is sound, and that we don't + * access table space that we haven't cleaned, we re-"poison" the table + * space every time we mark it dirty. + * + * This function however is intended to operate on those dirty tables and + * re-clean them. So when this function is used correctly, we can unpoison + * the memory it operated on. This introduces a blind spot though, since + * if we now try to operate on __actually__ poisoned memory, we will not + * detect that. */ + __msan_unpoison(table, size * sizeof(U32)); +#endif + + for (rowNb=0 ; rowNb < nbRows ; rowNb++) { + int column; + for (column=0; columncParams.hashLog; + ZSTD_reduceTable(ms->hashTable, hSize, reducerValue); + } + + if (ZSTD_allocateChainTable(params->cParams.strategy, params->useRowMatchFinder, (U32)ms->dedicatedDictSearch)) { + U32 const chainSize = (U32)1 << params->cParams.chainLog; + if (params->cParams.strategy == ZSTD_btlazy2) + ZSTD_reduceTable_btlazy2(ms->chainTable, chainSize, reducerValue); + else + ZSTD_reduceTable(ms->chainTable, chainSize, reducerValue); + } + + if (ms->hashLog3) { + U32 const h3Size = (U32)1 << ms->hashLog3; + ZSTD_reduceTable(ms->hashTable3, h3Size, reducerValue); + } +} + + +/*-******************************************************* +* Block entropic compression +*********************************************************/ + +/* See doc/zstd_compression_format.md for detailed format description */ + +void ZSTD_seqToCodes(const seqStore_t* seqStorePtr) +{ + const seqDef* const sequences = seqStorePtr->sequencesStart; + BYTE* const llCodeTable = seqStorePtr->llCode; + BYTE* const ofCodeTable = seqStorePtr->ofCode; + BYTE* const mlCodeTable = seqStorePtr->mlCode; + U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); + U32 u; + assert(nbSeq <= seqStorePtr->maxNbSeq); + for (u=0; ulongLengthType==ZSTD_llt_literalLength) + llCodeTable[seqStorePtr->longLengthPos] = MaxLL; + if (seqStorePtr->longLengthType==ZSTD_llt_matchLength) + mlCodeTable[seqStorePtr->longLengthPos] = MaxML; +} + +/* ZSTD_useTargetCBlockSize(): + * Returns if target compressed block size param is being used. + * If used, compression will do best effort to make a compressed block size to be around targetCBlockSize. + * Returns 1 if true, 0 otherwise. */ +static int ZSTD_useTargetCBlockSize(const ZSTD_CCtx_params* cctxParams) +{ + DEBUGLOG(5, "ZSTD_useTargetCBlockSize (targetCBlockSize=%zu)", cctxParams->targetCBlockSize); + return (cctxParams->targetCBlockSize != 0); +} + +/* ZSTD_blockSplitterEnabled(): + * Returns if block splitting param is being used + * If used, compression will do best effort to split a block in order to improve compression ratio. + * Returns 1 if true, 0 otherwise. 
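+ * Like targetCBlockSize above, this is opt-in: a zero cctxParams->splitBlocks leaves the
+ * regular single-block path untouched.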
*/ +static int ZSTD_blockSplitterEnabled(ZSTD_CCtx_params* cctxParams) +{ + DEBUGLOG(5, "ZSTD_blockSplitterEnabled(splitBlocks=%d)", cctxParams->splitBlocks); + return (cctxParams->splitBlocks != 0); +} + +/* Type returned by ZSTD_buildSequencesStatistics containing finalized symbol encoding types + * and size of the sequences statistics + */ +typedef struct { + U32 LLtype; + U32 Offtype; + U32 MLtype; + size_t size; + size_t lastCountSize; /* Accounts for bug in 1.3.4. More detail in ZSTD_entropyCompressSeqStore_internal() */ +} ZSTD_symbolEncodingTypeStats_t; + +/* ZSTD_buildSequencesStatistics(): + * Returns a ZSTD_symbolEncodingTypeStats_t, or a zstd error code in the `size` field. + * Modifies `nextEntropy` to have the appropriate values as a side effect. + * nbSeq must be greater than 0. + * + * entropyWkspSize must be of size at least ENTROPY_WORKSPACE_SIZE - (MaxSeq + 1)*sizeof(U32) + */ +static ZSTD_symbolEncodingTypeStats_t +ZSTD_buildSequencesStatistics(seqStore_t* seqStorePtr, size_t nbSeq, + const ZSTD_fseCTables_t* prevEntropy, ZSTD_fseCTables_t* nextEntropy, + BYTE* dst, const BYTE* const dstEnd, + ZSTD_strategy strategy, unsigned* countWorkspace, + void* entropyWorkspace, size_t entropyWkspSize) { + BYTE* const ostart = dst; + const BYTE* const oend = dstEnd; + BYTE* op = ostart; + FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable; + FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable; + FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable; + const BYTE* const ofCodeTable = seqStorePtr->ofCode; + const BYTE* const llCodeTable = seqStorePtr->llCode; + const BYTE* const mlCodeTable = seqStorePtr->mlCode; + ZSTD_symbolEncodingTypeStats_t stats; + + stats.lastCountSize = 0; + /* convert length/distances into codes */ + ZSTD_seqToCodes(seqStorePtr); + assert(op <= oend); + assert(nbSeq != 0); /* ZSTD_selectEncodingType() divides by nbSeq */ + /* build CTable for Literal Lengths */ + { unsigned max = MaxLL; + size_t const mostFrequent = HIST_countFast_wksp(countWorkspace, &max, llCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ + DEBUGLOG(5, "Building LL table"); + nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode; + stats.LLtype = ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode, + countWorkspace, max, mostFrequent, nbSeq, + LLFSELog, prevEntropy->litlengthCTable, + LL_defaultNorm, LL_defaultNormLog, + ZSTD_defaultAllowed, strategy); + assert(set_basic < set_compressed && set_rle < set_compressed); + assert(!(stats.LLtype < set_compressed && nextEntropy->litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ + { size_t const countSize = ZSTD_buildCTable( + op, (size_t)(oend - op), + CTable_LitLength, LLFSELog, (symbolEncodingType_e)stats.LLtype, + countWorkspace, max, llCodeTable, nbSeq, + LL_defaultNorm, LL_defaultNormLog, MaxLL, + prevEntropy->litlengthCTable, + sizeof(prevEntropy->litlengthCTable), + entropyWorkspace, entropyWkspSize); + if (ZSTD_isError(countSize)) { + DEBUGLOG(3, "ZSTD_buildCTable for LitLens failed"); + stats.size = countSize; + return stats; + } + if (stats.LLtype == set_compressed) + stats.lastCountSize = countSize; + op += countSize; + assert(op <= oend); + } } + /* build CTable for Offsets */ + { unsigned max = MaxOff; + size_t const mostFrequent = HIST_countFast_wksp( + countWorkspace, &max, ofCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ + /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too 
large */ + ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed; + DEBUGLOG(5, "Building OF table"); + nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode; + stats.Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode, + countWorkspace, max, mostFrequent, nbSeq, + OffFSELog, prevEntropy->offcodeCTable, + OF_defaultNorm, OF_defaultNormLog, + defaultPolicy, strategy); + assert(!(stats.Offtype < set_compressed && nextEntropy->offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */ + { size_t const countSize = ZSTD_buildCTable( + op, (size_t)(oend - op), + CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)stats.Offtype, + countWorkspace, max, ofCodeTable, nbSeq, + OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, + prevEntropy->offcodeCTable, + sizeof(prevEntropy->offcodeCTable), + entropyWorkspace, entropyWkspSize); + if (ZSTD_isError(countSize)) { + DEBUGLOG(3, "ZSTD_buildCTable for Offsets failed"); + stats.size = countSize; + return stats; + } + if (stats.Offtype == set_compressed) + stats.lastCountSize = countSize; + op += countSize; + assert(op <= oend); + } } + /* build CTable for MatchLengths */ + { unsigned max = MaxML; + size_t const mostFrequent = HIST_countFast_wksp( + countWorkspace, &max, mlCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ + DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op)); + nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode; + stats.MLtype = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode, + countWorkspace, max, mostFrequent, nbSeq, + MLFSELog, prevEntropy->matchlengthCTable, + ML_defaultNorm, ML_defaultNormLog, + ZSTD_defaultAllowed, strategy); + assert(!(stats.MLtype < set_compressed && nextEntropy->matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ + { size_t const countSize = ZSTD_buildCTable( + op, (size_t)(oend - op), + CTable_MatchLength, MLFSELog, (symbolEncodingType_e)stats.MLtype, + countWorkspace, max, mlCodeTable, nbSeq, + ML_defaultNorm, ML_defaultNormLog, MaxML, + prevEntropy->matchlengthCTable, + sizeof(prevEntropy->matchlengthCTable), + entropyWorkspace, entropyWkspSize); + if (ZSTD_isError(countSize)) { + DEBUGLOG(3, "ZSTD_buildCTable for MatchLengths failed"); + stats.size = countSize; + return stats; + } + if (stats.MLtype == set_compressed) + stats.lastCountSize = countSize; + op += countSize; + assert(op <= oend); + } } + stats.size = (size_t)(op-ostart); + return stats; +} + +/* ZSTD_entropyCompressSeqStore_internal(): + * compresses both literals and sequences + * Returns compressed size of block, or a zstd error. 
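+ * The layout it produces below is: compressed literals section, then the sequence count
+ * (1-3 bytes), then one header byte packing the LL/OF/ML encoding types
+ * ((LLtype<<6) + (Offtype<<4) + (MLtype<<2)), then any FSE table descriptions, then the
+ * interleaved sequences bitstream.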
+ */ +MEM_STATIC size_t +ZSTD_entropyCompressSeqStore_internal(seqStore_t* seqStorePtr, + const ZSTD_entropyCTables_t* prevEntropy, + ZSTD_entropyCTables_t* nextEntropy, + const ZSTD_CCtx_params* cctxParams, + void* dst, size_t dstCapacity, + void* entropyWorkspace, size_t entropyWkspSize, + const int bmi2) +{ + const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN; + ZSTD_strategy const strategy = cctxParams->cParams.strategy; + unsigned* count = (unsigned*)entropyWorkspace; + FSE_CTable* CTable_LitLength = nextEntropy->fse.litlengthCTable; + FSE_CTable* CTable_OffsetBits = nextEntropy->fse.offcodeCTable; + FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable; + const seqDef* const sequences = seqStorePtr->sequencesStart; + const size_t nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart; + const BYTE* const ofCodeTable = seqStorePtr->ofCode; + const BYTE* const llCodeTable = seqStorePtr->llCode; + const BYTE* const mlCodeTable = seqStorePtr->mlCode; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstCapacity; + BYTE* op = ostart; + size_t lastCountSize; + + entropyWorkspace = count + (MaxSeq + 1); + entropyWkspSize -= (MaxSeq + 1) * sizeof(*count); + + DEBUGLOG(4, "ZSTD_entropyCompressSeqStore_internal (nbSeq=%zu)", nbSeq); + ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<= HUF_WORKSPACE_SIZE); + + /* Compress literals */ + { const BYTE* const literals = seqStorePtr->litStart; + size_t const litSize = (size_t)(seqStorePtr->lit - literals); + size_t const cSize = ZSTD_compressLiterals( + &prevEntropy->huf, &nextEntropy->huf, + cctxParams->cParams.strategy, + ZSTD_disableLiteralsCompression(cctxParams), + op, dstCapacity, + literals, litSize, + entropyWorkspace, entropyWkspSize, + bmi2); + FORWARD_IF_ERROR(cSize, "ZSTD_compressLiterals failed"); + assert(cSize <= dstCapacity); + op += cSize; + } + + /* Sequences Header */ + RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/, + dstSize_tooSmall, "Can't fit seq hdr in output buf!"); + if (nbSeq < 128) { + *op++ = (BYTE)nbSeq; + } else if (nbSeq < LONGNBSEQ) { + op[0] = (BYTE)((nbSeq>>8) + 0x80); + op[1] = (BYTE)nbSeq; + op+=2; + } else { + op[0]=0xFF; + MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)); + op+=3; + } + assert(op <= oend); + if (nbSeq==0) { + /* Copy the old tables over as if we repeated them */ + ZSTD_memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse)); + return (size_t)(op - ostart); + } + { + ZSTD_symbolEncodingTypeStats_t stats; + BYTE* seqHead = op++; + /* build stats for sequences */ + stats = ZSTD_buildSequencesStatistics(seqStorePtr, nbSeq, + &prevEntropy->fse, &nextEntropy->fse, + op, oend, + strategy, count, + entropyWorkspace, entropyWkspSize); + FORWARD_IF_ERROR(stats.size, "ZSTD_buildSequencesStatistics failed!"); + *seqHead = (BYTE)((stats.LLtype<<6) + (stats.Offtype<<4) + (stats.MLtype<<2)); + lastCountSize = stats.lastCountSize; + op += stats.size; + } + + { size_t const bitstreamSize = ZSTD_encodeSequences( + op, (size_t)(oend - op), + CTable_MatchLength, mlCodeTable, + CTable_OffsetBits, ofCodeTable, + CTable_LitLength, llCodeTable, + sequences, nbSeq, + longOffsets, bmi2); + FORWARD_IF_ERROR(bitstreamSize, "ZSTD_encodeSequences failed"); + op += bitstreamSize; + assert(op <= oend); + /* zstd versions <= 1.3.4 mistakenly report corruption when + * FSE_readNCount() receives a buffer < 4 bytes. + * Fixed by https://github.com/facebook/zstd/pull/1146. 
+ * This can happen when the last set_compressed table present is 2 + * bytes and the bitstream is only one byte. + * In this exceedingly rare case, we will simply emit an uncompressed + * block, since it isn't worth optimizing. + */ + if (lastCountSize && (lastCountSize + bitstreamSize) < 4) { + /* lastCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */ + assert(lastCountSize + bitstreamSize == 3); + DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by " + "emitting an uncompressed block."); + return 0; + } + } + + DEBUGLOG(5, "compressed block size : %u", (unsigned)(op - ostart)); + return (size_t)(op - ostart); +} + +MEM_STATIC size_t +ZSTD_entropyCompressSeqStore(seqStore_t* seqStorePtr, + const ZSTD_entropyCTables_t* prevEntropy, + ZSTD_entropyCTables_t* nextEntropy, + const ZSTD_CCtx_params* cctxParams, + void* dst, size_t dstCapacity, + size_t srcSize, + void* entropyWorkspace, size_t entropyWkspSize, + int bmi2) +{ + size_t const cSize = ZSTD_entropyCompressSeqStore_internal( + seqStorePtr, prevEntropy, nextEntropy, cctxParams, + dst, dstCapacity, + entropyWorkspace, entropyWkspSize, bmi2); + if (cSize == 0) return 0; + /* When srcSize <= dstCapacity, there is enough space to write a raw uncompressed block. + * Since we ran out of space, block must be not compressible, so fall back to raw uncompressed block. + */ + if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity)) + return 0; /* block not compressed */ + FORWARD_IF_ERROR(cSize, "ZSTD_entropyCompressSeqStore_internal failed"); + + /* Check compressibility */ + { size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, cctxParams->cParams.strategy); + if (cSize >= maxCSize) return 0; /* block not compressed */ + } + DEBUGLOG(4, "ZSTD_entropyCompressSeqStore() cSize: %zu", cSize); + return cSize; +} + +/* ZSTD_selectBlockCompressor() : + * Not static, but internal use only (used by long distance matcher) + * assumption : strat is a valid strategy */ +ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_useRowMatchFinderMode_e useRowMatchFinder, ZSTD_dictMode_e dictMode) +{ + static const ZSTD_blockCompressor blockCompressor[4][ZSTD_STRATEGY_MAX+1] = { + { ZSTD_compressBlock_fast /* default for 0 */, + ZSTD_compressBlock_fast, + ZSTD_compressBlock_doubleFast, + ZSTD_compressBlock_greedy, + ZSTD_compressBlock_lazy, + ZSTD_compressBlock_lazy2, + ZSTD_compressBlock_btlazy2, + ZSTD_compressBlock_btopt, + ZSTD_compressBlock_btultra, + ZSTD_compressBlock_btultra2 }, + { ZSTD_compressBlock_fast_extDict /* default for 0 */, + ZSTD_compressBlock_fast_extDict, + ZSTD_compressBlock_doubleFast_extDict, + ZSTD_compressBlock_greedy_extDict, + ZSTD_compressBlock_lazy_extDict, + ZSTD_compressBlock_lazy2_extDict, + ZSTD_compressBlock_btlazy2_extDict, + ZSTD_compressBlock_btopt_extDict, + ZSTD_compressBlock_btultra_extDict, + ZSTD_compressBlock_btultra_extDict }, + { ZSTD_compressBlock_fast_dictMatchState /* default for 0 */, + ZSTD_compressBlock_fast_dictMatchState, + ZSTD_compressBlock_doubleFast_dictMatchState, + ZSTD_compressBlock_greedy_dictMatchState, + ZSTD_compressBlock_lazy_dictMatchState, + ZSTD_compressBlock_lazy2_dictMatchState, + ZSTD_compressBlock_btlazy2_dictMatchState, + ZSTD_compressBlock_btopt_dictMatchState, + ZSTD_compressBlock_btultra_dictMatchState, + ZSTD_compressBlock_btultra_dictMatchState }, + { NULL /* default for 0 */, + NULL, + NULL, + ZSTD_compressBlock_greedy_dedicatedDictSearch, + ZSTD_compressBlock_lazy_dedicatedDictSearch, + 
ZSTD_compressBlock_lazy2_dedicatedDictSearch, + NULL, + NULL, + NULL, + NULL } + }; + ZSTD_blockCompressor selectedCompressor; + ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1); + + assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat)); + DEBUGLOG(4, "Selected block compressor: dictMode=%d strat=%d rowMatchfinder=%d", (int)dictMode, (int)strat, (int)useRowMatchFinder); + if (ZSTD_rowMatchFinderUsed(strat, useRowMatchFinder)) { + static const ZSTD_blockCompressor rowBasedBlockCompressors[4][3] = { + { ZSTD_compressBlock_greedy_row, + ZSTD_compressBlock_lazy_row, + ZSTD_compressBlock_lazy2_row }, + { ZSTD_compressBlock_greedy_extDict_row, + ZSTD_compressBlock_lazy_extDict_row, + ZSTD_compressBlock_lazy2_extDict_row }, + { ZSTD_compressBlock_greedy_dictMatchState_row, + ZSTD_compressBlock_lazy_dictMatchState_row, + ZSTD_compressBlock_lazy2_dictMatchState_row }, + { ZSTD_compressBlock_greedy_dedicatedDictSearch_row, + ZSTD_compressBlock_lazy_dedicatedDictSearch_row, + ZSTD_compressBlock_lazy2_dedicatedDictSearch_row } + }; + DEBUGLOG(4, "Selecting a row-based matchfinder"); + assert(useRowMatchFinder != ZSTD_urm_auto); + selectedCompressor = rowBasedBlockCompressors[(int)dictMode][(int)strat - (int)ZSTD_greedy]; + } else { + selectedCompressor = blockCompressor[(int)dictMode][(int)strat]; + } + assert(selectedCompressor != NULL); + return selectedCompressor; +} + +static void ZSTD_storeLastLiterals(seqStore_t* seqStorePtr, + const BYTE* anchor, size_t lastLLSize) +{ + ZSTD_memcpy(seqStorePtr->lit, anchor, lastLLSize); + seqStorePtr->lit += lastLLSize; +} + +void ZSTD_resetSeqStore(seqStore_t* ssPtr) +{ + ssPtr->lit = ssPtr->litStart; + ssPtr->sequences = ssPtr->sequencesStart; + ssPtr->longLengthType = ZSTD_llt_none; +} + +typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e; + +static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) +{ + ZSTD_matchState_t* const ms = &zc->blockState.matchState; + DEBUGLOG(5, "ZSTD_buildSeqStore (srcSize=%zu)", srcSize); + assert(srcSize <= ZSTD_BLOCKSIZE_MAX); + /* Assert that we have correctly flushed the ctx params into the ms's copy */ + ZSTD_assertEqualCParams(zc->appliedParams.cParams, ms->cParams); + if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) { + if (zc->appliedParams.cParams.strategy >= ZSTD_btopt) { + ZSTD_ldm_skipRawSeqStoreBytes(&zc->externSeqStore, srcSize); + } else { + ZSTD_ldm_skipSequences(&zc->externSeqStore, srcSize, zc->appliedParams.cParams.minMatch); + } + return ZSTDbss_noCompress; /* don't even attempt compression below a certain srcSize */ + } + ZSTD_resetSeqStore(&(zc->seqStore)); + /* required for optimal parser to read stats from dictionary */ + ms->opt.symbolCosts = &zc->blockState.prevCBlock->entropy; + /* tell the optimal parser how we expect to compress literals */ + ms->opt.literalCompressionMode = zc->appliedParams.literalCompressionMode; + /* a gap between an attached dict and the current window is not safe, + * they must remain adjacent, + * and when that stops being the case, the dict must be unset */ + assert(ms->dictMatchState == NULL || ms->loadedDictEnd == ms->window.dictLimit); + + /* limited update after a very long match */ + { const BYTE* const base = ms->window.base; + const BYTE* const istart = (const BYTE*)src; + const U32 curr = (U32)(istart-base); + if (sizeof(ptrdiff_t)==8) assert(istart - base < (ptrdiff_t)(U32)(-1)); /* ensure no overflow */ + if (curr > ms->nextToUpdate + 384) + ms->nextToUpdate = curr - MIN(192, (U32)(curr - ms->nextToUpdate - 384)); 
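+            /* e.g. if curr is 1000 bytes past nextToUpdate, the catch-up is limited to
+             * MIN(192, 1000 - 384) = 192 bytes, so at most 192 trailing bytes get
+             * (re)indexed after a very long match (illustrative numbers). */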
+ } + + /* select and store sequences */ + { ZSTD_dictMode_e const dictMode = ZSTD_matchState_dictMode(ms); + size_t lastLLSize; + { int i; + for (i = 0; i < ZSTD_REP_NUM; ++i) + zc->blockState.nextCBlock->rep[i] = zc->blockState.prevCBlock->rep[i]; + } + if (zc->externSeqStore.pos < zc->externSeqStore.size) { + assert(!zc->appliedParams.ldmParams.enableLdm); + /* Updates ldmSeqStore.pos */ + lastLLSize = + ZSTD_ldm_blockCompress(&zc->externSeqStore, + ms, &zc->seqStore, + zc->blockState.nextCBlock->rep, + zc->appliedParams.useRowMatchFinder, + src, srcSize); + assert(zc->externSeqStore.pos <= zc->externSeqStore.size); + } else if (zc->appliedParams.ldmParams.enableLdm) { + rawSeqStore_t ldmSeqStore = kNullRawSeqStore; + + ldmSeqStore.seq = zc->ldmSequences; + ldmSeqStore.capacity = zc->maxNbLdmSequences; + /* Updates ldmSeqStore.size */ + FORWARD_IF_ERROR(ZSTD_ldm_generateSequences(&zc->ldmState, &ldmSeqStore, + &zc->appliedParams.ldmParams, + src, srcSize), ""); + /* Updates ldmSeqStore.pos */ + lastLLSize = + ZSTD_ldm_blockCompress(&ldmSeqStore, + ms, &zc->seqStore, + zc->blockState.nextCBlock->rep, + zc->appliedParams.useRowMatchFinder, + src, srcSize); + assert(ldmSeqStore.pos == ldmSeqStore.size); + } else { /* not long range mode */ + ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, + zc->appliedParams.useRowMatchFinder, + dictMode); + ms->ldmSeqStore = NULL; + lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize); + } + { const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize; + ZSTD_storeLastLiterals(&zc->seqStore, lastLiterals, lastLLSize); + } } + return ZSTDbss_compress; +} + +static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc) +{ + const seqStore_t* seqStore = ZSTD_getSeqStore(zc); + const seqDef* seqStoreSeqs = seqStore->sequencesStart; + size_t seqStoreSeqSize = seqStore->sequences - seqStoreSeqs; + size_t seqStoreLiteralsSize = (size_t)(seqStore->lit - seqStore->litStart); + size_t literalsRead = 0; + size_t lastLLSize; + + ZSTD_Sequence* outSeqs = &zc->seqCollector.seqStart[zc->seqCollector.seqIndex]; + size_t i; + repcodes_t updatedRepcodes; + + assert(zc->seqCollector.seqIndex + 1 < zc->seqCollector.maxSequences); + /* Ensure we have enough space for last literals "sequence" */ + assert(zc->seqCollector.maxSequences >= seqStoreSeqSize + 1); + ZSTD_memcpy(updatedRepcodes.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t)); + for (i = 0; i < seqStoreSeqSize; ++i) { + U32 rawOffset = seqStoreSeqs[i].offset - ZSTD_REP_NUM; + outSeqs[i].litLength = seqStoreSeqs[i].litLength; + outSeqs[i].matchLength = seqStoreSeqs[i].matchLength + MINMATCH; + outSeqs[i].rep = 0; + + if (i == seqStore->longLengthPos) { + if (seqStore->longLengthType == ZSTD_llt_literalLength) { + outSeqs[i].litLength += 0x10000; + } else if (seqStore->longLengthType == ZSTD_llt_matchLength) { + outSeqs[i].matchLength += 0x10000; + } + } + + if (seqStoreSeqs[i].offset <= ZSTD_REP_NUM) { + /* Derive the correct offset corresponding to a repcode */ + outSeqs[i].rep = seqStoreSeqs[i].offset; + if (outSeqs[i].litLength != 0) { + rawOffset = updatedRepcodes.rep[outSeqs[i].rep - 1]; + } else { + if (outSeqs[i].rep == 3) { + rawOffset = updatedRepcodes.rep[0] - 1; + } else { + rawOffset = updatedRepcodes.rep[outSeqs[i].rep]; + } + } + } + outSeqs[i].offset = rawOffset; + /* seqStoreSeqs[i].offset == offCode+1, and ZSTD_updateRep() expects offCode + so we provide seqStoreSeqs[i].offset - 1 */ + 
updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep, + seqStoreSeqs[i].offset - 1, + seqStoreSeqs[i].litLength == 0); + literalsRead += outSeqs[i].litLength; + } + /* Insert last literals (if any exist) in the block as a sequence with ml == off == 0. + * If there are no last literals, then we'll emit (of: 0, ml: 0, ll: 0), which is a marker + * for the block boundary, according to the API. + */ + assert(seqStoreLiteralsSize >= literalsRead); + lastLLSize = seqStoreLiteralsSize - literalsRead; + outSeqs[i].litLength = (U32)lastLLSize; + outSeqs[i].matchLength = outSeqs[i].offset = outSeqs[i].rep = 0; + seqStoreSeqSize++; + zc->seqCollector.seqIndex += seqStoreSeqSize; +} + +size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, + size_t outSeqsSize, const void* src, size_t srcSize) +{ + const size_t dstCapacity = ZSTD_compressBound(srcSize); + void* dst = ZSTD_customMalloc(dstCapacity, ZSTD_defaultCMem); + SeqCollector seqCollector; + + RETURN_ERROR_IF(dst == NULL, memory_allocation, "NULL pointer!"); + + seqCollector.collectSequences = 1; + seqCollector.seqStart = outSeqs; + seqCollector.seqIndex = 0; + seqCollector.maxSequences = outSeqsSize; + zc->seqCollector = seqCollector; + + ZSTD_compress2(zc, dst, dstCapacity, src, srcSize); + ZSTD_customFree(dst, ZSTD_defaultCMem); + return zc->seqCollector.seqIndex; +} + +size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize) { + size_t in = 0; + size_t out = 0; + for (; in < seqsSize; ++in) { + if (sequences[in].offset == 0 && sequences[in].matchLength == 0) { + if (in != seqsSize - 1) { + sequences[in+1].litLength += sequences[in].litLength; + } + } else { + sequences[out] = sequences[in]; + ++out; + } + } + return out; +} + +/* Unrolled loop to read four size_ts of input at a time. Returns 1 if is RLE, 0 if not. */ +static int ZSTD_isRLE(const BYTE* src, size_t length) { + const BYTE* ip = src; + const BYTE value = ip[0]; + const size_t valueST = (size_t)((U64)value * 0x0101010101010101ULL); + const size_t unrollSize = sizeof(size_t) * 4; + const size_t unrollMask = unrollSize - 1; + const size_t prefixLength = length & unrollMask; + size_t i; + size_t u; + if (length == 1) return 1; + /* Check if prefix is RLE first before using unrolled loop */ + if (prefixLength && ZSTD_count(ip+1, ip, ip+prefixLength) != prefixLength-1) { + return 0; + } + for (i = prefixLength; i != length; i += unrollSize) { + for (u = 0; u < unrollSize; u += sizeof(size_t)) { + if (MEM_readST(ip + i + u) != valueST) { + return 0; + } + } + } + return 1; +} + +/* Returns true if the given block may be RLE. + * This is just a heuristic based on the compressibility. + * It may return both false positives and false negatives. + */ +static int ZSTD_maybeRLE(seqStore_t const* seqStore) +{ + size_t const nbSeqs = (size_t)(seqStore->sequences - seqStore->sequencesStart); + size_t const nbLits = (size_t)(seqStore->lit - seqStore->litStart); + + return nbSeqs < 4 && nbLits < 10; +} + +static void ZSTD_blockState_confirmRepcodesAndEntropyTables(ZSTD_blockState_t* const bs) +{ + ZSTD_compressedBlockState_t* const tmp = bs->prevCBlock; + bs->prevCBlock = bs->nextCBlock; + bs->nextCBlock = tmp; +} + +/* Writes the block header */ +static void writeBlockHeader(void* op, size_t cSize, size_t blockSize, U32 lastBlock) { + U32 const cBlockHeader = cSize == 1 ? 
+ lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) : + lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); + MEM_writeLE24(op, cBlockHeader); + DEBUGLOG(3, "writeBlockHeader: cSize: %zu blockSize: %zu lastBlock: %u", cSize, blockSize, lastBlock); +} + +/** ZSTD_buildBlockEntropyStats_literals() : + * Builds entropy for the literals. + * Stores literals block type (raw, rle, compressed, repeat) and + * huffman description table to hufMetadata. + * Requires ENTROPY_WORKSPACE_SIZE workspace + * @return : size of huffman description table or error code */ +static size_t ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSize, + const ZSTD_hufCTables_t* prevHuf, + ZSTD_hufCTables_t* nextHuf, + ZSTD_hufCTablesMetadata_t* hufMetadata, + const int disableLiteralsCompression, + void* workspace, size_t wkspSize) +{ + BYTE* const wkspStart = (BYTE*)workspace; + BYTE* const wkspEnd = wkspStart + wkspSize; + BYTE* const countWkspStart = wkspStart; + unsigned* const countWksp = (unsigned*)workspace; + const size_t countWkspSize = (HUF_SYMBOLVALUE_MAX + 1) * sizeof(unsigned); + BYTE* const nodeWksp = countWkspStart + countWkspSize; + const size_t nodeWkspSize = wkspEnd-nodeWksp; + unsigned maxSymbolValue = HUF_SYMBOLVALUE_MAX; + unsigned huffLog = HUF_TABLELOG_DEFAULT; + HUF_repeat repeat = prevHuf->repeatMode; + DEBUGLOG(5, "ZSTD_buildBlockEntropyStats_literals (srcSize=%zu)", srcSize); + + /* Prepare nextEntropy assuming reusing the existing table */ + ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + + if (disableLiteralsCompression) { + DEBUGLOG(5, "set_basic - disabled"); + hufMetadata->hType = set_basic; + return 0; + } + + /* small ? don't even attempt compression (speed opt) */ +#ifndef COMPRESS_LITERALS_SIZE_MIN +#define COMPRESS_LITERALS_SIZE_MIN 63 +#endif + { size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 
6 : COMPRESS_LITERALS_SIZE_MIN; + if (srcSize <= minLitSize) { + DEBUGLOG(5, "set_basic - too small"); + hufMetadata->hType = set_basic; + return 0; + } + } + + /* Scan input and build symbol stats */ + { size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)src, srcSize, workspace, wkspSize); + FORWARD_IF_ERROR(largest, "HIST_count_wksp failed"); + if (largest == srcSize) { + DEBUGLOG(5, "set_rle"); + hufMetadata->hType = set_rle; + return 0; + } + if (largest <= (srcSize >> 7)+4) { + DEBUGLOG(5, "set_basic - no gain"); + hufMetadata->hType = set_basic; + return 0; + } + } + + /* Validate the previous Huffman table */ + if (repeat == HUF_repeat_check && !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) { + repeat = HUF_repeat_none; + } + + /* Build Huffman Tree */ + ZSTD_memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable)); + huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue); + { size_t const maxBits = HUF_buildCTable_wksp((HUF_CElt*)nextHuf->CTable, countWksp, + maxSymbolValue, huffLog, + nodeWksp, nodeWkspSize); + FORWARD_IF_ERROR(maxBits, "HUF_buildCTable_wksp"); + huffLog = (U32)maxBits; + { /* Build and write the CTable */ + size_t const newCSize = HUF_estimateCompressedSize( + (HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue); + size_t const hSize = HUF_writeCTable_wksp( + hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer), + (HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog, + nodeWksp, nodeWkspSize); + /* Check against repeating the previous CTable */ + if (repeat != HUF_repeat_none) { + size_t const oldCSize = HUF_estimateCompressedSize( + (HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue); + if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) { + DEBUGLOG(5, "set_repeat - smaller"); + ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + hufMetadata->hType = set_repeat; + return 0; + } + } + if (newCSize + hSize >= srcSize) { + DEBUGLOG(5, "set_basic - no gains"); + ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + hufMetadata->hType = set_basic; + return 0; + } + DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize); + hufMetadata->hType = set_compressed; + nextHuf->repeatMode = HUF_repeat_check; + return hSize; + } + } +} + + +/* ZSTD_buildDummySequencesStatistics(): + * Returns a ZSTD_symbolEncodingTypeStats_t with all encoding types as set_basic, + * and updates nextEntropy to the appropriate repeatMode. + */ +static ZSTD_symbolEncodingTypeStats_t +ZSTD_buildDummySequencesStatistics(ZSTD_fseCTables_t* nextEntropy) { + ZSTD_symbolEncodingTypeStats_t stats = {set_basic, set_basic, set_basic, 0, 0}; + nextEntropy->litlength_repeatMode = FSE_repeat_none; + nextEntropy->offcode_repeatMode = FSE_repeat_none; + nextEntropy->matchlength_repeatMode = FSE_repeat_none; + return stats; +} + +/** ZSTD_buildBlockEntropyStats_sequences() : + * Builds entropy for the sequences. + * Stores symbol compression modes and fse table to fseMetadata. + * Requires ENTROPY_WORKSPACE_SIZE wksp. 
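+ *  The serialized FSE table descriptions are written into fseMetadata->fseTablesBuffer.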
+ * @return : size of fse tables or error code */ +static size_t ZSTD_buildBlockEntropyStats_sequences(seqStore_t* seqStorePtr, + const ZSTD_fseCTables_t* prevEntropy, + ZSTD_fseCTables_t* nextEntropy, + const ZSTD_CCtx_params* cctxParams, + ZSTD_fseCTablesMetadata_t* fseMetadata, + void* workspace, size_t wkspSize) +{ + ZSTD_strategy const strategy = cctxParams->cParams.strategy; + size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart; + BYTE* const ostart = fseMetadata->fseTablesBuffer; + BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer); + BYTE* op = ostart; + unsigned* countWorkspace = (unsigned*)workspace; + unsigned* entropyWorkspace = countWorkspace + (MaxSeq + 1); + size_t entropyWorkspaceSize = wkspSize - (MaxSeq + 1) * sizeof(*countWorkspace); + ZSTD_symbolEncodingTypeStats_t stats; + + DEBUGLOG(5, "ZSTD_buildBlockEntropyStats_sequences (nbSeq=%zu)", nbSeq); + stats = nbSeq != 0 ? ZSTD_buildSequencesStatistics(seqStorePtr, nbSeq, + prevEntropy, nextEntropy, op, oend, + strategy, countWorkspace, + entropyWorkspace, entropyWorkspaceSize) + : ZSTD_buildDummySequencesStatistics(nextEntropy); + FORWARD_IF_ERROR(stats.size, "ZSTD_buildSequencesStatistics failed!"); + fseMetadata->llType = (symbolEncodingType_e) stats.LLtype; + fseMetadata->ofType = (symbolEncodingType_e) stats.Offtype; + fseMetadata->mlType = (symbolEncodingType_e) stats.MLtype; + fseMetadata->lastCountSize = stats.lastCountSize; + return stats.size; +} + + +/** ZSTD_buildBlockEntropyStats() : + * Builds entropy for the block. + * Requires workspace size ENTROPY_WORKSPACE_SIZE + * + * @return : 0 on success or error code + */ +size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr, + const ZSTD_entropyCTables_t* prevEntropy, + ZSTD_entropyCTables_t* nextEntropy, + const ZSTD_CCtx_params* cctxParams, + ZSTD_entropyCTablesMetadata_t* entropyMetadata, + void* workspace, size_t wkspSize) +{ + size_t const litSize = seqStorePtr->lit - seqStorePtr->litStart; + entropyMetadata->hufMetadata.hufDesSize = + ZSTD_buildBlockEntropyStats_literals(seqStorePtr->litStart, litSize, + &prevEntropy->huf, &nextEntropy->huf, + &entropyMetadata->hufMetadata, + ZSTD_disableLiteralsCompression(cctxParams), + workspace, wkspSize); + FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildBlockEntropyStats_literals failed"); + entropyMetadata->fseMetadata.fseTablesSize = + ZSTD_buildBlockEntropyStats_sequences(seqStorePtr, + &prevEntropy->fse, &nextEntropy->fse, + cctxParams, + &entropyMetadata->fseMetadata, + workspace, wkspSize); + FORWARD_IF_ERROR(entropyMetadata->fseMetadata.fseTablesSize, "ZSTD_buildBlockEntropyStats_sequences failed"); + return 0; +} + +/* Returns the size estimate for the literals section (header + content) of a block */ +static size_t ZSTD_estimateBlockSize_literal(const BYTE* literals, size_t litSize, + const ZSTD_hufCTables_t* huf, + const ZSTD_hufCTablesMetadata_t* hufMetadata, + void* workspace, size_t wkspSize, + int writeEntropy) +{ + unsigned* const countWksp = (unsigned*)workspace; + unsigned maxSymbolValue = HUF_SYMBOLVALUE_MAX; + size_t literalSectionHeaderSize = 3 + (litSize >= 1 KB) + (litSize >= 16 KB); + U32 singleStream = litSize < 256; + + if (hufMetadata->hType == set_basic) return litSize; + else if (hufMetadata->hType == set_rle) return 1; + else if (hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat) { + size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)literals, litSize, workspace, wkspSize); + 
if (ZSTD_isError(largest)) return litSize; + { size_t cLitSizeEstimate = HUF_estimateCompressedSize((const HUF_CElt*)huf->CTable, countWksp, maxSymbolValue); + if (writeEntropy) cLitSizeEstimate += hufMetadata->hufDesSize; + if (!singleStream) cLitSizeEstimate += 6; /* multi-stream huffman uses 6-byte jump table */ + return cLitSizeEstimate + literalSectionHeaderSize; + } } + assert(0); /* impossible */ + return 0; +} + +/* Returns the size estimate for the FSE-compressed symbols (of, ml, ll) of a block */ +static size_t ZSTD_estimateBlockSize_symbolType(symbolEncodingType_e type, + const BYTE* codeTable, size_t nbSeq, unsigned maxCode, + const FSE_CTable* fseCTable, + const U32* additionalBits, + short const* defaultNorm, U32 defaultNormLog, U32 defaultMax, + void* workspace, size_t wkspSize) +{ + unsigned* const countWksp = (unsigned*)workspace; + const BYTE* ctp = codeTable; + const BYTE* const ctStart = ctp; + const BYTE* const ctEnd = ctStart + nbSeq; + size_t cSymbolTypeSizeEstimateInBits = 0; + unsigned max = maxCode; + + HIST_countFast_wksp(countWksp, &max, codeTable, nbSeq, workspace, wkspSize); /* can't fail */ + if (type == set_basic) { + /* We selected this encoding type, so it must be valid. */ + assert(max <= defaultMax); + (void)defaultMax; + cSymbolTypeSizeEstimateInBits = ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, countWksp, max); + } else if (type == set_rle) { + cSymbolTypeSizeEstimateInBits = 0; + } else if (type == set_compressed || type == set_repeat) { + cSymbolTypeSizeEstimateInBits = ZSTD_fseBitCost(fseCTable, countWksp, max); + } + if (ZSTD_isError(cSymbolTypeSizeEstimateInBits)) { + return nbSeq * 10; + } + while (ctp < ctEnd) { + if (additionalBits) cSymbolTypeSizeEstimateInBits += additionalBits[*ctp]; + else cSymbolTypeSizeEstimateInBits += *ctp; /* for offset, offset code is also the number of additional bits */ + ctp++; + } + return cSymbolTypeSizeEstimateInBits >> 3; +} + +/* Returns the size estimate for the sequences section (header + content) of a block */ +static size_t ZSTD_estimateBlockSize_sequences(const BYTE* ofCodeTable, + const BYTE* llCodeTable, + const BYTE* mlCodeTable, + size_t nbSeq, + const ZSTD_fseCTables_t* fseTables, + const ZSTD_fseCTablesMetadata_t* fseMetadata, + void* workspace, size_t wkspSize, + int writeEntropy) +{ + size_t sequencesSectionHeaderSize = 1 /* seqHead */ + 1 /* min seqSize size */ + (nbSeq >= 128) + (nbSeq >= LONGNBSEQ); + size_t cSeqSizeEstimate = 0; + cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, nbSeq, MaxOff, + fseTables->offcodeCTable, NULL, + OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, + workspace, wkspSize); + cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->llType, llCodeTable, nbSeq, MaxLL, + fseTables->litlengthCTable, LL_bits, + LL_defaultNorm, LL_defaultNormLog, MaxLL, + workspace, wkspSize); + cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->mlType, mlCodeTable, nbSeq, MaxML, + fseTables->matchlengthCTable, ML_bits, + ML_defaultNorm, ML_defaultNormLog, MaxML, + workspace, wkspSize); + if (writeEntropy) cSeqSizeEstimate += fseMetadata->fseTablesSize; + return cSeqSizeEstimate + sequencesSectionHeaderSize; +} + +/* Returns the size estimate for a given stream of literals, of, ll, ml */ +static size_t ZSTD_estimateBlockSize(const BYTE* literals, size_t litSize, + const BYTE* ofCodeTable, + const BYTE* llCodeTable, + const BYTE* mlCodeTable, + size_t nbSeq, + const ZSTD_entropyCTables_t* entropy, + const 
ZSTD_entropyCTablesMetadata_t* entropyMetadata, + void* workspace, size_t wkspSize, + int writeLitEntropy, int writeSeqEntropy) { + size_t const literalsSize = ZSTD_estimateBlockSize_literal(literals, litSize, + &entropy->huf, &entropyMetadata->hufMetadata, + workspace, wkspSize, writeLitEntropy); + size_t const seqSize = ZSTD_estimateBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable, + nbSeq, &entropy->fse, &entropyMetadata->fseMetadata, + workspace, wkspSize, writeSeqEntropy); + return seqSize + literalsSize + ZSTD_blockHeaderSize; +} + +/* Builds entropy statistics and uses them for blocksize estimation. + * + * Returns the estimated compressed size of the seqStore, or a zstd error. + */ +static size_t ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(seqStore_t* seqStore, const ZSTD_CCtx* zc) { + ZSTD_entropyCTablesMetadata_t entropyMetadata; + FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(seqStore, + &zc->blockState.prevCBlock->entropy, + &zc->blockState.nextCBlock->entropy, + &zc->appliedParams, + &entropyMetadata, + zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */), ""); + return ZSTD_estimateBlockSize(seqStore->litStart, (size_t)(seqStore->lit - seqStore->litStart), + seqStore->ofCode, seqStore->llCode, seqStore->mlCode, + (size_t)(seqStore->sequences - seqStore->sequencesStart), + &zc->blockState.nextCBlock->entropy, &entropyMetadata, zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE, + (int)(entropyMetadata.hufMetadata.hType == set_compressed), 1); +} + +/* Returns literals bytes represented in a seqStore */ +static size_t ZSTD_countSeqStoreLiteralsBytes(const seqStore_t* const seqStore) { + size_t literalsBytes = 0; + size_t const nbSeqs = seqStore->sequences - seqStore->sequencesStart; + size_t i; + for (i = 0; i < nbSeqs; ++i) { + seqDef seq = seqStore->sequencesStart[i]; + literalsBytes += seq.litLength; + if (i == seqStore->longLengthPos && seqStore->longLengthType == ZSTD_llt_literalLength) { + literalsBytes += 0x10000; + } + } + return literalsBytes; +} + +/* Returns match bytes represented in a seqStore */ +static size_t ZSTD_countSeqStoreMatchBytes(const seqStore_t* const seqStore) { + size_t matchBytes = 0; + size_t const nbSeqs = seqStore->sequences - seqStore->sequencesStart; + size_t i; + for (i = 0; i < nbSeqs; ++i) { + seqDef seq = seqStore->sequencesStart[i]; + matchBytes += seq.matchLength + MINMATCH; + if (i == seqStore->longLengthPos && seqStore->longLengthType == ZSTD_llt_matchLength) { + matchBytes += 0x10000; + } + } + return matchBytes; +} + +/* Derives the seqStore that is a chunk of the originalSeqStore from [startIdx, endIdx). + * Stores the result in resultSeqStore. 
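+ * Pointers into the literal buffer and the llCode/mlCode/ofCode tables are advanced so the
+ * chunk can be handled as if it were a standalone seqStore.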
+ */ +static void ZSTD_deriveSeqStoreChunk(seqStore_t* resultSeqStore, + const seqStore_t* originalSeqStore, + size_t startIdx, size_t endIdx) { + BYTE* const litEnd = originalSeqStore->lit; + size_t literalsBytes; + size_t literalsBytesPreceding = 0; + + *resultSeqStore = *originalSeqStore; + if (startIdx > 0) { + resultSeqStore->sequences = originalSeqStore->sequencesStart + startIdx; + literalsBytesPreceding = ZSTD_countSeqStoreLiteralsBytes(resultSeqStore); + } + + /* Move longLengthPos into the correct position if necessary */ + if (originalSeqStore->longLengthType != ZSTD_llt_none) { + if (originalSeqStore->longLengthPos < startIdx || originalSeqStore->longLengthPos > endIdx) { + resultSeqStore->longLengthType = ZSTD_llt_none; + } else { + resultSeqStore->longLengthPos -= (U32)startIdx; + } + } + resultSeqStore->sequencesStart = originalSeqStore->sequencesStart + startIdx; + resultSeqStore->sequences = originalSeqStore->sequencesStart + endIdx; + literalsBytes = ZSTD_countSeqStoreLiteralsBytes(resultSeqStore); + resultSeqStore->litStart += literalsBytesPreceding; + if (endIdx == (size_t)(originalSeqStore->sequences - originalSeqStore->sequencesStart)) { + /* This accounts for possible last literals if the derived chunk reaches the end of the block */ + resultSeqStore->lit = litEnd; + } else { + resultSeqStore->lit = resultSeqStore->litStart+literalsBytes; + } + resultSeqStore->llCode += startIdx; + resultSeqStore->mlCode += startIdx; + resultSeqStore->ofCode += startIdx; +} + +/** + * Returns the raw offset represented by the combination of offCode, ll0, and repcode history. + * offCode must be an offCode representing a repcode, therefore in the range of [0, 2]. + */ +static U32 ZSTD_resolveRepcodeToRawOffset(const U32 rep[ZSTD_REP_NUM], const U32 offCode, const U32 ll0) { + U32 const adjustedOffCode = offCode + ll0; + assert(offCode < ZSTD_REP_NUM); + if (adjustedOffCode == ZSTD_REP_NUM) { + /* litlength == 0 and offCode == 2 implies selection of first repcode - 1 */ + assert(rep[0] > 0); + return rep[0] - 1; + } + return rep[adjustedOffCode]; +} + +/** + * ZSTD_seqStore_resolveOffCodes() reconciles any possible divergences in offset history that may arise + * due to emission of RLE/raw blocks that disturb the offset history, and replaces any repcodes within + * the seqStore that may be invalid. + * + * dRepcodes are updated as would be on the decompression side. cRepcodes are updated exactly in + * accordance with the seqStore. + */ +static void ZSTD_seqStore_resolveOffCodes(repcodes_t* const dRepcodes, repcodes_t* const cRepcodes, + seqStore_t* const seqStore, U32 const nbSeq) { + U32 idx = 0; + for (; idx < nbSeq; ++idx) { + seqDef* const seq = seqStore->sequencesStart + idx; + U32 const ll0 = (seq->litLength == 0); + U32 offCode = seq->offset - 1; + assert(seq->offset > 0); + if (offCode <= ZSTD_REP_MOVE) { + U32 const dRawOffset = ZSTD_resolveRepcodeToRawOffset(dRepcodes->rep, offCode, ll0); + U32 const cRawOffset = ZSTD_resolveRepcodeToRawOffset(cRepcodes->rep, offCode, ll0); + /* Adjust simulated decompression repcode history if we come across a mismatch. Replace + * the repcode with the offset it actually references, determined by the compression + * repcode history. + */ + if (dRawOffset != cRawOffset) { + seq->offset = cRawOffset + ZSTD_REP_NUM; + } + } + /* Compression repcode history is always updated with values directly from the unmodified seqStore. + * Decompression repcode history may use modified seq->offset value taken from compression repcode history. 
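+ * In short: cRepcodes mirrors what the encoder emitted, while dRepcodes mirrors what the
+ * decoder will have reconstructed once RLE/raw partitions are taken into account.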
+ */ + *dRepcodes = ZSTD_updateRep(dRepcodes->rep, seq->offset - 1, ll0); + *cRepcodes = ZSTD_updateRep(cRepcodes->rep, offCode, ll0); + } +} + +/* ZSTD_compressSeqStore_singleBlock(): + * Compresses a seqStore into a block with a block header, into the buffer dst. + * + * Returns the total size of that block (including header) or a ZSTD error code. + */ +static size_t ZSTD_compressSeqStore_singleBlock(ZSTD_CCtx* zc, seqStore_t* const seqStore, + repcodes_t* const dRep, repcodes_t* const cRep, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + U32 lastBlock, U32 isPartition) { + const U32 rleMaxLength = 25; + BYTE* op = (BYTE*)dst; + const BYTE* ip = (const BYTE*)src; + size_t cSize; + size_t cSeqsSize; + + /* In case of an RLE or raw block, the simulated decompression repcode history must be reset */ + repcodes_t const dRepOriginal = *dRep; + if (isPartition) + ZSTD_seqStore_resolveOffCodes(dRep, cRep, seqStore, (U32)(seqStore->sequences - seqStore->sequencesStart)); + + cSeqsSize = ZSTD_entropyCompressSeqStore(seqStore, + &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy, + &zc->appliedParams, + op + ZSTD_blockHeaderSize, dstCapacity - ZSTD_blockHeaderSize, + srcSize, + zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */, + zc->bmi2); + FORWARD_IF_ERROR(cSeqsSize, "ZSTD_entropyCompressSeqStore failed!"); + + if (!zc->isFirstBlock && + cSeqsSize < rleMaxLength && + ZSTD_isRLE((BYTE const*)src, srcSize)) { + /* We don't want to emit our first block as a RLE even if it qualifies because + * doing so will cause the decoder (cli only) to throw a "should consume all input error." + * This is only an issue for zstd <= v1.4.3 + */ + cSeqsSize = 1; + } + + if (zc->seqCollector.collectSequences) { + ZSTD_copyBlockSequences(zc); + ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState); + return 0; + } + + if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) + zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; + + if (cSeqsSize == 0) { + cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, srcSize, lastBlock); + FORWARD_IF_ERROR(cSize, "Nocompress block failed"); + DEBUGLOG(4, "Writing out nocompress block, size: %zu", cSize); + *dRep = dRepOriginal; /* reset simulated decompression repcode history */ + } else if (cSeqsSize == 1) { + cSize = ZSTD_rleCompressBlock(op, dstCapacity, *ip, srcSize, lastBlock); + FORWARD_IF_ERROR(cSize, "RLE compress block failed"); + DEBUGLOG(4, "Writing out RLE block, size: %zu", cSize); + *dRep = dRepOriginal; /* reset simulated decompression repcode history */ + } else { + ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState); + writeBlockHeader(op, cSeqsSize, srcSize, lastBlock); + cSize = ZSTD_blockHeaderSize + cSeqsSize; + DEBUGLOG(4, "Writing out compressed block, size: %zu", cSize); + } + return cSize; +} + +/* Struct to keep track of where we are in our recursive calls. */ +typedef struct { + U32* splitLocations; /* Array of split indices */ + size_t idx; /* The current index within splitLocations being worked on */ +} seqStoreSplits; + +#define MIN_SEQUENCES_BLOCK_SPLITTING 300 +#define MAX_NB_SPLITS 196 + +/* Helper function to perform the recursive search for block splits. + * Estimates the cost of seqStore prior to split, and estimates the cost of splitting the sequences in half. + * If advantageous to split, then we recurse down the two sub-blocks. 
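+ * (Both estimates come from ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize() below.)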
If not, or if an error occurred in estimation, then + * we do not recurse. + * + * Note: The recursion depth is capped by a heuristic minimum number of sequences, defined by MIN_SEQUENCES_BLOCK_SPLITTING. + * In theory, this means the absolute largest recursion depth is 10 == log2(maxNbSeqInBlock/MIN_SEQUENCES_BLOCK_SPLITTING). + * In practice, recursion depth usually doesn't go beyond 4. + * + * Furthermore, the number of splits is capped by MAX_NB_SPLITS. At MAX_NB_SPLITS == 196 with the current existing blockSize + * maximum of 128 KB, this value is actually impossible to reach. + */ +static void ZSTD_deriveBlockSplitsHelper(seqStoreSplits* splits, size_t startIdx, size_t endIdx, + const ZSTD_CCtx* zc, const seqStore_t* origSeqStore) { + seqStore_t fullSeqStoreChunk; + seqStore_t firstHalfSeqStore; + seqStore_t secondHalfSeqStore; + size_t estimatedOriginalSize; + size_t estimatedFirstHalfSize; + size_t estimatedSecondHalfSize; + size_t midIdx = (startIdx + endIdx)/2; + + if (endIdx - startIdx < MIN_SEQUENCES_BLOCK_SPLITTING || splits->idx >= MAX_NB_SPLITS) { + return; + } + ZSTD_deriveSeqStoreChunk(&fullSeqStoreChunk, origSeqStore, startIdx, endIdx); + ZSTD_deriveSeqStoreChunk(&firstHalfSeqStore, origSeqStore, startIdx, midIdx); + ZSTD_deriveSeqStoreChunk(&secondHalfSeqStore, origSeqStore, midIdx, endIdx); + estimatedOriginalSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(&fullSeqStoreChunk, zc); + estimatedFirstHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(&firstHalfSeqStore, zc); + estimatedSecondHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(&secondHalfSeqStore, zc); + DEBUGLOG(5, "Estimated original block size: %zu -- First half split: %zu -- Second half split: %zu", + estimatedOriginalSize, estimatedFirstHalfSize, estimatedSecondHalfSize); + if (ZSTD_isError(estimatedOriginalSize) || ZSTD_isError(estimatedFirstHalfSize) || ZSTD_isError(estimatedSecondHalfSize)) { + return; + } + if (estimatedFirstHalfSize + estimatedSecondHalfSize < estimatedOriginalSize) { + ZSTD_deriveBlockSplitsHelper(splits, startIdx, midIdx, zc, origSeqStore); + splits->splitLocations[splits->idx] = (U32)midIdx; + splits->idx++; + ZSTD_deriveBlockSplitsHelper(splits, midIdx, endIdx, zc, origSeqStore); + } +} + +/* Base recursive function. Populates a table with intra-block partition indices that can improve compression ratio. + * + * Returns the number of splits made (which equals the size of the partition table - 1). + */ +static size_t ZSTD_deriveBlockSplits(ZSTD_CCtx* zc, U32 partitions[], U32 nbSeq) { + seqStoreSplits splits = {partitions, 0}; + if (nbSeq <= 4) { + DEBUGLOG(4, "ZSTD_deriveBlockSplits: Too few sequences to split"); + /* Refuse to try and split anything with less than 4 sequences */ + return 0; + } + ZSTD_deriveBlockSplitsHelper(&splits, 0, nbSeq, zc, &zc->seqStore); + splits.splitLocations[splits.idx] = nbSeq; + DEBUGLOG(5, "ZSTD_deriveBlockSplits: final nb partitions: %zu", splits.idx+1); + return splits.idx; +} + +/* ZSTD_compressBlock_splitBlock(): + * Attempts to split a given block into multiple blocks to improve compression ratio. + * + * Returns combined size of all blocks (which includes headers), or a ZSTD error code. 
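+ * Partition boundaries come from ZSTD_deriveBlockSplits(); each partition is then emitted
+ * as its own block through ZSTD_compressSeqStore_singleBlock().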
+ */ +static size_t ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, + const void* src, size_t blockSize, U32 lastBlock, U32 nbSeq) { + size_t cSize = 0; + const BYTE* ip = (const BYTE*)src; + BYTE* op = (BYTE*)dst; + U32 partitions[MAX_NB_SPLITS]; + size_t i = 0; + size_t srcBytesTotal = 0; + size_t numSplits = ZSTD_deriveBlockSplits(zc, partitions, nbSeq); + seqStore_t nextSeqStore; + seqStore_t currSeqStore; + + /* If a block is split and some partitions are emitted as RLE/uncompressed, then repcode history + * may become invalid. In order to reconcile potentially invalid repcodes, we keep track of two + * separate repcode histories that simulate repcode history on compression and decompression side, + * and use the histories to determine whether we must replace a particular repcode with its raw offset. + * + * 1) cRep gets updated for each partition, regardless of whether the block was emitted as uncompressed + * or RLE. This allows us to retrieve the offset value that an invalid repcode references within + * a nocompress/RLE block. + * 2) dRep gets updated only for compressed partitions, and when a repcode gets replaced, will use + * the replacement offset value rather than the original repcode to update the repcode history. + * dRep also will be the final repcode history sent to the next block. + * + * See ZSTD_seqStore_resolveOffCodes() for more details. + */ + repcodes_t dRep; + repcodes_t cRep; + ZSTD_memcpy(dRep.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t)); + ZSTD_memcpy(cRep.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t)); + + DEBUGLOG(4, "ZSTD_compressBlock_splitBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)", + (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, + (unsigned)zc->blockState.matchState.nextToUpdate); + + if (numSplits == 0) { + size_t cSizeSingleBlock = ZSTD_compressSeqStore_singleBlock(zc, &zc->seqStore, + &dRep, &cRep, + op, dstCapacity, + ip, blockSize, + lastBlock, 0 /* isPartition */); + FORWARD_IF_ERROR(cSizeSingleBlock, "Compressing single block from splitBlock_internal() failed!"); + DEBUGLOG(5, "ZSTD_compressBlock_splitBlock_internal: No splits"); + assert(cSizeSingleBlock <= ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize); + return cSizeSingleBlock; + } + + ZSTD_deriveSeqStoreChunk(&currSeqStore, &zc->seqStore, 0, partitions[0]); + for (i = 0; i <= numSplits; ++i) { + size_t srcBytes; + size_t cSizeChunk; + U32 const lastPartition = (i == numSplits); + U32 lastBlockEntireSrc = 0; + + srcBytes = ZSTD_countSeqStoreLiteralsBytes(&currSeqStore) + ZSTD_countSeqStoreMatchBytes(&currSeqStore); + srcBytesTotal += srcBytes; + if (lastPartition) { + /* This is the final partition, need to account for possible last literals */ + srcBytes += blockSize - srcBytesTotal; + lastBlockEntireSrc = lastBlock; + } else { + ZSTD_deriveSeqStoreChunk(&nextSeqStore, &zc->seqStore, partitions[i], partitions[i+1]); + } + + cSizeChunk = ZSTD_compressSeqStore_singleBlock(zc, &currSeqStore, + &dRep, &cRep, + op, dstCapacity, + ip, srcBytes, + lastBlockEntireSrc, 1 /* isPartition */); + DEBUGLOG(5, "Estimated size: %zu actual size: %zu", ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(&currSeqStore, zc), cSizeChunk); + FORWARD_IF_ERROR(cSizeChunk, "Compressing chunk failed!"); + + ip += srcBytes; + op += cSizeChunk; + dstCapacity -= cSizeChunk; + cSize += cSizeChunk; + currSeqStore = nextSeqStore; + assert(cSizeChunk <= ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize); + } + /* cRep and dRep may 
have diverged during the compression. If so, we use the dRep repcodes + * for the next block. + */ + ZSTD_memcpy(zc->blockState.prevCBlock->rep, dRep.rep, sizeof(repcodes_t)); + return cSize; +} + +static size_t ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, U32 lastBlock) { + const BYTE* ip = (const BYTE*)src; + BYTE* op = (BYTE*)dst; + U32 nbSeq; + size_t cSize; + DEBUGLOG(4, "ZSTD_compressBlock_splitBlock"); + + { const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize); + FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed"); + if (bss == ZSTDbss_noCompress) { + if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) + zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; + cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, srcSize, lastBlock); + FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed"); + DEBUGLOG(4, "ZSTD_compressBlock_splitBlock: Nocompress block"); + return cSize; + } + nbSeq = (U32)(zc->seqStore.sequences - zc->seqStore.sequencesStart); + } + + assert(zc->appliedParams.splitBlocks == 1); + cSize = ZSTD_compressBlock_splitBlock_internal(zc, dst, dstCapacity, src, srcSize, lastBlock, nbSeq); + FORWARD_IF_ERROR(cSize, "Splitting blocks failed!"); + return cSize; +} + +static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, U32 frame) +{ + /* This the upper bound for the length of an rle block. + * This isn't the actual upper bound. Finding the real threshold + * needs further investigation. + */ + const U32 rleMaxLength = 25; + size_t cSize; + const BYTE* ip = (const BYTE*)src; + BYTE* op = (BYTE*)dst; + DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)", + (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, + (unsigned)zc->blockState.matchState.nextToUpdate); + + { const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize); + FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed"); + if (bss == ZSTDbss_noCompress) { cSize = 0; goto out; } + } + + if (zc->seqCollector.collectSequences) { + ZSTD_copyBlockSequences(zc); + ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState); + return 0; + } + + /* encode sequences and literals */ + cSize = ZSTD_entropyCompressSeqStore(&zc->seqStore, + &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy, + &zc->appliedParams, + dst, dstCapacity, + srcSize, + zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */, + zc->bmi2); + + if (zc->seqCollector.collectSequences) { + ZSTD_copyBlockSequences(zc); + return 0; + } + + + if (frame && + /* We don't want to emit our first block as a RLE even if it qualifies because + * doing so will cause the decoder (cli only) to throw a "should consume all input error." + * This is only an issue for zstd <= v1.4.3 + */ + !zc->isFirstBlock && + cSize < rleMaxLength && + ZSTD_isRLE(ip, srcSize)) + { + cSize = 1; + op[0] = ip[0]; + } + +out: + if (!ZSTD_isError(cSize) && cSize > 1) { + ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState); + } + /* We check that dictionaries have offset codes available for the first + * block. After the first block, the offcode table might not have large + * enough codes to represent the offsets in the data. 
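+ * Downgrading offcode_repeatMode from FSE_repeat_valid to FSE_repeat_check below forces
+ * that re-validation on subsequent blocks.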
+ */ + if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) + zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; + + return cSize; +} + +static size_t ZSTD_compressBlock_targetCBlockSize_body(ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const size_t bss, U32 lastBlock) +{ + DEBUGLOG(6, "Attempting ZSTD_compressSuperBlock()"); + if (bss == ZSTDbss_compress) { + if (/* We don't want to emit our first block as a RLE even if it qualifies because + * doing so will cause the decoder (cli only) to throw a "should consume all input error." + * This is only an issue for zstd <= v1.4.3 + */ + !zc->isFirstBlock && + ZSTD_maybeRLE(&zc->seqStore) && + ZSTD_isRLE((BYTE const*)src, srcSize)) + { + return ZSTD_rleCompressBlock(dst, dstCapacity, *(BYTE const*)src, srcSize, lastBlock); + } + /* Attempt superblock compression. + * + * Note that compressed size of ZSTD_compressSuperBlock() is not bound by the + * standard ZSTD_compressBound(). This is a problem, because even if we have + * space now, taking an extra byte now could cause us to run out of space later + * and violate ZSTD_compressBound(). + * + * Define blockBound(blockSize) = blockSize + ZSTD_blockHeaderSize. + * + * In order to respect ZSTD_compressBound() we must attempt to emit a raw + * uncompressed block in these cases: + * * cSize == 0: Return code for an uncompressed block. + * * cSize == dstSize_tooSmall: We may have expanded beyond blockBound(srcSize). + * ZSTD_noCompressBlock() will return dstSize_tooSmall if we are really out of + * output space. + * * cSize >= blockBound(srcSize): We have expanded the block too much so + * emit an uncompressed block. + */ + { + size_t const cSize = ZSTD_compressSuperBlock(zc, dst, dstCapacity, src, srcSize, lastBlock); + if (cSize != ERROR(dstSize_tooSmall)) { + size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, zc->appliedParams.cParams.strategy); + FORWARD_IF_ERROR(cSize, "ZSTD_compressSuperBlock failed"); + if (cSize != 0 && cSize < maxCSize + ZSTD_blockHeaderSize) { + ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState); + return cSize; + } + } + } + } + + DEBUGLOG(6, "Resorting to ZSTD_noCompressBlock()"); + /* Superblock compression failed, attempt to emit a single no compress block. + * The decoder will be able to stream this block since it is uncompressed. 
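+ * (ZSTD_noCompressBlock() writes a bt_raw block header followed by a verbatim copy of the source.)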
+ */ + return ZSTD_noCompressBlock(dst, dstCapacity, src, srcSize, lastBlock); +} + +static size_t ZSTD_compressBlock_targetCBlockSize(ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + U32 lastBlock) +{ + size_t cSize = 0; + const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize); + DEBUGLOG(5, "ZSTD_compressBlock_targetCBlockSize (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u, srcSize=%zu)", + (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, (unsigned)zc->blockState.matchState.nextToUpdate, srcSize); + FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed"); + + cSize = ZSTD_compressBlock_targetCBlockSize_body(zc, dst, dstCapacity, src, srcSize, bss, lastBlock); + FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_targetCBlockSize_body failed"); + + if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) + zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; + + return cSize; +} + +static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms, + ZSTD_cwksp* ws, + ZSTD_CCtx_params const* params, + void const* ip, + void const* iend) +{ + U32 const cycleLog = ZSTD_cycleLog(params->cParams.chainLog, params->cParams.strategy); + U32 const maxDist = (U32)1 << params->cParams.windowLog; + if (ZSTD_window_needOverflowCorrection(ms->window, cycleLog, maxDist, ms->loadedDictEnd, ip, iend)) { + U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, maxDist, ip); + ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30); + ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30); + ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31); + ZSTD_cwksp_mark_tables_dirty(ws); + ZSTD_reduceIndex(ms, params, correction); + ZSTD_cwksp_mark_tables_clean(ws); + if (ms->nextToUpdate < correction) ms->nextToUpdate = 0; + else ms->nextToUpdate -= correction; + /* invalidate dictionaries on overflow correction */ + ms->loadedDictEnd = 0; + ms->dictMatchState = NULL; + } +} + +/*! ZSTD_compress_frameChunk() : +* Compress a chunk of data into one or multiple blocks. +* All blocks will be terminated, all input will be consumed. +* Function will issue an error if there is not enough `dstCapacity` to hold the compressed content. 
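+* Each block covers at most cctx->blockSize bytes of source; the last block may be shorter.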
+* Frame is supposed already started (header already produced) +* @return : compressed size, or an error code +*/ +static size_t ZSTD_compress_frameChunk(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + U32 lastFrameChunk) +{ + size_t blockSize = cctx->blockSize; + size_t remaining = srcSize; + const BYTE* ip = (const BYTE*)src; + BYTE* const ostart = (BYTE*)dst; + BYTE* op = ostart; + U32 const maxDist = (U32)1 << cctx->appliedParams.cParams.windowLog; + + assert(cctx->appliedParams.cParams.windowLog <= ZSTD_WINDOWLOG_MAX); + + DEBUGLOG(4, "ZSTD_compress_frameChunk (blockSize=%u)", (unsigned)blockSize); + if (cctx->appliedParams.fParams.checksumFlag && srcSize) + XXH64_update(&cctx->xxhState, src, srcSize); + + while (remaining) { + ZSTD_matchState_t* const ms = &cctx->blockState.matchState; + U32 const lastBlock = lastFrameChunk & (blockSize >= remaining); + + RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE, + dstSize_tooSmall, + "not enough space to store compressed block"); + if (remaining < blockSize) blockSize = remaining; + + ZSTD_overflowCorrectIfNeeded( + ms, &cctx->workspace, &cctx->appliedParams, ip, ip + blockSize); + ZSTD_checkDictValidity(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState); + + /* Ensure hash/chain table insertion resumes no sooner than lowlimit */ + if (ms->nextToUpdate < ms->window.lowLimit) ms->nextToUpdate = ms->window.lowLimit; + + { size_t cSize; + if (ZSTD_useTargetCBlockSize(&cctx->appliedParams)) { + cSize = ZSTD_compressBlock_targetCBlockSize(cctx, op, dstCapacity, ip, blockSize, lastBlock); + FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_targetCBlockSize failed"); + assert(cSize > 0); + assert(cSize <= blockSize + ZSTD_blockHeaderSize); + } else if (ZSTD_blockSplitterEnabled(&cctx->appliedParams)) { + cSize = ZSTD_compressBlock_splitBlock(cctx, op, dstCapacity, ip, blockSize, lastBlock); + FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_splitBlock failed"); + assert(cSize > 0 || cctx->seqCollector.collectSequences == 1); + } else { + cSize = ZSTD_compressBlock_internal(cctx, + op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize, + ip, blockSize, 1 /* frame */); + FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_internal failed"); + + if (cSize == 0) { /* block is not compressible */ + cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock); + FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed"); + } else { + U32 const cBlockHeader = cSize == 1 ? + lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) : + lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); + MEM_writeLE24(op, cBlockHeader); + cSize += ZSTD_blockHeaderSize; + } + } + + + ip += blockSize; + assert(remaining >= blockSize); + remaining -= blockSize; + op += cSize; + assert(dstCapacity >= cSize); + dstCapacity -= cSize; + cctx->isFirstBlock = 0; + DEBUGLOG(5, "ZSTD_compress_frameChunk: adding a block of size %u", + (unsigned)cSize); + } } + + if (lastFrameChunk && (op>ostart)) cctx->stage = ZSTDcs_ending; + return (size_t)(op-ostart); +} + + +static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity, + const ZSTD_CCtx_params* params, U64 pledgedSrcSize, U32 dictID) +{ BYTE* const op = (BYTE*)dst; + U32 const dictIDSizeCodeLength = (dictID>0) + (dictID>=256) + (dictID>=65536); /* 0-3 */ + U32 const dictIDSizeCode = params->fParams.noDictIDFlag ? 
0 : dictIDSizeCodeLength; /* 0-3 */ + U32 const checksumFlag = params->fParams.checksumFlag>0; + U32 const windowSize = (U32)1 << params->cParams.windowLog; + U32 const singleSegment = params->fParams.contentSizeFlag && (windowSize >= pledgedSrcSize); + BYTE const windowLogByte = (BYTE)((params->cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3); + U32 const fcsCode = params->fParams.contentSizeFlag ? + (pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) + (pledgedSrcSize>=0xFFFFFFFFU) : 0; /* 0-3 */ + BYTE const frameHeaderDescriptionByte = (BYTE)(dictIDSizeCode + (checksumFlag<<2) + (singleSegment<<5) + (fcsCode<<6) ); + size_t pos=0; + + assert(!(params->fParams.contentSizeFlag && pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN)); + RETURN_ERROR_IF(dstCapacity < ZSTD_FRAMEHEADERSIZE_MAX, dstSize_tooSmall, + "dst buf is too small to fit worst-case frame header size."); + DEBUGLOG(4, "ZSTD_writeFrameHeader : dictIDFlag : %u ; dictID : %u ; dictIDSizeCode : %u", + !params->fParams.noDictIDFlag, (unsigned)dictID, (unsigned)dictIDSizeCode); + if (params->format == ZSTD_f_zstd1) { + MEM_writeLE32(dst, ZSTD_MAGICNUMBER); + pos = 4; + } + op[pos++] = frameHeaderDescriptionByte; + if (!singleSegment) op[pos++] = windowLogByte; + switch(dictIDSizeCode) + { + default: assert(0); /* impossible */ + case 0 : break; + case 1 : op[pos] = (BYTE)(dictID); pos++; break; + case 2 : MEM_writeLE16(op+pos, (U16)dictID); pos+=2; break; + case 3 : MEM_writeLE32(op+pos, dictID); pos+=4; break; + } + switch(fcsCode) + { + default: assert(0); /* impossible */ + case 0 : if (singleSegment) op[pos++] = (BYTE)(pledgedSrcSize); break; + case 1 : MEM_writeLE16(op+pos, (U16)(pledgedSrcSize-256)); pos+=2; break; + case 2 : MEM_writeLE32(op+pos, (U32)(pledgedSrcSize)); pos+=4; break; + case 3 : MEM_writeLE64(op+pos, (U64)(pledgedSrcSize)); pos+=8; break; + } + return pos; +} + +/* ZSTD_writeSkippableFrame_advanced() : + * Writes out a skippable frame with the specified magic number variant (16 are supported), + * from ZSTD_MAGIC_SKIPPABLE_START to ZSTD_MAGIC_SKIPPABLE_START+15, and the desired source data. + * + * Returns the total number of bytes written, or a ZSTD error code. 
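+ * Wire layout: a 4-byte little-endian magic number (ZSTD_MAGIC_SKIPPABLE_START + magicVariant),
+ * a 4-byte little-endian content size, then the srcSize bytes of payload.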
+ */
+size_t ZSTD_writeSkippableFrame(void* dst, size_t dstCapacity,
+                                const void* src, size_t srcSize, unsigned magicVariant) {
+    BYTE* op = (BYTE*)dst;
+    RETURN_ERROR_IF(dstCapacity < srcSize + ZSTD_SKIPPABLEHEADERSIZE /* Skippable frame overhead */,
+                    dstSize_tooSmall, "Not enough room for skippable frame");
+    RETURN_ERROR_IF(srcSize > (unsigned)0xFFFFFFFF, srcSize_wrong, "Src size too large for skippable frame");
+    RETURN_ERROR_IF(magicVariant > 15, parameter_outOfBound, "Skippable frame magic number variant not supported");
+
+    MEM_writeLE32(op, (U32)(ZSTD_MAGIC_SKIPPABLE_START + magicVariant));
+    MEM_writeLE32(op+4, (U32)srcSize);
+    ZSTD_memcpy(op+8, src, srcSize);
+    return srcSize + ZSTD_SKIPPABLEHEADERSIZE;
+}
+
+/* ZSTD_writeLastEmptyBlock() :
+ * output an empty Block with end-of-frame mark to complete a frame
+ * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h))
+ *           or an error code if `dstCapacity` is too small (< ZSTD_blockHeaderSize)
+ */
+size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity)
+{
+    RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize, dstSize_tooSmall,
+                    "dst buf is too small to write frame trailer empty block.");
+    {   U32 const cBlockHeader24 = 1 /*lastBlock*/ + (((U32)bt_raw)<<1);  /* 0 size */
+        MEM_writeLE24(dst, cBlockHeader24);
+        return ZSTD_blockHeaderSize;
+    }
+}
+
+size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq)
+{
+    RETURN_ERROR_IF(cctx->stage != ZSTDcs_init, stage_wrong,
+                    "wrong cctx stage");
+    RETURN_ERROR_IF(cctx->appliedParams.ldmParams.enableLdm,
+                    parameter_unsupported,
+                    "incompatible with ldm");
+    cctx->externSeqStore.seq = seq;
+    cctx->externSeqStore.size = nbSeq;
+    cctx->externSeqStore.capacity = nbSeq;
+    cctx->externSeqStore.pos = 0;
+    cctx->externSeqStore.posInSequence = 0;
+    return 0;
+}
+
+
+static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
+                              void* dst, size_t dstCapacity,
+                              const void* src, size_t srcSize,
+                              U32 frame, U32 lastFrameChunk)
+{
+    ZSTD_matchState_t* const ms = &cctx->blockState.matchState;
+    size_t fhSize = 0;
+
+    DEBUGLOG(5, "ZSTD_compressContinue_internal, stage: %u, srcSize: %u",
+                cctx->stage, (unsigned)srcSize);
+    RETURN_ERROR_IF(cctx->stage==ZSTDcs_created, stage_wrong,
+                    "missing init (ZSTD_compressBegin)");
+
+    if (frame && (cctx->stage==ZSTDcs_init)) {
+        fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams,
+                                       cctx->pledgedSrcSizePlusOne-1, cctx->dictID);
+        FORWARD_IF_ERROR(fhSize, "ZSTD_writeFrameHeader failed");
+        assert(fhSize <= dstCapacity);
+        dstCapacity -= fhSize;
+        dst = (char*)dst + fhSize;
+        cctx->stage = ZSTDcs_ongoing;
+    }
+
+    if (!srcSize) return fhSize;  /* do not generate an empty block if no input */
+
+    if (!ZSTD_window_update(&ms->window, src, srcSize, ms->forceNonContiguous)) {
+        ms->forceNonContiguous = 0;
+        ms->nextToUpdate = ms->window.dictLimit;
+    }
+    if (cctx->appliedParams.ldmParams.enableLdm) {
+        ZSTD_window_update(&cctx->ldmState.window, src, srcSize, /* forceNonContiguous */ 0);
+    }
+
+    if (!frame) {
+        /* overflow check and correction for block mode */
+        ZSTD_overflowCorrectIfNeeded(
+            ms, &cctx->workspace, &cctx->appliedParams,
+            src, (BYTE const*)src + srcSize);
+    }
+
+    DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (unsigned)cctx->blockSize);
+    {   size_t const cSize = frame ?
+                             ZSTD_compress_frameChunk (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) :
+                             ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize, 0 /* frame */);
+        FORWARD_IF_ERROR(cSize, "%s", frame ?
"ZSTD_compress_frameChunk failed" : "ZSTD_compressBlock_internal failed"); + cctx->consumedSrcSize += srcSize; + cctx->producedCSize += (cSize + fhSize); + assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0)); + if (cctx->pledgedSrcSizePlusOne != 0) { /* control src size */ + ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1); + RETURN_ERROR_IF( + cctx->consumedSrcSize+1 > cctx->pledgedSrcSizePlusOne, + srcSize_wrong, + "error : pledgedSrcSize = %u, while realSrcSize >= %u", + (unsigned)cctx->pledgedSrcSizePlusOne-1, + (unsigned)cctx->consumedSrcSize); + } + return cSize + fhSize; + } +} + +size_t ZSTD_compressContinue (ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + DEBUGLOG(5, "ZSTD_compressContinue (srcSize=%u)", (unsigned)srcSize); + return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1 /* frame mode */, 0 /* last chunk */); +} + + +size_t ZSTD_getBlockSize(const ZSTD_CCtx* cctx) +{ + ZSTD_compressionParameters const cParams = cctx->appliedParams.cParams; + assert(!ZSTD_checkCParams(cParams)); + return MIN (ZSTD_BLOCKSIZE_MAX, (U32)1 << cParams.windowLog); +} + +size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + DEBUGLOG(5, "ZSTD_compressBlock: srcSize = %u", (unsigned)srcSize); + { size_t const blockSizeMax = ZSTD_getBlockSize(cctx); + RETURN_ERROR_IF(srcSize > blockSizeMax, srcSize_wrong, "input is larger than a block"); } + + return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0 /* frame mode */, 0 /* last chunk */); +} + +/*! ZSTD_loadDictionaryContent() : + * @return : 0, or an error code + */ +static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms, + ldmState_t* ls, + ZSTD_cwksp* ws, + ZSTD_CCtx_params const* params, + const void* src, size_t srcSize, + ZSTD_dictTableLoadMethod_e dtlm) +{ + const BYTE* ip = (const BYTE*) src; + const BYTE* const iend = ip + srcSize; + int const loadLdmDict = params->ldmParams.enableLdm && ls != NULL; + + /* Assert that we the ms params match the params we're being given */ + ZSTD_assertEqualCParams(params->cParams, ms->cParams); + + if (srcSize > ZSTD_CHUNKSIZE_MAX) { + /* Allow the dictionary to set indices up to exactly ZSTD_CURRENT_MAX. + * Dictionaries right at the edge will immediately trigger overflow + * correction, but I don't want to insert extra constraints here. + */ + U32 const maxDictSize = ZSTD_CURRENT_MAX - 1; + /* We must have cleared our windows when our source is this large. */ + assert(ZSTD_window_isEmpty(ms->window)); + if (loadLdmDict) + assert(ZSTD_window_isEmpty(ls->window)); + /* If the dictionary is too large, only load the suffix of the dictionary. */ + if (srcSize > maxDictSize) { + ip = iend - maxDictSize; + src = ip; + srcSize = maxDictSize; + } + } + + DEBUGLOG(4, "ZSTD_loadDictionaryContent(): useRowMatchFinder=%d", (int)params->useRowMatchFinder); + ZSTD_window_update(&ms->window, src, srcSize, /* forceNonContiguous */ 0); + ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base); + ms->forceNonContiguous = params->deterministicRefPrefix; + + if (loadLdmDict) { + ZSTD_window_update(&ls->window, src, srcSize, /* forceNonContiguous */ 0); + ls->loadedDictEnd = params->forceWindow ? 
0 : (U32)(iend - ls->window.base);
+    }
+
+    if (srcSize <= HASH_READ_SIZE) return 0;
+
+    ZSTD_overflowCorrectIfNeeded(ms, ws, params, ip, iend);
+
+    if (loadLdmDict)
+        ZSTD_ldm_fillHashTable(ls, ip, iend, &params->ldmParams);
+
+    switch(params->cParams.strategy)
+    {
+    case ZSTD_fast:
+        ZSTD_fillHashTable(ms, iend, dtlm);
+        break;
+    case ZSTD_dfast:
+        ZSTD_fillDoubleHashTable(ms, iend, dtlm);
+        break;
+
+    case ZSTD_greedy:
+    case ZSTD_lazy:
+    case ZSTD_lazy2:
+        assert(srcSize >= HASH_READ_SIZE);
+        if (ms->dedicatedDictSearch) {
+            assert(ms->chainTable != NULL);
+            ZSTD_dedicatedDictSearch_lazy_loadDictionary(ms, iend-HASH_READ_SIZE);
+        } else {
+            assert(params->useRowMatchFinder != ZSTD_urm_auto);
+            if (params->useRowMatchFinder == ZSTD_urm_enableRowMatchFinder) {
+                size_t const tagTableSize = ((size_t)1 << params->cParams.hashLog) * sizeof(U16);
+                ZSTD_memset(ms->tagTable, 0, tagTableSize);
+                ZSTD_row_update(ms, iend-HASH_READ_SIZE);
+                DEBUGLOG(4, "Using row-based hash table for lazy dict");
+            } else {
+                ZSTD_insertAndFindFirstIndex(ms, iend-HASH_READ_SIZE);
+                DEBUGLOG(4, "Using chain-based hash table for lazy dict");
+            }
+        }
+        break;
+
+    case ZSTD_btlazy2:   /* we want the dictionary table fully sorted */
+    case ZSTD_btopt:
+    case ZSTD_btultra:
+    case ZSTD_btultra2:
+        assert(srcSize >= HASH_READ_SIZE);
+        ZSTD_updateTree(ms, iend-HASH_READ_SIZE, iend);
+        break;
+
+    default:
+        assert(0);  /* not possible : not a valid strategy id */
+    }
+
+    ms->nextToUpdate = (U32)(iend - ms->window.base);
+    return 0;
+}
+
+
+/* Dictionaries that assign zero probability to symbols that show up causes problems
+ * when FSE encoding. Mark dictionaries with zero probability symbols as FSE_repeat_check
+ * and only dictionaries with 100% valid symbols can be assumed valid.
+ */
+static FSE_repeat ZSTD_dictNCountRepeat(short* normalizedCounter, unsigned dictMaxSymbolValue, unsigned maxSymbolValue)
+{
+    U32 s;
+    if (dictMaxSymbolValue < maxSymbolValue) {
+        return FSE_repeat_check;
+    }
+    for (s = 0; s <= maxSymbolValue; ++s) {
+        if (normalizedCounter[s] == 0) {
+            return FSE_repeat_check;
+        }
+    }
+    return FSE_repeat_valid;
+}
+
+size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
+                         const void* const dict, size_t dictSize)
+{
+    short offcodeNCount[MaxOff+1];
+    unsigned offcodeMaxValue = MaxOff;
+    const BYTE* dictPtr = (const BYTE*)dict;    /* skip magic num and dict ID */
+    const BYTE* const dictEnd = dictPtr + dictSize;
+    dictPtr += 8;
+    bs->entropy.huf.repeatMode = HUF_repeat_check;
+
+    {   unsigned maxSymbolValue = 255;
+        unsigned hasZeroWeights = 1;
+        size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)bs->entropy.huf.CTable, &maxSymbolValue, dictPtr,
+            dictEnd-dictPtr, &hasZeroWeights);
+
+        /* We only set the loaded table as valid if it contains all non-zero
+         * weights.
Otherwise, we set it to check */ + if (!hasZeroWeights) + bs->entropy.huf.repeatMode = HUF_repeat_valid; + + RETURN_ERROR_IF(HUF_isError(hufHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(maxSymbolValue < 255, dictionary_corrupted, ""); + dictPtr += hufHeaderSize; + } + + { unsigned offcodeLog; + size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr); + RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted, ""); + /* fill all offset symbols to avoid garbage at end of table */ + RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp( + bs->entropy.fse.offcodeCTable, + offcodeNCount, MaxOff, offcodeLog, + workspace, HUF_WORKSPACE_SIZE)), + dictionary_corrupted, ""); + /* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */ + dictPtr += offcodeHeaderSize; + } + + { short matchlengthNCount[MaxML+1]; + unsigned matchlengthMaxValue = MaxML, matchlengthLog; + size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr); + RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted, ""); + RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp( + bs->entropy.fse.matchlengthCTable, + matchlengthNCount, matchlengthMaxValue, matchlengthLog, + workspace, HUF_WORKSPACE_SIZE)), + dictionary_corrupted, ""); + bs->entropy.fse.matchlength_repeatMode = ZSTD_dictNCountRepeat(matchlengthNCount, matchlengthMaxValue, MaxML); + dictPtr += matchlengthHeaderSize; + } + + { short litlengthNCount[MaxLL+1]; + unsigned litlengthMaxValue = MaxLL, litlengthLog; + size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr); + RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted, ""); + RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp( + bs->entropy.fse.litlengthCTable, + litlengthNCount, litlengthMaxValue, litlengthLog, + workspace, HUF_WORKSPACE_SIZE)), + dictionary_corrupted, ""); + bs->entropy.fse.litlength_repeatMode = ZSTD_dictNCountRepeat(litlengthNCount, litlengthMaxValue, MaxLL); + dictPtr += litlengthHeaderSize; + } + + RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted, ""); + bs->rep[0] = MEM_readLE32(dictPtr+0); + bs->rep[1] = MEM_readLE32(dictPtr+4); + bs->rep[2] = MEM_readLE32(dictPtr+8); + dictPtr += 12; + + { size_t const dictContentSize = (size_t)(dictEnd - dictPtr); + U32 offcodeMax = MaxOff; + if (dictContentSize <= ((U32)-1) - 128 KB) { + U32 const maxOffset = (U32)dictContentSize + 128 KB; /* The maximum offset that must be supported */ + offcodeMax = ZSTD_highbit32(maxOffset); /* Calculate minimum offset code required to represent maxOffset */ + } + /* All offset values <= dictContentSize + 128 KB must be representable for a valid table */ + bs->entropy.fse.offcode_repeatMode = ZSTD_dictNCountRepeat(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff)); + + /* All repCodes must be <= dictContentSize and != 0 */ + { U32 u; + for (u=0; u<3; u++) { + RETURN_ERROR_IF(bs->rep[u] == 0, dictionary_corrupted, ""); + RETURN_ERROR_IF(bs->rep[u] > dictContentSize, dictionary_corrupted, ""); + } } } + + return dictPtr - (const BYTE*)dict; +} + +/* Dictionary format : + * See : + * 
https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#dictionary-format + */ +/*! ZSTD_loadZstdDictionary() : + * @return : dictID, or an error code + * assumptions : magic number supposed already checked + * dictSize supposed >= 8 + */ +static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, + ZSTD_matchState_t* ms, + ZSTD_cwksp* ws, + ZSTD_CCtx_params const* params, + const void* dict, size_t dictSize, + ZSTD_dictTableLoadMethod_e dtlm, + void* workspace) +{ + const BYTE* dictPtr = (const BYTE*)dict; + const BYTE* const dictEnd = dictPtr + dictSize; + size_t dictID; + size_t eSize; + ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog))); + assert(dictSize >= 8); + assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY); + + dictID = params->fParams.noDictIDFlag ? 0 : MEM_readLE32(dictPtr + 4 /* skip magic number */ ); + eSize = ZSTD_loadCEntropy(bs, workspace, dict, dictSize); + FORWARD_IF_ERROR(eSize, "ZSTD_loadCEntropy failed"); + dictPtr += eSize; + + { + size_t const dictContentSize = (size_t)(dictEnd - dictPtr); + FORWARD_IF_ERROR(ZSTD_loadDictionaryContent( + ms, NULL, ws, params, dictPtr, dictContentSize, dtlm), ""); + } + return dictID; +} + +/** ZSTD_compress_insertDictionary() : +* @return : dictID, or an error code */ +static size_t +ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs, + ZSTD_matchState_t* ms, + ldmState_t* ls, + ZSTD_cwksp* ws, + const ZSTD_CCtx_params* params, + const void* dict, size_t dictSize, + ZSTD_dictContentType_e dictContentType, + ZSTD_dictTableLoadMethod_e dtlm, + void* workspace) +{ + DEBUGLOG(4, "ZSTD_compress_insertDictionary (dictSize=%u)", (U32)dictSize); + if ((dict==NULL) || (dictSize<8)) { + RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong, ""); + return 0; + } + + ZSTD_reset_compressedBlockState(bs); + + /* dict restricted modes */ + if (dictContentType == ZSTD_dct_rawContent) + return ZSTD_loadDictionaryContent(ms, ls, ws, params, dict, dictSize, dtlm); + + if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) { + if (dictContentType == ZSTD_dct_auto) { + DEBUGLOG(4, "raw content dictionary detected"); + return ZSTD_loadDictionaryContent( + ms, ls, ws, params, dict, dictSize, dtlm); + } + RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong, ""); + assert(0); /* impossible */ + } + + /* dict as full zstd dictionary */ + return ZSTD_loadZstdDictionary( + bs, ms, ws, params, dict, dictSize, dtlm, workspace); +} + +#define ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF (128 KB) +#define ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER (6ULL) + +/*! ZSTD_compressBegin_internal() : + * @return : 0, or an error code */ +static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, + const void* dict, size_t dictSize, + ZSTD_dictContentType_e dictContentType, + ZSTD_dictTableLoadMethod_e dtlm, + const ZSTD_CDict* cdict, + const ZSTD_CCtx_params* params, U64 pledgedSrcSize, + ZSTD_buffered_policy_e zbuff) +{ + size_t const dictContentSize = cdict ? cdict->dictContentSize : dictSize; +#if ZSTD_TRACE + cctx->traceCtx = (ZSTD_trace_compress_begin != NULL) ? 
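+    /* Dictionary dispatch recap for ZSTD_compress_insertDictionary() above:
+     * ZSTD_dct_rawContent loads the bytes as-is, ZSTD_dct_auto sniffs
+     * ZSTD_MAGIC_DICTIONARY and falls back to raw content when the magic is absent,
+     * and ZSTD_dct_fullDict requires the full zstd dictionary format (entropy tables
+     * via ZSTD_loadCEntropy(), then content) or fails with dictionary_wrong. */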
ZSTD_trace_compress_begin(cctx) : 0; +#endif + DEBUGLOG(4, "ZSTD_compressBegin_internal: wlog=%u", params->cParams.windowLog); + /* params are supposed to be fully validated at this point */ + assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams))); + assert(!((dict) && (cdict))); /* either dict or cdict, not both */ + if ( (cdict) + && (cdict->dictContentSize > 0) + && ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF + || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER + || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN + || cdict->compressionLevel == 0) + && (params->attachDictPref != ZSTD_dictForceLoad) ) { + return ZSTD_resetCCtx_usingCDict(cctx, cdict, params, pledgedSrcSize, zbuff); + } + + FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, + dictContentSize, + ZSTDcrp_makeClean, zbuff) , ""); + { size_t const dictID = cdict ? + ZSTD_compress_insertDictionary( + cctx->blockState.prevCBlock, &cctx->blockState.matchState, + &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, cdict->dictContent, + cdict->dictContentSize, cdict->dictContentType, dtlm, + cctx->entropyWorkspace) + : ZSTD_compress_insertDictionary( + cctx->blockState.prevCBlock, &cctx->blockState.matchState, + &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, dict, dictSize, + dictContentType, dtlm, cctx->entropyWorkspace); + FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed"); + assert(dictID <= UINT_MAX); + cctx->dictID = (U32)dictID; + cctx->dictContentSize = dictContentSize; + } + return 0; +} + +size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx, + const void* dict, size_t dictSize, + ZSTD_dictContentType_e dictContentType, + ZSTD_dictTableLoadMethod_e dtlm, + const ZSTD_CDict* cdict, + const ZSTD_CCtx_params* params, + unsigned long long pledgedSrcSize) +{ + DEBUGLOG(4, "ZSTD_compressBegin_advanced_internal: wlog=%u", params->cParams.windowLog); + /* compression parameters verification and optimization */ + FORWARD_IF_ERROR( ZSTD_checkCParams(params->cParams) , ""); + return ZSTD_compressBegin_internal(cctx, + dict, dictSize, dictContentType, dtlm, + cdict, + params, pledgedSrcSize, + ZSTDb_not_buffered); +} + +/*! ZSTD_compressBegin_advanced() : +* @return : 0, or an error code */ +size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, + const void* dict, size_t dictSize, + ZSTD_parameters params, unsigned long long pledgedSrcSize) +{ + ZSTD_CCtx_params cctxParams; + ZSTD_CCtxParams_init_internal(&cctxParams, ¶ms, ZSTD_NO_CLEVEL); + return ZSTD_compressBegin_advanced_internal(cctx, + dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, + NULL /*cdict*/, + &cctxParams, pledgedSrcSize); +} + +size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel) +{ + ZSTD_CCtx_params cctxParams; + { + ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_noAttachDict); + ZSTD_CCtxParams_init_internal(&cctxParams, ¶ms, (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel); + } + DEBUGLOG(4, "ZSTD_compressBegin_usingDict (dictSize=%u)", (unsigned)dictSize); + return ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL, + &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, ZSTDb_not_buffered); +} + +size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel) +{ + return ZSTD_compressBegin_usingDict(cctx, NULL, 0, compressionLevel); +} + + +/*! ZSTD_writeEpilogue() : +* Ends a frame. 
+* @return : nb of bytes written into dst (or an error code) */ +static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity) +{ + BYTE* const ostart = (BYTE*)dst; + BYTE* op = ostart; + size_t fhSize = 0; + + DEBUGLOG(4, "ZSTD_writeEpilogue"); + RETURN_ERROR_IF(cctx->stage == ZSTDcs_created, stage_wrong, "init missing"); + + /* special case : empty frame */ + if (cctx->stage == ZSTDcs_init) { + fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams, 0, 0); + FORWARD_IF_ERROR(fhSize, "ZSTD_writeFrameHeader failed"); + dstCapacity -= fhSize; + op += fhSize; + cctx->stage = ZSTDcs_ongoing; + } + + if (cctx->stage != ZSTDcs_ending) { + /* write one last empty block, make it the "last" block */ + U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1) + 0; + RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for epilogue"); + MEM_writeLE32(op, cBlockHeader24); + op += ZSTD_blockHeaderSize; + dstCapacity -= ZSTD_blockHeaderSize; + } + + if (cctx->appliedParams.fParams.checksumFlag) { + U32 const checksum = (U32) XXH64_digest(&cctx->xxhState); + RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for checksum"); + DEBUGLOG(4, "ZSTD_writeEpilogue: write checksum : %08X", (unsigned)checksum); + MEM_writeLE32(op, checksum); + op += 4; + } + + cctx->stage = ZSTDcs_created; /* return to "created but no init" status */ + return op-ostart; +} + +void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize) +{ +#if ZSTD_TRACE + if (cctx->traceCtx && ZSTD_trace_compress_end != NULL) { + int const streaming = cctx->inBuffSize > 0 || cctx->outBuffSize > 0 || cctx->appliedParams.nbWorkers > 0; + ZSTD_Trace trace; + ZSTD_memset(&trace, 0, sizeof(trace)); + trace.version = ZSTD_VERSION_NUMBER; + trace.streaming = streaming; + trace.dictionaryID = cctx->dictID; + trace.dictionarySize = cctx->dictContentSize; + trace.uncompressedSize = cctx->consumedSrcSize; + trace.compressedSize = cctx->producedCSize + extraCSize; + trace.params = &cctx->appliedParams; + trace.cctx = cctx; + ZSTD_trace_compress_end(cctx->traceCtx, &trace); + } + cctx->traceCtx = 0; +#else + (void)cctx; + (void)extraCSize; +#endif +} + +size_t ZSTD_compressEnd (ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + size_t endResult; + size_t const cSize = ZSTD_compressContinue_internal(cctx, + dst, dstCapacity, src, srcSize, + 1 /* frame mode */, 1 /* last chunk */); + FORWARD_IF_ERROR(cSize, "ZSTD_compressContinue_internal failed"); + endResult = ZSTD_writeEpilogue(cctx, (char*)dst + cSize, dstCapacity-cSize); + FORWARD_IF_ERROR(endResult, "ZSTD_writeEpilogue failed"); + assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0)); + if (cctx->pledgedSrcSizePlusOne != 0) { /* control src size */ + ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1); + DEBUGLOG(4, "end of frame : controlling src size"); + RETURN_ERROR_IF( + cctx->pledgedSrcSizePlusOne != cctx->consumedSrcSize+1, + srcSize_wrong, + "error : pledgedSrcSize = %u, while realSrcSize = %u", + (unsigned)cctx->pledgedSrcSizePlusOne-1, + (unsigned)cctx->consumedSrcSize); + } + ZSTD_CCtx_trace(cctx, endResult); + return cSize + endResult; +} + +size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + ZSTD_parameters params) +{ + DEBUGLOG(4, "ZSTD_compress_advanced"); + FORWARD_IF_ERROR(ZSTD_checkCParams(params.cParams), ""); + 
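+    /* Frame-termination recap for ZSTD_writeEpilogue()/ZSTD_compressEnd() above:
+     * the epilogue emits a frame header if nothing was written yet (empty frame),
+     * then one final empty raw block flagged "last", then a 4-byte XXH64 checksum
+     * when fParams.checksumFlag is set; ZSTD_compressEnd() additionally verifies
+     * that the bytes consumed match pledgedSrcSize when one was declared. */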
ZSTD_CCtxParams_init_internal(&cctx->simpleApiParams, ¶ms, ZSTD_NO_CLEVEL); + return ZSTD_compress_advanced_internal(cctx, + dst, dstCapacity, + src, srcSize, + dict, dictSize, + &cctx->simpleApiParams); +} + +/* Internal */ +size_t ZSTD_compress_advanced_internal( + ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + const ZSTD_CCtx_params* params) +{ + DEBUGLOG(4, "ZSTD_compress_advanced_internal (srcSize:%u)", (unsigned)srcSize); + FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx, + dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL, + params, srcSize, ZSTDb_not_buffered) , ""); + return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize); +} + +size_t ZSTD_compress_usingDict(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict, size_t dictSize, + int compressionLevel) +{ + { + ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, srcSize, dict ? dictSize : 0, ZSTD_cpm_noAttachDict); + assert(params.fParams.contentSizeFlag == 1); + ZSTD_CCtxParams_init_internal(&cctx->simpleApiParams, ¶ms, (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT: compressionLevel); + } + DEBUGLOG(4, "ZSTD_compress_usingDict (srcSize=%u)", (unsigned)srcSize); + return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, &cctx->simpleApiParams); +} + +size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + int compressionLevel) +{ + DEBUGLOG(4, "ZSTD_compressCCtx (srcSize=%u)", (unsigned)srcSize); + assert(cctx != NULL); + return ZSTD_compress_usingDict(cctx, dst, dstCapacity, src, srcSize, NULL, 0, compressionLevel); +} + +size_t ZSTD_compress(void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + int compressionLevel) +{ + size_t result; +#if ZSTD_COMPRESS_HEAPMODE + ZSTD_CCtx* cctx = ZSTD_createCCtx(); + RETURN_ERROR_IF(!cctx, memory_allocation, "ZSTD_createCCtx failed"); + result = ZSTD_compressCCtx(cctx, dst, dstCapacity, src, srcSize, compressionLevel); + ZSTD_freeCCtx(cctx); +#else + ZSTD_CCtx ctxBody; + ZSTD_initCCtx(&ctxBody, ZSTD_defaultCMem); + result = ZSTD_compressCCtx(&ctxBody, dst, dstCapacity, src, srcSize, compressionLevel); + ZSTD_freeCCtxContent(&ctxBody); /* can't free ctxBody itself, as it's on stack; free only heap content */ +#endif + return result; +} + + +/* ===== Dictionary API ===== */ + +/*! ZSTD_estimateCDictSize_advanced() : + * Estimate amount of memory that will be needed to create a dictionary with following arguments */ +size_t ZSTD_estimateCDictSize_advanced( + size_t dictSize, ZSTD_compressionParameters cParams, + ZSTD_dictLoadMethod_e dictLoadMethod) +{ + DEBUGLOG(5, "sizeof(ZSTD_CDict) : %u", (unsigned)sizeof(ZSTD_CDict)); + return ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) + + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) + /* enableDedicatedDictSearch == 1 ensures that CDict estimation will not be too small + * in case we are using DDS with row-hash. */ + + ZSTD_sizeof_matchState(&cParams, ZSTD_resolveRowMatchFinderMode(ZSTD_urm_auto, &cParams), + /* enableDedicatedDictSearch */ 1, /* forCCtx */ 0) + + (dictLoadMethod == ZSTD_dlm_byRef ? 
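+        /* Illustrative one-shot use of the entry points defined above, using only
+         * public zstd.h names (buffer management is the caller's, shown loosely):
+         *   size_t const bound = ZSTD_compressBound(srcSize);
+         *   size_t const cSize = ZSTD_compress(dst, bound, src, srcSize, 3);
+         *   if (ZSTD_isError(cSize)) { puts(ZSTD_getErrorName(cSize)); }
+         * ZSTD_compress() itself either heap-allocates a CCtx or uses a stack CCtx,
+         * depending on ZSTD_COMPRESS_HEAPMODE, as the code above shows. */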
0 + : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void *)))); +} + +size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel) +{ + ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict); + return ZSTD_estimateCDictSize_advanced(dictSize, cParams, ZSTD_dlm_byCopy); +} + +size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict) +{ + if (cdict==NULL) return 0; /* support sizeof on NULL */ + DEBUGLOG(5, "sizeof(*cdict) : %u", (unsigned)sizeof(*cdict)); + /* cdict may be in the workspace */ + return (cdict->workspace.workspace == cdict ? 0 : sizeof(*cdict)) + + ZSTD_cwksp_sizeof(&cdict->workspace); +} + +static size_t ZSTD_initCDict_internal( + ZSTD_CDict* cdict, + const void* dictBuffer, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_CCtx_params params) +{ + DEBUGLOG(3, "ZSTD_initCDict_internal (dictContentType:%u)", (unsigned)dictContentType); + assert(!ZSTD_checkCParams(params.cParams)); + cdict->matchState.cParams = params.cParams; + cdict->matchState.dedicatedDictSearch = params.enableDedicatedDictSearch; + if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dictBuffer) || (!dictSize)) { + cdict->dictContent = dictBuffer; + } else { + void *internalBuffer = ZSTD_cwksp_reserve_object(&cdict->workspace, ZSTD_cwksp_align(dictSize, sizeof(void*))); + RETURN_ERROR_IF(!internalBuffer, memory_allocation, "NULL pointer!"); + cdict->dictContent = internalBuffer; + ZSTD_memcpy(internalBuffer, dictBuffer, dictSize); + } + cdict->dictContentSize = dictSize; + cdict->dictContentType = dictContentType; + + cdict->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cdict->workspace, HUF_WORKSPACE_SIZE); + + + /* Reset the state to no dictionary */ + ZSTD_reset_compressedBlockState(&cdict->cBlockState); + FORWARD_IF_ERROR(ZSTD_reset_matchState( + &cdict->matchState, + &cdict->workspace, + ¶ms.cParams, + params.useRowMatchFinder, + ZSTDcrp_makeClean, + ZSTDirp_reset, + ZSTD_resetTarget_CDict), ""); + /* (Maybe) load the dictionary + * Skips loading the dictionary if it is < 8 bytes. + */ + { params.compressionLevel = ZSTD_CLEVEL_DEFAULT; + params.fParams.contentSizeFlag = 1; + { size_t const dictID = ZSTD_compress_insertDictionary( + &cdict->cBlockState, &cdict->matchState, NULL, &cdict->workspace, + ¶ms, cdict->dictContent, cdict->dictContentSize, + dictContentType, ZSTD_dtlm_full, cdict->entropyWorkspace); + FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed"); + assert(dictID <= (size_t)(U32)-1); + cdict->dictID = (U32)dictID; + } + } + + return 0; +} + +static ZSTD_CDict* ZSTD_createCDict_advanced_internal(size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_compressionParameters cParams, + ZSTD_useRowMatchFinderMode_e useRowMatchFinder, + U32 enableDedicatedDictSearch, + ZSTD_customMem customMem) +{ + if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL; + + { size_t const workspaceSize = + ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) + + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) + + ZSTD_sizeof_matchState(&cParams, useRowMatchFinder, enableDedicatedDictSearch, /* forCCtx */ 0) + + (dictLoadMethod == ZSTD_dlm_byRef ? 
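+            /* Workspace layout note: a CDict is carved out of one cwksp allocation --
+             * the ZSTD_CDict struct, the HUF_WORKSPACE_SIZE entropy scratch area, the
+             * matchState tables, and (only for ZSTD_dlm_byCopy) an aligned copy of
+             * the dictionary -- which is why ZSTD_estimateCDictSize_advanced() above
+             * sums exactly the same terms. */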
0 + : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*)))); + void* const workspace = ZSTD_customMalloc(workspaceSize, customMem); + ZSTD_cwksp ws; + ZSTD_CDict* cdict; + + if (!workspace) { + ZSTD_customFree(workspace, customMem); + return NULL; + } + + ZSTD_cwksp_init(&ws, workspace, workspaceSize, ZSTD_cwksp_dynamic_alloc); + + cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict)); + assert(cdict != NULL); + ZSTD_cwksp_move(&cdict->workspace, &ws); + cdict->customMem = customMem; + cdict->compressionLevel = ZSTD_NO_CLEVEL; /* signals advanced API usage */ + cdict->useRowMatchFinder = useRowMatchFinder; + return cdict; + } +} + +ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_compressionParameters cParams, + ZSTD_customMem customMem) +{ + ZSTD_CCtx_params cctxParams; + ZSTD_memset(&cctxParams, 0, sizeof(cctxParams)); + ZSTD_CCtxParams_init(&cctxParams, 0); + cctxParams.cParams = cParams; + cctxParams.customMem = customMem; + return ZSTD_createCDict_advanced2( + dictBuffer, dictSize, + dictLoadMethod, dictContentType, + &cctxParams, customMem); +} + +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced2( + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + const ZSTD_CCtx_params* originalCctxParams, + ZSTD_customMem customMem) +{ + ZSTD_CCtx_params cctxParams = *originalCctxParams; + ZSTD_compressionParameters cParams; + ZSTD_CDict* cdict; + + DEBUGLOG(3, "ZSTD_createCDict_advanced2, mode %u", (unsigned)dictContentType); + if (!customMem.customAlloc ^ !customMem.customFree) return NULL; + + if (cctxParams.enableDedicatedDictSearch) { + cParams = ZSTD_dedicatedDictSearch_getCParams( + cctxParams.compressionLevel, dictSize); + ZSTD_overrideCParams(&cParams, &cctxParams.cParams); + } else { + cParams = ZSTD_getCParamsFromCCtxParams( + &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict); + } + + if (!ZSTD_dedicatedDictSearch_isSupported(&cParams)) { + /* Fall back to non-DDSS params */ + cctxParams.enableDedicatedDictSearch = 0; + cParams = ZSTD_getCParamsFromCCtxParams( + &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict); + } + + DEBUGLOG(3, "ZSTD_createCDict_advanced2: DDS: %u", cctxParams.enableDedicatedDictSearch); + cctxParams.cParams = cParams; + cctxParams.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams.useRowMatchFinder, &cParams); + + cdict = ZSTD_createCDict_advanced_internal(dictSize, + dictLoadMethod, cctxParams.cParams, + cctxParams.useRowMatchFinder, cctxParams.enableDedicatedDictSearch, + customMem); + + if (ZSTD_isError( ZSTD_initCDict_internal(cdict, + dict, dictSize, + dictLoadMethod, dictContentType, + cctxParams) )) { + ZSTD_freeCDict(cdict); + return NULL; + } + + return cdict; +} + +ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel) +{ + ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict); + ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dict, dictSize, + ZSTD_dlm_byCopy, ZSTD_dct_auto, + cParams, ZSTD_defaultCMem); + if (cdict) + cdict->compressionLevel = (compressionLevel == 0) ? 
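+        /* Illustrative reuse pattern for the CDict API above (public zstd.h names,
+         * error handling elided):
+         *   ZSTD_CDict* const cdict = ZSTD_createCDict(dictBuf, dictSize, level);
+         *   for each input: ZSTD_compress_usingCDict(cctx, dst, dstCap, src, srcSize, cdict);
+         *   ZSTD_freeCDict(cdict);
+         * Digesting the dictionary once amortizes table construction across many
+         * small inputs, which is the point of a CDict versus calling
+         * ZSTD_compress_usingDict() in a loop. */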
ZSTD_CLEVEL_DEFAULT : compressionLevel; + return cdict; +} + +ZSTD_CDict* ZSTD_createCDict_byReference(const void* dict, size_t dictSize, int compressionLevel) +{ + ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict); + ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dict, dictSize, + ZSTD_dlm_byRef, ZSTD_dct_auto, + cParams, ZSTD_defaultCMem); + if (cdict) + cdict->compressionLevel = (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel; + return cdict; +} + +size_t ZSTD_freeCDict(ZSTD_CDict* cdict) +{ + if (cdict==NULL) return 0; /* support free on NULL */ + { ZSTD_customMem const cMem = cdict->customMem; + int cdictInWorkspace = ZSTD_cwksp_owns_buffer(&cdict->workspace, cdict); + ZSTD_cwksp_free(&cdict->workspace, cMem); + if (!cdictInWorkspace) { + ZSTD_customFree(cdict, cMem); + } + return 0; + } +} + +/*! ZSTD_initStaticCDict_advanced() : + * Generate a digested dictionary in provided memory area. + * workspace: The memory area to emplace the dictionary into. + * Provided pointer must 8-bytes aligned. + * It must outlive dictionary usage. + * workspaceSize: Use ZSTD_estimateCDictSize() + * to determine how large workspace must be. + * cParams : use ZSTD_getCParams() to transform a compression level + * into its relevants cParams. + * @return : pointer to ZSTD_CDict*, or NULL if error (size too small) + * Note : there is no corresponding "free" function. + * Since workspace was allocated externally, it must be freed externally. + */ +const ZSTD_CDict* ZSTD_initStaticCDict( + void* workspace, size_t workspaceSize, + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_compressionParameters cParams) +{ + ZSTD_useRowMatchFinderMode_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(ZSTD_urm_auto, &cParams); + /* enableDedicatedDictSearch == 1 ensures matchstate is not too small in case this CDict will be used for DDS + row hash */ + size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, useRowMatchFinder, /* enableDedicatedDictSearch */ 1, /* forCCtx */ 0); + size_t const neededSize = ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) + + (dictLoadMethod == ZSTD_dlm_byRef ? 0 + : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*)))) + + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) + + matchStateSize; + ZSTD_CDict* cdict; + ZSTD_CCtx_params params; + + if ((size_t)workspace & 7) return NULL; /* 8-aligned */ + + { + ZSTD_cwksp ws; + ZSTD_cwksp_init(&ws, workspace, workspaceSize, ZSTD_cwksp_static_alloc); + cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict)); + if (cdict == NULL) return NULL; + ZSTD_cwksp_move(&cdict->workspace, &ws); + } + + DEBUGLOG(4, "(workspaceSize < neededSize) : (%u < %u) => %u", + (unsigned)workspaceSize, (unsigned)neededSize, (unsigned)(workspaceSize < neededSize)); + if (workspaceSize < neededSize) return NULL; + + ZSTD_CCtxParams_init(¶ms, 0); + params.cParams = cParams; + params.useRowMatchFinder = useRowMatchFinder; + cdict->useRowMatchFinder = useRowMatchFinder; + + if (ZSTD_isError( ZSTD_initCDict_internal(cdict, + dict, dictSize, + dictLoadMethod, dictContentType, + params) )) + return NULL; + + return cdict; +} + +ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict) +{ + assert(cdict != NULL); + return cdict->matchState.cParams; +} + +/*! ZSTD_getDictID_fromCDict() : + * Provides the dictID of the dictionary loaded into `cdict`. 
+ * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. + * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ +unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict) +{ + if (cdict==NULL) return 0; + return cdict->dictID; +} + +/* ZSTD_compressBegin_usingCDict_internal() : + * Implementation of various ZSTD_compressBegin_usingCDict* functions. + */ +static size_t ZSTD_compressBegin_usingCDict_internal( + ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, + ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize) +{ + ZSTD_CCtx_params cctxParams; + DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_internal"); + RETURN_ERROR_IF(cdict==NULL, dictionary_wrong, "NULL pointer!"); + /* Initialize the cctxParams from the cdict */ + { + ZSTD_parameters params; + params.fParams = fParams; + params.cParams = ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF + || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER + || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN + || cdict->compressionLevel == 0 ) ? + ZSTD_getCParamsFromCDict(cdict) + : ZSTD_getCParams(cdict->compressionLevel, + pledgedSrcSize, + cdict->dictContentSize); + ZSTD_CCtxParams_init_internal(&cctxParams, ¶ms, cdict->compressionLevel); + } + /* Increase window log to fit the entire dictionary and source if the + * source size is known. Limit the increase to 19, which is the + * window log for compression level 1 with the largest source size. + */ + if (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN) { + U32 const limitedSrcSize = (U32)MIN(pledgedSrcSize, 1U << 19); + U32 const limitedSrcLog = limitedSrcSize > 1 ? ZSTD_highbit32(limitedSrcSize - 1) + 1 : 1; + cctxParams.cParams.windowLog = MAX(cctxParams.cParams.windowLog, limitedSrcLog); + } + return ZSTD_compressBegin_internal(cctx, + NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, + cdict, + &cctxParams, pledgedSrcSize, + ZSTDb_not_buffered); +} + + +/* ZSTD_compressBegin_usingCDict_advanced() : + * This function is DEPRECATED. + * cdict must be != NULL */ +size_t ZSTD_compressBegin_usingCDict_advanced( + ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, + ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize) +{ + return ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, pledgedSrcSize); +} + +/* ZSTD_compressBegin_usingCDict() : + * cdict must be != NULL */ +size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict) +{ + ZSTD_frameParameters const fParams = { 0 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ }; + return ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, ZSTD_CONTENTSIZE_UNKNOWN); +} + +/*! ZSTD_compress_usingCDict_internal(): + * Implementation of various ZSTD_compress_usingCDict* functions. + */ +static size_t ZSTD_compress_usingCDict_internal(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict, ZSTD_frameParameters fParams) +{ + FORWARD_IF_ERROR(ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, srcSize), ""); /* will check if cdict != NULL */ + return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize); +} + +/*! ZSTD_compress_usingCDict_advanced(): + * This function is DEPRECATED. 
+ */ +size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict, ZSTD_frameParameters fParams) +{ + return ZSTD_compress_usingCDict_internal(cctx, dst, dstCapacity, src, srcSize, cdict, fParams); +} + +/*! ZSTD_compress_usingCDict() : + * Compression using a digested Dictionary. + * Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times. + * Note that compression parameters are decided at CDict creation time + * while frame parameters are hardcoded */ +size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict) +{ + ZSTD_frameParameters const fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ }; + return ZSTD_compress_usingCDict_internal(cctx, dst, dstCapacity, src, srcSize, cdict, fParams); +} + + + +/* ****************************************************************** +* Streaming +********************************************************************/ + +ZSTD_CStream* ZSTD_createCStream(void) +{ + DEBUGLOG(3, "ZSTD_createCStream"); + return ZSTD_createCStream_advanced(ZSTD_defaultCMem); +} + +ZSTD_CStream* ZSTD_initStaticCStream(void *workspace, size_t workspaceSize) +{ + return ZSTD_initStaticCCtx(workspace, workspaceSize); +} + +ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem) +{ /* CStream and CCtx are now same object */ + return ZSTD_createCCtx_advanced(customMem); +} + +size_t ZSTD_freeCStream(ZSTD_CStream* zcs) +{ + return ZSTD_freeCCtx(zcs); /* same object */ +} + + + +/*====== Initialization ======*/ + +size_t ZSTD_CStreamInSize(void) { return ZSTD_BLOCKSIZE_MAX; } + +size_t ZSTD_CStreamOutSize(void) +{ + return ZSTD_compressBound(ZSTD_BLOCKSIZE_MAX) + ZSTD_blockHeaderSize + 4 /* 32-bits hash */ ; +} + +static ZSTD_cParamMode_e ZSTD_getCParamMode(ZSTD_CDict const* cdict, ZSTD_CCtx_params const* params, U64 pledgedSrcSize) +{ + if (cdict != NULL && ZSTD_shouldAttachDict(cdict, params, pledgedSrcSize)) + return ZSTD_cpm_attachDict; + else + return ZSTD_cpm_noAttachDict; +} + +/* ZSTD_resetCStream(): + * pledgedSrcSize == 0 means "unknown" */ +size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pss) +{ + /* temporary : 0 interpreted as "unknown" during transition period. + * Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN. + * 0 will be interpreted as "empty" in the future. + */ + U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss; + DEBUGLOG(4, "ZSTD_resetCStream: pledgedSrcSize = %u", (unsigned)pledgedSrcSize); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); + return 0; +} + +/*! ZSTD_initCStream_internal() : + * Note : for lib/compress only. Used by zstdmt_compress.c. 
+ * Assumption 1 : params are valid + * Assumption 2 : either dict, or cdict, is defined, not both */ +size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs, + const void* dict, size_t dictSize, const ZSTD_CDict* cdict, + const ZSTD_CCtx_params* params, + unsigned long long pledgedSrcSize) +{ + DEBUGLOG(4, "ZSTD_initCStream_internal"); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); + assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams))); + zcs->requestedParams = *params; + assert(!((dict) && (cdict))); /* either dict or cdict, not both */ + if (dict) { + FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , ""); + } else { + /* Dictionary is cleared if !cdict */ + FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , ""); + } + return 0; +} + +/* ZSTD_initCStream_usingCDict_advanced() : + * same as ZSTD_initCStream_usingCDict(), with control over frame parameters */ +size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, + const ZSTD_CDict* cdict, + ZSTD_frameParameters fParams, + unsigned long long pledgedSrcSize) +{ + DEBUGLOG(4, "ZSTD_initCStream_usingCDict_advanced"); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); + zcs->requestedParams.fParams = fParams; + FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , ""); + return 0; +} + +/* note : cdict must outlive compression session */ +size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict) +{ + DEBUGLOG(4, "ZSTD_initCStream_usingCDict"); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , ""); + return 0; +} + + +/* ZSTD_initCStream_advanced() : + * pledgedSrcSize must be exact. + * if srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN. + * dict is loaded with default parameters ZSTD_dct_auto and ZSTD_dlm_byCopy. */ +size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, + const void* dict, size_t dictSize, + ZSTD_parameters params, unsigned long long pss) +{ + /* for compatibility with older programs relying on this behavior. + * Users should now specify ZSTD_CONTENTSIZE_UNKNOWN. + * This line will be removed in the future. + */ + U64 const pledgedSrcSize = (pss==0 && params.fParams.contentSizeFlag==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss; + DEBUGLOG(4, "ZSTD_initCStream_advanced"); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); + FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) , ""); + ZSTD_CCtxParams_setZstdParams(&zcs->requestedParams, ¶ms); + FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , ""); + return 0; +} + +size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel) +{ + DEBUGLOG(4, "ZSTD_initCStream_usingDict"); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , ""); + return 0; +} + +size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pss) +{ + /* temporary : 0 interpreted as "unknown" during transition period. + * Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN. 
+ * 0 will be interpreted as "empty" in the future. + */ + U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss; + DEBUGLOG(4, "ZSTD_initCStream_srcSize"); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); + return 0; +} + +size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel) +{ + DEBUGLOG(4, "ZSTD_initCStream"); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , ""); + return 0; +} + +/*====== Compression ======*/ + +static size_t ZSTD_nextInputSizeHint(const ZSTD_CCtx* cctx) +{ + size_t hintInSize = cctx->inBuffTarget - cctx->inBuffPos; + if (hintInSize==0) hintInSize = cctx->blockSize; + return hintInSize; +} + +/** ZSTD_compressStream_generic(): + * internal function for all *compressStream*() variants + * non-static, because can be called from zstdmt_compress.c + * @return : hint size for next input */ +static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs, + ZSTD_outBuffer* output, + ZSTD_inBuffer* input, + ZSTD_EndDirective const flushMode) +{ + const char* const istart = (const char*)input->src; + const char* const iend = input->size != 0 ? istart + input->size : istart; + const char* ip = input->pos != 0 ? istart + input->pos : istart; + char* const ostart = (char*)output->dst; + char* const oend = output->size != 0 ? ostart + output->size : ostart; + char* op = output->pos != 0 ? ostart + output->pos : ostart; + U32 someMoreWork = 1; + + /* check expectations */ + DEBUGLOG(5, "ZSTD_compressStream_generic, flush=%u", (unsigned)flushMode); + if (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered) { + assert(zcs->inBuff != NULL); + assert(zcs->inBuffSize > 0); + } + if (zcs->appliedParams.outBufferMode == ZSTD_bm_buffered) { + assert(zcs->outBuff != NULL); + assert(zcs->outBuffSize > 0); + } + assert(output->pos <= output->size); + assert(input->pos <= input->size); + assert((U32)flushMode <= (U32)ZSTD_e_end); + + while (someMoreWork) { + switch(zcs->streamStage) + { + case zcss_init: + RETURN_ERROR(init_missing, "call ZSTD_initCStream() first!"); + + case zcss_load: + if ( (flushMode == ZSTD_e_end) + && ( (size_t)(oend-op) >= ZSTD_compressBound(iend-ip) /* Enough output space */ + || zcs->appliedParams.outBufferMode == ZSTD_bm_stable) /* OR we are allowed to return dstSizeTooSmall */ + && (zcs->inBuffPos == 0) ) { + /* shortcut to compression pass directly into output buffer */ + size_t const cSize = ZSTD_compressEnd(zcs, + op, oend-op, ip, iend-ip); + DEBUGLOG(4, "ZSTD_compressEnd : cSize=%u", (unsigned)cSize); + FORWARD_IF_ERROR(cSize, "ZSTD_compressEnd failed"); + ip = iend; + op += cSize; + zcs->frameEnded = 1; + ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + someMoreWork = 0; break; + } + /* complete loading into inBuffer in buffered mode */ + if (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered) { + size_t const toLoad = zcs->inBuffTarget - zcs->inBuffPos; + size_t const loaded = ZSTD_limitCopy( + zcs->inBuff + zcs->inBuffPos, toLoad, + ip, iend-ip); + zcs->inBuffPos += loaded; + if (loaded != 0) + ip += loaded; + if ( (flushMode == ZSTD_e_continue) + && (zcs->inBuffPos < zcs->inBuffTarget) ) { + /* not enough input to 
fill full block : stop here */ + someMoreWork = 0; break; + } + if ( (flushMode == ZSTD_e_flush) + && (zcs->inBuffPos == zcs->inToCompress) ) { + /* empty */ + someMoreWork = 0; break; + } + } + /* compress current block (note : this stage cannot be stopped in the middle) */ + DEBUGLOG(5, "stream compression stage (flushMode==%u)", flushMode); + { int const inputBuffered = (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered); + void* cDst; + size_t cSize; + size_t oSize = oend-op; + size_t const iSize = inputBuffered + ? zcs->inBuffPos - zcs->inToCompress + : MIN((size_t)(iend - ip), zcs->blockSize); + if (oSize >= ZSTD_compressBound(iSize) || zcs->appliedParams.outBufferMode == ZSTD_bm_stable) + cDst = op; /* compress into output buffer, to skip flush stage */ + else + cDst = zcs->outBuff, oSize = zcs->outBuffSize; + if (inputBuffered) { + unsigned const lastBlock = (flushMode == ZSTD_e_end) && (ip==iend); + cSize = lastBlock ? + ZSTD_compressEnd(zcs, cDst, oSize, + zcs->inBuff + zcs->inToCompress, iSize) : + ZSTD_compressContinue(zcs, cDst, oSize, + zcs->inBuff + zcs->inToCompress, iSize); + FORWARD_IF_ERROR(cSize, "%s", lastBlock ? "ZSTD_compressEnd failed" : "ZSTD_compressContinue failed"); + zcs->frameEnded = lastBlock; + /* prepare next block */ + zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSize; + if (zcs->inBuffTarget > zcs->inBuffSize) + zcs->inBuffPos = 0, zcs->inBuffTarget = zcs->blockSize; + DEBUGLOG(5, "inBuffTarget:%u / inBuffSize:%u", + (unsigned)zcs->inBuffTarget, (unsigned)zcs->inBuffSize); + if (!lastBlock) + assert(zcs->inBuffTarget <= zcs->inBuffSize); + zcs->inToCompress = zcs->inBuffPos; + } else { + unsigned const lastBlock = (ip + iSize == iend); + assert(flushMode == ZSTD_e_end /* Already validated */); + cSize = lastBlock ? + ZSTD_compressEnd(zcs, cDst, oSize, ip, iSize) : + ZSTD_compressContinue(zcs, cDst, oSize, ip, iSize); + /* Consume the input prior to error checking to mirror buffered mode. */ + if (iSize > 0) + ip += iSize; + FORWARD_IF_ERROR(cSize, "%s", lastBlock ? 
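+                    /* Load-stage recap: with ZSTD_e_end and room for
+                     * ZSTD_compressBound(remaining input) -- or a stable output
+                     * buffer -- the frame is finished straight into the caller's
+                     * buffer and the flush stage is skipped; otherwise the block is
+                     * compressed either into the caller's buffer (when worst case
+                     * fits) or into zcs->outBuff, and the stream moves to zcss_flush. */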
"ZSTD_compressEnd failed" : "ZSTD_compressContinue failed"); + zcs->frameEnded = lastBlock; + if (lastBlock) + assert(ip == iend); + } + if (cDst == op) { /* no need to flush */ + op += cSize; + if (zcs->frameEnded) { + DEBUGLOG(5, "Frame completed directly in outBuffer"); + someMoreWork = 0; + ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + } + break; + } + zcs->outBuffContentSize = cSize; + zcs->outBuffFlushedSize = 0; + zcs->streamStage = zcss_flush; /* pass-through to flush stage */ + } + /* fall-through */ + case zcss_flush: + DEBUGLOG(5, "flush stage"); + assert(zcs->appliedParams.outBufferMode == ZSTD_bm_buffered); + { size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize; + size_t const flushed = ZSTD_limitCopy(op, (size_t)(oend-op), + zcs->outBuff + zcs->outBuffFlushedSize, toFlush); + DEBUGLOG(5, "toFlush: %u into %u ==> flushed: %u", + (unsigned)toFlush, (unsigned)(oend-op), (unsigned)flushed); + if (flushed) + op += flushed; + zcs->outBuffFlushedSize += flushed; + if (toFlush!=flushed) { + /* flush not fully completed, presumably because dst is too small */ + assert(op==oend); + someMoreWork = 0; + break; + } + zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0; + if (zcs->frameEnded) { + DEBUGLOG(5, "Frame completed on flush"); + someMoreWork = 0; + ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + break; + } + zcs->streamStage = zcss_load; + break; + } + + default: /* impossible */ + assert(0); + } + } + + input->pos = ip - istart; + output->pos = op - ostart; + if (zcs->frameEnded) return 0; + return ZSTD_nextInputSizeHint(zcs); +} + +static size_t ZSTD_nextInputSizeHint_MTorST(const ZSTD_CCtx* cctx) +{ +#ifdef ZSTD_MULTITHREAD + if (cctx->appliedParams.nbWorkers >= 1) { + assert(cctx->mtctx != NULL); + return ZSTDMT_nextInputSizeHint(cctx->mtctx); + } +#endif + return ZSTD_nextInputSizeHint(cctx); + +} + +size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input) +{ + FORWARD_IF_ERROR( ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue) , ""); + return ZSTD_nextInputSizeHint_MTorST(zcs); +} + +/* After a compression call set the expected input/output buffer. + * This is validated at the start of the next compression call. + */ +static void ZSTD_setBufferExpectations(ZSTD_CCtx* cctx, ZSTD_outBuffer const* output, ZSTD_inBuffer const* input) +{ + if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) { + cctx->expectedInBuffer = *input; + } + if (cctx->appliedParams.outBufferMode == ZSTD_bm_stable) { + cctx->expectedOutBufferSize = output->size - output->pos; + } +} + +/* Validate that the input/output buffers match the expectations set by + * ZSTD_setBufferExpectations. 
+ */ +static size_t ZSTD_checkBufferStability(ZSTD_CCtx const* cctx, + ZSTD_outBuffer const* output, + ZSTD_inBuffer const* input, + ZSTD_EndDirective endOp) +{ + if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) { + ZSTD_inBuffer const expect = cctx->expectedInBuffer; + if (expect.src != input->src || expect.pos != input->pos || expect.size != input->size) + RETURN_ERROR(srcBuffer_wrong, "ZSTD_c_stableInBuffer enabled but input differs!"); + if (endOp != ZSTD_e_end) + RETURN_ERROR(srcBuffer_wrong, "ZSTD_c_stableInBuffer can only be used with ZSTD_e_end!"); + } + if (cctx->appliedParams.outBufferMode == ZSTD_bm_stable) { + size_t const outBufferSize = output->size - output->pos; + if (cctx->expectedOutBufferSize != outBufferSize) + RETURN_ERROR(dstBuffer_wrong, "ZSTD_c_stableOutBuffer enabled but output size differs!"); + } + return 0; +} + +static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx, + ZSTD_EndDirective endOp, + size_t inSize) { + ZSTD_CCtx_params params = cctx->requestedParams; + ZSTD_prefixDict const prefixDict = cctx->prefixDict; + FORWARD_IF_ERROR( ZSTD_initLocalDict(cctx) , ""); /* Init the local dict if present. */ + ZSTD_memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict)); /* single usage */ + assert(prefixDict.dict==NULL || cctx->cdict==NULL); /* only one can be set */ + if (cctx->cdict && !cctx->localDict.cdict) { + /* Let the cdict's compression level take priority over the requested params. + * But do not take the cdict's compression level if the "cdict" is actually a localDict + * generated from ZSTD_initLocalDict(). + */ + params.compressionLevel = cctx->cdict->compressionLevel; + } + DEBUGLOG(4, "ZSTD_compressStream2 : transparent init stage"); + if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = inSize + 1; /* auto-fix pledgedSrcSize */ + { + size_t const dictSize = prefixDict.dict + ? prefixDict.dictSize + : (cctx->cdict ? cctx->cdict->dictContentSize : 0); + ZSTD_cParamMode_e const mode = ZSTD_getCParamMode(cctx->cdict, ¶ms, cctx->pledgedSrcSizePlusOne - 1); + params.cParams = ZSTD_getCParamsFromCCtxParams( + ¶ms, cctx->pledgedSrcSizePlusOne-1, + dictSize, mode); + } + + if (ZSTD_CParams_shouldEnableLdm(¶ms.cParams)) { + /* Enable LDM by default for optimal parser and window size >= 128MB */ + DEBUGLOG(4, "LDM enabled by default (window size >= 128MB, strategy >= btopt)"); + params.ldmParams.enableLdm = 1; + } + + if (ZSTD_CParams_useBlockSplitter(¶ms.cParams)) { + DEBUGLOG(4, "Block splitter enabled by default (window size >= 128K, strategy >= btopt)"); + params.splitBlocks = 1; + } + + params.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params.useRowMatchFinder, ¶ms.cParams); + +#ifdef ZSTD_MULTITHREAD + if ((cctx->pledgedSrcSizePlusOne-1) <= ZSTDMT_JOBSIZE_MIN) { + params.nbWorkers = 0; /* do not invoke multi-threading when src size is too small */ + } + if (params.nbWorkers > 0) { +#if ZSTD_TRACE + cctx->traceCtx = (ZSTD_trace_compress_begin != NULL) ? 
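+        /* Transparent-init recap: ZSTD_e_end pins pledgedSrcSizePlusOne to the first
+         * call's input size, cParams are resolved from the requested params plus the
+         * cdict/prefix dictionary size, LDM and block splitting default on for
+         * large-window btopt-class parameters, and multithreading is skipped when the
+         * pledged source is no larger than ZSTDMT_JOBSIZE_MIN. */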
ZSTD_trace_compress_begin(cctx) : 0; +#endif + /* mt context creation */ + if (cctx->mtctx == NULL) { + DEBUGLOG(4, "ZSTD_compressStream2: creating new mtctx for nbWorkers=%u", + params.nbWorkers); + cctx->mtctx = ZSTDMT_createCCtx_advanced((U32)params.nbWorkers, cctx->customMem, cctx->pool); + RETURN_ERROR_IF(cctx->mtctx == NULL, memory_allocation, "NULL pointer!"); + } + /* mt compression */ + DEBUGLOG(4, "call ZSTDMT_initCStream_internal as nbWorkers=%u", params.nbWorkers); + FORWARD_IF_ERROR( ZSTDMT_initCStream_internal( + cctx->mtctx, + prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType, + cctx->cdict, params, cctx->pledgedSrcSizePlusOne-1) , ""); + cctx->dictID = cctx->cdict ? cctx->cdict->dictID : 0; + cctx->dictContentSize = cctx->cdict ? cctx->cdict->dictContentSize : prefixDict.dictSize; + cctx->consumedSrcSize = 0; + cctx->producedCSize = 0; + cctx->streamStage = zcss_load; + cctx->appliedParams = params; + } else +#endif + { U64 const pledgedSrcSize = cctx->pledgedSrcSizePlusOne - 1; + assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); + FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx, + prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType, ZSTD_dtlm_fast, + cctx->cdict, + ¶ms, pledgedSrcSize, + ZSTDb_buffered) , ""); + assert(cctx->appliedParams.nbWorkers == 0); + cctx->inToCompress = 0; + cctx->inBuffPos = 0; + if (cctx->appliedParams.inBufferMode == ZSTD_bm_buffered) { + /* for small input: avoid automatic flush on reaching end of block, since + * it would require to add a 3-bytes null block to end frame + */ + cctx->inBuffTarget = cctx->blockSize + (cctx->blockSize == pledgedSrcSize); + } else { + cctx->inBuffTarget = 0; + } + cctx->outBuffContentSize = cctx->outBuffFlushedSize = 0; + cctx->streamStage = zcss_load; + cctx->frameEnded = 0; + } + return 0; +} + +size_t ZSTD_compressStream2( ZSTD_CCtx* cctx, + ZSTD_outBuffer* output, + ZSTD_inBuffer* input, + ZSTD_EndDirective endOp) +{ + DEBUGLOG(5, "ZSTD_compressStream2, endOp=%u ", (unsigned)endOp); + /* check conditions */ + RETURN_ERROR_IF(output->pos > output->size, dstSize_tooSmall, "invalid output buffer"); + RETURN_ERROR_IF(input->pos > input->size, srcSize_wrong, "invalid input buffer"); + RETURN_ERROR_IF((U32)endOp > (U32)ZSTD_e_end, parameter_outOfBound, "invalid endDirective"); + assert(cctx != NULL); + + /* transparent initialization stage */ + if (cctx->streamStage == zcss_init) { + FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, endOp, input->size), "CompressStream2 initialization failed"); + ZSTD_setBufferExpectations(cctx, output, input); /* Set initial buffer expectations now that we've initialized */ + } + /* end of transparent initialization stage */ + + FORWARD_IF_ERROR(ZSTD_checkBufferStability(cctx, output, input, endOp), "invalid buffers"); + /* compression stage */ +#ifdef ZSTD_MULTITHREAD + if (cctx->appliedParams.nbWorkers > 0) { + size_t flushMin; + if (cctx->cParamsChanged) { + ZSTDMT_updateCParams_whileCompressing(cctx->mtctx, &cctx->requestedParams); + cctx->cParamsChanged = 0; + } + for (;;) { + size_t const ipos = input->pos; + size_t const opos = output->pos; + flushMin = ZSTDMT_compressStream_generic(cctx->mtctx, output, input, endOp); + cctx->consumedSrcSize += (U64)(input->pos - ipos); + cctx->producedCSize += (U64)(output->pos - opos); + if ( ZSTD_isError(flushMin) + || (endOp == ZSTD_e_end && flushMin == 0) ) { /* compression completed */ + if (flushMin == 0) + ZSTD_CCtx_trace(cctx, 0); + ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only); + } + 
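+            /* Caller-side view of this loop (illustrative, public zstd.h API only):
+             *   ZSTD_inBuffer  in  = { src, srcSize, 0 };
+             *   ZSTD_outBuffer out = { dst, dstCap, 0 };
+             *   size_t remaining;
+             *   do { remaining = ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end); }
+             *   while (!ZSTD_isError(remaining) && remaining != 0);  // drain 'out' as it fills
+             * A zero return means the frame is fully flushed; non-zero means more
+             * output is pending, mirroring the flushMin handling below. */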
FORWARD_IF_ERROR(flushMin, "ZSTDMT_compressStream_generic failed"); + + if (endOp == ZSTD_e_continue) { + /* We only require some progress with ZSTD_e_continue, not maximal progress. + * We're done if we've consumed or produced any bytes, or either buffer is + * full. + */ + if (input->pos != ipos || output->pos != opos || input->pos == input->size || output->pos == output->size) + break; + } else { + assert(endOp == ZSTD_e_flush || endOp == ZSTD_e_end); + /* We require maximal progress. We're done when the flush is complete or the + * output buffer is full. + */ + if (flushMin == 0 || output->pos == output->size) + break; + } + } + DEBUGLOG(5, "completed ZSTD_compressStream2 delegating to ZSTDMT_compressStream_generic"); + /* Either we don't require maximum forward progress, we've finished the + * flush, or we are out of output space. + */ + assert(endOp == ZSTD_e_continue || flushMin == 0 || output->pos == output->size); + ZSTD_setBufferExpectations(cctx, output, input); + return flushMin; + } +#endif + FORWARD_IF_ERROR( ZSTD_compressStream_generic(cctx, output, input, endOp) , ""); + DEBUGLOG(5, "completed ZSTD_compressStream2"); + ZSTD_setBufferExpectations(cctx, output, input); + return cctx->outBuffContentSize - cctx->outBuffFlushedSize; /* remaining to flush */ +} + +size_t ZSTD_compressStream2_simpleArgs ( + ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, size_t* dstPos, + const void* src, size_t srcSize, size_t* srcPos, + ZSTD_EndDirective endOp) +{ + ZSTD_outBuffer output = { dst, dstCapacity, *dstPos }; + ZSTD_inBuffer input = { src, srcSize, *srcPos }; + /* ZSTD_compressStream2() will check validity of dstPos and srcPos */ + size_t const cErr = ZSTD_compressStream2(cctx, &output, &input, endOp); + *dstPos = output.pos; + *srcPos = input.pos; + return cErr; +} + +size_t ZSTD_compress2(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + ZSTD_bufferMode_e const originalInBufferMode = cctx->requestedParams.inBufferMode; + ZSTD_bufferMode_e const originalOutBufferMode = cctx->requestedParams.outBufferMode; + DEBUGLOG(4, "ZSTD_compress2 (srcSize=%u)", (unsigned)srcSize); + ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only); + /* Enable stable input/output buffers. */ + cctx->requestedParams.inBufferMode = ZSTD_bm_stable; + cctx->requestedParams.outBufferMode = ZSTD_bm_stable; + { size_t oPos = 0; + size_t iPos = 0; + size_t const result = ZSTD_compressStream2_simpleArgs(cctx, + dst, dstCapacity, &oPos, + src, srcSize, &iPos, + ZSTD_e_end); + /* Reset to the original values. 
*/ + cctx->requestedParams.inBufferMode = originalInBufferMode; + cctx->requestedParams.outBufferMode = originalOutBufferMode; + FORWARD_IF_ERROR(result, "ZSTD_compressStream2_simpleArgs failed"); + if (result != 0) { /* compression not completed, due to lack of output space */ + assert(oPos == dstCapacity); + RETURN_ERROR(dstSize_tooSmall, ""); + } + assert(iPos == srcSize); /* all input is expected consumed */ + return oPos; + } +} + +typedef struct { + U32 idx; /* Index in array of ZSTD_Sequence */ + U32 posInSequence; /* Position within sequence at idx */ + size_t posInSrc; /* Number of bytes given by sequences provided so far */ +} ZSTD_sequencePosition; + +/* Returns a ZSTD error code if sequence is not valid */ +static size_t ZSTD_validateSequence(U32 offCode, U32 matchLength, + size_t posInSrc, U32 windowLog, size_t dictSize, U32 minMatch) { + size_t offsetBound; + U32 windowSize = 1 << windowLog; + /* posInSrc represents the amount of data the the decoder would decode up to this point. + * As long as the amount of data decoded is less than or equal to window size, offsets may be + * larger than the total length of output decoded in order to reference the dict, even larger than + * window size. After output surpasses windowSize, we're limited to windowSize offsets again. + */ + offsetBound = posInSrc > windowSize ? (size_t)windowSize : posInSrc + (size_t)dictSize; + RETURN_ERROR_IF(offCode > offsetBound + ZSTD_REP_MOVE, corruption_detected, "Offset too large!"); + RETURN_ERROR_IF(matchLength < minMatch, corruption_detected, "Matchlength too small"); + return 0; +} + +/* Returns an offset code, given a sequence's raw offset, the ongoing repcode array, and whether litLength == 0 */ +static U32 ZSTD_finalizeOffCode(U32 rawOffset, const U32 rep[ZSTD_REP_NUM], U32 ll0) { + U32 offCode = rawOffset + ZSTD_REP_MOVE; + U32 repCode = 0; + + if (!ll0 && rawOffset == rep[0]) { + repCode = 1; + } else if (rawOffset == rep[1]) { + repCode = 2 - ll0; + } else if (rawOffset == rep[2]) { + repCode = 3 - ll0; + } else if (ll0 && rawOffset == rep[0] - 1) { + repCode = 3; + } + if (repCode) { + /* ZSTD_storeSeq expects a number in the range [0, 2] to represent a repcode */ + offCode = repCode - 1; + } + return offCode; +} + +/* Returns 0 on success, and a ZSTD_error otherwise. This function scans through an array of + * ZSTD_Sequence, storing the sequences it finds, until it reaches a block delimiter. 
+ */ +static size_t ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos, + const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, + const void* src, size_t blockSize) { + U32 idx = seqPos->idx; + BYTE const* ip = (BYTE const*)(src); + const BYTE* const iend = ip + blockSize; + repcodes_t updatedRepcodes; + U32 dictSize; + U32 litLength; + U32 matchLength; + U32 ll0; + U32 offCode; + + if (cctx->cdict) { + dictSize = (U32)cctx->cdict->dictContentSize; + } else if (cctx->prefixDict.dict) { + dictSize = (U32)cctx->prefixDict.dictSize; + } else { + dictSize = 0; + } + ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t)); + for (; (inSeqs[idx].matchLength != 0 || inSeqs[idx].offset != 0) && idx < inSeqsSize; ++idx) { + litLength = inSeqs[idx].litLength; + matchLength = inSeqs[idx].matchLength; + ll0 = litLength == 0; + offCode = ZSTD_finalizeOffCode(inSeqs[idx].offset, updatedRepcodes.rep, ll0); + updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0); + + DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength); + if (cctx->appliedParams.validateSequences) { + seqPos->posInSrc += litLength + matchLength; + FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc, + cctx->appliedParams.cParams.windowLog, dictSize, + cctx->appliedParams.cParams.minMatch), + "Sequence validation failed"); + } + RETURN_ERROR_IF(idx - seqPos->idx > cctx->seqStore.maxNbSeq, memory_allocation, + "Not enough memory allocated. Try adjusting ZSTD_c_minMatch."); + ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength - MINMATCH); + ip += matchLength + litLength; + } + ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(repcodes_t)); + + if (inSeqs[idx].litLength) { + DEBUGLOG(6, "Storing last literals of size: %u", inSeqs[idx].litLength); + ZSTD_storeLastLiterals(&cctx->seqStore, ip, inSeqs[idx].litLength); + ip += inSeqs[idx].litLength; + seqPos->posInSrc += inSeqs[idx].litLength; + } + RETURN_ERROR_IF(ip != iend, corruption_detected, "Blocksize doesn't agree with block delimiter!"); + seqPos->idx = idx+1; + return 0; +} + +/* Returns the number of bytes to move the current read position back by. Only non-zero + * if we ended up splitting a sequence. Otherwise, it may return a ZSTD error if something + * went wrong. + * + * This function will attempt to scan through blockSize bytes represented by the sequences + * in inSeqs, storing any (partial) sequences. + * + * Occasionally, we may want to change the actual number of bytes we consumed from inSeqs to + * avoid splitting a match, or to avoid splitting a match such that it would produce a match + * smaller than MINMATCH. In this case, we return the number of bytes that we didn't read from this block. 
+ */ +static size_t ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos, + const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, + const void* src, size_t blockSize) { + U32 idx = seqPos->idx; + U32 startPosInSequence = seqPos->posInSequence; + U32 endPosInSequence = seqPos->posInSequence + (U32)blockSize; + size_t dictSize; + BYTE const* ip = (BYTE const*)(src); + BYTE const* iend = ip + blockSize; /* May be adjusted if we decide to process fewer than blockSize bytes */ + repcodes_t updatedRepcodes; + U32 bytesAdjustment = 0; + U32 finalMatchSplit = 0; + U32 litLength; + U32 matchLength; + U32 rawOffset; + U32 offCode; + + if (cctx->cdict) { + dictSize = cctx->cdict->dictContentSize; + } else if (cctx->prefixDict.dict) { + dictSize = cctx->prefixDict.dictSize; + } else { + dictSize = 0; + } + DEBUGLOG(5, "ZSTD_copySequencesToSeqStore: idx: %u PIS: %u blockSize: %zu", idx, startPosInSequence, blockSize); + DEBUGLOG(5, "Start seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength); + ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t)); + while (endPosInSequence && idx < inSeqsSize && !finalMatchSplit) { + const ZSTD_Sequence currSeq = inSeqs[idx]; + litLength = currSeq.litLength; + matchLength = currSeq.matchLength; + rawOffset = currSeq.offset; + + /* Modify the sequence depending on where endPosInSequence lies */ + if (endPosInSequence >= currSeq.litLength + currSeq.matchLength) { + if (startPosInSequence >= litLength) { + startPosInSequence -= litLength; + litLength = 0; + matchLength -= startPosInSequence; + } else { + litLength -= startPosInSequence; + } + /* Move to the next sequence */ + endPosInSequence -= currSeq.litLength + currSeq.matchLength; + startPosInSequence = 0; + idx++; + } else { + /* This is the final (partial) sequence we're adding from inSeqs, and endPosInSequence + does not reach the end of the match. So, we have to split the sequence */ + DEBUGLOG(6, "Require a split: diff: %u, idx: %u PIS: %u", + currSeq.litLength + currSeq.matchLength - endPosInSequence, idx, endPosInSequence); + if (endPosInSequence > litLength) { + U32 firstHalfMatchLength; + litLength = startPosInSequence >= litLength ? 0 : litLength - startPosInSequence; + firstHalfMatchLength = endPosInSequence - startPosInSequence - litLength; + if (matchLength > blockSize && firstHalfMatchLength >= cctx->appliedParams.cParams.minMatch) { + /* Only ever split the match if it is larger than the block size */ + U32 secondHalfMatchLength = currSeq.matchLength + currSeq.litLength - endPosInSequence; + if (secondHalfMatchLength < cctx->appliedParams.cParams.minMatch) { + /* Move the endPosInSequence backward so that it creates match of minMatch length */ + endPosInSequence -= cctx->appliedParams.cParams.minMatch - secondHalfMatchLength; + bytesAdjustment = cctx->appliedParams.cParams.minMatch - secondHalfMatchLength; + firstHalfMatchLength -= bytesAdjustment; + } + matchLength = firstHalfMatchLength; + /* Flag that we split the last match - after storing the sequence, exit the loop, + but keep the value of endPosInSequence */ + finalMatchSplit = 1; + } else { + /* Move the position in sequence backwards so that we don't split match, and break to store + * the last literals. We use the original currSeq.litLength as a marker for where endPosInSequence + * should go. 
We prefer to do this whenever it is not necessary to split the match, or if doing so + * would cause the first half of the match to be too small + */ + bytesAdjustment = endPosInSequence - currSeq.litLength; + endPosInSequence = currSeq.litLength; + break; + } + } else { + /* This sequence ends inside the literals, break to store the last literals */ + break; + } + } + /* Check if this offset can be represented with a repcode */ + { U32 ll0 = (litLength == 0); + offCode = ZSTD_finalizeOffCode(rawOffset, updatedRepcodes.rep, ll0); + updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0); + } + + if (cctx->appliedParams.validateSequences) { + seqPos->posInSrc += litLength + matchLength; + FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc, + cctx->appliedParams.cParams.windowLog, dictSize, + cctx->appliedParams.cParams.minMatch), + "Sequence validation failed"); + } + DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength); + RETURN_ERROR_IF(idx - seqPos->idx > cctx->seqStore.maxNbSeq, memory_allocation, + "Not enough memory allocated. Try adjusting ZSTD_c_minMatch."); + ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength - MINMATCH); + ip += matchLength + litLength; + } + DEBUGLOG(5, "Ending seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength); + assert(idx == inSeqsSize || endPosInSequence <= inSeqs[idx].litLength + inSeqs[idx].matchLength); + seqPos->idx = idx; + seqPos->posInSequence = endPosInSequence; + ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(repcodes_t)); + + iend -= bytesAdjustment; + if (ip != iend) { + /* Store any last literals */ + U32 lastLLSize = (U32)(iend - ip); + assert(ip <= iend); + DEBUGLOG(6, "Storing last literals of size: %u", lastLLSize); + ZSTD_storeLastLiterals(&cctx->seqStore, ip, lastLLSize); + seqPos->posInSrc += lastLLSize; + } + + return bytesAdjustment; +} + +typedef size_t (*ZSTD_sequenceCopier) (ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos, + const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, + const void* src, size_t blockSize); +static ZSTD_sequenceCopier ZSTD_selectSequenceCopier(ZSTD_sequenceFormat_e mode) { + ZSTD_sequenceCopier sequenceCopier = NULL; + assert(ZSTD_cParam_withinBounds(ZSTD_c_blockDelimiters, mode)); + if (mode == ZSTD_sf_explicitBlockDelimiters) { + return ZSTD_copySequencesToSeqStoreExplicitBlockDelim; + } else if (mode == ZSTD_sf_noBlockDelimiters) { + return ZSTD_copySequencesToSeqStoreNoBlockDelim; + } + assert(sequenceCopier != NULL); + return sequenceCopier; +} + +/* Compress, block-by-block, all of the sequences given. + * + * Returns the cumulative size of all compressed blocks (including their headers), otherwise a ZSTD error. 
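+ *
+ * A minimal caller-side sketch of the public entry point that drives this
+ * function (illustrative; error checks omitted, buffer and sequence variables
+ * are placeholders):
+ *
+ *     ZSTD_CCtx* const cctx = ZSTD_createCCtx();
+ *     ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockDelimiters, ZSTD_sf_explicitBlockDelimiters);
+ *     ZSTD_CCtx_setParameter(cctx, ZSTD_c_validateSequences, 1);
+ *     size_t const cSize = ZSTD_compressSequences(cctx, dst, dstCapacity,
+ *                                                 seqs, nbSeqs, src, srcSize);
+ *     ZSTD_freeCCtx(cctx);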
+ */ +static size_t ZSTD_compressSequences_internal(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const ZSTD_Sequence* inSeqs, size_t inSeqsSize, + const void* src, size_t srcSize) { + size_t cSize = 0; + U32 lastBlock; + size_t blockSize; + size_t compressedSeqsSize; + size_t remaining = srcSize; + ZSTD_sequencePosition seqPos = {0, 0, 0}; + + BYTE const* ip = (BYTE const*)src; + BYTE* op = (BYTE*)dst; + ZSTD_sequenceCopier sequenceCopier = ZSTD_selectSequenceCopier(cctx->appliedParams.blockDelimiters); + + DEBUGLOG(4, "ZSTD_compressSequences_internal srcSize: %zu, inSeqsSize: %zu", srcSize, inSeqsSize); + /* Special case: empty frame */ + if (remaining == 0) { + U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1); + RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "No room for empty frame block header"); + MEM_writeLE32(op, cBlockHeader24); + op += ZSTD_blockHeaderSize; + dstCapacity -= ZSTD_blockHeaderSize; + cSize += ZSTD_blockHeaderSize; + } + + while (remaining) { + size_t cBlockSize; + size_t additionalByteAdjustment; + lastBlock = remaining <= cctx->blockSize; + blockSize = lastBlock ? (U32)remaining : (U32)cctx->blockSize; + ZSTD_resetSeqStore(&cctx->seqStore); + DEBUGLOG(4, "Working on new block. Blocksize: %zu", blockSize); + + additionalByteAdjustment = sequenceCopier(cctx, &seqPos, inSeqs, inSeqsSize, ip, blockSize); + FORWARD_IF_ERROR(additionalByteAdjustment, "Bad sequence copy"); + blockSize -= additionalByteAdjustment; + + /* If blocks are too small, emit as a nocompress block */ + if (blockSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) { + cBlockSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock); + FORWARD_IF_ERROR(cBlockSize, "Nocompress block failed"); + DEBUGLOG(4, "Block too small, writing out nocompress block: cSize: %zu", cBlockSize); + cSize += cBlockSize; + ip += blockSize; + op += cBlockSize; + remaining -= blockSize; + dstCapacity -= cBlockSize; + continue; + } + + compressedSeqsSize = ZSTD_entropyCompressSeqStore(&cctx->seqStore, + &cctx->blockState.prevCBlock->entropy, &cctx->blockState.nextCBlock->entropy, + &cctx->appliedParams, + op + ZSTD_blockHeaderSize /* Leave space for block header */, dstCapacity - ZSTD_blockHeaderSize, + blockSize, + cctx->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */, + cctx->bmi2); + FORWARD_IF_ERROR(compressedSeqsSize, "Compressing sequences of block failed"); + DEBUGLOG(4, "Compressed sequences size: %zu", compressedSeqsSize); + + if (!cctx->isFirstBlock && + ZSTD_maybeRLE(&cctx->seqStore) && + ZSTD_isRLE((BYTE const*)src, srcSize)) { + /* We don't want to emit our first block as a RLE even if it qualifies because + * doing so will cause the decoder (cli only) to throw a "should consume all input error." 
+ * This is only an issue for zstd <= v1.4.3 + */ + compressedSeqsSize = 1; + } + + if (compressedSeqsSize == 0) { + /* ZSTD_noCompressBlock writes the block header as well */ + cBlockSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock); + FORWARD_IF_ERROR(cBlockSize, "Nocompress block failed"); + DEBUGLOG(4, "Writing out nocompress block, size: %zu", cBlockSize); + } else if (compressedSeqsSize == 1) { + cBlockSize = ZSTD_rleCompressBlock(op, dstCapacity, *ip, blockSize, lastBlock); + FORWARD_IF_ERROR(cBlockSize, "RLE compress block failed"); + DEBUGLOG(4, "Writing out RLE block, size: %zu", cBlockSize); + } else { + U32 cBlockHeader; + /* Error checking and repcodes update */ + ZSTD_blockState_confirmRepcodesAndEntropyTables(&cctx->blockState); + if (cctx->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) + cctx->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; + + /* Write block header into beginning of block*/ + cBlockHeader = lastBlock + (((U32)bt_compressed)<<1) + (U32)(compressedSeqsSize << 3); + MEM_writeLE24(op, cBlockHeader); + cBlockSize = ZSTD_blockHeaderSize + compressedSeqsSize; + DEBUGLOG(4, "Writing out compressed block, size: %zu", cBlockSize); + } + + cSize += cBlockSize; + DEBUGLOG(4, "cSize running total: %zu", cSize); + + if (lastBlock) { + break; + } else { + ip += blockSize; + op += cBlockSize; + remaining -= blockSize; + dstCapacity -= cBlockSize; + cctx->isFirstBlock = 0; + } + } + + return cSize; +} + +size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstCapacity, + const ZSTD_Sequence* inSeqs, size_t inSeqsSize, + const void* src, size_t srcSize) { + BYTE* op = (BYTE*)dst; + size_t cSize = 0; + size_t compressedBlocksSize = 0; + size_t frameHeaderSize = 0; + + /* Transparent initialization stage, same as compressStream2() */ + DEBUGLOG(3, "ZSTD_compressSequences()"); + assert(cctx != NULL); + FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, ZSTD_e_end, srcSize), "CCtx initialization failed"); + /* Begin writing output, starting with frame header */ + frameHeaderSize = ZSTD_writeFrameHeader(op, dstCapacity, &cctx->appliedParams, srcSize, cctx->dictID); + op += frameHeaderSize; + dstCapacity -= frameHeaderSize; + cSize += frameHeaderSize; + if (cctx->appliedParams.fParams.checksumFlag && srcSize) { + XXH64_update(&cctx->xxhState, src, srcSize); + } + /* cSize includes block header size and compressed sequences size */ + compressedBlocksSize = ZSTD_compressSequences_internal(cctx, + op, dstCapacity, + inSeqs, inSeqsSize, + src, srcSize); + FORWARD_IF_ERROR(compressedBlocksSize, "Compressing blocks failed!"); + cSize += compressedBlocksSize; + dstCapacity -= compressedBlocksSize; + + if (cctx->appliedParams.fParams.checksumFlag) { + U32 const checksum = (U32) XXH64_digest(&cctx->xxhState); + RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for checksum"); + DEBUGLOG(4, "Write checksum : %08X", (unsigned)checksum); + MEM_writeLE32((char*)dst + cSize, checksum); + cSize += 4; + } + + DEBUGLOG(3, "Final compressed size: %zu", cSize); + return cSize; +} + +/*====== Finalize ======*/ + +/*! 
ZSTD_flushStream() : + * @return : amount of data remaining to flush */ +size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output) +{ + ZSTD_inBuffer input = { NULL, 0, 0 }; + return ZSTD_compressStream2(zcs, output, &input, ZSTD_e_flush); +} + + +size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output) +{ + ZSTD_inBuffer input = { NULL, 0, 0 }; + size_t const remainingToFlush = ZSTD_compressStream2(zcs, output, &input, ZSTD_e_end); + FORWARD_IF_ERROR( remainingToFlush , "ZSTD_compressStream2 failed"); + if (zcs->appliedParams.nbWorkers > 0) return remainingToFlush; /* minimal estimation */ + /* single thread mode : attempt to calculate remaining to flush more precisely */ + { size_t const lastBlockSize = zcs->frameEnded ? 0 : ZSTD_BLOCKHEADERSIZE; + size_t const checksumSize = (size_t)(zcs->frameEnded ? 0 : zcs->appliedParams.fParams.checksumFlag * 4); + size_t const toFlush = remainingToFlush + lastBlockSize + checksumSize; + DEBUGLOG(4, "ZSTD_endStream : remaining to flush : %u", (unsigned)toFlush); + return toFlush; + } +} + + +/*-===== Pre-defined compression levels =====-*/ + +#define ZSTD_MAX_CLEVEL 22 +int ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; } +int ZSTD_minCLevel(void) { return (int)-ZSTD_TARGETLENGTH_MAX; } +int ZSTD_defaultCLevel(void) { return ZSTD_CLEVEL_DEFAULT; } + +static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = { +{ /* "default" - for any srcSize > 256 KB */ + /* W, C, H, S, L, TL, strat */ + { 19, 12, 13, 1, 6, 1, ZSTD_fast }, /* base for negative levels */ + { 19, 13, 14, 1, 7, 0, ZSTD_fast }, /* level 1 */ + { 20, 15, 16, 1, 6, 0, ZSTD_fast }, /* level 2 */ + { 21, 16, 17, 1, 5, 0, ZSTD_dfast }, /* level 3 */ + { 21, 18, 18, 1, 5, 0, ZSTD_dfast }, /* level 4 */ + { 21, 18, 19, 2, 5, 2, ZSTD_greedy }, /* level 5 */ + { 21, 19, 19, 3, 5, 4, ZSTD_greedy }, /* level 6 */ + { 21, 19, 19, 3, 5, 8, ZSTD_lazy }, /* level 7 */ + { 21, 19, 19, 3, 5, 16, ZSTD_lazy2 }, /* level 8 */ + { 21, 19, 20, 4, 5, 16, ZSTD_lazy2 }, /* level 9 */ + { 22, 20, 21, 4, 5, 16, ZSTD_lazy2 }, /* level 10 */ + { 22, 21, 22, 4, 5, 16, ZSTD_lazy2 }, /* level 11 */ + { 22, 21, 22, 5, 5, 16, ZSTD_lazy2 }, /* level 12 */ + { 22, 21, 22, 5, 5, 32, ZSTD_btlazy2 }, /* level 13 */ + { 22, 22, 23, 5, 5, 32, ZSTD_btlazy2 }, /* level 14 */ + { 22, 23, 23, 6, 5, 32, ZSTD_btlazy2 }, /* level 15 */ + { 22, 22, 22, 5, 5, 48, ZSTD_btopt }, /* level 16 */ + { 23, 23, 22, 5, 4, 64, ZSTD_btopt }, /* level 17 */ + { 23, 23, 22, 6, 3, 64, ZSTD_btultra }, /* level 18 */ + { 23, 24, 22, 7, 3,256, ZSTD_btultra2}, /* level 19 */ + { 25, 25, 23, 7, 3,256, ZSTD_btultra2}, /* level 20 */ + { 26, 26, 24, 7, 3,512, ZSTD_btultra2}, /* level 21 */ + { 27, 27, 25, 9, 3,999, ZSTD_btultra2}, /* level 22 */ +}, +{ /* for srcSize <= 256 KB */ + /* W, C, H, S, L, T, strat */ + { 18, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */ + { 18, 13, 14, 1, 6, 0, ZSTD_fast }, /* level 1 */ + { 18, 14, 14, 1, 5, 0, ZSTD_dfast }, /* level 2 */ + { 18, 16, 16, 1, 4, 0, ZSTD_dfast }, /* level 3 */ + { 18, 16, 17, 2, 5, 2, ZSTD_greedy }, /* level 4.*/ + { 18, 18, 18, 3, 5, 2, ZSTD_greedy }, /* level 5.*/ + { 18, 18, 19, 3, 5, 4, ZSTD_lazy }, /* level 6.*/ + { 18, 18, 19, 4, 4, 4, ZSTD_lazy }, /* level 7 */ + { 18, 18, 19, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */ + { 18, 18, 19, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */ + { 18, 18, 19, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */ + { 18, 18, 19, 5, 4, 12, ZSTD_btlazy2 }, /* level 11.*/ + { 18, 19, 19, 7, 4, 12, ZSTD_btlazy2 }, /* level 
12.*/ + { 18, 18, 19, 4, 4, 16, ZSTD_btopt }, /* level 13 */ + { 18, 18, 19, 4, 3, 32, ZSTD_btopt }, /* level 14.*/ + { 18, 18, 19, 6, 3,128, ZSTD_btopt }, /* level 15.*/ + { 18, 19, 19, 6, 3,128, ZSTD_btultra }, /* level 16.*/ + { 18, 19, 19, 8, 3,256, ZSTD_btultra }, /* level 17.*/ + { 18, 19, 19, 6, 3,128, ZSTD_btultra2}, /* level 18.*/ + { 18, 19, 19, 8, 3,256, ZSTD_btultra2}, /* level 19.*/ + { 18, 19, 19, 10, 3,512, ZSTD_btultra2}, /* level 20.*/ + { 18, 19, 19, 12, 3,512, ZSTD_btultra2}, /* level 21.*/ + { 18, 19, 19, 13, 3,999, ZSTD_btultra2}, /* level 22.*/ +}, +{ /* for srcSize <= 128 KB */ + /* W, C, H, S, L, T, strat */ + { 17, 12, 12, 1, 5, 1, ZSTD_fast }, /* base for negative levels */ + { 17, 12, 13, 1, 6, 0, ZSTD_fast }, /* level 1 */ + { 17, 13, 15, 1, 5, 0, ZSTD_fast }, /* level 2 */ + { 17, 15, 16, 2, 5, 0, ZSTD_dfast }, /* level 3 */ + { 17, 17, 17, 2, 4, 0, ZSTD_dfast }, /* level 4 */ + { 17, 16, 17, 3, 4, 2, ZSTD_greedy }, /* level 5 */ + { 17, 17, 17, 3, 4, 4, ZSTD_lazy }, /* level 6 */ + { 17, 17, 17, 3, 4, 8, ZSTD_lazy2 }, /* level 7 */ + { 17, 17, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */ + { 17, 17, 17, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */ + { 17, 17, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */ + { 17, 17, 17, 5, 4, 8, ZSTD_btlazy2 }, /* level 11 */ + { 17, 18, 17, 7, 4, 12, ZSTD_btlazy2 }, /* level 12 */ + { 17, 18, 17, 3, 4, 12, ZSTD_btopt }, /* level 13.*/ + { 17, 18, 17, 4, 3, 32, ZSTD_btopt }, /* level 14.*/ + { 17, 18, 17, 6, 3,256, ZSTD_btopt }, /* level 15.*/ + { 17, 18, 17, 6, 3,128, ZSTD_btultra }, /* level 16.*/ + { 17, 18, 17, 8, 3,256, ZSTD_btultra }, /* level 17.*/ + { 17, 18, 17, 10, 3,512, ZSTD_btultra }, /* level 18.*/ + { 17, 18, 17, 5, 3,256, ZSTD_btultra2}, /* level 19.*/ + { 17, 18, 17, 7, 3,512, ZSTD_btultra2}, /* level 20.*/ + { 17, 18, 17, 9, 3,512, ZSTD_btultra2}, /* level 21.*/ + { 17, 18, 17, 11, 3,999, ZSTD_btultra2}, /* level 22.*/ +}, +{ /* for srcSize <= 16 KB */ + /* W, C, H, S, L, T, strat */ + { 14, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */ + { 14, 14, 15, 1, 5, 0, ZSTD_fast }, /* level 1 */ + { 14, 14, 15, 1, 4, 0, ZSTD_fast }, /* level 2 */ + { 14, 14, 15, 2, 4, 0, ZSTD_dfast }, /* level 3 */ + { 14, 14, 14, 4, 4, 2, ZSTD_greedy }, /* level 4 */ + { 14, 14, 14, 3, 4, 4, ZSTD_lazy }, /* level 5.*/ + { 14, 14, 14, 4, 4, 8, ZSTD_lazy2 }, /* level 6 */ + { 14, 14, 14, 6, 4, 8, ZSTD_lazy2 }, /* level 7 */ + { 14, 14, 14, 8, 4, 8, ZSTD_lazy2 }, /* level 8.*/ + { 14, 15, 14, 5, 4, 8, ZSTD_btlazy2 }, /* level 9.*/ + { 14, 15, 14, 9, 4, 8, ZSTD_btlazy2 }, /* level 10.*/ + { 14, 15, 14, 3, 4, 12, ZSTD_btopt }, /* level 11.*/ + { 14, 15, 14, 4, 3, 24, ZSTD_btopt }, /* level 12.*/ + { 14, 15, 14, 5, 3, 32, ZSTD_btultra }, /* level 13.*/ + { 14, 15, 15, 6, 3, 64, ZSTD_btultra }, /* level 14.*/ + { 14, 15, 15, 7, 3,256, ZSTD_btultra }, /* level 15.*/ + { 14, 15, 15, 5, 3, 48, ZSTD_btultra2}, /* level 16.*/ + { 14, 15, 15, 6, 3,128, ZSTD_btultra2}, /* level 17.*/ + { 14, 15, 15, 7, 3,256, ZSTD_btultra2}, /* level 18.*/ + { 14, 15, 15, 8, 3,256, ZSTD_btultra2}, /* level 19.*/ + { 14, 15, 15, 8, 3,512, ZSTD_btultra2}, /* level 20.*/ + { 14, 15, 15, 9, 3,512, ZSTD_btultra2}, /* level 21.*/ + { 14, 15, 15, 10, 3,999, ZSTD_btultra2}, /* level 22.*/ +}, +}; + +static ZSTD_compressionParameters ZSTD_dedicatedDictSearch_getCParams(int const compressionLevel, size_t const dictSize) +{ + ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, 0, dictSize, ZSTD_cpm_createCDict); + switch (cParams.strategy) { + 
case ZSTD_fast: + case ZSTD_dfast: + break; + case ZSTD_greedy: + case ZSTD_lazy: + case ZSTD_lazy2: + cParams.hashLog += ZSTD_LAZY_DDSS_BUCKET_LOG; + break; + case ZSTD_btlazy2: + case ZSTD_btopt: + case ZSTD_btultra: + case ZSTD_btultra2: + break; + } + return cParams; +} + +static int ZSTD_dedicatedDictSearch_isSupported( + ZSTD_compressionParameters const* cParams) +{ + return (cParams->strategy >= ZSTD_greedy) + && (cParams->strategy <= ZSTD_lazy2) + && (cParams->hashLog > cParams->chainLog) + && (cParams->chainLog <= 24); +} + +/** + * Reverses the adjustment applied to cparams when enabling dedicated dict + * search. This is used to recover the params set to be used in the working + * context. (Otherwise, those tables would also grow.) + */ +static void ZSTD_dedicatedDictSearch_revertCParams( + ZSTD_compressionParameters* cParams) { + switch (cParams->strategy) { + case ZSTD_fast: + case ZSTD_dfast: + break; + case ZSTD_greedy: + case ZSTD_lazy: + case ZSTD_lazy2: + cParams->hashLog -= ZSTD_LAZY_DDSS_BUCKET_LOG; + if (cParams->hashLog < ZSTD_HASHLOG_MIN) { + cParams->hashLog = ZSTD_HASHLOG_MIN; + } + break; + case ZSTD_btlazy2: + case ZSTD_btopt: + case ZSTD_btultra: + case ZSTD_btultra2: + break; + } +} + +static U64 ZSTD_getCParamRowSize(U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode) +{ + switch (mode) { + case ZSTD_cpm_unknown: + case ZSTD_cpm_noAttachDict: + case ZSTD_cpm_createCDict: + break; + case ZSTD_cpm_attachDict: + dictSize = 0; + break; + default: + assert(0); + break; + } + { int const unknown = srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN; + size_t const addedSize = unknown && dictSize > 0 ? 500 : 0; + return unknown && dictSize == 0 ? ZSTD_CONTENTSIZE_UNKNOWN : srcSizeHint+dictSize+addedSize; + } +} + +/*! ZSTD_getCParams_internal() : + * @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize. + * Note: srcSizeHint 0 means 0, use ZSTD_CONTENTSIZE_UNKNOWN for unknown. + * Use dictSize == 0 for unknown or unused. + * Note: `mode` controls how we treat the `dictSize`. See docs for `ZSTD_cParamMode_e`. */ +static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode) +{ + U64 const rSize = ZSTD_getCParamRowSize(srcSizeHint, dictSize, mode); + U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB); + int row; + DEBUGLOG(5, "ZSTD_getCParams_internal (cLevel=%i)", compressionLevel); + + /* row */ + if (compressionLevel == 0) row = ZSTD_CLEVEL_DEFAULT; /* 0 == default */ + else if (compressionLevel < 0) row = 0; /* entry 0 is baseline for fast mode */ + else if (compressionLevel > ZSTD_MAX_CLEVEL) row = ZSTD_MAX_CLEVEL; + else row = compressionLevel; + + { ZSTD_compressionParameters cp = ZSTD_defaultCParameters[tableID][row]; + DEBUGLOG(5, "ZSTD_getCParams_internal selected tableID: %u row: %u strat: %u", tableID, row, (U32)cp.strategy); + /* acceleration factor */ + if (compressionLevel < 0) { + int const clampedCompressionLevel = MAX(ZSTD_minCLevel(), compressionLevel); + cp.targetLength = (unsigned)(-clampedCompressionLevel); + } + /* refine parameters based on srcSize & dictSize */ + return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize, mode); + } +} + +/*! ZSTD_getCParams() : + * @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize. 
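+ *  Illustrative call (fileSize is a placeholder for the caller's known source size) :
+ *      ZSTD_compressionParameters const cp = ZSTD_getCParams(19, fileSize, 0);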
+ * Size values are optional, provide 0 if not known or unused */ +ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) +{ + if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN; + return ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize, ZSTD_cpm_unknown); +} + +/*! ZSTD_getParams() : + * same idea as ZSTD_getCParams() + * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`). + * Fields of `ZSTD_frameParameters` are set to default values */ +static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode) { + ZSTD_parameters params; + ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize, mode); + DEBUGLOG(5, "ZSTD_getParams (cLevel=%i)", compressionLevel); + ZSTD_memset(¶ms, 0, sizeof(params)); + params.cParams = cParams; + params.fParams.contentSizeFlag = 1; + return params; +} + +/*! ZSTD_getParams() : + * same idea as ZSTD_getCParams() + * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`). + * Fields of `ZSTD_frameParameters` are set to default values */ +ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) { + if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN; + return ZSTD_getParams_internal(compressionLevel, srcSizeHint, dictSize, ZSTD_cpm_unknown); +} +/**** ended inlining compress/zstd_compress.c ****/ +/**** start inlining compress/zstd_double_fast.c ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/**** skipping file: zstd_compress_internal.h ****/ +/**** skipping file: zstd_double_fast.h ****/ + + +void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms, + void const* end, ZSTD_dictTableLoadMethod_e dtlm) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const hashLarge = ms->hashTable; + U32 const hBitsL = cParams->hashLog; + U32 const mls = cParams->minMatch; + U32* const hashSmall = ms->chainTable; + U32 const hBitsS = cParams->chainLog; + const BYTE* const base = ms->window.base; + const BYTE* ip = base + ms->nextToUpdate; + const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE; + const U32 fastHashFillStep = 3; + + /* Always insert every fastHashFillStep position into the hash tables. + * Insert the other positions into the large hash table if their entry + * is empty. 
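+ *
+ * For illustration, with fastHashFillStep == 3 and a step starting at position p :
+ * hashSmall is updated only for p, while hashLarge is updated for p and, when
+ * dtlm == ZSTD_dtlm_full, also for p+1 and p+2 if their slots are still empty.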
+ */ + for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) { + U32 const curr = (U32)(ip - base); + U32 i; + for (i = 0; i < fastHashFillStep; ++i) { + size_t const smHash = ZSTD_hashPtr(ip + i, hBitsS, mls); + size_t const lgHash = ZSTD_hashPtr(ip + i, hBitsL, 8); + if (i == 0) + hashSmall[smHash] = curr + i; + if (i == 0 || hashLarge[lgHash] == 0) + hashLarge[lgHash] = curr + i; + /* Only load extra positions for ZSTD_dtlm_full */ + if (dtlm == ZSTD_dtlm_fast) + break; + } } +} + + +FORCE_INLINE_TEMPLATE +size_t ZSTD_compressBlock_doubleFast_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize, + U32 const mls /* template */, ZSTD_dictMode_e const dictMode) +{ + ZSTD_compressionParameters const* cParams = &ms->cParams; + U32* const hashLong = ms->hashTable; + const U32 hBitsL = cParams->hashLog; + U32* const hashSmall = ms->chainTable; + const U32 hBitsS = cParams->chainLog; + const BYTE* const base = ms->window.base; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); + /* presumes that, if there is a dictionary, it must be using Attach mode */ + const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog); + const BYTE* const prefixLowest = base + prefixLowestIndex; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - HASH_READ_SIZE; + U32 offset_1=rep[0], offset_2=rep[1]; + U32 offsetSaved = 0; + + const ZSTD_matchState_t* const dms = ms->dictMatchState; + const ZSTD_compressionParameters* const dictCParams = + dictMode == ZSTD_dictMatchState ? + &dms->cParams : NULL; + const U32* const dictHashLong = dictMode == ZSTD_dictMatchState ? + dms->hashTable : NULL; + const U32* const dictHashSmall = dictMode == ZSTD_dictMatchState ? + dms->chainTable : NULL; + const U32 dictStartIndex = dictMode == ZSTD_dictMatchState ? + dms->window.dictLimit : 0; + const BYTE* const dictBase = dictMode == ZSTD_dictMatchState ? + dms->window.base : NULL; + const BYTE* const dictStart = dictMode == ZSTD_dictMatchState ? + dictBase + dictStartIndex : NULL; + const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ? + dms->window.nextSrc : NULL; + const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ? + prefixLowestIndex - (U32)(dictEnd - dictBase) : + 0; + const U32 dictHBitsL = dictMode == ZSTD_dictMatchState ? + dictCParams->hashLog : hBitsL; + const U32 dictHBitsS = dictMode == ZSTD_dictMatchState ? + dictCParams->chainLog : hBitsS; + const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictStart)); + + DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_generic"); + + assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState); + + /* if a dictionary is attached, it must be within window range */ + if (dictMode == ZSTD_dictMatchState) { + assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex); + } + + /* init */ + ip += (dictAndPrefixLength == 0); + if (dictMode == ZSTD_noDict) { + U32 const curr = (U32)(ip - base); + U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog); + U32 const maxRep = curr - windowLow; + if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; + if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; + } + if (dictMode == ZSTD_dictMatchState) { + /* dictMatchState repCode checks don't currently handle repCode == 0 + * disabling. 
*/ + assert(offset_1 <= dictAndPrefixLength); + assert(offset_2 <= dictAndPrefixLength); + } + + /* Main Search Loop */ + while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ + size_t mLength; + U32 offset; + size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8); + size_t const h = ZSTD_hashPtr(ip, hBitsS, mls); + size_t const dictHL = ZSTD_hashPtr(ip, dictHBitsL, 8); + size_t const dictHS = ZSTD_hashPtr(ip, dictHBitsS, mls); + U32 const curr = (U32)(ip-base); + U32 const matchIndexL = hashLong[h2]; + U32 matchIndexS = hashSmall[h]; + const BYTE* matchLong = base + matchIndexL; + const BYTE* match = base + matchIndexS; + const U32 repIndex = curr + 1 - offset_1; + const BYTE* repMatch = (dictMode == ZSTD_dictMatchState + && repIndex < prefixLowestIndex) ? + dictBase + (repIndex - dictIndexDelta) : + base + repIndex; + hashLong[h2] = hashSmall[h] = curr; /* update hash tables */ + + /* check dictMatchState repcode */ + if (dictMode == ZSTD_dictMatchState + && ((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) + && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { + const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend; + mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; + ip++; + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH); + goto _match_stored; + } + + /* check noDict repcode */ + if ( dictMode == ZSTD_noDict + && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) { + mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; + ip++; + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH); + goto _match_stored; + } + + if (matchIndexL > prefixLowestIndex) { + /* check prefix long match */ + if (MEM_read64(matchLong) == MEM_read64(ip)) { + mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8; + offset = (U32)(ip-matchLong); + while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ + goto _match_found; + } + } else if (dictMode == ZSTD_dictMatchState) { + /* check dictMatchState long match */ + U32 const dictMatchIndexL = dictHashLong[dictHL]; + const BYTE* dictMatchL = dictBase + dictMatchIndexL; + assert(dictMatchL < dictEnd); + + if (dictMatchL > dictStart && MEM_read64(dictMatchL) == MEM_read64(ip)) { + mLength = ZSTD_count_2segments(ip+8, dictMatchL+8, iend, dictEnd, prefixLowest) + 8; + offset = (U32)(curr - dictMatchIndexL - dictIndexDelta); + while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */ + goto _match_found; + } } + + if (matchIndexS > prefixLowestIndex) { + /* check prefix short match */ + if (MEM_read32(match) == MEM_read32(ip)) { + goto _search_next_long; + } + } else if (dictMode == ZSTD_dictMatchState) { + /* check dictMatchState short match */ + U32 const dictMatchIndexS = dictHashSmall[dictHS]; + match = dictBase + dictMatchIndexS; + matchIndexS = dictMatchIndexS + dictIndexDelta; + + if (match > dictStart && MEM_read32(match) == MEM_read32(ip)) { + goto _search_next_long; + } } + + ip += ((ip-anchor) >> kSearchStrength) + 1; +#if defined(__aarch64__) + PREFETCH_L1(ip+256); +#endif + continue; + +_search_next_long: + + { size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8); + size_t const dictHLNext = ZSTD_hashPtr(ip+1, dictHBitsL, 8); + U32 const matchIndexL3 = hashLong[hl3]; + const BYTE* matchL3 = base + matchIndexL3; + hashLong[hl3] = curr + 1; + + /* check 
prefix long +1 match */ + if (matchIndexL3 > prefixLowestIndex) { + if (MEM_read64(matchL3) == MEM_read64(ip+1)) { + mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8; + ip++; + offset = (U32)(ip-matchL3); + while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */ + goto _match_found; + } + } else if (dictMode == ZSTD_dictMatchState) { + /* check dict long +1 match */ + U32 const dictMatchIndexL3 = dictHashLong[dictHLNext]; + const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3; + assert(dictMatchL3 < dictEnd); + if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) { + mLength = ZSTD_count_2segments(ip+1+8, dictMatchL3+8, iend, dictEnd, prefixLowest) + 8; + ip++; + offset = (U32)(curr + 1 - dictMatchIndexL3 - dictIndexDelta); + while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */ + goto _match_found; + } } } + + /* if no long +1 match, explore the short match we found */ + if (dictMode == ZSTD_dictMatchState && matchIndexS < prefixLowestIndex) { + mLength = ZSTD_count_2segments(ip+4, match+4, iend, dictEnd, prefixLowest) + 4; + offset = (U32)(curr - matchIndexS); + while (((ip>anchor) & (match>dictStart)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ + } else { + mLength = ZSTD_count(ip+4, match+4, iend) + 4; + offset = (U32)(ip - match); + while (((ip>anchor) & (match>prefixLowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ + } + + /* fall-through */ + +_match_found: + offset_2 = offset_1; + offset_1 = offset; + + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + +_match_stored: + /* match found */ + ip += mLength; + anchor = ip; + + if (ip <= ilimit) { + /* Complementary insertion */ + /* done after iLimit test, as candidates could be > iend-8 */ + { U32 const indexToInsert = curr+2; + hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert; + hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base); + hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert; + hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base); + } + + /* check immediate repcode */ + if (dictMode == ZSTD_dictMatchState) { + while (ip <= ilimit) { + U32 const current2 = (U32)(ip-base); + U32 const repIndex2 = current2 - offset_2; + const BYTE* repMatch2 = dictMode == ZSTD_dictMatchState + && repIndex2 < prefixLowestIndex ? + dictBase + repIndex2 - dictIndexDelta : + base + repIndex2; + if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */) + && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { + const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? 
dictEnd : iend; + size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4; + U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ + ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH); + hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2; + hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2; + ip += repLength2; + anchor = ip; + continue; + } + break; + } } + + if (dictMode == ZSTD_noDict) { + while ( (ip <= ilimit) + && ( (offset_2>0) + & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) { + /* store sequence */ + size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; + U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */ + hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base); + hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base); + ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, rLength-MINMATCH); + ip += rLength; + anchor = ip; + continue; /* faster when present ... (?) */ + } } } + } /* while (ip < ilimit) */ + + /* save reps for next block */ + rep[0] = offset_1 ? offset_1 : offsetSaved; + rep[1] = offset_2 ? offset_2 : offsetSaved; + + /* Return the last literals size */ + return (size_t)(iend - anchor); +} + + +size_t ZSTD_compressBlock_doubleFast( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + const U32 mls = ms->cParams.minMatch; + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_noDict); + case 5 : + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_noDict); + case 6 : + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_noDict); + case 7 : + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_noDict); + } +} + + +size_t ZSTD_compressBlock_doubleFast_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + const U32 mls = ms->cParams.minMatch; + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_dictMatchState); + case 5 : + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_dictMatchState); + case 6 : + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_dictMatchState); + case 7 : + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_dictMatchState); + } +} + + +static size_t ZSTD_compressBlock_doubleFast_extDict_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize, + U32 const mls /* template */) +{ + ZSTD_compressionParameters const* cParams = &ms->cParams; + U32* const hashLong = ms->hashTable; + U32 const hBitsL = cParams->hashLog; + U32* const hashSmall = ms->chainTable; + U32 const hBitsS = cParams->chainLog; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + const BYTE* const base = ms->window.base; + const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); + const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog); + const U32 dictStartIndex = lowLimit; + 
const U32 dictLimit = ms->window.dictLimit; + const U32 prefixStartIndex = (dictLimit > lowLimit) ? dictLimit : lowLimit; + const BYTE* const prefixStart = base + prefixStartIndex; + const BYTE* const dictBase = ms->window.dictBase; + const BYTE* const dictStart = dictBase + dictStartIndex; + const BYTE* const dictEnd = dictBase + prefixStartIndex; + U32 offset_1=rep[0], offset_2=rep[1]; + + DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_extDict_generic (srcSize=%zu)", srcSize); + + /* if extDict is invalidated due to maxDistance, switch to "regular" variant */ + if (prefixStartIndex == dictStartIndex) + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, mls, ZSTD_noDict); + + /* Search Loop */ + while (ip < ilimit) { /* < instead of <=, because (ip+1) */ + const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls); + const U32 matchIndex = hashSmall[hSmall]; + const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base; + const BYTE* match = matchBase + matchIndex; + + const size_t hLong = ZSTD_hashPtr(ip, hBitsL, 8); + const U32 matchLongIndex = hashLong[hLong]; + const BYTE* const matchLongBase = matchLongIndex < prefixStartIndex ? dictBase : base; + const BYTE* matchLong = matchLongBase + matchLongIndex; + + const U32 curr = (U32)(ip-base); + const U32 repIndex = curr + 1 - offset_1; /* offset_1 expected <= curr +1 */ + const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base; + const BYTE* const repMatch = repBase + repIndex; + size_t mLength; + hashSmall[hSmall] = hashLong[hLong] = curr; /* update hash table */ + + if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */ + & (offset_1 < curr+1 - dictStartIndex)) /* note: we are searching at curr+1 */ + && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { + const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; + mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4; + ip++; + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH); + } else { + if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) { + const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend; + const BYTE* const lowMatchPtr = matchLongIndex < prefixStartIndex ? dictStart : prefixStart; + U32 offset; + mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, prefixStart) + 8; + offset = curr - matchLongIndex; + while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ + offset_2 = offset_1; + offset_1 = offset; + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + + } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) { + size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8); + U32 const matchIndex3 = hashLong[h3]; + const BYTE* const match3Base = matchIndex3 < prefixStartIndex ? dictBase : base; + const BYTE* match3 = match3Base + matchIndex3; + U32 offset; + hashLong[h3] = curr + 1; + if ( (matchIndex3 > dictStartIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) { + const BYTE* const matchEnd = matchIndex3 < prefixStartIndex ? dictEnd : iend; + const BYTE* const lowMatchPtr = matchIndex3 < prefixStartIndex ? 
dictStart : prefixStart; + mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, prefixStart) + 8; + ip++; + offset = curr+1 - matchIndex3; + while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */ + } else { + const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend; + const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart; + mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4; + offset = curr - matchIndex; + while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ + } + offset_2 = offset_1; + offset_1 = offset; + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + + } else { + ip += ((ip-anchor) >> kSearchStrength) + 1; + continue; + } } + + /* move to next sequence start */ + ip += mLength; + anchor = ip; + + if (ip <= ilimit) { + /* Complementary insertion */ + /* done after iLimit test, as candidates could be > iend-8 */ + { U32 const indexToInsert = curr+2; + hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert; + hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base); + hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert; + hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base); + } + + /* check immediate repcode */ + while (ip <= ilimit) { + U32 const current2 = (U32)(ip-base); + U32 const repIndex2 = current2 - offset_2; + const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2; + if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) /* intentional overflow : ensure repIndex2 doesn't overlap dict + prefix */ + & (offset_2 < current2 - dictStartIndex)) + && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { + const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; + size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; + U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ + ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH); + hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2; + hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2; + ip += repLength2; + anchor = ip; + continue; + } + break; + } } } + + /* save reps for next block */ + rep[0] = offset_1; + rep[1] = offset_2; + + /* Return the last literals size */ + return (size_t)(iend - anchor); +} + + +size_t ZSTD_compressBlock_doubleFast_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + U32 const mls = ms->cParams.minMatch; + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 4); + case 5 : + return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 5); + case 6 : + return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 6); + case 7 : + return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 7); + } +} +/**** ended inlining compress/zstd_double_fast.c ****/ +/**** start inlining compress/zstd_fast.c ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. 
+ * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/**** skipping file: zstd_compress_internal.h ****/ +/**** skipping file: zstd_fast.h ****/ + + +void ZSTD_fillHashTable(ZSTD_matchState_t* ms, + const void* const end, + ZSTD_dictTableLoadMethod_e dtlm) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const hashTable = ms->hashTable; + U32 const hBits = cParams->hashLog; + U32 const mls = cParams->minMatch; + const BYTE* const base = ms->window.base; + const BYTE* ip = base + ms->nextToUpdate; + const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE; + const U32 fastHashFillStep = 3; + + /* Always insert every fastHashFillStep position into the hash table. + * Insert the other positions if their hash entry is empty. + */ + for ( ; ip + fastHashFillStep < iend + 2; ip += fastHashFillStep) { + U32 const curr = (U32)(ip - base); + size_t const hash0 = ZSTD_hashPtr(ip, hBits, mls); + hashTable[hash0] = curr; + if (dtlm == ZSTD_dtlm_fast) continue; + /* Only load extra positions for ZSTD_dtlm_full */ + { U32 p; + for (p = 1; p < fastHashFillStep; ++p) { + size_t const hash = ZSTD_hashPtr(ip + p, hBits, mls); + if (hashTable[hash] == 0) { /* not yet filled */ + hashTable[hash] = curr + p; + } } } } +} + + +FORCE_INLINE_TEMPLATE size_t +ZSTD_compressBlock_fast_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize, + U32 const mls) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const hashTable = ms->hashTable; + U32 const hlog = cParams->hashLog; + /* support stepSize of 0 */ + size_t const stepSize = cParams->targetLength + !(cParams->targetLength) + 1; + const BYTE* const base = ms->window.base; + const BYTE* const istart = (const BYTE*)src; + /* We check ip0 (ip + 0) and ip1 (ip + 1) each loop */ + const BYTE* ip0 = istart; + const BYTE* ip1; + const BYTE* anchor = istart; + const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); + const U32 prefixStartIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog); + const BYTE* const prefixStart = base + prefixStartIndex; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - HASH_READ_SIZE; + U32 offset_1=rep[0], offset_2=rep[1]; + U32 offsetSaved = 0; + + /* init */ + DEBUGLOG(5, "ZSTD_compressBlock_fast_generic"); + ip0 += (ip0 == prefixStart); + ip1 = ip0 + 1; + { U32 const curr = (U32)(ip0 - base); + U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog); + U32 const maxRep = curr - windowLow; + if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; + if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; + } + + /* Main Search Loop */ +#ifdef __INTEL_COMPILER + /* From intel 'The vector pragma indicates that the loop should be + * vectorized if it is legal to do so'. 
Can be used together with + * #pragma ivdep (but have opted to exclude that because intel + * warns against using it).*/ + #pragma vector always +#endif + while (ip1 < ilimit) { /* < instead of <=, because check at ip0+2 */ + size_t mLength; + BYTE const* ip2 = ip0 + 2; + size_t const h0 = ZSTD_hashPtr(ip0, hlog, mls); + U32 const val0 = MEM_read32(ip0); + size_t const h1 = ZSTD_hashPtr(ip1, hlog, mls); + U32 const val1 = MEM_read32(ip1); + U32 const current0 = (U32)(ip0-base); + U32 const current1 = (U32)(ip1-base); + U32 const matchIndex0 = hashTable[h0]; + U32 const matchIndex1 = hashTable[h1]; + BYTE const* repMatch = ip2 - offset_1; + const BYTE* match0 = base + matchIndex0; + const BYTE* match1 = base + matchIndex1; + U32 offcode; + +#if defined(__aarch64__) + PREFETCH_L1(ip0+256); +#endif + + hashTable[h0] = current0; /* update hash table */ + hashTable[h1] = current1; /* update hash table */ + + assert(ip0 + 1 == ip1); + + if ((offset_1 > 0) & (MEM_read32(repMatch) == MEM_read32(ip2))) { + mLength = (ip2[-1] == repMatch[-1]) ? 1 : 0; + ip0 = ip2 - mLength; + match0 = repMatch - mLength; + mLength += 4; + offcode = 0; + goto _match; + } + if ((matchIndex0 > prefixStartIndex) && MEM_read32(match0) == val0) { + /* found a regular match */ + goto _offset; + } + if ((matchIndex1 > prefixStartIndex) && MEM_read32(match1) == val1) { + /* found a regular match after one literal */ + ip0 = ip1; + match0 = match1; + goto _offset; + } + { size_t const step = ((size_t)(ip0-anchor) >> (kSearchStrength - 1)) + stepSize; + assert(step >= 2); + ip0 += step; + ip1 += step; + continue; + } +_offset: /* Requires: ip0, match0 */ + /* Compute the offset code */ + offset_2 = offset_1; + offset_1 = (U32)(ip0-match0); + offcode = offset_1 + ZSTD_REP_MOVE; + mLength = 4; + /* Count the backwards match length */ + while (((ip0>anchor) & (match0>prefixStart)) + && (ip0[-1] == match0[-1])) { ip0--; match0--; mLength++; } /* catch up */ + +_match: /* Requires: ip0, match0, offcode */ + /* Count the forward length */ + mLength += ZSTD_count(ip0+mLength, match0+mLength, iend); + ZSTD_storeSeq(seqStore, (size_t)(ip0-anchor), anchor, iend, offcode, mLength-MINMATCH); + /* match found */ + ip0 += mLength; + anchor = ip0; + + if (ip0 <= ilimit) { + /* Fill Table */ + assert(base+current0+2 > istart); /* check base overflow */ + hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2; /* here because current+2 could be > iend-8 */ + hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base); + + if (offset_2 > 0) { /* offset_2==0 means offset_2 is invalidated */ + while ( (ip0 <= ilimit) && (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) ) { + /* store sequence */ + size_t const rLength = ZSTD_count(ip0+4, ip0+4-offset_2, iend) + 4; + { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */ + hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base); + ip0 += rLength; + ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, 0 /*offCode*/, rLength-MINMATCH); + anchor = ip0; + continue; /* faster when present (confirmed on gcc-8) ... (?) */ + } } } + ip1 = ip0 + 1; + } + + /* save reps for next block */ + rep[0] = offset_1 ? offset_1 : offsetSaved; + rep[1] = offset_2 ? 
offset_2 : offsetSaved; + + /* Return the last literals size */ + return (size_t)(iend - anchor); +} + + +size_t ZSTD_compressBlock_fast( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + U32 const mls = ms->cParams.minMatch; + assert(ms->dictMatchState == NULL); + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 4); + case 5 : + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 5); + case 6 : + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 6); + case 7 : + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 7); + } +} + +FORCE_INLINE_TEMPLATE +size_t ZSTD_compressBlock_fast_dictMatchState_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize, U32 const mls) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const hashTable = ms->hashTable; + U32 const hlog = cParams->hashLog; + /* support stepSize of 0 */ + U32 const stepSize = cParams->targetLength + !(cParams->targetLength); + const BYTE* const base = ms->window.base; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const U32 prefixStartIndex = ms->window.dictLimit; + const BYTE* const prefixStart = base + prefixStartIndex; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - HASH_READ_SIZE; + U32 offset_1=rep[0], offset_2=rep[1]; + U32 offsetSaved = 0; + + const ZSTD_matchState_t* const dms = ms->dictMatchState; + const ZSTD_compressionParameters* const dictCParams = &dms->cParams ; + const U32* const dictHashTable = dms->hashTable; + const U32 dictStartIndex = dms->window.dictLimit; + const BYTE* const dictBase = dms->window.base; + const BYTE* const dictStart = dictBase + dictStartIndex; + const BYTE* const dictEnd = dms->window.nextSrc; + const U32 dictIndexDelta = prefixStartIndex - (U32)(dictEnd - dictBase); + const U32 dictAndPrefixLength = (U32)(ip - prefixStart + dictEnd - dictStart); + const U32 dictHLog = dictCParams->hashLog; + + /* if a dictionary is still attached, it necessarily means that + * it is within window size. So we just check it. */ + const U32 maxDistance = 1U << cParams->windowLog; + const U32 endIndex = (U32)((size_t)(ip - base) + srcSize); + assert(endIndex - prefixStartIndex <= maxDistance); + (void)maxDistance; (void)endIndex; /* these variables are not used when assert() is disabled */ + + /* ensure there will be no underflow + * when translating a dict index into a local index */ + assert(prefixStartIndex >= (U32)(dictEnd - dictBase)); + + /* init */ + DEBUGLOG(5, "ZSTD_compressBlock_fast_dictMatchState_generic"); + ip += (dictAndPrefixLength == 0); + /* dictMatchState repCode checks don't currently handle repCode == 0 + * disabling. */ + assert(offset_1 <= dictAndPrefixLength); + assert(offset_2 <= dictAndPrefixLength); + + /* Main Search Loop */ + while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ + size_t mLength; + size_t const h = ZSTD_hashPtr(ip, hlog, mls); + U32 const curr = (U32)(ip-base); + U32 const matchIndex = hashTable[h]; + const BYTE* match = base + matchIndex; + const U32 repIndex = curr + 1 - offset_1; + const BYTE* repMatch = (repIndex < prefixStartIndex) ? 
+ dictBase + (repIndex - dictIndexDelta) : + base + repIndex; + hashTable[h] = curr; /* update hash table */ + + if ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */ + && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { + const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; + mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4; + ip++; + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH); + } else if ( (matchIndex <= prefixStartIndex) ) { + size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls); + U32 const dictMatchIndex = dictHashTable[dictHash]; + const BYTE* dictMatch = dictBase + dictMatchIndex; + if (dictMatchIndex <= dictStartIndex || + MEM_read32(dictMatch) != MEM_read32(ip)) { + assert(stepSize >= 1); + ip += ((ip-anchor) >> kSearchStrength) + stepSize; + continue; + } else { + /* found a dict match */ + U32 const offset = (U32)(curr-dictMatchIndex-dictIndexDelta); + mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, prefixStart) + 4; + while (((ip>anchor) & (dictMatch>dictStart)) + && (ip[-1] == dictMatch[-1])) { + ip--; dictMatch--; mLength++; + } /* catch up */ + offset_2 = offset_1; + offset_1 = offset; + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + } + } else if (MEM_read32(match) != MEM_read32(ip)) { + /* it's not a match, and we're not going to check the dictionary */ + assert(stepSize >= 1); + ip += ((ip-anchor) >> kSearchStrength) + stepSize; + continue; + } else { + /* found a regular match */ + U32 const offset = (U32)(ip-match); + mLength = ZSTD_count(ip+4, match+4, iend) + 4; + while (((ip>anchor) & (match>prefixStart)) + && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ + offset_2 = offset_1; + offset_1 = offset; + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + } + + /* match found */ + ip += mLength; + anchor = ip; + + if (ip <= ilimit) { + /* Fill Table */ + assert(base+curr+2 > istart); /* check base overflow */ + hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2; /* here because curr+2 could be > iend-8 */ + hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base); + + /* check immediate repcode */ + while (ip <= ilimit) { + U32 const current2 = (U32)(ip-base); + U32 const repIndex2 = current2 - offset_2; + const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? + dictBase - dictIndexDelta + repIndex2 : + base + repIndex2; + if ( ((U32)((prefixStartIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */) + && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { + const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; + size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; + U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ + ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH); + hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2; + ip += repLength2; + anchor = ip; + continue; + } + break; + } + } + } + + /* save reps for next block */ + rep[0] = offset_1 ? offset_1 : offsetSaved; + rep[1] = offset_2 ? 
offset_2 : offsetSaved; + + /* Return the last literals size */ + return (size_t)(iend - anchor); +} + +size_t ZSTD_compressBlock_fast_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + U32 const mls = ms->cParams.minMatch; + assert(ms->dictMatchState != NULL); + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 4); + case 5 : + return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 5); + case 6 : + return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 6); + case 7 : + return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 7); + } +} + + +static size_t ZSTD_compressBlock_fast_extDict_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize, U32 const mls) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const hashTable = ms->hashTable; + U32 const hlog = cParams->hashLog; + /* support stepSize of 0 */ + U32 const stepSize = cParams->targetLength + !(cParams->targetLength); + const BYTE* const base = ms->window.base; + const BYTE* const dictBase = ms->window.dictBase; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); + const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog); + const U32 dictStartIndex = lowLimit; + const BYTE* const dictStart = dictBase + dictStartIndex; + const U32 dictLimit = ms->window.dictLimit; + const U32 prefixStartIndex = dictLimit < lowLimit ? lowLimit : dictLimit; + const BYTE* const prefixStart = base + prefixStartIndex; + const BYTE* const dictEnd = dictBase + prefixStartIndex; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + U32 offset_1=rep[0], offset_2=rep[1]; + + DEBUGLOG(5, "ZSTD_compressBlock_fast_extDict_generic (offset_1=%u)", offset_1); + + /* switch to "regular" variant if extDict is invalidated due to maxDistance */ + if (prefixStartIndex == dictStartIndex) + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, mls); + + /* Search Loop */ + while (ip < ilimit) { /* < instead of <=, because (ip+1) */ + const size_t h = ZSTD_hashPtr(ip, hlog, mls); + const U32 matchIndex = hashTable[h]; + const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base; + const BYTE* match = matchBase + matchIndex; + const U32 curr = (U32)(ip-base); + const U32 repIndex = curr + 1 - offset_1; + const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base; + const BYTE* const repMatch = repBase + repIndex; + hashTable[h] = curr; /* update hash table */ + DEBUGLOG(7, "offset_1 = %u , curr = %u", offset_1, curr); + + if ( ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ + & (offset_1 < curr+1 - dictStartIndex) ) /* note: we are searching at curr+1 */ + && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { + const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? 
dictEnd : iend; + size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4; + ip++; + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, rLength-MINMATCH); + ip += rLength; + anchor = ip; + } else { + if ( (matchIndex < dictStartIndex) || + (MEM_read32(match) != MEM_read32(ip)) ) { + assert(stepSize >= 1); + ip += ((ip-anchor) >> kSearchStrength) + stepSize; + continue; + } + { const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend; + const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart; + U32 const offset = curr - matchIndex; + size_t mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4; + while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ + offset_2 = offset_1; offset_1 = offset; /* update offset history */ + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + ip += mLength; + anchor = ip; + } } + + if (ip <= ilimit) { + /* Fill Table */ + hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2; + hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base); + /* check immediate repcode */ + while (ip <= ilimit) { + U32 const current2 = (U32)(ip-base); + U32 const repIndex2 = current2 - offset_2; + const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2; + if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (offset_2 < curr - dictStartIndex)) /* intentional overflow */ + && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { + const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; + size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; + { U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; } /* swap offset_2 <=> offset_1 */ + ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, 0 /*offcode*/, repLength2-MINMATCH); + hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2; + ip += repLength2; + anchor = ip; + continue; + } + break; + } } } + + /* save reps for next block */ + rep[0] = offset_1; + rep[1] = offset_2; + + /* Return the last literals size */ + return (size_t)(iend - anchor); +} + + +size_t ZSTD_compressBlock_fast_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + U32 const mls = ms->cParams.minMatch; + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 4); + case 5 : + return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 5); + case 6 : + return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 6); + case 7 : + return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 7); + } +} +/**** ended inlining compress/zstd_fast.c ****/ +/**** start inlining compress/zstd_lazy.c ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +/**** skipping file: zstd_compress_internal.h ****/ +/**** skipping file: zstd_lazy.h ****/ + + +/*-************************************* +* Binary Tree search +***************************************/ + +static void +ZSTD_updateDUBT(ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* iend, + U32 mls) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const hashTable = ms->hashTable; + U32 const hashLog = cParams->hashLog; + + U32* const bt = ms->chainTable; + U32 const btLog = cParams->chainLog - 1; + U32 const btMask = (1 << btLog) - 1; + + const BYTE* const base = ms->window.base; + U32 const target = (U32)(ip - base); + U32 idx = ms->nextToUpdate; + + if (idx != target) + DEBUGLOG(7, "ZSTD_updateDUBT, from %u to %u (dictLimit:%u)", + idx, target, ms->window.dictLimit); + assert(ip + 8 <= iend); /* condition for ZSTD_hashPtr */ + (void)iend; + + assert(idx >= ms->window.dictLimit); /* condition for valid base+idx */ + for ( ; idx < target ; idx++) { + size_t const h = ZSTD_hashPtr(base + idx, hashLog, mls); /* assumption : ip + 8 <= iend */ + U32 const matchIndex = hashTable[h]; + + U32* const nextCandidatePtr = bt + 2*(idx&btMask); + U32* const sortMarkPtr = nextCandidatePtr + 1; + + DEBUGLOG(8, "ZSTD_updateDUBT: insert %u", idx); + hashTable[h] = idx; /* Update Hash Table */ + *nextCandidatePtr = matchIndex; /* update BT like a chain */ + *sortMarkPtr = ZSTD_DUBT_UNSORTED_MARK; + } + ms->nextToUpdate = target; +} + + +/** ZSTD_insertDUBT1() : + * sort one already inserted but unsorted position + * assumption : curr >= btlow == (curr - btmask) + * doesn't fail */ +static void +ZSTD_insertDUBT1(ZSTD_matchState_t* ms, + U32 curr, const BYTE* inputEnd, + U32 nbCompares, U32 btLow, + const ZSTD_dictMode_e dictMode) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const bt = ms->chainTable; + U32 const btLog = cParams->chainLog - 1; + U32 const btMask = (1 << btLog) - 1; + size_t commonLengthSmaller=0, commonLengthLarger=0; + const BYTE* const base = ms->window.base; + const BYTE* const dictBase = ms->window.dictBase; + const U32 dictLimit = ms->window.dictLimit; + const BYTE* const ip = (curr>=dictLimit) ? base + curr : dictBase + curr; + const BYTE* const iend = (curr>=dictLimit) ? inputEnd : dictBase + dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const BYTE* match; + U32* smallerPtr = bt + 2*(curr&btMask); + U32* largerPtr = smallerPtr + 1; + U32 matchIndex = *smallerPtr; /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */ + U32 dummy32; /* to be nullified at the end */ + U32 const windowValid = ms->window.lowLimit; + U32 const maxDistance = 1U << cParams->windowLog; + U32 const windowLow = (curr - windowValid > maxDistance) ? 
curr - maxDistance : windowValid; + + + DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)", + curr, dictLimit, windowLow); + assert(curr >= btLow); + assert(ip < iend); /* condition for ZSTD_count */ + + while (nbCompares-- && (matchIndex > windowLow)) { + U32* const nextPtr = bt + 2*(matchIndex & btMask); + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + assert(matchIndex < curr); + /* note : all candidates are now supposed sorted, + * but it's still possible to have nextPtr[1] == ZSTD_DUBT_UNSORTED_MARK + * when a real index has the same value as ZSTD_DUBT_UNSORTED_MARK */ + + if ( (dictMode != ZSTD_extDict) + || (matchIndex+matchLength >= dictLimit) /* both in current segment*/ + || (curr < dictLimit) /* both in extDict */) { + const BYTE* const mBase = ( (dictMode != ZSTD_extDict) + || (matchIndex+matchLength >= dictLimit)) ? + base : dictBase; + assert( (matchIndex+matchLength >= dictLimit) /* might be wrong if extDict is incorrectly set to 0 */ + || (curr < dictLimit) ); + match = mBase + matchIndex; + matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend); + } else { + match = dictBase + matchIndex; + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); + if (matchIndex+matchLength >= dictLimit) + match = base + matchIndex; /* preparation for next read of match[matchLength] */ + } + + DEBUGLOG(8, "ZSTD_insertDUBT1: comparing %u with %u : found %u common bytes ", + curr, matchIndex, (U32)matchLength); + + if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */ + break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */ + } + + if (match[matchLength] < ip[matchLength]) { /* necessarily within buffer */ + /* match is smaller than current */ + *smallerPtr = matchIndex; /* update smaller idx */ + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop searching */ + DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is smaller : next => %u", + matchIndex, btLow, nextPtr[1]); + smallerPtr = nextPtr+1; /* new "candidate" => larger than match, which was smaller than target */ + matchIndex = nextPtr[1]; /* new matchIndex, larger than previous and closer to current */ + } else { + /* match is larger than current */ + *largerPtr = matchIndex; + commonLengthLarger = matchLength; + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop searching */ + DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is larger => %u", + matchIndex, btLow, nextPtr[0]); + largerPtr = nextPtr; + matchIndex = nextPtr[0]; + } } + + *smallerPtr = *largerPtr = 0; +} + + +static size_t +ZSTD_DUBT_findBetterDictMatch ( + ZSTD_matchState_t* ms, + const BYTE* const ip, const BYTE* const iend, + size_t* offsetPtr, + size_t bestLength, + U32 nbCompares, + U32 const mls, + const ZSTD_dictMode_e dictMode) +{ + const ZSTD_matchState_t * const dms = ms->dictMatchState; + const ZSTD_compressionParameters* const dmsCParams = &dms->cParams; + const U32 * const dictHashTable = dms->hashTable; + U32 const hashLog = dmsCParams->hashLog; + size_t const h = ZSTD_hashPtr(ip, hashLog, mls); + U32 dictMatchIndex = dictHashTable[h]; + + const BYTE* const base = ms->window.base; + const BYTE* const prefixStart = base + ms->window.dictLimit; + U32 const curr = (U32)(ip-base); + const BYTE* 
const dictBase = dms->window.base; + const BYTE* const dictEnd = dms->window.nextSrc; + U32 const dictHighLimit = (U32)(dms->window.nextSrc - dms->window.base); + U32 const dictLowLimit = dms->window.lowLimit; + U32 const dictIndexDelta = ms->window.lowLimit - dictHighLimit; + + U32* const dictBt = dms->chainTable; + U32 const btLog = dmsCParams->chainLog - 1; + U32 const btMask = (1 << btLog) - 1; + U32 const btLow = (btMask >= dictHighLimit - dictLowLimit) ? dictLowLimit : dictHighLimit - btMask; + + size_t commonLengthSmaller=0, commonLengthLarger=0; + + (void)dictMode; + assert(dictMode == ZSTD_dictMatchState); + + while (nbCompares-- && (dictMatchIndex > dictLowLimit)) { + U32* const nextPtr = dictBt + 2*(dictMatchIndex & btMask); + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + const BYTE* match = dictBase + dictMatchIndex; + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); + if (dictMatchIndex+matchLength >= dictHighLimit) + match = base + dictMatchIndex + dictIndexDelta; /* to prepare for next usage of match[matchLength] */ + + if (matchLength > bestLength) { + U32 matchIndex = dictMatchIndex + dictIndexDelta; + if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) { + DEBUGLOG(9, "ZSTD_DUBT_findBetterDictMatch(%u) : found better match length %u -> %u and offsetCode %u -> %u (dictMatchIndex %u, matchIndex %u)", + curr, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, ZSTD_REP_MOVE + curr - matchIndex, dictMatchIndex, matchIndex); + bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + curr - matchIndex; + } + if (ip+matchLength == iend) { /* reached end of input : ip[matchLength] is not valid, no way to know if it's larger or smaller than match */ + break; /* drop, to guarantee consistency (miss a little bit of compression) */ + } + } + + if (match[matchLength] < ip[matchLength]) { + if (dictMatchIndex <= btLow) { break; } /* beyond tree size, stop the search */ + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + dictMatchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ + } else { + /* match is larger than current */ + if (dictMatchIndex <= btLow) { break; } /* beyond tree size, stop the search */ + commonLengthLarger = matchLength; + dictMatchIndex = nextPtr[0]; + } + } + + if (bestLength >= MINMATCH) { + U32 const mIndex = curr - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex; + DEBUGLOG(8, "ZSTD_DUBT_findBetterDictMatch(%u) : found match of length %u and offsetCode %u (pos %u)", + curr, (U32)bestLength, (U32)*offsetPtr, mIndex); + } + return bestLength; + +} + + +static size_t +ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms, + const BYTE* const ip, const BYTE* const iend, + size_t* offsetPtr, + U32 const mls, + const ZSTD_dictMode_e dictMode) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const hashTable = ms->hashTable; + U32 const hashLog = cParams->hashLog; + size_t const h = ZSTD_hashPtr(ip, hashLog, mls); + U32 matchIndex = hashTable[h]; + + const BYTE* const base = ms->window.base; + U32 const curr = (U32)(ip-base); + U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog); + + U32* const bt = ms->chainTable; + U32 const btLog = cParams->chainLog - 1; + U32 const btMask = (1 << btLog) - 1; + U32 const btLow = (btMask >= curr) ? 
0 : curr - btMask; + U32 const unsortLimit = MAX(btLow, windowLow); + + U32* nextCandidate = bt + 2*(matchIndex&btMask); + U32* unsortedMark = bt + 2*(matchIndex&btMask) + 1; + U32 nbCompares = 1U << cParams->searchLog; + U32 nbCandidates = nbCompares; + U32 previousCandidate = 0; + + DEBUGLOG(7, "ZSTD_DUBT_findBestMatch (%u) ", curr); + assert(ip <= iend-8); /* required for h calculation */ + assert(dictMode != ZSTD_dedicatedDictSearch); + + /* reach end of unsorted candidates list */ + while ( (matchIndex > unsortLimit) + && (*unsortedMark == ZSTD_DUBT_UNSORTED_MARK) + && (nbCandidates > 1) ) { + DEBUGLOG(8, "ZSTD_DUBT_findBestMatch: candidate %u is unsorted", + matchIndex); + *unsortedMark = previousCandidate; /* the unsortedMark becomes a reversed chain, to move up back to original position */ + previousCandidate = matchIndex; + matchIndex = *nextCandidate; + nextCandidate = bt + 2*(matchIndex&btMask); + unsortedMark = bt + 2*(matchIndex&btMask) + 1; + nbCandidates --; + } + + /* nullify last candidate if it's still unsorted + * simplification, detrimental to compression ratio, beneficial for speed */ + if ( (matchIndex > unsortLimit) + && (*unsortedMark==ZSTD_DUBT_UNSORTED_MARK) ) { + DEBUGLOG(7, "ZSTD_DUBT_findBestMatch: nullify last unsorted candidate %u", + matchIndex); + *nextCandidate = *unsortedMark = 0; + } + + /* batch sort stacked candidates */ + matchIndex = previousCandidate; + while (matchIndex) { /* will end on matchIndex == 0 */ + U32* const nextCandidateIdxPtr = bt + 2*(matchIndex&btMask) + 1; + U32 const nextCandidateIdx = *nextCandidateIdxPtr; + ZSTD_insertDUBT1(ms, matchIndex, iend, + nbCandidates, unsortLimit, dictMode); + matchIndex = nextCandidateIdx; + nbCandidates++; + } + + /* find longest match */ + { size_t commonLengthSmaller = 0, commonLengthLarger = 0; + const BYTE* const dictBase = ms->window.dictBase; + const U32 dictLimit = ms->window.dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const prefixStart = base + dictLimit; + U32* smallerPtr = bt + 2*(curr&btMask); + U32* largerPtr = bt + 2*(curr&btMask) + 1; + U32 matchEndIdx = curr + 8 + 1; + U32 dummy32; /* to be nullified at the end */ + size_t bestLength = 0; + + matchIndex = hashTable[h]; + hashTable[h] = curr; /* Update Hash Table */ + + while (nbCompares-- && (matchIndex > windowLow)) { + U32* const nextPtr = bt + 2*(matchIndex & btMask); + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + const BYTE* match; + + if ((dictMode != ZSTD_extDict) || (matchIndex+matchLength >= dictLimit)) { + match = base + matchIndex; + matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend); + } else { + match = dictBase + matchIndex; + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); + if (matchIndex+matchLength >= dictLimit) + match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ + } + + if (matchLength > bestLength) { + if (matchLength > matchEndIdx - matchIndex) + matchEndIdx = matchIndex + (U32)matchLength; + if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) + bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + curr - matchIndex; + if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */ + if (dictMode == ZSTD_dictMatchState) { + nbCompares = 0; /* in addition to avoiding checking any + * further in this loop, make sure we + * skip checking in the 
dictionary. */ + } + break; /* drop, to guarantee consistency (miss a little bit of compression) */ + } + } + + if (match[matchLength] < ip[matchLength]) { + /* match is smaller than current */ + *smallerPtr = matchIndex; /* update smaller idx */ + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ + matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ + } else { + /* match is larger than current */ + *largerPtr = matchIndex; + commonLengthLarger = matchLength; + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + largerPtr = nextPtr; + matchIndex = nextPtr[0]; + } } + + *smallerPtr = *largerPtr = 0; + + if (dictMode == ZSTD_dictMatchState && nbCompares) { + bestLength = ZSTD_DUBT_findBetterDictMatch( + ms, ip, iend, + offsetPtr, bestLength, nbCompares, + mls, dictMode); + } + + assert(matchEndIdx > curr+8); /* ensure nextToUpdate is increased */ + ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */ + if (bestLength >= MINMATCH) { + U32 const mIndex = curr - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex; + DEBUGLOG(8, "ZSTD_DUBT_findBestMatch(%u) : found match of length %u and offsetCode %u (pos %u)", + curr, (U32)bestLength, (U32)*offsetPtr, mIndex); + } + return bestLength; + } +} + + +/** ZSTD_BtFindBestMatch() : Tree updater, providing best match */ +FORCE_INLINE_TEMPLATE size_t +ZSTD_BtFindBestMatch( ZSTD_matchState_t* ms, + const BYTE* const ip, const BYTE* const iLimit, + size_t* offsetPtr, + const U32 mls /* template */, + const ZSTD_dictMode_e dictMode) +{ + DEBUGLOG(7, "ZSTD_BtFindBestMatch"); + if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */ + ZSTD_updateDUBT(ms, ip, iLimit, mls); + return ZSTD_DUBT_findBestMatch(ms, ip, iLimit, offsetPtr, mls, dictMode); +} + + +static size_t +ZSTD_BtFindBestMatch_selectMLS ( ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + switch(ms->cParams.minMatch) + { + default : /* includes case 3 */ + case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict); + case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict); + case 7 : + case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict); + } +} + + +static size_t ZSTD_BtFindBestMatch_dictMatchState_selectMLS ( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + switch(ms->cParams.minMatch) + { + default : /* includes case 3 */ + case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState); + case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState); + case 7 : + case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState); + } +} + + +static size_t ZSTD_BtFindBestMatch_extDict_selectMLS ( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + switch(ms->cParams.minMatch) + { + default : /* includes case 3 */ + case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict); + case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict); + case 7 : + case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict); + } +} + 
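/* ----------------------------------------------------------------------------
 * [Editorial aside - illustration only, not part of the vendored zstd sources]
 * The binary-tree searchers above (ZSTD_DUBT_findBestMatch and
 * ZSTD_DUBT_findBetterDictMatch) only accept a longer candidate when
 *     4*(matchLength - bestLength) > highbit(newDistance+1) - highbit(currentOffset+1)
 * i.e. a longer match is kept only if the added length outweighs the extra bits
 * needed to encode the larger offset, at a 4:1 length-to-bits weighting.
 * The standalone sketch below restates that trade-off with plain integer types
 * and raw distances (a simplification: the code above compares against the
 * stored offset code, which already includes ZSTD_REP_MOVE). highbit32_u32()
 * and prefer_longer_match() are illustrative names, not zstd API.
 * ---------------------------------------------------------------------------- */
#include <stdint.h>
#include <stdio.h>

/* index of the highest set bit, ~floor(log2(v)); mirrors ZSTD_highbit32() for v > 0 */
static int highbit32_u32(uint32_t v)
{
    int r = 0;
    while (v >>= 1) r++;
    return r;
}

/* nonzero when a (newLen, newDist) candidate should replace (bestLen, bestDist) */
static int prefer_longer_match(uint32_t newLen, uint32_t newDist,
                               uint32_t bestLen, uint32_t bestDist)
{
    int const lengthGain = (int)newLen - (int)bestLen;   /* extra matched bytes */
    int const offsetCost = highbit32_u32(newDist + 1)
                         - highbit32_u32(bestDist + 1);  /* extra offset bits */
    return 4 * lengthGain > offsetCost;   /* same 4:1 weighting as the code above */
}

int main(void)
{
    /* one extra byte of match, but the offset needs ~6 more bits: rejected */
    printf("%d\n", prefer_longer_match(9, 65536, 8, 1024));   /* prints 0 */
    /* three extra bytes at the same offsets: accepted */
    printf("%d\n", prefer_longer_match(11, 65536, 8, 1024));  /* prints 1 */
    return 0;
}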
+/*********************************** +* Dedicated dict search +***********************************/ + +void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip) +{ + const BYTE* const base = ms->window.base; + U32 const target = (U32)(ip - base); + U32* const hashTable = ms->hashTable; + U32* const chainTable = ms->chainTable; + U32 const chainSize = 1 << ms->cParams.chainLog; + U32 idx = ms->nextToUpdate; + U32 const minChain = chainSize < target ? target - chainSize : idx; + U32 const bucketSize = 1 << ZSTD_LAZY_DDSS_BUCKET_LOG; + U32 const cacheSize = bucketSize - 1; + U32 const chainAttempts = (1 << ms->cParams.searchLog) - cacheSize; + U32 const chainLimit = chainAttempts > 255 ? 255 : chainAttempts; + + /* We know the hashtable is oversized by a factor of `bucketSize`. + * We are going to temporarily pretend `bucketSize == 1`, keeping only a + * single entry. We will use the rest of the space to construct a temporary + * chaintable. + */ + U32 const hashLog = ms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG; + U32* const tmpHashTable = hashTable; + U32* const tmpChainTable = hashTable + ((size_t)1 << hashLog); + U32 const tmpChainSize = ((1 << ZSTD_LAZY_DDSS_BUCKET_LOG) - 1) << hashLog; + U32 const tmpMinChain = tmpChainSize < target ? target - tmpChainSize : idx; + U32 hashIdx; + + assert(ms->cParams.chainLog <= 24); + assert(ms->cParams.hashLog > ms->cParams.chainLog); + assert(idx != 0); + assert(tmpMinChain <= minChain); + + /* fill conventional hash table and conventional chain table */ + for ( ; idx < target; idx++) { + U32 const h = (U32)ZSTD_hashPtr(base + idx, hashLog, ms->cParams.minMatch); + if (idx >= tmpMinChain) { + tmpChainTable[idx - tmpMinChain] = hashTable[h]; + } + tmpHashTable[h] = idx; + } + + /* sort chains into ddss chain table */ + { + U32 chainPos = 0; + for (hashIdx = 0; hashIdx < (1U << hashLog); hashIdx++) { + U32 count; + U32 countBeyondMinChain = 0; + U32 i = tmpHashTable[hashIdx]; + for (count = 0; i >= tmpMinChain && count < cacheSize; count++) { + /* skip through the chain to the first position that won't be + * in the hash cache bucket */ + if (i < minChain) { + countBeyondMinChain++; + } + i = tmpChainTable[i - tmpMinChain]; + } + if (count == cacheSize) { + for (count = 0; count < chainLimit;) { + if (i < minChain) { + if (!i || ++countBeyondMinChain > cacheSize) { + /* only allow pulling `cacheSize` number of entries + * into the cache or chainTable beyond `minChain`, + * to replace the entries pulled out of the + * chainTable into the cache. This lets us reach + * back further without increasing the total number + * of entries in the chainTable, guaranteeing the + * DDSS chain table will fit into the space + * allocated for the regular one. */ + break; + } + } + chainTable[chainPos++] = i; + count++; + if (i < tmpMinChain) { + break; + } + i = tmpChainTable[i - tmpMinChain]; + } + } else { + count = 0; + } + if (count) { + tmpHashTable[hashIdx] = ((chainPos - count) << 8) + count; + } else { + tmpHashTable[hashIdx] = 0; + } + } + assert(chainPos <= chainSize); /* I believe this is guaranteed... 
*/ + } + + /* move chain pointers into the last entry of each hash bucket */ + for (hashIdx = (1 << hashLog); hashIdx; ) { + U32 const bucketIdx = --hashIdx << ZSTD_LAZY_DDSS_BUCKET_LOG; + U32 const chainPackedPointer = tmpHashTable[hashIdx]; + U32 i; + for (i = 0; i < cacheSize; i++) { + hashTable[bucketIdx + i] = 0; + } + hashTable[bucketIdx + bucketSize - 1] = chainPackedPointer; + } + + /* fill the buckets of the hash table */ + for (idx = ms->nextToUpdate; idx < target; idx++) { + U32 const h = (U32)ZSTD_hashPtr(base + idx, hashLog, ms->cParams.minMatch) + << ZSTD_LAZY_DDSS_BUCKET_LOG; + U32 i; + /* Shift hash cache down 1. */ + for (i = cacheSize - 1; i; i--) + hashTable[h + i] = hashTable[h + i - 1]; + hashTable[h] = idx; + } + + ms->nextToUpdate = target; +} + +/* Returns the longest match length found in the dedicated dict search structure. + * If none are longer than the argument ml, then ml will be returned. + */ +FORCE_INLINE_TEMPLATE +size_t ZSTD_dedicatedDictSearch_lazy_search(size_t* offsetPtr, size_t ml, U32 nbAttempts, + const ZSTD_matchState_t* const dms, + const BYTE* const ip, const BYTE* const iLimit, + const BYTE* const prefixStart, const U32 curr, + const U32 dictLimit, const size_t ddsIdx) { + const U32 ddsLowestIndex = dms->window.dictLimit; + const BYTE* const ddsBase = dms->window.base; + const BYTE* const ddsEnd = dms->window.nextSrc; + const U32 ddsSize = (U32)(ddsEnd - ddsBase); + const U32 ddsIndexDelta = dictLimit - ddsSize; + const U32 bucketSize = (1 << ZSTD_LAZY_DDSS_BUCKET_LOG); + const U32 bucketLimit = nbAttempts < bucketSize - 1 ? nbAttempts : bucketSize - 1; + U32 ddsAttempt; + U32 matchIndex; + + for (ddsAttempt = 0; ddsAttempt < bucketSize - 1; ddsAttempt++) { + PREFETCH_L1(ddsBase + dms->hashTable[ddsIdx + ddsAttempt]); + } + + { + U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1]; + U32 const chainIndex = chainPackedPointer >> 8; + + PREFETCH_L1(&dms->chainTable[chainIndex]); + } + + for (ddsAttempt = 0; ddsAttempt < bucketLimit; ddsAttempt++) { + size_t currentMl=0; + const BYTE* match; + matchIndex = dms->hashTable[ddsIdx + ddsAttempt]; + match = ddsBase + matchIndex; + + if (!matchIndex) { + return ml; + } + + /* guaranteed by table construction */ + (void)ddsLowestIndex; + assert(matchIndex >= ddsLowestIndex); + assert(match+4 <= ddsEnd); + if (MEM_read32(match) == MEM_read32(ip)) { + /* assumption : matchIndex <= dictLimit-4 (by table construction) */ + currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4; + } + + /* save best solution */ + if (currentMl > ml) { + ml = currentMl; + *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE; + if (ip+currentMl == iLimit) { + /* best possible, avoids read overflow on next attempt */ + return ml; + } + } + } + + { + U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1]; + U32 chainIndex = chainPackedPointer >> 8; + U32 const chainLength = chainPackedPointer & 0xFF; + U32 const chainAttempts = nbAttempts - ddsAttempt; + U32 const chainLimit = chainAttempts > chainLength ? 
chainLength : chainAttempts; + U32 chainAttempt; + + for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++) { + PREFETCH_L1(ddsBase + dms->chainTable[chainIndex + chainAttempt]); + } + + for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++, chainIndex++) { + size_t currentMl=0; + const BYTE* match; + matchIndex = dms->chainTable[chainIndex]; + match = ddsBase + matchIndex; + + /* guaranteed by table construction */ + assert(matchIndex >= ddsLowestIndex); + assert(match+4 <= ddsEnd); + if (MEM_read32(match) == MEM_read32(ip)) { + /* assumption : matchIndex <= dictLimit-4 (by table construction) */ + currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4; + } + + /* save best solution */ + if (currentMl > ml) { + ml = currentMl; + *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE; + if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ + } + } + } + return ml; +} + + +/* ********************************* +* Hash Chain +***********************************/ +#define NEXT_IN_CHAIN(d, mask) chainTable[(d) & (mask)] + +/* Update chains up to ip (excluded) + Assumption : always within prefix (i.e. not within extDict) */ +FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal( + ZSTD_matchState_t* ms, + const ZSTD_compressionParameters* const cParams, + const BYTE* ip, U32 const mls) +{ + U32* const hashTable = ms->hashTable; + const U32 hashLog = cParams->hashLog; + U32* const chainTable = ms->chainTable; + const U32 chainMask = (1 << cParams->chainLog) - 1; + const BYTE* const base = ms->window.base; + const U32 target = (U32)(ip - base); + U32 idx = ms->nextToUpdate; + + while(idx < target) { /* catch up */ + size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls); + NEXT_IN_CHAIN(idx, chainMask) = hashTable[h]; + hashTable[h] = idx; + idx++; + } + + ms->nextToUpdate = target; + return hashTable[ZSTD_hashPtr(ip, hashLog, mls)]; +} + +U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) { + const ZSTD_compressionParameters* const cParams = &ms->cParams; + return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch); +} + +/* inlining is important to hardwire a hot branch (template emulation) */ +FORCE_INLINE_TEMPLATE +size_t ZSTD_HcFindBestMatch_generic ( + ZSTD_matchState_t* ms, + const BYTE* const ip, const BYTE* const iLimit, + size_t* offsetPtr, + const U32 mls, const ZSTD_dictMode_e dictMode) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const chainTable = ms->chainTable; + const U32 chainSize = (1 << cParams->chainLog); + const U32 chainMask = chainSize-1; + const BYTE* const base = ms->window.base; + const BYTE* const dictBase = ms->window.dictBase; + const U32 dictLimit = ms->window.dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const U32 curr = (U32)(ip-base); + const U32 maxDistance = 1U << cParams->windowLog; + const U32 lowestValid = ms->window.lowLimit; + const U32 withinMaxDistance = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid; + const U32 isDictionary = (ms->loadedDictEnd != 0); + const U32 lowLimit = isDictionary ? lowestValid : withinMaxDistance; + const U32 minChain = curr > chainSize ? curr - chainSize : 0; + U32 nbAttempts = 1U << cParams->searchLog; + size_t ml=4-1; + + const ZSTD_matchState_t* const dms = ms->dictMatchState; + const U32 ddsHashLog = dictMode == ZSTD_dedicatedDictSearch + ? 
dms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG : 0; + const size_t ddsIdx = dictMode == ZSTD_dedicatedDictSearch + ? ZSTD_hashPtr(ip, ddsHashLog, mls) << ZSTD_LAZY_DDSS_BUCKET_LOG : 0; + + U32 matchIndex; + + if (dictMode == ZSTD_dedicatedDictSearch) { + const U32* entry = &dms->hashTable[ddsIdx]; + PREFETCH_L1(entry); + } + + /* HC4 match finder */ + matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls); + + for ( ; (matchIndex>=lowLimit) & (nbAttempts>0) ; nbAttempts--) { + size_t currentMl=0; + if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) { + const BYTE* const match = base + matchIndex; + assert(matchIndex >= dictLimit); /* ensures this is true if dictMode != ZSTD_extDict */ + if (match[ml] == ip[ml]) /* potentially better */ + currentMl = ZSTD_count(ip, match, iLimit); + } else { + const BYTE* const match = dictBase + matchIndex; + assert(match+4 <= dictEnd); + if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */ + currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4; + } + + /* save best solution */ + if (currentMl > ml) { + ml = currentMl; + *offsetPtr = curr - matchIndex + ZSTD_REP_MOVE; + if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ + } + + if (matchIndex <= minChain) break; + matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask); + } + + if (dictMode == ZSTD_dedicatedDictSearch) { + ml = ZSTD_dedicatedDictSearch_lazy_search(offsetPtr, ml, nbAttempts, dms, + ip, iLimit, prefixStart, curr, dictLimit, ddsIdx); + } else if (dictMode == ZSTD_dictMatchState) { + const U32* const dmsChainTable = dms->chainTable; + const U32 dmsChainSize = (1 << dms->cParams.chainLog); + const U32 dmsChainMask = dmsChainSize - 1; + const U32 dmsLowestIndex = dms->window.dictLimit; + const BYTE* const dmsBase = dms->window.base; + const BYTE* const dmsEnd = dms->window.nextSrc; + const U32 dmsSize = (U32)(dmsEnd - dmsBase); + const U32 dmsIndexDelta = dictLimit - dmsSize; + const U32 dmsMinChain = dmsSize > dmsChainSize ? 
dmsSize - dmsChainSize : 0; + + matchIndex = dms->hashTable[ZSTD_hashPtr(ip, dms->cParams.hashLog, mls)]; + + for ( ; (matchIndex>=dmsLowestIndex) & (nbAttempts>0) ; nbAttempts--) { + size_t currentMl=0; + const BYTE* const match = dmsBase + matchIndex; + assert(match+4 <= dmsEnd); + if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */ + currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dmsEnd, prefixStart) + 4; + + /* save best solution */ + if (currentMl > ml) { + ml = currentMl; + *offsetPtr = curr - (matchIndex + dmsIndexDelta) + ZSTD_REP_MOVE; + if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ + } + + if (matchIndex <= dmsMinChain) break; + + matchIndex = dmsChainTable[matchIndex & dmsChainMask]; + } + } + + return ml; +} + + +FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_selectMLS ( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + switch(ms->cParams.minMatch) + { + default : /* includes case 3 */ + case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict); + case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict); + case 7 : + case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict); + } +} + + +static size_t ZSTD_HcFindBestMatch_dictMatchState_selectMLS ( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + switch(ms->cParams.minMatch) + { + default : /* includes case 3 */ + case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState); + case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState); + case 7 : + case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState); + } +} + + +static size_t ZSTD_HcFindBestMatch_dedicatedDictSearch_selectMLS ( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + switch(ms->cParams.minMatch) + { + default : /* includes case 3 */ + case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dedicatedDictSearch); + case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_dedicatedDictSearch); + case 7 : + case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_dedicatedDictSearch); + } +} + + +FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS ( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + switch(ms->cParams.minMatch) + { + default : /* includes case 3 */ + case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict); + case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict); + case 7 : + case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict); + } +} + +/* ********************************* +* (SIMD) Row-based matchfinder +***********************************/ +/* Constants for row-based hash */ +#define ZSTD_ROW_HASH_TAG_OFFSET 1 /* byte offset of hashes in the match state's tagTable from the beginning of a row */ +#define ZSTD_ROW_HASH_TAG_BITS 8 /* nb bits to use for the tag */ +#define ZSTD_ROW_HASH_TAG_MASK ((1u << ZSTD_ROW_HASH_TAG_BITS) - 1) + +#define ZSTD_ROW_HASH_CACHE_MASK (ZSTD_ROW_HASH_CACHE_SIZE - 1) + +typedef U32 ZSTD_VecMask; /* Clarifies when we are interacting with a U32 
representing a mask of matches */
+
+#if !defined(ZSTD_NO_INTRINSICS) && defined(__SSE2__) /* SIMD SSE version */
+
+#include <emmintrin.h>
+typedef __m128i ZSTD_Vec128;
+
+/* Returns a 128-bit container with 128-bits from src */
+static ZSTD_Vec128 ZSTD_Vec128_read(const void* const src) {
+    return _mm_loadu_si128((ZSTD_Vec128 const*)src);
+}
+
+/* Returns a ZSTD_Vec128 with the byte "val" packed 16 times */
+static ZSTD_Vec128 ZSTD_Vec128_set8(BYTE val) {
+    return _mm_set1_epi8((char)val);
+}
+
+/* Do byte-by-byte comparison result of x and y. Then collapse 128-bit resultant mask
+ * into a 32-bit mask that is the MSB of each byte.
+ * */
+static ZSTD_VecMask ZSTD_Vec128_cmpMask8(ZSTD_Vec128 x, ZSTD_Vec128 y) {
+    return (ZSTD_VecMask)_mm_movemask_epi8(_mm_cmpeq_epi8(x, y));
+}
+
+typedef struct {
+    __m128i fst;
+    __m128i snd;
+} ZSTD_Vec256;
+
+static ZSTD_Vec256 ZSTD_Vec256_read(const void* const ptr) {
+    ZSTD_Vec256 v;
+    v.fst = ZSTD_Vec128_read(ptr);
+    v.snd = ZSTD_Vec128_read((ZSTD_Vec128 const*)ptr + 1);
+    return v;
+}
+
+static ZSTD_Vec256 ZSTD_Vec256_set8(BYTE val) {
+    ZSTD_Vec256 v;
+    v.fst = ZSTD_Vec128_set8(val);
+    v.snd = ZSTD_Vec128_set8(val);
+    return v;
+}
+
+static ZSTD_VecMask ZSTD_Vec256_cmpMask8(ZSTD_Vec256 x, ZSTD_Vec256 y) {
+    ZSTD_VecMask fstMask;
+    ZSTD_VecMask sndMask;
+    fstMask = ZSTD_Vec128_cmpMask8(x.fst, y.fst);
+    sndMask = ZSTD_Vec128_cmpMask8(x.snd, y.snd);
+    return fstMask | (sndMask << 16);
+}
+
+#elif !defined(ZSTD_NO_INTRINSICS) && defined(__ARM_NEON) /* SIMD ARM NEON Version */
+
+#include <arm_neon.h>
+typedef uint8x16_t ZSTD_Vec128;
+
+static ZSTD_Vec128 ZSTD_Vec128_read(const void* const src) {
+    return vld1q_u8((const BYTE* const)src);
+}
+
+static ZSTD_Vec128 ZSTD_Vec128_set8(BYTE val) {
+    return vdupq_n_u8(val);
+}
+
+/* Mimics '_mm_movemask_epi8()' from SSE */
+static U32 ZSTD_vmovmaskq_u8(ZSTD_Vec128 val) {
+    /* Shift out everything but the MSB bits in each byte */
+    uint16x8_t highBits = vreinterpretq_u16_u8(vshrq_n_u8(val, 7));
+    /* Merge the even lanes together with vsra (right shift and add) */
+    uint32x4_t paired16 = vreinterpretq_u32_u16(vsraq_n_u16(highBits, highBits, 7));
+    uint64x2_t paired32 = vreinterpretq_u64_u32(vsraq_n_u32(paired16, paired16, 14));
+    uint8x16_t paired64 = vreinterpretq_u8_u64(vsraq_n_u64(paired32, paired32, 28));
+    /* Extract the low 8 bits from each lane, merge */
+    return vgetq_lane_u8(paired64, 0) | ((U32)vgetq_lane_u8(paired64, 8) << 8);
+}
+
+static ZSTD_VecMask ZSTD_Vec128_cmpMask8(ZSTD_Vec128 x, ZSTD_Vec128 y) {
+    return (ZSTD_VecMask)ZSTD_vmovmaskq_u8(vceqq_u8(x, y));
+}
+
+typedef struct {
+    uint8x16_t fst;
+    uint8x16_t snd;
+} ZSTD_Vec256;
+
+static ZSTD_Vec256 ZSTD_Vec256_read(const void* const ptr) {
+    ZSTD_Vec256 v;
+    v.fst = ZSTD_Vec128_read(ptr);
+    v.snd = ZSTD_Vec128_read((ZSTD_Vec128 const*)ptr + 1);
+    return v;
+}
+
+static ZSTD_Vec256 ZSTD_Vec256_set8(BYTE val) {
+    ZSTD_Vec256 v;
+    v.fst = ZSTD_Vec128_set8(val);
+    v.snd = ZSTD_Vec128_set8(val);
+    return v;
+}
+
+static ZSTD_VecMask ZSTD_Vec256_cmpMask8(ZSTD_Vec256 x, ZSTD_Vec256 y) {
+    ZSTD_VecMask fstMask;
+    ZSTD_VecMask sndMask;
+    fstMask = ZSTD_Vec128_cmpMask8(x.fst, y.fst);
+    sndMask = ZSTD_Vec128_cmpMask8(x.snd, y.snd);
+    return fstMask | (sndMask << 16);
+}
+
+#else /* Scalar fallback version */
+
+#define VEC128_NB_SIZE_T (16 / sizeof(size_t))
+typedef struct {
+    size_t vec[VEC128_NB_SIZE_T];
+} ZSTD_Vec128;
+
+static ZSTD_Vec128 ZSTD_Vec128_read(const void* const src) {
+    ZSTD_Vec128 ret;
+    ZSTD_memcpy(ret.vec, src, VEC128_NB_SIZE_T*sizeof(size_t));
+    return ret;
+}
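/* ----------------------------------------------------------------------------
 * [Editorial aside - illustration only, not part of the vendored zstd sources]
 * Whichever branch is compiled above (SSE2, NEON or scalar), ZSTD_Vec128_cmpMask8()
 * answers the same question: which of the 16 one-byte tags stored in a row of
 * the tagTable equal the 1-byte tag of the position being searched, reported as
 * a bitmask with one bit per row entry. The standalone sketch below reproduces
 * that behaviour in plain C so the mask layout is easy to inspect; row_tag_mask()
 * and the sample tag values are made up for the example. In the real search the
 * mask is additionally rotated by the row's "head" index (see
 * ZSTD_VecMask_rotateRight further down), because each row is a circular buffer.
 * ---------------------------------------------------------------------------- */
#include <stdint.h>
#include <stdio.h>

/* bit i of the result is set when tags[i] == tag, matching the semantics of
 * _mm_movemask_epi8(_mm_cmpeq_epi8(row, broadcast(tag))) in the SSE2 branch */
static uint32_t row_tag_mask(const uint8_t tags[16], uint8_t tag)
{
    uint32_t mask = 0;
    int i;
    for (i = 0; i < 16; ++i)
        mask |= (uint32_t)(tags[i] == tag) << i;
    return mask;
}

int main(void)
{
    const uint8_t row[16] = { 0x3c, 0x11, 0x3c, 0x00, 0x9a, 0x3c, 0x42, 0x07,
                              0x3c, 0x55, 0x21, 0x3c, 0x00, 0x68, 0x3c, 0x3c };
    /* tag 0x3c sits at entries 0, 2, 5, 8, 11, 14 and 15 of this row */
    printf("0x%04x\n", (unsigned)row_tag_mask(row, 0x3c));   /* prints 0xc925 */
    return 0;
}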
+ +static ZSTD_Vec128 ZSTD_Vec128_set8(BYTE val) { + ZSTD_Vec128 ret = { {0} }; + int startBit = sizeof(size_t) * 8 - 8; + for (;startBit >= 0; startBit -= 8) { + unsigned j = 0; + for (;j < VEC128_NB_SIZE_T; ++j) { + ret.vec[j] |= ((size_t)val << startBit); + } + } + return ret; +} + +/* Compare x to y, byte by byte, generating a "matches" bitfield */ +static ZSTD_VecMask ZSTD_Vec128_cmpMask8(ZSTD_Vec128 x, ZSTD_Vec128 y) { + ZSTD_VecMask res = 0; + unsigned i = 0; + unsigned l = 0; + for (; i < VEC128_NB_SIZE_T; ++i) { + const size_t cmp1 = x.vec[i]; + const size_t cmp2 = y.vec[i]; + unsigned j = 0; + for (; j < sizeof(size_t); ++j, ++l) { + if (((cmp1 >> j*8) & 0xFF) == ((cmp2 >> j*8) & 0xFF)) { + res |= ((U32)1 << (j+i*sizeof(size_t))); + } + } + } + return res; +} + +#define VEC256_NB_SIZE_T 2*VEC128_NB_SIZE_T +typedef struct { + size_t vec[VEC256_NB_SIZE_T]; +} ZSTD_Vec256; + +static ZSTD_Vec256 ZSTD_Vec256_read(const void* const src) { + ZSTD_Vec256 ret; + ZSTD_memcpy(ret.vec, src, VEC256_NB_SIZE_T*sizeof(size_t)); + return ret; +} + +static ZSTD_Vec256 ZSTD_Vec256_set8(BYTE val) { + ZSTD_Vec256 ret = { {0} }; + int startBit = sizeof(size_t) * 8 - 8; + for (;startBit >= 0; startBit -= 8) { + unsigned j = 0; + for (;j < VEC256_NB_SIZE_T; ++j) { + ret.vec[j] |= ((size_t)val << startBit); + } + } + return ret; +} + +/* Compare x to y, byte by byte, generating a "matches" bitfield */ +static ZSTD_VecMask ZSTD_Vec256_cmpMask8(ZSTD_Vec256 x, ZSTD_Vec256 y) { + ZSTD_VecMask res = 0; + unsigned i = 0; + unsigned l = 0; + for (; i < VEC256_NB_SIZE_T; ++i) { + const size_t cmp1 = x.vec[i]; + const size_t cmp2 = y.vec[i]; + unsigned j = 0; + for (; j < sizeof(size_t); ++j, ++l) { + if (((cmp1 >> j*8) & 0xFF) == ((cmp2 >> j*8) & 0xFF)) { + res |= ((U32)1 << (j+i*sizeof(size_t))); + } + } + } + return res; +} + +#endif /* !defined(ZSTD_NO_INTRINSICS) && defined(__SSE2__) */ + +/* ZSTD_VecMask_next(): + * Starting from the LSB, returns the idx of the next non-zero bit. + * Basically counting the nb of trailing zeroes. + */ +static U32 ZSTD_VecMask_next(ZSTD_VecMask val) { +# if defined(_MSC_VER) /* Visual */ + unsigned long r=0; + return _BitScanForward(&r, val) ? (U32)r : 0; +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (U32)__builtin_ctz(val); +# else + /* Software ctz version: http://graphics.stanford.edu/~seander/bithacks.html#ZerosOnRightMultLookup */ + static const U32 multiplyDeBruijnBitPosition[32] = + { + 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, + 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 + }; + return multiplyDeBruijnBitPosition[((U32)((v & -(int)v) * 0x077CB531U)) >> 27]; +# endif +} + +/* ZSTD_VecMask_rotateRight(): + * Rotates a bitfield to the right by "rotation" bits. + * If the rotation is greater than totalBits, the returned mask is 0. + */ +FORCE_INLINE_TEMPLATE ZSTD_VecMask +ZSTD_VecMask_rotateRight(ZSTD_VecMask mask, U32 const rotation, U32 const totalBits) { + if (rotation == 0) + return mask; + switch (totalBits) { + default: + assert(0); + case 16: + return (mask >> rotation) | (U16)(mask << (16 - rotation)); + case 32: + return (mask >> rotation) | (U32)(mask << (32 - rotation)); + } +} + +/* ZSTD_row_nextIndex(): + * Returns the next index to insert at within a tagTable row, and updates the "head" + * value to reflect the update. 
Essentially cycles backwards from [0, {entries per row}) + */ +FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextIndex(BYTE* const tagRow, U32 const rowMask) { + U32 const next = (*tagRow - 1) & rowMask; + *tagRow = (BYTE)next; + return next; +} + +/* ZSTD_isAligned(): + * Checks that a pointer is aligned to "align" bytes which must be a power of 2. + */ +MEM_STATIC int ZSTD_isAligned(void const* ptr, size_t align) { + assert((align & (align - 1)) == 0); + return (((size_t)ptr) & (align - 1)) == 0; +} + +/* ZSTD_row_prefetch(): + * Performs prefetching for the hashTable and tagTable at a given row. + */ +FORCE_INLINE_TEMPLATE void ZSTD_row_prefetch(U32 const* hashTable, U16 const* tagTable, U32 const relRow, U32 const rowLog) { + PREFETCH_L1(hashTable + relRow); + if (rowLog == 5) { + PREFETCH_L1(hashTable + relRow + 16); + } + PREFETCH_L1(tagTable + relRow); + assert(rowLog == 4 || rowLog == 5); + assert(ZSTD_isAligned(hashTable + relRow, 64)); /* prefetched hash row always 64-byte aligned */ + assert(ZSTD_isAligned(tagTable + relRow, (size_t)1 << rowLog)); /* prefetched tagRow sits on a multiple of 32 or 64 bytes */ +} + +/* ZSTD_row_fillHashCache(): + * Fill up the hash cache starting at idx, prefetching up to ZSTD_ROW_HASH_CACHE_SIZE entries, + * but not beyond iLimit. + */ +static void ZSTD_row_fillHashCache(ZSTD_matchState_t* ms, const BYTE* base, + U32 const rowLog, U32 const mls, + U32 idx, const BYTE* const iLimit) +{ + U32 const* const hashTable = ms->hashTable; + U16 const* const tagTable = ms->tagTable; + U32 const hashLog = ms->rowHashLog; + U32 const maxElemsToPrefetch = (base + idx) > iLimit ? 0 : (U32)(iLimit - (base + idx) + 1); + U32 const lim = idx + MIN(ZSTD_ROW_HASH_CACHE_SIZE, maxElemsToPrefetch); + + for (; idx < lim; ++idx) { + U32 const hash = (U32)ZSTD_hashPtr(base + idx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls); + U32 const row = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog; + ZSTD_row_prefetch(hashTable, tagTable, row, rowLog); + ms->hashCache[idx & ZSTD_ROW_HASH_CACHE_MASK] = hash; + } + + DEBUGLOG(6, "ZSTD_row_fillHashCache(): [%u %u %u %u %u %u %u %u]", ms->hashCache[0], ms->hashCache[1], + ms->hashCache[2], ms->hashCache[3], ms->hashCache[4], + ms->hashCache[5], ms->hashCache[6], ms->hashCache[7]); +} + +/* ZSTD_row_nextCachedHash(): + * Returns the hash of base + idx, and replaces the hash in the hash cache with the byte at + * base + idx + ZSTD_ROW_HASH_CACHE_SIZE. Also prefetches the appropriate rows from hashTable and tagTable. + */ +FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextCachedHash(U32* cache, U32 const* hashTable, + U16 const* tagTable, BYTE const* base, + U32 idx, U32 const hashLog, + U32 const rowLog, U32 const mls) +{ + U32 const newHash = (U32)ZSTD_hashPtr(base+idx+ZSTD_ROW_HASH_CACHE_SIZE, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls); + U32 const row = (newHash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog; + ZSTD_row_prefetch(hashTable, tagTable, row, rowLog); + { U32 const hash = cache[idx & ZSTD_ROW_HASH_CACHE_MASK]; + cache[idx & ZSTD_ROW_HASH_CACHE_MASK] = newHash; + return hash; + } +} + +/* ZSTD_row_update_internal(): + * Inserts the byte at ip into the appropriate position in the hash table. + * Determines the relative row, and the position within the {16, 32} entry row to insert at. 
+ */ +FORCE_INLINE_TEMPLATE void ZSTD_row_update_internal(ZSTD_matchState_t* ms, const BYTE* ip, + U32 const mls, U32 const rowLog, + U32 const rowMask, U32 const useCache) +{ + U32* const hashTable = ms->hashTable; + U16* const tagTable = ms->tagTable; + U32 const hashLog = ms->rowHashLog; + const BYTE* const base = ms->window.base; + const U32 target = (U32)(ip - base); + U32 idx = ms->nextToUpdate; + + DEBUGLOG(6, "ZSTD_row_update_internal(): nextToUpdate=%u, current=%u", idx, target); + for (; idx < target; ++idx) { + U32 const hash = useCache ? ZSTD_row_nextCachedHash(ms->hashCache, hashTable, tagTable, base, idx, hashLog, rowLog, mls) + : (U32)ZSTD_hashPtr(base + idx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls); + U32 const relRow = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog; + U32* const row = hashTable + relRow; + BYTE* tagRow = (BYTE*)(tagTable + relRow); /* Though tagTable is laid out as a table of U16, each tag is only 1 byte. + Explicit cast allows us to get exact desired position within each row */ + U32 const pos = ZSTD_row_nextIndex(tagRow, rowMask); + + assert(hash == ZSTD_hashPtr(base + idx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls)); + ((BYTE*)tagRow)[pos + ZSTD_ROW_HASH_TAG_OFFSET] = hash & ZSTD_ROW_HASH_TAG_MASK; + row[pos] = idx; + } + ms->nextToUpdate = target; +} + +/* ZSTD_row_update(): + * External wrapper for ZSTD_row_update_internal(). Used for filling the hashtable during dictionary + * processing. + */ +void ZSTD_row_update(ZSTD_matchState_t* const ms, const BYTE* ip) { + const U32 rowLog = ms->cParams.searchLog < 5 ? 4 : 5; + const U32 rowMask = (1u << rowLog) - 1; + const U32 mls = MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */); + + DEBUGLOG(5, "ZSTD_row_update(), rowLog=%u", rowLog); + ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 0 /* dont use cache */); +} + +/* Returns a ZSTD_VecMask (U32) that has the nth bit set to 1 if the newly-computed "tag" matches + * the hash at the nth position in a row of the tagTable. + */ +FORCE_INLINE_TEMPLATE +ZSTD_VecMask ZSTD_row_getMatchMask(const BYTE* const tagRow, const BYTE tag, const U32 head, const U32 rowEntries) { + ZSTD_VecMask matches = 0; + if (rowEntries == 16) { + ZSTD_Vec128 hashes = ZSTD_Vec128_read(tagRow + ZSTD_ROW_HASH_TAG_OFFSET); + ZSTD_Vec128 expandedTags = ZSTD_Vec128_set8(tag); + matches = ZSTD_Vec128_cmpMask8(hashes, expandedTags); + } else if (rowEntries == 32) { + ZSTD_Vec256 hashes = ZSTD_Vec256_read(tagRow + ZSTD_ROW_HASH_TAG_OFFSET); + ZSTD_Vec256 expandedTags = ZSTD_Vec256_set8(tag); + matches = ZSTD_Vec256_cmpMask8(hashes, expandedTags); + } else { + assert(0); + } + /* Each row is a circular buffer beginning at the value of "head". So we must rotate the "matches" bitfield + to match up with the actual layout of the entries within the hashTable */ + return ZSTD_VecMask_rotateRight(matches, head, rowEntries); +} + +/* The high-level approach of the SIMD row based match finder is as follows: + * - Figure out where to insert the new entry: + * - Generate a hash from a byte along with an additional 1-byte "short hash". The additional byte is our "tag" + * - The hashTable is effectively split into groups or "rows" of 16 or 32 entries of U32, and the hash determines + * which row to insert into. + * - Determine the correct position within the row to insert the entry into. Each row of 16 or 32 can + * be considered as a circular buffer with a "head" index that resides in the tagTable. + * - Also insert the "tag" into the equivalent row and position in the tagTable. 
+ * - Note: The tagTable has 17 or 33 1-byte entries per row, due to 16 or 32 tags, and 1 "head" entry. + * The 17 or 33 entry rows are spaced out to occur every 32 or 64 bytes, respectively, + * for alignment/performance reasons, leaving some bytes unused. + * - Use SIMD to efficiently compare the tags in the tagTable to the 1-byte "short hash" and + * generate a bitfield that we can cycle through to check the collisions in the hash table. + * - Pick the longest match. + */ +FORCE_INLINE_TEMPLATE +size_t ZSTD_RowFindBestMatch_generic ( + ZSTD_matchState_t* ms, + const BYTE* const ip, const BYTE* const iLimit, + size_t* offsetPtr, + const U32 mls, const ZSTD_dictMode_e dictMode, + const U32 rowLog) +{ + U32* const hashTable = ms->hashTable; + U16* const tagTable = ms->tagTable; + U32* const hashCache = ms->hashCache; + const U32 hashLog = ms->rowHashLog; + const ZSTD_compressionParameters* const cParams = &ms->cParams; + const BYTE* const base = ms->window.base; + const BYTE* const dictBase = ms->window.dictBase; + const U32 dictLimit = ms->window.dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const U32 curr = (U32)(ip-base); + const U32 maxDistance = 1U << cParams->windowLog; + const U32 lowestValid = ms->window.lowLimit; + const U32 withinMaxDistance = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid; + const U32 isDictionary = (ms->loadedDictEnd != 0); + const U32 lowLimit = isDictionary ? lowestValid : withinMaxDistance; + const U32 rowEntries = (1U << rowLog); + const U32 rowMask = rowEntries - 1; + const U32 cappedSearchLog = MIN(cParams->searchLog, rowLog); /* nb of searches is capped at nb entries per row */ + U32 nbAttempts = 1U << cappedSearchLog; + size_t ml=4-1; + + /* DMS/DDS variables that may be referenced laster */ + const ZSTD_matchState_t* const dms = ms->dictMatchState; + size_t ddsIdx; + U32 ddsExtraAttempts; /* cctx hash tables are limited in searches, but allow extra searches into DDS */ + U32 dmsTag; + U32* dmsRow; + BYTE* dmsTagRow; + + if (dictMode == ZSTD_dedicatedDictSearch) { + const U32 ddsHashLog = dms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG; + { /* Prefetch DDS hashtable entry */ + ddsIdx = ZSTD_hashPtr(ip, ddsHashLog, mls) << ZSTD_LAZY_DDSS_BUCKET_LOG; + PREFETCH_L1(&dms->hashTable[ddsIdx]); + } + ddsExtraAttempts = cParams->searchLog > rowLog ? 
1U << (cParams->searchLog - rowLog) : 0; + } + + if (dictMode == ZSTD_dictMatchState) { + /* Prefetch DMS rows */ + U32* const dmsHashTable = dms->hashTable; + U16* const dmsTagTable = dms->tagTable; + U32 const dmsHash = (U32)ZSTD_hashPtr(ip, dms->rowHashLog + ZSTD_ROW_HASH_TAG_BITS, mls); + U32 const dmsRelRow = (dmsHash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog; + dmsTag = dmsHash & ZSTD_ROW_HASH_TAG_MASK; + dmsTagRow = (BYTE*)(dmsTagTable + dmsRelRow); + dmsRow = dmsHashTable + dmsRelRow; + ZSTD_row_prefetch(dmsHashTable, dmsTagTable, dmsRelRow, rowLog); + } + + /* Update the hashTable and tagTable up to (but not including) ip */ + ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 1 /* useCache */); + { /* Get the hash for ip, compute the appropriate row */ + U32 const hash = ZSTD_row_nextCachedHash(hashCache, hashTable, tagTable, base, curr, hashLog, rowLog, mls); + U32 const relRow = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog; + U32 const tag = hash & ZSTD_ROW_HASH_TAG_MASK; + U32* const row = hashTable + relRow; + BYTE* tagRow = (BYTE*)(tagTable + relRow); + U32 const head = *tagRow & rowMask; + U32 matchBuffer[32 /* maximum nb entries per row */]; + size_t numMatches = 0; + size_t currMatch = 0; + ZSTD_VecMask matches = ZSTD_row_getMatchMask(tagRow, (BYTE)tag, head, rowEntries); + + /* Cycle through the matches and prefetch */ + for (; (matches > 0) && (nbAttempts > 0); --nbAttempts, matches &= (matches - 1)) { + U32 const matchPos = (head + ZSTD_VecMask_next(matches)) & rowMask; + U32 const matchIndex = row[matchPos]; + assert(numMatches < rowEntries); + if (matchIndex < lowLimit) + break; + if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) { + PREFETCH_L1(base + matchIndex); + } else { + PREFETCH_L1(dictBase + matchIndex); + } + matchBuffer[numMatches++] = matchIndex; + } + + /* Speed opt: insert current byte into hashtable too. This allows us to avoid one iteration of the loop + in ZSTD_row_update_internal() at the next search. 
*/ + { + U32 const pos = ZSTD_row_nextIndex(tagRow, rowMask); + tagRow[pos + ZSTD_ROW_HASH_TAG_OFFSET] = (BYTE)tag; + row[pos] = ms->nextToUpdate++; + } + + /* Return the longest match */ + for (; currMatch < numMatches; ++currMatch) { + U32 const matchIndex = matchBuffer[currMatch]; + size_t currentMl=0; + assert(matchIndex < curr); + assert(matchIndex >= lowLimit); + + if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) { + const BYTE* const match = base + matchIndex; + assert(matchIndex >= dictLimit); /* ensures this is true if dictMode != ZSTD_extDict */ + if (match[ml] == ip[ml]) /* potentially better */ + currentMl = ZSTD_count(ip, match, iLimit); + } else { + const BYTE* const match = dictBase + matchIndex; + assert(match+4 <= dictEnd); + if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */ + currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4; + } + + /* Save best solution */ + if (currentMl > ml) { + ml = currentMl; + *offsetPtr = curr - matchIndex + ZSTD_REP_MOVE; + if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ + } + } + } + + if (dictMode == ZSTD_dedicatedDictSearch) { + ml = ZSTD_dedicatedDictSearch_lazy_search(offsetPtr, ml, nbAttempts + ddsExtraAttempts, dms, + ip, iLimit, prefixStart, curr, dictLimit, ddsIdx); + } else if (dictMode == ZSTD_dictMatchState) { + /* TODO: Measure and potentially add prefetching to DMS */ + const U32 dmsLowestIndex = dms->window.dictLimit; + const BYTE* const dmsBase = dms->window.base; + const BYTE* const dmsEnd = dms->window.nextSrc; + const U32 dmsSize = (U32)(dmsEnd - dmsBase); + const U32 dmsIndexDelta = dictLimit - dmsSize; + + { U32 const head = *dmsTagRow & rowMask; + U32 matchBuffer[32 /* maximum nb row entries */]; + size_t numMatches = 0; + size_t currMatch = 0; + ZSTD_VecMask matches = ZSTD_row_getMatchMask(dmsTagRow, (BYTE)dmsTag, head, rowEntries); + + for (; (matches > 0) && (nbAttempts > 0); --nbAttempts, matches &= (matches - 1)) { + U32 const matchPos = (head + ZSTD_VecMask_next(matches)) & rowMask; + U32 const matchIndex = dmsRow[matchPos]; + if (matchIndex < dmsLowestIndex) + break; + PREFETCH_L1(dmsBase + matchIndex); + matchBuffer[numMatches++] = matchIndex; + } + + /* Return the longest match */ + for (; currMatch < numMatches; ++currMatch) { + U32 const matchIndex = matchBuffer[currMatch]; + size_t currentMl=0; + assert(matchIndex >= dmsLowestIndex); + assert(matchIndex < curr); + + { const BYTE* const match = dmsBase + matchIndex; + assert(match+4 <= dmsEnd); + if (MEM_read32(match) == MEM_read32(ip)) + currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dmsEnd, prefixStart) + 4; + } + + if (currentMl > ml) { + ml = currentMl; + *offsetPtr = curr - (matchIndex + dmsIndexDelta) + ZSTD_REP_MOVE; + if (ip+currentMl == iLimit) break; + } + } + } + } + return ml; +} + +/* Inlining is important to hardwire a hot branch (template emulation) */ +FORCE_INLINE_TEMPLATE size_t ZSTD_RowFindBestMatch_selectMLS ( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + const ZSTD_dictMode_e dictMode, size_t* offsetPtr, const U32 rowLog) +{ + switch(ms->cParams.minMatch) + { + default : /* includes case 3 */ + case 4 : return ZSTD_RowFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, dictMode, rowLog); + case 5 : return ZSTD_RowFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, dictMode, rowLog); + case 7 : + case 6 : return ZSTD_RowFindBestMatch_generic(ms, ip, iLimit, 
offsetPtr, 6, dictMode, rowLog); + } +} + +FORCE_INLINE_TEMPLATE size_t ZSTD_RowFindBestMatch_selectRowLog ( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + const U32 cappedSearchLog = MIN(ms->cParams.searchLog, 5); + switch(cappedSearchLog) + { + default : + case 4 : return ZSTD_RowFindBestMatch_selectMLS(ms, ip, iLimit, ZSTD_noDict, offsetPtr, 4); + case 5 : return ZSTD_RowFindBestMatch_selectMLS(ms, ip, iLimit, ZSTD_noDict, offsetPtr, 5); + } +} + +FORCE_INLINE_TEMPLATE size_t ZSTD_RowFindBestMatch_dictMatchState_selectRowLog( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + const U32 cappedSearchLog = MIN(ms->cParams.searchLog, 5); + switch(cappedSearchLog) + { + default : + case 4 : return ZSTD_RowFindBestMatch_selectMLS(ms, ip, iLimit, ZSTD_dictMatchState, offsetPtr, 4); + case 5 : return ZSTD_RowFindBestMatch_selectMLS(ms, ip, iLimit, ZSTD_dictMatchState, offsetPtr, 5); + } +} + +FORCE_INLINE_TEMPLATE size_t ZSTD_RowFindBestMatch_dedicatedDictSearch_selectRowLog( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + const U32 cappedSearchLog = MIN(ms->cParams.searchLog, 5); + switch(cappedSearchLog) + { + default : + case 4 : return ZSTD_RowFindBestMatch_selectMLS(ms, ip, iLimit, ZSTD_dedicatedDictSearch, offsetPtr, 4); + case 5 : return ZSTD_RowFindBestMatch_selectMLS(ms, ip, iLimit, ZSTD_dedicatedDictSearch, offsetPtr, 5); + } +} + +FORCE_INLINE_TEMPLATE size_t ZSTD_RowFindBestMatch_extDict_selectRowLog ( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + const U32 cappedSearchLog = MIN(ms->cParams.searchLog, 5); + switch(cappedSearchLog) + { + default : + case 4 : return ZSTD_RowFindBestMatch_selectMLS(ms, ip, iLimit, ZSTD_extDict, offsetPtr, 4); + case 5 : return ZSTD_RowFindBestMatch_selectMLS(ms, ip, iLimit, ZSTD_extDict, offsetPtr, 5); + } +} + + +/* ******************************* +* Common parser - lazy strategy +*********************************/ +typedef enum { search_hashChain=0, search_binaryTree=1, search_rowHash=2 } searchMethod_e; + +FORCE_INLINE_TEMPLATE size_t +ZSTD_compressBlock_lazy_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, + U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize, + const searchMethod_e searchMethod, const U32 depth, + ZSTD_dictMode_e const dictMode) +{ + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = searchMethod == search_rowHash ? iend - 8 - ZSTD_ROW_HASH_CACHE_SIZE : iend - 8; + const BYTE* const base = ms->window.base; + const U32 prefixLowestIndex = ms->window.dictLimit; + const BYTE* const prefixLowest = base + prefixLowestIndex; + const U32 rowLog = ms->cParams.searchLog < 5 ? 4 : 5; + + typedef size_t (*searchMax_f)( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr); + + /** + * This table is indexed first by the four ZSTD_dictMode_e values, and then + * by the two searchMethod_e values. NULLs are placed for configurations + * that should never occur (extDict modes go to the other implementation + * below and there is no DDSS for binary tree search yet). 
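A small sketch (not part of the diff) of the dispatch pattern used by the searchFuncs table that follows: a 2-D array of function pointers indexed by mode and method, with NULL marking combinations that must never be selected, guarded by an assert just like the assert(searchMax != NULL) below. All names here are hypothetical.

/* Sketch only: hypothetical 2-D function-pointer dispatch with NULL holes. */
#include <assert.h>
#include <stddef.h>
#include <stdio.h>

typedef enum { MODE_A = 0, MODE_B = 1 } mode_e;
typedef enum { METHOD_X = 0, METHOD_Y = 1 } method_e;
typedef int (*search_fn)(int arg);

static int searchAX(int arg) { return arg + 1; }
static int searchAY(int arg) { return arg + 2; }
static int searchBX(int arg) { return arg + 3; }

int main(void)
{
    /* NULL means "this configuration is handled elsewhere / never occurs". */
    static const search_fn table[2][2] = {
        { searchAX, searchAY },
        { searchBX, NULL     },
    };
    mode_e   mode   = MODE_B;
    method_e method = METHOD_X;
    search_fn fn = table[mode][method];
    assert(fn != NULL);
    printf("%d\n", fn(10));
    return 0;
}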
+ */ + const searchMax_f searchFuncs[4][3] = { + { + ZSTD_HcFindBestMatch_selectMLS, + ZSTD_BtFindBestMatch_selectMLS, + ZSTD_RowFindBestMatch_selectRowLog + }, + { + NULL, + NULL, + NULL + }, + { + ZSTD_HcFindBestMatch_dictMatchState_selectMLS, + ZSTD_BtFindBestMatch_dictMatchState_selectMLS, + ZSTD_RowFindBestMatch_dictMatchState_selectRowLog + }, + { + ZSTD_HcFindBestMatch_dedicatedDictSearch_selectMLS, + NULL, + ZSTD_RowFindBestMatch_dedicatedDictSearch_selectRowLog + } + }; + + searchMax_f const searchMax = searchFuncs[dictMode][(int)searchMethod]; + U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0; + + const int isDMS = dictMode == ZSTD_dictMatchState; + const int isDDS = dictMode == ZSTD_dedicatedDictSearch; + const int isDxS = isDMS || isDDS; + const ZSTD_matchState_t* const dms = ms->dictMatchState; + const U32 dictLowestIndex = isDxS ? dms->window.dictLimit : 0; + const BYTE* const dictBase = isDxS ? dms->window.base : NULL; + const BYTE* const dictLowest = isDxS ? dictBase + dictLowestIndex : NULL; + const BYTE* const dictEnd = isDxS ? dms->window.nextSrc : NULL; + const U32 dictIndexDelta = isDxS ? + prefixLowestIndex - (U32)(dictEnd - dictBase) : + 0; + const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictLowest)); + + assert(searchMax != NULL); + + DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u) (searchFunc=%u)", (U32)dictMode, (U32)searchMethod); + ip += (dictAndPrefixLength == 0); + if (dictMode == ZSTD_noDict) { + U32 const curr = (U32)(ip - base); + U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, ms->cParams.windowLog); + U32 const maxRep = curr - windowLow; + if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0; + if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0; + } + if (isDxS) { + /* dictMatchState repCode checks don't currently handle repCode == 0 + * disabling. */ + assert(offset_1 <= dictAndPrefixLength); + assert(offset_2 <= dictAndPrefixLength); + } + + if (searchMethod == search_rowHash) { + ZSTD_row_fillHashCache(ms, base, rowLog, + MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */), + ms->nextToUpdate, ilimit); + } + + /* Match Loop */ +#if defined(__GNUC__) && defined(__x86_64__) + /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the + * code alignment is perturbed. To fix the instability align the loop on 32-bytes. + */ + __asm__(".p2align 5"); +#endif + while (ip < ilimit) { + size_t matchLength=0; + size_t offset=0; + const BYTE* start=ip+1; + + /* check repCode */ + if (isDxS) { + const U32 repIndex = (U32)(ip - base) + 1 - offset_1; + const BYTE* repMatch = ((dictMode == ZSTD_dictMatchState || dictMode == ZSTD_dedicatedDictSearch) + && repIndex < prefixLowestIndex) ? + dictBase + (repIndex - dictIndexDelta) : + base + repIndex; + if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) + && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { + const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? 
dictEnd : iend; + matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; + if (depth==0) goto _storeSequence; + } + } + if ( dictMode == ZSTD_noDict + && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) { + matchLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; + if (depth==0) goto _storeSequence; + } + + /* first search (depth 0) */ + { size_t offsetFound = 999999999; + size_t const ml2 = searchMax(ms, ip, iend, &offsetFound); + if (ml2 > matchLength) + matchLength = ml2, start = ip, offset=offsetFound; + } + + if (matchLength < 4) { + ip += ((ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */ + continue; + } + + /* let's try to find a better solution */ + if (depth>=1) + while (ip0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) { + size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4; + int const gain2 = (int)(mlRep * 3); + int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1); + if ((mlRep >= 4) && (gain2 > gain1)) + matchLength = mlRep, offset = 0, start = ip; + } + if (isDxS) { + const U32 repIndex = (U32)(ip - base) - offset_1; + const BYTE* repMatch = repIndex < prefixLowestIndex ? + dictBase + (repIndex - dictIndexDelta) : + base + repIndex; + if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) + && (MEM_read32(repMatch) == MEM_read32(ip)) ) { + const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend; + size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; + int const gain2 = (int)(mlRep * 3); + int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1); + if ((mlRep >= 4) && (gain2 > gain1)) + matchLength = mlRep, offset = 0, start = ip; + } + } + { size_t offset2=999999999; + size_t const ml2 = searchMax(ms, ip, iend, &offset2); + int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4); + if ((ml2 >= 4) && (gain2 > gain1)) { + matchLength = ml2, offset = offset2, start = ip; + continue; /* search a better one */ + } } + + /* let's find an even better one */ + if ((depth==2) && (ip0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) { + size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4; + int const gain2 = (int)(mlRep * 4); + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1); + if ((mlRep >= 4) && (gain2 > gain1)) + matchLength = mlRep, offset = 0, start = ip; + } + if (isDxS) { + const U32 repIndex = (U32)(ip - base) - offset_1; + const BYTE* repMatch = repIndex < prefixLowestIndex ? + dictBase + (repIndex - dictIndexDelta) : + base + repIndex; + if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) + && (MEM_read32(repMatch) == MEM_read32(ip)) ) { + const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? 
dictEnd : iend; + size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; + int const gain2 = (int)(mlRep * 4); + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1); + if ((mlRep >= 4) && (gain2 > gain1)) + matchLength = mlRep, offset = 0, start = ip; + } + } + { size_t offset2=999999999; + size_t const ml2 = searchMax(ms, ip, iend, &offset2); + int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7); + if ((ml2 >= 4) && (gain2 > gain1)) { + matchLength = ml2, offset = offset2, start = ip; + continue; + } } } + break; /* nothing found : store previous solution */ + } + + /* NOTE: + * start[-offset+ZSTD_REP_MOVE-1] is undefined behavior. + * (-offset+ZSTD_REP_MOVE-1) is unsigned, and is added to start, which + * overflows the pointer, which is undefined behavior. + */ + /* catch up */ + if (offset) { + if (dictMode == ZSTD_noDict) { + while ( ((start > anchor) & (start - (offset-ZSTD_REP_MOVE) > prefixLowest)) + && (start[-1] == (start-(offset-ZSTD_REP_MOVE))[-1]) ) /* only search for offset within prefix */ + { start--; matchLength++; } + } + if (isDxS) { + U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE)); + const BYTE* match = (matchIndex < prefixLowestIndex) ? dictBase + matchIndex - dictIndexDelta : base + matchIndex; + const BYTE* const mStart = (matchIndex < prefixLowestIndex) ? dictLowest : prefixLowest; + while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */ + } + offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE); + } + /* store sequence */ +_storeSequence: + { size_t const litLength = start - anchor; + ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH); + anchor = ip = start + matchLength; + } + + /* check immediate repcode */ + if (isDxS) { + while (ip <= ilimit) { + U32 const current2 = (U32)(ip-base); + U32 const repIndex = current2 - offset_2; + const BYTE* repMatch = repIndex < prefixLowestIndex ? + dictBase - dictIndexDelta + repIndex : + base + repIndex; + if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex) >= 3 /* intentional overflow */) + && (MEM_read32(repMatch) == MEM_read32(ip)) ) { + const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend; + matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4; + offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset_2 <=> offset_1 */ + ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH); + ip += matchLength; + anchor = ip; + continue; + } + break; + } + } + + if (dictMode == ZSTD_noDict) { + while ( ((ip <= ilimit) & (offset_2>0)) + && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) { + /* store sequence */ + matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; + offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */ + ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH); + ip += matchLength; + anchor = ip; + continue; /* faster when present ... (?) */ + } } } + + /* Save reps for next block */ + rep[0] = offset_1 ? offset_1 : savedOffset; + rep[1] = offset_2 ? 
offset_2 : savedOffset; + + /* Return the last literals size */ + return (size_t)(iend - anchor); +} + + +size_t ZSTD_compressBlock_btlazy2( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_lazy2( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_lazy( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_greedy( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_btlazy2_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState); +} + +size_t ZSTD_compressBlock_lazy2_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState); +} + +size_t ZSTD_compressBlock_lazy_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState); +} + +size_t ZSTD_compressBlock_greedy_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState); +} + + +size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dedicatedDictSearch); +} + +size_t ZSTD_compressBlock_lazy_dedicatedDictSearch( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dedicatedDictSearch); +} + +size_t ZSTD_compressBlock_greedy_dedicatedDictSearch( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dedicatedDictSearch); +} + +/* Row-based matchfinder */ +size_t ZSTD_compressBlock_lazy2_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_lazy_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + 
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_greedy_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_lazy2_dictMatchState_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_dictMatchState); +} + +size_t ZSTD_compressBlock_lazy_dictMatchState_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dictMatchState); +} + +size_t ZSTD_compressBlock_greedy_dictMatchState_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dictMatchState); +} + + +size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_dedicatedDictSearch); +} + +size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dedicatedDictSearch); +} + +size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dedicatedDictSearch); +} + +FORCE_INLINE_TEMPLATE +size_t ZSTD_compressBlock_lazy_extDict_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, + U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize, + const searchMethod_e searchMethod, const U32 depth) +{ + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = searchMethod == search_rowHash ? iend - 8 - ZSTD_ROW_HASH_CACHE_SIZE : iend - 8; + const BYTE* const base = ms->window.base; + const U32 dictLimit = ms->window.dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const BYTE* const dictBase = ms->window.dictBase; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const dictStart = dictBase + ms->window.lowLimit; + const U32 windowLog = ms->cParams.windowLog; + const U32 rowLog = ms->cParams.searchLog < 5 ? 
4 : 5; + + typedef size_t (*searchMax_f)( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr); + const searchMax_f searchFuncs[3] = { + ZSTD_HcFindBestMatch_extDict_selectMLS, + ZSTD_BtFindBestMatch_extDict_selectMLS, + ZSTD_RowFindBestMatch_extDict_selectRowLog + }; + searchMax_f searchMax = searchFuncs[(int)searchMethod]; + U32 offset_1 = rep[0], offset_2 = rep[1]; + + DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic (searchFunc=%u)", (U32)searchMethod); + + /* init */ + ip += (ip == prefixStart); + if (searchMethod == search_rowHash) { + ZSTD_row_fillHashCache(ms, base, rowLog, + MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */), + ms->nextToUpdate, ilimit); + } + + /* Match Loop */ +#if defined(__GNUC__) && defined(__x86_64__) + /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the + * code alignment is perturbed. To fix the instability align the loop on 32-bytes. + */ + __asm__(".p2align 5"); +#endif + while (ip < ilimit) { + size_t matchLength=0; + size_t offset=0; + const BYTE* start=ip+1; + U32 curr = (U32)(ip-base); + + /* check repCode */ + { const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr+1, windowLog); + const U32 repIndex = (U32)(curr+1 - offset_1); + const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; + const BYTE* const repMatch = repBase + repIndex; + if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */ + & (offset_1 < curr+1 - windowLow) ) /* note: we are searching at curr+1 */ + if (MEM_read32(ip+1) == MEM_read32(repMatch)) { + /* repcode detected we should take it */ + const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; + matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repEnd, prefixStart) + 4; + if (depth==0) goto _storeSequence; + } } + + /* first search (depth 0) */ + { size_t offsetFound = 999999999; + size_t const ml2 = searchMax(ms, ip, iend, &offsetFound); + if (ml2 > matchLength) + matchLength = ml2, start = ip, offset=offsetFound; + } + + if (matchLength < 4) { + ip += ((ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */ + continue; + } + + /* let's try to find a better solution */ + if (depth>=1) + while (ip= 3) /* intentional overflow : do not test positions overlapping 2 memory segments */ + & (offset_1 < curr - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */ + if (MEM_read32(ip) == MEM_read32(repMatch)) { + /* repcode detected */ + const BYTE* const repEnd = repIndex < dictLimit ? 
dictEnd : iend; + size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; + int const gain2 = (int)(repLength * 3); + int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1); + if ((repLength >= 4) && (gain2 > gain1)) + matchLength = repLength, offset = 0, start = ip; + } } + + /* search match, depth 1 */ + { size_t offset2=999999999; + size_t const ml2 = searchMax(ms, ip, iend, &offset2); + int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4); + if ((ml2 >= 4) && (gain2 > gain1)) { + matchLength = ml2, offset = offset2, start = ip; + continue; /* search a better one */ + } } + + /* let's find an even better one */ + if ((depth==2) && (ip= 3) /* intentional overflow : do not test positions overlapping 2 memory segments */ + & (offset_1 < curr - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */ + if (MEM_read32(ip) == MEM_read32(repMatch)) { + /* repcode detected */ + const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; + size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; + int const gain2 = (int)(repLength * 4); + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1); + if ((repLength >= 4) && (gain2 > gain1)) + matchLength = repLength, offset = 0, start = ip; + } } + + /* search match, depth 2 */ + { size_t offset2=999999999; + size_t const ml2 = searchMax(ms, ip, iend, &offset2); + int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7); + if ((ml2 >= 4) && (gain2 > gain1)) { + matchLength = ml2, offset = offset2, start = ip; + continue; + } } } + break; /* nothing found : store previous solution */ + } + + /* catch up */ + if (offset) { + U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE)); + const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex; + const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart; + while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */ + offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE); + } + + /* store sequence */ +_storeSequence: + { size_t const litLength = start - anchor; + ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH); + anchor = ip = start + matchLength; + } + + /* check immediate repcode */ + while (ip <= ilimit) { + const U32 repCurrent = (U32)(ip-base); + const U32 windowLow = ZSTD_getLowestMatchIndex(ms, repCurrent, windowLog); + const U32 repIndex = repCurrent - offset_2; + const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; + const BYTE* const repMatch = repBase + repIndex; + if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments */ + & (offset_2 < repCurrent - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */ + if (MEM_read32(ip) == MEM_read32(repMatch)) { + /* repcode detected we should take it */ + const BYTE* const repEnd = repIndex < dictLimit ? 
dictEnd : iend; + matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; + offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset history */ + ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH); + ip += matchLength; + anchor = ip; + continue; /* faster when present ... (?) */ + } + break; + } } + + /* Save reps for next block */ + rep[0] = offset_1; + rep[1] = offset_2; + + /* Return the last literals size */ + return (size_t)(iend - anchor); +} + + +size_t ZSTD_compressBlock_greedy_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0); +} + +size_t ZSTD_compressBlock_lazy_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) + +{ + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1); +} + +size_t ZSTD_compressBlock_lazy2_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) + +{ + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2); +} + +size_t ZSTD_compressBlock_btlazy2_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) + +{ + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2); +} + +size_t ZSTD_compressBlock_greedy_extDict_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0); +} + +size_t ZSTD_compressBlock_lazy_extDict_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) + +{ + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1); +} + +size_t ZSTD_compressBlock_lazy2_extDict_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) + +{ + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2); +} +/**** ended inlining compress/zstd_lazy.c ****/ +/**** start inlining compress/zstd_ldm.c ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/**** skipping file: zstd_ldm.h ****/ + +/**** skipping file: ../common/debug.h ****/ +/**** skipping file: ../common/xxhash.h ****/ +/**** skipping file: zstd_fast.h ****/ +/**** skipping file: zstd_double_fast.h ****/ +/**** start inlining zstd_ldm_geartab.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +#ifndef ZSTD_LDM_GEARTAB_H +#define ZSTD_LDM_GEARTAB_H + +static U64 ZSTD_ldm_gearTab[256] = { + 0xf5b8f72c5f77775c, 0x84935f266b7ac412, 0xb647ada9ca730ccc, + 0xb065bb4b114fb1de, 0x34584e7e8c3a9fd0, 0x4e97e17c6ae26b05, + 0x3a03d743bc99a604, 0xcecd042422c4044f, 0x76de76c58524259e, + 0x9c8528f65badeaca, 0x86563706e2097529, 0x2902475fa375d889, + 0xafb32a9739a5ebe6, 0xce2714da3883e639, 0x21eaf821722e69e, + 0x37b628620b628, 0x49a8d455d88caf5, 0x8556d711e6958140, + 0x4f7ae74fc605c1f, 0x829f0c3468bd3a20, 0x4ffdc885c625179e, + 0x8473de048a3daf1b, 0x51008822b05646b2, 0x69d75d12b2d1cc5f, + 0x8c9d4a19159154bc, 0xc3cc10f4abbd4003, 0xd06ddc1cecb97391, + 0xbe48e6e7ed80302e, 0x3481db31cee03547, 0xacc3f67cdaa1d210, + 0x65cb771d8c7f96cc, 0x8eb27177055723dd, 0xc789950d44cd94be, + 0x934feadc3700b12b, 0x5e485f11edbdf182, 0x1e2e2a46fd64767a, + 0x2969ca71d82efa7c, 0x9d46e9935ebbba2e, 0xe056b67e05e6822b, + 0x94d73f55739d03a0, 0xcd7010bdb69b5a03, 0x455ef9fcd79b82f4, + 0x869cb54a8749c161, 0x38d1a4fa6185d225, 0xb475166f94bbe9bb, + 0xa4143548720959f1, 0x7aed4780ba6b26ba, 0xd0ce264439e02312, + 0x84366d746078d508, 0xa8ce973c72ed17be, 0x21c323a29a430b01, + 0x9962d617e3af80ee, 0xab0ce91d9c8cf75b, 0x530e8ee6d19a4dbc, + 0x2ef68c0cf53f5d72, 0xc03a681640a85506, 0x496e4e9f9c310967, + 0x78580472b59b14a0, 0x273824c23b388577, 0x66bf923ad45cb553, + 0x47ae1a5a2492ba86, 0x35e304569e229659, 0x4765182a46870b6f, + 0x6cbab625e9099412, 0xddac9a2e598522c1, 0x7172086e666624f2, + 0xdf5003ca503b7837, 0x88c0c1db78563d09, 0x58d51865acfc289d, + 0x177671aec65224f1, 0xfb79d8a241e967d7, 0x2be1e101cad9a49a, + 0x6625682f6e29186b, 0x399553457ac06e50, 0x35dffb4c23abb74, + 0x429db2591f54aade, 0xc52802a8037d1009, 0x6acb27381f0b25f3, + 0xf45e2551ee4f823b, 0x8b0ea2d99580c2f7, 0x3bed519cbcb4e1e1, + 0xff452823dbb010a, 0x9d42ed614f3dd267, 0x5b9313c06257c57b, + 0xa114b8008b5e1442, 0xc1fe311c11c13d4b, 0x66e8763ea34c5568, + 0x8b982af1c262f05d, 0xee8876faaa75fbb7, 0x8a62a4d0d172bb2a, + 0xc13d94a3b7449a97, 0x6dbbba9dc15d037c, 0xc786101f1d92e0f1, + 0xd78681a907a0b79b, 0xf61aaf2962c9abb9, 0x2cfd16fcd3cb7ad9, + 0x868c5b6744624d21, 0x25e650899c74ddd7, 0xba042af4a7c37463, + 0x4eb1a539465a3eca, 0xbe09dbf03b05d5ca, 0x774e5a362b5472ba, + 0x47a1221229d183cd, 0x504b0ca18ef5a2df, 0xdffbdfbde2456eb9, + 0x46cd2b2fbee34634, 0xf2aef8fe819d98c3, 0x357f5276d4599d61, + 0x24a5483879c453e3, 0x88026889192b4b9, 0x28da96671782dbec, + 0x4ef37c40588e9aaa, 0x8837b90651bc9fb3, 0xc164f741d3f0e5d6, + 0xbc135a0a704b70ba, 0x69cd868f7622ada, 0xbc37ba89e0b9c0ab, + 0x47c14a01323552f6, 0x4f00794bacee98bb, 0x7107de7d637a69d5, + 0x88af793bb6f2255e, 0xf3c6466b8799b598, 0xc288c616aa7f3b59, + 0x81ca63cf42fca3fd, 0x88d85ace36a2674b, 0xd056bd3792389e7, + 0xe55c396c4e9dd32d, 0xbefb504571e6c0a6, 0x96ab32115e91e8cc, + 0xbf8acb18de8f38d1, 0x66dae58801672606, 0x833b6017872317fb, + 0xb87c16f2d1c92864, 0xdb766a74e58b669c, 0x89659f85c61417be, + 0xc8daad856011ea0c, 0x76a4b565b6fe7eae, 0xa469d085f6237312, + 0xaaf0365683a3e96c, 0x4dbb746f8424f7b8, 0x638755af4e4acc1, + 0x3d7807f5bde64486, 0x17be6d8f5bbb7639, 0x903f0cd44dc35dc, + 0x67b672eafdf1196c, 0xa676ff93ed4c82f1, 0x521d1004c5053d9d, + 0x37ba9ad09ccc9202, 0x84e54d297aacfb51, 0xa0b4b776a143445, + 0x820d471e20b348e, 0x1874383cb83d46dc, 0x97edeec7a1efe11c, + 0xb330e50b1bdc42aa, 0x1dd91955ce70e032, 0xa514cdb88f2939d5, + 0x2791233fd90db9d3, 0x7b670a4cc50f7a9b, 0x77c07d2a05c6dfa5, + 0xe3778b6646d0a6fa, 0xb39c8eda47b56749, 0x933ed448addbef28, + 0xaf846af6ab7d0bf4, 0xe5af208eb666e49, 0x5e6622f73534cd6a, + 0x297daeca42ef5b6e, 0x862daef3d35539a6, 
0xe68722498f8e1ea9, + 0x981c53093dc0d572, 0xfa09b0bfbf86fbf5, 0x30b1e96166219f15, + 0x70e7d466bdc4fb83, 0x5a66736e35f2a8e9, 0xcddb59d2b7c1baef, + 0xd6c7d247d26d8996, 0xea4e39eac8de1ba3, 0x539c8bb19fa3aff2, + 0x9f90e4c5fd508d8, 0xa34e5956fbaf3385, 0x2e2f8e151d3ef375, + 0x173691e9b83faec1, 0xb85a8d56bf016379, 0x8382381267408ae3, + 0xb90f901bbdc0096d, 0x7c6ad32933bcec65, 0x76bb5e2f2c8ad595, + 0x390f851a6cf46d28, 0xc3e6064da1c2da72, 0xc52a0c101cfa5389, + 0xd78eaf84a3fbc530, 0x3781b9e2288b997e, 0x73c2f6dea83d05c4, + 0x4228e364c5b5ed7, 0x9d7a3edf0da43911, 0x8edcfeda24686756, + 0x5e7667a7b7a9b3a1, 0x4c4f389fa143791d, 0xb08bc1023da7cddc, + 0x7ab4be3ae529b1cc, 0x754e6132dbe74ff9, 0x71635442a839df45, + 0x2f6fb1643fbe52de, 0x961e0a42cf7a8177, 0xf3b45d83d89ef2ea, + 0xee3de4cf4a6e3e9b, 0xcd6848542c3295e7, 0xe4cee1664c78662f, + 0x9947548b474c68c4, 0x25d73777a5ed8b0b, 0xc915b1d636b7fc, + 0x21c2ba75d9b0d2da, 0x5f6b5dcf608a64a1, 0xdcf333255ff9570c, + 0x633b922418ced4ee, 0xc136dde0b004b34a, 0x58cc83b05d4b2f5a, + 0x5eb424dda28e42d2, 0x62df47369739cd98, 0xb4e0b42485e4ce17, + 0x16e1f0c1f9a8d1e7, 0x8ec3916707560ebf, 0x62ba6e2df2cc9db3, + 0xcbf9f4ff77d83a16, 0x78d9d7d07d2bbcc4, 0xef554ce1e02c41f4, + 0x8d7581127eccf94d, 0xa9b53336cb3c8a05, 0x38c42c0bf45c4f91, + 0x640893cdf4488863, 0x80ec34bc575ea568, 0x39f324f5b48eaa40, + 0xe9d9ed1f8eff527f, 0x9224fc058cc5a214, 0xbaba00b04cfe7741, + 0x309a9f120fcf52af, 0xa558f3ec65626212, 0x424bec8b7adabe2f, + 0x41622513a6aea433, 0xb88da2d5324ca798, 0xd287733b245528a4, + 0x9a44697e6d68aec3, 0x7b1093be2f49bb28, 0x50bbec632e3d8aad, + 0x6cd90723e1ea8283, 0x897b9e7431b02bf3, 0x219efdcb338a7047, + 0x3b0311f0a27c0656, 0xdb17bf91c0db96e7, 0x8cd4fd6b4e85a5b2, + 0xfab071054ba6409d, 0x40d6fe831fa9dfd9, 0xaf358debad7d791e, + 0xeb8d0e25a65e3e58, 0xbbcbd3df14e08580, 0xcf751f27ecdab2b, + 0x2b4da14f2613d8f4 +}; + +#endif /* ZSTD_LDM_GEARTAB_H */ +/**** ended inlining zstd_ldm_geartab.h ****/ + +#define LDM_BUCKET_SIZE_LOG 3 +#define LDM_MIN_MATCH_LENGTH 64 +#define LDM_HASH_RLOG 7 + +typedef struct { + U64 rolling; + U64 stopMask; +} ldmRollingHashState_t; + +/** ZSTD_ldm_gear_init(): + * + * Initializes the rolling hash state such that it will honor the + * settings in params. */ +static void ZSTD_ldm_gear_init(ldmRollingHashState_t* state, ldmParams_t const* params) +{ + unsigned maxBitsInMask = MIN(params->minMatchLength, 64); + unsigned hashRateLog = params->hashRateLog; + + state->rolling = ~(U32)0; + + /* The choice of the splitting criterion is subject to two conditions: + * 1. it has to trigger on average every 2^(hashRateLog) bytes; + * 2. ideally, it has to depend on a window of minMatchLength bytes. + * + * In the gear hash algorithm, bit n depends on the last n bytes; + * so in order to obtain a good quality splitting criterion it is + * preferable to use bits with high weight. + * + * To match condition 1 we use a mask with hashRateLog bits set + * and, because of the previous remark, we make sure these bits + * have the highest possible weight while still respecting + * condition 2. + */ + if (hashRateLog > 0 && hashRateLog <= maxBitsInMask) { + state->stopMask = (((U64)1 << hashRateLog) - 1) << (maxBitsInMask - hashRateLog); + } else { + /* In this degenerate case we simply honor the hash rate. */ + state->stopMask = ((U64)1 << hashRateLog) - 1; + } +} + +/** ZSTD_ldm_gear_reset() + * Feeds [data, data + minMatchLength) into the hash without registering any + * splits. This effectively resets the hash state. 
This is used when skipping + * over data, either at the beginning of a block, or skipping sections. + */ +static void ZSTD_ldm_gear_reset(ldmRollingHashState_t* state, + BYTE const* data, size_t minMatchLength) +{ + U64 hash = state->rolling; + size_t n = 0; + +#define GEAR_ITER_ONCE() do { \ + hash = (hash << 1) + ZSTD_ldm_gearTab[data[n] & 0xff]; \ + n += 1; \ + } while (0) + while (n + 3 < minMatchLength) { + GEAR_ITER_ONCE(); + GEAR_ITER_ONCE(); + GEAR_ITER_ONCE(); + GEAR_ITER_ONCE(); + } + while (n < minMatchLength) { + GEAR_ITER_ONCE(); + } +#undef GEAR_ITER_ONCE +} + +/** ZSTD_ldm_gear_feed(): + * + * Registers in the splits array all the split points found in the first + * size bytes following the data pointer. This function terminates when + * either all the data has been processed or LDM_BATCH_SIZE splits are + * present in the splits array. + * + * Precondition: The splits array must not be full. + * Returns: The number of bytes processed. */ +static size_t ZSTD_ldm_gear_feed(ldmRollingHashState_t* state, + BYTE const* data, size_t size, + size_t* splits, unsigned* numSplits) +{ + size_t n; + U64 hash, mask; + + hash = state->rolling; + mask = state->stopMask; + n = 0; + +#define GEAR_ITER_ONCE() do { \ + hash = (hash << 1) + ZSTD_ldm_gearTab[data[n] & 0xff]; \ + n += 1; \ + if (UNLIKELY((hash & mask) == 0)) { \ + splits[*numSplits] = n; \ + *numSplits += 1; \ + if (*numSplits == LDM_BATCH_SIZE) \ + goto done; \ + } \ + } while (0) + + while (n + 3 < size) { + GEAR_ITER_ONCE(); + GEAR_ITER_ONCE(); + GEAR_ITER_ONCE(); + GEAR_ITER_ONCE(); + } + while (n < size) { + GEAR_ITER_ONCE(); + } + +#undef GEAR_ITER_ONCE + +done: + state->rolling = hash; + return n; +} + +void ZSTD_ldm_adjustParameters(ldmParams_t* params, + ZSTD_compressionParameters const* cParams) +{ + params->windowLog = cParams->windowLog; + ZSTD_STATIC_ASSERT(LDM_BUCKET_SIZE_LOG <= ZSTD_LDM_BUCKETSIZELOG_MAX); + DEBUGLOG(4, "ZSTD_ldm_adjustParameters"); + if (!params->bucketSizeLog) params->bucketSizeLog = LDM_BUCKET_SIZE_LOG; + if (!params->minMatchLength) params->minMatchLength = LDM_MIN_MATCH_LENGTH; + if (params->hashLog == 0) { + params->hashLog = MAX(ZSTD_HASHLOG_MIN, params->windowLog - LDM_HASH_RLOG); + assert(params->hashLog <= ZSTD_HASHLOG_MAX); + } + if (params->hashRateLog == 0) { + params->hashRateLog = params->windowLog < params->hashLog + ? 0 + : params->windowLog - params->hashLog; + } + params->bucketSizeLog = MIN(params->bucketSizeLog, params->hashLog); +} + +size_t ZSTD_ldm_getTableSize(ldmParams_t params) +{ + size_t const ldmHSize = ((size_t)1) << params.hashLog; + size_t const ldmBucketSizeLog = MIN(params.bucketSizeLog, params.hashLog); + size_t const ldmBucketSize = ((size_t)1) << (params.hashLog - ldmBucketSizeLog); + size_t const totalSize = ZSTD_cwksp_alloc_size(ldmBucketSize) + + ZSTD_cwksp_alloc_size(ldmHSize * sizeof(ldmEntry_t)); + return params.enableLdm ? totalSize : 0; +} + +size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize) +{ + return params.enableLdm ? (maxChunkSize / params.minMatchLength) : 0; +} + +/** ZSTD_ldm_getBucket() : + * Returns a pointer to the start of the bucket associated with hash. 
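A toy, self-contained version (not part of the diff) of the gear rolling hash fed by ZSTD_ldm_gear_feed above: each byte is folded in as hash = (hash << 1) + tab[byte], and a position becomes a split candidate whenever (hash & stopMask) == 0, which on random input fires about once every 2^hashRateLog bytes. The randomly filled table and the names gear_tab/find_splits are assumptions of this sketch; zstd ships the fixed ZSTD_ldm_gearTab and positions the mask bits according to minMatchLength.

/* Sketch only: toy gear rolling hash with a stop-mask split criterion. */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

static uint64_t gear_tab[256];

static void gear_tab_init(void)
{
    /* Any fixed pseudo-random 64-bit values work; zstd uses a hard-coded table. */
    uint64_t x = 0x9E3779B97F4A7C15ULL;
    for (int i = 0; i < 256; ++i) {
        x ^= x << 13; x ^= x >> 7; x ^= x << 17;   /* xorshift64 */
        gear_tab[i] = x;
    }
}

/* Print split positions in data[0..size), using the top hashRateLog hash bits. */
static void find_splits(const uint8_t* data, size_t size, unsigned hashRateLog)
{
    uint64_t const stopMask = (((uint64_t)1 << hashRateLog) - 1) << (64 - hashRateLog);
    uint64_t hash = 0;
    for (size_t n = 0; n < size; ++n) {
        hash = (hash << 1) + gear_tab[data[n]];
        if ((hash & stopMask) == 0)
            printf("split at %zu\n", n + 1);
    }
}

int main(void)
{
    uint8_t buf[1 << 16];
    gear_tab_init();
    for (size_t i = 0; i < sizeof(buf); ++i) buf[i] = (uint8_t)rand();
    find_splits(buf, sizeof(buf), 7);   /* expect roughly 2^16 / 2^7 = 512 splits */
    return 0;
}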
*/ +static ldmEntry_t* ZSTD_ldm_getBucket( + ldmState_t* ldmState, size_t hash, ldmParams_t const ldmParams) +{ + return ldmState->hashTable + (hash << ldmParams.bucketSizeLog); +} + +/** ZSTD_ldm_insertEntry() : + * Insert the entry with corresponding hash into the hash table */ +static void ZSTD_ldm_insertEntry(ldmState_t* ldmState, + size_t const hash, const ldmEntry_t entry, + ldmParams_t const ldmParams) +{ + BYTE* const pOffset = ldmState->bucketOffsets + hash; + unsigned const offset = *pOffset; + + *(ZSTD_ldm_getBucket(ldmState, hash, ldmParams) + offset) = entry; + *pOffset = (BYTE)((offset + 1) & ((1u << ldmParams.bucketSizeLog) - 1)); + +} + +/** ZSTD_ldm_countBackwardsMatch() : + * Returns the number of bytes that match backwards before pIn and pMatch. + * + * We count only bytes where pMatch >= pBase and pIn >= pAnchor. */ +static size_t ZSTD_ldm_countBackwardsMatch( + const BYTE* pIn, const BYTE* pAnchor, + const BYTE* pMatch, const BYTE* pMatchBase) +{ + size_t matchLength = 0; + while (pIn > pAnchor && pMatch > pMatchBase && pIn[-1] == pMatch[-1]) { + pIn--; + pMatch--; + matchLength++; + } + return matchLength; +} + +/** ZSTD_ldm_countBackwardsMatch_2segments() : + * Returns the number of bytes that match backwards from pMatch, + * even with the backwards match spanning 2 different segments. + * + * On reaching `pMatchBase`, start counting from mEnd */ +static size_t ZSTD_ldm_countBackwardsMatch_2segments( + const BYTE* pIn, const BYTE* pAnchor, + const BYTE* pMatch, const BYTE* pMatchBase, + const BYTE* pExtDictStart, const BYTE* pExtDictEnd) +{ + size_t matchLength = ZSTD_ldm_countBackwardsMatch(pIn, pAnchor, pMatch, pMatchBase); + if (pMatch - matchLength != pMatchBase || pMatchBase == pExtDictStart) { + /* If backwards match is entirely in the extDict or prefix, immediately return */ + return matchLength; + } + DEBUGLOG(7, "ZSTD_ldm_countBackwardsMatch_2segments: found 2-parts backwards match (length in prefix==%zu)", matchLength); + matchLength += ZSTD_ldm_countBackwardsMatch(pIn - matchLength, pAnchor, pExtDictEnd, pExtDictStart); + DEBUGLOG(7, "final backwards match length = %zu", matchLength); + return matchLength; +} + +/** ZSTD_ldm_fillFastTables() : + * + * Fills the relevant tables for the ZSTD_fast and ZSTD_dfast strategies. + * This is similar to ZSTD_loadDictionaryContent. + * + * The tables for the other strategies are filled within their + * block compressors. 
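A compact sketch (not part of the diff) of the bucketed insertion performed by ZSTD_ldm_insertEntry above: each hash value owns 2^bucketSizeLog slots plus one per-bucket cursor, and new entries overwrite the oldest slot in round-robin order via the same (offset + 1) & mask wrap. The sizes and names below are hypothetical.

/* Sketch only: round-robin bucket insertion with a per-bucket cursor. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define BUCKET_SIZE_LOG 2           /* 4 entries per bucket (hypothetical) */
#define HASH_LOG        4           /* 16 buckets (hypothetical) */

typedef struct { uint32_t offset; uint32_t checksum; } entry_t;

static entry_t table[(1u << HASH_LOG) << BUCKET_SIZE_LOG];
static uint8_t bucketOffsets[1u << HASH_LOG];

static void insert(uint32_t hash, entry_t e)
{
    entry_t* bucket = table + ((size_t)hash << BUCKET_SIZE_LOG);
    uint8_t* pOff = bucketOffsets + hash;
    bucket[*pOff] = e;
    *pOff = (uint8_t)((*pOff + 1) & ((1u << BUCKET_SIZE_LOG) - 1));  /* wrap */
}

int main(void)
{
    memset(table, 0, sizeof(table));
    memset(bucketOffsets, 0, sizeof(bucketOffsets));
    for (uint32_t i = 0; i < 6; ++i) {      /* 6 inserts into bucket 3 hit slots 0,1,2,3,0,1 */
        entry_t e = { 100 + i, 0xABCD0000u + i };
        insert(3, e);
    }
    /* prints offsets 104, 105, 102, 103: the two oldest entries were overwritten */
    for (uint32_t s = 0; s < 4; ++s)
        printf("slot %u -> offset %u\n", (unsigned)s,
               (unsigned)table[(3u << BUCKET_SIZE_LOG) + s].offset);
    return 0;
}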
*/ +static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms, + void const* end) +{ + const BYTE* const iend = (const BYTE*)end; + + switch(ms->cParams.strategy) + { + case ZSTD_fast: + ZSTD_fillHashTable(ms, iend, ZSTD_dtlm_fast); + break; + + case ZSTD_dfast: + ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast); + break; + + case ZSTD_greedy: + case ZSTD_lazy: + case ZSTD_lazy2: + case ZSTD_btlazy2: + case ZSTD_btopt: + case ZSTD_btultra: + case ZSTD_btultra2: + break; + default: + assert(0); /* not possible : not a valid strategy id */ + } + + return 0; +} + +void ZSTD_ldm_fillHashTable( + ldmState_t* ldmState, const BYTE* ip, + const BYTE* iend, ldmParams_t const* params) +{ + U32 const minMatchLength = params->minMatchLength; + U32 const hBits = params->hashLog - params->bucketSizeLog; + BYTE const* const base = ldmState->window.base; + BYTE const* const istart = ip; + ldmRollingHashState_t hashState; + size_t* const splits = ldmState->splitIndices; + unsigned numSplits; + + DEBUGLOG(5, "ZSTD_ldm_fillHashTable"); + + ZSTD_ldm_gear_init(&hashState, params); + while (ip < iend) { + size_t hashed; + unsigned n; + + numSplits = 0; + hashed = ZSTD_ldm_gear_feed(&hashState, ip, iend - ip, splits, &numSplits); + + for (n = 0; n < numSplits; n++) { + if (ip + splits[n] >= istart + minMatchLength) { + BYTE const* const split = ip + splits[n] - minMatchLength; + U64 const xxhash = XXH64(split, minMatchLength, 0); + U32 const hash = (U32)(xxhash & (((U32)1 << hBits) - 1)); + ldmEntry_t entry; + + entry.offset = (U32)(split - base); + entry.checksum = (U32)(xxhash >> 32); + ZSTD_ldm_insertEntry(ldmState, hash, entry, *params); + } + } + + ip += hashed; + } +} + + +/** ZSTD_ldm_limitTableUpdate() : + * + * Sets cctx->nextToUpdate to a position corresponding closer to anchor + * if it is far way + * (after a long match, only update tables a limited amount). */ +static void ZSTD_ldm_limitTableUpdate(ZSTD_matchState_t* ms, const BYTE* anchor) +{ + U32 const curr = (U32)(anchor - ms->window.base); + if (curr > ms->nextToUpdate + 1024) { + ms->nextToUpdate = + curr - MIN(512, curr - ms->nextToUpdate - 1024); + } +} + +static size_t ZSTD_ldm_generateSequences_internal( + ldmState_t* ldmState, rawSeqStore_t* rawSeqStore, + ldmParams_t const* params, void const* src, size_t srcSize) +{ + /* LDM parameters */ + int const extDict = ZSTD_window_hasExtDict(ldmState->window); + U32 const minMatchLength = params->minMatchLength; + U32 const entsPerBucket = 1U << params->bucketSizeLog; + U32 const hBits = params->hashLog - params->bucketSizeLog; + /* Prefix and extDict parameters */ + U32 const dictLimit = ldmState->window.dictLimit; + U32 const lowestIndex = extDict ? ldmState->window.lowLimit : dictLimit; + BYTE const* const base = ldmState->window.base; + BYTE const* const dictBase = extDict ? ldmState->window.dictBase : NULL; + BYTE const* const dictStart = extDict ? dictBase + lowestIndex : NULL; + BYTE const* const dictEnd = extDict ? 
dictBase + dictLimit : NULL; + BYTE const* const lowPrefixPtr = base + dictLimit; + /* Input bounds */ + BYTE const* const istart = (BYTE const*)src; + BYTE const* const iend = istart + srcSize; + BYTE const* const ilimit = iend - HASH_READ_SIZE; + /* Input positions */ + BYTE const* anchor = istart; + BYTE const* ip = istart; + /* Rolling hash state */ + ldmRollingHashState_t hashState; + /* Arrays for staged-processing */ + size_t* const splits = ldmState->splitIndices; + ldmMatchCandidate_t* const candidates = ldmState->matchCandidates; + unsigned numSplits; + + if (srcSize < minMatchLength) + return iend - anchor; + + /* Initialize the rolling hash state with the first minMatchLength bytes */ + ZSTD_ldm_gear_init(&hashState, params); + ZSTD_ldm_gear_reset(&hashState, ip, minMatchLength); + ip += minMatchLength; + + while (ip < ilimit) { + size_t hashed; + unsigned n; + + numSplits = 0; + hashed = ZSTD_ldm_gear_feed(&hashState, ip, ilimit - ip, + splits, &numSplits); + + for (n = 0; n < numSplits; n++) { + BYTE const* const split = ip + splits[n] - minMatchLength; + U64 const xxhash = XXH64(split, minMatchLength, 0); + U32 const hash = (U32)(xxhash & (((U32)1 << hBits) - 1)); + + candidates[n].split = split; + candidates[n].hash = hash; + candidates[n].checksum = (U32)(xxhash >> 32); + candidates[n].bucket = ZSTD_ldm_getBucket(ldmState, hash, *params); + PREFETCH_L1(candidates[n].bucket); + } + + for (n = 0; n < numSplits; n++) { + size_t forwardMatchLength = 0, backwardMatchLength = 0, + bestMatchLength = 0, mLength; + U32 offset; + BYTE const* const split = candidates[n].split; + U32 const checksum = candidates[n].checksum; + U32 const hash = candidates[n].hash; + ldmEntry_t* const bucket = candidates[n].bucket; + ldmEntry_t const* cur; + ldmEntry_t const* bestEntry = NULL; + ldmEntry_t newEntry; + + newEntry.offset = (U32)(split - base); + newEntry.checksum = checksum; + + /* If a split point would generate a sequence overlapping with + * the previous one, we merely register it in the hash table and + * move on */ + if (split < anchor) { + ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params); + continue; + } + + for (cur = bucket; cur < bucket + entsPerBucket; cur++) { + size_t curForwardMatchLength, curBackwardMatchLength, + curTotalMatchLength; + if (cur->checksum != checksum || cur->offset <= lowestIndex) { + continue; + } + if (extDict) { + BYTE const* const curMatchBase = + cur->offset < dictLimit ? dictBase : base; + BYTE const* const pMatch = curMatchBase + cur->offset; + BYTE const* const matchEnd = + cur->offset < dictLimit ? dictEnd : iend; + BYTE const* const lowMatchPtr = + cur->offset < dictLimit ? 
dictStart : lowPrefixPtr; + curForwardMatchLength = + ZSTD_count_2segments(split, pMatch, iend, matchEnd, lowPrefixPtr); + if (curForwardMatchLength < minMatchLength) { + continue; + } + curBackwardMatchLength = ZSTD_ldm_countBackwardsMatch_2segments( + split, anchor, pMatch, lowMatchPtr, dictStart, dictEnd); + } else { /* !extDict */ + BYTE const* const pMatch = base + cur->offset; + curForwardMatchLength = ZSTD_count(split, pMatch, iend); + if (curForwardMatchLength < minMatchLength) { + continue; + } + curBackwardMatchLength = + ZSTD_ldm_countBackwardsMatch(split, anchor, pMatch, lowPrefixPtr); + } + curTotalMatchLength = curForwardMatchLength + curBackwardMatchLength; + + if (curTotalMatchLength > bestMatchLength) { + bestMatchLength = curTotalMatchLength; + forwardMatchLength = curForwardMatchLength; + backwardMatchLength = curBackwardMatchLength; + bestEntry = cur; + } + } + + /* No match found -- insert an entry into the hash table + * and process the next candidate match */ + if (bestEntry == NULL) { + ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params); + continue; + } + + /* Match found */ + offset = (U32)(split - base) - bestEntry->offset; + mLength = forwardMatchLength + backwardMatchLength; + { + rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size; + + /* Out of sequence storage */ + if (rawSeqStore->size == rawSeqStore->capacity) + return ERROR(dstSize_tooSmall); + seq->litLength = (U32)(split - backwardMatchLength - anchor); + seq->matchLength = (U32)mLength; + seq->offset = offset; + rawSeqStore->size++; + } + + /* Insert the current entry into the hash table --- it must be + * done after the previous block to avoid clobbering bestEntry */ + ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params); + + anchor = split + forwardMatchLength; + + /* If we find a match that ends after the data that we've hashed + * then we have a repeating, overlapping, pattern. E.g. all zeros. + * If one repetition of the pattern matches our `stopMask` then all + * repetitions will. We don't need to insert them all into out table, + * only the first one. So skip over overlapping matches. + * This is a major speed boost (20x) for compressing a single byte + * repeated, when that byte ends up in the table. + */ + if (anchor > ip + hashed) { + ZSTD_ldm_gear_reset(&hashState, anchor - minMatchLength, minMatchLength); + /* Continue the outter loop at anchor (ip + hashed == anchor). */ + ip = anchor - hashed; + break; + } + } + + ip += hashed; + } + + return iend - anchor; +} + +/*! ZSTD_ldm_reduceTable() : + * reduce table indexes by `reducerValue` */ +static void ZSTD_ldm_reduceTable(ldmEntry_t* const table, U32 const size, + U32 const reducerValue) +{ + U32 u; + for (u = 0; u < size; u++) { + if (table[u].offset < reducerValue) table[u].offset = 0; + else table[u].offset -= reducerValue; + } +} + +size_t ZSTD_ldm_generateSequences( + ldmState_t* ldmState, rawSeqStore_t* sequences, + ldmParams_t const* params, void const* src, size_t srcSize) +{ + U32 const maxDist = 1U << params->windowLog; + BYTE const* const istart = (BYTE const*)src; + BYTE const* const iend = istart + srcSize; + size_t const kMaxChunkSize = 1 << 20; + size_t const nbChunks = (srcSize / kMaxChunkSize) + ((srcSize % kMaxChunkSize) != 0); + size_t chunk; + size_t leftoverSize = 0; + + assert(ZSTD_CHUNKSIZE_MAX >= kMaxChunkSize); + /* Check that ZSTD_window_update() has been called for this chunk prior + * to passing it to this function. 
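A short sketch (not part of the diff) of the chunking arithmetic used by ZSTD_ldm_generateSequences above: srcSize is carved into at-most-kMaxChunkSize pieces, with the final partial chunk accounted for by the ((srcSize % kMaxChunkSize) != 0) term. The example input size is hypothetical.

/* Sketch only: chunk count and per-chunk bounds, mirroring the loop above. */
#include <stddef.h>
#include <stdio.h>

int main(void)
{
    size_t const kMaxChunkSize = (size_t)1 << 20;            /* 1 MB, as above */
    size_t const srcSize = ((size_t)3 << 20) + 12345;        /* hypothetical: 3 MB + change */
    size_t const nbChunks = (srcSize / kMaxChunkSize) + ((srcSize % kMaxChunkSize) != 0);

    for (size_t chunk = 0; chunk < nbChunks; ++chunk) {
        size_t const chunkStart = chunk * kMaxChunkSize;
        size_t const remaining = srcSize - chunkStart;
        size_t const chunkSize = remaining < kMaxChunkSize ? remaining : kMaxChunkSize;
        printf("chunk %zu: [%zu, %zu) size %zu\n",
               chunk, chunkStart, chunkStart + chunkSize, chunkSize);
    }
    return 0;                                                /* prints 4 chunks */
}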
+ */ + assert(ldmState->window.nextSrc >= (BYTE const*)src + srcSize); + /* The input could be very large (in zstdmt), so it must be broken up into + * chunks to enforce the maximum distance and handle overflow correction. + */ + assert(sequences->pos <= sequences->size); + assert(sequences->size <= sequences->capacity); + for (chunk = 0; chunk < nbChunks && sequences->size < sequences->capacity; ++chunk) { + BYTE const* const chunkStart = istart + chunk * kMaxChunkSize; + size_t const remaining = (size_t)(iend - chunkStart); + BYTE const *const chunkEnd = + (remaining < kMaxChunkSize) ? iend : chunkStart + kMaxChunkSize; + size_t const chunkSize = chunkEnd - chunkStart; + size_t newLeftoverSize; + size_t const prevSize = sequences->size; + + assert(chunkStart < iend); + /* 1. Perform overflow correction if necessary. */ + if (ZSTD_window_needOverflowCorrection(ldmState->window, 0, maxDist, ldmState->loadedDictEnd, chunkStart, chunkEnd)) { + U32 const ldmHSize = 1U << params->hashLog; + U32 const correction = ZSTD_window_correctOverflow( + &ldmState->window, /* cycleLog */ 0, maxDist, chunkStart); + ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction); + /* invalidate dictionaries on overflow correction */ + ldmState->loadedDictEnd = 0; + } + /* 2. We enforce the maximum offset allowed. + * + * kMaxChunkSize should be small enough that we don't lose too much of + * the window through early invalidation. + * TODO: * Test the chunk size. + * * Try invalidation after the sequence generation and test the + * the offset against maxDist directly. + * + * NOTE: Because of dictionaries + sequence splitting we MUST make sure + * that any offset used is valid at the END of the sequence, since it may + * be split into two sequences. This condition holds when using + * ZSTD_window_enforceMaxDist(), but if we move to checking offsets + * against maxDist directly, we'll have to carefully handle that case. + */ + ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, &ldmState->loadedDictEnd, NULL); + /* 3. Generate the sequences for the chunk, and get newLeftoverSize. */ + newLeftoverSize = ZSTD_ldm_generateSequences_internal( + ldmState, sequences, params, chunkStart, chunkSize); + if (ZSTD_isError(newLeftoverSize)) + return newLeftoverSize; + /* 4. We add the leftover literals from previous iterations to the first + * newly generated sequence, or add the `newLeftoverSize` if none are + * generated. 
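A self-contained sketch (not part of the diff) of the leftover-literal bookkeeping described just above and implemented right after this note: literals left over from earlier chunks are folded into the first sequence the current chunk produced, or the whole chunk joins the leftovers when nothing was produced. The struct and function names are hypothetical stand-ins for the rawSeq handling.

/* Sketch only: carrying leftover literals across chunk boundaries. */
#include <stdint.h>
#include <stdio.h>

typedef struct { uint32_t litLength, matchLength, offset; } seq_t;

/* Fold leftovers into the first sequence this chunk produced (if any);
 * otherwise the whole chunk becomes leftover literals. */
static size_t carryLeftover(seq_t* seqs, size_t prevSize, size_t newSize,
                            size_t leftoverSize, size_t chunkSize, size_t newLeftoverSize)
{
    if (prevSize < newSize) {
        seqs[prevSize].litLength += (uint32_t)leftoverSize;
        return newLeftoverSize;
    }
    return leftoverSize + chunkSize;
}

int main(void)
{
    seq_t seqs[4] = { { 40, 64, 777 } };
    size_t leftover = 0;
    leftover = carryLeftover(seqs, 0, 0, leftover, 1000, 1000); /* chunk 1: no sequences */
    leftover = carryLeftover(seqs, 0, 1, leftover, 2000, 5);    /* chunk 2: one sequence */
    printf("seqs[0].litLength = %u, leftover = %zu\n",
           (unsigned)seqs[0].litLength, leftover);
    /* prints: seqs[0].litLength = 1040, leftover = 5 */
    return 0;
}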
+ */ + /* Prepend the leftover literals from the last call */ + if (prevSize < sequences->size) { + sequences->seq[prevSize].litLength += (U32)leftoverSize; + leftoverSize = newLeftoverSize; + } else { + assert(newLeftoverSize == chunkSize); + leftoverSize += chunkSize; + } + } + return 0; +} + +void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch) { + while (srcSize > 0 && rawSeqStore->pos < rawSeqStore->size) { + rawSeq* seq = rawSeqStore->seq + rawSeqStore->pos; + if (srcSize <= seq->litLength) { + /* Skip past srcSize literals */ + seq->litLength -= (U32)srcSize; + return; + } + srcSize -= seq->litLength; + seq->litLength = 0; + if (srcSize < seq->matchLength) { + /* Skip past the first srcSize of the match */ + seq->matchLength -= (U32)srcSize; + if (seq->matchLength < minMatch) { + /* The match is too short, omit it */ + if (rawSeqStore->pos + 1 < rawSeqStore->size) { + seq[1].litLength += seq[0].matchLength; + } + rawSeqStore->pos++; + } + return; + } + srcSize -= seq->matchLength; + seq->matchLength = 0; + rawSeqStore->pos++; + } +} + +/** + * If the sequence length is longer than remaining then the sequence is split + * between this block and the next. + * + * Returns the current sequence to handle, or if the rest of the block should + * be literals, it returns a sequence with offset == 0. + */ +static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore, + U32 const remaining, U32 const minMatch) +{ + rawSeq sequence = rawSeqStore->seq[rawSeqStore->pos]; + assert(sequence.offset > 0); + /* Likely: No partial sequence */ + if (remaining >= sequence.litLength + sequence.matchLength) { + rawSeqStore->pos++; + return sequence; + } + /* Cut the sequence short (offset == 0 ==> rest is literals). */ + if (remaining <= sequence.litLength) { + sequence.offset = 0; + } else if (remaining < sequence.litLength + sequence.matchLength) { + sequence.matchLength = remaining - sequence.litLength; + if (sequence.matchLength < minMatch) { + sequence.offset = 0; + } + } + /* Skip past `remaining` bytes for the future sequences. 
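A standalone restatement (not part of the diff) of the truncation rule in maybeSplitSequence above, with concrete numbers: a sequence that does not fit in the remaining block either keeps only part of its match or, when the cut match would fall below minMatch, is demoted to pure literals (offset == 0). Names are hypothetical and the rawSeqStore skipping step is omitted.

/* Sketch only: splitting a raw sequence at a block boundary. */
#include <stdint.h>
#include <stdio.h>

typedef struct { uint32_t litLength, matchLength, offset; } seq_t;

static seq_t splitAt(seq_t s, uint32_t remaining, uint32_t minMatch)
{
    if (remaining >= s.litLength + s.matchLength) return s;   /* fits entirely       */
    if (remaining <= s.litLength) { s.offset = 0; return s; } /* only literals fit   */
    s.matchLength = remaining - s.litLength;                  /* cut the match       */
    if (s.matchLength < minMatch) s.offset = 0;               /* too short: literals */
    return s;
}

int main(void)
{
    seq_t const s = { 10, 20, 5 };
    seq_t a = splitAt(s, 40, 4);  /* fits: unchanged                 */
    seq_t b = splitAt(s, 15, 4);  /* match cut to 5                  */
    seq_t c = splitAt(s, 12, 4);  /* cut match (2) < 4 -> literals   */
    printf("a: ml=%u off=%u  b: ml=%u off=%u  c: off=%u\n",
           (unsigned)a.matchLength, (unsigned)a.offset,
           (unsigned)b.matchLength, (unsigned)b.offset, (unsigned)c.offset);
    return 0;
}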
*/ + ZSTD_ldm_skipSequences(rawSeqStore, remaining, minMatch); + return sequence; +} + +void ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) { + U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes); + while (currPos && rawSeqStore->pos < rawSeqStore->size) { + rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos]; + if (currPos >= currSeq.litLength + currSeq.matchLength) { + currPos -= currSeq.litLength + currSeq.matchLength; + rawSeqStore->pos++; + } else { + rawSeqStore->posInSequence = currPos; + break; + } + } + if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) { + rawSeqStore->posInSequence = 0; + } +} + +size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_useRowMatchFinderMode_e useRowMatchFinder, + void const* src, size_t srcSize) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + unsigned const minMatch = cParams->minMatch; + ZSTD_blockCompressor const blockCompressor = + ZSTD_selectBlockCompressor(cParams->strategy, useRowMatchFinder, ZSTD_matchState_dictMode(ms)); + /* Input bounds */ + BYTE const* const istart = (BYTE const*)src; + BYTE const* const iend = istart + srcSize; + /* Input positions */ + BYTE const* ip = istart; + + DEBUGLOG(5, "ZSTD_ldm_blockCompress: srcSize=%zu", srcSize); + /* If using opt parser, use LDMs only as candidates rather than always accepting them */ + if (cParams->strategy >= ZSTD_btopt) { + size_t lastLLSize; + ms->ldmSeqStore = rawSeqStore; + lastLLSize = blockCompressor(ms, seqStore, rep, src, srcSize); + ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore, srcSize); + return lastLLSize; + } + + assert(rawSeqStore->pos <= rawSeqStore->size); + assert(rawSeqStore->size <= rawSeqStore->capacity); + /* Loop through each sequence and apply the block compressor to the literals */ + while (rawSeqStore->pos < rawSeqStore->size && ip < iend) { + /* maybeSplitSequence updates rawSeqStore->pos */ + rawSeq const sequence = maybeSplitSequence(rawSeqStore, + (U32)(iend - ip), minMatch); + int i; + /* End signal */ + if (sequence.offset == 0) + break; + + assert(ip + sequence.litLength + sequence.matchLength <= iend); + + /* Fill tables for block compressor */ + ZSTD_ldm_limitTableUpdate(ms, ip); + ZSTD_ldm_fillFastTables(ms, ip); + /* Run the block compressor */ + DEBUGLOG(5, "pos %u : calling block compressor on segment of size %u", (unsigned)(ip-istart), sequence.litLength); + { + size_t const newLitLength = + blockCompressor(ms, seqStore, rep, ip, sequence.litLength); + ip += sequence.litLength; + /* Update the repcodes */ + for (i = ZSTD_REP_NUM - 1; i > 0; i--) + rep[i] = rep[i-1]; + rep[0] = sequence.offset; + /* Store the sequence */ + ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend, + sequence.offset + ZSTD_REP_MOVE, + sequence.matchLength - MINMATCH); + ip += sequence.matchLength; + } + } + /* Fill the tables for the block compressor */ + ZSTD_ldm_limitTableUpdate(ms, ip); + ZSTD_ldm_fillFastTables(ms, ip); + /* Compress the last literals */ + return blockCompressor(ms, seqStore, rep, ip, iend - ip); +} +/**** ended inlining compress/zstd_ldm.c ****/ +/**** start inlining compress/zstd_opt.c ****/ +/* + * Copyright (c) Przemyslaw Skibinski, Yann Collet, Facebook, Inc. + * All rights reserved. 
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/**** skipping file: zstd_compress_internal.h ****/
+/**** skipping file: hist.h ****/
+/**** skipping file: zstd_opt.h ****/
+
+
+#define ZSTD_LITFREQ_ADD    2   /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
+#define ZSTD_FREQ_DIV       4   /* log factor when using previous stats to init next stats */
+#define ZSTD_MAX_PRICE      (1<<30)
+
+#define ZSTD_PREDEF_THRESHOLD 1024   /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */
+
+
+/*-*************************************
+*  Price functions for optimal parser
+***************************************/
+
+#if 0    /* approximation at bit level */
+#  define BITCOST_ACCURACY 0
+#  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
+#  define WEIGHT(stat)  ((void)opt, ZSTD_bitWeight(stat))
+#elif 0  /* fractional bit accuracy */
+#  define BITCOST_ACCURACY 8
+#  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
+#  define WEIGHT(stat,opt) ((void)opt, ZSTD_fracWeight(stat))
+#else    /* opt==approx, ultra==accurate */
+#  define BITCOST_ACCURACY 8
+#  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
+#  define WEIGHT(stat,opt) (opt ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
+#endif
+
+MEM_STATIC U32 ZSTD_bitWeight(U32 stat)
+{
+    return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER);
+}
+
+MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
+{
+    U32 const stat = rawStat + 1;
+    U32 const hb = ZSTD_highbit32(stat);
+    U32 const BWeight = hb * BITCOST_MULTIPLIER;
+    U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb;
+    U32 const weight = BWeight + FWeight;
+    assert(hb + BITCOST_ACCURACY < 31);
+    return weight;
+}
+
+#if (DEBUGLEVEL>=2)
+/* debugging function,
+ * @return price in bytes as fractional value
+ * for debug messages only */
+MEM_STATIC double ZSTD_fCost(U32 price)
+{
+    return (double)price / (BITCOST_MULTIPLIER*8);
+}
+#endif
+
+static int ZSTD_compressedLiterals(optState_t const* const optPtr)
+{
+    return optPtr->literalCompressionMode != ZSTD_lcm_uncompressed;
+}
+
+static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
+{
+    if (ZSTD_compressedLiterals(optPtr))
+        optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel);
+    optPtr->litLengthSumBasePrice = WEIGHT(optPtr->litLengthSum, optLevel);
+    optPtr->matchLengthSumBasePrice = WEIGHT(optPtr->matchLengthSum, optLevel);
+    optPtr->offCodeSumBasePrice = WEIGHT(optPtr->offCodeSum, optLevel);
+}
+
+
+/* ZSTD_downscaleStat() :
+ * reduce all elements in table by a factor 2^(ZSTD_FREQ_DIV+malus)
+ * return the resulting sum of elements */
+static U32 ZSTD_downscaleStat(unsigned* table, U32 lastEltIndex, int malus)
+{
+    U32 s, sum=0;
+    DEBUGLOG(5, "ZSTD_downscaleStat (nbElts=%u)", (unsigned)lastEltIndex+1);
+    assert(ZSTD_FREQ_DIV+malus > 0 && ZSTD_FREQ_DIV+malus < 31);
+    for (s=0; s<lastEltIndex+1; s++) {
+        table[s] = 1 + (table[s] >> (ZSTD_FREQ_DIV+malus));
+        sum += table[s];
+    }
+    return sum;
+}
+
+/* ZSTD_rescaleFreqs() :
+ * if first block (detected by optPtr->litLengthSum == 0) : init statistics
+ *    take hints from dictionary if there is one
+ *    or init from zero, using src for literals stats, or flat 1 for match symbols
+ * otherwise downscale existing stats, to be used as seed for next block.
+ */ +static void +ZSTD_rescaleFreqs(optState_t* const optPtr, + const BYTE* const src, size_t const srcSize, + int const optLevel) +{ + int const compressedLiterals = ZSTD_compressedLiterals(optPtr); + DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize); + optPtr->priceType = zop_dynamic; + + if (optPtr->litLengthSum == 0) { /* first block : init */ + if (srcSize <= ZSTD_PREDEF_THRESHOLD) { /* heuristic */ + DEBUGLOG(5, "(srcSize <= ZSTD_PREDEF_THRESHOLD) => zop_predef"); + optPtr->priceType = zop_predef; + } + + assert(optPtr->symbolCosts != NULL); + if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) { + /* huffman table presumed generated by dictionary */ + optPtr->priceType = zop_dynamic; + + if (compressedLiterals) { + unsigned lit; + assert(optPtr->litFreq != NULL); + optPtr->litSum = 0; + for (lit=0; lit<=MaxLit; lit++) { + U32 const scaleLog = 11; /* scale to 2K */ + U32 const bitCost = HUF_getNbBits(optPtr->symbolCosts->huf.CTable, lit); + assert(bitCost <= scaleLog); + optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/; + optPtr->litSum += optPtr->litFreq[lit]; + } } + + { unsigned ll; + FSE_CState_t llstate; + FSE_initCState(&llstate, optPtr->symbolCosts->fse.litlengthCTable); + optPtr->litLengthSum = 0; + for (ll=0; ll<=MaxLL; ll++) { + U32 const scaleLog = 10; /* scale to 1K */ + U32 const bitCost = FSE_getMaxNbBits(llstate.symbolTT, ll); + assert(bitCost < scaleLog); + optPtr->litLengthFreq[ll] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/; + optPtr->litLengthSum += optPtr->litLengthFreq[ll]; + } } + + { unsigned ml; + FSE_CState_t mlstate; + FSE_initCState(&mlstate, optPtr->symbolCosts->fse.matchlengthCTable); + optPtr->matchLengthSum = 0; + for (ml=0; ml<=MaxML; ml++) { + U32 const scaleLog = 10; + U32 const bitCost = FSE_getMaxNbBits(mlstate.symbolTT, ml); + assert(bitCost < scaleLog); + optPtr->matchLengthFreq[ml] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/; + optPtr->matchLengthSum += optPtr->matchLengthFreq[ml]; + } } + + { unsigned of; + FSE_CState_t ofstate; + FSE_initCState(&ofstate, optPtr->symbolCosts->fse.offcodeCTable); + optPtr->offCodeSum = 0; + for (of=0; of<=MaxOff; of++) { + U32 const scaleLog = 10; + U32 const bitCost = FSE_getMaxNbBits(ofstate.symbolTT, of); + assert(bitCost < scaleLog); + optPtr->offCodeFreq[of] = bitCost ? 
1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/; + optPtr->offCodeSum += optPtr->offCodeFreq[of]; + } } + + } else { /* not a dictionary */ + + assert(optPtr->litFreq != NULL); + if (compressedLiterals) { + unsigned lit = MaxLit; + HIST_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */ + optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1); + } + + { unsigned ll; + for (ll=0; ll<=MaxLL; ll++) + optPtr->litLengthFreq[ll] = 1; + } + optPtr->litLengthSum = MaxLL+1; + + { unsigned ml; + for (ml=0; ml<=MaxML; ml++) + optPtr->matchLengthFreq[ml] = 1; + } + optPtr->matchLengthSum = MaxML+1; + + { unsigned of; + for (of=0; of<=MaxOff; of++) + optPtr->offCodeFreq[of] = 1; + } + optPtr->offCodeSum = MaxOff+1; + + } + + } else { /* new block : re-use previous statistics, scaled down */ + + if (compressedLiterals) + optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1); + optPtr->litLengthSum = ZSTD_downscaleStat(optPtr->litLengthFreq, MaxLL, 0); + optPtr->matchLengthSum = ZSTD_downscaleStat(optPtr->matchLengthFreq, MaxML, 0); + optPtr->offCodeSum = ZSTD_downscaleStat(optPtr->offCodeFreq, MaxOff, 0); + } + + ZSTD_setBasePrices(optPtr, optLevel); +} + +/* ZSTD_rawLiteralsCost() : + * price of literals (only) in specified segment (which length can be 0). + * does not include price of literalLength symbol */ +static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength, + const optState_t* const optPtr, + int optLevel) +{ + if (litLength == 0) return 0; + + if (!ZSTD_compressedLiterals(optPtr)) + return (litLength << 3) * BITCOST_MULTIPLIER; /* Uncompressed - 8 bytes per literal. */ + + if (optPtr->priceType == zop_predef) + return (litLength*6) * BITCOST_MULTIPLIER; /* 6 bit per literal - no statistic used */ + + /* dynamic statistics */ + { U32 price = litLength * optPtr->litSumBasePrice; + U32 u; + for (u=0; u < litLength; u++) { + assert(WEIGHT(optPtr->litFreq[literals[u]], optLevel) <= optPtr->litSumBasePrice); /* literal cost should never be negative */ + price -= WEIGHT(optPtr->litFreq[literals[u]], optLevel); + } + return price; + } +} + +/* ZSTD_litLengthPrice() : + * cost of literalLength symbol */ +static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr, int optLevel) +{ + if (optPtr->priceType == zop_predef) return WEIGHT(litLength, optLevel); + + /* dynamic statistics */ + { U32 const llCode = ZSTD_LLcode(litLength); + return (LL_bits[llCode] * BITCOST_MULTIPLIER) + + optPtr->litLengthSumBasePrice + - WEIGHT(optPtr->litLengthFreq[llCode], optLevel); + } +} + +/* ZSTD_getMatchPrice() : + * Provides the cost of the match part (offset + matchLength) of a sequence + * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence. 
+ * optLevel: when <2, favors small offset for decompression speed (improved cache efficiency) */ +FORCE_INLINE_TEMPLATE U32 +ZSTD_getMatchPrice(U32 const offset, + U32 const matchLength, + const optState_t* const optPtr, + int const optLevel) +{ + U32 price; + U32 const offCode = ZSTD_highbit32(offset+1); + U32 const mlBase = matchLength - MINMATCH; + assert(matchLength >= MINMATCH); + + if (optPtr->priceType == zop_predef) /* fixed scheme, do not use statistics */ + return WEIGHT(mlBase, optLevel) + ((16 + offCode) * BITCOST_MULTIPLIER); + + /* dynamic statistics */ + price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel)); + if ((optLevel<2) /*static*/ && offCode >= 20) + price += (offCode-19)*2 * BITCOST_MULTIPLIER; /* handicap for long distance offsets, favor decompression speed */ + + /* match Length */ + { U32 const mlCode = ZSTD_MLcode(mlBase); + price += (ML_bits[mlCode] * BITCOST_MULTIPLIER) + (optPtr->matchLengthSumBasePrice - WEIGHT(optPtr->matchLengthFreq[mlCode], optLevel)); + } + + price += BITCOST_MULTIPLIER / 5; /* heuristic : make matches a bit more costly to favor less sequences -> faster decompression speed */ + + DEBUGLOG(8, "ZSTD_getMatchPrice(ml:%u) = %u", matchLength, price); + return price; +} + +/* ZSTD_updateStats() : + * assumption : literals + litLengtn <= iend */ +static void ZSTD_updateStats(optState_t* const optPtr, + U32 litLength, const BYTE* literals, + U32 offsetCode, U32 matchLength) +{ + /* literals */ + if (ZSTD_compressedLiterals(optPtr)) { + U32 u; + for (u=0; u < litLength; u++) + optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD; + optPtr->litSum += litLength*ZSTD_LITFREQ_ADD; + } + + /* literal Length */ + { U32 const llCode = ZSTD_LLcode(litLength); + optPtr->litLengthFreq[llCode]++; + optPtr->litLengthSum++; + } + + /* match offset code (0-2=>repCode; 3+=>offset+2) */ + { U32 const offCode = ZSTD_highbit32(offsetCode+1); + assert(offCode <= MaxOff); + optPtr->offCodeFreq[offCode]++; + optPtr->offCodeSum++; + } + + /* match Length */ + { U32 const mlBase = matchLength - MINMATCH; + U32 const mlCode = ZSTD_MLcode(mlBase); + optPtr->matchLengthFreq[mlCode]++; + optPtr->matchLengthSum++; + } +} + + +/* ZSTD_readMINMATCH() : + * function safe only for comparisons + * assumption : memPtr must be at least 4 bytes before end of buffer */ +MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length) +{ + switch (length) + { + default : + case 4 : return MEM_read32(memPtr); + case 3 : if (MEM_isLittleEndian()) + return MEM_read32(memPtr)<<8; + else + return MEM_read32(memPtr)>>8; + } +} + + +/* Update hashTable3 up to ip (excluded) + Assumption : always within prefix (i.e. not within extDict) */ +static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms, + U32* nextToUpdate3, + const BYTE* const ip) +{ + U32* const hashTable3 = ms->hashTable3; + U32 const hashLog3 = ms->hashLog3; + const BYTE* const base = ms->window.base; + U32 idx = *nextToUpdate3; + U32 const target = (U32)(ip - base); + size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3); + assert(hashLog3 > 0); + + while(idx < target) { + hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx; + idx++; + } + + *nextToUpdate3 = target; + return hashTable3[hash3]; +} + + +/*-************************************* +* Binary Tree search +***************************************/ +/** ZSTD_insertBt1() : add one or multiple positions to tree. + * ip : assumed <= iend-8 . 
+ * @return : nb of positions added */ +static U32 ZSTD_insertBt1( + ZSTD_matchState_t* ms, + const BYTE* const ip, const BYTE* const iend, + U32 const mls, const int extDict) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const hashTable = ms->hashTable; + U32 const hashLog = cParams->hashLog; + size_t const h = ZSTD_hashPtr(ip, hashLog, mls); + U32* const bt = ms->chainTable; + U32 const btLog = cParams->chainLog - 1; + U32 const btMask = (1 << btLog) - 1; + U32 matchIndex = hashTable[h]; + size_t commonLengthSmaller=0, commonLengthLarger=0; + const BYTE* const base = ms->window.base; + const BYTE* const dictBase = ms->window.dictBase; + const U32 dictLimit = ms->window.dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const BYTE* match; + const U32 curr = (U32)(ip-base); + const U32 btLow = btMask >= curr ? 0 : curr - btMask; + U32* smallerPtr = bt + 2*(curr&btMask); + U32* largerPtr = smallerPtr + 1; + U32 dummy32; /* to be nullified at the end */ + U32 const windowLow = ms->window.lowLimit; + U32 matchEndIdx = curr+8+1; + size_t bestLength = 8; + U32 nbCompares = 1U << cParams->searchLog; +#ifdef ZSTD_C_PREDICT + U32 predictedSmall = *(bt + 2*((curr-1)&btMask) + 0); + U32 predictedLarge = *(bt + 2*((curr-1)&btMask) + 1); + predictedSmall += (predictedSmall>0); + predictedLarge += (predictedLarge>0); +#endif /* ZSTD_C_PREDICT */ + + DEBUGLOG(8, "ZSTD_insertBt1 (%u)", curr); + + assert(ip <= iend-8); /* required for h calculation */ + hashTable[h] = curr; /* Update Hash Table */ + + assert(windowLow > 0); + while (nbCompares-- && (matchIndex >= windowLow)) { + U32* const nextPtr = bt + 2*(matchIndex & btMask); + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + assert(matchIndex < curr); + +#ifdef ZSTD_C_PREDICT /* note : can create issues when hlog small <= 11 */ + const U32* predictPtr = bt + 2*((matchIndex-1) & btMask); /* written this way, as bt is a roll buffer */ + if (matchIndex == predictedSmall) { + /* no need to check length, result known */ + *smallerPtr = matchIndex; + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ + matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ + predictedSmall = predictPtr[1] + (predictPtr[1]>0); + continue; + } + if (matchIndex == predictedLarge) { + *largerPtr = matchIndex; + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + largerPtr = nextPtr; + matchIndex = nextPtr[0]; + predictedLarge = predictPtr[0] + (predictPtr[0]>0); + continue; + } +#endif + + if (!extDict || (matchIndex+matchLength >= dictLimit)) { + assert(matchIndex+matchLength >= dictLimit); /* might be wrong if actually extDict */ + match = base + matchIndex; + matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend); + } else { + match = dictBase + matchIndex; + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); + if (matchIndex+matchLength >= dictLimit) + match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ + } + + if (matchLength > bestLength) { + bestLength = matchLength; + if (matchLength > matchEndIdx - matchIndex) + matchEndIdx = matchIndex + (U32)matchLength; + } + + if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */ + 
break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */ + } + + if (match[matchLength] < ip[matchLength]) { /* necessarily within buffer */ + /* match is smaller than current */ + *smallerPtr = matchIndex; /* update smaller idx */ + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop searching */ + smallerPtr = nextPtr+1; /* new "candidate" => larger than match, which was smaller than target */ + matchIndex = nextPtr[1]; /* new matchIndex, larger than previous and closer to current */ + } else { + /* match is larger than current */ + *largerPtr = matchIndex; + commonLengthLarger = matchLength; + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop searching */ + largerPtr = nextPtr; + matchIndex = nextPtr[0]; + } } + + *smallerPtr = *largerPtr = 0; + { U32 positions = 0; + if (bestLength > 384) positions = MIN(192, (U32)(bestLength - 384)); /* speed optimization */ + assert(matchEndIdx > curr + 8); + return MAX(positions, matchEndIdx - (curr + 8)); + } +} + +FORCE_INLINE_TEMPLATE +void ZSTD_updateTree_internal( + ZSTD_matchState_t* ms, + const BYTE* const ip, const BYTE* const iend, + const U32 mls, const ZSTD_dictMode_e dictMode) +{ + const BYTE* const base = ms->window.base; + U32 const target = (U32)(ip - base); + U32 idx = ms->nextToUpdate; + DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)", + idx, target, dictMode); + + while(idx < target) { + U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict); + assert(idx < (U32)(idx + forward)); + idx += forward; + } + assert((size_t)(ip - base) <= (size_t)(U32)(-1)); + assert((size_t)(iend - base) <= (size_t)(U32)(-1)); + ms->nextToUpdate = target; +} + +void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) { + ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict); +} + +FORCE_INLINE_TEMPLATE +U32 ZSTD_insertBtAndGetAllMatches ( + ZSTD_match_t* matches, /* store result (found matches) in this table (presumed large enough) */ + ZSTD_matchState_t* ms, + U32* nextToUpdate3, + const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode, + const U32 rep[ZSTD_REP_NUM], + U32 const ll0, /* tells if associated literal length is 0 or not. This value must be 0 or 1 */ + const U32 lengthToBeat, + U32 const mls /* template */) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1); + const BYTE* const base = ms->window.base; + U32 const curr = (U32)(ip-base); + U32 const hashLog = cParams->hashLog; + U32 const minMatch = (mls==3) ? 3 : 4; + U32* const hashTable = ms->hashTable; + size_t const h = ZSTD_hashPtr(ip, hashLog, mls); + U32 matchIndex = hashTable[h]; + U32* const bt = ms->chainTable; + U32 const btLog = cParams->chainLog - 1; + U32 const btMask= (1U << btLog) - 1; + size_t commonLengthSmaller=0, commonLengthLarger=0; + const BYTE* const dictBase = ms->window.dictBase; + U32 const dictLimit = ms->window.dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const prefixStart = base + dictLimit; + U32 const btLow = (btMask >= curr) ? 0 : curr - btMask; + U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog); + U32 const matchLow = windowLow ? 
windowLow : 1; + U32* smallerPtr = bt + 2*(curr&btMask); + U32* largerPtr = bt + 2*(curr&btMask) + 1; + U32 matchEndIdx = curr+8+1; /* farthest referenced position of any match => detects repetitive patterns */ + U32 dummy32; /* to be nullified at the end */ + U32 mnum = 0; + U32 nbCompares = 1U << cParams->searchLog; + + const ZSTD_matchState_t* dms = dictMode == ZSTD_dictMatchState ? ms->dictMatchState : NULL; + const ZSTD_compressionParameters* const dmsCParams = + dictMode == ZSTD_dictMatchState ? &dms->cParams : NULL; + const BYTE* const dmsBase = dictMode == ZSTD_dictMatchState ? dms->window.base : NULL; + const BYTE* const dmsEnd = dictMode == ZSTD_dictMatchState ? dms->window.nextSrc : NULL; + U32 const dmsHighLimit = dictMode == ZSTD_dictMatchState ? (U32)(dmsEnd - dmsBase) : 0; + U32 const dmsLowLimit = dictMode == ZSTD_dictMatchState ? dms->window.lowLimit : 0; + U32 const dmsIndexDelta = dictMode == ZSTD_dictMatchState ? windowLow - dmsHighLimit : 0; + U32 const dmsHashLog = dictMode == ZSTD_dictMatchState ? dmsCParams->hashLog : hashLog; + U32 const dmsBtLog = dictMode == ZSTD_dictMatchState ? dmsCParams->chainLog - 1 : btLog; + U32 const dmsBtMask = dictMode == ZSTD_dictMatchState ? (1U << dmsBtLog) - 1 : 0; + U32 const dmsBtLow = dictMode == ZSTD_dictMatchState && dmsBtMask < dmsHighLimit - dmsLowLimit ? dmsHighLimit - dmsBtMask : dmsLowLimit; + + size_t bestLength = lengthToBeat-1; + DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", curr); + + /* check repCode */ + assert(ll0 <= 1); /* necessarily 1 or 0 */ + { U32 const lastR = ZSTD_REP_NUM + ll0; + U32 repCode; + for (repCode = ll0; repCode < lastR; repCode++) { + U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode]; + U32 const repIndex = curr - repOffset; + U32 repLen = 0; + assert(curr >= dictLimit); + if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < curr-dictLimit) { /* equivalent to `curr > repIndex >= dictLimit` */ + /* We must validate the repcode offset because when we're using a dictionary the + * valid offset range shrinks when the dictionary goes out of bounds. + */ + if ((repIndex >= windowLow) & (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch))) { + repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch; + } + } else { /* repIndex < dictLimit || repIndex >= curr */ + const BYTE* const repMatch = dictMode == ZSTD_dictMatchState ? 
+ dmsBase + repIndex - dmsIndexDelta : + dictBase + repIndex; + assert(curr >= windowLow); + if ( dictMode == ZSTD_extDict + && ( ((repOffset-1) /*intentional overflow*/ < curr - windowLow) /* equivalent to `curr > repIndex >= windowLow` */ + & (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */) + && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) { + repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch; + } + if (dictMode == ZSTD_dictMatchState + && ( ((repOffset-1) /*intentional overflow*/ < curr - (dmsLowLimit + dmsIndexDelta)) /* equivalent to `curr > repIndex >= dmsLowLimit` */ + & ((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */ + && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) { + repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dmsEnd, prefixStart) + minMatch; + } } + /* save longer solution */ + if (repLen > bestLength) { + DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u", + repCode, ll0, repOffset, repLen); + bestLength = repLen; + matches[mnum].off = repCode - ll0; + matches[mnum].len = (U32)repLen; + mnum++; + if ( (repLen > sufficient_len) + | (ip+repLen == iLimit) ) { /* best possible */ + return mnum; + } } } } + + /* HC3 match finder */ + if ((mls == 3) /*static*/ && (bestLength < mls)) { + U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip); + if ((matchIndex3 >= matchLow) + & (curr - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) { + size_t mlen; + if ((dictMode == ZSTD_noDict) /*static*/ || (dictMode == ZSTD_dictMatchState) /*static*/ || (matchIndex3 >= dictLimit)) { + const BYTE* const match = base + matchIndex3; + mlen = ZSTD_count(ip, match, iLimit); + } else { + const BYTE* const match = dictBase + matchIndex3; + mlen = ZSTD_count_2segments(ip, match, iLimit, dictEnd, prefixStart); + } + + /* save best solution */ + if (mlen >= mls /* == 3 > bestLength */) { + DEBUGLOG(8, "found small match with hlog3, of length %u", + (U32)mlen); + bestLength = mlen; + assert(curr > matchIndex3); + assert(mnum==0); /* no prior solution */ + matches[0].off = (curr - matchIndex3) + ZSTD_REP_MOVE; + matches[0].len = (U32)mlen; + mnum = 1; + if ( (mlen > sufficient_len) | + (ip+mlen == iLimit) ) { /* best possible length */ + ms->nextToUpdate = curr+1; /* skip insertion */ + return 1; + } } } + /* no dictMatchState lookup: dicts don't have a populated HC3 table */ + } + + hashTable[h] = curr; /* Update Hash Table */ + + while (nbCompares-- && (matchIndex >= matchLow)) { + U32* const nextPtr = bt + 2*(matchIndex & btMask); + const BYTE* match; + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + assert(curr > matchIndex); + + if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) { + assert(matchIndex+matchLength >= dictLimit); /* ensure the condition is correct when !extDict */ + match = base + matchIndex; + if (matchIndex >= dictLimit) assert(memcmp(match, ip, matchLength) == 0); /* ensure early section of match is equal as expected */ + matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit); + } else { + match = dictBase + matchIndex; + assert(memcmp(match, ip, matchLength) == 0); /* ensure early 
section of match is equal as expected */ + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart); + if (matchIndex+matchLength >= dictLimit) + match = base + matchIndex; /* prepare for match[matchLength] read */ + } + + if (matchLength > bestLength) { + DEBUGLOG(8, "found match of length %u at distance %u (offCode=%u)", + (U32)matchLength, curr - matchIndex, curr - matchIndex + ZSTD_REP_MOVE); + assert(matchEndIdx > matchIndex); + if (matchLength > matchEndIdx - matchIndex) + matchEndIdx = matchIndex + (U32)matchLength; + bestLength = matchLength; + matches[mnum].off = (curr - matchIndex) + ZSTD_REP_MOVE; + matches[mnum].len = (U32)matchLength; + mnum++; + if ( (matchLength > ZSTD_OPT_NUM) + | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) { + if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */ + break; /* drop, to preserve bt consistency (miss a little bit of compression) */ + } + } + + if (match[matchLength] < ip[matchLength]) { + /* match smaller than current */ + *smallerPtr = matchIndex; /* update smaller idx */ + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + smallerPtr = nextPtr+1; /* new candidate => larger than match, which was smaller than current */ + matchIndex = nextPtr[1]; /* new matchIndex, larger than previous, closer to current */ + } else { + *largerPtr = matchIndex; + commonLengthLarger = matchLength; + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + largerPtr = nextPtr; + matchIndex = nextPtr[0]; + } } + + *smallerPtr = *largerPtr = 0; + + if (dictMode == ZSTD_dictMatchState && nbCompares) { + size_t const dmsH = ZSTD_hashPtr(ip, dmsHashLog, mls); + U32 dictMatchIndex = dms->hashTable[dmsH]; + const U32* const dmsBt = dms->chainTable; + commonLengthSmaller = commonLengthLarger = 0; + while (nbCompares-- && (dictMatchIndex > dmsLowLimit)) { + const U32* const nextPtr = dmsBt + 2*(dictMatchIndex & dmsBtMask); + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + const BYTE* match = dmsBase + dictMatchIndex; + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dmsEnd, prefixStart); + if (dictMatchIndex+matchLength >= dmsHighLimit) + match = base + dictMatchIndex + dmsIndexDelta; /* to prepare for next usage of match[matchLength] */ + + if (matchLength > bestLength) { + matchIndex = dictMatchIndex + dmsIndexDelta; + DEBUGLOG(8, "found dms match of length %u at distance %u (offCode=%u)", + (U32)matchLength, curr - matchIndex, curr - matchIndex + ZSTD_REP_MOVE); + if (matchLength > matchEndIdx - matchIndex) + matchEndIdx = matchIndex + (U32)matchLength; + bestLength = matchLength; + matches[mnum].off = (curr - matchIndex) + ZSTD_REP_MOVE; + matches[mnum].len = (U32)matchLength; + mnum++; + if ( (matchLength > ZSTD_OPT_NUM) + | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) { + break; /* drop, to guarantee consistency (miss a little bit of compression) */ + } + } + + if (dictMatchIndex <= dmsBtLow) { break; } /* beyond tree size, stop the search */ + if (match[matchLength] < ip[matchLength]) { + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + dictMatchIndex = nextPtr[1]; /* new matchIndex 
larger than previous (closer to current) */ + } else { + /* match is larger than current */ + commonLengthLarger = matchLength; + dictMatchIndex = nextPtr[0]; + } + } + } + + assert(matchEndIdx > curr+8); + ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */ + return mnum; +} + + +FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches ( + ZSTD_match_t* matches, /* store result (match found, increasing size) in this table */ + ZSTD_matchState_t* ms, + U32* nextToUpdate3, + const BYTE* ip, const BYTE* const iHighLimit, const ZSTD_dictMode_e dictMode, + const U32 rep[ZSTD_REP_NUM], + U32 const ll0, + U32 const lengthToBeat) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32 const matchLengthSearch = cParams->minMatch; + DEBUGLOG(8, "ZSTD_BtGetAllMatches"); + if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */ + ZSTD_updateTree_internal(ms, ip, iHighLimit, matchLengthSearch, dictMode); + switch(matchLengthSearch) + { + case 3 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 3); + default : + case 4 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 4); + case 5 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 5); + case 7 : + case 6 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 6); + } +} + +/************************* +* LDM helper functions * +*************************/ + +/* Struct containing info needed to make decision about ldm inclusion */ +typedef struct { + rawSeqStore_t seqStore; /* External match candidates store for this block */ + U32 startPosInBlock; /* Start position of the current match candidate */ + U32 endPosInBlock; /* End position of the current match candidate */ + U32 offset; /* Offset of the match candidate */ +} ZSTD_optLdm_t; + +/* ZSTD_optLdm_skipRawSeqStoreBytes(): + * Moves forward in rawSeqStore by nbBytes, which will update the fields 'pos' and 'posInSequence'. + */ +static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) { + U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes); + while (currPos && rawSeqStore->pos < rawSeqStore->size) { + rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos]; + if (currPos >= currSeq.litLength + currSeq.matchLength) { + currPos -= currSeq.litLength + currSeq.matchLength; + rawSeqStore->pos++; + } else { + rawSeqStore->posInSequence = currPos; + break; + } + } + if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) { + rawSeqStore->posInSequence = 0; + } +} + +/* ZSTD_opt_getNextMatchAndUpdateSeqStore(): + * Calculates the beginning and end of the next match in the current block. + * Updates 'pos' and 'posInSequence' of the ldmSeqStore. 
+ */ +static void ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock, + U32 blockBytesRemaining) { + rawSeq currSeq; + U32 currBlockEndPos; + U32 literalsBytesRemaining; + U32 matchBytesRemaining; + + /* Setting match end position to MAX to ensure we never use an LDM during this block */ + if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) { + optLdm->startPosInBlock = UINT_MAX; + optLdm->endPosInBlock = UINT_MAX; + return; + } + /* Calculate appropriate bytes left in matchLength and litLength after adjusting + based on ldmSeqStore->posInSequence */ + currSeq = optLdm->seqStore.seq[optLdm->seqStore.pos]; + assert(optLdm->seqStore.posInSequence <= currSeq.litLength + currSeq.matchLength); + currBlockEndPos = currPosInBlock + blockBytesRemaining; + literalsBytesRemaining = (optLdm->seqStore.posInSequence < currSeq.litLength) ? + currSeq.litLength - (U32)optLdm->seqStore.posInSequence : + 0; + matchBytesRemaining = (literalsBytesRemaining == 0) ? + currSeq.matchLength - ((U32)optLdm->seqStore.posInSequence - currSeq.litLength) : + currSeq.matchLength; + + /* If there are more literal bytes than bytes remaining in block, no ldm is possible */ + if (literalsBytesRemaining >= blockBytesRemaining) { + optLdm->startPosInBlock = UINT_MAX; + optLdm->endPosInBlock = UINT_MAX; + ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, blockBytesRemaining); + return; + } + + /* Matches may be < MINMATCH by this process. In that case, we will reject them + when we are deciding whether or not to add the ldm */ + optLdm->startPosInBlock = currPosInBlock + literalsBytesRemaining; + optLdm->endPosInBlock = optLdm->startPosInBlock + matchBytesRemaining; + optLdm->offset = currSeq.offset; + + if (optLdm->endPosInBlock > currBlockEndPos) { + /* Match ends after the block ends, we can't use the whole match */ + optLdm->endPosInBlock = currBlockEndPos; + ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, currBlockEndPos - currPosInBlock); + } else { + /* Consume nb of bytes equal to size of sequence left */ + ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, literalsBytesRemaining + matchBytesRemaining); + } +} + +/* ZSTD_optLdm_maybeAddMatch(): + * Adds a match if it's long enough, based on it's 'matchStartPosInBlock' + * and 'matchEndPosInBlock', into 'matches'. 
Maintains the correct ordering of 'matches' + */ +static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches, + ZSTD_optLdm_t* optLdm, U32 currPosInBlock) { + U32 posDiff = currPosInBlock - optLdm->startPosInBlock; + /* Note: ZSTD_match_t actually contains offCode and matchLength (before subtracting MINMATCH) */ + U32 candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff; + U32 candidateOffCode = optLdm->offset + ZSTD_REP_MOVE; + + /* Ensure that current block position is not outside of the match */ + if (currPosInBlock < optLdm->startPosInBlock + || currPosInBlock >= optLdm->endPosInBlock + || candidateMatchLength < MINMATCH) { + return; + } + + if (*nbMatches == 0 || ((candidateMatchLength > matches[*nbMatches-1].len) && *nbMatches < ZSTD_OPT_NUM)) { + DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offCode: %u matchLength %u) at block position=%u", + candidateOffCode, candidateMatchLength, currPosInBlock); + matches[*nbMatches].len = candidateMatchLength; + matches[*nbMatches].off = candidateOffCode; + (*nbMatches)++; + } +} + +/* ZSTD_optLdm_processMatchCandidate(): + * Wrapper function to update ldm seq store and call ldm functions as necessary. + */ +static void ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm, ZSTD_match_t* matches, U32* nbMatches, + U32 currPosInBlock, U32 remainingBytes) { + if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) { + return; + } + + if (currPosInBlock >= optLdm->endPosInBlock) { + if (currPosInBlock > optLdm->endPosInBlock) { + /* The position at which ZSTD_optLdm_processMatchCandidate() is called is not necessarily + * at the end of a match from the ldm seq store, and will often be some bytes + * over beyond matchEndPosInBlock. As such, we need to correct for these "overshoots" + */ + U32 posOvershoot = currPosInBlock - optLdm->endPosInBlock; + ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, posOvershoot); + } + ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes); + } + ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock); +} + +/*-******************************* +* Optimal parser +*********************************/ + + +static U32 ZSTD_totalLen(ZSTD_optimal_t sol) +{ + return sol.litlen + sol.mlen; +} + +#if 0 /* debug */ + +static void +listStats(const U32* table, int lastEltID) +{ + int const nbElts = lastEltID + 1; + int enb; + for (enb=0; enb < nbElts; enb++) { + (void)table; + /* RAWLOG(2, "%3i:%3i, ", enb, table[enb]); */ + RAWLOG(2, "%4i,", table[enb]); + } + RAWLOG(2, " \n"); +} + +#endif + +FORCE_INLINE_TEMPLATE size_t +ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, + seqStore_t* seqStore, + U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize, + const int optLevel, + const ZSTD_dictMode_e dictMode) +{ + optState_t* const optStatePtr = &ms->opt; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + const BYTE* const base = ms->window.base; + const BYTE* const prefixStart = base + ms->window.dictLimit; + const ZSTD_compressionParameters* const cParams = &ms->cParams; + + U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1); + U32 const minMatch = (cParams->minMatch == 3) ? 
3 : 4;
+    U32 nextToUpdate3 = ms->nextToUpdate;
+
+    ZSTD_optimal_t* const opt = optStatePtr->priceTable;
+    ZSTD_match_t* const matches = optStatePtr->matchTable;
+    ZSTD_optimal_t lastSequence;
+    ZSTD_optLdm_t optLdm;
+
+    optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore;
+    optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0;
+    ZSTD_opt_getNextMatchAndUpdateSeqStore(&optLdm, (U32)(ip-istart), (U32)(iend-ip));
+
+    /* init */
+    DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u",
+                (U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate);
+    assert(optLevel <= 2);
+    ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel);
+    ip += (ip==prefixStart);
+
+    /* Match Loop */
+    while (ip < ilimit) {
+        U32 cur, last_pos = 0;
+
+        /* find first match */
+        {   U32 const litlen = (U32)(ip - anchor);
+            U32 const ll0 = !litlen;
+            U32 nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, ip, iend, dictMode, rep, ll0, minMatch);
+            ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
+                                              (U32)(ip-istart), (U32)(iend - ip));
+            if (!nbMatches) { ip++; continue; }
+
+            /* initialize opt[0] */
+            { U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
+            opt[0].mlen = 0;  /* means is_a_literal */
+            opt[0].litlen = litlen;
+            /* We don't need to include the actual price of the literals because
+             * it is static for the duration of the forward pass, and is included
+             * in every price. We include the literal length to avoid negative
+             * prices when we subtract the previous literal length.
+             */
+            opt[0].price = ZSTD_litLengthPrice(litlen, optStatePtr, optLevel);
+
+            /* large match -> immediate encoding */
+            {   U32 const maxML = matches[nbMatches-1].len;
+                U32 const maxOffset = matches[nbMatches-1].off;
+                DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffCode=%u at cPos=%u => start new series",
+                            nbMatches, maxML, maxOffset, (U32)(ip-prefixStart));
+
+                if (maxML > sufficient_len) {
+                    lastSequence.litlen = litlen;
+                    lastSequence.mlen = maxML;
+                    lastSequence.off = maxOffset;
+                    DEBUGLOG(6, "large match (%u>%u), immediate encoding",
+                                maxML, sufficient_len);
+                    cur = 0;
+                    last_pos = ZSTD_totalLen(lastSequence);
+                    goto _shortestPath;
+            }   }
+
+            /* set prices for first matches starting position == 0 */
+            {   U32 const literalsPrice = opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
+                U32 pos;
+                U32 matchNb;
+                for (pos = 1; pos < minMatch; pos++) {
+                    opt[pos].price = ZSTD_MAX_PRICE;   /* mlen, litlen and price will be fixed during forward scanning */
+                }
+                for (matchNb = 0; matchNb < nbMatches; matchNb++) {
+                    U32 const offset = matches[matchNb].off;
+                    U32 const end = matches[matchNb].len;
+                    for ( ; pos <= end ; pos++ ) {
+                        U32 const matchPrice = ZSTD_getMatchPrice(offset, pos, optStatePtr, optLevel);
+                        U32 const sequencePrice = literalsPrice + matchPrice;
+                        DEBUGLOG(7, "rPos:%u => set initial price : %.2f",
+                                    pos, ZSTD_fCost(sequencePrice));
+                        opt[pos].mlen = pos;
+                        opt[pos].off = offset;
+                        opt[pos].litlen = litlen;
+                        opt[pos].price = sequencePrice;
+                }   }
+                last_pos = pos-1;
+            }
+        }
+
+        /* check further positions */
+        for (cur = 1; cur <= last_pos; cur++) {
+            const BYTE* const inr = ip + cur;
+            assert(cur < ZSTD_OPT_NUM);
+            DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur)
+
+            /* Fix current position with one literal if cheaper */
+            {   U32 const litlen = (opt[cur-1].mlen == 0) ? 
opt[cur-1].litlen + 1 : 1; + int const price = opt[cur-1].price + + ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel) + + ZSTD_litLengthPrice(litlen, optStatePtr, optLevel) + - ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel); + assert(price < 1000000000); /* overflow check */ + if (price <= opt[cur].price) { + DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)", + inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen, + opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]); + opt[cur].mlen = 0; + opt[cur].off = 0; + opt[cur].litlen = litlen; + opt[cur].price = price; + } else { + DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f) (hist:%u,%u,%u)", + inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), + opt[cur].rep[0], opt[cur].rep[1], opt[cur].rep[2]); + } + } + + /* Set the repcodes of the current position. We must do it here + * because we rely on the repcodes of the 2nd to last sequence being + * correct to set the next chunks repcodes during the backward + * traversal. + */ + ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(repcodes_t)); + assert(cur >= opt[cur].mlen); + if (opt[cur].mlen != 0) { + U32 const prev = cur - opt[cur].mlen; + repcodes_t newReps = ZSTD_updateRep(opt[prev].rep, opt[cur].off, opt[cur].litlen==0); + ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t)); + } else { + ZSTD_memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t)); + } + + /* last match must start at a minimum distance of 8 from oend */ + if (inr > ilimit) continue; + + if (cur == last_pos) break; + + if ( (optLevel==0) /*static_test*/ + && (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) { + DEBUGLOG(7, "move to next rPos:%u : price is <=", cur+1); + continue; /* skip unpromising positions; about ~+6% speed, -0.01 ratio */ + } + + { U32 const ll0 = (opt[cur].mlen != 0); + U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0; + U32 const previousPrice = opt[cur].price; + U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel); + U32 nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, inr, iend, dictMode, opt[cur].rep, ll0, minMatch); + U32 matchNb; + + ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches, + (U32)(inr-istart), (U32)(iend-inr)); + + if (!nbMatches) { + DEBUGLOG(7, "rPos:%u : no match found", cur); + continue; + } + + { U32 const maxML = matches[nbMatches-1].len; + DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of maxLength=%u", + inr-istart, cur, nbMatches, maxML); + + if ( (maxML > sufficient_len) + || (cur + maxML >= ZSTD_OPT_NUM) ) { + lastSequence.mlen = maxML; + lastSequence.off = matches[nbMatches-1].off; + lastSequence.litlen = litlen; + cur -= (opt[cur].mlen==0) ? opt[cur].litlen : 0; /* last sequence is actually only literals, fix cur to last match - note : may underflow, in which case, it's first sequence, and it's okay */ + last_pos = cur + ZSTD_totalLen(lastSequence); + if (cur > ZSTD_OPT_NUM) cur = 0; /* underflow => first match */ + goto _shortestPath; + } } + + /* set prices using matches found at position == cur */ + for (matchNb = 0; matchNb < nbMatches; matchNb++) { + U32 const offset = matches[matchNb].off; + U32 const lastML = matches[matchNb].len; + U32 const startML = (matchNb>0) ? 
matches[matchNb-1].len+1 : minMatch; + U32 mlen; + + DEBUGLOG(7, "testing match %u => offCode=%4u, mlen=%2u, llen=%2u", + matchNb, matches[matchNb].off, lastML, litlen); + + for (mlen = lastML; mlen >= startML; mlen--) { /* scan downward */ + U32 const pos = cur + mlen; + int const price = basePrice + ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel); + + if ((pos > last_pos) || (price < opt[pos].price)) { + DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)", + pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price)); + while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; } /* fill empty positions */ + opt[pos].mlen = mlen; + opt[pos].off = offset; + opt[pos].litlen = litlen; + opt[pos].price = price; + } else { + DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)", + pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price)); + if (optLevel==0) break; /* early update abort; gets ~+10% speed for about -0.01 ratio loss */ + } + } } } + } /* for (cur = 1; cur <= last_pos; cur++) */ + + lastSequence = opt[last_pos]; + cur = last_pos > ZSTD_totalLen(lastSequence) ? last_pos - ZSTD_totalLen(lastSequence) : 0; /* single sequence, and it starts before `ip` */ + assert(cur < ZSTD_OPT_NUM); /* control overflow*/ + +_shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */ + assert(opt[0].mlen == 0); + + /* Set the next chunk's repcodes based on the repcodes of the beginning + * of the last match, and the last sequence. This avoids us having to + * update them while traversing the sequences. + */ + if (lastSequence.mlen != 0) { + repcodes_t reps = ZSTD_updateRep(opt[cur].rep, lastSequence.off, lastSequence.litlen==0); + ZSTD_memcpy(rep, &reps, sizeof(reps)); + } else { + ZSTD_memcpy(rep, opt[cur].rep, sizeof(repcodes_t)); + } + + { U32 const storeEnd = cur + 1; + U32 storeStart = storeEnd; + U32 seqPos = cur; + + DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)", + last_pos, cur); (void)last_pos; + assert(storeEnd < ZSTD_OPT_NUM); + DEBUGLOG(6, "last sequence copied into pos=%u (llen=%u,mlen=%u,ofc=%u)", + storeEnd, lastSequence.litlen, lastSequence.mlen, lastSequence.off); + opt[storeEnd] = lastSequence; + while (seqPos > 0) { + U32 const backDist = ZSTD_totalLen(opt[seqPos]); + storeStart--; + DEBUGLOG(6, "sequence from rPos=%u copied into pos=%u (llen=%u,mlen=%u,ofc=%u)", + seqPos, storeStart, opt[seqPos].litlen, opt[seqPos].mlen, opt[seqPos].off); + opt[storeStart] = opt[seqPos]; + seqPos = (seqPos > backDist) ? 
seqPos - backDist : 0;
+            }
+
+            /* save sequences */
+            DEBUGLOG(6, "sending selected sequences into seqStore")
+            {   U32 storePos;
+                for (storePos=storeStart; storePos <= storeEnd; storePos++) {
+                    U32 const llen = opt[storePos].litlen;
+                    U32 const mlen = opt[storePos].mlen;
+                    U32 const offCode = opt[storePos].off;
+                    U32 const advance = llen + mlen;
+                    DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u",
+                                anchor - istart, (unsigned)llen, (unsigned)mlen);
+
+                    if (mlen==0) {  /* only literals => must be last "sequence", actually starting a new stream of sequences */
+                        assert(storePos == storeEnd);   /* must be last sequence */
+                        ip = anchor + llen;     /* last "sequence" is a bunch of literals => don't progress anchor */
+                        continue;   /* will finish */
+                    }
+
+                    assert(anchor + llen <= iend);
+                    ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen);
+                    ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen-MINMATCH);
+                    anchor += advance;
+                    ip = anchor;
+            }   }
+            ZSTD_setBasePrices(optStatePtr, optLevel);
+        }
+    }   /* while (ip < ilimit) */
+
+    /* Return the last literals size */
+    return (size_t)(iend - anchor);
+}
+
+
+size_t ZSTD_compressBlock_btopt(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        const void* src, size_t srcSize)
+{
+    DEBUGLOG(5, "ZSTD_compressBlock_btopt");
+    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_noDict);
+}
+
+
+/* used in 2-pass strategy */
+static U32 ZSTD_upscaleStat(unsigned* table, U32 lastEltIndex, int bonus)
+{
+    U32 s, sum=0;
+    assert(ZSTD_FREQ_DIV+bonus >= 0);
+    for (s=0; s<lastEltIndex+1; s++) {
+        table[s] <<= ZSTD_FREQ_DIV+bonus;
+        table[s]--;
+        sum += table[s];
+    }
+    return sum;
+}
+
+/* used in 2-pass strategy */
+MEM_STATIC void ZSTD_upscaleStats(optState_t* optPtr)
+{
+    if (ZSTD_compressedLiterals(optPtr))
+        optPtr->litSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0);
+    optPtr->litLengthSum = ZSTD_upscaleStat(optPtr->litLengthFreq, MaxLL, 0);
+    optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 0);
+    optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 0);
+}
+
+/* ZSTD_initStats_ultra():
+ * make a first compression pass, just to seed stats with more accurate starting values.
+ * only works on first block, with no dictionary and no ldm.
+ * this function cannot error, hence its contract must be respected.
+ */ +static void +ZSTD_initStats_ultra(ZSTD_matchState_t* ms, + seqStore_t* seqStore, + U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + U32 tmpRep[ZSTD_REP_NUM]; /* updated rep codes will sink here */ + ZSTD_memcpy(tmpRep, rep, sizeof(tmpRep)); + + DEBUGLOG(4, "ZSTD_initStats_ultra (srcSize=%zu)", srcSize); + assert(ms->opt.litLengthSum == 0); /* first block */ + assert(seqStore->sequences == seqStore->sequencesStart); /* no ldm */ + assert(ms->window.dictLimit == ms->window.lowLimit); /* no dictionary */ + assert(ms->window.dictLimit - ms->nextToUpdate <= 1); /* no prefix (note: intentional overflow, defined as 2-complement) */ + + ZSTD_compressBlock_opt_generic(ms, seqStore, tmpRep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); /* generate stats into ms->opt*/ + + /* invalidate first scan from history */ + ZSTD_resetSeqStore(seqStore); + ms->window.base -= srcSize; + ms->window.dictLimit += (U32)srcSize; + ms->window.lowLimit = ms->window.dictLimit; + ms->nextToUpdate = ms->window.dictLimit; + + /* re-inforce weight of collected statistics */ + ZSTD_upscaleStats(&ms->opt); +} + +size_t ZSTD_compressBlock_btultra( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize); + return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_btultra2( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + U32 const curr = (U32)((const BYTE*)src - ms->window.base); + DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize); + + /* 2-pass strategy: + * this strategy makes a first pass over first block to collect statistics + * and seed next round's statistics with it. + * After 1st pass, function forgets everything, and starts a new block. + * Consequently, this can only work if no data has been previously loaded in tables, + * aka, no dictionary, no prefix, no ldm preprocessing. + * The compression ratio gain is generally small (~0.5% on first block), + * the cost is 2x cpu time on first block. 
*/ + assert(srcSize <= ZSTD_BLOCKSIZE_MAX); + if ( (ms->opt.litLengthSum==0) /* first block */ + && (seqStore->sequences == seqStore->sequencesStart) /* no ldm */ + && (ms->window.dictLimit == ms->window.lowLimit) /* no dictionary */ + && (curr == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */ + && (srcSize > ZSTD_PREDEF_THRESHOLD) + ) { + ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize); + } + + return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_btopt_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_dictMatchState); +} + +size_t ZSTD_compressBlock_btultra_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_dictMatchState); +} + +size_t ZSTD_compressBlock_btopt_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_extDict); +} + +size_t ZSTD_compressBlock_btultra_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_extDict); +} + +/* note : no btultra2 variant for extDict nor dictMatchState, + * because btultra2 is not meant to work with dictionaries + * and is only specific for the first block (no prefix) */ +/**** ended inlining compress/zstd_opt.c ****/ +#ifdef ZSTD_MULTITHREAD +/**** start inlining compress/zstdmt_compress.c ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + +/* ====== Compiler specifics ====== */ +#if defined(_MSC_VER) +# pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */ +#endif + + +/* ====== Constants ====== */ +#define ZSTDMT_OVERLAPLOG_DEFAULT 0 + + +/* ====== Dependencies ====== */ +/**** skipping file: ../common/zstd_deps.h ****/ +/**** skipping file: ../common/mem.h ****/ +/**** skipping file: ../common/pool.h ****/ +/**** skipping file: ../common/threading.h ****/ +/**** skipping file: zstd_compress_internal.h ****/ +/**** skipping file: zstd_ldm.h ****/ +/**** skipping file: zstdmt_compress.h ****/ + +/* Guards code to support resizing the SeqPool. + * We will want to resize the SeqPool to save memory in the future. + * Until then, comment the code out since it is unused. 
+ */
+#define ZSTD_RESIZE_SEQPOOL 0
+
+/* ====== Debug ====== */
+#if defined(DEBUGLEVEL) && (DEBUGLEVEL>=2) \
+ && !defined(_MSC_VER) \
+ && !defined(__MINGW32__)
+
+# include <stdio.h>
+# include <unistd.h>
+# include <sys/times.h>
+
+# define DEBUG_PRINTHEX(l,p,n) { \
+ unsigned debug_u; \
+ for (debug_u=0; debug_u<(n); debug_u++) \
+ RAWLOG(l, "%02X ", ((const unsigned char*)(p))[debug_u]); \
+ RAWLOG(l, " \n"); \
+}
+
+static unsigned long long GetCurrentClockTimeMicroseconds(void)
+{
+ static clock_t _ticksPerSecond = 0;
+ if (_ticksPerSecond <= 0) _ticksPerSecond = sysconf(_SC_CLK_TCK);
+
+ { struct tms junk; clock_t newTicks = (clock_t) times(&junk);
+ return ((((unsigned long long)newTicks)*(1000000))/_ticksPerSecond);
+} }
+
+#define MUTEX_WAIT_TIME_DLEVEL 6
+#define ZSTD_PTHREAD_MUTEX_LOCK(mutex) { \
+ if (DEBUGLEVEL >= MUTEX_WAIT_TIME_DLEVEL) { \
+ unsigned long long const beforeTime = GetCurrentClockTimeMicroseconds(); \
+ ZSTD_pthread_mutex_lock(mutex); \
+ { unsigned long long const afterTime = GetCurrentClockTimeMicroseconds(); \
+ unsigned long long const elapsedTime = (afterTime-beforeTime); \
+ if (elapsedTime > 1000) { /* or whatever threshold you like; I'm using 1 millisecond here */ \
+ DEBUGLOG(MUTEX_WAIT_TIME_DLEVEL, "Thread took %llu microseconds to acquire mutex %s \n", \
+ elapsedTime, #mutex); \
+ } } \
+ } else { \
+ ZSTD_pthread_mutex_lock(mutex); \
+ } \
+}
+
+#else
+
+# define ZSTD_PTHREAD_MUTEX_LOCK(m) ZSTD_pthread_mutex_lock(m)
+# define DEBUG_PRINTHEX(l,p,n) {}
+
+#endif
+
+
+/* ===== Buffer Pool ===== */
+/* a single Buffer Pool can be invoked from multiple threads in parallel */
+
+typedef struct buffer_s {
+ void* start;
+ size_t capacity;
+} buffer_t;
+
+static const buffer_t g_nullBuffer = { NULL, 0 };
+
+typedef struct ZSTDMT_bufferPool_s {
+ ZSTD_pthread_mutex_t poolMutex;
+ size_t bufferSize;
+ unsigned totalBuffers;
+ unsigned nbBuffers;
+ ZSTD_customMem cMem;
+ buffer_t bTable[1]; /* variable size */
+} ZSTDMT_bufferPool;
+
+static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned nbWorkers, ZSTD_customMem cMem)
+{
+ unsigned const maxNbBuffers = 2*nbWorkers + 3;
+ ZSTDMT_bufferPool* const bufPool = (ZSTDMT_bufferPool*)ZSTD_customCalloc(
+ sizeof(ZSTDMT_bufferPool) + (maxNbBuffers-1) * sizeof(buffer_t), cMem);
+ if (bufPool==NULL) return NULL;
+ if (ZSTD_pthread_mutex_init(&bufPool->poolMutex, NULL)) {
+ ZSTD_customFree(bufPool, cMem);
+ return NULL;
+ }
+ bufPool->bufferSize = 64 KB;
+ bufPool->totalBuffers = maxNbBuffers;
+ bufPool->nbBuffers = 0;
+ bufPool->cMem = cMem;
+ return bufPool;
+}
+
+static void ZSTDMT_freeBufferPool(ZSTDMT_bufferPool* bufPool)
+{
+ unsigned u;
+ DEBUGLOG(3, "ZSTDMT_freeBufferPool (address:%08X)", (U32)(size_t)bufPool);
+ if (!bufPool) return; /* compatibility with free on NULL */
+ for (u=0; u<bufPool->totalBuffers; u++) {
+ DEBUGLOG(4, "free buffer %2u (address:%08X)", u, (U32)(size_t)bufPool->bTable[u].start);
+ ZSTD_customFree(bufPool->bTable[u].start, bufPool->cMem);
+ }
+ ZSTD_pthread_mutex_destroy(&bufPool->poolMutex);
+ ZSTD_customFree(bufPool, bufPool->cMem);
+}
+
+/* only works at initialization, not during compression */
+static size_t ZSTDMT_sizeof_bufferPool(ZSTDMT_bufferPool* bufPool)
+{
+ size_t const poolSize = sizeof(*bufPool)
+ + (bufPool->totalBuffers - 1) * sizeof(buffer_t);
+ unsigned u;
+ size_t totalBufferSize = 0;
+ ZSTD_pthread_mutex_lock(&bufPool->poolMutex);
+ for (u=0; u<bufPool->totalBuffers; u++)
+ totalBufferSize += bufPool->bTable[u].capacity;
+ ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
+
+ return poolSize + totalBufferSize;
+}
+
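+/* Editor's note (illustrative sketch, not part of upstream zstd) :
+ * a worker typically cycles a buffer through the pool as follows :
+ *
+ *     buffer_t dst = ZSTDMT_getBuffer(bufPool);    // reuses a cached buffer if one fits, else allocates
+ *     if (dst.start == NULL) { ... handle allocation failure ... }
+ *     ... write up to dst.capacity bytes ...
+ *     ZSTDMT_releaseBuffer(bufPool, dst);          // cached for reuse, or freed once the pool is full
+ *
+ * The pool caches at most 2*nbWorkers + 3 buffers (see ZSTDMT_createBufferPool above),
+ * which bounds worst-case memory while still letting every in-flight job hold buffers. */
+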
+/* ZSTDMT_setBufferSize() : + * all future buffers provided by this buffer pool will have _at least_ this size + * note : it's better for all buffers to have same size, + * as they become freely interchangeable, reducing malloc/free usages and memory fragmentation */ +static void ZSTDMT_setBufferSize(ZSTDMT_bufferPool* const bufPool, size_t const bSize) +{ + ZSTD_pthread_mutex_lock(&bufPool->poolMutex); + DEBUGLOG(4, "ZSTDMT_setBufferSize: bSize = %u", (U32)bSize); + bufPool->bufferSize = bSize; + ZSTD_pthread_mutex_unlock(&bufPool->poolMutex); +} + + +static ZSTDMT_bufferPool* ZSTDMT_expandBufferPool(ZSTDMT_bufferPool* srcBufPool, U32 nbWorkers) +{ + unsigned const maxNbBuffers = 2*nbWorkers + 3; + if (srcBufPool==NULL) return NULL; + if (srcBufPool->totalBuffers >= maxNbBuffers) /* good enough */ + return srcBufPool; + /* need a larger buffer pool */ + { ZSTD_customMem const cMem = srcBufPool->cMem; + size_t const bSize = srcBufPool->bufferSize; /* forward parameters */ + ZSTDMT_bufferPool* newBufPool; + ZSTDMT_freeBufferPool(srcBufPool); + newBufPool = ZSTDMT_createBufferPool(nbWorkers, cMem); + if (newBufPool==NULL) return newBufPool; + ZSTDMT_setBufferSize(newBufPool, bSize); + return newBufPool; + } +} + +/** ZSTDMT_getBuffer() : + * assumption : bufPool must be valid + * @return : a buffer, with start pointer and size + * note: allocation may fail, in this case, start==NULL and size==0 */ +static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* bufPool) +{ + size_t const bSize = bufPool->bufferSize; + DEBUGLOG(5, "ZSTDMT_getBuffer: bSize = %u", (U32)bufPool->bufferSize); + ZSTD_pthread_mutex_lock(&bufPool->poolMutex); + if (bufPool->nbBuffers) { /* try to use an existing buffer */ + buffer_t const buf = bufPool->bTable[--(bufPool->nbBuffers)]; + size_t const availBufferSize = buf.capacity; + bufPool->bTable[bufPool->nbBuffers] = g_nullBuffer; + if ((availBufferSize >= bSize) & ((availBufferSize>>3) <= bSize)) { + /* large enough, but not too much */ + DEBUGLOG(5, "ZSTDMT_getBuffer: provide buffer %u of size %u", + bufPool->nbBuffers, (U32)buf.capacity); + ZSTD_pthread_mutex_unlock(&bufPool->poolMutex); + return buf; + } + /* size conditions not respected : scratch this buffer, create new one */ + DEBUGLOG(5, "ZSTDMT_getBuffer: existing buffer does not meet size conditions => freeing"); + ZSTD_customFree(buf.start, bufPool->cMem); + } + ZSTD_pthread_mutex_unlock(&bufPool->poolMutex); + /* create new buffer */ + DEBUGLOG(5, "ZSTDMT_getBuffer: create a new buffer"); + { buffer_t buffer; + void* const start = ZSTD_customMalloc(bSize, bufPool->cMem); + buffer.start = start; /* note : start can be NULL if malloc fails ! */ + buffer.capacity = (start==NULL) ? 0 : bSize; + if (start==NULL) { + DEBUGLOG(5, "ZSTDMT_getBuffer: buffer allocation failure !!"); + } else { + DEBUGLOG(5, "ZSTDMT_getBuffer: created buffer of size %u", (U32)bSize); + } + return buffer; + } +} + +#if ZSTD_RESIZE_SEQPOOL +/** ZSTDMT_resizeBuffer() : + * assumption : bufPool must be valid + * @return : a buffer that is at least the buffer pool buffer size. + * If a reallocation happens, the data in the input buffer is copied. + */ +static buffer_t ZSTDMT_resizeBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buffer) +{ + size_t const bSize = bufPool->bufferSize; + if (buffer.capacity < bSize) { + void* const start = ZSTD_customMalloc(bSize, bufPool->cMem); + buffer_t newBuffer; + newBuffer.start = start; + newBuffer.capacity = start == NULL ? 
0 : bSize;
+ if (start != NULL) {
+ assert(newBuffer.capacity >= buffer.capacity);
+ ZSTD_memcpy(newBuffer.start, buffer.start, buffer.capacity);
+ DEBUGLOG(5, "ZSTDMT_resizeBuffer: created buffer of size %u", (U32)bSize);
+ return newBuffer;
+ }
+ DEBUGLOG(5, "ZSTDMT_resizeBuffer: buffer allocation failure !!");
+ }
+ return buffer;
+}
+#endif
+
+/* store buffer for later re-use, up to pool capacity */
+static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buf)
+{
+ DEBUGLOG(5, "ZSTDMT_releaseBuffer");
+ if (buf.start == NULL) return; /* compatible with release on NULL */
+ ZSTD_pthread_mutex_lock(&bufPool->poolMutex);
+ if (bufPool->nbBuffers < bufPool->totalBuffers) {
+ bufPool->bTable[bufPool->nbBuffers++] = buf; /* stored for later use */
+ DEBUGLOG(5, "ZSTDMT_releaseBuffer: stored buffer of size %u in slot %u",
+ (U32)buf.capacity, (U32)(bufPool->nbBuffers-1));
+ ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
+ return;
+ }
+ ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
+ /* Reached bufferPool capacity (should not happen) */
+ DEBUGLOG(5, "ZSTDMT_releaseBuffer: pool capacity reached => freeing ");
+ ZSTD_customFree(buf.start, bufPool->cMem);
+}
+
+
+/* ===== Seq Pool Wrapper ====== */
+
+typedef ZSTDMT_bufferPool ZSTDMT_seqPool;
+
+static size_t ZSTDMT_sizeof_seqPool(ZSTDMT_seqPool* seqPool)
+{
+ return ZSTDMT_sizeof_bufferPool(seqPool);
+}
+
+static rawSeqStore_t bufferToSeq(buffer_t buffer)
+{
+ rawSeqStore_t seq = kNullRawSeqStore;
+ seq.seq = (rawSeq*)buffer.start;
+ seq.capacity = buffer.capacity / sizeof(rawSeq);
+ return seq;
+}
+
+static buffer_t seqToBuffer(rawSeqStore_t seq)
+{
+ buffer_t buffer;
+ buffer.start = seq.seq;
+ buffer.capacity = seq.capacity * sizeof(rawSeq);
+ return buffer;
+}
+
+static rawSeqStore_t ZSTDMT_getSeq(ZSTDMT_seqPool* seqPool)
+{
+ if (seqPool->bufferSize == 0) {
+ return kNullRawSeqStore;
+ }
+ return bufferToSeq(ZSTDMT_getBuffer(seqPool));
+}
+
+#if ZSTD_RESIZE_SEQPOOL
+static rawSeqStore_t ZSTDMT_resizeSeq(ZSTDMT_seqPool* seqPool, rawSeqStore_t seq)
+{
+ return bufferToSeq(ZSTDMT_resizeBuffer(seqPool, seqToBuffer(seq)));
+}
+#endif
+
+static void ZSTDMT_releaseSeq(ZSTDMT_seqPool* seqPool, rawSeqStore_t seq)
+{
+ ZSTDMT_releaseBuffer(seqPool, seqToBuffer(seq));
+}
+
+static void ZSTDMT_setNbSeq(ZSTDMT_seqPool* const seqPool, size_t const nbSeq)
+{
+ ZSTDMT_setBufferSize(seqPool, nbSeq * sizeof(rawSeq));
+}
+
+static ZSTDMT_seqPool* ZSTDMT_createSeqPool(unsigned nbWorkers, ZSTD_customMem cMem)
+{
+ ZSTDMT_seqPool* const seqPool = ZSTDMT_createBufferPool(nbWorkers, cMem);
+ if (seqPool == NULL) return NULL;
+ ZSTDMT_setNbSeq(seqPool, 0);
+ return seqPool;
+}
+
+static void ZSTDMT_freeSeqPool(ZSTDMT_seqPool* seqPool)
+{
+ ZSTDMT_freeBufferPool(seqPool);
+}
+
+static ZSTDMT_seqPool* ZSTDMT_expandSeqPool(ZSTDMT_seqPool* pool, U32 nbWorkers)
+{
+ return ZSTDMT_expandBufferPool(pool, nbWorkers);
+}
+
+
+/* ===== CCtx Pool ===== */
+/* a single CCtx Pool can be invoked from multiple threads in parallel */
+
+typedef struct {
+ ZSTD_pthread_mutex_t poolMutex;
+ int totalCCtx;
+ int availCCtx;
+ ZSTD_customMem cMem;
+ ZSTD_CCtx* cctx[1]; /* variable size */
+} ZSTDMT_CCtxPool;
+
+/* note : all CCtx borrowed from the pool should be released back to the pool _before_ freeing the pool */
+static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool)
+{
+ int cid;
+ for (cid=0; cid<pool->totalCCtx; cid++)
+ ZSTD_freeCCtx(pool->cctx[cid]); /* note : compatible with free on NULL */
+ ZSTD_pthread_mutex_destroy(&pool->poolMutex);
+ ZSTD_customFree(pool, pool->cMem);
+}
+
+/* ZSTDMT_createCCtxPool() :
+ * implies nbWorkers >= 1 , checked by caller ZSTDMT_createCCtx() */
+static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(int nbWorkers,
+ ZSTD_customMem cMem)
+{
+ ZSTDMT_CCtxPool* const cctxPool = (ZSTDMT_CCtxPool*) ZSTD_customCalloc(
+ sizeof(ZSTDMT_CCtxPool) + (nbWorkers-1)*sizeof(ZSTD_CCtx*), cMem);
+ assert(nbWorkers > 0);
+ if (!cctxPool) return NULL;
+ if (ZSTD_pthread_mutex_init(&cctxPool->poolMutex, NULL)) {
+ ZSTD_customFree(cctxPool, cMem);
+ return NULL;
+ }
+ cctxPool->cMem = cMem;
+ cctxPool->totalCCtx = nbWorkers;
+ cctxPool->availCCtx = 1; /* at least one cctx for single-thread mode */
+ cctxPool->cctx[0] = ZSTD_createCCtx_advanced(cMem);
+ if (!cctxPool->cctx[0]) { ZSTDMT_freeCCtxPool(cctxPool); return NULL; }
+ DEBUGLOG(3, "cctxPool created, with %u workers", nbWorkers);
+ return cctxPool;
+}
+
+static ZSTDMT_CCtxPool* ZSTDMT_expandCCtxPool(ZSTDMT_CCtxPool* srcPool,
+ int nbWorkers)
+{
+ if (srcPool==NULL) return NULL;
+ if (nbWorkers <= srcPool->totalCCtx) return srcPool; /* good enough */
+ /* need a larger cctx pool */
+ { ZSTD_customMem const cMem = srcPool->cMem;
+ ZSTDMT_freeCCtxPool(srcPool);
+ return ZSTDMT_createCCtxPool(nbWorkers, cMem);
+ }
+}
+
+/* only works during initialization phase, not during compression */
+static size_t ZSTDMT_sizeof_CCtxPool(ZSTDMT_CCtxPool* cctxPool)
+{
+ ZSTD_pthread_mutex_lock(&cctxPool->poolMutex);
+ { unsigned const nbWorkers = cctxPool->totalCCtx;
+ size_t const poolSize = sizeof(*cctxPool)
+ + (nbWorkers-1) * sizeof(ZSTD_CCtx*);
+ unsigned u;
+ size_t totalCCtxSize = 0;
+ for (u=0; u<nbWorkers; u++) {
+ totalCCtxSize += ZSTD_sizeof_CCtx(cctxPool->cctx[u]);
+ }
+ ZSTD_pthread_mutex_unlock(&cctxPool->poolMutex);
+ assert(nbWorkers > 0);
+ return poolSize + totalCCtxSize;
+ }
+}
+
+static ZSTD_CCtx* ZSTDMT_getCCtx(ZSTDMT_CCtxPool* cctxPool)
+{
+ DEBUGLOG(5, "ZSTDMT_getCCtx");
+ ZSTD_pthread_mutex_lock(&cctxPool->poolMutex);
+ if (cctxPool->availCCtx) {
+ cctxPool->availCCtx--;
+ { ZSTD_CCtx* const cctx = cctxPool->cctx[cctxPool->availCCtx];
+ ZSTD_pthread_mutex_unlock(&cctxPool->poolMutex);
+ return cctx;
+ } }
+ ZSTD_pthread_mutex_unlock(&cctxPool->poolMutex);
+ DEBUGLOG(5, "create one more CCtx");
+ return ZSTD_createCCtx_advanced(cctxPool->cMem); /* note : can be NULL, when creation fails ! */
+}
+
+static void ZSTDMT_releaseCCtx(ZSTDMT_CCtxPool* pool, ZSTD_CCtx* cctx)
+{
+ if (cctx==NULL) return; /* compatibility with release on NULL */
+ ZSTD_pthread_mutex_lock(&pool->poolMutex);
+ if (pool->availCCtx < pool->totalCCtx)
+ pool->cctx[pool->availCCtx++] = cctx;
+ else {
+ /* pool overflow : should not happen, since totalCCtx==nbWorkers */
+ DEBUGLOG(4, "CCtx pool overflow : free cctx");
+ ZSTD_freeCCtx(cctx);
+ }
+ ZSTD_pthread_mutex_unlock(&pool->poolMutex);
+}
+
+/* ==== Serial State ==== */
+
+typedef struct {
+ void const* start;
+ size_t size;
+} range_t;
+
+typedef struct {
+ /* All variables in the struct are protected by mutex. */
+ ZSTD_pthread_mutex_t mutex;
+ ZSTD_pthread_cond_t cond;
+ ZSTD_CCtx_params params;
+ ldmState_t ldmState;
+ XXH64_state_t xxhState;
+ unsigned nextJobID;
+ /* Protects ldmWindow.
+ * Must be acquired after the main mutex when acquiring both.
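+ * (Editor's note, illustrative sketch : the safe acquisition order is therefore
+ *     ZSTD_PTHREAD_MUTEX_LOCK(&serialState->mutex);
+ *     ZSTD_PTHREAD_MUTEX_LOCK(&serialState->ldmWindowMutex);
+ *     ... update ldmWindow ...
+ *     ZSTD_pthread_mutex_unlock(&serialState->ldmWindowMutex);
+ *     ZSTD_pthread_mutex_unlock(&serialState->mutex);
+ * taking the two locks in the opposite order from two threads risks deadlock.)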
+ */
+ ZSTD_pthread_mutex_t ldmWindowMutex;
+ ZSTD_pthread_cond_t ldmWindowCond; /* Signaled when ldmWindow is updated */
+ ZSTD_window_t ldmWindow; /* A thread-safe copy of ldmState.window */
+} serialState_t;
+
+static int
+ZSTDMT_serialState_reset(serialState_t* serialState,
+ ZSTDMT_seqPool* seqPool,
+ ZSTD_CCtx_params params,
+ size_t jobSize,
+ const void* dict, size_t const dictSize,
+ ZSTD_dictContentType_e dictContentType)
+{
+ /* Adjust parameters */
+ if (params.ldmParams.enableLdm) {
+ DEBUGLOG(4, "LDM window size = %u KB", (1U << params.cParams.windowLog) >> 10);
+ ZSTD_ldm_adjustParameters(&params.ldmParams, &params.cParams);
+ assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog);
+ assert(params.ldmParams.hashRateLog < 32);
+ } else {
+ ZSTD_memset(&params.ldmParams, 0, sizeof(params.ldmParams));
+ }
+ serialState->nextJobID = 0;
+ if (params.fParams.checksumFlag)
+ XXH64_reset(&serialState->xxhState, 0);
+ if (params.ldmParams.enableLdm) {
+ ZSTD_customMem cMem = params.customMem;
+ unsigned const hashLog = params.ldmParams.hashLog;
+ size_t const hashSize = ((size_t)1 << hashLog) * sizeof(ldmEntry_t);
+ unsigned const bucketLog =
+ params.ldmParams.hashLog - params.ldmParams.bucketSizeLog;
+ unsigned const prevBucketLog =
+ serialState->params.ldmParams.hashLog -
+ serialState->params.ldmParams.bucketSizeLog;
+ size_t const numBuckets = (size_t)1 << bucketLog;
+ /* Size the seq pool tables */
+ ZSTDMT_setNbSeq(seqPool, ZSTD_ldm_getMaxNbSeq(params.ldmParams, jobSize));
+ /* Reset the window */
+ ZSTD_window_init(&serialState->ldmState.window);
+ /* Resize tables and output space if necessary. */
+ if (serialState->ldmState.hashTable == NULL || serialState->params.ldmParams.hashLog < hashLog) {
+ ZSTD_customFree(serialState->ldmState.hashTable, cMem);
+ serialState->ldmState.hashTable = (ldmEntry_t*)ZSTD_customMalloc(hashSize, cMem);
+ }
+ if (serialState->ldmState.bucketOffsets == NULL || prevBucketLog < bucketLog) {
+ ZSTD_customFree(serialState->ldmState.bucketOffsets, cMem);
+ serialState->ldmState.bucketOffsets = (BYTE*)ZSTD_customMalloc(numBuckets, cMem);
+ }
+ if (!serialState->ldmState.hashTable || !serialState->ldmState.bucketOffsets)
+ return 1;
+ /* Zero the tables */
+ ZSTD_memset(serialState->ldmState.hashTable, 0, hashSize);
+ ZSTD_memset(serialState->ldmState.bucketOffsets, 0, numBuckets);
+
+ /* Update window state and fill hash table with dict */
+ serialState->ldmState.loadedDictEnd = 0;
+ if (dictSize > 0) {
+ if (dictContentType == ZSTD_dct_rawContent) {
+ BYTE const* const dictEnd = (const BYTE*)dict + dictSize;
+ ZSTD_window_update(&serialState->ldmState.window, dict, dictSize, /* forceNonContiguous */ 0);
+ ZSTD_ldm_fillHashTable(&serialState->ldmState, (const BYTE*)dict, dictEnd, &params.ldmParams);
+ serialState->ldmState.loadedDictEnd = params.forceWindow ? 0 : (U32)(dictEnd - serialState->ldmState.window.base);
+ } else {
+ /* don't even load anything */
+ }
+ }
+
+ /* Initialize serialState's copy of ldmWindow.
*/ + serialState->ldmWindow = serialState->ldmState.window; + } + + serialState->params = params; + serialState->params.jobSize = (U32)jobSize; + return 0; +} + +static int ZSTDMT_serialState_init(serialState_t* serialState) +{ + int initError = 0; + ZSTD_memset(serialState, 0, sizeof(*serialState)); + initError |= ZSTD_pthread_mutex_init(&serialState->mutex, NULL); + initError |= ZSTD_pthread_cond_init(&serialState->cond, NULL); + initError |= ZSTD_pthread_mutex_init(&serialState->ldmWindowMutex, NULL); + initError |= ZSTD_pthread_cond_init(&serialState->ldmWindowCond, NULL); + return initError; +} + +static void ZSTDMT_serialState_free(serialState_t* serialState) +{ + ZSTD_customMem cMem = serialState->params.customMem; + ZSTD_pthread_mutex_destroy(&serialState->mutex); + ZSTD_pthread_cond_destroy(&serialState->cond); + ZSTD_pthread_mutex_destroy(&serialState->ldmWindowMutex); + ZSTD_pthread_cond_destroy(&serialState->ldmWindowCond); + ZSTD_customFree(serialState->ldmState.hashTable, cMem); + ZSTD_customFree(serialState->ldmState.bucketOffsets, cMem); +} + +static void ZSTDMT_serialState_update(serialState_t* serialState, + ZSTD_CCtx* jobCCtx, rawSeqStore_t seqStore, + range_t src, unsigned jobID) +{ + /* Wait for our turn */ + ZSTD_PTHREAD_MUTEX_LOCK(&serialState->mutex); + while (serialState->nextJobID < jobID) { + DEBUGLOG(5, "wait for serialState->cond"); + ZSTD_pthread_cond_wait(&serialState->cond, &serialState->mutex); + } + /* A future job may error and skip our job */ + if (serialState->nextJobID == jobID) { + /* It is now our turn, do any processing necessary */ + if (serialState->params.ldmParams.enableLdm) { + size_t error; + assert(seqStore.seq != NULL && seqStore.pos == 0 && + seqStore.size == 0 && seqStore.capacity > 0); + assert(src.size <= serialState->params.jobSize); + ZSTD_window_update(&serialState->ldmState.window, src.start, src.size, /* forceNonContiguous */ 0); + error = ZSTD_ldm_generateSequences( + &serialState->ldmState, &seqStore, + &serialState->params.ldmParams, src.start, src.size); + /* We provide a large enough buffer to never fail. */ + assert(!ZSTD_isError(error)); (void)error; + /* Update ldmWindow to match the ldmState.window and signal the main + * thread if it is waiting for a buffer. 
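+ * (Editor's note : the waiting side is ZSTDMT_waitForLdmComplete(), defined further
+ * below, which blocks on ldmWindowCond until the round-buffer range it wants to
+ * reuse no longer overlaps this window.)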
+ */ + ZSTD_PTHREAD_MUTEX_LOCK(&serialState->ldmWindowMutex); + serialState->ldmWindow = serialState->ldmState.window; + ZSTD_pthread_cond_signal(&serialState->ldmWindowCond); + ZSTD_pthread_mutex_unlock(&serialState->ldmWindowMutex); + } + if (serialState->params.fParams.checksumFlag && src.size > 0) + XXH64_update(&serialState->xxhState, src.start, src.size); + } + /* Now it is the next jobs turn */ + serialState->nextJobID++; + ZSTD_pthread_cond_broadcast(&serialState->cond); + ZSTD_pthread_mutex_unlock(&serialState->mutex); + + if (seqStore.size > 0) { + size_t const err = ZSTD_referenceExternalSequences( + jobCCtx, seqStore.seq, seqStore.size); + assert(serialState->params.ldmParams.enableLdm); + assert(!ZSTD_isError(err)); + (void)err; + } +} + +static void ZSTDMT_serialState_ensureFinished(serialState_t* serialState, + unsigned jobID, size_t cSize) +{ + ZSTD_PTHREAD_MUTEX_LOCK(&serialState->mutex); + if (serialState->nextJobID <= jobID) { + assert(ZSTD_isError(cSize)); (void)cSize; + DEBUGLOG(5, "Skipping past job %u because of error", jobID); + serialState->nextJobID = jobID + 1; + ZSTD_pthread_cond_broadcast(&serialState->cond); + + ZSTD_PTHREAD_MUTEX_LOCK(&serialState->ldmWindowMutex); + ZSTD_window_clear(&serialState->ldmWindow); + ZSTD_pthread_cond_signal(&serialState->ldmWindowCond); + ZSTD_pthread_mutex_unlock(&serialState->ldmWindowMutex); + } + ZSTD_pthread_mutex_unlock(&serialState->mutex); + +} + + +/* ------------------------------------------ */ +/* ===== Worker thread ===== */ +/* ------------------------------------------ */ + +static const range_t kNullRange = { NULL, 0 }; + +typedef struct { + size_t consumed; /* SHARED - set0 by mtctx, then modified by worker AND read by mtctx */ + size_t cSize; /* SHARED - set0 by mtctx, then modified by worker AND read by mtctx, then set0 by mtctx */ + ZSTD_pthread_mutex_t job_mutex; /* Thread-safe - used by mtctx and worker */ + ZSTD_pthread_cond_t job_cond; /* Thread-safe - used by mtctx and worker */ + ZSTDMT_CCtxPool* cctxPool; /* Thread-safe - used by mtctx and (all) workers */ + ZSTDMT_bufferPool* bufPool; /* Thread-safe - used by mtctx and (all) workers */ + ZSTDMT_seqPool* seqPool; /* Thread-safe - used by mtctx and (all) workers */ + serialState_t* serial; /* Thread-safe - used by mtctx and (all) workers */ + buffer_t dstBuff; /* set by worker (or mtctx), then read by worker & mtctx, then modified by mtctx => no barrier */ + range_t prefix; /* set by mtctx, then read by worker & mtctx => no barrier */ + range_t src; /* set by mtctx, then read by worker & mtctx => no barrier */ + unsigned jobID; /* set by mtctx, then read by worker => no barrier */ + unsigned firstJob; /* set by mtctx, then read by worker => no barrier */ + unsigned lastJob; /* set by mtctx, then read by worker => no barrier */ + ZSTD_CCtx_params params; /* set by mtctx, then read by worker => no barrier */ + const ZSTD_CDict* cdict; /* set by mtctx, then read by worker => no barrier */ + unsigned long long fullFrameSize; /* set by mtctx, then read by worker => no barrier */ + size_t dstFlushed; /* used only by mtctx */ + unsigned frameChecksumNeeded; /* used only by mtctx */ +} ZSTDMT_jobDescription; + +#define JOB_ERROR(e) { \ + ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex); \ + job->cSize = e; \ + ZSTD_pthread_mutex_unlock(&job->job_mutex); \ + goto _endJob; \ +} + +/* ZSTDMT_compressionJob() is a POOL_function type */ +static void ZSTDMT_compressionJob(void* jobDescription) +{ + ZSTDMT_jobDescription* const job = (ZSTDMT_jobDescription*)jobDescription; + 
ZSTD_CCtx_params jobParams = job->params; /* do not modify job->params ! copy it, modify the copy */ + ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(job->cctxPool); + rawSeqStore_t rawSeqStore = ZSTDMT_getSeq(job->seqPool); + buffer_t dstBuff = job->dstBuff; + size_t lastCBlockSize = 0; + + /* resources */ + if (cctx==NULL) JOB_ERROR(ERROR(memory_allocation)); + if (dstBuff.start == NULL) { /* streaming job : doesn't provide a dstBuffer */ + dstBuff = ZSTDMT_getBuffer(job->bufPool); + if (dstBuff.start==NULL) JOB_ERROR(ERROR(memory_allocation)); + job->dstBuff = dstBuff; /* this value can be read in ZSTDMT_flush, when it copies the whole job */ + } + if (jobParams.ldmParams.enableLdm && rawSeqStore.seq == NULL) + JOB_ERROR(ERROR(memory_allocation)); + + /* Don't compute the checksum for chunks, since we compute it externally, + * but write it in the header. + */ + if (job->jobID != 0) jobParams.fParams.checksumFlag = 0; + /* Don't run LDM for the chunks, since we handle it externally */ + jobParams.ldmParams.enableLdm = 0; + /* Correct nbWorkers to 0. */ + jobParams.nbWorkers = 0; + + + /* init */ + if (job->cdict) { + size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, job->cdict, &jobParams, job->fullFrameSize); + assert(job->firstJob); /* only allowed for first job */ + if (ZSTD_isError(initError)) JOB_ERROR(initError); + } else { /* srcStart points at reloaded section */ + U64 const pledgedSrcSize = job->firstJob ? job->fullFrameSize : job->src.size; + { size_t const forceWindowError = ZSTD_CCtxParams_setParameter(&jobParams, ZSTD_c_forceMaxWindow, !job->firstJob); + if (ZSTD_isError(forceWindowError)) JOB_ERROR(forceWindowError); + } + if (!job->firstJob) { + size_t const err = ZSTD_CCtxParams_setParameter(&jobParams, ZSTD_c_deterministicRefPrefix, 0); + if (ZSTD_isError(err)) JOB_ERROR(err); + } + { size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, + job->prefix.start, job->prefix.size, ZSTD_dct_rawContent, /* load dictionary in "content-only" mode (no header analysis) */ + ZSTD_dtlm_fast, + NULL, /*cdict*/ + &jobParams, pledgedSrcSize); + if (ZSTD_isError(initError)) JOB_ERROR(initError); + } } + + /* Perform serial step as early as possible, but after CCtx initialization */ + ZSTDMT_serialState_update(job->serial, cctx, rawSeqStore, job->src, job->jobID); + + if (!job->firstJob) { /* flush and overwrite frame header when it's not first job */ + size_t const hSize = ZSTD_compressContinue(cctx, dstBuff.start, dstBuff.capacity, job->src.start, 0); + if (ZSTD_isError(hSize)) JOB_ERROR(hSize); + DEBUGLOG(5, "ZSTDMT_compressionJob: flush and overwrite %u bytes of frame header (not first job)", (U32)hSize); + ZSTD_invalidateRepCodes(cctx); + } + + /* compress */ + { size_t const chunkSize = 4*ZSTD_BLOCKSIZE_MAX; + int const nbChunks = (int)((job->src.size + (chunkSize-1)) / chunkSize); + const BYTE* ip = (const BYTE*) job->src.start; + BYTE* const ostart = (BYTE*)dstBuff.start; + BYTE* op = ostart; + BYTE* oend = op + dstBuff.capacity; + int chunkNb; + if (sizeof(size_t) > sizeof(int)) assert(job->src.size < ((size_t)INT_MAX) * chunkSize); /* check overflow */ + DEBUGLOG(5, "ZSTDMT_compressionJob: compress %u bytes in %i blocks", (U32)job->src.size, nbChunks); + assert(job->cSize == 0); + for (chunkNb = 1; chunkNb < nbChunks; chunkNb++) { + size_t const cSize = ZSTD_compressContinue(cctx, op, oend-op, ip, chunkSize); + if (ZSTD_isError(cSize)) JOB_ERROR(cSize); + ip += chunkSize; + op += cSize; assert(op < oend); + /* 
stats */ + ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex); + job->cSize += cSize; + job->consumed = chunkSize * chunkNb; + DEBUGLOG(5, "ZSTDMT_compressionJob: compress new block : cSize==%u bytes (total: %u)", + (U32)cSize, (U32)job->cSize); + ZSTD_pthread_cond_signal(&job->job_cond); /* warns some more data is ready to be flushed */ + ZSTD_pthread_mutex_unlock(&job->job_mutex); + } + /* last block */ + assert(chunkSize > 0); + assert((chunkSize & (chunkSize - 1)) == 0); /* chunkSize must be power of 2 for mask==(chunkSize-1) to work */ + if ((nbChunks > 0) | job->lastJob /*must output a "last block" flag*/ ) { + size_t const lastBlockSize1 = job->src.size & (chunkSize-1); + size_t const lastBlockSize = ((lastBlockSize1==0) & (job->src.size>=chunkSize)) ? chunkSize : lastBlockSize1; + size_t const cSize = (job->lastJob) ? + ZSTD_compressEnd (cctx, op, oend-op, ip, lastBlockSize) : + ZSTD_compressContinue(cctx, op, oend-op, ip, lastBlockSize); + if (ZSTD_isError(cSize)) JOB_ERROR(cSize); + lastCBlockSize = cSize; + } } + if (!job->firstJob) { + /* Double check that we don't have an ext-dict, because then our + * repcode invalidation doesn't work. + */ + assert(!ZSTD_window_hasExtDict(cctx->blockState.matchState.window)); + } + ZSTD_CCtx_trace(cctx, 0); + +_endJob: + ZSTDMT_serialState_ensureFinished(job->serial, job->jobID, job->cSize); + if (job->prefix.size > 0) + DEBUGLOG(5, "Finished with prefix: %zx", (size_t)job->prefix.start); + DEBUGLOG(5, "Finished with source: %zx", (size_t)job->src.start); + /* release resources */ + ZSTDMT_releaseSeq(job->seqPool, rawSeqStore); + ZSTDMT_releaseCCtx(job->cctxPool, cctx); + /* report */ + ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex); + if (ZSTD_isError(job->cSize)) assert(lastCBlockSize == 0); + job->cSize += lastCBlockSize; + job->consumed = job->src.size; /* when job->consumed == job->src.size , compression job is presumed completed */ + ZSTD_pthread_cond_signal(&job->job_cond); + ZSTD_pthread_mutex_unlock(&job->job_mutex); +} + + +/* ------------------------------------------ */ +/* ===== Multi-threaded compression ===== */ +/* ------------------------------------------ */ + +typedef struct { + range_t prefix; /* read-only non-owned prefix buffer */ + buffer_t buffer; + size_t filled; +} inBuff_t; + +typedef struct { + BYTE* buffer; /* The round input buffer. All jobs get references + * to pieces of the buffer. ZSTDMT_tryGetInputRange() + * handles handing out job input buffers, and makes + * sure it doesn't overlap with any pieces still in use. + */ + size_t capacity; /* The capacity of buffer. */ + size_t pos; /* The position of the current inBuff in the round + * buffer. Updated past the end if the inBuff once + * the inBuff is sent to the worker thread. + * pos <= capacity. + */ +} roundBuff_t; + +static const roundBuff_t kNullRoundBuff = {NULL, 0, 0}; + +#define RSYNC_LENGTH 32 + +typedef struct { + U64 hash; + U64 hitMask; + U64 primePower; +} rsyncState_t; + +struct ZSTDMT_CCtx_s { + POOL_ctx* factory; + ZSTDMT_jobDescription* jobs; + ZSTDMT_bufferPool* bufPool; + ZSTDMT_CCtxPool* cctxPool; + ZSTDMT_seqPool* seqPool; + ZSTD_CCtx_params params; + size_t targetSectionSize; + size_t targetPrefixSize; + int jobReady; /* 1 => one job is already prepared, but pool has shortage of workers. Don't create a new job. 
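+ * (Editor's note : when jobReady stays set to 1, the next call to
+ * ZSTDMT_createCompressionJob() skips job preparation and simply retries
+ * POOL_tryAdd() on the already-prepared job.)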
*/
+ inBuff_t inBuff;
+ roundBuff_t roundBuff;
+ serialState_t serial;
+ rsyncState_t rsync;
+ unsigned jobIDMask;
+ unsigned doneJobID;
+ unsigned nextJobID;
+ unsigned frameEnded;
+ unsigned allJobsCompleted;
+ unsigned long long frameContentSize;
+ unsigned long long consumed;
+ unsigned long long produced;
+ ZSTD_customMem cMem;
+ ZSTD_CDict* cdictLocal;
+ const ZSTD_CDict* cdict;
+ unsigned providedFactory: 1;
+};
+
+static void ZSTDMT_freeJobsTable(ZSTDMT_jobDescription* jobTable, U32 nbJobs, ZSTD_customMem cMem)
+{
+ U32 jobNb;
+ if (jobTable == NULL) return;
+ for (jobNb=0; jobNb<nbJobs; jobNb++) {
+ ZSTD_pthread_mutex_destroy(&jobTable[jobNb].job_mutex);
+ ZSTD_pthread_cond_destroy(&jobTable[jobNb].job_cond);
+ }
+ ZSTD_customFree(jobTable, cMem);
+}
+
+/* ZSTDMT_allocJobsTable()
+ * allocate and init a job table.
+ * update *nbJobsPtr to next power of 2 value, as size of table */
+static ZSTDMT_jobDescription* ZSTDMT_createJobsTable(U32* nbJobsPtr, ZSTD_customMem cMem)
+{
+ U32 const nbJobsLog2 = ZSTD_highbit32(*nbJobsPtr) + 1;
+ U32 const nbJobs = 1 << nbJobsLog2;
+ U32 jobNb;
+ ZSTDMT_jobDescription* const jobTable = (ZSTDMT_jobDescription*)
+ ZSTD_customCalloc(nbJobs * sizeof(ZSTDMT_jobDescription), cMem);
+ int initError = 0;
+ if (jobTable==NULL) return NULL;
+ *nbJobsPtr = nbJobs;
+ for (jobNb=0; jobNb<nbJobs; jobNb++) {
+ initError |= ZSTD_pthread_mutex_init(&jobTable[jobNb].job_mutex, NULL);
+ initError |= ZSTD_pthread_cond_init(&jobTable[jobNb].job_cond, NULL);
+ }
+ if (initError != 0) {
+ ZSTDMT_freeJobsTable(jobTable, nbJobs, cMem);
+ return NULL;
+ }
+ return jobTable;
+}
+
+static size_t ZSTDMT_expandJobsTable (ZSTDMT_CCtx* mtctx, U32 nbWorkers) {
+ U32 nbJobs = nbWorkers + 2;
+ if (nbJobs > mtctx->jobIDMask+1) { /* need more job capacity */
+ ZSTDMT_freeJobsTable(mtctx->jobs, mtctx->jobIDMask+1, mtctx->cMem);
+ mtctx->jobIDMask = 0;
+ mtctx->jobs = ZSTDMT_createJobsTable(&nbJobs, mtctx->cMem);
+ if (mtctx->jobs==NULL) return ERROR(memory_allocation);
+ assert((nbJobs != 0) && ((nbJobs & (nbJobs - 1)) == 0)); /* ensure nbJobs is a power of 2 */
+ mtctx->jobIDMask = nbJobs - 1;
+ }
+ return 0;
+}
+
+
+/* ZSTDMT_CCtxParam_setNbWorkers():
+ * Internal use only */
+static size_t ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorkers)
+{
+ return ZSTD_CCtxParams_setParameter(params, ZSTD_c_nbWorkers, (int)nbWorkers);
+}
+
+MEM_STATIC ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced_internal(unsigned nbWorkers, ZSTD_customMem cMem, ZSTD_threadPool* pool)
+{
+ ZSTDMT_CCtx* mtctx;
+ U32 nbJobs = nbWorkers + 2;
+ int initError;
+ DEBUGLOG(3, "ZSTDMT_createCCtx_advanced (nbWorkers = %u)", nbWorkers);
+
+ if (nbWorkers < 1) return NULL;
+ nbWorkers = MIN(nbWorkers , ZSTDMT_NBWORKERS_MAX);
+ if ((cMem.customAlloc!=NULL) ^ (cMem.customFree!=NULL))
+ /* invalid custom allocator */
+ return NULL;
+
+ mtctx = (ZSTDMT_CCtx*) ZSTD_customCalloc(sizeof(ZSTDMT_CCtx), cMem);
+ if (!mtctx) return NULL;
+ ZSTDMT_CCtxParam_setNbWorkers(&mtctx->params, nbWorkers);
+ mtctx->cMem = cMem;
+ mtctx->allJobsCompleted = 1;
+ if (pool != NULL) {
+ mtctx->factory = pool;
+ mtctx->providedFactory = 1;
+ }
+ else {
+ mtctx->factory = POOL_create_advanced(nbWorkers, 0, cMem);
+ mtctx->providedFactory = 0;
+ }
+ mtctx->jobs = ZSTDMT_createJobsTable(&nbJobs, cMem);
+ assert(nbJobs > 0); assert((nbJobs & (nbJobs - 1)) == 0); /* ensure nbJobs is a power of 2 */
+ mtctx->jobIDMask = nbJobs - 1;
+ mtctx->bufPool = ZSTDMT_createBufferPool(nbWorkers, cMem);
+ mtctx->cctxPool = ZSTDMT_createCCtxPool(nbWorkers, cMem);
+ mtctx->seqPool = ZSTDMT_createSeqPool(nbWorkers, cMem);
+ initError = ZSTDMT_serialState_init(&mtctx->serial);
+ mtctx->roundBuff = kNullRoundBuff;
+ if (!mtctx->factory | !mtctx->jobs | !mtctx->bufPool | !mtctx->cctxPool | !mtctx->seqPool | initError) {
+ ZSTDMT_freeCCtx(mtctx);
+ return NULL;
+ }
+ DEBUGLOG(3, "mt_cctx created, for %u threads", nbWorkers);
+ return mtctx;
+}
+
+ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers, ZSTD_customMem cMem, ZSTD_threadPool* pool)
+{
+#ifdef ZSTD_MULTITHREAD
+ return ZSTDMT_createCCtx_advanced_internal(nbWorkers, cMem, pool);
+#else
+ (void)nbWorkers;
+ (void)cMem;
+ (void)pool;
+ return NULL;
+#endif
+}
+
+
+/* ZSTDMT_releaseAllJobResources() :
+ * note : ensure all workers are killed first !
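+ * (Editor's note, illustrative : the job table is a power-of-two ring indexed by
+ * jobID & jobIDMask, which is why the loop below runs from 0 to jobIDMask inclusive;
+ * e.g. with 8 slots, jobIDMask == 7 and job 11 reuses slot 11 & 7 == 3.)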
*/ +static void ZSTDMT_releaseAllJobResources(ZSTDMT_CCtx* mtctx) +{ + unsigned jobID; + DEBUGLOG(3, "ZSTDMT_releaseAllJobResources"); + for (jobID=0; jobID <= mtctx->jobIDMask; jobID++) { + /* Copy the mutex/cond out */ + ZSTD_pthread_mutex_t const mutex = mtctx->jobs[jobID].job_mutex; + ZSTD_pthread_cond_t const cond = mtctx->jobs[jobID].job_cond; + + DEBUGLOG(4, "job%02u: release dst address %08X", jobID, (U32)(size_t)mtctx->jobs[jobID].dstBuff.start); + ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[jobID].dstBuff); + + /* Clear the job description, but keep the mutex/cond */ + ZSTD_memset(&mtctx->jobs[jobID], 0, sizeof(mtctx->jobs[jobID])); + mtctx->jobs[jobID].job_mutex = mutex; + mtctx->jobs[jobID].job_cond = cond; + } + mtctx->inBuff.buffer = g_nullBuffer; + mtctx->inBuff.filled = 0; + mtctx->allJobsCompleted = 1; +} + +static void ZSTDMT_waitForAllJobsCompleted(ZSTDMT_CCtx* mtctx) +{ + DEBUGLOG(4, "ZSTDMT_waitForAllJobsCompleted"); + while (mtctx->doneJobID < mtctx->nextJobID) { + unsigned const jobID = mtctx->doneJobID & mtctx->jobIDMask; + ZSTD_PTHREAD_MUTEX_LOCK(&mtctx->jobs[jobID].job_mutex); + while (mtctx->jobs[jobID].consumed < mtctx->jobs[jobID].src.size) { + DEBUGLOG(4, "waiting for jobCompleted signal from job %u", mtctx->doneJobID); /* we want to block when waiting for data to flush */ + ZSTD_pthread_cond_wait(&mtctx->jobs[jobID].job_cond, &mtctx->jobs[jobID].job_mutex); + } + ZSTD_pthread_mutex_unlock(&mtctx->jobs[jobID].job_mutex); + mtctx->doneJobID++; + } +} + +size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx) +{ + if (mtctx==NULL) return 0; /* compatible with free on NULL */ + if (!mtctx->providedFactory) + POOL_free(mtctx->factory); /* stop and free worker threads */ + ZSTDMT_releaseAllJobResources(mtctx); /* release job resources into pools first */ + ZSTDMT_freeJobsTable(mtctx->jobs, mtctx->jobIDMask+1, mtctx->cMem); + ZSTDMT_freeBufferPool(mtctx->bufPool); + ZSTDMT_freeCCtxPool(mtctx->cctxPool); + ZSTDMT_freeSeqPool(mtctx->seqPool); + ZSTDMT_serialState_free(&mtctx->serial); + ZSTD_freeCDict(mtctx->cdictLocal); + if (mtctx->roundBuff.buffer) + ZSTD_customFree(mtctx->roundBuff.buffer, mtctx->cMem); + ZSTD_customFree(mtctx, mtctx->cMem); + return 0; +} + +size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx) +{ + if (mtctx == NULL) return 0; /* supports sizeof NULL */ + return sizeof(*mtctx) + + POOL_sizeof(mtctx->factory) + + ZSTDMT_sizeof_bufferPool(mtctx->bufPool) + + (mtctx->jobIDMask+1) * sizeof(ZSTDMT_jobDescription) + + ZSTDMT_sizeof_CCtxPool(mtctx->cctxPool) + + ZSTDMT_sizeof_seqPool(mtctx->seqPool) + + ZSTD_sizeof_CDict(mtctx->cdictLocal) + + mtctx->roundBuff.capacity; +} + + +/* ZSTDMT_resize() : + * @return : error code if fails, 0 on success */ +static size_t ZSTDMT_resize(ZSTDMT_CCtx* mtctx, unsigned nbWorkers) +{ + if (POOL_resize(mtctx->factory, nbWorkers)) return ERROR(memory_allocation); + FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbWorkers) , ""); + mtctx->bufPool = ZSTDMT_expandBufferPool(mtctx->bufPool, nbWorkers); + if (mtctx->bufPool == NULL) return ERROR(memory_allocation); + mtctx->cctxPool = ZSTDMT_expandCCtxPool(mtctx->cctxPool, nbWorkers); + if (mtctx->cctxPool == NULL) return ERROR(memory_allocation); + mtctx->seqPool = ZSTDMT_expandSeqPool(mtctx->seqPool, nbWorkers); + if (mtctx->seqPool == NULL) return ERROR(memory_allocation); + ZSTDMT_CCtxParam_setNbWorkers(&mtctx->params, nbWorkers); + return 0; +} + + +/*! 
ZSTDMT_updateCParams_whileCompressing() : + * Updates a selected set of compression parameters, remaining compatible with currently active frame. + * New parameters will be applied to next compression job. */ +void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_params* cctxParams) +{ + U32 const saved_wlog = mtctx->params.cParams.windowLog; /* Do not modify windowLog while compressing */ + int const compressionLevel = cctxParams->compressionLevel; + DEBUGLOG(5, "ZSTDMT_updateCParams_whileCompressing (level:%i)", + compressionLevel); + mtctx->params.compressionLevel = compressionLevel; + { ZSTD_compressionParameters cParams = ZSTD_getCParamsFromCCtxParams(cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict); + cParams.windowLog = saved_wlog; + mtctx->params.cParams = cParams; + } +} + +/* ZSTDMT_getFrameProgression(): + * tells how much data has been consumed (input) and produced (output) for current frame. + * able to count progression inside worker threads. + * Note : mutex will be acquired during statistics collection inside workers. */ +ZSTD_frameProgression ZSTDMT_getFrameProgression(ZSTDMT_CCtx* mtctx) +{ + ZSTD_frameProgression fps; + DEBUGLOG(5, "ZSTDMT_getFrameProgression"); + fps.ingested = mtctx->consumed + mtctx->inBuff.filled; + fps.consumed = mtctx->consumed; + fps.produced = fps.flushed = mtctx->produced; + fps.currentJobID = mtctx->nextJobID; + fps.nbActiveWorkers = 0; + { unsigned jobNb; + unsigned lastJobNb = mtctx->nextJobID + mtctx->jobReady; assert(mtctx->jobReady <= 1); + DEBUGLOG(6, "ZSTDMT_getFrameProgression: jobs: from %u to <%u (jobReady:%u)", + mtctx->doneJobID, lastJobNb, mtctx->jobReady) + for (jobNb = mtctx->doneJobID ; jobNb < lastJobNb ; jobNb++) { + unsigned const wJobID = jobNb & mtctx->jobIDMask; + ZSTDMT_jobDescription* jobPtr = &mtctx->jobs[wJobID]; + ZSTD_pthread_mutex_lock(&jobPtr->job_mutex); + { size_t const cResult = jobPtr->cSize; + size_t const produced = ZSTD_isError(cResult) ? 0 : cResult; + size_t const flushed = ZSTD_isError(cResult) ? 0 : jobPtr->dstFlushed; + assert(flushed <= produced); + fps.ingested += jobPtr->src.size; + fps.consumed += jobPtr->consumed; + fps.produced += produced; + fps.flushed += flushed; + fps.nbActiveWorkers += (jobPtr->consumed < jobPtr->src.size); + } + ZSTD_pthread_mutex_unlock(&mtctx->jobs[wJobID].job_mutex); + } + } + return fps; +} + + +size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx) +{ + size_t toFlush; + unsigned const jobID = mtctx->doneJobID; + assert(jobID <= mtctx->nextJobID); + if (jobID == mtctx->nextJobID) return 0; /* no active job => nothing to flush */ + + /* look into oldest non-fully-flushed job */ + { unsigned const wJobID = jobID & mtctx->jobIDMask; + ZSTDMT_jobDescription* const jobPtr = &mtctx->jobs[wJobID]; + ZSTD_pthread_mutex_lock(&jobPtr->job_mutex); + { size_t const cResult = jobPtr->cSize; + size_t const produced = ZSTD_isError(cResult) ? 0 : cResult; + size_t const flushed = ZSTD_isError(cResult) ? 0 : jobPtr->dstFlushed; + assert(flushed <= produced); + assert(jobPtr->consumed <= jobPtr->src.size); + toFlush = produced - flushed; + /* if toFlush==0, nothing is available to flush. + * However, jobID is expected to still be active: + * if jobID was already completed and fully flushed, + * ZSTDMT_flushProduced() should have already moved onto next job. + * Therefore, some input has not yet been consumed. 
*/ + if (toFlush==0) { + assert(jobPtr->consumed < jobPtr->src.size); + } + } + ZSTD_pthread_mutex_unlock(&mtctx->jobs[wJobID].job_mutex); + } + + return toFlush; +} + + +/* ------------------------------------------ */ +/* ===== Multi-threaded compression ===== */ +/* ------------------------------------------ */ + +static unsigned ZSTDMT_computeTargetJobLog(const ZSTD_CCtx_params* params) +{ + unsigned jobLog; + if (params->ldmParams.enableLdm) { + /* In Long Range Mode, the windowLog is typically oversized. + * In which case, it's preferable to determine the jobSize + * based on cycleLog instead. */ + jobLog = MAX(21, ZSTD_cycleLog(params->cParams.chainLog, params->cParams.strategy) + 3); + } else { + jobLog = MAX(20, params->cParams.windowLog + 2); + } + return MIN(jobLog, (unsigned)ZSTDMT_JOBLOG_MAX); +} + +static int ZSTDMT_overlapLog_default(ZSTD_strategy strat) +{ + switch(strat) + { + case ZSTD_btultra2: + return 9; + case ZSTD_btultra: + case ZSTD_btopt: + return 8; + case ZSTD_btlazy2: + case ZSTD_lazy2: + return 7; + case ZSTD_lazy: + case ZSTD_greedy: + case ZSTD_dfast: + case ZSTD_fast: + default:; + } + return 6; +} + +static int ZSTDMT_overlapLog(int ovlog, ZSTD_strategy strat) +{ + assert(0 <= ovlog && ovlog <= 9); + if (ovlog == 0) return ZSTDMT_overlapLog_default(strat); + return ovlog; +} + +static size_t ZSTDMT_computeOverlapSize(const ZSTD_CCtx_params* params) +{ + int const overlapRLog = 9 - ZSTDMT_overlapLog(params->overlapLog, params->cParams.strategy); + int ovLog = (overlapRLog >= 8) ? 0 : (params->cParams.windowLog - overlapRLog); + assert(0 <= overlapRLog && overlapRLog <= 8); + if (params->ldmParams.enableLdm) { + /* In Long Range Mode, the windowLog is typically oversized. + * In which case, it's preferable to determine the jobSize + * based on chainLog instead. + * Then, ovLog becomes a fraction of the jobSize, rather than windowSize */ + ovLog = MIN(params->cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - 2) + - overlapRLog; + } + assert(0 <= ovLog && ovLog <= ZSTD_WINDOWLOG_MAX); + DEBUGLOG(4, "overlapLog : %i", params->overlapLog); + DEBUGLOG(4, "overlap size : %i", 1 << ovLog); + return (ovLog==0) ? 
0 : (size_t)1 << ovLog;
+}
+
+/* ====================================== */
+/* ======= Streaming API ======= */
+/* ====================================== */
+
+size_t ZSTDMT_initCStream_internal(
+ ZSTDMT_CCtx* mtctx,
+ const void* dict, size_t dictSize, ZSTD_dictContentType_e dictContentType,
+ const ZSTD_CDict* cdict, ZSTD_CCtx_params params,
+ unsigned long long pledgedSrcSize)
+{
+ DEBUGLOG(4, "ZSTDMT_initCStream_internal (pledgedSrcSize=%u, nbWorkers=%u, cctxPool=%u)",
+ (U32)pledgedSrcSize, params.nbWorkers, mtctx->cctxPool->totalCCtx);
+
+ /* params supposed partially fully validated at this point */
+ assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
+ assert(!((dict) && (cdict))); /* either dict or cdict, not both */
+
+ /* init */
+ if (params.nbWorkers != mtctx->params.nbWorkers)
+ FORWARD_IF_ERROR( ZSTDMT_resize(mtctx, params.nbWorkers) , "");
+
+ if (params.jobSize != 0 && params.jobSize < ZSTDMT_JOBSIZE_MIN) params.jobSize = ZSTDMT_JOBSIZE_MIN;
+ if (params.jobSize > (size_t)ZSTDMT_JOBSIZE_MAX) params.jobSize = (size_t)ZSTDMT_JOBSIZE_MAX;
+
+ DEBUGLOG(4, "ZSTDMT_initCStream_internal: %u workers", params.nbWorkers);
+
+ if (mtctx->allJobsCompleted == 0) { /* previous compression not correctly finished */
+ ZSTDMT_waitForAllJobsCompleted(mtctx);
+ ZSTDMT_releaseAllJobResources(mtctx);
+ mtctx->allJobsCompleted = 1;
+ }
+
+ mtctx->params = params;
+ mtctx->frameContentSize = pledgedSrcSize;
+ if (dict) {
+ ZSTD_freeCDict(mtctx->cdictLocal);
+ mtctx->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize,
+ ZSTD_dlm_byCopy, dictContentType, /* note : a loadPrefix becomes an internal CDict */
+ params.cParams, mtctx->cMem);
+ mtctx->cdict = mtctx->cdictLocal;
+ if (mtctx->cdictLocal == NULL) return ERROR(memory_allocation);
+ } else {
+ ZSTD_freeCDict(mtctx->cdictLocal);
+ mtctx->cdictLocal = NULL;
+ mtctx->cdict = cdict;
+ }
+
+ mtctx->targetPrefixSize = ZSTDMT_computeOverlapSize(&params);
+ DEBUGLOG(4, "overlapLog=%i => %u KB", params.overlapLog, (U32)(mtctx->targetPrefixSize>>10));
+ mtctx->targetSectionSize = params.jobSize;
+ if (mtctx->targetSectionSize == 0) {
+ mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(&params);
+ }
+ assert(mtctx->targetSectionSize <= (size_t)ZSTDMT_JOBSIZE_MAX);
+
+ if (params.rsyncable) {
+ /* Aim for the targetsectionSize as the average job size. */
+ U32 const jobSizeKB = (U32)(mtctx->targetSectionSize >> 10);
+ U32 const rsyncBits = (assert(jobSizeKB >= 1), ZSTD_highbit32(jobSizeKB) + 10);
+ DEBUGLOG(4, "rsyncLog = %u", rsyncBits);
+ mtctx->rsync.hash = 0;
+ mtctx->rsync.hitMask = (1ULL << rsyncBits) - 1;
+ mtctx->rsync.primePower = ZSTD_rollingHash_primePower(RSYNC_LENGTH);
+ }
+ if (mtctx->targetSectionSize < mtctx->targetPrefixSize) mtctx->targetSectionSize = mtctx->targetPrefixSize; /* job size must be >= overlap size */
+ DEBUGLOG(4, "Job Size : %u KB (note : set to %u)", (U32)(mtctx->targetSectionSize>>10), (U32)params.jobSize);
+ DEBUGLOG(4, "inBuff Size : %u KB", (U32)(mtctx->targetSectionSize>>10));
+ ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(mtctx->targetSectionSize));
+ {
+ /* If ldm is enabled we need windowSize space. */
+ size_t const windowSize = mtctx->params.ldmParams.enableLdm ? (1U << mtctx->params.cParams.windowLog) : 0;
+ /* Two buffers of slack, plus extra space for the overlap
+ * This is the minimum slack that LDM works with. One extra because
+ * flush might waste up to targetSectionSize-1 bytes.
Another extra + * for the overlap (if > 0), then one to fill which doesn't overlap + * with the LDM window. + */ + size_t const nbSlackBuffers = 2 + (mtctx->targetPrefixSize > 0); + size_t const slackSize = mtctx->targetSectionSize * nbSlackBuffers; + /* Compute the total size, and always have enough slack */ + size_t const nbWorkers = MAX(mtctx->params.nbWorkers, 1); + size_t const sectionsSize = mtctx->targetSectionSize * nbWorkers; + size_t const capacity = MAX(windowSize, sectionsSize) + slackSize; + if (mtctx->roundBuff.capacity < capacity) { + if (mtctx->roundBuff.buffer) + ZSTD_customFree(mtctx->roundBuff.buffer, mtctx->cMem); + mtctx->roundBuff.buffer = (BYTE*)ZSTD_customMalloc(capacity, mtctx->cMem); + if (mtctx->roundBuff.buffer == NULL) { + mtctx->roundBuff.capacity = 0; + return ERROR(memory_allocation); + } + mtctx->roundBuff.capacity = capacity; + } + } + DEBUGLOG(4, "roundBuff capacity : %u KB", (U32)(mtctx->roundBuff.capacity>>10)); + mtctx->roundBuff.pos = 0; + mtctx->inBuff.buffer = g_nullBuffer; + mtctx->inBuff.filled = 0; + mtctx->inBuff.prefix = kNullRange; + mtctx->doneJobID = 0; + mtctx->nextJobID = 0; + mtctx->frameEnded = 0; + mtctx->allJobsCompleted = 0; + mtctx->consumed = 0; + mtctx->produced = 0; + if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, mtctx->targetSectionSize, + dict, dictSize, dictContentType)) + return ERROR(memory_allocation); + return 0; +} + + +/* ZSTDMT_writeLastEmptyBlock() + * Write a single empty block with an end-of-frame to finish a frame. + * Job must be created from streaming variant. + * This function is always successful if expected conditions are fulfilled. + */ +static void ZSTDMT_writeLastEmptyBlock(ZSTDMT_jobDescription* job) +{ + assert(job->lastJob == 1); + assert(job->src.size == 0); /* last job is empty -> will be simplified into a last empty block */ + assert(job->firstJob == 0); /* cannot be first job, as it also needs to create frame header */ + assert(job->dstBuff.start == NULL); /* invoked from streaming variant only (otherwise, dstBuff might be user's output) */ + job->dstBuff = ZSTDMT_getBuffer(job->bufPool); + if (job->dstBuff.start == NULL) { + job->cSize = ERROR(memory_allocation); + return; + } + assert(job->dstBuff.capacity >= ZSTD_blockHeaderSize); /* no buffer should ever be that small */ + job->src = kNullRange; + job->cSize = ZSTD_writeLastEmptyBlock(job->dstBuff.start, job->dstBuff.capacity); + assert(!ZSTD_isError(job->cSize)); + assert(job->consumed == 0); +} + +static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* mtctx, size_t srcSize, ZSTD_EndDirective endOp) +{ + unsigned const jobID = mtctx->nextJobID & mtctx->jobIDMask; + int const endFrame = (endOp == ZSTD_e_end); + + if (mtctx->nextJobID > mtctx->doneJobID + mtctx->jobIDMask) { + DEBUGLOG(5, "ZSTDMT_createCompressionJob: will not create new job : table is full"); + assert((mtctx->nextJobID & mtctx->jobIDMask) == (mtctx->doneJobID & mtctx->jobIDMask)); + return 0; + } + + if (!mtctx->jobReady) { + BYTE const* src = (BYTE const*)mtctx->inBuff.buffer.start; + DEBUGLOG(5, "ZSTDMT_createCompressionJob: preparing job %u to compress %u bytes with %u preload ", + mtctx->nextJobID, (U32)srcSize, (U32)mtctx->inBuff.prefix.size); + mtctx->jobs[jobID].src.start = src; + mtctx->jobs[jobID].src.size = srcSize; + assert(mtctx->inBuff.filled >= srcSize); + mtctx->jobs[jobID].prefix = mtctx->inBuff.prefix; + mtctx->jobs[jobID].consumed = 0; + mtctx->jobs[jobID].cSize = 0; + mtctx->jobs[jobID].params = mtctx->params; + mtctx->jobs[jobID].cdict = 
mtctx->nextJobID==0 ? mtctx->cdict : NULL; + mtctx->jobs[jobID].fullFrameSize = mtctx->frameContentSize; + mtctx->jobs[jobID].dstBuff = g_nullBuffer; + mtctx->jobs[jobID].cctxPool = mtctx->cctxPool; + mtctx->jobs[jobID].bufPool = mtctx->bufPool; + mtctx->jobs[jobID].seqPool = mtctx->seqPool; + mtctx->jobs[jobID].serial = &mtctx->serial; + mtctx->jobs[jobID].jobID = mtctx->nextJobID; + mtctx->jobs[jobID].firstJob = (mtctx->nextJobID==0); + mtctx->jobs[jobID].lastJob = endFrame; + mtctx->jobs[jobID].frameChecksumNeeded = mtctx->params.fParams.checksumFlag && endFrame && (mtctx->nextJobID>0); + mtctx->jobs[jobID].dstFlushed = 0; + + /* Update the round buffer pos and clear the input buffer to be reset */ + mtctx->roundBuff.pos += srcSize; + mtctx->inBuff.buffer = g_nullBuffer; + mtctx->inBuff.filled = 0; + /* Set the prefix */ + if (!endFrame) { + size_t const newPrefixSize = MIN(srcSize, mtctx->targetPrefixSize); + mtctx->inBuff.prefix.start = src + srcSize - newPrefixSize; + mtctx->inBuff.prefix.size = newPrefixSize; + } else { /* endFrame==1 => no need for another input buffer */ + mtctx->inBuff.prefix = kNullRange; + mtctx->frameEnded = endFrame; + if (mtctx->nextJobID == 0) { + /* single job exception : checksum is already calculated directly within worker thread */ + mtctx->params.fParams.checksumFlag = 0; + } } + + if ( (srcSize == 0) + && (mtctx->nextJobID>0)/*single job must also write frame header*/ ) { + DEBUGLOG(5, "ZSTDMT_createCompressionJob: creating a last empty block to end frame"); + assert(endOp == ZSTD_e_end); /* only possible case : need to end the frame with an empty last block */ + ZSTDMT_writeLastEmptyBlock(mtctx->jobs + jobID); + mtctx->nextJobID++; + return 0; + } + } + + DEBUGLOG(5, "ZSTDMT_createCompressionJob: posting job %u : %u bytes (end:%u, jobNb == %u (mod:%u))", + mtctx->nextJobID, + (U32)mtctx->jobs[jobID].src.size, + mtctx->jobs[jobID].lastJob, + mtctx->nextJobID, + jobID); + if (POOL_tryAdd(mtctx->factory, ZSTDMT_compressionJob, &mtctx->jobs[jobID])) { + mtctx->nextJobID++; + mtctx->jobReady = 0; + } else { + DEBUGLOG(5, "ZSTDMT_createCompressionJob: no worker available for job %u", mtctx->nextJobID); + mtctx->jobReady = 1; + } + return 0; +} + + +/*! ZSTDMT_flushProduced() : + * flush whatever data has been produced but not yet flushed in current job. + * move to next job if current one is fully flushed. + * `output` : `pos` will be updated with amount of data flushed . + * `blockToFlush` : if >0, the function will block and wait if there is no data available to flush . 
+ * @return : amount of data remaining within internal buffer, 0 if no more, 1 if unknown but > 0, or an error code */ +static size_t ZSTDMT_flushProduced(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, unsigned blockToFlush, ZSTD_EndDirective end) +{ + unsigned const wJobID = mtctx->doneJobID & mtctx->jobIDMask; + DEBUGLOG(5, "ZSTDMT_flushProduced (blocking:%u , job %u <= %u)", + blockToFlush, mtctx->doneJobID, mtctx->nextJobID); + assert(output->size >= output->pos); + + ZSTD_PTHREAD_MUTEX_LOCK(&mtctx->jobs[wJobID].job_mutex); + if ( blockToFlush + && (mtctx->doneJobID < mtctx->nextJobID) ) { + assert(mtctx->jobs[wJobID].dstFlushed <= mtctx->jobs[wJobID].cSize); + while (mtctx->jobs[wJobID].dstFlushed == mtctx->jobs[wJobID].cSize) { /* nothing to flush */ + if (mtctx->jobs[wJobID].consumed == mtctx->jobs[wJobID].src.size) { + DEBUGLOG(5, "job %u is completely consumed (%u == %u) => don't wait for cond, there will be none", + mtctx->doneJobID, (U32)mtctx->jobs[wJobID].consumed, (U32)mtctx->jobs[wJobID].src.size); + break; + } + DEBUGLOG(5, "waiting for something to flush from job %u (currently flushed: %u bytes)", + mtctx->doneJobID, (U32)mtctx->jobs[wJobID].dstFlushed); + ZSTD_pthread_cond_wait(&mtctx->jobs[wJobID].job_cond, &mtctx->jobs[wJobID].job_mutex); /* block when nothing to flush but some to come */ + } } + + /* try to flush something */ + { size_t cSize = mtctx->jobs[wJobID].cSize; /* shared */ + size_t const srcConsumed = mtctx->jobs[wJobID].consumed; /* shared */ + size_t const srcSize = mtctx->jobs[wJobID].src.size; /* read-only, could be done after mutex lock, but no-declaration-after-statement */ + ZSTD_pthread_mutex_unlock(&mtctx->jobs[wJobID].job_mutex); + if (ZSTD_isError(cSize)) { + DEBUGLOG(5, "ZSTDMT_flushProduced: job %u : compression error detected : %s", + mtctx->doneJobID, ZSTD_getErrorName(cSize)); + ZSTDMT_waitForAllJobsCompleted(mtctx); + ZSTDMT_releaseAllJobResources(mtctx); + return cSize; + } + /* add frame checksum if necessary (can only happen once) */ + assert(srcConsumed <= srcSize); + if ( (srcConsumed == srcSize) /* job completed -> worker no longer active */ + && mtctx->jobs[wJobID].frameChecksumNeeded ) { + U32 const checksum = (U32)XXH64_digest(&mtctx->serial.xxhState); + DEBUGLOG(4, "ZSTDMT_flushProduced: writing checksum : %08X \n", checksum); + MEM_writeLE32((char*)mtctx->jobs[wJobID].dstBuff.start + mtctx->jobs[wJobID].cSize, checksum); + cSize += 4; + mtctx->jobs[wJobID].cSize += 4; /* can write this shared value, as worker is no longer active */ + mtctx->jobs[wJobID].frameChecksumNeeded = 0; + } + + if (cSize > 0) { /* compression is ongoing or completed */ + size_t const toFlush = MIN(cSize - mtctx->jobs[wJobID].dstFlushed, output->size - output->pos); + DEBUGLOG(5, "ZSTDMT_flushProduced: Flushing %u bytes from job %u (completion:%u/%u, generated:%u)", + (U32)toFlush, mtctx->doneJobID, (U32)srcConsumed, (U32)srcSize, (U32)cSize); + assert(mtctx->doneJobID < mtctx->nextJobID); + assert(cSize >= mtctx->jobs[wJobID].dstFlushed); + assert(mtctx->jobs[wJobID].dstBuff.start != NULL); + if (toFlush > 0) { + ZSTD_memcpy((char*)output->dst + output->pos, + (const char*)mtctx->jobs[wJobID].dstBuff.start + mtctx->jobs[wJobID].dstFlushed, + toFlush); + } + output->pos += toFlush; + mtctx->jobs[wJobID].dstFlushed += toFlush; /* can write : this value is only used by mtctx */ + + if ( (srcConsumed == srcSize) /* job is completed */ + && (mtctx->jobs[wJobID].dstFlushed == cSize) ) { /* output buffer fully flushed => free this job position */ + DEBUGLOG(5, "Job 
%u completed (%u bytes), moving to next one", + mtctx->doneJobID, (U32)mtctx->jobs[wJobID].dstFlushed); + ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[wJobID].dstBuff); + DEBUGLOG(5, "dstBuffer released"); + mtctx->jobs[wJobID].dstBuff = g_nullBuffer; + mtctx->jobs[wJobID].cSize = 0; /* ensure this job slot is considered "not started" in future check */ + mtctx->consumed += srcSize; + mtctx->produced += cSize; + mtctx->doneJobID++; + } } + + /* return value : how many bytes left in buffer ; fake it to 1 when unknown but >0 */ + if (cSize > mtctx->jobs[wJobID].dstFlushed) return (cSize - mtctx->jobs[wJobID].dstFlushed); + if (srcSize > srcConsumed) return 1; /* current job not completely compressed */ + } + if (mtctx->doneJobID < mtctx->nextJobID) return 1; /* some more jobs ongoing */ + if (mtctx->jobReady) return 1; /* one job is ready to push, just not yet in the list */ + if (mtctx->inBuff.filled > 0) return 1; /* input is not empty, and still needs to be converted into a job */ + mtctx->allJobsCompleted = mtctx->frameEnded; /* all jobs are entirely flushed => if this one is last one, frame is completed */ + if (end == ZSTD_e_end) return !mtctx->frameEnded; /* for ZSTD_e_end, question becomes : is frame completed ? instead of : are internal buffers fully flushed ? */ + return 0; /* internal buffers fully flushed */ +} + +/** + * Returns the range of data used by the earliest job that is not yet complete. + * If the data of the first job is broken up into two segments, we cover both + * sections. + */ +static range_t ZSTDMT_getInputDataInUse(ZSTDMT_CCtx* mtctx) +{ + unsigned const firstJobID = mtctx->doneJobID; + unsigned const lastJobID = mtctx->nextJobID; + unsigned jobID; + + for (jobID = firstJobID; jobID < lastJobID; ++jobID) { + unsigned const wJobID = jobID & mtctx->jobIDMask; + size_t consumed; + + ZSTD_PTHREAD_MUTEX_LOCK(&mtctx->jobs[wJobID].job_mutex); + consumed = mtctx->jobs[wJobID].consumed; + ZSTD_pthread_mutex_unlock(&mtctx->jobs[wJobID].job_mutex); + + if (consumed < mtctx->jobs[wJobID].src.size) { + range_t range = mtctx->jobs[wJobID].prefix; + if (range.size == 0) { + /* Empty prefix */ + range = mtctx->jobs[wJobID].src; + } + /* Job source in multiple segments not supported yet */ + assert(range.start <= mtctx->jobs[wJobID].src.start); + return range; + } + } + return kNullRange; +} + +/** + * Returns non-zero iff buffer and range overlap. + */ +static int ZSTDMT_isOverlapped(buffer_t buffer, range_t range) +{ + BYTE const* const bufferStart = (BYTE const*)buffer.start; + BYTE const* const bufferEnd = bufferStart + buffer.capacity; + BYTE const* const rangeStart = (BYTE const*)range.start; + BYTE const* const rangeEnd = range.size != 0 ? 
rangeStart + range.size : rangeStart; + + if (rangeStart == NULL || bufferStart == NULL) + return 0; + /* Empty ranges cannot overlap */ + if (bufferStart == bufferEnd || rangeStart == rangeEnd) + return 0; + + return bufferStart < rangeEnd && rangeStart < bufferEnd; +} + +static int ZSTDMT_doesOverlapWindow(buffer_t buffer, ZSTD_window_t window) +{ + range_t extDict; + range_t prefix; + + DEBUGLOG(5, "ZSTDMT_doesOverlapWindow"); + extDict.start = window.dictBase + window.lowLimit; + extDict.size = window.dictLimit - window.lowLimit; + + prefix.start = window.base + window.dictLimit; + prefix.size = window.nextSrc - (window.base + window.dictLimit); + DEBUGLOG(5, "extDict [0x%zx, 0x%zx)", + (size_t)extDict.start, + (size_t)extDict.start + extDict.size); + DEBUGLOG(5, "prefix [0x%zx, 0x%zx)", + (size_t)prefix.start, + (size_t)prefix.start + prefix.size); + + return ZSTDMT_isOverlapped(buffer, extDict) + || ZSTDMT_isOverlapped(buffer, prefix); +} + +static void ZSTDMT_waitForLdmComplete(ZSTDMT_CCtx* mtctx, buffer_t buffer) +{ + if (mtctx->params.ldmParams.enableLdm) { + ZSTD_pthread_mutex_t* mutex = &mtctx->serial.ldmWindowMutex; + DEBUGLOG(5, "ZSTDMT_waitForLdmComplete"); + DEBUGLOG(5, "source [0x%zx, 0x%zx)", + (size_t)buffer.start, + (size_t)buffer.start + buffer.capacity); + ZSTD_PTHREAD_MUTEX_LOCK(mutex); + while (ZSTDMT_doesOverlapWindow(buffer, mtctx->serial.ldmWindow)) { + DEBUGLOG(5, "Waiting for LDM to finish..."); + ZSTD_pthread_cond_wait(&mtctx->serial.ldmWindowCond, mutex); + } + DEBUGLOG(6, "Done waiting for LDM to finish"); + ZSTD_pthread_mutex_unlock(mutex); + } +} + +/** + * Attempts to set the inBuff to the next section to fill. + * If any part of the new section is still in use we give up. + * Returns non-zero if the buffer is filled. + */ +static int ZSTDMT_tryGetInputRange(ZSTDMT_CCtx* mtctx) +{ + range_t const inUse = ZSTDMT_getInputDataInUse(mtctx); + size_t const spaceLeft = mtctx->roundBuff.capacity - mtctx->roundBuff.pos; + size_t const target = mtctx->targetSectionSize; + buffer_t buffer; + + DEBUGLOG(5, "ZSTDMT_tryGetInputRange"); + assert(mtctx->inBuff.buffer.start == NULL); + assert(mtctx->roundBuff.capacity >= target); + + if (spaceLeft < target) { + /* ZSTD_invalidateRepCodes() doesn't work for extDict variants. + * Simply copy the prefix to the beginning in that case. 
+ */ + BYTE* const start = (BYTE*)mtctx->roundBuff.buffer; + size_t const prefixSize = mtctx->inBuff.prefix.size; + + buffer.start = start; + buffer.capacity = prefixSize; + if (ZSTDMT_isOverlapped(buffer, inUse)) { + DEBUGLOG(5, "Waiting for buffer..."); + return 0; + } + ZSTDMT_waitForLdmComplete(mtctx, buffer); + ZSTD_memmove(start, mtctx->inBuff.prefix.start, prefixSize); + mtctx->inBuff.prefix.start = start; + mtctx->roundBuff.pos = prefixSize; + } + buffer.start = mtctx->roundBuff.buffer + mtctx->roundBuff.pos; + buffer.capacity = target; + + if (ZSTDMT_isOverlapped(buffer, inUse)) { + DEBUGLOG(5, "Waiting for buffer..."); + return 0; + } + assert(!ZSTDMT_isOverlapped(buffer, mtctx->inBuff.prefix)); + + ZSTDMT_waitForLdmComplete(mtctx, buffer); + + DEBUGLOG(5, "Using prefix range [%zx, %zx)", + (size_t)mtctx->inBuff.prefix.start, + (size_t)mtctx->inBuff.prefix.start + mtctx->inBuff.prefix.size); + DEBUGLOG(5, "Using source range [%zx, %zx)", + (size_t)buffer.start, + (size_t)buffer.start + buffer.capacity); + + + mtctx->inBuff.buffer = buffer; + mtctx->inBuff.filled = 0; + assert(mtctx->roundBuff.pos + buffer.capacity <= mtctx->roundBuff.capacity); + return 1; +} + +typedef struct { + size_t toLoad; /* The number of bytes to load from the input. */ + int flush; /* Boolean declaring if we must flush because we found a synchronization point. */ +} syncPoint_t; + +/** + * Searches through the input for a synchronization point. If one is found, we + * will instruct the caller to flush, and return the number of bytes to load. + * Otherwise, we will load as many bytes as possible and instruct the caller + * to continue as normal. + */ +static syncPoint_t +findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input) +{ + BYTE const* const istart = (BYTE const*)input.src + input.pos; + U64 const primePower = mtctx->rsync.primePower; + U64 const hitMask = mtctx->rsync.hitMask; + + syncPoint_t syncPoint; + U64 hash; + BYTE const* prev; + size_t pos; + + syncPoint.toLoad = MIN(input.size - input.pos, mtctx->targetSectionSize - mtctx->inBuff.filled); + syncPoint.flush = 0; + if (!mtctx->params.rsyncable) + /* Rsync is disabled. */ + return syncPoint; + if (mtctx->inBuff.filled + syncPoint.toLoad < RSYNC_LENGTH) + /* Not enough to compute the hash. + * We will miss any synchronization points in this RSYNC_LENGTH byte + * window. However, since it depends only in the internal buffers, if the + * state is already synchronized, we will remain synchronized. + * Additionally, the probability that we miss a synchronization point is + * low: RSYNC_LENGTH / targetSectionSize. + */ + return syncPoint; + /* Initialize the loop variables. */ + if (mtctx->inBuff.filled >= RSYNC_LENGTH) { + /* We have enough bytes buffered to initialize the hash. + * Start scanning at the beginning of the input. + */ + pos = 0; + prev = (BYTE const*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled - RSYNC_LENGTH; + hash = ZSTD_rollingHash_compute(prev, RSYNC_LENGTH); + if ((hash & hitMask) == hitMask) { + /* We're already at a sync point so don't load any more until + * we're able to flush this sync point. + * This likely happened because the job table was full so we + * couldn't add our job. + */ + syncPoint.toLoad = 0; + syncPoint.flush = 1; + return syncPoint; + } + } else { + /* We don't have enough bytes buffered to initialize the hash, but + * we know we have at least RSYNC_LENGTH bytes total. + * Start scanning after the first RSYNC_LENGTH bytes less the bytes + * already buffered. 
+ */ + pos = RSYNC_LENGTH - mtctx->inBuff.filled; + prev = (BYTE const*)mtctx->inBuff.buffer.start - pos; + hash = ZSTD_rollingHash_compute(mtctx->inBuff.buffer.start, mtctx->inBuff.filled); + hash = ZSTD_rollingHash_append(hash, istart, pos); + } + /* Starting with the hash of the previous RSYNC_LENGTH bytes, roll + * through the input. If we hit a synchronization point, then cut the + * job off, and tell the compressor to flush the job. Otherwise, load + * all the bytes and continue as normal. + * If we go too long without a synchronization point (targetSectionSize) + * then a block will be emitted anyways, but this is okay, since if we + * are already synchronized we will remain synchronized. + */ + for (; pos < syncPoint.toLoad; ++pos) { + BYTE const toRemove = pos < RSYNC_LENGTH ? prev[pos] : istart[pos - RSYNC_LENGTH]; + /* if (pos >= RSYNC_LENGTH) assert(ZSTD_rollingHash_compute(istart + pos - RSYNC_LENGTH, RSYNC_LENGTH) == hash); */ + hash = ZSTD_rollingHash_rotate(hash, toRemove, istart[pos], primePower); + if ((hash & hitMask) == hitMask) { + syncPoint.toLoad = pos + 1; + syncPoint.flush = 1; + break; + } + } + return syncPoint; +} + +size_t ZSTDMT_nextInputSizeHint(const ZSTDMT_CCtx* mtctx) +{ + size_t hintInSize = mtctx->targetSectionSize - mtctx->inBuff.filled; + if (hintInSize==0) hintInSize = mtctx->targetSectionSize; + return hintInSize; +} + +/** ZSTDMT_compressStream_generic() : + * internal use only - exposed to be invoked from zstd_compress.c + * assumption : output and input are valid (pos <= size) + * @return : minimum amount of data remaining to flush, 0 if none */ +size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx, + ZSTD_outBuffer* output, + ZSTD_inBuffer* input, + ZSTD_EndDirective endOp) +{ + unsigned forwardInputProgress = 0; + DEBUGLOG(5, "ZSTDMT_compressStream_generic (endOp=%u, srcSize=%u)", + (U32)endOp, (U32)(input->size - input->pos)); + assert(output->pos <= output->size); + assert(input->pos <= input->size); + + if ((mtctx->frameEnded) && (endOp==ZSTD_e_continue)) { + /* current frame being ended. Only flush/end are allowed */ + return ERROR(stage_wrong); + } + + /* fill input buffer */ + if ( (!mtctx->jobReady) + && (input->size > input->pos) ) { /* support NULL input */ + if (mtctx->inBuff.buffer.start == NULL) { + assert(mtctx->inBuff.filled == 0); /* Can't fill an empty buffer */ + if (!ZSTDMT_tryGetInputRange(mtctx)) { + /* It is only possible for this operation to fail if there are + * still compression jobs ongoing. 
+ */ + DEBUGLOG(5, "ZSTDMT_tryGetInputRange failed"); + assert(mtctx->doneJobID != mtctx->nextJobID); + } else + DEBUGLOG(5, "ZSTDMT_tryGetInputRange completed successfully : mtctx->inBuff.buffer.start = %p", mtctx->inBuff.buffer.start); + } + if (mtctx->inBuff.buffer.start != NULL) { + syncPoint_t const syncPoint = findSynchronizationPoint(mtctx, *input); + if (syncPoint.flush && endOp == ZSTD_e_continue) { + endOp = ZSTD_e_flush; + } + assert(mtctx->inBuff.buffer.capacity >= mtctx->targetSectionSize); + DEBUGLOG(5, "ZSTDMT_compressStream_generic: adding %u bytes on top of %u to buffer of size %u", + (U32)syncPoint.toLoad, (U32)mtctx->inBuff.filled, (U32)mtctx->targetSectionSize); + ZSTD_memcpy((char*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled, (const char*)input->src + input->pos, syncPoint.toLoad); + input->pos += syncPoint.toLoad; + mtctx->inBuff.filled += syncPoint.toLoad; + forwardInputProgress = syncPoint.toLoad>0; + } + } + if ((input->pos < input->size) && (endOp == ZSTD_e_end)) { + /* Can't end yet because the input is not fully consumed. + * We are in one of these cases: + * - mtctx->inBuff is NULL & empty: we couldn't get an input buffer so don't create a new job. + * - We filled the input buffer: flush this job but don't end the frame. + * - We hit a synchronization point: flush this job but don't end the frame. + */ + assert(mtctx->inBuff.filled == 0 || mtctx->inBuff.filled == mtctx->targetSectionSize || mtctx->params.rsyncable); + endOp = ZSTD_e_flush; + } + + if ( (mtctx->jobReady) + || (mtctx->inBuff.filled >= mtctx->targetSectionSize) /* filled enough : let's compress */ + || ((endOp != ZSTD_e_continue) && (mtctx->inBuff.filled > 0)) /* something to flush : let's go */ + || ((endOp == ZSTD_e_end) && (!mtctx->frameEnded)) ) { /* must finish the frame with a zero-size block */ + size_t const jobSize = mtctx->inBuff.filled; + assert(mtctx->inBuff.filled <= mtctx->targetSectionSize); + FORWARD_IF_ERROR( ZSTDMT_createCompressionJob(mtctx, jobSize, endOp) , ""); + } + + /* check for potential compressed data ready to be flushed */ + { size_t const remainingToFlush = ZSTDMT_flushProduced(mtctx, output, !forwardInputProgress, endOp); /* block if there was no forward input progress */ + if (input->pos < input->size) return MAX(remainingToFlush, 1); /* input not consumed : do not end flush yet */ + DEBUGLOG(5, "end of ZSTDMT_compressStream_generic: remainingToFlush = %u", (U32)remainingToFlush); + return remainingToFlush; + } +} +/**** ended inlining compress/zstdmt_compress.c ****/ +#endif + +/**** start inlining decompress/huf_decompress.c ****/ +/* ****************************************************************** + * huff0 huffman decoder, + * part of Finite State Entropy library + * Copyright (c) Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+****************************************************************** */ + +/* ************************************************************** +* Dependencies +****************************************************************/ +/**** skipping file: ../common/zstd_deps.h ****/ +/**** skipping file: ../common/compiler.h ****/ +/**** skipping file: ../common/bitstream.h ****/ +/**** skipping file: ../common/fse.h ****/ +#define HUF_STATIC_LINKING_ONLY +/**** skipping file: ../common/huf.h ****/ +/**** skipping file: ../common/error_private.h ****/ + +/* ************************************************************** +* Macros +****************************************************************/ + +/* These two optional macros force the use one way or another of the two + * Huffman decompression implementations. You can't force in both directions + * at the same time. + */ +#if defined(HUF_FORCE_DECOMPRESS_X1) && \ + defined(HUF_FORCE_DECOMPRESS_X2) +#error "Cannot force the use of the X1 and X2 decoders at the same time!" +#endif + + +/* ************************************************************** +* Error Management +****************************************************************/ +#define HUF_isError ERR_isError + + +/* ************************************************************** +* Byte alignment for workSpace management +****************************************************************/ +#define HUF_ALIGN(x, a) HUF_ALIGN_MASK((x), (a) - 1) +#define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask)) + + +/* ************************************************************** +* BMI2 Variant Wrappers +****************************************************************/ +#if DYNAMIC_BMI2 + +#define HUF_DGEN(fn) \ + \ + static size_t fn##_default( \ + void* dst, size_t dstSize, \ + const void* cSrc, size_t cSrcSize, \ + const HUF_DTable* DTable) \ + { \ + return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ + } \ + \ + static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2( \ + void* dst, size_t dstSize, \ + const void* cSrc, size_t cSrcSize, \ + const HUF_DTable* DTable) \ + { \ + return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ + } \ + \ + static size_t fn(void* dst, size_t dstSize, void const* cSrc, \ + size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \ + { \ + if (bmi2) { \ + return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); \ + } \ + return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable); \ + } + +#else + +#define HUF_DGEN(fn) \ + static size_t fn(void* dst, size_t dstSize, void const* cSrc, \ + size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \ + { \ + (void)bmi2; \ + return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ + } + +#endif + + +/*-***************************/ +/* generic DTableDesc */ +/*-***************************/ +typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc; + +static DTableDesc HUF_getDTableDesc(const HUF_DTable* table) +{ + DTableDesc dtd; + ZSTD_memcpy(&dtd, table, sizeof(dtd)); + return dtd; +} + + +#ifndef HUF_FORCE_DECOMPRESS_X2 + +/*-***************************/ +/* single-symbol decoding */ +/*-***************************/ +typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX1; /* single-symbol decoding */ + +/** + * Packs 4 HUF_DEltX1 structs into a U64. This is used to lay down 4 entries at + * a time. 
+ */ +static U64 HUF_DEltX1_set4(BYTE symbol, BYTE nbBits) { + U64 D4; + if (MEM_isLittleEndian()) { + D4 = symbol + (nbBits << 8); + } else { + D4 = (symbol << 8) + nbBits; + } + D4 *= 0x0001000100010001ULL; + return D4; +} + +typedef struct { + U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; + U32 rankStart[HUF_TABLELOG_ABSOLUTEMAX + 1]; + U32 statsWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32]; + BYTE symbols[HUF_SYMBOLVALUE_MAX + 1]; + BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; +} HUF_ReadDTableX1_Workspace; + + +size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize) +{ + return HUF_readDTableX1_wksp_bmi2(DTable, src, srcSize, workSpace, wkspSize, /* bmi2 */ 0); +} + +size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2) +{ + U32 tableLog = 0; + U32 nbSymbols = 0; + size_t iSize; + void* const dtPtr = DTable + 1; + HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr; + HUF_ReadDTableX1_Workspace* wksp = (HUF_ReadDTableX1_Workspace*)workSpace; + + DEBUG_STATIC_ASSERT(HUF_DECOMPRESS_WORKSPACE_SIZE >= sizeof(*wksp)); + if (sizeof(*wksp) > wkspSize) return ERROR(tableLog_tooLarge); + + DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable)); + /* ZSTD_memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */ + + iSize = HUF_readStats_wksp(wksp->huffWeight, HUF_SYMBOLVALUE_MAX + 1, wksp->rankVal, &nbSymbols, &tableLog, src, srcSize, wksp->statsWksp, sizeof(wksp->statsWksp), bmi2); + if (HUF_isError(iSize)) return iSize; + + /* Table header */ + { DTableDesc dtd = HUF_getDTableDesc(DTable); + if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge); /* DTable too small, Huffman tree cannot fit in */ + dtd.tableType = 0; + dtd.tableLog = (BYTE)tableLog; + ZSTD_memcpy(DTable, &dtd, sizeof(dtd)); + } + + /* Compute symbols and rankStart given rankVal: + * + * rankVal already contains the number of values of each weight. + * + * symbols contains the symbols ordered by weight. First are the rankVal[0] + * weight 0 symbols, followed by the rankVal[1] weight 1 symbols, and so on. + * symbols[0] is filled (but unused) to avoid a branch. + * + * rankStart contains the offset where each rank belongs in the DTable. + * rankStart[0] is not filled because there are no entries in the table for + * weight 0. + */ + { + int n; + int nextRankStart = 0; + int const unroll = 4; + int const nLimit = (int)nbSymbols - unroll + 1; + for (n=0; n<(int)tableLog+1; n++) { + U32 const curr = nextRankStart; + nextRankStart += wksp->rankVal[n]; + wksp->rankStart[n] = curr; + } + for (n=0; n < nLimit; n += unroll) { + int u; + for (u=0; u < unroll; ++u) { + size_t const w = wksp->huffWeight[n+u]; + wksp->symbols[wksp->rankStart[w]++] = (BYTE)(n+u); + } + } + for (; n < (int)nbSymbols; ++n) { + size_t const w = wksp->huffWeight[n]; + wksp->symbols[wksp->rankStart[w]++] = (BYTE)n; + } + } + + /* fill DTable + * We fill all entries of each weight in order. + * That way length is a constant for each iteration of the outter loop. + * We can switch based on the length to a different inner loop which is + * optimized for that particular case. 
+ */
+ {
+ U32 w;
+ int symbol=wksp->rankVal[0];
+ int rankStart=0;
+ for (w=1; w<tableLog+1; ++w) {
+ int const symbolCount = wksp->rankVal[w];
+ int const length = (1 << w) >> 1;
+ int uStart = rankStart;
+ BYTE const nbBits = (BYTE)(tableLog + 1 - w);
+ int s;
+ int u;
+ switch (length) {
+ case 1:
+ for (s=0; s<symbolCount; ++s) {
+ HUF_DEltX1 D;
+ D.byte = wksp->symbols[symbol + s];
+ D.nbBits = nbBits;
+ dt[uStart] = D;
+ uStart += 1;
+ }
+ break;
+ case 2:
+ for (s=0; s<symbolCount; ++s) {
+ HUF_DEltX1 D;
+ D.byte = wksp->symbols[symbol + s];
+ D.nbBits = nbBits;
+ dt[uStart+0] = D;
+ dt[uStart+1] = D;
+ uStart += 2;
+ }
+ break;
+ case 4:
+ for (s=0; s<symbolCount; ++s) {
+ U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
+ MEM_write64(dt + uStart, D4);
+ uStart += 4;
+ }
+ break;
+ case 8:
+ for (s=0; s<symbolCount; ++s) {
+ U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
+ MEM_write64(dt + uStart, D4);
+ MEM_write64(dt + uStart + 4, D4);
+ uStart += 8;
+ }
+ break;
+ default:
+ for (s=0; s<symbolCount; ++s) {
+ U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
+ for (u=0; u < length; u += 16) {
+ MEM_write64(dt + uStart + u + 0, D4);
+ MEM_write64(dt + uStart + u + 4, D4);
+ MEM_write64(dt + uStart + u + 8, D4);
+ MEM_write64(dt + uStart + u + 12, D4);
+ }
+ assert(u == length);
+ uStart += length;
+ }
+ break;
+ }
+ symbol += symbolCount;
+ rankStart += symbolCount * length;
+ }
+ }
+ return iSize;
+}
+
+FORCE_INLINE_TEMPLATE BYTE
+HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog)
+{
+ size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
+ BYTE const c = dt[val].byte;
+ BIT_skipBits(Dstream, dt[val].nbBits);
+ return c;
+}
+
+#define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \
+ *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog)
+
+#define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr) \
+ if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
+ HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
+
+#define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \
+ if (MEM_64bits()) \
+ HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
+
+HINT_INLINE size_t
+HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog)
+{
+ BYTE* const pStart = p;
+
+ /* up to 4 symbols at a time */
+ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) {
+ HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
+ HUF_DECODE_SYMBOLX1_1(p, bitDPtr);
+ HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
+ HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
+ }
+
+ /* [0-3] symbols remaining */
+ if (MEM_32bits())
+ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd))
+ HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
+
+ /* no more data to retrieve from bitstream, no need to reload */
+ while (p < pEnd)
+ HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
+
+ return pEnd-pStart;
+}
+
+FORCE_INLINE_TEMPLATE size_t
+HUF_decompress1X1_usingDTable_internal_body(
+ void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize,
+ const HUF_DTable* DTable)
+{
+ BYTE* op = (BYTE*)dst;
+ BYTE* const oend = op + dstSize;
+ const void* dtPtr = DTable + 1;
+ const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
+ BIT_DStream_t bitD;
+ DTableDesc const dtd = HUF_getDTableDesc(DTable);
+ U32 const dtLog = dtd.tableLog;
+
+ CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
+
+ HUF_decodeStreamX1(op, &bitD, oend, dt, dtLog);
+
+ if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
+
+ return dstSize;
+}
+
+FORCE_INLINE_TEMPLATE size_t
+HUF_decompress4X1_usingDTable_internal_body(
+ void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize,
+ const HUF_DTable* DTable)
+{
+ /* Check */
+ if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
+
+ { const BYTE* const istart = (const BYTE*) cSrc;
+ BYTE*
const ostart = (BYTE*) dst; + BYTE* const oend = ostart + dstSize; + BYTE* const olimit = oend - 3; + const void* const dtPtr = DTable + 1; + const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr; + + /* Init */ + BIT_DStream_t bitD1; + BIT_DStream_t bitD2; + BIT_DStream_t bitD3; + BIT_DStream_t bitD4; + size_t const length1 = MEM_readLE16(istart); + size_t const length2 = MEM_readLE16(istart+2); + size_t const length3 = MEM_readLE16(istart+4); + size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6); + const BYTE* const istart1 = istart + 6; /* jumpTable */ + const BYTE* const istart2 = istart1 + length1; + const BYTE* const istart3 = istart2 + length2; + const BYTE* const istart4 = istart3 + length3; + const size_t segmentSize = (dstSize+3) / 4; + BYTE* const opStart2 = ostart + segmentSize; + BYTE* const opStart3 = opStart2 + segmentSize; + BYTE* const opStart4 = opStart3 + segmentSize; + BYTE* op1 = ostart; + BYTE* op2 = opStart2; + BYTE* op3 = opStart3; + BYTE* op4 = opStart4; + DTableDesc const dtd = HUF_getDTableDesc(DTable); + U32 const dtLog = dtd.tableLog; + U32 endSignal = 1; + + if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ + CHECK_F( BIT_initDStream(&bitD1, istart1, length1) ); + CHECK_F( BIT_initDStream(&bitD2, istart2, length2) ); + CHECK_F( BIT_initDStream(&bitD3, istart3, length3) ); + CHECK_F( BIT_initDStream(&bitD4, istart4, length4) ); + + /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */ + for ( ; (endSignal) & (op4 < olimit) ; ) { + HUF_DECODE_SYMBOLX1_2(op1, &bitD1); + HUF_DECODE_SYMBOLX1_2(op2, &bitD2); + HUF_DECODE_SYMBOLX1_2(op3, &bitD3); + HUF_DECODE_SYMBOLX1_2(op4, &bitD4); + HUF_DECODE_SYMBOLX1_1(op1, &bitD1); + HUF_DECODE_SYMBOLX1_1(op2, &bitD2); + HUF_DECODE_SYMBOLX1_1(op3, &bitD3); + HUF_DECODE_SYMBOLX1_1(op4, &bitD4); + HUF_DECODE_SYMBOLX1_2(op1, &bitD1); + HUF_DECODE_SYMBOLX1_2(op2, &bitD2); + HUF_DECODE_SYMBOLX1_2(op3, &bitD3); + HUF_DECODE_SYMBOLX1_2(op4, &bitD4); + HUF_DECODE_SYMBOLX1_0(op1, &bitD1); + HUF_DECODE_SYMBOLX1_0(op2, &bitD2); + HUF_DECODE_SYMBOLX1_0(op3, &bitD3); + HUF_DECODE_SYMBOLX1_0(op4, &bitD4); + endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished; + } + + /* check corruption */ + /* note : should not be necessary : op# advance in lock step, and we control op4. 
+ * but curiously, binary generated by gcc 7.2 & 7.3 with -mbmi2 runs faster when >=1 test is present */ + if (op1 > opStart2) return ERROR(corruption_detected); + if (op2 > opStart3) return ERROR(corruption_detected); + if (op3 > opStart4) return ERROR(corruption_detected); + /* note : op4 supposed already verified within main loop */ + + /* finish bitStreams one by one */ + HUF_decodeStreamX1(op1, &bitD1, opStart2, dt, dtLog); + HUF_decodeStreamX1(op2, &bitD2, opStart3, dt, dtLog); + HUF_decodeStreamX1(op3, &bitD3, opStart4, dt, dtLog); + HUF_decodeStreamX1(op4, &bitD4, oend, dt, dtLog); + + /* check */ + { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); + if (!endCheck) return ERROR(corruption_detected); } + + /* decoded size */ + return dstSize; + } +} + + +typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize, + const void *cSrc, + size_t cSrcSize, + const HUF_DTable *DTable); + +HUF_DGEN(HUF_decompress1X1_usingDTable_internal) +HUF_DGEN(HUF_decompress4X1_usingDTable_internal) + + + +size_t HUF_decompress1X1_usingDTable( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc dtd = HUF_getDTableDesc(DTable); + if (dtd.tableType != 0) return ERROR(GENERIC); + return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +} + +size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize) +{ + const BYTE* ip = (const BYTE*) cSrc; + + size_t const hSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; + + return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0); +} + + +size_t HUF_decompress4X1_usingDTable( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc dtd = HUF_getDTableDesc(DTable); + if (dtd.tableType != 0) return ERROR(GENERIC); + return HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +} + +static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize, int bmi2) +{ + const BYTE* ip = (const BYTE*) cSrc; + + size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; + + return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); +} + +size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize) +{ + return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0); +} + + +#endif /* HUF_FORCE_DECOMPRESS_X2 */ + + +#ifndef HUF_FORCE_DECOMPRESS_X1 + +/* *************************/ +/* double-symbols decoding */ +/* *************************/ + +typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2; /* double-symbols decoding */ +typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t; +typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1]; +typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX]; + + +/* 
HUF_fillDTableX2Level2() :
+ * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */
+static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 sizeLog, const U32 consumed,
+ const U32* rankValOrigin, const int minWeight,
+ const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
+ U32 nbBitsBaseline, U16 baseSeq, U32* wksp, size_t wkspSize)
+{
+ HUF_DEltX2 DElt;
+ U32* rankVal = wksp;
+
+ assert(wkspSize >= HUF_TABLELOG_MAX + 1);
+ (void)wkspSize;
+ /* get pre-calculated rankVal */
+ ZSTD_memcpy(rankVal, rankValOrigin, sizeof(U32) * (HUF_TABLELOG_MAX + 1));
+
+ /* fill skipped values */
+ if (minWeight>1) {
+ U32 i, skipSize = rankVal[minWeight];
+ MEM_writeLE16(&(DElt.sequence), baseSeq);
+ DElt.nbBits = (BYTE)(consumed);
+ DElt.length = 1;
+ for (i = 0; i < skipSize; i++)
+ DTable[i] = DElt;
+ }
+
+ /* fill DTable */
+ { U32 s; for (s=0; s<sortedListSize; s++) {
+ const U32 symbol = sortedSymbols[s].symbol;
+ const U32 weight = sortedSymbols[s].weight;
+ const U32 nbBits = nbBitsBaseline - weight;
+ const U32 length = 1 << (sizeLog-nbBits);
+ const U32 start = rankVal[weight];
+ U32 i = start;
+ const U32 end = start + length;
+
+ MEM_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8)));
+ DElt.nbBits = (BYTE)(nbBits + consumed);
+ DElt.length = 2;
+ do { DTable[i++] = DElt; } while (i<end); /* since length >= 1 */
+
+ rankVal[weight] += length;
+ } }
+}
+
+
+static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
+ const sortedSymbol_t* sortedList, const U32 sortedListSize,
+ const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
+ const U32 nbBitsBaseline, U32* wksp, size_t wkspSize)
+{
+ U32* rankVal = wksp;
+ const int scaleLog = nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */
+ const U32 minBits = nbBitsBaseline - maxWeight;
+ U32 s;
+
+ assert(wkspSize >= HUF_TABLELOG_MAX + 1);
+ wksp += HUF_TABLELOG_MAX + 1;
+ wkspSize -= HUF_TABLELOG_MAX + 1;
+
+ ZSTD_memcpy(rankVal, rankValOrigin, sizeof(U32) * (HUF_TABLELOG_MAX + 1));
+
+ /* fill DTable */
+ for (s=0; s<sortedListSize; s++) {
+ const U16 symbol = sortedList[s].symbol;
+ const U32 weight = sortedList[s].weight;
+ const U32 nbBits = nbBitsBaseline - weight;
+ const U32 start = rankVal[weight];
+ const U32 length = 1 << (targetLog-nbBits);
+
+ if (targetLog-nbBits >= minBits) { /* enough room for a second symbol */
+ U32 sortedRank;
+ int minWeight = nbBits + scaleLog;
+ if (minWeight < 1) minWeight = 1;
+ sortedRank = rankStart[minWeight];
+ HUF_fillDTableX2Level2(DTable+start, targetLog-nbBits, nbBits,
+ rankValOrigin[nbBits], minWeight,
+ sortedList+sortedRank, sortedListSize-sortedRank,
+ nbBitsBaseline, symbol, wksp, wkspSize);
+ } else {
+ HUF_DEltX2 DElt;
+ MEM_writeLE16(&(DElt.sequence), symbol);
+ DElt.nbBits = (BYTE)(nbBits);
+ DElt.length = 1;
+ { U32 const end = start + length;
+ U32 u;
+ for (u = start; u < end; u++) DTable[u] = DElt;
+ } }
+ rankVal[weight] += length;
+ }
+}
+
+typedef struct {
+ rankValCol_t rankVal[HUF_TABLELOG_MAX];
+ U32 rankStats[HUF_TABLELOG_MAX + 1];
+ U32 rankStart0[HUF_TABLELOG_MAX + 2];
+ sortedSymbol_t sortedSymbol[HUF_SYMBOLVALUE_MAX + 1];
+ BYTE weightList[HUF_SYMBOLVALUE_MAX + 1];
+ U32 calleeWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
+} HUF_ReadDTableX2_Workspace;
+
+size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
+ const void* src, size_t srcSize,
+ void* workSpace, size_t wkspSize)
+{
+ U32 tableLog, maxW, sizeOfSort, nbSymbols;
+ DTableDesc dtd = HUF_getDTableDesc(DTable);
+ U32 const maxTableLog = dtd.maxTableLog;
+ size_t iSize;
+ void* dtPtr = DTable+1; /* force compiler to avoid strict-aliasing */
+ HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
+ U32 *rankStart;
+
+ HUF_ReadDTableX2_Workspace* const wksp = (HUF_ReadDTableX2_Workspace*)workSpace;
+
+ if (sizeof(*wksp) > wkspSize) return ERROR(GENERIC);
+
+ rankStart = wksp->rankStart0 + 1;
+ ZSTD_memset(wksp->rankStats, 0, sizeof(wksp->rankStats));
+ ZSTD_memset(wksp->rankStart0, 0, sizeof(wksp->rankStart0));
+
+ DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */
+ if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
+ /* ZSTD_memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... */
+
+ iSize = HUF_readStats_wksp(wksp->weightList, HUF_SYMBOLVALUE_MAX + 1, wksp->rankStats, &nbSymbols, &tableLog, src, srcSize, wksp->calleeWksp, sizeof(wksp->calleeWksp), /* bmi2 */ 0);
+ if (HUF_isError(iSize)) return iSize;
+
+ /* check result */
+ if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */
+
+ /* find maxWeight */
+ for (maxW = tableLog; wksp->rankStats[maxW]==0; maxW--) {} /* necessarily finds a solution before 0 */
+
+ /* Get start index of each weight */
+ { U32 w, nextRankStart = 0;
+ for (w=1; w<maxW+1; w++) {
+ U32 curr = nextRankStart;
+ nextRankStart += wksp->rankStats[w];
+ rankStart[w] = curr;
+ }
+ rankStart[0] = nextRankStart; /* put all 0w symbols at the end of sorted list*/
+ sizeOfSort = nextRankStart;
+ }
+
+ /* sort symbols by weight */
+ { U32 s;
+ for (s=0; s<nbSymbols; s++) {
+ U32 const w = wksp->weightList[s];
+ U32 const r = rankStart[w]++;
+ wksp->sortedSymbol[r].symbol = (BYTE)s;
+ wksp->sortedSymbol[r].weight = (BYTE)w;
+ }
+ rankStart[0] = 0; /* forget 0w symbols; this is beginning of weight(1) */
+ }
+
+ /* Build rankVal */
+ { U32* const rankVal0 = wksp->rankVal[0];
+ { int const rescale = (maxTableLog-tableLog) - 1; /* tableLog <= maxTableLog */
+ U32 nextRankVal = 0;
+ U32 w;
+ for (w=1; w<maxW+1; w++) {
+ U32 curr = nextRankVal;
+ nextRankVal += wksp->rankStats[w] << (w+rescale);
+ rankVal0[w] = curr;
+ } }
+ { U32 const minBits = tableLog+1 - maxW;
+ U32 consumed;
+ for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) {
+ U32* const rankValPtr = wksp->rankVal[consumed];
+ U32 w;
+ for (w = 1; w < maxW+1; w++) {
+ rankValPtr[w] = rankVal0[w] >> consumed;
+ } } } }
+
+ HUF_fillDTableX2(dt, maxTableLog,
+ wksp->sortedSymbol, sizeOfSort,
+ wksp->rankStart0, wksp->rankVal, maxW,
+ tableLog+1,
+ wksp->calleeWksp, sizeof(wksp->calleeWksp) / sizeof(U32));
+
+ dtd.tableLog = (BYTE)maxTableLog;
+ dtd.tableType = 1;
+ ZSTD_memcpy(DTable, &dtd, sizeof(dtd));
+ return iSize;
+}
+
+
+FORCE_INLINE_TEMPLATE U32
+HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
+{
+ size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
+ ZSTD_memcpy(op, dt+val, 2);
+ BIT_skipBits(DStream, dt[val].nbBits);
+ return dt[val].length;
+}
+
+FORCE_INLINE_TEMPLATE U32
+HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
+{
+ size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
+ ZSTD_memcpy(op, dt+val, 1);
+ if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
+ else {
+ if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
+ BIT_skipBits(DStream, dt[val].nbBits);
+ if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
+ /* ugly hack; works only because it's the last symbol.
Note : can't easily extract nbBits from just this symbol */ + DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8); + } } + return 1; +} + +#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \ + ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) + +#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \ + if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \ + ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) + +#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \ + if (MEM_64bits()) \ + ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) + +HINT_INLINE size_t +HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, + const HUF_DEltX2* const dt, const U32 dtLog) +{ + BYTE* const pStart = p; + + /* up to 8 symbols at a time */ + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) { + HUF_DECODE_SYMBOLX2_2(p, bitDPtr); + HUF_DECODE_SYMBOLX2_1(p, bitDPtr); + HUF_DECODE_SYMBOLX2_2(p, bitDPtr); + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); + } + + /* closer to end : up to 2 symbols at a time */ + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2)) + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); + + while (p <= pEnd-2) + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); /* no need to reload : reached the end of DStream */ + + if (p < pEnd) + p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog); + + return p-pStart; +} + +FORCE_INLINE_TEMPLATE size_t +HUF_decompress1X2_usingDTable_internal_body( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + BIT_DStream_t bitD; + + /* Init */ + CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) ); + + /* decode */ + { BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = ostart + dstSize; + const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */ + const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr; + DTableDesc const dtd = HUF_getDTableDesc(DTable); + HUF_decodeStreamX2(ostart, &bitD, oend, dt, dtd.tableLog); + } + + /* check */ + if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected); + + /* decoded size */ + return dstSize; +} + +FORCE_INLINE_TEMPLATE size_t +HUF_decompress4X2_usingDTable_internal_body( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ + + { const BYTE* const istart = (const BYTE*) cSrc; + BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = ostart + dstSize; + BYTE* const olimit = oend - (sizeof(size_t)-1); + const void* const dtPtr = DTable+1; + const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr; + + /* Init */ + BIT_DStream_t bitD1; + BIT_DStream_t bitD2; + BIT_DStream_t bitD3; + BIT_DStream_t bitD4; + size_t const length1 = MEM_readLE16(istart); + size_t const length2 = MEM_readLE16(istart+2); + size_t const length3 = MEM_readLE16(istart+4); + size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6); + const BYTE* const istart1 = istart + 6; /* jumpTable */ + const BYTE* const istart2 = istart1 + length1; + const BYTE* const istart3 = istart2 + length2; + const BYTE* const istart4 = istart3 + length3; + size_t const segmentSize = (dstSize+3) / 4; + BYTE* const opStart2 = ostart + segmentSize; + BYTE* const opStart3 = opStart2 + segmentSize; + BYTE* const opStart4 = opStart3 + segmentSize; + BYTE* op1 = ostart; + BYTE* op2 = opStart2; + BYTE* op3 = opStart3; + BYTE* op4 = opStart4; + U32 endSignal = 1; + DTableDesc const dtd = 
HUF_getDTableDesc(DTable); + U32 const dtLog = dtd.tableLog; + + if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ + CHECK_F( BIT_initDStream(&bitD1, istart1, length1) ); + CHECK_F( BIT_initDStream(&bitD2, istart2, length2) ); + CHECK_F( BIT_initDStream(&bitD3, istart3, length3) ); + CHECK_F( BIT_initDStream(&bitD4, istart4, length4) ); + + /* 16-32 symbols per loop (4-8 symbols per stream) */ + for ( ; (endSignal) & (op4 < olimit); ) { +#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__)) + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_1(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_0(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_1(op2, &bitD2); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_0(op2, &bitD2); + endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished; + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_1(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_0(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_1(op4, &bitD4); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_0(op4, &bitD4); + endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished; +#else + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_1(op1, &bitD1); + HUF_DECODE_SYMBOLX2_1(op2, &bitD2); + HUF_DECODE_SYMBOLX2_1(op3, &bitD3); + HUF_DECODE_SYMBOLX2_1(op4, &bitD4); + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_0(op1, &bitD1); + HUF_DECODE_SYMBOLX2_0(op2, &bitD2); + HUF_DECODE_SYMBOLX2_0(op3, &bitD3); + HUF_DECODE_SYMBOLX2_0(op4, &bitD4); + endSignal = (U32)LIKELY( + (BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished) + & (BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished) + & (BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished) + & (BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished)); +#endif + } + + /* check corruption */ + if (op1 > opStart2) return ERROR(corruption_detected); + if (op2 > opStart3) return ERROR(corruption_detected); + if (op3 > opStart4) return ERROR(corruption_detected); + /* note : op4 already verified within main loop */ + + /* finish bitStreams one by one */ + HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog); + HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog); + HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog); + HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog); + + /* check */ + { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); + if (!endCheck) return ERROR(corruption_detected); } + + /* decoded size */ + return dstSize; + } +} + +HUF_DGEN(HUF_decompress1X2_usingDTable_internal) +HUF_DGEN(HUF_decompress4X2_usingDTable_internal) + +size_t HUF_decompress1X2_usingDTable( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc dtd = HUF_getDTableDesc(DTable); + if (dtd.tableType != 1) return ERROR(GENERIC); + return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +} + +size_t 
HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize) +{ + const BYTE* ip = (const BYTE*) cSrc; + + size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize, + workSpace, wkspSize); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; + + return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0); +} + + +size_t HUF_decompress4X2_usingDTable( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc dtd = HUF_getDTableDesc(DTable); + if (dtd.tableType != 1) return ERROR(GENERIC); + return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +} + +static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize, int bmi2) +{ + const BYTE* ip = (const BYTE*) cSrc; + + size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize, + workSpace, wkspSize); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; + + return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); +} + +size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize) +{ + return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0); +} + + +#endif /* HUF_FORCE_DECOMPRESS_X1 */ + + +/* ***********************************/ +/* Universal decompression selectors */ +/* ***********************************/ + +size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc const dtd = HUF_getDTableDesc(DTable); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)dtd; + assert(dtd.tableType == 0); + return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)dtd; + assert(dtd.tableType == 1); + return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +#else + return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) : + HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +#endif +} + +size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc const dtd = HUF_getDTableDesc(DTable); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)dtd; + assert(dtd.tableType == 0); + return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)dtd; + assert(dtd.tableType == 1); + return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +#else + return dtd.tableType ? 
HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) : + HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +#endif +} + + +#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2) +typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t; +static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] = +{ + /* single, double, quad */ + {{0,0}, {1,1}, {2,2}}, /* Q==0 : impossible */ + {{0,0}, {1,1}, {2,2}}, /* Q==1 : impossible */ + {{ 38,130}, {1313, 74}, {2151, 38}}, /* Q == 2 : 12-18% */ + {{ 448,128}, {1353, 74}, {2238, 41}}, /* Q == 3 : 18-25% */ + {{ 556,128}, {1353, 74}, {2238, 47}}, /* Q == 4 : 25-32% */ + {{ 714,128}, {1418, 74}, {2436, 53}}, /* Q == 5 : 32-38% */ + {{ 883,128}, {1437, 74}, {2464, 61}}, /* Q == 6 : 38-44% */ + {{ 897,128}, {1515, 75}, {2622, 68}}, /* Q == 7 : 44-50% */ + {{ 926,128}, {1613, 75}, {2730, 75}}, /* Q == 8 : 50-56% */ + {{ 947,128}, {1729, 77}, {3359, 77}}, /* Q == 9 : 56-62% */ + {{1107,128}, {2083, 81}, {4006, 84}}, /* Q ==10 : 62-69% */ + {{1177,128}, {2379, 87}, {4785, 88}}, /* Q ==11 : 69-75% */ + {{1242,128}, {2415, 93}, {5155, 84}}, /* Q ==12 : 75-81% */ + {{1349,128}, {2644,106}, {5260,106}}, /* Q ==13 : 81-87% */ + {{1455,128}, {2422,124}, {4174,124}}, /* Q ==14 : 87-93% */ + {{ 722,128}, {1891,145}, {1936,146}}, /* Q ==15 : 93-99% */ +}; +#endif + +/** HUF_selectDecoder() : + * Tells which decoder is likely to decode faster, + * based on a set of pre-computed metrics. + * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 . + * Assumption : 0 < dstSize <= 128 KB */ +U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize) +{ + assert(dstSize > 0); + assert(dstSize <= 128*1024); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)dstSize; + (void)cSrcSize; + return 0; +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)dstSize; + (void)cSrcSize; + return 1; +#else + /* decoder timing evaluation */ + { U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */ + U32 const D256 = (U32)(dstSize >> 8); + U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256); + U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256); + DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, to reduce cache eviction */ + return DTime1 < DTime0; + } +#endif +} + + +size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, + size_t dstSize, const void* cSrc, + size_t cSrcSize, void* workSpace, + size_t wkspSize) +{ + /* validation checks */ + if (dstSize == 0) return ERROR(dstSize_tooSmall); + if (cSrcSize == 0) return ERROR(corruption_detected); + + { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)algoNb; + assert(algoNb == 0); + return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)algoNb; + assert(algoNb == 1); + return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); +#else + return algoNb ? 
HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, + cSrcSize, workSpace, wkspSize): + HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); +#endif + } +} + +size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize) +{ + /* validation checks */ + if (dstSize == 0) return ERROR(dstSize_tooSmall); + if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ + if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ + if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ + + { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)algoNb; + assert(algoNb == 0); + return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc, + cSrcSize, workSpace, wkspSize); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)algoNb; + assert(algoNb == 1); + return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, + cSrcSize, workSpace, wkspSize); +#else + return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, + cSrcSize, workSpace, wkspSize): + HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc, + cSrcSize, workSpace, wkspSize); +#endif + } +} + + +size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2) +{ + DTableDesc const dtd = HUF_getDTableDesc(DTable); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)dtd; + assert(dtd.tableType == 0); + return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)dtd; + assert(dtd.tableType == 1); + return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +#else + return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) : + HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +#endif +} + +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2) +{ + const BYTE* ip = (const BYTE*) cSrc; + + size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; + + return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); +} +#endif + +size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2) +{ + DTableDesc const dtd = HUF_getDTableDesc(DTable); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)dtd; + assert(dtd.tableType == 0); + return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)dtd; + assert(dtd.tableType == 1); + return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +#else + return dtd.tableType ? 
HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) : + HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +#endif +} + +size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2) +{ + /* validation checks */ + if (dstSize == 0) return ERROR(dstSize_tooSmall); + if (cSrcSize == 0) return ERROR(corruption_detected); + + { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)algoNb; + assert(algoNb == 0); + return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)algoNb; + assert(algoNb == 1); + return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); +#else + return algoNb ? HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) : + HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); +#endif + } +} + +#ifndef ZSTD_NO_UNUSED_FUNCTIONS +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_readDTableX1_wksp(DTable, src, srcSize, + workSpace, sizeof(workSpace)); +} + +size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} + +size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX); + return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize); +} +#endif + +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_readDTableX2_wksp(DTable, src, srcSize, + workSpace, sizeof(workSpace)); +} + +size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} + +size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX); + return HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); +} +#endif + +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} +size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX); + return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); +} +#endif + +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, + 
workSpace, sizeof(workSpace)); +} + +size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX); + return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); +} +#endif + +typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); + +size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ +#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2) + static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 }; +#endif + + /* validation checks */ + if (dstSize == 0) return ERROR(dstSize_tooSmall); + if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ + if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ + if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ + + { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)algoNb; + assert(algoNb == 0); + return HUF_decompress4X1(dst, dstSize, cSrc, cSrcSize); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)algoNb; + assert(algoNb == 1); + return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize); +#else + return decompress[algoNb](dst, dstSize, cSrc, cSrcSize); +#endif + } +} + +size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + /* validation checks */ + if (dstSize == 0) return ERROR(dstSize_tooSmall); + if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ + if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ + if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ + + { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)algoNb; + assert(algoNb == 0); + return HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)algoNb; + assert(algoNb == 1); + return HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize); +#else + return algoNb ? HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) : + HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ; +#endif + } +} + +size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} + +size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} +#endif +/**** ended inlining decompress/huf_decompress.c ****/ +/**** start inlining decompress/zstd_ddict.c ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +/* zstd_ddict.c : + * concentrates all logic that needs to know the internals of ZSTD_DDict object */ + +/*-******************************************************* +* Dependencies +*********************************************************/ +/**** skipping file: ../common/zstd_deps.h ****/ +/**** skipping file: ../common/cpu.h ****/ +/**** skipping file: ../common/mem.h ****/ +#define FSE_STATIC_LINKING_ONLY +/**** skipping file: ../common/fse.h ****/ +#define HUF_STATIC_LINKING_ONLY +/**** skipping file: ../common/huf.h ****/ +/**** start inlining zstd_decompress_internal.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + +/* zstd_decompress_internal: + * objects and definitions shared within lib/decompress modules */ + + #ifndef ZSTD_DECOMPRESS_INTERNAL_H + #define ZSTD_DECOMPRESS_INTERNAL_H + + +/*-******************************************************* + * Dependencies + *********************************************************/ +/**** skipping file: ../common/mem.h ****/ +/**** skipping file: ../common/zstd_internal.h ****/ + + + +/*-******************************************************* + * Constants + *********************************************************/ +static UNUSED_ATTR const U32 LL_base[MaxLL+1] = { + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 18, 20, 22, 24, 28, 32, 40, + 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, + 0x2000, 0x4000, 0x8000, 0x10000 }; + +static UNUSED_ATTR const U32 OF_base[MaxOff+1] = { + 0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D, + 0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD, + 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD, + 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD }; + +static UNUSED_ATTR const U32 OF_bits[MaxOff+1] = { + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31 }; + +static UNUSED_ATTR const U32 ML_base[MaxML+1] = { + 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 26, + 27, 28, 29, 30, 31, 32, 33, 34, + 35, 37, 39, 41, 43, 47, 51, 59, + 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803, + 0x1003, 0x2003, 0x4003, 0x8003, 0x10003 }; + + +/*-******************************************************* + * Decompression types + *********************************************************/ + typedef struct { + U32 fastMode; + U32 tableLog; + } ZSTD_seqSymbol_header; + + typedef struct { + U16 nextState; + BYTE nbAdditionalBits; + BYTE nbBits; + U32 baseValue; + } ZSTD_seqSymbol; + + #define SEQSYMBOL_TABLE_SIZE(log) (1 + (1 << (log))) + +#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE (sizeof(S16) * (MaxSeq + 1) + (1u << MaxFSELog) + sizeof(U64)) +#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32 ((ZSTD_BUILD_FSE_TABLE_WKSP_SIZE + sizeof(U32) - 1) / sizeof(U32)) + +typedef struct { + ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */ + ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */ + ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at 
least HUF_DECOMPRESS_WORKSPACE_SIZE large */ + HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */ + U32 rep[ZSTD_REP_NUM]; + U32 workspace[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32]; +} ZSTD_entropyDTables_t; + +typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader, + ZSTDds_decodeBlockHeader, ZSTDds_decompressBlock, + ZSTDds_decompressLastBlock, ZSTDds_checkChecksum, + ZSTDds_decodeSkippableHeader, ZSTDds_skipFrame } ZSTD_dStage; + +typedef enum { zdss_init=0, zdss_loadHeader, + zdss_read, zdss_load, zdss_flush } ZSTD_dStreamStage; + +typedef enum { + ZSTD_use_indefinitely = -1, /* Use the dictionary indefinitely */ + ZSTD_dont_use = 0, /* Do not use the dictionary (if one exists free it) */ + ZSTD_use_once = 1 /* Use the dictionary once and set to ZSTD_dont_use */ +} ZSTD_dictUses_e; + +/* Hashset for storing references to multiple ZSTD_DDict within ZSTD_DCtx */ +typedef struct { + const ZSTD_DDict** ddictPtrTable; + size_t ddictPtrTableSize; + size_t ddictPtrCount; +} ZSTD_DDictHashSet; + +struct ZSTD_DCtx_s +{ + const ZSTD_seqSymbol* LLTptr; + const ZSTD_seqSymbol* MLTptr; + const ZSTD_seqSymbol* OFTptr; + const HUF_DTable* HUFptr; + ZSTD_entropyDTables_t entropy; + U32 workspace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; /* space needed when building huffman tables */ + const void* previousDstEnd; /* detect continuity */ + const void* prefixStart; /* start of current segment */ + const void* virtualStart; /* virtual start of previous segment if it was just before current one */ + const void* dictEnd; /* end of previous segment */ + size_t expected; + ZSTD_frameHeader fParams; + U64 processedCSize; + U64 decodedSize; + blockType_e bType; /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */ + ZSTD_dStage stage; + U32 litEntropy; + U32 fseEntropy; + XXH64_state_t xxhState; + size_t headerSize; + ZSTD_format_e format; + ZSTD_forceIgnoreChecksum_e forceIgnoreChecksum; /* User specified: if == 1, will ignore checksums in compressed frame. Default == 0 */ + U32 validateChecksum; /* if == 1, will validate checksum. Is == 1 if (fParams.checksumFlag == 1) and (forceIgnoreChecksum == 0). */ + const BYTE* litPtr; + ZSTD_customMem customMem; + size_t litSize; + size_t rleSize; + size_t staticSize; + int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */ + + /* dictionary */ + ZSTD_DDict* ddictLocal; + const ZSTD_DDict* ddict; /* set by ZSTD_initDStream_usingDDict(), or ZSTD_DCtx_refDDict() */ + U32 dictID; + int ddictIsCold; /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */ + ZSTD_dictUses_e dictUses; + ZSTD_DDictHashSet* ddictSet; /* Hash set for multiple ddicts */ + ZSTD_refMultipleDDicts_e refMultipleDDicts; /* User specified: if == 1, will allow references to multiple DDicts. 
Default == 0 (disabled) */ + + /* streaming */ + ZSTD_dStreamStage streamStage; + char* inBuff; + size_t inBuffSize; + size_t inPos; + size_t maxWindowSize; + char* outBuff; + size_t outBuffSize; + size_t outStart; + size_t outEnd; + size_t lhSize; + void* legacyContext; + U32 previousLegacyVersion; + U32 legacyVersion; + U32 hostageByte; + int noForwardProgress; + ZSTD_bufferMode_e outBufferMode; + ZSTD_outBuffer expectedOutBuffer; + + /* workspace */ + BYTE litBuffer[ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH]; + BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX]; + + size_t oversizedDuration; + +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + void const* dictContentBeginForFuzzing; + void const* dictContentEndForFuzzing; +#endif + + /* Tracing */ +#if ZSTD_TRACE + ZSTD_TraceCtx traceCtx; +#endif +}; /* typedef'd to ZSTD_DCtx within "zstd.h" */ + + +/*-******************************************************* + * Shared internal functions + *********************************************************/ + +/*! ZSTD_loadDEntropy() : + * dict : must point at beginning of a valid zstd dictionary. + * @return : size of dictionary header (size of magic number + dict ID + entropy tables) */ +size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy, + const void* const dict, size_t const dictSize); + +/*! ZSTD_checkContinuity() : + * check if next `dst` follows previous position, where decompression ended. + * If yes, do nothing (continue on current segment). + * If not, classify previous segment as "external dictionary", and start a new segment. + * This function cannot fail. */ +void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize); + + +#endif /* ZSTD_DECOMPRESS_INTERNAL_H */ +/**** ended inlining zstd_decompress_internal.h ****/ +/**** start inlining zstd_ddict.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + +#ifndef ZSTD_DDICT_H +#define ZSTD_DDICT_H + +/*-******************************************************* + * Dependencies + *********************************************************/ +/**** skipping file: ../common/zstd_deps.h ****/ +/**** skipping file: ../zstd.h ****/ + + +/*-******************************************************* + * Interface + *********************************************************/ + +/* note: several prototypes are already published in `zstd.h` : + * ZSTD_createDDict() + * ZSTD_createDDict_byReference() + * ZSTD_createDDict_advanced() + * ZSTD_freeDDict() + * ZSTD_initStaticDDict() + * ZSTD_sizeof_DDict() + * ZSTD_estimateDDictSize() + * ZSTD_getDictID_fromDict() + */ + +const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict); +size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict); + +void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); + + + +#endif /* ZSTD_DDICT_H */ +/**** ended inlining zstd_ddict.h ****/ + +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) +/**** start inlining ../legacy/zstd_legacy.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. 
+ * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_LEGACY_H +#define ZSTD_LEGACY_H + +#if defined (__cplusplus) +extern "C" { +#endif + +/* ************************************* +* Includes +***************************************/ +/**** skipping file: ../common/mem.h ****/ +/**** skipping file: ../common/error_private.h ****/ +/**** skipping file: ../common/zstd_internal.h ****/ + +#if !defined (ZSTD_LEGACY_SUPPORT) || (ZSTD_LEGACY_SUPPORT == 0) +# undef ZSTD_LEGACY_SUPPORT +# define ZSTD_LEGACY_SUPPORT 8 +#endif + +#if (ZSTD_LEGACY_SUPPORT <= 1) +/**** start inlining zstd_v01.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_V01_H_28739879432 +#define ZSTD_V01_H_28739879432 + +#if defined (__cplusplus) +extern "C" { +#endif + +/* ************************************* +* Includes +***************************************/ +#include /* size_t */ + + +/* ************************************* +* Simple one-step function +***************************************/ +/** +ZSTDv01_decompress() : decompress ZSTD frames compliant with v0.1.x format + compressedSize : is the exact source size + maxOriginalSize : is the size of the 'dst' buffer, which must be already allocated. + It must be equal or larger than originalSize, otherwise decompression will fail. + return : the number of bytes decompressed into destination buffer (originalSize) + or an errorCode if it fails (which can be tested using ZSTDv01_isError()) +*/ +size_t ZSTDv01_decompress( void* dst, size_t maxOriginalSize, + const void* src, size_t compressedSize); + + /** + ZSTDv01_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.1.x format + srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src' + cSize (output parameter) : the number of bytes that would be read to decompress this frame + or an error code if it fails (which can be tested using ZSTDv01_isError()) + dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame + or ZSTD_CONTENTSIZE_ERROR if an error occurs + + note : assumes `cSize` and `dBound` are _not_ NULL. 
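+
+    A minimal sketch of the intended pairing (illustrative only, not upstream documentation),
+    assuming `buf`/`bufSize` hold one complete v0.1 frame and allocation is simplified:
+
+        size_t cSize; unsigned long long dBound;
+        ZSTDv01_findFrameSizeInfoLegacy(buf, bufSize, &cSize, &dBound);
+        if (ZSTDv01_isError(cSize) || dBound == ZSTD_CONTENTSIZE_ERROR) return;  // bad frame
+        void* const dst = malloc((size_t)dBound);            // dBound is only an upper bound
+        size_t const dSize = ZSTDv01_decompress(dst, (size_t)dBound, buf, cSize);
+        if (ZSTDv01_isError(dSize)) { free(dst); return; }
+        // dSize bytes were regenerated; this frame consumed cSize bytes of input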
+ */ +void ZSTDv01_findFrameSizeInfoLegacy(const void *src, size_t srcSize, + size_t* cSize, unsigned long long* dBound); + +/** +ZSTDv01_isError() : tells if the result of ZSTDv01_decompress() is an error +*/ +unsigned ZSTDv01_isError(size_t code); + + +/* ************************************* +* Advanced functions +***************************************/ +typedef struct ZSTDv01_Dctx_s ZSTDv01_Dctx; +ZSTDv01_Dctx* ZSTDv01_createDCtx(void); +size_t ZSTDv01_freeDCtx(ZSTDv01_Dctx* dctx); + +size_t ZSTDv01_decompressDCtx(void* ctx, + void* dst, size_t maxOriginalSize, + const void* src, size_t compressedSize); + +/* ************************************* +* Streaming functions +***************************************/ +size_t ZSTDv01_resetDCtx(ZSTDv01_Dctx* dctx); + +size_t ZSTDv01_nextSrcSizeToDecompress(ZSTDv01_Dctx* dctx); +size_t ZSTDv01_decompressContinue(ZSTDv01_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize); +/** + Use above functions alternatively. + ZSTD_nextSrcSizeToDecompress() tells how much bytes to provide as 'srcSize' to ZSTD_decompressContinue(). + ZSTD_decompressContinue() will use previous data blocks to improve compression if they are located prior to current block. + Result is the number of bytes regenerated within 'dst'. + It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header. +*/ + +/* ************************************* +* Prefix - version detection +***************************************/ +#define ZSTDv01_magicNumber 0xFD2FB51E /* Big Endian version */ +#define ZSTDv01_magicNumberLE 0x1EB52FFD /* Little Endian version */ + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_V01_H_28739879432 */ +/**** ended inlining zstd_v01.h ****/ +#endif +#if (ZSTD_LEGACY_SUPPORT <= 2) +/**** start inlining zstd_v02.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_V02_H_4174539423 +#define ZSTD_V02_H_4174539423 + +#if defined (__cplusplus) +extern "C" { +#endif + +/* ************************************* +* Includes +***************************************/ +#include /* size_t */ + + +/* ************************************* +* Simple one-step function +***************************************/ +/** +ZSTDv02_decompress() : decompress ZSTD frames compliant with v0.2.x format + compressedSize : is the exact source size + maxOriginalSize : is the size of the 'dst' buffer, which must be already allocated. + It must be equal or larger than originalSize, otherwise decompression will fail. 
+ return : the number of bytes decompressed into destination buffer (originalSize) + or an errorCode if it fails (which can be tested using ZSTDv01_isError()) +*/ +size_t ZSTDv02_decompress( void* dst, size_t maxOriginalSize, + const void* src, size_t compressedSize); + + /** + ZSTDv02_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.2.x format + srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src' + cSize (output parameter) : the number of bytes that would be read to decompress this frame + or an error code if it fails (which can be tested using ZSTDv01_isError()) + dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame + or ZSTD_CONTENTSIZE_ERROR if an error occurs + + note : assumes `cSize` and `dBound` are _not_ NULL. + */ +void ZSTDv02_findFrameSizeInfoLegacy(const void *src, size_t srcSize, + size_t* cSize, unsigned long long* dBound); + +/** +ZSTDv02_isError() : tells if the result of ZSTDv02_decompress() is an error +*/ +unsigned ZSTDv02_isError(size_t code); + + +/* ************************************* +* Advanced functions +***************************************/ +typedef struct ZSTDv02_Dctx_s ZSTDv02_Dctx; +ZSTDv02_Dctx* ZSTDv02_createDCtx(void); +size_t ZSTDv02_freeDCtx(ZSTDv02_Dctx* dctx); + +size_t ZSTDv02_decompressDCtx(void* ctx, + void* dst, size_t maxOriginalSize, + const void* src, size_t compressedSize); + +/* ************************************* +* Streaming functions +***************************************/ +size_t ZSTDv02_resetDCtx(ZSTDv02_Dctx* dctx); + +size_t ZSTDv02_nextSrcSizeToDecompress(ZSTDv02_Dctx* dctx); +size_t ZSTDv02_decompressContinue(ZSTDv02_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize); +/** + Use above functions alternatively. + ZSTD_nextSrcSizeToDecompress() tells how much bytes to provide as 'srcSize' to ZSTD_decompressContinue(). + ZSTD_decompressContinue() will use previous data blocks to improve compression if they are located prior to current block. + Result is the number of bytes regenerated within 'dst'. + It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header. +*/ + +/* ************************************* +* Prefix - version detection +***************************************/ +#define ZSTDv02_magicNumber 0xFD2FB522 /* v0.2 */ + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_V02_H_4174539423 */ +/**** ended inlining zstd_v02.h ****/ +#endif +#if (ZSTD_LEGACY_SUPPORT <= 3) +/**** start inlining zstd_v03.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
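+ *
+ * Side note (illustrative, not from the upstream header): each legacy header above defines its
+ * frame magic number (ZSTDv01_magicNumberLE, ZSTDv02_magicNumber, ...), and ZSTD_isLegacy()
+ * later in this file dispatches on the little-endian 32-bit value at the start of a frame:
+ *
+ *     U32 const magic = MEM_readLE32(src);                  // src assumed to hold >= 4 bytes
+ *     if (magic == ZSTDv02_magicNumber) { ... use the v0.2 decoder ... }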
+ */ + +#ifndef ZSTD_V03_H_298734209782 +#define ZSTD_V03_H_298734209782 + +#if defined (__cplusplus) +extern "C" { +#endif + +/* ************************************* +* Includes +***************************************/ +#include /* size_t */ + + +/* ************************************* +* Simple one-step function +***************************************/ +/** +ZSTDv03_decompress() : decompress ZSTD frames compliant with v0.3.x format + compressedSize : is the exact source size + maxOriginalSize : is the size of the 'dst' buffer, which must be already allocated. + It must be equal or larger than originalSize, otherwise decompression will fail. + return : the number of bytes decompressed into destination buffer (originalSize) + or an errorCode if it fails (which can be tested using ZSTDv01_isError()) +*/ +size_t ZSTDv03_decompress( void* dst, size_t maxOriginalSize, + const void* src, size_t compressedSize); + + /** + ZSTDv03_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.3.x format + srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src' + cSize (output parameter) : the number of bytes that would be read to decompress this frame + or an error code if it fails (which can be tested using ZSTDv01_isError()) + dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame + or ZSTD_CONTENTSIZE_ERROR if an error occurs + + note : assumes `cSize` and `dBound` are _not_ NULL. + */ + void ZSTDv03_findFrameSizeInfoLegacy(const void *src, size_t srcSize, + size_t* cSize, unsigned long long* dBound); + + /** +ZSTDv03_isError() : tells if the result of ZSTDv03_decompress() is an error +*/ +unsigned ZSTDv03_isError(size_t code); + + +/* ************************************* +* Advanced functions +***************************************/ +typedef struct ZSTDv03_Dctx_s ZSTDv03_Dctx; +ZSTDv03_Dctx* ZSTDv03_createDCtx(void); +size_t ZSTDv03_freeDCtx(ZSTDv03_Dctx* dctx); + +size_t ZSTDv03_decompressDCtx(void* ctx, + void* dst, size_t maxOriginalSize, + const void* src, size_t compressedSize); + +/* ************************************* +* Streaming functions +***************************************/ +size_t ZSTDv03_resetDCtx(ZSTDv03_Dctx* dctx); + +size_t ZSTDv03_nextSrcSizeToDecompress(ZSTDv03_Dctx* dctx); +size_t ZSTDv03_decompressContinue(ZSTDv03_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize); +/** + Use above functions alternatively. + ZSTD_nextSrcSizeToDecompress() tells how much bytes to provide as 'srcSize' to ZSTD_decompressContinue(). + ZSTD_decompressContinue() will use previous data blocks to improve compression if they are located prior to current block. + Result is the number of bytes regenerated within 'dst'. + It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header. +*/ + +/* ************************************* +* Prefix - version detection +***************************************/ +#define ZSTDv03_magicNumber 0xFD2FB523 /* v0.3 */ + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_V03_H_298734209782 */ +/**** ended inlining zstd_v03.h ****/ +#endif +#if (ZSTD_LEGACY_SUPPORT <= 4) +/**** start inlining zstd_v04.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. 
+ * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_V04_H_91868324769238 +#define ZSTD_V04_H_91868324769238 + +#if defined (__cplusplus) +extern "C" { +#endif + +/* ************************************* +* Includes +***************************************/ +#include /* size_t */ + + +/* ************************************* +* Simple one-step function +***************************************/ +/** +ZSTDv04_decompress() : decompress ZSTD frames compliant with v0.4.x format + compressedSize : is the exact source size + maxOriginalSize : is the size of the 'dst' buffer, which must be already allocated. + It must be equal or larger than originalSize, otherwise decompression will fail. + return : the number of bytes decompressed into destination buffer (originalSize) + or an errorCode if it fails (which can be tested using ZSTDv01_isError()) +*/ +size_t ZSTDv04_decompress( void* dst, size_t maxOriginalSize, + const void* src, size_t compressedSize); + + /** + ZSTDv04_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.4.x format + srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src' + cSize (output parameter) : the number of bytes that would be read to decompress this frame + or an error code if it fails (which can be tested using ZSTDv01_isError()) + dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame + or ZSTD_CONTENTSIZE_ERROR if an error occurs + + note : assumes `cSize` and `dBound` are _not_ NULL. + */ + void ZSTDv04_findFrameSizeInfoLegacy(const void *src, size_t srcSize, + size_t* cSize, unsigned long long* dBound); + +/** +ZSTDv04_isError() : tells if the result of ZSTDv04_decompress() is an error +*/ +unsigned ZSTDv04_isError(size_t code); + + +/* ************************************* +* Advanced functions +***************************************/ +typedef struct ZSTDv04_Dctx_s ZSTDv04_Dctx; +ZSTDv04_Dctx* ZSTDv04_createDCtx(void); +size_t ZSTDv04_freeDCtx(ZSTDv04_Dctx* dctx); + +size_t ZSTDv04_decompressDCtx(ZSTDv04_Dctx* dctx, + void* dst, size_t maxOriginalSize, + const void* src, size_t compressedSize); + + +/* ************************************* +* Direct Streaming +***************************************/ +size_t ZSTDv04_resetDCtx(ZSTDv04_Dctx* dctx); + +size_t ZSTDv04_nextSrcSizeToDecompress(ZSTDv04_Dctx* dctx); +size_t ZSTDv04_decompressContinue(ZSTDv04_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize); +/** + Use above functions alternatively. + ZSTD_nextSrcSizeToDecompress() tells how much bytes to provide as 'srcSize' to ZSTD_decompressContinue(). + ZSTD_decompressContinue() will use previous data blocks to improve compression if they are located prior to current block. + Result is the number of bytes regenerated within 'dst'. + It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header. 
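+
+  A minimal pull-style loop over the two calls above (illustrative only, not upstream
+  documentation). `readMore(buf, n)` is a hypothetical callback that supplies exactly n
+  source bytes, and a zero result from ZSTDv04_nextSrcSizeToDecompress() is assumed to mean
+  the frame is finished:
+
+      ZSTDv04_Dctx* const dctx = ZSTDv04_createDCtx();
+      ZSTDv04_resetDCtx(dctx);
+      char inBuf[(128 * 1024) + 16];                         // assumed large enough per step
+      char outBuf[128 * 1024];
+      for (;;) {
+          size_t const toRead = ZSTDv04_nextSrcSizeToDecompress(dctx);
+          if (toRead == 0) break;
+          readMore(inBuf, toRead);                           // hypothetical input source
+          size_t const produced = ZSTDv04_decompressContinue(dctx, outBuf, sizeof(outBuf), inBuf, toRead);
+          if (ZSTDv04_isError(produced)) break;
+          // `produced` may be 0 when only a header was decoded; otherwise flush outBuf here
+      }
+      ZSTDv04_freeDCtx(dctx);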
+*/ + + +/* ************************************* +* Buffered Streaming +***************************************/ +typedef struct ZBUFFv04_DCtx_s ZBUFFv04_DCtx; +ZBUFFv04_DCtx* ZBUFFv04_createDCtx(void); +size_t ZBUFFv04_freeDCtx(ZBUFFv04_DCtx* dctx); + +size_t ZBUFFv04_decompressInit(ZBUFFv04_DCtx* dctx); +size_t ZBUFFv04_decompressWithDictionary(ZBUFFv04_DCtx* dctx, const void* dict, size_t dictSize); + +size_t ZBUFFv04_decompressContinue(ZBUFFv04_DCtx* dctx, void* dst, size_t* maxDstSizePtr, const void* src, size_t* srcSizePtr); + +/** ************************************************ +* Streaming decompression +* +* A ZBUFF_DCtx object is required to track streaming operation. +* Use ZBUFF_createDCtx() and ZBUFF_freeDCtx() to create/release resources. +* Use ZBUFF_decompressInit() to start a new decompression operation. +* ZBUFF_DCtx objects can be reused multiple times. +* +* Optionally, a reference to a static dictionary can be set, using ZBUFF_decompressWithDictionary() +* It must be the same content as the one set during compression phase. +* Dictionary content must remain accessible during the decompression process. +* +* Use ZBUFF_decompressContinue() repetitively to consume your input. +* *srcSizePtr and *maxDstSizePtr can be any size. +* The function will report how many bytes were read or written by modifying *srcSizePtr and *maxDstSizePtr. +* Note that it may not consume the entire input, in which case it's up to the caller to present remaining input again. +* The content of dst will be overwritten (up to *maxDstSizePtr) at each function call, so save its content if it matters or change dst. +* @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to improve latency) +* or 0 when a frame is completely decoded +* or an error code, which can be tested using ZBUFF_isError(). +* +* Hint : recommended buffer sizes (not compulsory) : ZBUFF_recommendedDInSize / ZBUFF_recommendedDOutSize +* output : ZBUFF_recommendedDOutSize==128 KB block size is the internal unit, it ensures it's always possible to write a full block when it's decoded. +* input : ZBUFF_recommendedDInSize==128Kb+3; just follow indications from ZBUFF_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 . +* **************************************************/ +unsigned ZBUFFv04_isError(size_t errorCode); +const char* ZBUFFv04_getErrorName(size_t errorCode); + + +/** The below functions provide recommended buffer sizes for Compression or Decompression operations. +* These sizes are not compulsory, they just tend to offer better latency */ +size_t ZBUFFv04_recommendedDInSize(void); +size_t ZBUFFv04_recommendedDOutSize(void); + + +/* ************************************* +* Prefix - version detection +***************************************/ +#define ZSTDv04_magicNumber 0xFD2FB524 /* v0.4 */ + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_V04_H_91868324769238 */ +/**** ended inlining zstd_v04.h ****/ +#endif +#if (ZSTD_LEGACY_SUPPORT <= 5) +/**** start inlining zstd_v05.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
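+ *
+ * Side note (illustrative, not from the upstream header): the ZBUFFv04_recommended*Size()
+ * helpers declared at the end of the preceding zstd_v04.h are hints only, e.g.
+ *
+ *     size_t const inCap  = ZBUFFv04_recommendedDInSize();  // about 128 KB + 3, per the howto
+ *     size_t const outCap = ZBUFFv04_recommendedDOutSize(); // one full 128 KB block
+ *
+ * Any buffer sizes work; the recommended ones simply help latency, as the howto above states.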
+ */ + +#ifndef ZSTDv05_H +#define ZSTDv05_H + +#if defined (__cplusplus) +extern "C" { +#endif + +/*-************************************* +* Dependencies +***************************************/ +#include /* size_t */ +/**** skipping file: ../common/mem.h ****/ + + +/* ************************************* +* Simple functions +***************************************/ +/*! ZSTDv05_decompress() : + `compressedSize` : is the _exact_ size of the compressed blob, otherwise decompression will fail. + `dstCapacity` must be large enough, equal or larger than originalSize. + @return : the number of bytes decompressed into `dst` (<= `dstCapacity`), + or an errorCode if it fails (which can be tested using ZSTDv05_isError()) */ +size_t ZSTDv05_decompress( void* dst, size_t dstCapacity, + const void* src, size_t compressedSize); + + /** + ZSTDv05_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.5.x format + srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src' + cSize (output parameter) : the number of bytes that would be read to decompress this frame + or an error code if it fails (which can be tested using ZSTDv01_isError()) + dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame + or ZSTD_CONTENTSIZE_ERROR if an error occurs + + note : assumes `cSize` and `dBound` are _not_ NULL. + */ +void ZSTDv05_findFrameSizeInfoLegacy(const void *src, size_t srcSize, + size_t* cSize, unsigned long long* dBound); + +/* ************************************* +* Helper functions +***************************************/ +/* Error Management */ +unsigned ZSTDv05_isError(size_t code); /*!< tells if a `size_t` function result is an error code */ +const char* ZSTDv05_getErrorName(size_t code); /*!< provides readable string for an error code */ + + +/* ************************************* +* Explicit memory management +***************************************/ +/** Decompression context */ +typedef struct ZSTDv05_DCtx_s ZSTDv05_DCtx; +ZSTDv05_DCtx* ZSTDv05_createDCtx(void); +size_t ZSTDv05_freeDCtx(ZSTDv05_DCtx* dctx); /*!< @return : errorCode */ + +/** ZSTDv05_decompressDCtx() : +* Same as ZSTDv05_decompress(), but requires an already allocated ZSTDv05_DCtx (see ZSTDv05_createDCtx()) */ +size_t ZSTDv05_decompressDCtx(ZSTDv05_DCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + + +/*-*********************** +* Simple Dictionary API +*************************/ +/*! ZSTDv05_decompress_usingDict() : +* Decompression using a pre-defined Dictionary content (see dictBuilder). +* Dictionary must be identical to the one used during compression, otherwise regenerated data will be corrupted. 
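+* A minimal sketch (illustrative only, not upstream documentation); `dictBuf`/`dictSize` are
+* assumed to hold the same raw dictionary bytes that were used at compression time:
+*
+*     ZSTDv05_DCtx* const dctx = ZSTDv05_createDCtx();
+*     size_t const dSize = ZSTDv05_decompress_usingDict(dctx, dst, dstCapacity,
+*                                                       src, srcSize, dictBuf, dictSize);
+*     if (ZSTDv05_isError(dSize)) { ... }                    // handle error
+*     ZSTDv05_freeDCtx(dctx);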
+* Note : dict can be NULL, in which case, it's equivalent to ZSTDv05_decompressDCtx() */ +size_t ZSTDv05_decompress_usingDict(ZSTDv05_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize); + +/*-************************ +* Advanced Streaming API +***************************/ +typedef enum { ZSTDv05_fast, ZSTDv05_greedy, ZSTDv05_lazy, ZSTDv05_lazy2, ZSTDv05_btlazy2, ZSTDv05_opt, ZSTDv05_btopt } ZSTDv05_strategy; +typedef struct { + U64 srcSize; + U32 windowLog; /* the only useful information to retrieve */ + U32 contentLog; U32 hashLog; U32 searchLog; U32 searchLength; U32 targetLength; ZSTDv05_strategy strategy; +} ZSTDv05_parameters; +size_t ZSTDv05_getFrameParams(ZSTDv05_parameters* params, const void* src, size_t srcSize); + +size_t ZSTDv05_decompressBegin_usingDict(ZSTDv05_DCtx* dctx, const void* dict, size_t dictSize); +void ZSTDv05_copyDCtx(ZSTDv05_DCtx* dstDCtx, const ZSTDv05_DCtx* srcDCtx); +size_t ZSTDv05_nextSrcSizeToDecompress(ZSTDv05_DCtx* dctx); +size_t ZSTDv05_decompressContinue(ZSTDv05_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + + +/*-*********************** +* ZBUFF API +*************************/ +typedef struct ZBUFFv05_DCtx_s ZBUFFv05_DCtx; +ZBUFFv05_DCtx* ZBUFFv05_createDCtx(void); +size_t ZBUFFv05_freeDCtx(ZBUFFv05_DCtx* dctx); + +size_t ZBUFFv05_decompressInit(ZBUFFv05_DCtx* dctx); +size_t ZBUFFv05_decompressInitDictionary(ZBUFFv05_DCtx* dctx, const void* dict, size_t dictSize); + +size_t ZBUFFv05_decompressContinue(ZBUFFv05_DCtx* dctx, + void* dst, size_t* dstCapacityPtr, + const void* src, size_t* srcSizePtr); + +/*-*************************************************************************** +* Streaming decompression +* +* A ZBUFFv05_DCtx object is required to track streaming operations. +* Use ZBUFFv05_createDCtx() and ZBUFFv05_freeDCtx() to create/release resources. +* Use ZBUFFv05_decompressInit() to start a new decompression operation, +* or ZBUFFv05_decompressInitDictionary() if decompression requires a dictionary. +* Note that ZBUFFv05_DCtx objects can be reused multiple times. +* +* Use ZBUFFv05_decompressContinue() repetitively to consume your input. +* *srcSizePtr and *dstCapacityPtr can be any size. +* The function will report how many bytes were read or written by modifying *srcSizePtr and *dstCapacityPtr. +* Note that it may not consume the entire input, in which case it's up to the caller to present remaining input again. +* The content of @dst will be overwritten (up to *dstCapacityPtr) at each function call, so save its content if it matters or change @dst. +* @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to help latency) +* or 0 when a frame is completely decoded +* or an error code, which can be tested using ZBUFFv05_isError(). +* +* Hint : recommended buffer sizes (not compulsory) : ZBUFFv05_recommendedDInSize() / ZBUFFv05_recommendedDOutSize() +* output : ZBUFFv05_recommendedDOutSize==128 KB block size is the internal unit, it ensures it's always possible to write a full block when decoded. +* input : ZBUFFv05_recommendedDInSize==128Kb+3; just follow indications from ZBUFFv05_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 . 
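+*
+* A minimal decode loop following the howto above (illustrative only, simplified error
+* handling); `cbuf`/`csize` are the assumed compressed input and `sink()` is a hypothetical
+* output callback:
+*
+*     ZBUFFv05_DCtx* const dctx = ZBUFFv05_createDCtx();
+*     ZBUFFv05_decompressInit(dctx);
+*     char out[128 * 1024];
+*     size_t pos = 0;
+*     while (pos < csize) {
+*         size_t outSize = sizeof(out);
+*         size_t inSize  = csize - pos;
+*         size_t const hint = ZBUFFv05_decompressContinue(dctx, out, &outSize, cbuf + pos, &inSize);
+*         if (ZBUFFv05_isError(hint)) break;                 // error code
+*         sink(out, outSize);                                // outSize bytes were produced
+*         pos += inSize;                                     // inSize bytes were consumed
+*         if (hint == 0) break;                              // frame completely decoded
+*     }
+*     ZBUFFv05_freeDCtx(dctx);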
+* *******************************************************************************/ + + +/* ************************************* +* Tool functions +***************************************/ +unsigned ZBUFFv05_isError(size_t errorCode); +const char* ZBUFFv05_getErrorName(size_t errorCode); + +/** Functions below provide recommended buffer sizes for Compression or Decompression operations. +* These sizes are just hints, and tend to offer better latency */ +size_t ZBUFFv05_recommendedDInSize(void); +size_t ZBUFFv05_recommendedDOutSize(void); + + + +/*-************************************* +* Constants +***************************************/ +#define ZSTDv05_MAGICNUMBER 0xFD2FB525 /* v0.5 */ + + + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTDv0505_H */ +/**** ended inlining zstd_v05.h ****/ +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) +/**** start inlining zstd_v06.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTDv06_H +#define ZSTDv06_H + +#if defined (__cplusplus) +extern "C" { +#endif + +/*====== Dependency ======*/ +#include /* size_t */ + + +/*====== Export for Windows ======*/ +/*! +* ZSTDv06_DLL_EXPORT : +* Enable exporting of functions when building a Windows DLL +*/ +#if defined(_WIN32) && defined(ZSTDv06_DLL_EXPORT) && (ZSTDv06_DLL_EXPORT==1) +# define ZSTDLIBv06_API __declspec(dllexport) +#else +# define ZSTDLIBv06_API +#endif + + +/* ************************************* +* Simple functions +***************************************/ +/*! ZSTDv06_decompress() : + `compressedSize` : is the _exact_ size of the compressed blob, otherwise decompression will fail. + `dstCapacity` must be large enough, equal or larger than originalSize. + @return : the number of bytes decompressed into `dst` (<= `dstCapacity`), + or an errorCode if it fails (which can be tested using ZSTDv06_isError()) */ +ZSTDLIBv06_API size_t ZSTDv06_decompress( void* dst, size_t dstCapacity, + const void* src, size_t compressedSize); + +/** +ZSTDv06_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.6.x format + srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src' + cSize (output parameter) : the number of bytes that would be read to decompress this frame + or an error code if it fails (which can be tested using ZSTDv01_isError()) + dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame + or ZSTD_CONTENTSIZE_ERROR if an error occurs + + note : assumes `cSize` and `dBound` are _not_ NULL. 
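+
+ A minimal sketch (illustrative only, not upstream documentation): cSize makes it possible to
+ step over one v0.6 frame inside a larger buffer, e.g. when frames are concatenated
+ (`p` is an assumed BYTE pointer, `remaining` the bytes left in the buffer):
+
+     size_t cSize; unsigned long long dBound;
+     ZSTDv06_findFrameSizeInfoLegacy(p, remaining, &cSize, &dBound);
+     if (ZSTDv06_isError(cSize)) return;                     // not a valid v0.6 frame
+     // ... decompress the cSize-byte frame at p into a buffer of at least dBound bytes ...
+     p += cSize; remaining -= cSize;                         // advance to the next frame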
+*/ +void ZSTDv06_findFrameSizeInfoLegacy(const void *src, size_t srcSize, + size_t* cSize, unsigned long long* dBound); + +/* ************************************* +* Helper functions +***************************************/ +ZSTDLIBv06_API size_t ZSTDv06_compressBound(size_t srcSize); /*!< maximum compressed size (worst case scenario) */ + +/* Error Management */ +ZSTDLIBv06_API unsigned ZSTDv06_isError(size_t code); /*!< tells if a `size_t` function result is an error code */ +ZSTDLIBv06_API const char* ZSTDv06_getErrorName(size_t code); /*!< provides readable string for an error code */ + + +/* ************************************* +* Explicit memory management +***************************************/ +/** Decompression context */ +typedef struct ZSTDv06_DCtx_s ZSTDv06_DCtx; +ZSTDLIBv06_API ZSTDv06_DCtx* ZSTDv06_createDCtx(void); +ZSTDLIBv06_API size_t ZSTDv06_freeDCtx(ZSTDv06_DCtx* dctx); /*!< @return : errorCode */ + +/** ZSTDv06_decompressDCtx() : +* Same as ZSTDv06_decompress(), but requires an already allocated ZSTDv06_DCtx (see ZSTDv06_createDCtx()) */ +ZSTDLIBv06_API size_t ZSTDv06_decompressDCtx(ZSTDv06_DCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + + +/*-*********************** +* Dictionary API +*************************/ +/*! ZSTDv06_decompress_usingDict() : +* Decompression using a pre-defined Dictionary content (see dictBuilder). +* Dictionary must be identical to the one used during compression, otherwise regenerated data will be corrupted. +* Note : dict can be NULL, in which case, it's equivalent to ZSTDv06_decompressDCtx() */ +ZSTDLIBv06_API size_t ZSTDv06_decompress_usingDict(ZSTDv06_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize); + + +/*-************************ +* Advanced Streaming API +***************************/ +struct ZSTDv06_frameParams_s { unsigned long long frameContentSize; unsigned windowLog; }; +typedef struct ZSTDv06_frameParams_s ZSTDv06_frameParams; + +ZSTDLIBv06_API size_t ZSTDv06_getFrameParams(ZSTDv06_frameParams* fparamsPtr, const void* src, size_t srcSize); /**< doesn't consume input */ +ZSTDLIBv06_API size_t ZSTDv06_decompressBegin_usingDict(ZSTDv06_DCtx* dctx, const void* dict, size_t dictSize); +ZSTDLIBv06_API void ZSTDv06_copyDCtx(ZSTDv06_DCtx* dctx, const ZSTDv06_DCtx* preparedDCtx); + +ZSTDLIBv06_API size_t ZSTDv06_nextSrcSizeToDecompress(ZSTDv06_DCtx* dctx); +ZSTDLIBv06_API size_t ZSTDv06_decompressContinue(ZSTDv06_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + + + +/* ************************************* +* ZBUFF API +***************************************/ + +typedef struct ZBUFFv06_DCtx_s ZBUFFv06_DCtx; +ZSTDLIBv06_API ZBUFFv06_DCtx* ZBUFFv06_createDCtx(void); +ZSTDLIBv06_API size_t ZBUFFv06_freeDCtx(ZBUFFv06_DCtx* dctx); + +ZSTDLIBv06_API size_t ZBUFFv06_decompressInit(ZBUFFv06_DCtx* dctx); +ZSTDLIBv06_API size_t ZBUFFv06_decompressInitDictionary(ZBUFFv06_DCtx* dctx, const void* dict, size_t dictSize); + +ZSTDLIBv06_API size_t ZBUFFv06_decompressContinue(ZBUFFv06_DCtx* dctx, + void* dst, size_t* dstCapacityPtr, + const void* src, size_t* srcSizePtr); + +/*-*************************************************************************** +* Streaming decompression howto +* +* A ZBUFFv06_DCtx object is required to track streaming operations. +* Use ZBUFFv06_createDCtx() and ZBUFFv06_freeDCtx() to create/release resources. 
+* Use ZBUFFv06_decompressInit() to start a new decompression operation, +* or ZBUFFv06_decompressInitDictionary() if decompression requires a dictionary. +* Note that ZBUFFv06_DCtx objects can be re-init multiple times. +* +* Use ZBUFFv06_decompressContinue() repetitively to consume your input. +* *srcSizePtr and *dstCapacityPtr can be any size. +* The function will report how many bytes were read or written by modifying *srcSizePtr and *dstCapacityPtr. +* Note that it may not consume the entire input, in which case it's up to the caller to present remaining input again. +* The content of `dst` will be overwritten (up to *dstCapacityPtr) at each function call, so save its content if it matters, or change `dst`. +* @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to help latency), +* or 0 when a frame is completely decoded, +* or an error code, which can be tested using ZBUFFv06_isError(). +* +* Hint : recommended buffer sizes (not compulsory) : ZBUFFv06_recommendedDInSize() and ZBUFFv06_recommendedDOutSize() +* output : ZBUFFv06_recommendedDOutSize== 128 KB block size is the internal unit, it ensures it's always possible to write a full block when decoded. +* input : ZBUFFv06_recommendedDInSize == 128KB + 3; +* just follow indications from ZBUFFv06_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 . +* *******************************************************************************/ + + +/* ************************************* +* Tool functions +***************************************/ +ZSTDLIBv06_API unsigned ZBUFFv06_isError(size_t errorCode); +ZSTDLIBv06_API const char* ZBUFFv06_getErrorName(size_t errorCode); + +/** Functions below provide recommended buffer sizes for Compression or Decompression operations. +* These sizes are just hints, they tend to offer better latency */ +ZSTDLIBv06_API size_t ZBUFFv06_recommendedDInSize(void); +ZSTDLIBv06_API size_t ZBUFFv06_recommendedDOutSize(void); + + +/*-************************************* +* Constants +***************************************/ +#define ZSTDv06_MAGICNUMBER 0xFD2FB526 /* v0.6 */ + + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTDv06_BUFFERED_H */ +/**** ended inlining zstd_v06.h ****/ +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) +/**** start inlining zstd_v07.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTDv07_H_235446 +#define ZSTDv07_H_235446 + +#if defined (__cplusplus) +extern "C" { +#endif + +/*====== Dependency ======*/ +#include /* size_t */ + + +/*====== Export for Windows ======*/ +/*! +* ZSTDv07_DLL_EXPORT : +* Enable exporting of functions when building a Windows DLL +*/ +#if defined(_WIN32) && defined(ZSTDv07_DLL_EXPORT) && (ZSTDv07_DLL_EXPORT==1) +# define ZSTDLIBv07_API __declspec(dllexport) +#else +# define ZSTDLIBv07_API +#endif + + +/* ************************************* +* Simple API +***************************************/ +/*! ZSTDv07_getDecompressedSize() : +* @return : decompressed size if known, 0 otherwise. + note 1 : if `0`, follow up with ZSTDv07_getFrameParams() to know precise failure cause. + note 2 : decompressed size could be wrong or intentionally modified ! 
+ always ensure results fit within application's authorized limits */ +unsigned long long ZSTDv07_getDecompressedSize(const void* src, size_t srcSize); + +/*! ZSTDv07_decompress() : + `compressedSize` : must be _exact_ size of compressed input, otherwise decompression will fail. + `dstCapacity` must be equal or larger than originalSize. + @return : the number of bytes decompressed into `dst` (<= `dstCapacity`), + or an errorCode if it fails (which can be tested using ZSTDv07_isError()) */ +ZSTDLIBv07_API size_t ZSTDv07_decompress( void* dst, size_t dstCapacity, + const void* src, size_t compressedSize); + +/** +ZSTDv07_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.7.x format + srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src' + cSize (output parameter) : the number of bytes that would be read to decompress this frame + or an error code if it fails (which can be tested using ZSTDv01_isError()) + dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame + or ZSTD_CONTENTSIZE_ERROR if an error occurs + + note : assumes `cSize` and `dBound` are _not_ NULL. +*/ +void ZSTDv07_findFrameSizeInfoLegacy(const void *src, size_t srcSize, + size_t* cSize, unsigned long long* dBound); + +/*====== Helper functions ======*/ +ZSTDLIBv07_API unsigned ZSTDv07_isError(size_t code); /*!< tells if a `size_t` function result is an error code */ +ZSTDLIBv07_API const char* ZSTDv07_getErrorName(size_t code); /*!< provides readable string from an error code */ + + +/*-************************************* +* Explicit memory management +***************************************/ +/** Decompression context */ +typedef struct ZSTDv07_DCtx_s ZSTDv07_DCtx; +ZSTDLIBv07_API ZSTDv07_DCtx* ZSTDv07_createDCtx(void); +ZSTDLIBv07_API size_t ZSTDv07_freeDCtx(ZSTDv07_DCtx* dctx); /*!< @return : errorCode */ + +/** ZSTDv07_decompressDCtx() : +* Same as ZSTDv07_decompress(), requires an allocated ZSTDv07_DCtx (see ZSTDv07_createDCtx()) */ +ZSTDLIBv07_API size_t ZSTDv07_decompressDCtx(ZSTDv07_DCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + + +/*-************************ +* Simple dictionary API +***************************/ +/*! ZSTDv07_decompress_usingDict() : +* Decompression using a pre-defined Dictionary content (see dictBuilder). +* Dictionary must be identical to the one used during compression. +* Note : This function load the dictionary, resulting in a significant startup time */ +ZSTDLIBv07_API size_t ZSTDv07_decompress_usingDict(ZSTDv07_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize); + + +/*-************************** +* Advanced Dictionary API +****************************/ +/*! ZSTDv07_createDDict() : +* Create a digested dictionary, ready to start decompression operation without startup delay. +* `dict` can be released after creation */ +typedef struct ZSTDv07_DDict_s ZSTDv07_DDict; +ZSTDLIBv07_API ZSTDv07_DDict* ZSTDv07_createDDict(const void* dict, size_t dictSize); +ZSTDLIBv07_API size_t ZSTDv07_freeDDict(ZSTDv07_DDict* ddict); + +/*! ZSTDv07_decompress_usingDDict() : +* Decompression using a pre-digested Dictionary +* Faster startup than ZSTDv07_decompress_usingDict(), recommended when same dictionary is used multiple times. 
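+*
+* A minimal sketch (illustrative only, not upstream documentation); `dictBuf`/`dictSize` are
+* assumed to hold the raw dictionary used at compression time, and the digested DDict is meant
+* to be reused across many frames:
+*
+*     ZSTDv07_DDict* const ddict = ZSTDv07_createDDict(dictBuf, dictSize);
+*     ZSTDv07_DCtx*  const dctx  = ZSTDv07_createDCtx();
+*     size_t const dSize = ZSTDv07_decompress_usingDDict(dctx, dst, dstCapacity, src, srcSize, ddict);
+*     if (ZSTDv07_isError(dSize)) { ... }                    // handle error
+*     ZSTDv07_freeDCtx(dctx);
+*     ZSTDv07_freeDDict(ddict);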
*/ +ZSTDLIBv07_API size_t ZSTDv07_decompress_usingDDict(ZSTDv07_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTDv07_DDict* ddict); + +typedef struct { + unsigned long long frameContentSize; + unsigned windowSize; + unsigned dictID; + unsigned checksumFlag; +} ZSTDv07_frameParams; + +ZSTDLIBv07_API size_t ZSTDv07_getFrameParams(ZSTDv07_frameParams* fparamsPtr, const void* src, size_t srcSize); /**< doesn't consume input */ + + + + +/* ************************************* +* Streaming functions +***************************************/ +typedef struct ZBUFFv07_DCtx_s ZBUFFv07_DCtx; +ZSTDLIBv07_API ZBUFFv07_DCtx* ZBUFFv07_createDCtx(void); +ZSTDLIBv07_API size_t ZBUFFv07_freeDCtx(ZBUFFv07_DCtx* dctx); + +ZSTDLIBv07_API size_t ZBUFFv07_decompressInit(ZBUFFv07_DCtx* dctx); +ZSTDLIBv07_API size_t ZBUFFv07_decompressInitDictionary(ZBUFFv07_DCtx* dctx, const void* dict, size_t dictSize); + +ZSTDLIBv07_API size_t ZBUFFv07_decompressContinue(ZBUFFv07_DCtx* dctx, + void* dst, size_t* dstCapacityPtr, + const void* src, size_t* srcSizePtr); + +/*-*************************************************************************** +* Streaming decompression howto +* +* A ZBUFFv07_DCtx object is required to track streaming operations. +* Use ZBUFFv07_createDCtx() and ZBUFFv07_freeDCtx() to create/release resources. +* Use ZBUFFv07_decompressInit() to start a new decompression operation, +* or ZBUFFv07_decompressInitDictionary() if decompression requires a dictionary. +* Note that ZBUFFv07_DCtx objects can be re-init multiple times. +* +* Use ZBUFFv07_decompressContinue() repetitively to consume your input. +* *srcSizePtr and *dstCapacityPtr can be any size. +* The function will report how many bytes were read or written by modifying *srcSizePtr and *dstCapacityPtr. +* Note that it may not consume the entire input, in which case it's up to the caller to present remaining input again. +* The content of `dst` will be overwritten (up to *dstCapacityPtr) at each function call, so save its content if it matters, or change `dst`. +* @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to help latency), +* or 0 when a frame is completely decoded, +* or an error code, which can be tested using ZBUFFv07_isError(). +* +* Hint : recommended buffer sizes (not compulsory) : ZBUFFv07_recommendedDInSize() and ZBUFFv07_recommendedDOutSize() +* output : ZBUFFv07_recommendedDOutSize== 128 KB block size is the internal unit, it ensures it's always possible to write a full block when decoded. +* input : ZBUFFv07_recommendedDInSize == 128KB + 3; +* just follow indications from ZBUFFv07_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 . +* *******************************************************************************/ + + +/* ************************************* +* Tool functions +***************************************/ +ZSTDLIBv07_API unsigned ZBUFFv07_isError(size_t errorCode); +ZSTDLIBv07_API const char* ZBUFFv07_getErrorName(size_t errorCode); + +/** Functions below provide recommended buffer sizes for Compression or Decompression operations. 
+* These sizes are just hints, they tend to offer better latency */ +ZSTDLIBv07_API size_t ZBUFFv07_recommendedDInSize(void); +ZSTDLIBv07_API size_t ZBUFFv07_recommendedDOutSize(void); + + +/*-************************************* +* Constants +***************************************/ +#define ZSTDv07_MAGICNUMBER 0xFD2FB527 /* v0.7 */ + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTDv07_H_235446 */ +/**** ended inlining zstd_v07.h ****/ +#endif + +/** ZSTD_isLegacy() : + @return : > 0 if supported by legacy decoder. 0 otherwise. + return value is the version. +*/ +MEM_STATIC unsigned ZSTD_isLegacy(const void* src, size_t srcSize) +{ + U32 magicNumberLE; + if (srcSize<4) return 0; + magicNumberLE = MEM_readLE32(src); + switch(magicNumberLE) + { +#if (ZSTD_LEGACY_SUPPORT <= 1) + case ZSTDv01_magicNumberLE:return 1; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 2) + case ZSTDv02_magicNumber : return 2; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 3) + case ZSTDv03_magicNumber : return 3; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 4) + case ZSTDv04_magicNumber : return 4; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 5) + case ZSTDv05_MAGICNUMBER : return 5; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) + case ZSTDv06_MAGICNUMBER : return 6; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) + case ZSTDv07_MAGICNUMBER : return 7; +#endif + default : return 0; + } +} + + +MEM_STATIC unsigned long long ZSTD_getDecompressedSize_legacy(const void* src, size_t srcSize) +{ + U32 const version = ZSTD_isLegacy(src, srcSize); + if (version < 5) return 0; /* no decompressed size in frame header, or not a legacy format */ +#if (ZSTD_LEGACY_SUPPORT <= 5) + if (version==5) { + ZSTDv05_parameters fParams; + size_t const frResult = ZSTDv05_getFrameParams(&fParams, src, srcSize); + if (frResult != 0) return 0; + return fParams.srcSize; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) + if (version==6) { + ZSTDv06_frameParams fParams; + size_t const frResult = ZSTDv06_getFrameParams(&fParams, src, srcSize); + if (frResult != 0) return 0; + return fParams.frameContentSize; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) + if (version==7) { + ZSTDv07_frameParams fParams; + size_t const frResult = ZSTDv07_getFrameParams(&fParams, src, srcSize); + if (frResult != 0) return 0; + return fParams.frameContentSize; + } +#endif + return 0; /* should not be possible */ +} + + +MEM_STATIC size_t ZSTD_decompressLegacy( + void* dst, size_t dstCapacity, + const void* src, size_t compressedSize, + const void* dict,size_t dictSize) +{ + U32 const version = ZSTD_isLegacy(src, compressedSize); + (void)dst; (void)dstCapacity; (void)dict; (void)dictSize; /* unused when ZSTD_LEGACY_SUPPORT >= 8 */ + switch(version) + { +#if (ZSTD_LEGACY_SUPPORT <= 1) + case 1 : + return ZSTDv01_decompress(dst, dstCapacity, src, compressedSize); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 2) + case 2 : + return ZSTDv02_decompress(dst, dstCapacity, src, compressedSize); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 3) + case 3 : + return ZSTDv03_decompress(dst, dstCapacity, src, compressedSize); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 4) + case 4 : + return ZSTDv04_decompress(dst, dstCapacity, src, compressedSize); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 5) + case 5 : + { size_t result; + ZSTDv05_DCtx* const zd = ZSTDv05_createDCtx(); + if (zd==NULL) return ERROR(memory_allocation); + result = ZSTDv05_decompress_usingDict(zd, dst, dstCapacity, src, compressedSize, dict, dictSize); + ZSTDv05_freeDCtx(zd); + return result; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) + case 6 : + { size_t result; + ZSTDv06_DCtx* const zd 
= ZSTDv06_createDCtx(); + if (zd==NULL) return ERROR(memory_allocation); + result = ZSTDv06_decompress_usingDict(zd, dst, dstCapacity, src, compressedSize, dict, dictSize); + ZSTDv06_freeDCtx(zd); + return result; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) + case 7 : + { size_t result; + ZSTDv07_DCtx* const zd = ZSTDv07_createDCtx(); + if (zd==NULL) return ERROR(memory_allocation); + result = ZSTDv07_decompress_usingDict(zd, dst, dstCapacity, src, compressedSize, dict, dictSize); + ZSTDv07_freeDCtx(zd); + return result; + } +#endif + default : + return ERROR(prefix_unknown); + } +} + +MEM_STATIC ZSTD_frameSizeInfo ZSTD_findFrameSizeInfoLegacy(const void *src, size_t srcSize) +{ + ZSTD_frameSizeInfo frameSizeInfo; + U32 const version = ZSTD_isLegacy(src, srcSize); + switch(version) + { +#if (ZSTD_LEGACY_SUPPORT <= 1) + case 1 : + ZSTDv01_findFrameSizeInfoLegacy(src, srcSize, + &frameSizeInfo.compressedSize, + &frameSizeInfo.decompressedBound); + break; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 2) + case 2 : + ZSTDv02_findFrameSizeInfoLegacy(src, srcSize, + &frameSizeInfo.compressedSize, + &frameSizeInfo.decompressedBound); + break; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 3) + case 3 : + ZSTDv03_findFrameSizeInfoLegacy(src, srcSize, + &frameSizeInfo.compressedSize, + &frameSizeInfo.decompressedBound); + break; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 4) + case 4 : + ZSTDv04_findFrameSizeInfoLegacy(src, srcSize, + &frameSizeInfo.compressedSize, + &frameSizeInfo.decompressedBound); + break; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 5) + case 5 : + ZSTDv05_findFrameSizeInfoLegacy(src, srcSize, + &frameSizeInfo.compressedSize, + &frameSizeInfo.decompressedBound); + break; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) + case 6 : + ZSTDv06_findFrameSizeInfoLegacy(src, srcSize, + &frameSizeInfo.compressedSize, + &frameSizeInfo.decompressedBound); + break; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) + case 7 : + ZSTDv07_findFrameSizeInfoLegacy(src, srcSize, + &frameSizeInfo.compressedSize, + &frameSizeInfo.decompressedBound); + break; +#endif + default : + frameSizeInfo.compressedSize = ERROR(prefix_unknown); + frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR; + break; + } + if (!ZSTD_isError(frameSizeInfo.compressedSize) && frameSizeInfo.compressedSize > srcSize) { + frameSizeInfo.compressedSize = ERROR(srcSize_wrong); + frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR; + } + return frameSizeInfo; +} + +MEM_STATIC size_t ZSTD_findFrameCompressedSizeLegacy(const void *src, size_t srcSize) +{ + ZSTD_frameSizeInfo frameSizeInfo = ZSTD_findFrameSizeInfoLegacy(src, srcSize); + return frameSizeInfo.compressedSize; +} + +MEM_STATIC size_t ZSTD_freeLegacyStreamContext(void* legacyContext, U32 version) +{ + switch(version) + { + default : + case 1 : + case 2 : + case 3 : + (void)legacyContext; + return ERROR(version_unsupported); +#if (ZSTD_LEGACY_SUPPORT <= 4) + case 4 : return ZBUFFv04_freeDCtx((ZBUFFv04_DCtx*)legacyContext); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 5) + case 5 : return ZBUFFv05_freeDCtx((ZBUFFv05_DCtx*)legacyContext); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) + case 6 : return ZBUFFv06_freeDCtx((ZBUFFv06_DCtx*)legacyContext); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) + case 7 : return ZBUFFv07_freeDCtx((ZBUFFv07_DCtx*)legacyContext); +#endif + } +} + + +MEM_STATIC size_t ZSTD_initLegacyStream(void** legacyContext, U32 prevVersion, U32 newVersion, + const void* dict, size_t dictSize) +{ + DEBUGLOG(5, "ZSTD_initLegacyStream for v0.%u", newVersion); + if (prevVersion != newVersion) 
ZSTD_freeLegacyStreamContext(*legacyContext, prevVersion); + switch(newVersion) + { + default : + case 1 : + case 2 : + case 3 : + (void)dict; (void)dictSize; + return 0; +#if (ZSTD_LEGACY_SUPPORT <= 4) + case 4 : + { + ZBUFFv04_DCtx* dctx = (prevVersion != newVersion) ? ZBUFFv04_createDCtx() : (ZBUFFv04_DCtx*)*legacyContext; + if (dctx==NULL) return ERROR(memory_allocation); + ZBUFFv04_decompressInit(dctx); + ZBUFFv04_decompressWithDictionary(dctx, dict, dictSize); + *legacyContext = dctx; + return 0; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 5) + case 5 : + { + ZBUFFv05_DCtx* dctx = (prevVersion != newVersion) ? ZBUFFv05_createDCtx() : (ZBUFFv05_DCtx*)*legacyContext; + if (dctx==NULL) return ERROR(memory_allocation); + ZBUFFv05_decompressInitDictionary(dctx, dict, dictSize); + *legacyContext = dctx; + return 0; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) + case 6 : + { + ZBUFFv06_DCtx* dctx = (prevVersion != newVersion) ? ZBUFFv06_createDCtx() : (ZBUFFv06_DCtx*)*legacyContext; + if (dctx==NULL) return ERROR(memory_allocation); + ZBUFFv06_decompressInitDictionary(dctx, dict, dictSize); + *legacyContext = dctx; + return 0; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) + case 7 : + { + ZBUFFv07_DCtx* dctx = (prevVersion != newVersion) ? ZBUFFv07_createDCtx() : (ZBUFFv07_DCtx*)*legacyContext; + if (dctx==NULL) return ERROR(memory_allocation); + ZBUFFv07_decompressInitDictionary(dctx, dict, dictSize); + *legacyContext = dctx; + return 0; + } +#endif + } +} + + + +MEM_STATIC size_t ZSTD_decompressLegacyStream(void* legacyContext, U32 version, + ZSTD_outBuffer* output, ZSTD_inBuffer* input) +{ + DEBUGLOG(5, "ZSTD_decompressLegacyStream for v0.%u", version); + switch(version) + { + default : + case 1 : + case 2 : + case 3 : + (void)legacyContext; (void)output; (void)input; + return ERROR(version_unsupported); +#if (ZSTD_LEGACY_SUPPORT <= 4) + case 4 : + { + ZBUFFv04_DCtx* dctx = (ZBUFFv04_DCtx*) legacyContext; + const void* src = (const char*)input->src + input->pos; + size_t readSize = input->size - input->pos; + void* dst = (char*)output->dst + output->pos; + size_t decodedSize = output->size - output->pos; + size_t const hintSize = ZBUFFv04_decompressContinue(dctx, dst, &decodedSize, src, &readSize); + output->pos += decodedSize; + input->pos += readSize; + return hintSize; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 5) + case 5 : + { + ZBUFFv05_DCtx* dctx = (ZBUFFv05_DCtx*) legacyContext; + const void* src = (const char*)input->src + input->pos; + size_t readSize = input->size - input->pos; + void* dst = (char*)output->dst + output->pos; + size_t decodedSize = output->size - output->pos; + size_t const hintSize = ZBUFFv05_decompressContinue(dctx, dst, &decodedSize, src, &readSize); + output->pos += decodedSize; + input->pos += readSize; + return hintSize; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) + case 6 : + { + ZBUFFv06_DCtx* dctx = (ZBUFFv06_DCtx*) legacyContext; + const void* src = (const char*)input->src + input->pos; + size_t readSize = input->size - input->pos; + void* dst = (char*)output->dst + output->pos; + size_t decodedSize = output->size - output->pos; + size_t const hintSize = ZBUFFv06_decompressContinue(dctx, dst, &decodedSize, src, &readSize); + output->pos += decodedSize; + input->pos += readSize; + return hintSize; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) + case 7 : + { + ZBUFFv07_DCtx* dctx = (ZBUFFv07_DCtx*) legacyContext; + const void* src = (const char*)input->src + input->pos; + size_t readSize = input->size - input->pos; + void* dst = (char*)output->dst + output->pos; 
+ size_t decodedSize = output->size - output->pos; + size_t const hintSize = ZBUFFv07_decompressContinue(dctx, dst, &decodedSize, src, &readSize); + output->pos += decodedSize; + input->pos += readSize; + return hintSize; + } +#endif + } +} + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_LEGACY_H */ +/**** ended inlining ../legacy/zstd_legacy.h ****/ +#endif + + + +/*-******************************************************* +* Types +*********************************************************/ +struct ZSTD_DDict_s { + void* dictBuffer; + const void* dictContent; + size_t dictSize; + ZSTD_entropyDTables_t entropy; + U32 dictID; + U32 entropyPresent; + ZSTD_customMem cMem; +}; /* typedef'd to ZSTD_DDict within "zstd.h" */ + +const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict) +{ + assert(ddict != NULL); + return ddict->dictContent; +} + +size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict) +{ + assert(ddict != NULL); + return ddict->dictSize; +} + +void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) +{ + DEBUGLOG(4, "ZSTD_copyDDictParameters"); + assert(dctx != NULL); + assert(ddict != NULL); + dctx->dictID = ddict->dictID; + dctx->prefixStart = ddict->dictContent; + dctx->virtualStart = ddict->dictContent; + dctx->dictEnd = (const BYTE*)ddict->dictContent + ddict->dictSize; + dctx->previousDstEnd = dctx->dictEnd; +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + dctx->dictContentBeginForFuzzing = dctx->prefixStart; + dctx->dictContentEndForFuzzing = dctx->previousDstEnd; +#endif + if (ddict->entropyPresent) { + dctx->litEntropy = 1; + dctx->fseEntropy = 1; + dctx->LLTptr = ddict->entropy.LLTable; + dctx->MLTptr = ddict->entropy.MLTable; + dctx->OFTptr = ddict->entropy.OFTable; + dctx->HUFptr = ddict->entropy.hufTable; + dctx->entropy.rep[0] = ddict->entropy.rep[0]; + dctx->entropy.rep[1] = ddict->entropy.rep[1]; + dctx->entropy.rep[2] = ddict->entropy.rep[2]; + } else { + dctx->litEntropy = 0; + dctx->fseEntropy = 0; + } +} + + +static size_t +ZSTD_loadEntropy_intoDDict(ZSTD_DDict* ddict, + ZSTD_dictContentType_e dictContentType) +{ + ddict->dictID = 0; + ddict->entropyPresent = 0; + if (dictContentType == ZSTD_dct_rawContent) return 0; + + if (ddict->dictSize < 8) { + if (dictContentType == ZSTD_dct_fullDict) + return ERROR(dictionary_corrupted); /* only accept specified dictionaries */ + return 0; /* pure content mode */ + } + { U32 const magic = MEM_readLE32(ddict->dictContent); + if (magic != ZSTD_MAGIC_DICTIONARY) { + if (dictContentType == ZSTD_dct_fullDict) + return ERROR(dictionary_corrupted); /* only accept specified dictionaries */ + return 0; /* pure content mode */ + } + } + ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + ZSTD_FRAMEIDSIZE); + + /* load entropy tables */ + RETURN_ERROR_IF(ZSTD_isError(ZSTD_loadDEntropy( + &ddict->entropy, ddict->dictContent, ddict->dictSize)), + dictionary_corrupted, ""); + ddict->entropyPresent = 1; + return 0; +} + + +static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict, + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType) +{ + if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dict) || (!dictSize)) { + ddict->dictBuffer = NULL; + ddict->dictContent = dict; + if (!dict) dictSize = 0; + } else { + void* const internalBuffer = ZSTD_customMalloc(dictSize, ddict->cMem); + ddict->dictBuffer = internalBuffer; + ddict->dictContent = internalBuffer; + if (!internalBuffer) return ERROR(memory_allocation); + ZSTD_memcpy(internalBuffer, dict, 
dictSize); + } + ddict->dictSize = dictSize; + ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */ + + /* parse dictionary content */ + FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) , ""); + + return 0; +} + +ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_customMem customMem) +{ + if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL; + + { ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_customMalloc(sizeof(ZSTD_DDict), customMem); + if (ddict == NULL) return NULL; + ddict->cMem = customMem; + { size_t const initResult = ZSTD_initDDict_internal(ddict, + dict, dictSize, + dictLoadMethod, dictContentType); + if (ZSTD_isError(initResult)) { + ZSTD_freeDDict(ddict); + return NULL; + } } + return ddict; + } +} + +/*! ZSTD_createDDict() : +* Create a digested dictionary, to start decompression without startup delay. +* `dict` content is copied inside DDict. +* Consequently, `dict` can be released after `ZSTD_DDict` creation */ +ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize) +{ + ZSTD_customMem const allocator = { NULL, NULL, NULL }; + return ZSTD_createDDict_advanced(dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto, allocator); +} + +/*! ZSTD_createDDict_byReference() : + * Create a digested dictionary, to start decompression without startup delay. + * Dictionary content is simply referenced, it will be accessed during decompression. + * Warning : dictBuffer must outlive DDict (DDict must be freed before dictBuffer) */ +ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize) +{ + ZSTD_customMem const allocator = { NULL, NULL, NULL }; + return ZSTD_createDDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, allocator); +} + + +const ZSTD_DDict* ZSTD_initStaticDDict( + void* sBuffer, size_t sBufferSize, + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType) +{ + size_t const neededSpace = sizeof(ZSTD_DDict) + + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize); + ZSTD_DDict* const ddict = (ZSTD_DDict*)sBuffer; + assert(sBuffer != NULL); + assert(dict != NULL); + if ((size_t)sBuffer & 7) return NULL; /* 8-aligned */ + if (sBufferSize < neededSpace) return NULL; + if (dictLoadMethod == ZSTD_dlm_byCopy) { + ZSTD_memcpy(ddict+1, dict, dictSize); /* local copy */ + dict = ddict+1; + } + if (ZSTD_isError( ZSTD_initDDict_internal(ddict, + dict, dictSize, + ZSTD_dlm_byRef, dictContentType) )) + return NULL; + return ddict; +} + + +size_t ZSTD_freeDDict(ZSTD_DDict* ddict) +{ + if (ddict==NULL) return 0; /* support free on NULL */ + { ZSTD_customMem const cMem = ddict->cMem; + ZSTD_customFree(ddict->dictBuffer, cMem); + ZSTD_customFree(ddict, cMem); + return 0; + } +} + +/*! ZSTD_estimateDDictSize() : + * Estimate amount of memory that will be needed to create a dictionary for decompression. + * Note : dictionary created by reference using ZSTD_dlm_byRef are smaller */ +size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod) +{ + return sizeof(ZSTD_DDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize); +} + +size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict) +{ + if (ddict==NULL) return 0; /* support sizeof on NULL */ + return sizeof(*ddict) + (ddict->dictBuffer ? ddict->dictSize : 0) ; +} + +/*! 
ZSTD_getDictID_fromDDict() : + * Provides the dictID of the dictionary loaded into `ddict`. + * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. + * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ +unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict) +{ + if (ddict==NULL) return 0; + return ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize); +} +/**** ended inlining decompress/zstd_ddict.c ****/ +/**** start inlining decompress/zstd_decompress.c ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + +/* *************************************************************** +* Tuning parameters +*****************************************************************/ +/*! + * HEAPMODE : + * Select how default decompression function ZSTD_decompress() allocates its context, + * on stack (0), or into heap (1, default; requires malloc()). + * Note that functions with explicit context such as ZSTD_decompressDCtx() are unaffected. + */ +#ifndef ZSTD_HEAPMODE +# define ZSTD_HEAPMODE 1 +#endif + +/*! +* LEGACY_SUPPORT : +* if set to 1+, ZSTD_decompress() can decode older formats (v0.1+) +*/ +#ifndef ZSTD_LEGACY_SUPPORT +# define ZSTD_LEGACY_SUPPORT 0 +#endif + +/*! + * MAXWINDOWSIZE_DEFAULT : + * maximum window size accepted by DStream __by default__. + * Frames requiring more memory will be rejected. + * It's possible to set a different limit using ZSTD_DCtx_setMaxWindowSize(). + */ +#ifndef ZSTD_MAXWINDOWSIZE_DEFAULT +# define ZSTD_MAXWINDOWSIZE_DEFAULT (((U32)1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT) + 1) +#endif + +/*! + * NO_FORWARD_PROGRESS_MAX : + * maximum allowed nb of calls to ZSTD_decompressStream() + * without any forward progress + * (defined as: no byte read from input, and no byte flushed to output) + * before triggering an error. + */ +#ifndef ZSTD_NO_FORWARD_PROGRESS_MAX +# define ZSTD_NO_FORWARD_PROGRESS_MAX 16 +#endif + + +/*-******************************************************* +* Dependencies +*********************************************************/ +/**** skipping file: ../common/zstd_deps.h ****/ +/**** skipping file: ../common/cpu.h ****/ +/**** skipping file: ../common/mem.h ****/ +#define FSE_STATIC_LINKING_ONLY +/**** skipping file: ../common/fse.h ****/ +#define HUF_STATIC_LINKING_ONLY +/**** skipping file: ../common/huf.h ****/ +/**** skipping file: ../common/xxhash.h ****/ +/**** skipping file: ../common/zstd_internal.h ****/ +/**** skipping file: zstd_decompress_internal.h ****/ +/**** skipping file: zstd_ddict.h ****/ +/**** start inlining zstd_decompress_block.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + + +#ifndef ZSTD_DEC_BLOCK_H +#define ZSTD_DEC_BLOCK_H + +/*-******************************************************* + * Dependencies + *********************************************************/ +/**** skipping file: ../common/zstd_deps.h ****/ +/**** skipping file: ../zstd.h ****/ +/**** skipping file: ../common/zstd_internal.h ****/ +/**** skipping file: zstd_decompress_internal.h ****/ + + +/* === Prototypes === */ + +/* note: prototypes already published within `zstd.h` : + * ZSTD_decompressBlock() + */ + +/* note: prototypes already published within `zstd_internal.h` : + * ZSTD_getcBlockSize() + * ZSTD_decodeSeqHeaders() + */ + + +/* ZSTD_decompressBlock_internal() : + * decompress block, starting at `src`, + * into destination buffer `dst`. + * @return : decompressed block size, + * or an error code (which can be tested using ZSTD_isError()) + */ +size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, const int frame); + +/* ZSTD_buildFSETable() : + * generate FSE decoding table for one symbol (ll, ml or off) + * this function must be called with valid parameters only + * (dt is large enough, normalizedCounter distribution total is a power of 2, max is within range, etc.) + * in which case it cannot fail. + * The workspace must be 4-byte aligned and at least ZSTD_BUILD_FSE_TABLE_WKSP_SIZE bytes, which is + * defined in zstd_decompress_internal.h. + * Internal use only. + */ +void ZSTD_buildFSETable(ZSTD_seqSymbol* dt, + const short* normalizedCounter, unsigned maxSymbolValue, + const U32* baseValue, const U32* nbAdditionalBits, + unsigned tableLog, void* wksp, size_t wkspSize, + int bmi2); + + +#endif /* ZSTD_DEC_BLOCK_H */ +/**** ended inlining zstd_decompress_block.h ****/ + +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) +/**** skipping file: ../legacy/zstd_legacy.h ****/ +#endif + + + +/************************************* + * Multiple DDicts Hashset internals * + *************************************/ + +#define DDICT_HASHSET_MAX_LOAD_FACTOR_COUNT_MULT 4 +#define DDICT_HASHSET_MAX_LOAD_FACTOR_SIZE_MULT 3 /* These two constants represent SIZE_MULT/COUNT_MULT load factor without using a float. + * Currently, that means a 0.75 load factor. + * So, if count * COUNT_MULT / size * SIZE_MULT != 0, then we've exceeded + * the load factor of the ddict hash set. + */ + +#define DDICT_HASHSET_TABLE_BASE_SIZE 64 +#define DDICT_HASHSET_RESIZE_FACTOR 2 + +/* Hash function to determine starting position of dict insertion within the table + * Returns an index between [0, hashSet->ddictPtrTableSize] + */ +static size_t ZSTD_DDictHashSet_getIndex(const ZSTD_DDictHashSet* hashSet, U32 dictID) { + const U64 hash = XXH64(&dictID, sizeof(U32), 0); + /* DDict ptr table size is a multiple of 2, use size - 1 as mask to get index within [0, hashSet->ddictPtrTableSize) */ + return hash & (hashSet->ddictPtrTableSize - 1); +} + +/* Adds DDict to a hashset without resizing it. + * If inserting a DDict with a dictID that already exists in the set, replaces the one in the set. + * Returns 0 if successful, or a zstd error code if something went wrong. 
+ */ +static size_t ZSTD_DDictHashSet_emplaceDDict(ZSTD_DDictHashSet* hashSet, const ZSTD_DDict* ddict) { + const U32 dictID = ZSTD_getDictID_fromDDict(ddict); + size_t idx = ZSTD_DDictHashSet_getIndex(hashSet, dictID); + const size_t idxRangeMask = hashSet->ddictPtrTableSize - 1; + RETURN_ERROR_IF(hashSet->ddictPtrCount == hashSet->ddictPtrTableSize, GENERIC, "Hash set is full!"); + DEBUGLOG(4, "Hashed index: for dictID: %u is %zu", dictID, idx); + while (hashSet->ddictPtrTable[idx] != NULL) { + /* Replace existing ddict if inserting ddict with same dictID */ + if (ZSTD_getDictID_fromDDict(hashSet->ddictPtrTable[idx]) == dictID) { + DEBUGLOG(4, "DictID already exists, replacing rather than adding"); + hashSet->ddictPtrTable[idx] = ddict; + return 0; + } + idx &= idxRangeMask; + idx++; + } + DEBUGLOG(4, "Final idx after probing for dictID %u is: %zu", dictID, idx); + hashSet->ddictPtrTable[idx] = ddict; + hashSet->ddictPtrCount++; + return 0; +} + +/* Expands hash table by factor of DDICT_HASHSET_RESIZE_FACTOR and + * rehashes all values, allocates new table, frees old table. + * Returns 0 on success, otherwise a zstd error code. + */ +static size_t ZSTD_DDictHashSet_expand(ZSTD_DDictHashSet* hashSet, ZSTD_customMem customMem) { + size_t newTableSize = hashSet->ddictPtrTableSize * DDICT_HASHSET_RESIZE_FACTOR; + const ZSTD_DDict** newTable = (const ZSTD_DDict**)ZSTD_customCalloc(sizeof(ZSTD_DDict*) * newTableSize, customMem); + const ZSTD_DDict** oldTable = hashSet->ddictPtrTable; + size_t oldTableSize = hashSet->ddictPtrTableSize; + size_t i; + + DEBUGLOG(4, "Expanding DDict hash table! Old size: %zu new size: %zu", oldTableSize, newTableSize); + RETURN_ERROR_IF(!newTable, memory_allocation, "Expanded hashset allocation failed!"); + hashSet->ddictPtrTable = newTable; + hashSet->ddictPtrTableSize = newTableSize; + hashSet->ddictPtrCount = 0; + for (i = 0; i < oldTableSize; ++i) { + if (oldTable[i] != NULL) { + FORWARD_IF_ERROR(ZSTD_DDictHashSet_emplaceDDict(hashSet, oldTable[i]), ""); + } + } + ZSTD_customFree((void*)oldTable, customMem); + DEBUGLOG(4, "Finished re-hash"); + return 0; +} + +/* Fetches a DDict with the given dictID + * Returns the ZSTD_DDict* with the requested dictID. If it doesn't exist, then returns NULL. + */ +static const ZSTD_DDict* ZSTD_DDictHashSet_getDDict(ZSTD_DDictHashSet* hashSet, U32 dictID) { + size_t idx = ZSTD_DDictHashSet_getIndex(hashSet, dictID); + const size_t idxRangeMask = hashSet->ddictPtrTableSize - 1; + DEBUGLOG(4, "Hashed index: for dictID: %u is %zu", dictID, idx); + for (;;) { + size_t currDictID = ZSTD_getDictID_fromDDict(hashSet->ddictPtrTable[idx]); + if (currDictID == dictID || currDictID == 0) { + /* currDictID == 0 implies a NULL ddict entry */ + break; + } else { + idx &= idxRangeMask; /* Goes to start of table when we reach the end */ + idx++; + } + } + DEBUGLOG(4, "Final idx after probing for dictID %u is: %zu", dictID, idx); + return hashSet->ddictPtrTable[idx]; +} + +/* Allocates space for and returns a ddict hash set + * The hash set's ZSTD_DDict* table has all values automatically set to NULL to begin with. + * Returns NULL if allocation failed. 
+ */ +static ZSTD_DDictHashSet* ZSTD_createDDictHashSet(ZSTD_customMem customMem) { + ZSTD_DDictHashSet* ret = (ZSTD_DDictHashSet*)ZSTD_customMalloc(sizeof(ZSTD_DDictHashSet), customMem); + DEBUGLOG(4, "Allocating new hash set"); + ret->ddictPtrTable = (const ZSTD_DDict**)ZSTD_customCalloc(DDICT_HASHSET_TABLE_BASE_SIZE * sizeof(ZSTD_DDict*), customMem); + ret->ddictPtrTableSize = DDICT_HASHSET_TABLE_BASE_SIZE; + ret->ddictPtrCount = 0; + if (!ret || !ret->ddictPtrTable) { + return NULL; + } + return ret; +} + +/* Frees the table of ZSTD_DDict* within a hashset, then frees the hashset itself. + * Note: The ZSTD_DDict* within the table are NOT freed. + */ +static void ZSTD_freeDDictHashSet(ZSTD_DDictHashSet* hashSet, ZSTD_customMem customMem) { + DEBUGLOG(4, "Freeing ddict hash set"); + if (hashSet && hashSet->ddictPtrTable) { + ZSTD_customFree((void*)hashSet->ddictPtrTable, customMem); + } + if (hashSet) { + ZSTD_customFree(hashSet, customMem); + } +} + +/* Public function: Adds a DDict into the ZSTD_DDictHashSet, possibly triggering a resize of the hash set. + * Returns 0 on success, or a ZSTD error. + */ +static size_t ZSTD_DDictHashSet_addDDict(ZSTD_DDictHashSet* hashSet, const ZSTD_DDict* ddict, ZSTD_customMem customMem) { + DEBUGLOG(4, "Adding dict ID: %u to hashset with - Count: %zu Tablesize: %zu", ZSTD_getDictID_fromDDict(ddict), hashSet->ddictPtrCount, hashSet->ddictPtrTableSize); + if (hashSet->ddictPtrCount * DDICT_HASHSET_MAX_LOAD_FACTOR_COUNT_MULT / hashSet->ddictPtrTableSize * DDICT_HASHSET_MAX_LOAD_FACTOR_SIZE_MULT != 0) { + FORWARD_IF_ERROR(ZSTD_DDictHashSet_expand(hashSet, customMem), ""); + } + FORWARD_IF_ERROR(ZSTD_DDictHashSet_emplaceDDict(hashSet, ddict), ""); + return 0; +} + +/*-************************************************************* +* Context management +***************************************************************/ +size_t ZSTD_sizeof_DCtx (const ZSTD_DCtx* dctx) +{ + if (dctx==NULL) return 0; /* support sizeof NULL */ + return sizeof(*dctx) + + ZSTD_sizeof_DDict(dctx->ddictLocal) + + dctx->inBuffSize + dctx->outBuffSize; +} + +size_t ZSTD_estimateDCtxSize(void) { return sizeof(ZSTD_DCtx); } + + +static size_t ZSTD_startingInputLength(ZSTD_format_e format) +{ + size_t const startingInputLength = ZSTD_FRAMEHEADERSIZE_PREFIX(format); + /* only supports formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless */ + assert( (format == ZSTD_f_zstd1) || (format == ZSTD_f_zstd1_magicless) ); + return startingInputLength; +} + +static void ZSTD_DCtx_resetParameters(ZSTD_DCtx* dctx) +{ + assert(dctx->streamStage == zdss_init); + dctx->format = ZSTD_f_zstd1; + dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT; + dctx->outBufferMode = ZSTD_bm_buffered; + dctx->forceIgnoreChecksum = ZSTD_d_validateChecksum; + dctx->refMultipleDDicts = ZSTD_rmd_refSingleDDict; +} + +static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx) +{ + dctx->staticSize = 0; + dctx->ddict = NULL; + dctx->ddictLocal = NULL; + dctx->dictEnd = NULL; + dctx->ddictIsCold = 0; + dctx->dictUses = ZSTD_dont_use; + dctx->inBuff = NULL; + dctx->inBuffSize = 0; + dctx->outBuffSize = 0; + dctx->streamStage = zdss_init; + dctx->legacyContext = NULL; + dctx->previousLegacyVersion = 0; + dctx->noForwardProgress = 0; + dctx->oversizedDuration = 0; + dctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid()); + dctx->ddictSet = NULL; + ZSTD_DCtx_resetParameters(dctx); +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + dctx->dictContentEndForFuzzing = NULL; +#endif +} + +ZSTD_DCtx* ZSTD_initStaticDCtx(void *workspace, size_t workspaceSize) +{ + 
ZSTD_DCtx* const dctx = (ZSTD_DCtx*) workspace; + + if ((size_t)workspace & 7) return NULL; /* 8-aligned */ + if (workspaceSize < sizeof(ZSTD_DCtx)) return NULL; /* minimum size */ + + ZSTD_initDCtx_internal(dctx); + dctx->staticSize = workspaceSize; + dctx->inBuff = (char*)(dctx+1); + return dctx; +} + +ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem) +{ + if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL; + + { ZSTD_DCtx* const dctx = (ZSTD_DCtx*)ZSTD_customMalloc(sizeof(*dctx), customMem); + if (!dctx) return NULL; + dctx->customMem = customMem; + ZSTD_initDCtx_internal(dctx); + return dctx; + } +} + +ZSTD_DCtx* ZSTD_createDCtx(void) +{ + DEBUGLOG(3, "ZSTD_createDCtx"); + return ZSTD_createDCtx_advanced(ZSTD_defaultCMem); +} + +static void ZSTD_clearDict(ZSTD_DCtx* dctx) +{ + ZSTD_freeDDict(dctx->ddictLocal); + dctx->ddictLocal = NULL; + dctx->ddict = NULL; + dctx->dictUses = ZSTD_dont_use; +} + +size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx) +{ + if (dctx==NULL) return 0; /* support free on NULL */ + RETURN_ERROR_IF(dctx->staticSize, memory_allocation, "not compatible with static DCtx"); + { ZSTD_customMem const cMem = dctx->customMem; + ZSTD_clearDict(dctx); + ZSTD_customFree(dctx->inBuff, cMem); + dctx->inBuff = NULL; +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) + if (dctx->legacyContext) + ZSTD_freeLegacyStreamContext(dctx->legacyContext, dctx->previousLegacyVersion); +#endif + if (dctx->ddictSet) { + ZSTD_freeDDictHashSet(dctx->ddictSet, cMem); + dctx->ddictSet = NULL; + } + ZSTD_customFree(dctx, cMem); + return 0; + } +} + +/* no longer useful */ +void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx) +{ + size_t const toCopy = (size_t)((char*)(&dstDCtx->inBuff) - (char*)dstDCtx); + ZSTD_memcpy(dstDCtx, srcDCtx, toCopy); /* no need to copy workspace */ +} + +/* Given a dctx with a digested frame params, re-selects the correct ZSTD_DDict based on + * the requested dict ID from the frame. If there exists a reference to the correct ZSTD_DDict, then + * accordingly sets the ddict to be used to decompress the frame. + * + * If no DDict is found, then no action is taken, and the ZSTD_DCtx::ddict remains as-is. + * + * ZSTD_d_refMultipleDDicts must be enabled for this function to be called. + */ +static void ZSTD_DCtx_selectFrameDDict(ZSTD_DCtx* dctx) { + assert(dctx->refMultipleDDicts && dctx->ddictSet); + DEBUGLOG(4, "Adjusting DDict based on requested dict ID from frame"); + if (dctx->ddict) { + const ZSTD_DDict* frameDDict = ZSTD_DDictHashSet_getDDict(dctx->ddictSet, dctx->fParams.dictID); + if (frameDDict) { + DEBUGLOG(4, "DDict found!"); + ZSTD_clearDict(dctx); + dctx->dictID = dctx->fParams.dictID; + dctx->ddict = frameDDict; + dctx->dictUses = ZSTD_use_indefinitely; + } + } +} + + +/*-************************************************************* + * Frame header decoding + ***************************************************************/ + +/*! ZSTD_isFrame() : + * Tells if the content of `buffer` starts with a valid Frame Identifier. + * Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0. + * Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled. + * Note 3 : Skippable Frame Identifiers are considered valid. 
*/ +unsigned ZSTD_isFrame(const void* buffer, size_t size) +{ + if (size < ZSTD_FRAMEIDSIZE) return 0; + { U32 const magic = MEM_readLE32(buffer); + if (magic == ZSTD_MAGICNUMBER) return 1; + if ((magic & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) return 1; + } +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) + if (ZSTD_isLegacy(buffer, size)) return 1; +#endif + return 0; +} + +/** ZSTD_frameHeaderSize_internal() : + * srcSize must be large enough to reach header size fields. + * note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless. + * @return : size of the Frame Header + * or an error code, which can be tested with ZSTD_isError() */ +static size_t ZSTD_frameHeaderSize_internal(const void* src, size_t srcSize, ZSTD_format_e format) +{ + size_t const minInputSize = ZSTD_startingInputLength(format); + RETURN_ERROR_IF(srcSize < minInputSize, srcSize_wrong, ""); + + { BYTE const fhd = ((const BYTE*)src)[minInputSize-1]; + U32 const dictID= fhd & 3; + U32 const singleSegment = (fhd >> 5) & 1; + U32 const fcsId = fhd >> 6; + return minInputSize + !singleSegment + + ZSTD_did_fieldSize[dictID] + ZSTD_fcs_fieldSize[fcsId] + + (singleSegment && !fcsId); + } +} + +/** ZSTD_frameHeaderSize() : + * srcSize must be >= ZSTD_frameHeaderSize_prefix. + * @return : size of the Frame Header, + * or an error code (if srcSize is too small) */ +size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize) +{ + return ZSTD_frameHeaderSize_internal(src, srcSize, ZSTD_f_zstd1); +} + + +/** ZSTD_getFrameHeader_advanced() : + * decode Frame Header, or require larger `srcSize`. + * note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless + * @return : 0, `zfhPtr` is correctly filled, + * >0, `srcSize` is too small, value is wanted `srcSize` amount, + * or an error code, which can be tested using ZSTD_isError() */ +size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format) +{ + const BYTE* ip = (const BYTE*)src; + size_t const minInputSize = ZSTD_startingInputLength(format); + + ZSTD_memset(zfhPtr, 0, sizeof(*zfhPtr)); /* not strictly necessary, but static analyzer do not understand that zfhPtr is only going to be read only if return value is zero, since they are 2 different signals */ + if (srcSize < minInputSize) return minInputSize; + RETURN_ERROR_IF(src==NULL, GENERIC, "invalid parameter"); + + if ( (format != ZSTD_f_zstd1_magicless) + && (MEM_readLE32(src) != ZSTD_MAGICNUMBER) ) { + if ((MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { + /* skippable frame */ + if (srcSize < ZSTD_SKIPPABLEHEADERSIZE) + return ZSTD_SKIPPABLEHEADERSIZE; /* magic number + frame length */ + ZSTD_memset(zfhPtr, 0, sizeof(*zfhPtr)); + zfhPtr->frameContentSize = MEM_readLE32((const char *)src + ZSTD_FRAMEIDSIZE); + zfhPtr->frameType = ZSTD_skippableFrame; + return 0; + } + RETURN_ERROR(prefix_unknown, ""); + } + + /* ensure there is enough `srcSize` to fully read/decode frame header */ + { size_t const fhsize = ZSTD_frameHeaderSize_internal(src, srcSize, format); + if (srcSize < fhsize) return fhsize; + zfhPtr->headerSize = (U32)fhsize; + } + + { BYTE const fhdByte = ip[minInputSize-1]; + size_t pos = minInputSize; + U32 const dictIDSizeCode = fhdByte&3; + U32 const checksumFlag = (fhdByte>>2)&1; + U32 const singleSegment = (fhdByte>>5)&1; + U32 const fcsID = fhdByte>>6; + U64 windowSize = 0; + U32 dictID = 0; + U64 frameContentSize = ZSTD_CONTENTSIZE_UNKNOWN; + RETURN_ERROR_IF((fhdByte & 
0x08) != 0, frameParameter_unsupported, + "reserved bits, must be zero"); + + if (!singleSegment) { + BYTE const wlByte = ip[pos++]; + U32 const windowLog = (wlByte >> 3) + ZSTD_WINDOWLOG_ABSOLUTEMIN; + RETURN_ERROR_IF(windowLog > ZSTD_WINDOWLOG_MAX, frameParameter_windowTooLarge, ""); + windowSize = (1ULL << windowLog); + windowSize += (windowSize >> 3) * (wlByte&7); + } + switch(dictIDSizeCode) + { + default: assert(0); /* impossible */ + case 0 : break; + case 1 : dictID = ip[pos]; pos++; break; + case 2 : dictID = MEM_readLE16(ip+pos); pos+=2; break; + case 3 : dictID = MEM_readLE32(ip+pos); pos+=4; break; + } + switch(fcsID) + { + default: assert(0); /* impossible */ + case 0 : if (singleSegment) frameContentSize = ip[pos]; break; + case 1 : frameContentSize = MEM_readLE16(ip+pos)+256; break; + case 2 : frameContentSize = MEM_readLE32(ip+pos); break; + case 3 : frameContentSize = MEM_readLE64(ip+pos); break; + } + if (singleSegment) windowSize = frameContentSize; + + zfhPtr->frameType = ZSTD_frame; + zfhPtr->frameContentSize = frameContentSize; + zfhPtr->windowSize = windowSize; + zfhPtr->blockSizeMax = (unsigned) MIN(windowSize, ZSTD_BLOCKSIZE_MAX); + zfhPtr->dictID = dictID; + zfhPtr->checksumFlag = checksumFlag; + } + return 0; +} + +/** ZSTD_getFrameHeader() : + * decode Frame Header, or require larger `srcSize`. + * note : this function does not consume input, it only reads it. + * @return : 0, `zfhPtr` is correctly filled, + * >0, `srcSize` is too small, value is wanted `srcSize` amount, + * or an error code, which can be tested using ZSTD_isError() */ +size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize) +{ + return ZSTD_getFrameHeader_advanced(zfhPtr, src, srcSize, ZSTD_f_zstd1); +} + + +/** ZSTD_getFrameContentSize() : + * compatible with legacy mode + * @return : decompressed size of the single frame pointed to be `src` if known, otherwise + * - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined + * - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small) */ +unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize) +{ +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) + if (ZSTD_isLegacy(src, srcSize)) { + unsigned long long const ret = ZSTD_getDecompressedSize_legacy(src, srcSize); + return ret == 0 ? 
ZSTD_CONTENTSIZE_UNKNOWN : ret; + } +#endif + { ZSTD_frameHeader zfh; + if (ZSTD_getFrameHeader(&zfh, src, srcSize) != 0) + return ZSTD_CONTENTSIZE_ERROR; + if (zfh.frameType == ZSTD_skippableFrame) { + return 0; + } else { + return zfh.frameContentSize; + } } +} + +static size_t readSkippableFrameSize(void const* src, size_t srcSize) +{ + size_t const skippableHeaderSize = ZSTD_SKIPPABLEHEADERSIZE; + U32 sizeU32; + + RETURN_ERROR_IF(srcSize < ZSTD_SKIPPABLEHEADERSIZE, srcSize_wrong, ""); + + sizeU32 = MEM_readLE32((BYTE const*)src + ZSTD_FRAMEIDSIZE); + RETURN_ERROR_IF((U32)(sizeU32 + ZSTD_SKIPPABLEHEADERSIZE) < sizeU32, + frameParameter_unsupported, ""); + { + size_t const skippableSize = skippableHeaderSize + sizeU32; + RETURN_ERROR_IF(skippableSize > srcSize, srcSize_wrong, ""); + return skippableSize; + } +} + +/** ZSTD_findDecompressedSize() : + * compatible with legacy mode + * `srcSize` must be the exact length of some number of ZSTD compressed and/or + * skippable frames + * @return : decompressed size of the frames contained */ +unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize) +{ + unsigned long long totalDstSize = 0; + + while (srcSize >= ZSTD_startingInputLength(ZSTD_f_zstd1)) { + U32 const magicNumber = MEM_readLE32(src); + + if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { + size_t const skippableSize = readSkippableFrameSize(src, srcSize); + if (ZSTD_isError(skippableSize)) { + return ZSTD_CONTENTSIZE_ERROR; + } + assert(skippableSize <= srcSize); + + src = (const BYTE *)src + skippableSize; + srcSize -= skippableSize; + continue; + } + + { unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize); + if (ret >= ZSTD_CONTENTSIZE_ERROR) return ret; + + /* check for overflow */ + if (totalDstSize + ret < totalDstSize) return ZSTD_CONTENTSIZE_ERROR; + totalDstSize += ret; + } + { size_t const frameSrcSize = ZSTD_findFrameCompressedSize(src, srcSize); + if (ZSTD_isError(frameSrcSize)) { + return ZSTD_CONTENTSIZE_ERROR; + } + + src = (const BYTE *)src + frameSrcSize; + srcSize -= frameSrcSize; + } + } /* while (srcSize >= ZSTD_frameHeaderSize_prefix) */ + + if (srcSize) return ZSTD_CONTENTSIZE_ERROR; + + return totalDstSize; +} + +/** ZSTD_getDecompressedSize() : + * compatible with legacy mode + * @return : decompressed size if known, 0 otherwise + note : 0 can mean any of the following : + - frame content is empty + - decompressed size field is not present in frame header + - frame header unknown / not supported + - frame header not complete (`srcSize` too small) */ +unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize) +{ + unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize); + ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_ERROR < ZSTD_CONTENTSIZE_UNKNOWN); + return (ret >= ZSTD_CONTENTSIZE_ERROR) ? 0 : ret; +} + + +/** ZSTD_decodeFrameHeader() : + * `headerSize` must be the size provided by ZSTD_frameHeaderSize(). + * If multiple DDict references are enabled, also will choose the correct DDict to use. 
+ * @return : 0 if success, or an error code, which can be tested using ZSTD_isError() */ +static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t headerSize) +{ + size_t const result = ZSTD_getFrameHeader_advanced(&(dctx->fParams), src, headerSize, dctx->format); + if (ZSTD_isError(result)) return result; /* invalid header */ + RETURN_ERROR_IF(result>0, srcSize_wrong, "headerSize too small"); + + /* Reference DDict requested by frame if dctx references multiple ddicts */ + if (dctx->refMultipleDDicts == ZSTD_rmd_refMultipleDDicts && dctx->ddictSet) { + ZSTD_DCtx_selectFrameDDict(dctx); + } + +#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + /* Skip the dictID check in fuzzing mode, because it makes the search + * harder. + */ + RETURN_ERROR_IF(dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID), + dictionary_wrong, ""); +#endif + dctx->validateChecksum = (dctx->fParams.checksumFlag && !dctx->forceIgnoreChecksum) ? 1 : 0; + if (dctx->validateChecksum) XXH64_reset(&dctx->xxhState, 0); + dctx->processedCSize += headerSize; + return 0; +} + +static ZSTD_frameSizeInfo ZSTD_errorFrameSizeInfo(size_t ret) +{ + ZSTD_frameSizeInfo frameSizeInfo; + frameSizeInfo.compressedSize = ret; + frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR; + return frameSizeInfo; +} + +static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize) +{ + ZSTD_frameSizeInfo frameSizeInfo; + ZSTD_memset(&frameSizeInfo, 0, sizeof(ZSTD_frameSizeInfo)); + +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) + if (ZSTD_isLegacy(src, srcSize)) + return ZSTD_findFrameSizeInfoLegacy(src, srcSize); +#endif + + if ((srcSize >= ZSTD_SKIPPABLEHEADERSIZE) + && (MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { + frameSizeInfo.compressedSize = readSkippableFrameSize(src, srcSize); + assert(ZSTD_isError(frameSizeInfo.compressedSize) || + frameSizeInfo.compressedSize <= srcSize); + return frameSizeInfo; + } else { + const BYTE* ip = (const BYTE*)src; + const BYTE* const ipstart = ip; + size_t remainingSize = srcSize; + size_t nbBlocks = 0; + ZSTD_frameHeader zfh; + + /* Extract Frame Header */ + { size_t const ret = ZSTD_getFrameHeader(&zfh, src, srcSize); + if (ZSTD_isError(ret)) + return ZSTD_errorFrameSizeInfo(ret); + if (ret > 0) + return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong)); + } + + ip += zfh.headerSize; + remainingSize -= zfh.headerSize; + + /* Iterate over each block */ + while (1) { + blockProperties_t blockProperties; + size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties); + if (ZSTD_isError(cBlockSize)) + return ZSTD_errorFrameSizeInfo(cBlockSize); + + if (ZSTD_blockHeaderSize + cBlockSize > remainingSize) + return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong)); + + ip += ZSTD_blockHeaderSize + cBlockSize; + remainingSize -= ZSTD_blockHeaderSize + cBlockSize; + nbBlocks++; + + if (blockProperties.lastBlock) break; + } + + /* Final frame content checksum */ + if (zfh.checksumFlag) { + if (remainingSize < 4) + return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong)); + ip += 4; + } + + frameSizeInfo.compressedSize = (size_t)(ip - ipstart); + frameSizeInfo.decompressedBound = (zfh.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) + ? 
zfh.frameContentSize + : nbBlocks * zfh.blockSizeMax; + return frameSizeInfo; + } +} + +/** ZSTD_findFrameCompressedSize() : + * compatible with legacy mode + * `src` must point to the start of a ZSTD frame, ZSTD legacy frame, or skippable frame + * `srcSize` must be at least as large as the frame contained + * @return : the compressed size of the frame starting at `src` */ +size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize) +{ + ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize); + return frameSizeInfo.compressedSize; +} + +/** ZSTD_decompressBound() : + * compatible with legacy mode + * `src` must point to the start of a ZSTD frame or a skippeable frame + * `srcSize` must be at least as large as the frame contained + * @return : the maximum decompressed size of the compressed source + */ +unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize) +{ + unsigned long long bound = 0; + /* Iterate over each frame */ + while (srcSize > 0) { + ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize); + size_t const compressedSize = frameSizeInfo.compressedSize; + unsigned long long const decompressedBound = frameSizeInfo.decompressedBound; + if (ZSTD_isError(compressedSize) || decompressedBound == ZSTD_CONTENTSIZE_ERROR) + return ZSTD_CONTENTSIZE_ERROR; + assert(srcSize >= compressedSize); + src = (const BYTE*)src + compressedSize; + srcSize -= compressedSize; + bound += decompressedBound; + } + return bound; +} + + +/*-************************************************************* + * Frame decoding + ***************************************************************/ + +/** ZSTD_insertBlock() : + * insert `src` block into `dctx` history. Useful to track uncompressed blocks. */ +size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize) +{ + DEBUGLOG(5, "ZSTD_insertBlock: %u bytes", (unsigned)blockSize); + ZSTD_checkContinuity(dctx, blockStart, blockSize); + dctx->previousDstEnd = (const char*)blockStart + blockSize; + return blockSize; +} + + +static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + DEBUGLOG(5, "ZSTD_copyRawBlock"); + RETURN_ERROR_IF(srcSize > dstCapacity, dstSize_tooSmall, ""); + if (dst == NULL) { + if (srcSize == 0) return 0; + RETURN_ERROR(dstBuffer_null, ""); + } + ZSTD_memcpy(dst, src, srcSize); + return srcSize; +} + +static size_t ZSTD_setRleBlock(void* dst, size_t dstCapacity, + BYTE b, + size_t regenSize) +{ + RETURN_ERROR_IF(regenSize > dstCapacity, dstSize_tooSmall, ""); + if (dst == NULL) { + if (regenSize == 0) return 0; + RETURN_ERROR(dstBuffer_null, ""); + } + ZSTD_memset(dst, b, regenSize); + return regenSize; +} + +static void ZSTD_DCtx_trace_end(ZSTD_DCtx const* dctx, U64 uncompressedSize, U64 compressedSize, unsigned streaming) +{ +#if ZSTD_TRACE + if (dctx->traceCtx && ZSTD_trace_decompress_end != NULL) { + ZSTD_Trace trace; + ZSTD_memset(&trace, 0, sizeof(trace)); + trace.version = ZSTD_VERSION_NUMBER; + trace.streaming = streaming; + if (dctx->ddict) { + trace.dictionaryID = ZSTD_getDictID_fromDDict(dctx->ddict); + trace.dictionarySize = ZSTD_DDict_dictSize(dctx->ddict); + trace.dictionaryIsCold = dctx->ddictIsCold; + } + trace.uncompressedSize = (size_t)uncompressedSize; + trace.compressedSize = (size_t)compressedSize; + trace.dctx = dctx; + ZSTD_trace_decompress_end(dctx->traceCtx, &trace); + } +#else + (void)dctx; + (void)uncompressedSize; + (void)compressedSize; + (void)streaming; +#endif +} + + +/*! 
ZSTD_decompressFrame() : + * @dctx must be properly initialized + * will update *srcPtr and *srcSizePtr, + * to make *srcPtr progress by one frame. */ +static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void** srcPtr, size_t *srcSizePtr) +{ + const BYTE* const istart = (const BYTE*)(*srcPtr); + const BYTE* ip = istart; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = dstCapacity != 0 ? ostart + dstCapacity : ostart; + BYTE* op = ostart; + size_t remainingSrcSize = *srcSizePtr; + + DEBUGLOG(4, "ZSTD_decompressFrame (srcSize:%i)", (int)*srcSizePtr); + + /* check */ + RETURN_ERROR_IF( + remainingSrcSize < ZSTD_FRAMEHEADERSIZE_MIN(dctx->format)+ZSTD_blockHeaderSize, + srcSize_wrong, ""); + + /* Frame Header */ + { size_t const frameHeaderSize = ZSTD_frameHeaderSize_internal( + ip, ZSTD_FRAMEHEADERSIZE_PREFIX(dctx->format), dctx->format); + if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize; + RETURN_ERROR_IF(remainingSrcSize < frameHeaderSize+ZSTD_blockHeaderSize, + srcSize_wrong, ""); + FORWARD_IF_ERROR( ZSTD_decodeFrameHeader(dctx, ip, frameHeaderSize) , ""); + ip += frameHeaderSize; remainingSrcSize -= frameHeaderSize; + } + + /* Loop on each block */ + while (1) { + size_t decodedSize; + blockProperties_t blockProperties; + size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSrcSize, &blockProperties); + if (ZSTD_isError(cBlockSize)) return cBlockSize; + + ip += ZSTD_blockHeaderSize; + remainingSrcSize -= ZSTD_blockHeaderSize; + RETURN_ERROR_IF(cBlockSize > remainingSrcSize, srcSize_wrong, ""); + + switch(blockProperties.blockType) + { + case bt_compressed: + decodedSize = ZSTD_decompressBlock_internal(dctx, op, (size_t)(oend-op), ip, cBlockSize, /* frame */ 1); + break; + case bt_raw : + decodedSize = ZSTD_copyRawBlock(op, (size_t)(oend-op), ip, cBlockSize); + break; + case bt_rle : + decodedSize = ZSTD_setRleBlock(op, (size_t)(oend-op), *ip, blockProperties.origSize); + break; + case bt_reserved : + default: + RETURN_ERROR(corruption_detected, "invalid block type"); + } + + if (ZSTD_isError(decodedSize)) return decodedSize; + if (dctx->validateChecksum) + XXH64_update(&dctx->xxhState, op, decodedSize); + if (decodedSize != 0) + op += decodedSize; + assert(ip != NULL); + ip += cBlockSize; + remainingSrcSize -= cBlockSize; + if (blockProperties.lastBlock) break; + } + + if (dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) { + RETURN_ERROR_IF((U64)(op-ostart) != dctx->fParams.frameContentSize, + corruption_detected, ""); + } + if (dctx->fParams.checksumFlag) { /* Frame content checksum verification */ + RETURN_ERROR_IF(remainingSrcSize<4, checksum_wrong, ""); + if (!dctx->forceIgnoreChecksum) { + U32 const checkCalc = (U32)XXH64_digest(&dctx->xxhState); + U32 checkRead; + checkRead = MEM_readLE32(ip); + RETURN_ERROR_IF(checkRead != checkCalc, checksum_wrong, ""); + } + ip += 4; + remainingSrcSize -= 4; + } + ZSTD_DCtx_trace_end(dctx, (U64)(op-ostart), (U64)(ip-istart), /* streaming */ 0); + /* Allow caller to get size read */ + *srcPtr = ip; + *srcSizePtr = remainingSrcSize; + return (size_t)(op-ostart); +} + +static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict, size_t dictSize, + const ZSTD_DDict* ddict) +{ + void* const dststart = dst; + int moreThan1Frame = 0; + + DEBUGLOG(5, "ZSTD_decompressMultiFrame"); + assert(dict==NULL || ddict==NULL); /* either dict or ddict set, not both */ + + if (ddict) { + dict = 
ZSTD_DDict_dictContent(ddict); + dictSize = ZSTD_DDict_dictSize(ddict); + } + + while (srcSize >= ZSTD_startingInputLength(dctx->format)) { + +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) + if (ZSTD_isLegacy(src, srcSize)) { + size_t decodedSize; + size_t const frameSize = ZSTD_findFrameCompressedSizeLegacy(src, srcSize); + if (ZSTD_isError(frameSize)) return frameSize; + RETURN_ERROR_IF(dctx->staticSize, memory_allocation, + "legacy support is not compatible with static dctx"); + + decodedSize = ZSTD_decompressLegacy(dst, dstCapacity, src, frameSize, dict, dictSize); + if (ZSTD_isError(decodedSize)) return decodedSize; + + assert(decodedSize <= dstCapacity); + dst = (BYTE*)dst + decodedSize; + dstCapacity -= decodedSize; + + src = (const BYTE*)src + frameSize; + srcSize -= frameSize; + + continue; + } +#endif + + { U32 const magicNumber = MEM_readLE32(src); + DEBUGLOG(4, "reading magic number %08X (expecting %08X)", + (unsigned)magicNumber, ZSTD_MAGICNUMBER); + if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { + size_t const skippableSize = readSkippableFrameSize(src, srcSize); + FORWARD_IF_ERROR(skippableSize, "readSkippableFrameSize failed"); + assert(skippableSize <= srcSize); + + src = (const BYTE *)src + skippableSize; + srcSize -= skippableSize; + continue; + } } + + if (ddict) { + /* we were called from ZSTD_decompress_usingDDict */ + FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(dctx, ddict), ""); + } else { + /* this will initialize correctly with no dict if dict == NULL, so + * use this in all cases but ddict */ + FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDict(dctx, dict, dictSize), ""); + } + ZSTD_checkContinuity(dctx, dst, dstCapacity); + + { const size_t res = ZSTD_decompressFrame(dctx, dst, dstCapacity, + &src, &srcSize); + RETURN_ERROR_IF( + (ZSTD_getErrorCode(res) == ZSTD_error_prefix_unknown) + && (moreThan1Frame==1), + srcSize_wrong, + "At least one frame successfully completed, " + "but following bytes are garbage: " + "it's more likely to be a srcSize error, " + "specifying more input bytes than size of frame(s). " + "Note: one could be unlucky, it might be a corruption error instead, " + "happening right at the place where we expect zstd magic bytes. 
" + "But this is _much_ less likely than a srcSize field error."); + if (ZSTD_isError(res)) return res; + assert(res <= dstCapacity); + if (res != 0) + dst = (BYTE*)dst + res; + dstCapacity -= res; + } + moreThan1Frame = 1; + } /* while (srcSize >= ZSTD_frameHeaderSize_prefix) */ + + RETURN_ERROR_IF(srcSize, srcSize_wrong, "input not entirely consumed"); + + return (size_t)((BYTE*)dst - (BYTE*)dststart); +} + +size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict, size_t dictSize) +{ + return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize, dict, dictSize, NULL); +} + + +static ZSTD_DDict const* ZSTD_getDDict(ZSTD_DCtx* dctx) +{ + switch (dctx->dictUses) { + default: + assert(0 /* Impossible */); + /* fall-through */ + case ZSTD_dont_use: + ZSTD_clearDict(dctx); + return NULL; + case ZSTD_use_indefinitely: + return dctx->ddict; + case ZSTD_use_once: + dctx->dictUses = ZSTD_dont_use; + return dctx->ddict; + } +} + +size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + return ZSTD_decompress_usingDDict(dctx, dst, dstCapacity, src, srcSize, ZSTD_getDDict(dctx)); +} + + +size_t ZSTD_decompress(void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ +#if defined(ZSTD_HEAPMODE) && (ZSTD_HEAPMODE>=1) + size_t regenSize; + ZSTD_DCtx* const dctx = ZSTD_createDCtx(); + RETURN_ERROR_IF(dctx==NULL, memory_allocation, "NULL pointer!"); + regenSize = ZSTD_decompressDCtx(dctx, dst, dstCapacity, src, srcSize); + ZSTD_freeDCtx(dctx); + return regenSize; +#else /* stack mode */ + ZSTD_DCtx dctx; + ZSTD_initDCtx_internal(&dctx); + return ZSTD_decompressDCtx(&dctx, dst, dstCapacity, src, srcSize); +#endif +} + + +/*-************************************** +* Advanced Streaming Decompression API +* Bufferless and synchronous +****************************************/ +size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx) { return dctx->expected; } + +/** + * Similar to ZSTD_nextSrcSizeToDecompress(), but when when a block input can be streamed, + * we allow taking a partial block as the input. Currently only raw uncompressed blocks can + * be streamed. + * + * For blocks that can be streamed, this allows us to reduce the latency until we produce + * output, and avoid copying the input. + * + * @param inputSize - The total amount of input that the caller currently has. 
+ */ +static size_t ZSTD_nextSrcSizeToDecompressWithInputSize(ZSTD_DCtx* dctx, size_t inputSize) { + if (!(dctx->stage == ZSTDds_decompressBlock || dctx->stage == ZSTDds_decompressLastBlock)) + return dctx->expected; + if (dctx->bType != bt_raw) + return dctx->expected; + return MIN(MAX(inputSize, 1), dctx->expected); +} + +ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx) { + switch(dctx->stage) + { + default: /* should not happen */ + assert(0); + case ZSTDds_getFrameHeaderSize: + case ZSTDds_decodeFrameHeader: + return ZSTDnit_frameHeader; + case ZSTDds_decodeBlockHeader: + return ZSTDnit_blockHeader; + case ZSTDds_decompressBlock: + return ZSTDnit_block; + case ZSTDds_decompressLastBlock: + return ZSTDnit_lastBlock; + case ZSTDds_checkChecksum: + return ZSTDnit_checksum; + case ZSTDds_decodeSkippableHeader: + case ZSTDds_skipFrame: + return ZSTDnit_skippableFrame; + } +} + +static int ZSTD_isSkipFrame(ZSTD_DCtx* dctx) { return dctx->stage == ZSTDds_skipFrame; } + +/** ZSTD_decompressContinue() : + * srcSize : must be the exact nb of bytes expected (see ZSTD_nextSrcSizeToDecompress()) + * @return : nb of bytes generated into `dst` (necessarily <= `dstCapacity) + * or an error code, which can be tested using ZSTD_isError() */ +size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + DEBUGLOG(5, "ZSTD_decompressContinue (srcSize:%u)", (unsigned)srcSize); + /* Sanity check */ + RETURN_ERROR_IF(srcSize != ZSTD_nextSrcSizeToDecompressWithInputSize(dctx, srcSize), srcSize_wrong, "not allowed"); + ZSTD_checkContinuity(dctx, dst, dstCapacity); + + dctx->processedCSize += srcSize; + + switch (dctx->stage) + { + case ZSTDds_getFrameHeaderSize : + assert(src != NULL); + if (dctx->format == ZSTD_f_zstd1) { /* allows header */ + assert(srcSize >= ZSTD_FRAMEIDSIZE); /* to read skippable magic number */ + if ((MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */ + ZSTD_memcpy(dctx->headerBuffer, src, srcSize); + dctx->expected = ZSTD_SKIPPABLEHEADERSIZE - srcSize; /* remaining to load to get full skippable frame header */ + dctx->stage = ZSTDds_decodeSkippableHeader; + return 0; + } } + dctx->headerSize = ZSTD_frameHeaderSize_internal(src, srcSize, dctx->format); + if (ZSTD_isError(dctx->headerSize)) return dctx->headerSize; + ZSTD_memcpy(dctx->headerBuffer, src, srcSize); + dctx->expected = dctx->headerSize - srcSize; + dctx->stage = ZSTDds_decodeFrameHeader; + return 0; + + case ZSTDds_decodeFrameHeader: + assert(src != NULL); + ZSTD_memcpy(dctx->headerBuffer + (dctx->headerSize - srcSize), src, srcSize); + FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize), ""); + dctx->expected = ZSTD_blockHeaderSize; + dctx->stage = ZSTDds_decodeBlockHeader; + return 0; + + case ZSTDds_decodeBlockHeader: + { blockProperties_t bp; + size_t const cBlockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp); + if (ZSTD_isError(cBlockSize)) return cBlockSize; + RETURN_ERROR_IF(cBlockSize > dctx->fParams.blockSizeMax, corruption_detected, "Block Size Exceeds Maximum"); + dctx->expected = cBlockSize; + dctx->bType = bp.blockType; + dctx->rleSize = bp.origSize; + if (cBlockSize) { + dctx->stage = bp.lastBlock ? 
ZSTDds_decompressLastBlock : ZSTDds_decompressBlock; + return 0; + } + /* empty block */ + if (bp.lastBlock) { + if (dctx->fParams.checksumFlag) { + dctx->expected = 4; + dctx->stage = ZSTDds_checkChecksum; + } else { + dctx->expected = 0; /* end of frame */ + dctx->stage = ZSTDds_getFrameHeaderSize; + } + } else { + dctx->expected = ZSTD_blockHeaderSize; /* jump to next header */ + dctx->stage = ZSTDds_decodeBlockHeader; + } + return 0; + } + + case ZSTDds_decompressLastBlock: + case ZSTDds_decompressBlock: + DEBUGLOG(5, "ZSTD_decompressContinue: case ZSTDds_decompressBlock"); + { size_t rSize; + switch(dctx->bType) + { + case bt_compressed: + DEBUGLOG(5, "ZSTD_decompressContinue: case bt_compressed"); + rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 1); + dctx->expected = 0; /* Streaming not supported */ + break; + case bt_raw : + assert(srcSize <= dctx->expected); + rSize = ZSTD_copyRawBlock(dst, dstCapacity, src, srcSize); + FORWARD_IF_ERROR(rSize, "ZSTD_copyRawBlock failed"); + assert(rSize == srcSize); + dctx->expected -= rSize; + break; + case bt_rle : + rSize = ZSTD_setRleBlock(dst, dstCapacity, *(const BYTE*)src, dctx->rleSize); + dctx->expected = 0; /* Streaming not supported */ + break; + case bt_reserved : /* should never happen */ + default: + RETURN_ERROR(corruption_detected, "invalid block type"); + } + FORWARD_IF_ERROR(rSize, ""); + RETURN_ERROR_IF(rSize > dctx->fParams.blockSizeMax, corruption_detected, "Decompressed Block Size Exceeds Maximum"); + DEBUGLOG(5, "ZSTD_decompressContinue: decoded size from block : %u", (unsigned)rSize); + dctx->decodedSize += rSize; + if (dctx->validateChecksum) XXH64_update(&dctx->xxhState, dst, rSize); + dctx->previousDstEnd = (char*)dst + rSize; + + /* Stay on the same stage until we are finished streaming the block. 
*/ + if (dctx->expected > 0) { + return rSize; + } + + if (dctx->stage == ZSTDds_decompressLastBlock) { /* end of frame */ + DEBUGLOG(4, "ZSTD_decompressContinue: decoded size from frame : %u", (unsigned)dctx->decodedSize); + RETURN_ERROR_IF( + dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN + && dctx->decodedSize != dctx->fParams.frameContentSize, + corruption_detected, ""); + if (dctx->fParams.checksumFlag) { /* another round for frame checksum */ + dctx->expected = 4; + dctx->stage = ZSTDds_checkChecksum; + } else { + ZSTD_DCtx_trace_end(dctx, dctx->decodedSize, dctx->processedCSize, /* streaming */ 1); + dctx->expected = 0; /* ends here */ + dctx->stage = ZSTDds_getFrameHeaderSize; + } + } else { + dctx->stage = ZSTDds_decodeBlockHeader; + dctx->expected = ZSTD_blockHeaderSize; + } + return rSize; + } + + case ZSTDds_checkChecksum: + assert(srcSize == 4); /* guaranteed by dctx->expected */ + { + if (dctx->validateChecksum) { + U32 const h32 = (U32)XXH64_digest(&dctx->xxhState); + U32 const check32 = MEM_readLE32(src); + DEBUGLOG(4, "ZSTD_decompressContinue: checksum : calculated %08X :: %08X read", (unsigned)h32, (unsigned)check32); + RETURN_ERROR_IF(check32 != h32, checksum_wrong, ""); + } + ZSTD_DCtx_trace_end(dctx, dctx->decodedSize, dctx->processedCSize, /* streaming */ 1); + dctx->expected = 0; + dctx->stage = ZSTDds_getFrameHeaderSize; + return 0; + } + + case ZSTDds_decodeSkippableHeader: + assert(src != NULL); + assert(srcSize <= ZSTD_SKIPPABLEHEADERSIZE); + ZSTD_memcpy(dctx->headerBuffer + (ZSTD_SKIPPABLEHEADERSIZE - srcSize), src, srcSize); /* complete skippable header */ + dctx->expected = MEM_readLE32(dctx->headerBuffer + ZSTD_FRAMEIDSIZE); /* note : dctx->expected can grow seriously large, beyond local buffer size */ + dctx->stage = ZSTDds_skipFrame; + return 0; + + case ZSTDds_skipFrame: + dctx->expected = 0; + dctx->stage = ZSTDds_getFrameHeaderSize; + return 0; + + default: + assert(0); /* impossible */ + RETURN_ERROR(GENERIC, "impossible to reach"); /* some compiler require default to do something */ + } +} + + +static size_t ZSTD_refDictContent(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) +{ + dctx->dictEnd = dctx->previousDstEnd; + dctx->virtualStart = (const char*)dict - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart)); + dctx->prefixStart = dict; + dctx->previousDstEnd = (const char*)dict + dictSize; +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + dctx->dictContentBeginForFuzzing = dctx->prefixStart; + dctx->dictContentEndForFuzzing = dctx->previousDstEnd; +#endif + return 0; +} + +/*! ZSTD_loadDEntropy() : + * dict : must point at beginning of a valid zstd dictionary. 
+ * @return : size of entropy tables read */ +size_t +ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy, + const void* const dict, size_t const dictSize) +{ + const BYTE* dictPtr = (const BYTE*)dict; + const BYTE* const dictEnd = dictPtr + dictSize; + + RETURN_ERROR_IF(dictSize <= 8, dictionary_corrupted, "dict is too small"); + assert(MEM_readLE32(dict) == ZSTD_MAGIC_DICTIONARY); /* dict must be valid */ + dictPtr += 8; /* skip header = magic + dictID */ + + ZSTD_STATIC_ASSERT(offsetof(ZSTD_entropyDTables_t, OFTable) == offsetof(ZSTD_entropyDTables_t, LLTable) + sizeof(entropy->LLTable)); + ZSTD_STATIC_ASSERT(offsetof(ZSTD_entropyDTables_t, MLTable) == offsetof(ZSTD_entropyDTables_t, OFTable) + sizeof(entropy->OFTable)); + ZSTD_STATIC_ASSERT(sizeof(entropy->LLTable) + sizeof(entropy->OFTable) + sizeof(entropy->MLTable) >= HUF_DECOMPRESS_WORKSPACE_SIZE); + { void* const workspace = &entropy->LLTable; /* use fse tables as temporary workspace; implies fse tables are grouped together */ + size_t const workspaceSize = sizeof(entropy->LLTable) + sizeof(entropy->OFTable) + sizeof(entropy->MLTable); +#ifdef HUF_FORCE_DECOMPRESS_X1 + /* in minimal huffman, we always use X1 variants */ + size_t const hSize = HUF_readDTableX1_wksp(entropy->hufTable, + dictPtr, dictEnd - dictPtr, + workspace, workspaceSize); +#else + size_t const hSize = HUF_readDTableX2_wksp(entropy->hufTable, + dictPtr, (size_t)(dictEnd - dictPtr), + workspace, workspaceSize); +#endif + RETURN_ERROR_IF(HUF_isError(hSize), dictionary_corrupted, ""); + dictPtr += hSize; + } + + { short offcodeNCount[MaxOff+1]; + unsigned offcodeMaxValue = MaxOff, offcodeLog; + size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, (size_t)(dictEnd-dictPtr)); + RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(offcodeMaxValue > MaxOff, dictionary_corrupted, ""); + RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted, ""); + ZSTD_buildFSETable( entropy->OFTable, + offcodeNCount, offcodeMaxValue, + OF_base, OF_bits, + offcodeLog, + entropy->workspace, sizeof(entropy->workspace), + /* bmi2 */0); + dictPtr += offcodeHeaderSize; + } + + { short matchlengthNCount[MaxML+1]; + unsigned matchlengthMaxValue = MaxML, matchlengthLog; + size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, (size_t)(dictEnd-dictPtr)); + RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(matchlengthMaxValue > MaxML, dictionary_corrupted, ""); + RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted, ""); + ZSTD_buildFSETable( entropy->MLTable, + matchlengthNCount, matchlengthMaxValue, + ML_base, ML_bits, + matchlengthLog, + entropy->workspace, sizeof(entropy->workspace), + /* bmi2 */ 0); + dictPtr += matchlengthHeaderSize; + } + + { short litlengthNCount[MaxLL+1]; + unsigned litlengthMaxValue = MaxLL, litlengthLog; + size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, (size_t)(dictEnd-dictPtr)); + RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(litlengthMaxValue > MaxLL, dictionary_corrupted, ""); + RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted, ""); + ZSTD_buildFSETable( entropy->LLTable, + litlengthNCount, litlengthMaxValue, + LL_base, LL_bits, + litlengthLog, + entropy->workspace, sizeof(entropy->workspace), + /* bmi2 */ 0); + dictPtr += 
litlengthHeaderSize; + } + + RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted, ""); + { int i; + size_t const dictContentSize = (size_t)(dictEnd - (dictPtr+12)); + for (i=0; i<3; i++) { + U32 const rep = MEM_readLE32(dictPtr); dictPtr += 4; + RETURN_ERROR_IF(rep==0 || rep > dictContentSize, + dictionary_corrupted, ""); + entropy->rep[i] = rep; + } } + + return (size_t)(dictPtr - (const BYTE*)dict); +} + +static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) +{ + if (dictSize < 8) return ZSTD_refDictContent(dctx, dict, dictSize); + { U32 const magic = MEM_readLE32(dict); + if (magic != ZSTD_MAGIC_DICTIONARY) { + return ZSTD_refDictContent(dctx, dict, dictSize); /* pure content mode */ + } } + dctx->dictID = MEM_readLE32((const char*)dict + ZSTD_FRAMEIDSIZE); + + /* load entropy tables */ + { size_t const eSize = ZSTD_loadDEntropy(&dctx->entropy, dict, dictSize); + RETURN_ERROR_IF(ZSTD_isError(eSize), dictionary_corrupted, ""); + dict = (const char*)dict + eSize; + dictSize -= eSize; + } + dctx->litEntropy = dctx->fseEntropy = 1; + + /* reference dictionary content */ + return ZSTD_refDictContent(dctx, dict, dictSize); +} + +size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx) +{ + assert(dctx != NULL); +#if ZSTD_TRACE + dctx->traceCtx = (ZSTD_trace_decompress_begin != NULL) ? ZSTD_trace_decompress_begin(dctx) : 0; +#endif + dctx->expected = ZSTD_startingInputLength(dctx->format); /* dctx->format must be properly set */ + dctx->stage = ZSTDds_getFrameHeaderSize; + dctx->processedCSize = 0; + dctx->decodedSize = 0; + dctx->previousDstEnd = NULL; + dctx->prefixStart = NULL; + dctx->virtualStart = NULL; + dctx->dictEnd = NULL; + dctx->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */ + dctx->litEntropy = dctx->fseEntropy = 0; + dctx->dictID = 0; + dctx->bType = bt_reserved; + ZSTD_STATIC_ASSERT(sizeof(dctx->entropy.rep) == sizeof(repStartValue)); + ZSTD_memcpy(dctx->entropy.rep, repStartValue, sizeof(repStartValue)); /* initial repcodes */ + dctx->LLTptr = dctx->entropy.LLTable; + dctx->MLTptr = dctx->entropy.MLTable; + dctx->OFTptr = dctx->entropy.OFTable; + dctx->HUFptr = dctx->entropy.hufTable; + return 0; +} + +size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) +{ + FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) , ""); + if (dict && dictSize) + RETURN_ERROR_IF( + ZSTD_isError(ZSTD_decompress_insertDictionary(dctx, dict, dictSize)), + dictionary_corrupted, ""); + return 0; +} + + +/* ====== ZSTD_DDict ====== */ + +size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) +{ + DEBUGLOG(4, "ZSTD_decompressBegin_usingDDict"); + assert(dctx != NULL); + if (ddict) { + const char* const dictStart = (const char*)ZSTD_DDict_dictContent(ddict); + size_t const dictSize = ZSTD_DDict_dictSize(ddict); + const void* const dictEnd = dictStart + dictSize; + dctx->ddictIsCold = (dctx->dictEnd != dictEnd); + DEBUGLOG(4, "DDict is %s", + dctx->ddictIsCold ? "~cold~" : "hot!"); + } + FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) , ""); + if (ddict) { /* NULL ddict is equivalent to no dictionary */ + ZSTD_copyDDictParameters(dctx, ddict); + } + return 0; +} + +/*! ZSTD_getDictID_fromDict() : + * Provides the dictID stored within dictionary. + * if @return == 0, the dictionary is not conformant with Zstandard specification. + * It can still be loaded, but as a content-only dictionary. 
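+ *  Illustrative use (sketch only; `dictBuf` / `dictLen` stand for a caller-provided buffer) :
+ *      unsigned const id = ZSTD_getDictID_fromDict(dictBuf, dictLen);
+ *      if (id == 0) { ... not conformant : loaded as raw content ... }
+ *      else        { ... conformant dictionary, registered under `id` ... }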
*/ +unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize) +{ + if (dictSize < 8) return 0; + if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) return 0; + return MEM_readLE32((const char*)dict + ZSTD_FRAMEIDSIZE); +} + +/*! ZSTD_getDictID_fromFrame() : + * Provides the dictID required to decompress frame stored within `src`. + * If @return == 0, the dictID could not be decoded. + * This could for one of the following reasons : + * - The frame does not require a dictionary (most common case). + * - The frame was built with dictID intentionally removed. + * Needed dictionary is a hidden information. + * Note : this use case also happens when using a non-conformant dictionary. + * - `srcSize` is too small, and as a result, frame header could not be decoded. + * Note : possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`. + * - This is not a Zstandard frame. + * When identifying the exact failure cause, it's possible to use + * ZSTD_getFrameHeader(), which will provide a more precise error code. */ +unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize) +{ + ZSTD_frameHeader zfp = { 0, 0, 0, ZSTD_frame, 0, 0, 0 }; + size_t const hError = ZSTD_getFrameHeader(&zfp, src, srcSize); + if (ZSTD_isError(hError)) return 0; + return zfp.dictID; +} + + +/*! ZSTD_decompress_usingDDict() : +* Decompression using a pre-digested Dictionary +* Use dictionary without significant overhead. */ +size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_DDict* ddict) +{ + /* pass content and size in case legacy frames are encountered */ + return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize, + NULL, 0, + ddict); +} + + +/*===================================== +* Streaming decompression +*====================================*/ + +ZSTD_DStream* ZSTD_createDStream(void) +{ + DEBUGLOG(3, "ZSTD_createDStream"); + return ZSTD_createDStream_advanced(ZSTD_defaultCMem); +} + +ZSTD_DStream* ZSTD_initStaticDStream(void *workspace, size_t workspaceSize) +{ + return ZSTD_initStaticDCtx(workspace, workspaceSize); +} + +ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem) +{ + return ZSTD_createDCtx_advanced(customMem); +} + +size_t ZSTD_freeDStream(ZSTD_DStream* zds) +{ + return ZSTD_freeDCtx(zds); +} + + +/* *** Initialization *** */ + +size_t ZSTD_DStreamInSize(void) { return ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize; } +size_t ZSTD_DStreamOutSize(void) { return ZSTD_BLOCKSIZE_MAX; } + +size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType) +{ + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); + ZSTD_clearDict(dctx); + if (dict && dictSize != 0) { + dctx->ddictLocal = ZSTD_createDDict_advanced(dict, dictSize, dictLoadMethod, dictContentType, dctx->customMem); + RETURN_ERROR_IF(dctx->ddictLocal == NULL, memory_allocation, "NULL pointer!"); + dctx->ddict = dctx->ddictLocal; + dctx->dictUses = ZSTD_use_indefinitely; + } + return 0; +} + +size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) +{ + return ZSTD_DCtx_loadDictionary_advanced(dctx, dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto); +} + +size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) +{ + return ZSTD_DCtx_loadDictionary_advanced(dctx, dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto); +} + +size_t 
ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType) +{ + FORWARD_IF_ERROR(ZSTD_DCtx_loadDictionary_advanced(dctx, prefix, prefixSize, ZSTD_dlm_byRef, dictContentType), ""); + dctx->dictUses = ZSTD_use_once; + return 0; +} + +size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize) +{ + return ZSTD_DCtx_refPrefix_advanced(dctx, prefix, prefixSize, ZSTD_dct_rawContent); +} + + +/* ZSTD_initDStream_usingDict() : + * return : expected size, aka ZSTD_startingInputLength(). + * this function cannot fail */ +size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize) +{ + DEBUGLOG(4, "ZSTD_initDStream_usingDict"); + FORWARD_IF_ERROR( ZSTD_DCtx_reset(zds, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_DCtx_loadDictionary(zds, dict, dictSize) , ""); + return ZSTD_startingInputLength(zds->format); +} + +/* note : this variant can't fail */ +size_t ZSTD_initDStream(ZSTD_DStream* zds) +{ + DEBUGLOG(4, "ZSTD_initDStream"); + return ZSTD_initDStream_usingDDict(zds, NULL); +} + +/* ZSTD_initDStream_usingDDict() : + * ddict will just be referenced, and must outlive decompression session + * this function cannot fail */ +size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict) +{ + FORWARD_IF_ERROR( ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_DCtx_refDDict(dctx, ddict) , ""); + return ZSTD_startingInputLength(dctx->format); +} + +/* ZSTD_resetDStream() : + * return : expected size, aka ZSTD_startingInputLength(). + * this function cannot fail */ +size_t ZSTD_resetDStream(ZSTD_DStream* dctx) +{ + FORWARD_IF_ERROR(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only), ""); + return ZSTD_startingInputLength(dctx->format); +} + + +size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) +{ + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); + ZSTD_clearDict(dctx); + if (ddict) { + dctx->ddict = ddict; + dctx->dictUses = ZSTD_use_indefinitely; + if (dctx->refMultipleDDicts == ZSTD_rmd_refMultipleDDicts) { + if (dctx->ddictSet == NULL) { + dctx->ddictSet = ZSTD_createDDictHashSet(dctx->customMem); + if (!dctx->ddictSet) { + RETURN_ERROR(memory_allocation, "Failed to allocate memory for hash set!"); + } + } + assert(!dctx->staticSize); /* Impossible: ddictSet cannot have been allocated if static dctx */ + FORWARD_IF_ERROR(ZSTD_DDictHashSet_addDDict(dctx->ddictSet, ddict, dctx->customMem), ""); + } + } + return 0; +} + +/* ZSTD_DCtx_setMaxWindowSize() : + * note : no direct equivalence in ZSTD_DCtx_setParameter, + * since this version sets windowSize, and the other sets windowLog */ +size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize) +{ + ZSTD_bounds const bounds = ZSTD_dParam_getBounds(ZSTD_d_windowLogMax); + size_t const min = (size_t)1 << bounds.lowerBound; + size_t const max = (size_t)1 << bounds.upperBound; + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); + RETURN_ERROR_IF(maxWindowSize < min, parameter_outOfBound, ""); + RETURN_ERROR_IF(maxWindowSize > max, parameter_outOfBound, ""); + dctx->maxWindowSize = maxWindowSize; + return 0; +} + +size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format) +{ + return ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, (int)format); +} + +ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam) +{ + ZSTD_bounds bounds = { 0, 0, 0 }; + switch(dParam) { + case ZSTD_d_windowLogMax: + bounds.lowerBound = 
ZSTD_WINDOWLOG_ABSOLUTEMIN; + bounds.upperBound = ZSTD_WINDOWLOG_MAX; + return bounds; + case ZSTD_d_format: + bounds.lowerBound = (int)ZSTD_f_zstd1; + bounds.upperBound = (int)ZSTD_f_zstd1_magicless; + ZSTD_STATIC_ASSERT(ZSTD_f_zstd1 < ZSTD_f_zstd1_magicless); + return bounds; + case ZSTD_d_stableOutBuffer: + bounds.lowerBound = (int)ZSTD_bm_buffered; + bounds.upperBound = (int)ZSTD_bm_stable; + return bounds; + case ZSTD_d_forceIgnoreChecksum: + bounds.lowerBound = (int)ZSTD_d_validateChecksum; + bounds.upperBound = (int)ZSTD_d_ignoreChecksum; + return bounds; + case ZSTD_d_refMultipleDDicts: + bounds.lowerBound = (int)ZSTD_rmd_refSingleDDict; + bounds.upperBound = (int)ZSTD_rmd_refMultipleDDicts; + return bounds; + default:; + } + bounds.error = ERROR(parameter_unsupported); + return bounds; +} + +/* ZSTD_dParam_withinBounds: + * @return 1 if value is within dParam bounds, + * 0 otherwise */ +static int ZSTD_dParam_withinBounds(ZSTD_dParameter dParam, int value) +{ + ZSTD_bounds const bounds = ZSTD_dParam_getBounds(dParam); + if (ZSTD_isError(bounds.error)) return 0; + if (value < bounds.lowerBound) return 0; + if (value > bounds.upperBound) return 0; + return 1; +} + +#define CHECK_DBOUNDS(p,v) { \ + RETURN_ERROR_IF(!ZSTD_dParam_withinBounds(p, v), parameter_outOfBound, ""); \ +} + +size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int* value) +{ + switch (param) { + case ZSTD_d_windowLogMax: + *value = (int)ZSTD_highbit32((U32)dctx->maxWindowSize); + return 0; + case ZSTD_d_format: + *value = (int)dctx->format; + return 0; + case ZSTD_d_stableOutBuffer: + *value = (int)dctx->outBufferMode; + return 0; + case ZSTD_d_forceIgnoreChecksum: + *value = (int)dctx->forceIgnoreChecksum; + return 0; + case ZSTD_d_refMultipleDDicts: + *value = (int)dctx->refMultipleDDicts; + return 0; + default:; + } + RETURN_ERROR(parameter_unsupported, ""); +} + +size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value) +{ + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); + switch(dParam) { + case ZSTD_d_windowLogMax: + if (value == 0) value = ZSTD_WINDOWLOG_LIMIT_DEFAULT; + CHECK_DBOUNDS(ZSTD_d_windowLogMax, value); + dctx->maxWindowSize = ((size_t)1) << value; + return 0; + case ZSTD_d_format: + CHECK_DBOUNDS(ZSTD_d_format, value); + dctx->format = (ZSTD_format_e)value; + return 0; + case ZSTD_d_stableOutBuffer: + CHECK_DBOUNDS(ZSTD_d_stableOutBuffer, value); + dctx->outBufferMode = (ZSTD_bufferMode_e)value; + return 0; + case ZSTD_d_forceIgnoreChecksum: + CHECK_DBOUNDS(ZSTD_d_forceIgnoreChecksum, value); + dctx->forceIgnoreChecksum = (ZSTD_forceIgnoreChecksum_e)value; + return 0; + case ZSTD_d_refMultipleDDicts: + CHECK_DBOUNDS(ZSTD_d_refMultipleDDicts, value); + if (dctx->staticSize != 0) { + RETURN_ERROR(parameter_unsupported, "Static dctx does not support multiple DDicts!"); + } + dctx->refMultipleDDicts = (ZSTD_refMultipleDDicts_e)value; + return 0; + default:; + } + RETURN_ERROR(parameter_unsupported, ""); +} + +size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset) +{ + if ( (reset == ZSTD_reset_session_only) + || (reset == ZSTD_reset_session_and_parameters) ) { + dctx->streamStage = zdss_init; + dctx->noForwardProgress = 0; + } + if ( (reset == ZSTD_reset_parameters) + || (reset == ZSTD_reset_session_and_parameters) ) { + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); + ZSTD_clearDict(dctx); + ZSTD_DCtx_resetParameters(dctx); + } + return 0; +} + + +size_t ZSTD_sizeof_DStream(const ZSTD_DStream* dctx) +{ + 
return ZSTD_sizeof_DCtx(dctx); +} + +size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize) +{ + size_t const blockSize = (size_t) MIN(windowSize, ZSTD_BLOCKSIZE_MAX); + unsigned long long const neededRBSize = windowSize + blockSize + (WILDCOPY_OVERLENGTH * 2); + unsigned long long const neededSize = MIN(frameContentSize, neededRBSize); + size_t const minRBSize = (size_t) neededSize; + RETURN_ERROR_IF((unsigned long long)minRBSize != neededSize, + frameParameter_windowTooLarge, ""); + return minRBSize; +} + +size_t ZSTD_estimateDStreamSize(size_t windowSize) +{ + size_t const blockSize = MIN(windowSize, ZSTD_BLOCKSIZE_MAX); + size_t const inBuffSize = blockSize; /* no block can be larger */ + size_t const outBuffSize = ZSTD_decodingBufferSize_min(windowSize, ZSTD_CONTENTSIZE_UNKNOWN); + return ZSTD_estimateDCtxSize() + inBuffSize + outBuffSize; +} + +size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize) +{ + U32 const windowSizeMax = 1U << ZSTD_WINDOWLOG_MAX; /* note : should be user-selectable, but requires an additional parameter (or a dctx) */ + ZSTD_frameHeader zfh; + size_t const err = ZSTD_getFrameHeader(&zfh, src, srcSize); + if (ZSTD_isError(err)) return err; + RETURN_ERROR_IF(err>0, srcSize_wrong, ""); + RETURN_ERROR_IF(zfh.windowSize > windowSizeMax, + frameParameter_windowTooLarge, ""); + return ZSTD_estimateDStreamSize((size_t)zfh.windowSize); +} + + +/* ***** Decompression ***** */ + +static int ZSTD_DCtx_isOverflow(ZSTD_DStream* zds, size_t const neededInBuffSize, size_t const neededOutBuffSize) +{ + return (zds->inBuffSize + zds->outBuffSize) >= (neededInBuffSize + neededOutBuffSize) * ZSTD_WORKSPACETOOLARGE_FACTOR; +} + +static void ZSTD_DCtx_updateOversizedDuration(ZSTD_DStream* zds, size_t const neededInBuffSize, size_t const neededOutBuffSize) +{ + if (ZSTD_DCtx_isOverflow(zds, neededInBuffSize, neededOutBuffSize)) + zds->oversizedDuration++; + else + zds->oversizedDuration = 0; +} + +static int ZSTD_DCtx_isOversizedTooLong(ZSTD_DStream* zds) +{ + return zds->oversizedDuration >= ZSTD_WORKSPACETOOLARGE_MAXDURATION; +} + +/* Checks that the output buffer hasn't changed if ZSTD_obm_stable is used. */ +static size_t ZSTD_checkOutBuffer(ZSTD_DStream const* zds, ZSTD_outBuffer const* output) +{ + ZSTD_outBuffer const expect = zds->expectedOutBuffer; + /* No requirement when ZSTD_obm_stable is not enabled. */ + if (zds->outBufferMode != ZSTD_bm_stable) + return 0; + /* Any buffer is allowed in zdss_init, this must be the same for every other call until + * the context is reset. + */ + if (zds->streamStage == zdss_init) + return 0; + /* The buffer must match our expectation exactly. */ + if (expect.dst == output->dst && expect.pos == output->pos && expect.size == output->size) + return 0; + RETURN_ERROR(dstBuffer_wrong, "ZSTD_d_stableOutBuffer enabled but output differs!"); +} + +/* Calls ZSTD_decompressContinue() with the right parameters for ZSTD_decompressStream() + * and updates the stage and the output buffer state. This call is extracted so it can be + * used both when reading directly from the ZSTD_inBuffer, and in buffered input mode. + * NOTE: You must break after calling this function since the streamStage is modified. + */ +static size_t ZSTD_decompressContinueStream( + ZSTD_DStream* zds, char** op, char* oend, + void const* src, size_t srcSize) { + int const isSkipFrame = ZSTD_isSkipFrame(zds); + if (zds->outBufferMode == ZSTD_bm_buffered) { + size_t const dstSize = isSkipFrame ? 
0 : zds->outBuffSize - zds->outStart; + size_t const decodedSize = ZSTD_decompressContinue(zds, + zds->outBuff + zds->outStart, dstSize, src, srcSize); + FORWARD_IF_ERROR(decodedSize, ""); + if (!decodedSize && !isSkipFrame) { + zds->streamStage = zdss_read; + } else { + zds->outEnd = zds->outStart + decodedSize; + zds->streamStage = zdss_flush; + } + } else { + /* Write directly into the output buffer */ + size_t const dstSize = isSkipFrame ? 0 : (size_t)(oend - *op); + size_t const decodedSize = ZSTD_decompressContinue(zds, *op, dstSize, src, srcSize); + FORWARD_IF_ERROR(decodedSize, ""); + *op += decodedSize; + /* Flushing is not needed. */ + zds->streamStage = zdss_read; + assert(*op <= oend); + assert(zds->outBufferMode == ZSTD_bm_stable); + } + return 0; +} + +size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input) +{ + const char* const src = (const char*)input->src; + const char* const istart = input->pos != 0 ? src + input->pos : src; + const char* const iend = input->size != 0 ? src + input->size : src; + const char* ip = istart; + char* const dst = (char*)output->dst; + char* const ostart = output->pos != 0 ? dst + output->pos : dst; + char* const oend = output->size != 0 ? dst + output->size : dst; + char* op = ostart; + U32 someMoreWork = 1; + + DEBUGLOG(5, "ZSTD_decompressStream"); + RETURN_ERROR_IF( + input->pos > input->size, + srcSize_wrong, + "forbidden. in: pos: %u vs size: %u", + (U32)input->pos, (U32)input->size); + RETURN_ERROR_IF( + output->pos > output->size, + dstSize_tooSmall, + "forbidden. out: pos: %u vs size: %u", + (U32)output->pos, (U32)output->size); + DEBUGLOG(5, "input size : %u", (U32)(input->size - input->pos)); + FORWARD_IF_ERROR(ZSTD_checkOutBuffer(zds, output), ""); + + while (someMoreWork) { + switch(zds->streamStage) + { + case zdss_init : + DEBUGLOG(5, "stage zdss_init => transparent reset "); + zds->streamStage = zdss_loadHeader; + zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0; + zds->legacyVersion = 0; + zds->hostageByte = 0; + zds->expectedOutBuffer = *output; + /* fall-through */ + + case zdss_loadHeader : + DEBUGLOG(5, "stage zdss_loadHeader (srcSize : %u)", (U32)(iend - ip)); +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) + if (zds->legacyVersion) { + RETURN_ERROR_IF(zds->staticSize, memory_allocation, + "legacy support is incompatible with static dctx"); + { size_t const hint = ZSTD_decompressLegacyStream(zds->legacyContext, zds->legacyVersion, output, input); + if (hint==0) zds->streamStage = zdss_init; + return hint; + } } +#endif + { size_t const hSize = ZSTD_getFrameHeader_advanced(&zds->fParams, zds->headerBuffer, zds->lhSize, zds->format); + if (zds->refMultipleDDicts && zds->ddictSet) { + ZSTD_DCtx_selectFrameDDict(zds); + } + DEBUGLOG(5, "header size : %u", (U32)hSize); + if (ZSTD_isError(hSize)) { +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) + U32 const legacyVersion = ZSTD_isLegacy(istart, iend-istart); + if (legacyVersion) { + ZSTD_DDict const* const ddict = ZSTD_getDDict(zds); + const void* const dict = ddict ? ZSTD_DDict_dictContent(ddict) : NULL; + size_t const dictSize = ddict ? 
ZSTD_DDict_dictSize(ddict) : 0; + DEBUGLOG(5, "ZSTD_decompressStream: detected legacy version v0.%u", legacyVersion); + RETURN_ERROR_IF(zds->staticSize, memory_allocation, + "legacy support is incompatible with static dctx"); + FORWARD_IF_ERROR(ZSTD_initLegacyStream(&zds->legacyContext, + zds->previousLegacyVersion, legacyVersion, + dict, dictSize), ""); + zds->legacyVersion = zds->previousLegacyVersion = legacyVersion; + { size_t const hint = ZSTD_decompressLegacyStream(zds->legacyContext, legacyVersion, output, input); + if (hint==0) zds->streamStage = zdss_init; /* or stay in stage zdss_loadHeader */ + return hint; + } } +#endif + return hSize; /* error */ + } + if (hSize != 0) { /* need more input */ + size_t const toLoad = hSize - zds->lhSize; /* if hSize!=0, hSize > zds->lhSize */ + size_t const remainingInput = (size_t)(iend-ip); + assert(iend >= ip); + if (toLoad > remainingInput) { /* not enough input to load full header */ + if (remainingInput > 0) { + ZSTD_memcpy(zds->headerBuffer + zds->lhSize, ip, remainingInput); + zds->lhSize += remainingInput; + } + input->pos = input->size; + return (MAX((size_t)ZSTD_FRAMEHEADERSIZE_MIN(zds->format), hSize) - zds->lhSize) + ZSTD_blockHeaderSize; /* remaining header bytes + next block header */ + } + assert(ip != NULL); + ZSTD_memcpy(zds->headerBuffer + zds->lhSize, ip, toLoad); zds->lhSize = hSize; ip += toLoad; + break; + } } + + /* check for single-pass mode opportunity */ + if (zds->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN + && zds->fParams.frameType != ZSTD_skippableFrame + && (U64)(size_t)(oend-op) >= zds->fParams.frameContentSize) { + size_t const cSize = ZSTD_findFrameCompressedSize(istart, (size_t)(iend-istart)); + if (cSize <= (size_t)(iend-istart)) { + /* shortcut : using single-pass mode */ + size_t const decompressedSize = ZSTD_decompress_usingDDict(zds, op, (size_t)(oend-op), istart, cSize, ZSTD_getDDict(zds)); + if (ZSTD_isError(decompressedSize)) return decompressedSize; + DEBUGLOG(4, "shortcut to single-pass ZSTD_decompress_usingDDict()") + ip = istart + cSize; + op += decompressedSize; + zds->expected = 0; + zds->streamStage = zdss_init; + someMoreWork = 0; + break; + } } + + /* Check output buffer is large enough for ZSTD_odm_stable. 
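+ * In stable-output mode the frame is written straight into the caller's buffer,
+ * so e.g. a frame announcing frameContentSize == 100 needs at least 100 writable
+ * bytes left in `output` up front; the check below enforces exactly that.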
*/ + if (zds->outBufferMode == ZSTD_bm_stable + && zds->fParams.frameType != ZSTD_skippableFrame + && zds->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN + && (U64)(size_t)(oend-op) < zds->fParams.frameContentSize) { + RETURN_ERROR(dstSize_tooSmall, "ZSTD_obm_stable passed but ZSTD_outBuffer is too small"); + } + + /* Consume header (see ZSTDds_decodeFrameHeader) */ + DEBUGLOG(4, "Consume header"); + FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(zds, ZSTD_getDDict(zds)), ""); + + if ((MEM_readLE32(zds->headerBuffer) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */ + zds->expected = MEM_readLE32(zds->headerBuffer + ZSTD_FRAMEIDSIZE); + zds->stage = ZSTDds_skipFrame; + } else { + FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(zds, zds->headerBuffer, zds->lhSize), ""); + zds->expected = ZSTD_blockHeaderSize; + zds->stage = ZSTDds_decodeBlockHeader; + } + + /* control buffer memory usage */ + DEBUGLOG(4, "Control max memory usage (%u KB <= max %u KB)", + (U32)(zds->fParams.windowSize >>10), + (U32)(zds->maxWindowSize >> 10) ); + zds->fParams.windowSize = MAX(zds->fParams.windowSize, 1U << ZSTD_WINDOWLOG_ABSOLUTEMIN); + RETURN_ERROR_IF(zds->fParams.windowSize > zds->maxWindowSize, + frameParameter_windowTooLarge, ""); + + /* Adapt buffer sizes to frame header instructions */ + { size_t const neededInBuffSize = MAX(zds->fParams.blockSizeMax, 4 /* frame checksum */); + size_t const neededOutBuffSize = zds->outBufferMode == ZSTD_bm_buffered + ? ZSTD_decodingBufferSize_min(zds->fParams.windowSize, zds->fParams.frameContentSize) + : 0; + + ZSTD_DCtx_updateOversizedDuration(zds, neededInBuffSize, neededOutBuffSize); + + { int const tooSmall = (zds->inBuffSize < neededInBuffSize) || (zds->outBuffSize < neededOutBuffSize); + int const tooLarge = ZSTD_DCtx_isOversizedTooLong(zds); + + if (tooSmall || tooLarge) { + size_t const bufferSize = neededInBuffSize + neededOutBuffSize; + DEBUGLOG(4, "inBuff : from %u to %u", + (U32)zds->inBuffSize, (U32)neededInBuffSize); + DEBUGLOG(4, "outBuff : from %u to %u", + (U32)zds->outBuffSize, (U32)neededOutBuffSize); + if (zds->staticSize) { /* static DCtx */ + DEBUGLOG(4, "staticSize : %u", (U32)zds->staticSize); + assert(zds->staticSize >= sizeof(ZSTD_DCtx)); /* controlled at init */ + RETURN_ERROR_IF( + bufferSize > zds->staticSize - sizeof(ZSTD_DCtx), + memory_allocation, ""); + } else { + ZSTD_customFree(zds->inBuff, zds->customMem); + zds->inBuffSize = 0; + zds->outBuffSize = 0; + zds->inBuff = (char*)ZSTD_customMalloc(bufferSize, zds->customMem); + RETURN_ERROR_IF(zds->inBuff == NULL, memory_allocation, ""); + } + zds->inBuffSize = neededInBuffSize; + zds->outBuff = zds->inBuff + zds->inBuffSize; + zds->outBuffSize = neededOutBuffSize; + } } } + zds->streamStage = zdss_read; + /* fall-through */ + + case zdss_read: + DEBUGLOG(5, "stage zdss_read"); + { size_t const neededInSize = ZSTD_nextSrcSizeToDecompressWithInputSize(zds, (size_t)(iend - ip)); + DEBUGLOG(5, "neededInSize = %u", (U32)neededInSize); + if (neededInSize==0) { /* end of frame */ + zds->streamStage = zdss_init; + someMoreWork = 0; + break; + } + if ((size_t)(iend-ip) >= neededInSize) { /* decode directly from src */ + FORWARD_IF_ERROR(ZSTD_decompressContinueStream(zds, &op, oend, ip, neededInSize), ""); + ip += neededInSize; + /* Function modifies the stage so we must break */ + break; + } } + if (ip==iend) { someMoreWork = 0; break; } /* no more input */ + zds->streamStage = zdss_load; + /* fall-through */ + + case zdss_load: + { size_t const neededInSize = 
ZSTD_nextSrcSizeToDecompress(zds); + size_t const toLoad = neededInSize - zds->inPos; + int const isSkipFrame = ZSTD_isSkipFrame(zds); + size_t loadedSize; + /* At this point we shouldn't be decompressing a block that we can stream. */ + assert(neededInSize == ZSTD_nextSrcSizeToDecompressWithInputSize(zds, iend - ip)); + if (isSkipFrame) { + loadedSize = MIN(toLoad, (size_t)(iend-ip)); + } else { + RETURN_ERROR_IF(toLoad > zds->inBuffSize - zds->inPos, + corruption_detected, + "should never happen"); + loadedSize = ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, (size_t)(iend-ip)); + } + ip += loadedSize; + zds->inPos += loadedSize; + if (loadedSize < toLoad) { someMoreWork = 0; break; } /* not enough input, wait for more */ + + /* decode loaded input */ + zds->inPos = 0; /* input is consumed */ + FORWARD_IF_ERROR(ZSTD_decompressContinueStream(zds, &op, oend, zds->inBuff, neededInSize), ""); + /* Function modifies the stage so we must break */ + break; + } + case zdss_flush: + { size_t const toFlushSize = zds->outEnd - zds->outStart; + size_t const flushedSize = ZSTD_limitCopy(op, (size_t)(oend-op), zds->outBuff + zds->outStart, toFlushSize); + op += flushedSize; + zds->outStart += flushedSize; + if (flushedSize == toFlushSize) { /* flush completed */ + zds->streamStage = zdss_read; + if ( (zds->outBuffSize < zds->fParams.frameContentSize) + && (zds->outStart + zds->fParams.blockSizeMax > zds->outBuffSize) ) { + DEBUGLOG(5, "restart filling outBuff from beginning (left:%i, needed:%u)", + (int)(zds->outBuffSize - zds->outStart), + (U32)zds->fParams.blockSizeMax); + zds->outStart = zds->outEnd = 0; + } + break; + } } + /* cannot complete flush */ + someMoreWork = 0; + break; + + default: + assert(0); /* impossible */ + RETURN_ERROR(GENERIC, "impossible to reach"); /* some compiler require default to do something */ + } } + + /* result */ + input->pos = (size_t)(ip - (const char*)(input->src)); + output->pos = (size_t)(op - (char*)(output->dst)); + + /* Update the expected output buffer for ZSTD_obm_stable. 
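+ * The snapshot taken here is what ZSTD_checkOutBuffer() compares against on the
+ * next call : in stable-output mode the caller must hand back the same dst/size/pos.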
*/ + zds->expectedOutBuffer = *output; + + if ((ip==istart) && (op==ostart)) { /* no forward progress */ + zds->noForwardProgress ++; + if (zds->noForwardProgress >= ZSTD_NO_FORWARD_PROGRESS_MAX) { + RETURN_ERROR_IF(op==oend, dstSize_tooSmall, ""); + RETURN_ERROR_IF(ip==iend, srcSize_wrong, ""); + assert(0); + } + } else { + zds->noForwardProgress = 0; + } + { size_t nextSrcSizeHint = ZSTD_nextSrcSizeToDecompress(zds); + if (!nextSrcSizeHint) { /* frame fully decoded */ + if (zds->outEnd == zds->outStart) { /* output fully flushed */ + if (zds->hostageByte) { + if (input->pos >= input->size) { + /* can't release hostage (not present) */ + zds->streamStage = zdss_read; + return 1; + } + input->pos++; /* release hostage */ + } /* zds->hostageByte */ + return 0; + } /* zds->outEnd == zds->outStart */ + if (!zds->hostageByte) { /* output not fully flushed; keep last byte as hostage; will be released when all output is flushed */ + input->pos--; /* note : pos > 0, otherwise, impossible to finish reading last block */ + zds->hostageByte=1; + } + return 1; + } /* nextSrcSizeHint==0 */ + nextSrcSizeHint += ZSTD_blockHeaderSize * (ZSTD_nextInputType(zds) == ZSTDnit_block); /* preload header of next block */ + assert(zds->inPos <= nextSrcSizeHint); + nextSrcSizeHint -= zds->inPos; /* part already loaded*/ + return nextSrcSizeHint; + } +} + +size_t ZSTD_decompressStream_simpleArgs ( + ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, size_t* dstPos, + const void* src, size_t srcSize, size_t* srcPos) +{ + ZSTD_outBuffer output = { dst, dstCapacity, *dstPos }; + ZSTD_inBuffer input = { src, srcSize, *srcPos }; + /* ZSTD_compress_generic() will check validity of dstPos and srcPos */ + size_t const cErr = ZSTD_decompressStream(dctx, &output, &input); + *dstPos = output.pos; + *srcPos = input.pos; + return cErr; +} +/**** ended inlining decompress/zstd_decompress.c ****/ +/**** start inlining decompress/zstd_decompress_block.c ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/* zstd_decompress_block : + * this module takes care of decompressing _compressed_ block */ + +/*-******************************************************* +* Dependencies +*********************************************************/ +/**** skipping file: ../common/zstd_deps.h ****/ +/**** skipping file: ../common/compiler.h ****/ +/**** skipping file: ../common/cpu.h ****/ +/**** skipping file: ../common/mem.h ****/ +#define FSE_STATIC_LINKING_ONLY +/**** skipping file: ../common/fse.h ****/ +#define HUF_STATIC_LINKING_ONLY +/**** skipping file: ../common/huf.h ****/ +/**** skipping file: ../common/zstd_internal.h ****/ +/**** skipping file: zstd_decompress_internal.h ****/ +/**** skipping file: zstd_ddict.h ****/ +/**** skipping file: zstd_decompress_block.h ****/ + +/*_******************************************************* +* Macros +**********************************************************/ + +/* These two optional macros force the use one way or another of the two + * ZSTD_decompressSequences implementations. You can't force in both directions + * at the same time. 
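+ * For example, compiling with -DZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT pins that
+ * variant unconditionally; defining both macros at once trips the #error just below.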
+ */ +#if defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ + defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) +#error "Cannot force the use of the short and the long ZSTD_decompressSequences variants!" +#endif + + +/*_******************************************************* +* Memory operations +**********************************************************/ +static void ZSTD_copy4(void* dst, const void* src) { ZSTD_memcpy(dst, src, 4); } + + +/*-************************************************************* + * Block decoding + ***************************************************************/ + +/*! ZSTD_getcBlockSize() : + * Provides the size of compressed block from block header `src` */ +size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, + blockProperties_t* bpPtr) +{ + RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong, ""); + + { U32 const cBlockHeader = MEM_readLE24(src); + U32 const cSize = cBlockHeader >> 3; + bpPtr->lastBlock = cBlockHeader & 1; + bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3); + bpPtr->origSize = cSize; /* only useful for RLE */ + if (bpPtr->blockType == bt_rle) return 1; + RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected, ""); + return cSize; + } +} + + +/* Hidden declaration for fullbench */ +size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, + const void* src, size_t srcSize); +/*! ZSTD_decodeLiteralsBlock() : + * @return : nb of bytes read from src (< srcSize ) + * note : symbol not declared but exposed for fullbench */ +size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, + const void* src, size_t srcSize) /* note : srcSize < BLOCKSIZE */ +{ + DEBUGLOG(5, "ZSTD_decodeLiteralsBlock"); + RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected, ""); + + { const BYTE* const istart = (const BYTE*) src; + symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3); + + switch(litEncType) + { + case set_repeat: + DEBUGLOG(5, "set_repeat flag : re-using stats from previous compressed literals block"); + RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted, ""); + /* fall-through */ + + case set_compressed: + RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3"); + { size_t lhSize, litSize, litCSize; + U32 singleStream=0; + U32 const lhlCode = (istart[0] >> 2) & 3; + U32 const lhc = MEM_readLE32(istart); + size_t hufSuccess; + switch(lhlCode) + { + case 0: case 1: default: /* note : default is impossible, since lhlCode into [0..3] */ + /* 2 - 2 - 10 - 10 */ + singleStream = !lhlCode; + lhSize = 3; + litSize = (lhc >> 4) & 0x3FF; + litCSize = (lhc >> 14) & 0x3FF; + break; + case 2: + /* 2 - 2 - 14 - 14 */ + lhSize = 4; + litSize = (lhc >> 4) & 0x3FFF; + litCSize = lhc >> 18; + break; + case 3: + /* 2 - 2 - 18 - 18 */ + lhSize = 5; + litSize = (lhc >> 4) & 0x3FFFF; + litCSize = (lhc >> 22) + ((size_t)istart[4] << 10); + break; + } + RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, ""); + RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, ""); + + /* prefetch huffman table if cold */ + if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) { + PREFETCH_AREA(dctx->HUFptr, sizeof(dctx->entropy.hufTable)); + } + + if (litEncType==set_repeat) { + if (singleStream) { + hufSuccess = HUF_decompress1X_usingDTable_bmi2( + dctx->litBuffer, litSize, istart+lhSize, litCSize, + dctx->HUFptr, dctx->bmi2); + } else { + hufSuccess = HUF_decompress4X_usingDTable_bmi2( + dctx->litBuffer, litSize, istart+lhSize, litCSize, + 
dctx->HUFptr, dctx->bmi2); + } + } else { + if (singleStream) { +#if defined(HUF_FORCE_DECOMPRESS_X2) + hufSuccess = HUF_decompress1X_DCtx_wksp( + dctx->entropy.hufTable, dctx->litBuffer, litSize, + istart+lhSize, litCSize, dctx->workspace, + sizeof(dctx->workspace)); +#else + hufSuccess = HUF_decompress1X1_DCtx_wksp_bmi2( + dctx->entropy.hufTable, dctx->litBuffer, litSize, + istart+lhSize, litCSize, dctx->workspace, + sizeof(dctx->workspace), dctx->bmi2); +#endif + } else { + hufSuccess = HUF_decompress4X_hufOnly_wksp_bmi2( + dctx->entropy.hufTable, dctx->litBuffer, litSize, + istart+lhSize, litCSize, dctx->workspace, + sizeof(dctx->workspace), dctx->bmi2); + } + } + + RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, ""); + + dctx->litPtr = dctx->litBuffer; + dctx->litSize = litSize; + dctx->litEntropy = 1; + if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable; + ZSTD_memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH); + return litCSize + lhSize; + } + + case set_basic: + { size_t litSize, lhSize; + U32 const lhlCode = ((istart[0]) >> 2) & 3; + switch(lhlCode) + { + case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */ + lhSize = 1; + litSize = istart[0] >> 3; + break; + case 1: + lhSize = 2; + litSize = MEM_readLE16(istart) >> 4; + break; + case 3: + lhSize = 3; + litSize = MEM_readLE24(istart) >> 4; + break; + } + + if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */ + RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected, ""); + ZSTD_memcpy(dctx->litBuffer, istart+lhSize, litSize); + dctx->litPtr = dctx->litBuffer; + dctx->litSize = litSize; + ZSTD_memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH); + return lhSize+litSize; + } + /* direct reference into compressed stream */ + dctx->litPtr = istart+lhSize; + dctx->litSize = litSize; + return lhSize+litSize; + } + + case set_rle: + { U32 const lhlCode = ((istart[0]) >> 2) & 3; + size_t litSize, lhSize; + switch(lhlCode) + { + case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */ + lhSize = 1; + litSize = istart[0] >> 3; + break; + case 1: + lhSize = 2; + litSize = MEM_readLE16(istart) >> 4; + break; + case 3: + lhSize = 3; + litSize = MEM_readLE24(istart) >> 4; + RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4"); + break; + } + RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, ""); + ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH); + dctx->litPtr = dctx->litBuffer; + dctx->litSize = litSize; + return lhSize+1; + } + default: + RETURN_ERROR(corruption_detected, "impossible"); + } + } +} + +/* Default FSE distribution tables. 
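+ * They are selected by ZSTD_buildSeqTable() below whenever a block uses the
+ * set_basic symbol encoding, i.e. no table is transmitted in the block.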
+ * These are pre-calculated FSE decoding tables using default distributions as defined in specification : + * https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#default-distributions + * They were generated programmatically with following method : + * - start from default distributions, present in /lib/common/zstd_internal.h + * - generate tables normally, using ZSTD_buildFSETable() + * - printout the content of tables + * - pretify output, report below, test with fuzzer to ensure it's correct */ + +/* Default FSE distribution table for Literal Lengths */ +static const ZSTD_seqSymbol LL_defaultDTable[(1<tableLog = 0; + DTableH->fastMode = 0; + + cell->nbBits = 0; + cell->nextState = 0; + assert(nbAddBits < 255); + cell->nbAdditionalBits = (BYTE)nbAddBits; + cell->baseValue = baseValue; +} + + +/* ZSTD_buildFSETable() : + * generate FSE decoding table for one symbol (ll, ml or off) + * cannot fail if input is valid => + * all inputs are presumed validated at this stage */ +FORCE_INLINE_TEMPLATE +void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt, + const short* normalizedCounter, unsigned maxSymbolValue, + const U32* baseValue, const U32* nbAdditionalBits, + unsigned tableLog, void* wksp, size_t wkspSize) +{ + ZSTD_seqSymbol* const tableDecode = dt+1; + U32 const maxSV1 = maxSymbolValue + 1; + U32 const tableSize = 1 << tableLog; + + U16* symbolNext = (U16*)wksp; + BYTE* spread = (BYTE*)(symbolNext + MaxSeq + 1); + U32 highThreshold = tableSize - 1; + + + /* Sanity Checks */ + assert(maxSymbolValue <= MaxSeq); + assert(tableLog <= MaxFSELog); + assert(wkspSize >= ZSTD_BUILD_FSE_TABLE_WKSP_SIZE); + (void)wkspSize; + /* Init, lay down lowprob symbols */ + { ZSTD_seqSymbol_header DTableH; + DTableH.tableLog = tableLog; + DTableH.fastMode = 1; + { S16 const largeLimit= (S16)(1 << (tableLog-1)); + U32 s; + for (s=0; s= largeLimit) DTableH.fastMode=0; + assert(normalizedCounter[s]>=0); + symbolNext[s] = (U16)normalizedCounter[s]; + } } } + ZSTD_memcpy(dt, &DTableH, sizeof(DTableH)); + } + + /* Spread symbols */ + assert(tableSize <= 512); + /* Specialized symbol spreading for the case when there are + * no low probability (-1 count) symbols. When compressing + * small blocks we avoid low probability symbols to hit this + * case, since header decoding speed matters more. + */ + if (highThreshold == tableSize - 1) { + size_t const tableMask = tableSize-1; + size_t const step = FSE_TABLESTEP(tableSize); + /* First lay down the symbols in order. + * We use a uint64_t to lay down 8 bytes at a time. This reduces branch + * misses since small blocks generally have small table logs, so nearly + * all symbols have counts <= 8. We ensure we have 8 bytes at the end of + * our buffer to handle the over-write. 
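+ * Sketch of the trick (as laid out in the loop below) : `sv` holds the current
+ * symbol replicated into all 8 bytes (it starts at 0 and grows by `add` per
+ * symbol); each store writes 8 copies at once, but `pos` only advances by the
+ * symbol's true count, so the next symbol simply overwrites the surplus bytes.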
+ */ + { + U64 const add = 0x0101010101010101ull; + size_t pos = 0; + U64 sv = 0; + U32 s; + for (s=0; s highThreshold) position = (position + step) & tableMask; /* lowprob area */ + } } + assert(position == 0); /* position must reach all cells once, otherwise normalizedCounter is incorrect */ + } + + /* Build Decoding table */ + { + U32 u; + for (u=0; u max, corruption_detected, ""); + { U32 const symbol = *(const BYTE*)src; + U32 const baseline = baseValue[symbol]; + U32 const nbBits = nbAdditionalBits[symbol]; + ZSTD_buildSeqTable_rle(DTableSpace, baseline, nbBits); + } + *DTablePtr = DTableSpace; + return 1; + case set_basic : + *DTablePtr = defaultTable; + return 0; + case set_repeat: + RETURN_ERROR_IF(!flagRepeatTable, corruption_detected, ""); + /* prefetch FSE table if used */ + if (ddictIsCold && (nbSeq > 24 /* heuristic */)) { + const void* const pStart = *DTablePtr; + size_t const pSize = sizeof(ZSTD_seqSymbol) * (SEQSYMBOL_TABLE_SIZE(maxLog)); + PREFETCH_AREA(pStart, pSize); + } + return 0; + case set_compressed : + { unsigned tableLog; + S16 norm[MaxSeq+1]; + size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize); + RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected, ""); + RETURN_ERROR_IF(tableLog > maxLog, corruption_detected, ""); + ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog, wksp, wkspSize, bmi2); + *DTablePtr = DTableSpace; + return headerSize; + } + default : + assert(0); + RETURN_ERROR(GENERIC, "impossible"); + } +} + +size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, + const void* src, size_t srcSize) +{ + const BYTE* const istart = (const BYTE*)src; + const BYTE* const iend = istart + srcSize; + const BYTE* ip = istart; + int nbSeq; + DEBUGLOG(5, "ZSTD_decodeSeqHeaders"); + + /* check */ + RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong, ""); + + /* SeqHead */ + nbSeq = *ip++; + if (!nbSeq) { + *nbSeqPtr=0; + RETURN_ERROR_IF(srcSize != 1, srcSize_wrong, ""); + return 1; + } + if (nbSeq > 0x7F) { + if (nbSeq == 0xFF) { + RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, ""); + nbSeq = MEM_readLE16(ip) + LONGNBSEQ; + ip+=2; + } else { + RETURN_ERROR_IF(ip >= iend, srcSize_wrong, ""); + nbSeq = ((nbSeq-0x80)<<8) + *ip++; + } + } + *nbSeqPtr = nbSeq; + + /* FSE table descriptors */ + RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong, ""); /* minimum possible size: 1 byte for symbol encoding types */ + { symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6); + symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3); + symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3); + ip++; + + /* Build DTables */ + { size_t const llhSize = ZSTD_buildSeqTable(dctx->entropy.LLTable, &dctx->LLTptr, + LLtype, MaxLL, LLFSELog, + ip, iend-ip, + LL_base, LL_bits, + LL_defaultDTable, dctx->fseEntropy, + dctx->ddictIsCold, nbSeq, + dctx->workspace, sizeof(dctx->workspace), + dctx->bmi2); + RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed"); + ip += llhSize; + } + + { size_t const ofhSize = ZSTD_buildSeqTable(dctx->entropy.OFTable, &dctx->OFTptr, + OFtype, MaxOff, OffFSELog, + ip, iend-ip, + OF_base, OF_bits, + OF_defaultDTable, dctx->fseEntropy, + dctx->ddictIsCold, nbSeq, + dctx->workspace, sizeof(dctx->workspace), + dctx->bmi2); + RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed"); + ip += ofhSize; + } + + { size_t const mlhSize = ZSTD_buildSeqTable(dctx->entropy.MLTable, 
&dctx->MLTptr, + MLtype, MaxML, MLFSELog, + ip, iend-ip, + ML_base, ML_bits, + ML_defaultDTable, dctx->fseEntropy, + dctx->ddictIsCold, nbSeq, + dctx->workspace, sizeof(dctx->workspace), + dctx->bmi2); + RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed"); + ip += mlhSize; + } + } + + return ip-istart; +} + + +typedef struct { + size_t litLength; + size_t matchLength; + size_t offset; +} seq_t; + +typedef struct { + size_t state; + const ZSTD_seqSymbol* table; +} ZSTD_fseState; + +typedef struct { + BIT_DStream_t DStream; + ZSTD_fseState stateLL; + ZSTD_fseState stateOffb; + ZSTD_fseState stateML; + size_t prevOffset[ZSTD_REP_NUM]; +} seqState_t; + +/*! ZSTD_overlapCopy8() : + * Copies 8 bytes from ip to op and updates op and ip where ip <= op. + * If the offset is < 8 then the offset is spread to at least 8 bytes. + * + * Precondition: *ip <= *op + * Postcondition: *op - *op >= 8 + */ +HINT_INLINE void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) { + assert(*ip <= *op); + if (offset < 8) { + /* close range match, overlap */ + static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */ + static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */ + int const sub2 = dec64table[offset]; + (*op)[0] = (*ip)[0]; + (*op)[1] = (*ip)[1]; + (*op)[2] = (*ip)[2]; + (*op)[3] = (*ip)[3]; + *ip += dec32table[offset]; + ZSTD_copy4(*op+4, *ip); + *ip -= sub2; + } else { + ZSTD_copy8(*op, *ip); + } + *ip += 8; + *op += 8; + assert(*op - *ip >= 8); +} + +/*! ZSTD_safecopy() : + * Specialized version of memcpy() that is allowed to READ up to WILDCOPY_OVERLENGTH past the input buffer + * and write up to 16 bytes past oend_w (op >= oend_w is allowed). + * This function is only called in the uncommon case where the sequence is near the end of the block. It + * should be fast for a single long sequence, but can be slow for several short sequences. + * + * @param ovtype controls the overlap detection + * - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart. + * - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart. + * The src buffer must be before the dst buffer. + */ +static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) { + ptrdiff_t const diff = op - ip; + BYTE* const oend = op + length; + + assert((ovtype == ZSTD_no_overlap && (diff <= -8 || diff >= 8 || op >= oend_w)) || + (ovtype == ZSTD_overlap_src_before_dst && diff >= 0)); + + if (length < 8) { + /* Handle short lengths. */ + while (op < oend) *op++ = *ip++; + return; + } + if (ovtype == ZSTD_overlap_src_before_dst) { + /* Copy 8 bytes and ensure the offset >= 8 when there can be overlap. */ + assert(length >= 8); + ZSTD_overlapCopy8(&op, &ip, diff); + assert(op - ip >= 8); + assert(op <= oend); + } + + if (oend <= oend_w) { + /* No risk of overwrite. */ + ZSTD_wildcopy(op, ip, length, ovtype); + return; + } + if (op <= oend_w) { + /* Wildcopy until we get close to the end. */ + assert(oend > oend_w); + ZSTD_wildcopy(op, ip, oend_w - op, ovtype); + ip += oend_w - op; + op = oend_w; + } + /* Handle the leftovers. */ + while (op < oend) *op++ = *ip++; +} + +/* ZSTD_execSequenceEnd(): + * This version handles cases that are near the end of the output buffer. It requires + * more careful checks to make sure there is no overflow. By separating out these hard + * and unlikely cases, we can speed up the common cases. 
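+ * (Executing a sequence = copy `litLength` bytes from the literals buffer, then
+ * copy `matchLength` bytes starting `offset` bytes back in the output, possibly
+ * reaching back past prefixStart into the dictionary segment ending at dictEnd.)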
+ * + * NOTE: This function needs to be fast for a single long sequence, but doesn't need + * to be optimized for many small sequences, since those fall into ZSTD_execSequence(). + */ +FORCE_NOINLINE +size_t ZSTD_execSequenceEnd(BYTE* op, + BYTE* const oend, seq_t sequence, + const BYTE** litPtr, const BYTE* const litLimit, + const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd) +{ + BYTE* const oLitEnd = op + sequence.litLength; + size_t const sequenceLength = sequence.litLength + sequence.matchLength; + const BYTE* const iLitEnd = *litPtr + sequence.litLength; + const BYTE* match = oLitEnd - sequence.offset; + BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; + + /* bounds checks : careful of address space overflow in 32-bit mode */ + RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer"); + RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer"); + assert(op < op + sequenceLength); + assert(oLitEnd < op + sequenceLength); + + /* copy literals */ + ZSTD_safecopy(op, oend_w, *litPtr, sequence.litLength, ZSTD_no_overlap); + op = oLitEnd; + *litPtr = iLitEnd; + + /* copy Match */ + if (sequence.offset > (size_t)(oLitEnd - prefixStart)) { + /* offset beyond prefix */ + RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, ""); + match = dictEnd - (prefixStart-match); + if (match + sequence.matchLength <= dictEnd) { + ZSTD_memmove(oLitEnd, match, sequence.matchLength); + return sequenceLength; + } + /* span extDict & currentPrefixSegment */ + { size_t const length1 = dictEnd - match; + ZSTD_memmove(oLitEnd, match, length1); + op = oLitEnd + length1; + sequence.matchLength -= length1; + match = prefixStart; + } } + ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst); + return sequenceLength; +} + +HINT_INLINE +size_t ZSTD_execSequence(BYTE* op, + BYTE* const oend, seq_t sequence, + const BYTE** litPtr, const BYTE* const litLimit, + const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd) +{ + BYTE* const oLitEnd = op + sequence.litLength; + size_t const sequenceLength = sequence.litLength + sequence.matchLength; + BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */ + BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; /* risk : address space underflow on oend=NULL */ + const BYTE* const iLitEnd = *litPtr + sequence.litLength; + const BYTE* match = oLitEnd - sequence.offset; + + assert(op != NULL /* Precondition */); + assert(oend_w < oend /* No underflow */); + /* Handle edge cases in a slow path: + * - Read beyond end of literals + * - Match end is within WILDCOPY_OVERLIMIT of oend + * - 32-bit mode and the match length overflows + */ + if (UNLIKELY( + iLitEnd > litLimit || + oMatchEnd > oend_w || + (MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH))) + return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd); + + /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */ + assert(op <= oLitEnd /* No overflow */); + assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */); + assert(oMatchEnd <= oend /* No underflow */); + assert(iLitEnd <= litLimit /* Literal length is in bounds */); + assert(oLitEnd <= oend_w /* Can wildcopy literals */); + assert(oMatchEnd <= oend_w /* Can wildcopy matches */); + + /* Copy 
Literals: + * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9. + * We likely don't need the full 32-byte wildcopy. + */ + assert(WILDCOPY_OVERLENGTH >= 16); + ZSTD_copy16(op, (*litPtr)); + if (UNLIKELY(sequence.litLength > 16)) { + ZSTD_wildcopy(op+16, (*litPtr)+16, sequence.litLength-16, ZSTD_no_overlap); + } + op = oLitEnd; + *litPtr = iLitEnd; /* update for next sequence */ + + /* Copy Match */ + if (sequence.offset > (size_t)(oLitEnd - prefixStart)) { + /* offset beyond prefix -> go into extDict */ + RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, ""); + match = dictEnd + (match - prefixStart); + if (match + sequence.matchLength <= dictEnd) { + ZSTD_memmove(oLitEnd, match, sequence.matchLength); + return sequenceLength; + } + /* span extDict & currentPrefixSegment */ + { size_t const length1 = dictEnd - match; + ZSTD_memmove(oLitEnd, match, length1); + op = oLitEnd + length1; + sequence.matchLength -= length1; + match = prefixStart; + } } + /* Match within prefix of 1 or more bytes */ + assert(op <= oMatchEnd); + assert(oMatchEnd <= oend_w); + assert(match >= prefixStart); + assert(sequence.matchLength >= 1); + + /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy + * without overlap checking. + */ + if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) { + /* We bet on a full wildcopy for matches, since we expect matches to be + * longer than literals (in general). In silesia, ~10% of matches are longer + * than 16 bytes. + */ + ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap); + return sequenceLength; + } + assert(sequence.offset < WILDCOPY_VECLEN); + + /* Copy 8 bytes and spread the offset to be >= 8. */ + ZSTD_overlapCopy8(&op, &match, sequence.offset); + + /* If the match length is > 8 bytes, then continue with the wildcopy. */ + if (sequence.matchLength > 8) { + assert(op < oMatchEnd); + ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst); + } + return sequenceLength; +} + +static void +ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt) +{ + const void* ptr = dt; + const ZSTD_seqSymbol_header* const DTableH = (const ZSTD_seqSymbol_header*)ptr; + DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog); + DEBUGLOG(6, "ZSTD_initFseState : val=%u using %u bits", + (U32)DStatePtr->state, DTableH->tableLog); + BIT_reloadDStream(bitD); + DStatePtr->table = dt + 1; +} + +FORCE_INLINE_TEMPLATE void +ZSTD_updateFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD) +{ + ZSTD_seqSymbol const DInfo = DStatePtr->table[DStatePtr->state]; + U32 const nbBits = DInfo.nbBits; + size_t const lowBits = BIT_readBits(bitD, nbBits); + DStatePtr->state = DInfo.nextState + lowBits; +} + +FORCE_INLINE_TEMPLATE void +ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, ZSTD_seqSymbol const DInfo) +{ + U32 const nbBits = DInfo.nbBits; + size_t const lowBits = BIT_readBits(bitD, nbBits); + DStatePtr->state = DInfo.nextState + lowBits; +} + +/* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum + * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1) + * bits before reloading. This value is the maximum number of bytes we read + * after reloading when we are decoding long offsets. + */ +#define LONG_OFFSETS_MAX_EXTRA_BITS_32 \ + (ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32 \ + ? 
ZSTD_WINDOWLOG_MAX_32 - STREAM_ACCUMULATOR_MIN_32 \ + : 0) + +typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e; + +FORCE_INLINE_TEMPLATE seq_t +ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets) +{ + seq_t seq; + ZSTD_seqSymbol const llDInfo = seqState->stateLL.table[seqState->stateLL.state]; + ZSTD_seqSymbol const mlDInfo = seqState->stateML.table[seqState->stateML.state]; + ZSTD_seqSymbol const ofDInfo = seqState->stateOffb.table[seqState->stateOffb.state]; + U32 const llBase = llDInfo.baseValue; + U32 const mlBase = mlDInfo.baseValue; + U32 const ofBase = ofDInfo.baseValue; + BYTE const llBits = llDInfo.nbAdditionalBits; + BYTE const mlBits = mlDInfo.nbAdditionalBits; + BYTE const ofBits = ofDInfo.nbAdditionalBits; + BYTE const totalBits = llBits+mlBits+ofBits; + + /* sequence */ + { size_t offset; + if (ofBits > 1) { + ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1); + ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5); + assert(ofBits <= MaxOff); + if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) { + U32 const extraBits = ofBits - MIN(ofBits, 32 - seqState->DStream.bitsConsumed); + offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits); + BIT_reloadDStream(&seqState->DStream); + if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits); + assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32); /* to avoid another reload */ + } else { + offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */ + if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); + } + seqState->prevOffset[2] = seqState->prevOffset[1]; + seqState->prevOffset[1] = seqState->prevOffset[0]; + seqState->prevOffset[0] = offset; + } else { + U32 const ll0 = (llBase == 0); + if (LIKELY((ofBits == 0))) { + if (LIKELY(!ll0)) + offset = seqState->prevOffset[0]; + else { + offset = seqState->prevOffset[1]; + seqState->prevOffset[1] = seqState->prevOffset[0]; + seqState->prevOffset[0] = offset; + } + } else { + offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1); + { size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset]; + temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */ + if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1]; + seqState->prevOffset[1] = seqState->prevOffset[0]; + seqState->prevOffset[0] = offset = temp; + } } } + seq.offset = offset; + } + + seq.matchLength = mlBase; + if (mlBits > 0) + seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/); + + if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32)) + BIT_reloadDStream(&seqState->DStream); + if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog))) + BIT_reloadDStream(&seqState->DStream); + /* Ensure there are enough bits to read the rest of data in 64-bit mode. */ + ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64); + + seq.litLength = llBase; + if (llBits > 0) + seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/); + + if (MEM_32bits()) + BIT_reloadDStream(&seqState->DStream); + + DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u", + (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset); + + /* ANS state update + * gcc-9.0.0 does 2.5% worse with ZSTD_updateFseStateWithDInfo(). + * clang-9.2.0 does 7% worse with ZSTD_updateFseState(). 
+ * Naturally it seems like ZSTD_updateFseStateWithDInfo() should be the + * better option, so it is the default for other compilers. But, if you + * measure that it is worse, please put up a pull request. + */ + { +#if defined(__GNUC__) && !defined(__clang__) + const int kUseUpdateFseState = 1; +#else + const int kUseUpdateFseState = 0; +#endif + if (kUseUpdateFseState) { + ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */ + ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */ + if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ + ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */ + } else { + ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llDInfo); /* <= 9 bits */ + ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlDInfo); /* <= 9 bits */ + if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ + ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofDInfo); /* <= 8 bits */ + } + } + + return seq; +} + +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION +MEM_STATIC int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd) +{ + size_t const windowSize = dctx->fParams.windowSize; + /* No dictionary used. */ + if (dctx->dictContentEndForFuzzing == NULL) return 0; + /* Dictionary is our prefix. */ + if (prefixStart == dctx->dictContentBeginForFuzzing) return 1; + /* Dictionary is not our ext-dict. */ + if (dctx->dictEnd != dctx->dictContentEndForFuzzing) return 0; + /* Dictionary is not within our window size. */ + if ((size_t)(oLitEnd - prefixStart) >= windowSize) return 0; + /* Dictionary is active. */ + return 1; +} + +MEM_STATIC void ZSTD_assertValidSequence( + ZSTD_DCtx const* dctx, + BYTE const* op, BYTE const* oend, + seq_t const seq, + BYTE const* prefixStart, BYTE const* virtualStart) +{ +#if DEBUGLEVEL >= 1 + size_t const windowSize = dctx->fParams.windowSize; + size_t const sequenceSize = seq.litLength + seq.matchLength; + BYTE const* const oLitEnd = op + seq.litLength; + DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u", + (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset); + assert(op <= oend); + assert((size_t)(oend - op) >= sequenceSize); + assert(sequenceSize <= ZSTD_BLOCKSIZE_MAX); + if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) { + size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing); + /* Offset must be within the dictionary. */ + assert(seq.offset <= (size_t)(oLitEnd - virtualStart)); + assert(seq.offset <= windowSize + dictSize); + } else { + /* Offset must be within our window. 
*/
+        assert(seq.offset <= windowSize);
+    }
+#else
+    (void)dctx, (void)op, (void)oend, (void)seq, (void)prefixStart, (void)virtualStart;
+#endif
+}
+#endif
+
+#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
+FORCE_INLINE_TEMPLATE size_t
+DONT_VECTORIZE
+ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
+                               void* dst, size_t maxDstSize,
+                         const void* seqStart, size_t seqSize, int nbSeq,
+                         const ZSTD_longOffset_e isLongOffset,
+                         const int frame)
+{
+    const BYTE* ip = (const BYTE*)seqStart;
+    const BYTE* const iend = ip + seqSize;
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* const oend = ostart + maxDstSize;
+    BYTE* op = ostart;
+    const BYTE* litPtr = dctx->litPtr;
+    const BYTE* const litEnd = litPtr + dctx->litSize;
+    const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
+    const BYTE* const vBase = (const BYTE*) (dctx->virtualStart);
+    const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
+    DEBUGLOG(5, "ZSTD_decompressSequences_body");
+    (void)frame;
+
+    /* Regen sequences */
+    if (nbSeq) {
+        seqState_t seqState;
+        dctx->fseEntropy = 1;
+        { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
+        RETURN_ERROR_IF(
+            ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
+            corruption_detected, "");
+        ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
+        ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
+        ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
+        assert(dst != NULL);
+
+        ZSTD_STATIC_ASSERT(
+                BIT_DStream_unfinished < BIT_DStream_completed &&
+                BIT_DStream_endOfBuffer < BIT_DStream_completed &&
+                BIT_DStream_completed < BIT_DStream_overflow);
+
+#if defined(__GNUC__) && defined(__x86_64__)
+        /* Align the decompression loop to 32 + 16 bytes.
+         *
+         * zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression
+         * speed swings based on the alignment of the decompression loop. This
+         * performance swing is caused by parts of the decompression loop falling
+         * out of the DSB. The entire decompression loop should fit in the DSB,
+         * when it can't we get much worse performance. You can measure if you've
+         * hit the good case or the bad case with this perf command for some
+         * compressed file test.zst:
+         *
+         *     perf stat -e cycles -e instructions -e idq.all_dsb_cycles_any_uops \
+         *               -e idq.all_mite_cycles_any_uops -- ./zstd -tq test.zst
+         *
+         * If you see most cycles served out of the MITE you've hit the bad case.
+         * If you see most cycles served out of the DSB you've hit the good case.
+         * If it is pretty even then you may be in an okay case.
+         *
+         * This issue has been reproduced on the following CPUs:
+         *   - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9
+         *               Use Instruments->Counters to get DSB/MITE cycles.
+         *               I never got performance swings, but I was able to
+         *               go from the good case of mostly DSB to half of the
+         *               cycles served from MITE.
+         *   - Coffeelake: Intel i9-9900k
+         *   - Coffeelake: Intel i7-9700k
+         *
+         * I haven't been able to reproduce the instability or DSB misses on any
+         * of the following CPUS:
+         *   - Haswell
+         *   - Broadwell: Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GH
+         *   - Skylake
+         *
+         * If you are seeing performance stability this script can help test.
+         * It tests on 4 commits in zstd where I saw performance change.
+         *
+         *   https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4
+         */
+        __asm__(".p2align 6");
+        __asm__("nop");
+        __asm__(".p2align 5");
+        __asm__("nop");
+#  if __GNUC__ >= 9
+        /* better for gcc-9 and gcc-10, worse for clang and gcc-8 */
+        __asm__(".p2align 3");
+#  else
+        __asm__(".p2align 4");
+#  endif
+#endif
+        for ( ; ; ) {
+            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
+            size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
+#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
+            assert(!ZSTD_isError(oneSeqSize));
+            if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
+#endif
+            if (UNLIKELY(ZSTD_isError(oneSeqSize)))
+                return oneSeqSize;
+            DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
+            op += oneSeqSize;
+            if (UNLIKELY(!--nbSeq))
+                break;
+            BIT_reloadDStream(&(seqState.DStream));
+        }
+
+        /* check if reached exact end */
+        DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq);
+        RETURN_ERROR_IF(nbSeq, corruption_detected, "");
+        RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
+        /* save reps for next block */
+        { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
+    }
+
+    /* last literal segment */
+    {   size_t const lastLLSize = litEnd - litPtr;
+        RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
+        if (op != NULL) {
+            ZSTD_memcpy(op, litPtr, lastLLSize);
+            op += lastLLSize;
+        }
+    }
+
+    return op-ostart;
+}
+
+static size_t
+ZSTD_decompressSequences_default(ZSTD_DCtx* dctx,
+                                 void* dst, size_t maxDstSize,
+                           const void* seqStart, size_t seqSize, int nbSeq,
+                           const ZSTD_longOffset_e isLongOffset,
+                           const int frame)
+{
+    return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+}
+#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
+
+#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
+
+FORCE_INLINE_TEMPLATE size_t
+ZSTD_prefetchMatch(size_t prefetchPos, seq_t const sequence,
+                   const BYTE* const prefixStart, const BYTE* const dictEnd)
+{
+    prefetchPos += sequence.litLength;
+    {   const BYTE* const matchBase = (sequence.offset > prefetchPos) ? dictEnd : prefixStart;
+        const BYTE* const match = matchBase + prefetchPos - sequence.offset;   /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
+                                                                                * No consequence though : memory address is only used for prefetching, not for dereferencing */
+        PREFETCH_L1(match); PREFETCH_L1(match+CACHELINE_SIZE);   /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
+    }
+    return prefetchPos + sequence.matchLength;
+}
+
+/* This decoding function employs prefetching
+ * to reduce latency impact of cache misses.
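+ * Internally it keeps a ring of STORED_SEQS (8) decoded sequences and issues
+ * ZSTD_prefetchMatch() on each upcoming match, so the match bytes are usually
+ * already in cache by the time ZSTD_execSequence() copies them.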
+ * It's generally employed when block contains a significant portion of long-distance matches
+ * or when coupled with a "cold" dictionary */
+FORCE_INLINE_TEMPLATE size_t
+ZSTD_decompressSequencesLong_body(
+                               ZSTD_DCtx* dctx,
+                               void* dst, size_t maxDstSize,
+                         const void* seqStart, size_t seqSize, int nbSeq,
+                         const ZSTD_longOffset_e isLongOffset,
+                         const int frame)
+{
+    const BYTE* ip = (const BYTE*)seqStart;
+    const BYTE* const iend = ip + seqSize;
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* const oend = ostart + maxDstSize;
+    BYTE* op = ostart;
+    const BYTE* litPtr = dctx->litPtr;
+    const BYTE* const litEnd = litPtr + dctx->litSize;
+    const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
+    const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart);
+    const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
+    (void)frame;
+
+    /* Regen sequences */
+    if (nbSeq) {
+#define STORED_SEQS 8
+#define STORED_SEQS_MASK (STORED_SEQS-1)
+#define ADVANCED_SEQS STORED_SEQS
+        seq_t sequences[STORED_SEQS];
+        int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS);
+        seqState_t seqState;
+        int seqNb;
+        size_t prefetchPos = (size_t)(op-prefixStart); /* track position relative to prefixStart */
+
+        dctx->fseEntropy = 1;
+        { int i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
+        assert(dst != NULL);
+        assert(iend >= ip);
+        RETURN_ERROR_IF(
+            ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
+            corruption_detected, "");
+        ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
+        ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
+        ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
+
+        /* prepare in advance */
+        for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) {
+            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
+            prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
+            sequences[seqNb] = sequence;
+        }
+        RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected, "");
+
+        /* decode and decompress */
+        for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb<nbSeq) ; seqNb++) {
+            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
+            size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
+#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
+            assert(!ZSTD_isError(oneSeqSize));
+            if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
+#endif
+            if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
+
+            prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
+            sequences[seqNb & STORED_SEQS_MASK] = sequence;
+            op += oneSeqSize;
+        }
+        RETURN_ERROR_IF(seqNb<nbSeq, corruption_detected, "");
+
+        /* finish queue */
+        seqNb -= seqAdvance;
+        for ( ; seqNb<nbSeq ; seqNb++) {
+            size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[seqNb&STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
+#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
+            assert(!ZSTD_isError(oneSeqSize));
+            if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
+#endif
+            if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
+            op += oneSeqSize;
+        }
+
+        /* save reps for next block */
+        { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
+    }
+
+    /* last literal segment */
+    {   size_t const lastLLSize = litEnd - litPtr;
+        RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
+        if (op != NULL) {
+            ZSTD_memcpy(op, litPtr, lastLLSize);
+            op += lastLLSize;
+        }
+    }
+
+    return op-ostart;
+}
+
+static size_t
+ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
+                                 void* dst, size_t maxDstSize,
+                           const void* seqStart, size_t seqSize, int nbSeq,
+                           const ZSTD_longOffset_e isLongOffset,
+                           const int frame)
+{
+    return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+}
+#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
+
+
+
+#if DYNAMIC_BMI2
+
+#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
+static TARGET_ATTRIBUTE("bmi2") size_t
+DONT_VECTORIZE
+ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
+                                 void* dst, size_t maxDstSize,
+                           const void* seqStart, size_t seqSize, int nbSeq,
+                           const ZSTD_longOffset_e isLongOffset,
+                           const int frame)
+{
+    return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+}
+#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
+
+#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
+static TARGET_ATTRIBUTE("bmi2") size_t
+ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx,
+                                 void* dst, size_t maxDstSize,
+                           const void* seqStart, size_t seqSize, int nbSeq,
+                           const ZSTD_longOffset_e isLongOffset,
+                           const int frame)
+{
+    return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+}
+#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
+
+#endif /* DYNAMIC_BMI2 */
+
+typedef size_t (*ZSTD_decompressSequences_t)(
+                            ZSTD_DCtx* dctx,
+                            void* dst, size_t maxDstSize,
+                            const void* seqStart, size_t seqSize, int nbSeq,
+                            const ZSTD_longOffset_e isLongOffset,
+                            const int frame);
+
+#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
+static size_t
+ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
+                   const void* seqStart, size_t seqSize, int nbSeq,
+                   const ZSTD_longOffset_e isLongOffset,
+                   const int frame)
+{
+    DEBUGLOG(5, "ZSTD_decompressSequences");
+#if DYNAMIC_BMI2
+    if (dctx->bmi2) {
+        return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+    }
+#endif
+    return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+}
+#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
+
+
+#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
+/* ZSTD_decompressSequencesLong() :
+ * decompression function triggered when a minimum share of offsets is considered "long",
+ * aka out of cache.
+ * note : "long" definition seems overloaded here, sometimes meaning "wider than bitstream register", and sometimes meaning "farther than memory cache distance".
+ * This function will try to mitigate main memory latency through the use of prefetching */
+static size_t
+ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
+                             void* dst, size_t maxDstSize,
+                             const void* seqStart, size_t seqSize, int nbSeq,
+                             const ZSTD_longOffset_e isLongOffset,
+                             const int frame)
+{
+    DEBUGLOG(5, "ZSTD_decompressSequencesLong");
+#if DYNAMIC_BMI2
+    if (dctx->bmi2) {
+        return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+    }
+#endif
+    return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+}
+#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
+
+
+
+#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
+    !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
+/* ZSTD_getLongOffsetsShare() :
+ * condition : offTable must be valid
+ * @return : "share" of long offsets (arbitrarily defined as > (1<<23))
+ *           compared to maximum possible of (1<<OffFSELog) */
+static unsigned
+ZSTD_getLongOffsetsShare(const ZSTD_seqSymbol* offTable)
+{
+    const void* ptr = offTable;
+    U32 const tableLog = ((const ZSTD_seqSymbol_header*)ptr)[0].tableLog;
+    const ZSTD_seqSymbol* table = offTable + 1;
+    U32 const max = 1 << tableLog;
+    U32 u, total = 0;
+    DEBUGLOG(5, "ZSTD_getLongOffsetsShare: (tableLog=%u)", tableLog);
+
+    assert(max <= (1 << OffFSELog));  /* max not too large */
+    for (u=0; u<max; u++) {
+        if (table[u].nbAdditionalBits > 22) total += 1;
+    }
+
+    assert(tableLog <= OffFSELog);
+    total <<= (OffFSELog - tableLog);  /* scale to OffFSELog */
+
+    return total;
+}
+#endif
+
+size_t
+ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
+                      void* dst, size_t dstCapacity,
+                const void* src, size_t srcSize, const int frame)
+{   /* blockType == blockCompressed */
+    const BYTE* ip = (const BYTE*)src;
+    /* isLongOffset must be true if there are long offsets.
+     * Offsets are long if they are larger than 2^STREAM_ACCUMULATOR_MIN.
+     * We don't expect that to be the case in 64-bit mode.
+     * In block mode, window size is not known, so we have to be conservative.
+     * (note: but it could be evaluated from current-lowLimit)
+     */
+    ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN))));
+    DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);
+
+    RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong, "");
+
+    /* Decode literals section */
+    {   size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
+        DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : %u", (U32)litCSize);
+        if (ZSTD_isError(litCSize)) return litCSize;
+        ip += litCSize;
+        srcSize -= litCSize;
+    }
+
+    /* Build Decoding Tables */
+    {
+        /* These macros control at build-time which decompressor implementation
+         * we use. If neither is defined, we do some inspection and dispatch at
+         * runtime.
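+         * For example, compiling with -DZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
+         * keeps only the regular ZSTD_decompressSequences() path, while
+         * -DZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG keeps only the prefetching
+         * ZSTD_decompressSequencesLong() path.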
+ */ +#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ + !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) + int usePrefetchDecoder = dctx->ddictIsCold; +#endif + int nbSeq; + size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize); + if (ZSTD_isError(seqHSize)) return seqHSize; + ip += seqHSize; + srcSize -= seqHSize; + + RETURN_ERROR_IF(dst == NULL && nbSeq > 0, dstSize_tooSmall, "NULL not handled"); + +#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ + !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) + if ( !usePrefetchDecoder + && (!frame || (dctx->fParams.windowSize > (1<<24))) + && (nbSeq>ADVANCED_SEQS) ) { /* could probably use a larger nbSeq limit */ + U32 const shareLongOffsets = ZSTD_getLongOffsetsShare(dctx->OFTptr); + U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */ + usePrefetchDecoder = (shareLongOffsets >= minShare); + } +#endif + + dctx->ddictIsCold = 0; + +#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ + !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) + if (usePrefetchDecoder) +#endif +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT + return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame); +#endif + +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG + /* else */ + return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame); +#endif + } +} + + +void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize) +{ + if (dst != dctx->previousDstEnd && dstSize > 0) { /* not contiguous */ + dctx->dictEnd = dctx->previousDstEnd; + dctx->virtualStart = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart)); + dctx->prefixStart = dst; + dctx->previousDstEnd = dst; + } +} + + +size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + size_t dSize; + ZSTD_checkContinuity(dctx, dst, dstCapacity); + dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0); + dctx->previousDstEnd = (char*)dst + dSize; + return dSize; +} +/**** ended inlining decompress/zstd_decompress_block.c ****/ + +/**** start inlining dictBuilder/cover.c ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/* ***************************************************************************** + * Constructs a dictionary using a heuristic based on the following paper: + * + * Liao, Petri, Moffat, Wirth + * Effective Construction of Relative Lempel-Ziv Dictionaries + * Published in WWW 2016. + * + * Adapted from code originally written by @ot (Giuseppe Ottaviano). 
+ ******************************************************************************/
+
+/*-*************************************
+*  Dependencies
+***************************************/
+#include <stdio.h>            /* fprintf */
+#include <stdlib.h>           /* malloc, free, qsort */
+#include <string.h>           /* memset */
+#include <time.h>             /* clock */
+
+#ifndef ZDICT_STATIC_LINKING_ONLY
+#  define ZDICT_STATIC_LINKING_ONLY
+#endif
+
+/**** skipping file: ../common/mem.h ****/
+/**** skipping file: ../common/pool.h ****/
+/**** skipping file: ../common/threading.h ****/
+/**** skipping file: ../common/zstd_internal.h ****/
+/**** start inlining ../zdict.h ****/
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef DICTBUILDER_H_001
+#define DICTBUILDER_H_001
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/*======  Dependencies  ======*/
+#include <stddef.h>  /* size_t */
+
+
+/* =====   ZDICTLIB_API : control library symbols visibility   ===== */
+#ifndef ZDICTLIB_VISIBILITY
+#  if defined(__GNUC__) && (__GNUC__ >= 4)
+#    define ZDICTLIB_VISIBILITY __attribute__ ((visibility ("default")))
+#  else
+#    define ZDICTLIB_VISIBILITY
+#  endif
+#endif
+#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
+#  define ZDICTLIB_API __declspec(dllexport) ZDICTLIB_VISIBILITY
+#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1)
+#  define ZDICTLIB_API __declspec(dllimport) ZDICTLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
+#else
+#  define ZDICTLIB_API ZDICTLIB_VISIBILITY
+#endif
+
+/*******************************************************************************
+ * Zstd dictionary builder
+ *
+ * FAQ
+ * ===
+ * Why should I use a dictionary?
+ * ------------------------------
+ *
+ * Zstd can use dictionaries to improve compression ratio of small data.
+ * Traditionally small files don't compress well because there is very little
+ * repetition in a single sample, since it is small. But, if you are compressing
+ * many similar files, like a bunch of JSON records that share the same
+ * structure, you can train a dictionary ahead of time on some samples of
+ * these files. Then, zstd can use the dictionary to find repetitions that are
+ * present across samples. This can vastly improve compression ratio.
+ *
+ * When is a dictionary useful?
+ * ----------------------------
+ *
+ * Dictionaries are useful when compressing many small files that are similar.
+ * The larger a file is, the less benefit a dictionary will have. Generally,
+ * we don't expect dictionary compression to be effective past 100KB. And the
+ * smaller a file is, the more we would expect the dictionary to help.
+ *
+ * How do I use a dictionary?
+ * --------------------------
+ *
+ * Simply pass the dictionary to the zstd compressor with
+ * `ZSTD_CCtx_loadDictionary()`. The same dictionary must then be passed to
+ * the decompressor, using `ZSTD_DCtx_loadDictionary()`. There are other
+ * more advanced functions that allow selecting some options, see zstd.h for
+ * complete documentation.
+ *
+ * What is a zstd dictionary?
+ * --------------------------
+ *
+ * A zstd dictionary has two pieces: Its header, and its content. The header
+ * contains a magic number, the dictionary ID, and entropy tables. These
+ * entropy tables allow zstd to save on header costs in the compressed file,
+ * which really matters for small data. The content is just bytes, which are
+ * repeated content that is common across many samples.
+ *
+ * What is a raw content dictionary?
+ * ---------------------------------
+ *
+ * A raw content dictionary is just bytes. It doesn't have a zstd dictionary
+ * header, a dictionary ID, or entropy tables. Any buffer is a valid raw
+ * content dictionary.
+ *
+ * How do I train a dictionary?
+ * ----------------------------
+ *
+ * Gather samples from your use case. These samples should be similar to each
+ * other. If you have several use cases, you could try to train one dictionary
+ * per use case.
+ *
+ * Pass those samples to `ZDICT_trainFromBuffer()` and that will train your
+ * dictionary. There are a few advanced versions of this function, but this
+ * is a great starting point. If you want to further tune your dictionary
+ * you could try `ZDICT_optimizeTrainFromBuffer_cover()`. If that is too slow
+ * you can try `ZDICT_optimizeTrainFromBuffer_fastCover()`.
+ *
+ * If the dictionary training function fails, that is likely because you
+ * either passed too few samples, or a dictionary would not be effective
+ * for your data. Look at the messages that the dictionary trainer printed,
+ * if it doesn't say too few samples, then a dictionary would not be effective.
+ *
+ * How large should my dictionary be?
+ * ----------------------------------
+ *
+ * A reasonable dictionary size, the `dictBufferCapacity`, is about 100KB.
+ * The zstd CLI defaults to a 110KB dictionary. You likely don't need a
+ * dictionary larger than that. But, most use cases can get away with a
+ * smaller dictionary. The advanced dictionary builders can automatically
+ * shrink the dictionary for you, and select the smallest size that
+ * doesn't hurt compression ratio too much. See the `shrinkDict` parameter.
+ * A smaller dictionary can save memory, and potentially speed up
+ * compression.
+ *
+ * How many samples should I provide to the dictionary builder?
+ * ------------------------------------------------------------
+ *
+ * We generally recommend passing ~100x the size of the dictionary
+ * in samples. A few thousand should suffice. Having too few samples
+ * can hurt the dictionary's effectiveness. Having more samples will
+ * only improve the dictionary's effectiveness. But having too many
+ * samples can slow down the dictionary builder.
+ *
+ * How do I determine if a dictionary will be effective?
+ * -----------------------------------------------------
+ *
+ * Simply train a dictionary and try it out. You can use zstd's built in
+ * benchmarking tool to test the dictionary effectiveness.
+ *
+ *   # Benchmark levels 1-3 without a dictionary
+ *   zstd -b1e3 -r /path/to/my/files
+ *   # Benchmark levels 1-3 with a dictionary
+ *   zstd -b1e3 -r /path/to/my/files -D /path/to/my/dictionary
+ *
+ * When should I retrain a dictionary?
+ * -----------------------------------
+ *
+ * You should retrain a dictionary when its effectiveness drops. Dictionary
+ * effectiveness drops as the data you are compressing changes. Generally, we do
+ * expect dictionaries to "decay" over time, as your data changes, but the rate
+ * at which they decay depends on your use case.
Internally, we regularly + * retrain dictionaries, and if the new dictionary performs significantly + * better than the old dictionary, we will ship the new dictionary. + * + * I have a raw content dictionary, how do I turn it into a zstd dictionary? + * ------------------------------------------------------------------------- + * + * If you have a raw content dictionary, e.g. by manually constructing it, or + * using a third-party dictionary builder, you can turn it into a zstd + * dictionary by using `ZDICT_finalizeDictionary()`. You'll also have to + * provide some samples of the data. It will add the zstd header to the + * raw content, which contains a dictionary ID and entropy tables, which + * will improve compression ratio, and allow zstd to write the dictionary ID + * into the frame, if you so choose. + * + * Do I have to use zstd's dictionary builder? + * ------------------------------------------- + * + * No! You can construct dictionary content however you please, it is just + * bytes. It will always be valid as a raw content dictionary. If you want + * a zstd dictionary, which can improve compression ratio, use + * `ZDICT_finalizeDictionary()`. + * + * What is the attack surface of a zstd dictionary? + * ------------------------------------------------ + * + * Zstd is heavily fuzz tested, including loading fuzzed dictionaries, so + * zstd should never crash, or access out-of-bounds memory no matter what + * the dictionary is. However, if an attacker can control the dictionary + * during decompression, they can cause zstd to generate arbitrary bytes, + * just like if they controlled the compressed data. + * + ******************************************************************************/ + + +/*! ZDICT_trainFromBuffer(): + * Train a dictionary from an array of samples. + * Redirect towards ZDICT_optimizeTrainFromBuffer_fastCover() single-threaded, with d=8, steps=4, + * f=20, and accel=1. + * Samples must be stored concatenated in a single flat buffer `samplesBuffer`, + * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order. + * The resulting dictionary will be saved into `dictBuffer`. + * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) + * or an error code, which can be tested with ZDICT_isError(). + * Note: Dictionary training will fail if there are not enough samples to construct a + * dictionary, or if most of the samples are too small (< 8 bytes being the lower limit). + * If dictionary training fails, you should use zstd without a dictionary, as the dictionary + * would've been ineffective anyways. If you believe your samples would benefit from a dictionary + * please open an issue with details, and we can look into it. + * Note: ZDICT_trainFromBuffer()'s memory usage is about 6 MB. + * Tips: In general, a reasonable dictionary has a size of ~ 100 KB. + * It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`. + * In general, it's recommended to provide a few thousands samples, though this can vary a lot. + * It's recommended that total size of all samples be about ~x100 times the target size of dictionary. 
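+ *  A minimal usage sketch (illustrative only; `samples`, `sizes` and `nbSamples`
+ *  are assumed to be filled by the caller, and 110 KB just mirrors the CLI default):
+ *      char   dictBuffer[112640];
+ *      size_t dictSize = ZDICT_trainFromBuffer(dictBuffer, sizeof(dictBuffer),
+ *                                              samples, sizes, nbSamples);
+ *      if (ZDICT_isError(dictSize)) { ... compress without a dictionary ... }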
+ */ +ZDICTLIB_API size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity, + const void* samplesBuffer, + const size_t* samplesSizes, unsigned nbSamples); + +typedef struct { + int compressionLevel; /*< optimize for a specific zstd compression level; 0 means default */ + unsigned notificationLevel; /*< Write log to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */ + unsigned dictID; /*< force dictID value; 0 means auto mode (32-bits random value) + * NOTE: The zstd format reserves some dictionary IDs for future use. + * You may use them in private settings, but be warned that they + * may be used by zstd in a public dictionary registry in the future. + * These dictionary IDs are: + * - low range : <= 32767 + * - high range : >= (2^31) + */ +} ZDICT_params_t; + +/*! ZDICT_finalizeDictionary(): + * Given a custom content as a basis for dictionary, and a set of samples, + * finalize dictionary by adding headers and statistics according to the zstd + * dictionary format. + * + * Samples must be stored concatenated in a flat buffer `samplesBuffer`, + * supplied with an array of sizes `samplesSizes`, providing the size of each + * sample in order. The samples are used to construct the statistics, so they + * should be representative of what you will compress with this dictionary. + * + * The compression level can be set in `parameters`. You should pass the + * compression level you expect to use in production. The statistics for each + * compression level differ, so tuning the dictionary for the compression level + * can help quite a bit. + * + * You can set an explicit dictionary ID in `parameters`, or allow us to pick + * a random dictionary ID for you, but we can't guarantee no collisions. + * + * The dstDictBuffer and the dictContent may overlap, and the content will be + * appended to the end of the header. If the header + the content doesn't fit in + * maxDictSize the beginning of the content is truncated to make room, since it + * is presumed that the most profitable content is at the end of the dictionary, + * since that is the cheapest to reference. + * + * `dictContentSize` must be >= ZDICT_CONTENTSIZE_MIN bytes. + * `maxDictSize` must be >= max(dictContentSize, ZSTD_DICTSIZE_MIN). + * + * @return: size of dictionary stored into `dstDictBuffer` (<= `maxDictSize`), + * or an error code, which can be tested by ZDICT_isError(). + * Note: ZDICT_finalizeDictionary() will push notifications into stderr if + * instructed to, using notificationLevel>0. 
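+ *  A minimal sketch (illustrative only; `rawContent` and the sample arrays are
+ *  assumed to be provided by the caller):
+ *      ZDICT_params_t zp;
+ *      memset(&zp, 0, sizeof(zp));      - all-default parameters
+ *      zp.compressionLevel = 3;         - the level you plan to use in production
+ *      size_t dictSize = ZDICT_finalizeDictionary(dstDictBuffer, maxDictSize,
+ *                            rawContent, rawContentSize,
+ *                            samples, sizes, nbSamples, zp);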
+ * NOTE: This function currently may fail in several edge cases including: + * * Not enough samples + * * Samples are uncompressible + * * Samples are all exactly the same + */ +ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dstDictBuffer, size_t maxDictSize, + const void* dictContent, size_t dictContentSize, + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, + ZDICT_params_t parameters); + + +/*====== Helper functions ======*/ +ZDICTLIB_API unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize); /**< extracts dictID; @return zero if error (not a valid dictionary) */ +ZDICTLIB_API size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize); /* returns dict header size; returns a ZSTD error code on failure */ +ZDICTLIB_API unsigned ZDICT_isError(size_t errorCode); +ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode); + + + +#ifdef ZDICT_STATIC_LINKING_ONLY + +/* ==================================================================================== + * The definitions in this section are considered experimental. + * They should never be used with a dynamic library, as they may change in the future. + * They are provided for advanced usages. + * Use them only in association with static linking. + * ==================================================================================== */ + +#define ZDICT_CONTENTSIZE_MIN 128 +#define ZDICT_DICTSIZE_MIN 256 + +/*! ZDICT_cover_params_t: + * k and d are the only required parameters. + * For others, value 0 means default. + */ +typedef struct { + unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */ + unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */ + unsigned steps; /* Number of steps : Only used for optimization : 0 means default (40) : Higher means more parameters checked */ + unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */ + double splitPoint; /* Percentage of samples used for training: Only used for optimization : the first nbSamples * splitPoint samples will be used to training, the last nbSamples * (1 - splitPoint) samples will be used for testing, 0 means default (1.0), 1.0 when all samples are used for both training and testing */ + unsigned shrinkDict; /* Train dictionaries to shrink in size starting from the minimum size and selects the smallest dictionary that is shrinkDictMaxRegression% worse than the largest dictionary. 0 means no shrinking and 1 means shrinking */ + unsigned shrinkDictMaxRegression; /* Sets shrinkDictMaxRegression so that a smaller dictionary can be at worse shrinkDictMaxRegression% worse than the max dict size dictionary. 
*/ + ZDICT_params_t zParams; +} ZDICT_cover_params_t; + +typedef struct { + unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */ + unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */ + unsigned f; /* log of size of frequency array : constraint: 0 < f <= 31 : 1 means default(20)*/ + unsigned steps; /* Number of steps : Only used for optimization : 0 means default (40) : Higher means more parameters checked */ + unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */ + double splitPoint; /* Percentage of samples used for training: Only used for optimization : the first nbSamples * splitPoint samples will be used to training, the last nbSamples * (1 - splitPoint) samples will be used for testing, 0 means default (0.75), 1.0 when all samples are used for both training and testing */ + unsigned accel; /* Acceleration level: constraint: 0 < accel <= 10, higher means faster and less accurate, 0 means default(1) */ + unsigned shrinkDict; /* Train dictionaries to shrink in size starting from the minimum size and selects the smallest dictionary that is shrinkDictMaxRegression% worse than the largest dictionary. 0 means no shrinking and 1 means shrinking */ + unsigned shrinkDictMaxRegression; /* Sets shrinkDictMaxRegression so that a smaller dictionary can be at worse shrinkDictMaxRegression% worse than the max dict size dictionary. */ + + ZDICT_params_t zParams; +} ZDICT_fastCover_params_t; + +/*! ZDICT_trainFromBuffer_cover(): + * Train a dictionary from an array of samples using the COVER algorithm. + * Samples must be stored concatenated in a single flat buffer `samplesBuffer`, + * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order. + * The resulting dictionary will be saved into `dictBuffer`. + * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) + * or an error code, which can be tested with ZDICT_isError(). + * See ZDICT_trainFromBuffer() for details on failure modes. + * Note: ZDICT_trainFromBuffer_cover() requires about 9 bytes of memory for each input byte. + * Tips: In general, a reasonable dictionary has a size of ~ 100 KB. + * It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`. + * In general, it's recommended to provide a few thousands samples, though this can vary a lot. + * It's recommended that total size of all samples be about ~x100 times the target size of dictionary. + */ +ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover( + void *dictBuffer, size_t dictBufferCapacity, + const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples, + ZDICT_cover_params_t parameters); + +/*! ZDICT_optimizeTrainFromBuffer_cover(): + * The same requirements as above hold for all the parameters except `parameters`. + * This function tries many parameter combinations and picks the best parameters. + * `*parameters` is filled with the best parameters found, + * dictionary constructed with those parameters is stored in `dictBuffer`. + * + * All of the parameters d, k, steps are optional. + * If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8}. + * if steps is zero it defaults to its default value. + * If k is non-zero then we don't check multiple values of k, otherwise we check steps values in [50, 2000]. 
+ * + * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) + * or an error code, which can be tested with ZDICT_isError(). + * On success `*parameters` contains the parameters selected. + * See ZDICT_trainFromBuffer() for details on failure modes. + * Note: ZDICT_optimizeTrainFromBuffer_cover() requires about 8 bytes of memory for each input byte and additionally another 5 bytes of memory for each byte of memory for each thread. + */ +ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover( + void* dictBuffer, size_t dictBufferCapacity, + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, + ZDICT_cover_params_t* parameters); + +/*! ZDICT_trainFromBuffer_fastCover(): + * Train a dictionary from an array of samples using a modified version of COVER algorithm. + * Samples must be stored concatenated in a single flat buffer `samplesBuffer`, + * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order. + * d and k are required. + * All other parameters are optional, will use default values if not provided + * The resulting dictionary will be saved into `dictBuffer`. + * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) + * or an error code, which can be tested with ZDICT_isError(). + * See ZDICT_trainFromBuffer() for details on failure modes. + * Note: ZDICT_trainFromBuffer_fastCover() requires 6 * 2^f bytes of memory. + * Tips: In general, a reasonable dictionary has a size of ~ 100 KB. + * It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`. + * In general, it's recommended to provide a few thousands samples, though this can vary a lot. + * It's recommended that total size of all samples be about ~x100 times the target size of dictionary. + */ +ZDICTLIB_API size_t ZDICT_trainFromBuffer_fastCover(void *dictBuffer, + size_t dictBufferCapacity, const void *samplesBuffer, + const size_t *samplesSizes, unsigned nbSamples, + ZDICT_fastCover_params_t parameters); + +/*! ZDICT_optimizeTrainFromBuffer_fastCover(): + * The same requirements as above hold for all the parameters except `parameters`. + * This function tries many parameter combinations (specifically, k and d combinations) + * and picks the best parameters. `*parameters` is filled with the best parameters found, + * dictionary constructed with those parameters is stored in `dictBuffer`. + * All of the parameters d, k, steps, f, and accel are optional. + * If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8}. + * if steps is zero it defaults to its default value. + * If k is non-zero then we don't check multiple values of k, otherwise we check steps values in [50, 2000]. + * If f is zero, default value of 20 is used. + * If accel is zero, default value of 1 is used. + * + * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) + * or an error code, which can be tested with ZDICT_isError(). + * On success `*parameters` contains the parameters selected. + * See ZDICT_trainFromBuffer() for details on failure modes. + * Note: ZDICT_optimizeTrainFromBuffer_fastCover() requires about 6 * 2^f bytes of memory for each thread. 
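+ *  A minimal sketch (illustrative only; zeroed fields mean "default / search"):
+ *      ZDICT_fastCover_params_t p;
+ *      memset(&p, 0, sizeof(p));
+ *      size_t dictSize = ZDICT_optimizeTrainFromBuffer_fastCover(dictBuffer, dictBufferCapacity,
+ *                            samples, sizes, nbSamples, &p);
+ *      On success, p.k and p.d hold the parameters that were selected.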
+ */ +ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_fastCover(void* dictBuffer, + size_t dictBufferCapacity, const void* samplesBuffer, + const size_t* samplesSizes, unsigned nbSamples, + ZDICT_fastCover_params_t* parameters); + +typedef struct { + unsigned selectivityLevel; /* 0 means default; larger => select more => larger dictionary */ + ZDICT_params_t zParams; +} ZDICT_legacy_params_t; + +/*! ZDICT_trainFromBuffer_legacy(): + * Train a dictionary from an array of samples. + * Samples must be stored concatenated in a single flat buffer `samplesBuffer`, + * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order. + * The resulting dictionary will be saved into `dictBuffer`. + * `parameters` is optional and can be provided with values set to 0 to mean "default". + * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) + * or an error code, which can be tested with ZDICT_isError(). + * See ZDICT_trainFromBuffer() for details on failure modes. + * Tips: In general, a reasonable dictionary has a size of ~ 100 KB. + * It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`. + * In general, it's recommended to provide a few thousands samples, though this can vary a lot. + * It's recommended that total size of all samples be about ~x100 times the target size of dictionary. + * Note: ZDICT_trainFromBuffer_legacy() will send notifications into stderr if instructed to, using notificationLevel>0. + */ +ZDICTLIB_API size_t ZDICT_trainFromBuffer_legacy( + void* dictBuffer, size_t dictBufferCapacity, + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, + ZDICT_legacy_params_t parameters); + + +/* Deprecation warnings */ +/* It is generally possible to disable deprecation warnings from compiler, + for example with -Wno-deprecated-declarations for gcc + or _CRT_SECURE_NO_WARNINGS in Visual. + Otherwise, it's also possible to manually define ZDICT_DISABLE_DEPRECATE_WARNINGS */ +#ifdef ZDICT_DISABLE_DEPRECATE_WARNINGS +# define ZDICT_DEPRECATED(message) ZDICTLIB_API /* disable deprecation warnings */ +#else +# define ZDICT_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) +# if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */ +# define ZDICT_DEPRECATED(message) [[deprecated(message)]] ZDICTLIB_API +# elif defined(__clang__) || (ZDICT_GCC_VERSION >= 405) +# define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated(message))) +# elif (ZDICT_GCC_VERSION >= 301) +# define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated)) +# elif defined(_MSC_VER) +# define ZDICT_DEPRECATED(message) ZDICTLIB_API __declspec(deprecated(message)) +# else +# pragma message("WARNING: You need to implement ZDICT_DEPRECATED for this compiler") +# define ZDICT_DEPRECATED(message) ZDICTLIB_API +# endif +#endif /* ZDICT_DISABLE_DEPRECATE_WARNINGS */ + +ZDICT_DEPRECATED("use ZDICT_finalizeDictionary() instead") +size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity, + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples); + + +#endif /* ZDICT_STATIC_LINKING_ONLY */ + +#if defined (__cplusplus) +} +#endif + +#endif /* DICTBUILDER_H_001 */ +/**** ended inlining ../zdict.h ****/ +/**** start inlining cover.h ****/ +/* + * Copyright (c) Facebook, Inc. + * All rights reserved. 
+ * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZDICT_STATIC_LINKING_ONLY +# define ZDICT_STATIC_LINKING_ONLY +#endif + +#include /* fprintf */ +#include /* malloc, free, qsort */ +#include /* memset */ +#include /* clock */ +/**** skipping file: ../common/mem.h ****/ +/**** skipping file: ../common/pool.h ****/ +/**** skipping file: ../common/threading.h ****/ +/**** skipping file: ../common/zstd_internal.h ****/ +/**** skipping file: ../zdict.h ****/ + +/** + * COVER_best_t is used for two purposes: + * 1. Synchronizing threads. + * 2. Saving the best parameters and dictionary. + * + * All of the methods except COVER_best_init() are thread safe if zstd is + * compiled with multithreaded support. + */ +typedef struct COVER_best_s { + ZSTD_pthread_mutex_t mutex; + ZSTD_pthread_cond_t cond; + size_t liveJobs; + void *dict; + size_t dictSize; + ZDICT_cover_params_t parameters; + size_t compressedSize; +} COVER_best_t; + +/** + * A segment is a range in the source as well as the score of the segment. + */ +typedef struct { + U32 begin; + U32 end; + U32 score; +} COVER_segment_t; + +/** + *Number of epochs and size of each epoch. + */ +typedef struct { + U32 num; + U32 size; +} COVER_epoch_info_t; + +/** + * Struct used for the dictionary selection function. + */ +typedef struct COVER_dictSelection { + BYTE* dictContent; + size_t dictSize; + size_t totalCompressedSize; +} COVER_dictSelection_t; + +/** + * Computes the number of epochs and the size of each epoch. + * We will make sure that each epoch gets at least 10 * k bytes. + * + * The COVER algorithms divide the data up into epochs of equal size and + * select one segment from each epoch. + * + * @param maxDictSize The maximum allowed dictionary size. + * @param nbDmers The number of dmers we are training on. + * @param k The parameter k (segment size). + * @param passes The target number of passes over the dmer corpus. + * More passes means a better dictionary. + */ +COVER_epoch_info_t COVER_computeEpochs(U32 maxDictSize, U32 nbDmers, + U32 k, U32 passes); + +/** + * Warns the user when their corpus is too small. + */ +void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel); + +/** + * Checks total compressed size of a dictionary + */ +size_t COVER_checkTotalCompressedSize(const ZDICT_cover_params_t parameters, + const size_t *samplesSizes, const BYTE *samples, + size_t *offsets, + size_t nbTrainSamples, size_t nbSamples, + BYTE *const dict, size_t dictBufferCapacity); + +/** + * Returns the sum of the sample sizes. + */ +size_t COVER_sum(const size_t *samplesSizes, unsigned nbSamples) ; + +/** + * Initialize the `COVER_best_t`. + */ +void COVER_best_init(COVER_best_t *best); + +/** + * Wait until liveJobs == 0. + */ +void COVER_best_wait(COVER_best_t *best); + +/** + * Call COVER_best_wait() and then destroy the COVER_best_t. + */ +void COVER_best_destroy(COVER_best_t *best); + +/** + * Called when a thread is about to be launched. + * Increments liveJobs. + */ +void COVER_best_start(COVER_best_t *best); + +/** + * Called when a thread finishes executing, both on error or success. + * Decrements liveJobs and signals any waiting threads if liveJobs == 0. + * If this dictionary is the best so far save it and its parameters. 
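+ * Typical lifecycle, as a sketch inferred from the declarations in this header:
+ *      COVER_best_t best;
+ *      COVER_best_init(&best);
+ *      COVER_best_start(&best);        - once per job about to be launched
+ *      ... each job ends by calling COVER_best_finish(&best, params, selection) ...
+ *      COVER_best_destroy(&best);      - waits for liveJobs == 0, then tears the struct down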
+ */ +void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters, + COVER_dictSelection_t selection); +/** + * Error function for COVER_selectDict function. Checks if the return + * value is an error. + */ +unsigned COVER_dictSelectionIsError(COVER_dictSelection_t selection); + + /** + * Error function for COVER_selectDict function. Returns a struct where + * return.totalCompressedSize is a ZSTD error. + */ +COVER_dictSelection_t COVER_dictSelectionError(size_t error); + +/** + * Always call after selectDict is called to free up used memory from + * newly created dictionary. + */ +void COVER_dictSelectionFree(COVER_dictSelection_t selection); + +/** + * Called to finalize the dictionary and select one based on whether or not + * the shrink-dict flag was enabled. If enabled the dictionary used is the + * smallest dictionary within a specified regression of the compressed size + * from the largest dictionary. + */ + COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, size_t dictBufferCapacity, + size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples, + size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize); +/**** ended inlining cover.h ****/ + +/*-************************************* +* Constants +***************************************/ +#define COVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB)) +#define COVER_DEFAULT_SPLITPOINT 1.0 + +/*-************************************* +* Console display +***************************************/ +#ifndef LOCALDISPLAYLEVEL +static int g_displayLevel = 2; +#endif +#undef DISPLAY +#define DISPLAY(...) \ + { \ + fprintf(stderr, __VA_ARGS__); \ + fflush(stderr); \ + } +#undef LOCALDISPLAYLEVEL +#define LOCALDISPLAYLEVEL(displayLevel, l, ...) \ + if (displayLevel >= l) { \ + DISPLAY(__VA_ARGS__); \ + } /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */ +#undef DISPLAYLEVEL +#define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__) + +#ifndef LOCALDISPLAYUPDATE +static const clock_t g_refreshRate = CLOCKS_PER_SEC * 15 / 100; +static clock_t g_time = 0; +#endif +#undef LOCALDISPLAYUPDATE +#define LOCALDISPLAYUPDATE(displayLevel, l, ...) \ + if (displayLevel >= l) { \ + if ((clock() - g_time > g_refreshRate) || (displayLevel >= 4)) { \ + g_time = clock(); \ + DISPLAY(__VA_ARGS__); \ + } \ + } +#undef DISPLAYUPDATE +#define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__) + +/*-************************************* +* Hash table +*************************************** +* A small specialized hash map for storing activeDmers. +* The map does not resize, so if it becomes full it will loop forever. +* Thus, the map must be large enough to store every value. +* The map implements linear probing and keeps its load less than 0.5. +*/ + +#define MAP_EMPTY_VALUE ((U32)-1) +typedef struct COVER_map_pair_t_s { + U32 key; + U32 value; +} COVER_map_pair_t; + +typedef struct COVER_map_s { + COVER_map_pair_t *data; + U32 sizeLog; + U32 size; + U32 sizeMask; +} COVER_map_t; + +/** + * Clear the map. + */ +static void COVER_map_clear(COVER_map_t *map) { + memset(map->data, MAP_EMPTY_VALUE, map->size * sizeof(COVER_map_pair_t)); +} + +/** + * Initializes a map of the given size. + * Returns 1 on success and 0 on failure. + * The map must be destroyed with COVER_map_destroy(). + * The map is only guaranteed to be large enough to hold size elements. 
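+ * Usage sketch (illustrative; `neededSize` and `dmerId` are placeholder values):
+ *      COVER_map_t map;
+ *      if (COVER_map_init(&map, neededSize)) {
+ *          *COVER_map_at(&map, dmerId) += 1;    - inserts the key with value 0, then bumps it
+ *          COVER_map_remove(&map, dmerId);
+ *          COVER_map_destroy(&map);
+ *      }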
+ */ +static int COVER_map_init(COVER_map_t *map, U32 size) { + map->sizeLog = ZSTD_highbit32(size) + 2; + map->size = (U32)1 << map->sizeLog; + map->sizeMask = map->size - 1; + map->data = (COVER_map_pair_t *)malloc(map->size * sizeof(COVER_map_pair_t)); + if (!map->data) { + map->sizeLog = 0; + map->size = 0; + return 0; + } + COVER_map_clear(map); + return 1; +} + +/** + * Internal hash function + */ +static const U32 COVER_prime4bytes = 2654435761U; +static U32 COVER_map_hash(COVER_map_t *map, U32 key) { + return (key * COVER_prime4bytes) >> (32 - map->sizeLog); +} + +/** + * Helper function that returns the index that a key should be placed into. + */ +static U32 COVER_map_index(COVER_map_t *map, U32 key) { + const U32 hash = COVER_map_hash(map, key); + U32 i; + for (i = hash;; i = (i + 1) & map->sizeMask) { + COVER_map_pair_t *pos = &map->data[i]; + if (pos->value == MAP_EMPTY_VALUE) { + return i; + } + if (pos->key == key) { + return i; + } + } +} + +/** + * Returns the pointer to the value for key. + * If key is not in the map, it is inserted and the value is set to 0. + * The map must not be full. + */ +static U32 *COVER_map_at(COVER_map_t *map, U32 key) { + COVER_map_pair_t *pos = &map->data[COVER_map_index(map, key)]; + if (pos->value == MAP_EMPTY_VALUE) { + pos->key = key; + pos->value = 0; + } + return &pos->value; +} + +/** + * Deletes key from the map if present. + */ +static void COVER_map_remove(COVER_map_t *map, U32 key) { + U32 i = COVER_map_index(map, key); + COVER_map_pair_t *del = &map->data[i]; + U32 shift = 1; + if (del->value == MAP_EMPTY_VALUE) { + return; + } + for (i = (i + 1) & map->sizeMask;; i = (i + 1) & map->sizeMask) { + COVER_map_pair_t *const pos = &map->data[i]; + /* If the position is empty we are done */ + if (pos->value == MAP_EMPTY_VALUE) { + del->value = MAP_EMPTY_VALUE; + return; + } + /* If pos can be moved to del do so */ + if (((i - COVER_map_hash(map, pos->key)) & map->sizeMask) >= shift) { + del->key = pos->key; + del->value = pos->value; + del = pos; + shift = 1; + } else { + ++shift; + } + } +} + +/** + * Destroys a map that is inited with COVER_map_init(). + */ +static void COVER_map_destroy(COVER_map_t *map) { + if (map->data) { + free(map->data); + } + map->data = NULL; + map->size = 0; +} + +/*-************************************* +* Context +***************************************/ + +typedef struct { + const BYTE *samples; + size_t *offsets; + const size_t *samplesSizes; + size_t nbSamples; + size_t nbTrainSamples; + size_t nbTestSamples; + U32 *suffix; + size_t suffixSize; + U32 *freqs; + U32 *dmerAt; + unsigned d; +} COVER_ctx_t; + +/* We need a global context for qsort... */ +static COVER_ctx_t *g_coverCtx = NULL; + +/*-************************************* +* Helper functions +***************************************/ + +/** + * Returns the sum of the sample sizes. + */ +size_t COVER_sum(const size_t *samplesSizes, unsigned nbSamples) { + size_t sum = 0; + unsigned i; + for (i = 0; i < nbSamples; ++i) { + sum += samplesSizes[i]; + } + return sum; +} + +/** + * Returns -1 if the dmer at lp is less than the dmer at rp. + * Return 0 if the dmers at lp and rp are equal. + * Returns 1 if the dmer at lp is greater than the dmer at rp. + */ +static int COVER_cmp(COVER_ctx_t *ctx, const void *lp, const void *rp) { + U32 const lhs = *(U32 const *)lp; + U32 const rhs = *(U32 const *)rp; + return memcmp(ctx->samples + lhs, ctx->samples + rhs, ctx->d); +} +/** + * Faster version for d <= 8. 
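+ * (With d <= 8 the whole dmer fits in one little-endian 64-bit load, so a single
+ * masked integer comparison replaces the memcmp used by COVER_cmp(); the mask
+ * keeps only the low 8*d bits.)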
+ */ +static int COVER_cmp8(COVER_ctx_t *ctx, const void *lp, const void *rp) { + U64 const mask = (ctx->d == 8) ? (U64)-1 : (((U64)1 << (8 * ctx->d)) - 1); + U64 const lhs = MEM_readLE64(ctx->samples + *(U32 const *)lp) & mask; + U64 const rhs = MEM_readLE64(ctx->samples + *(U32 const *)rp) & mask; + if (lhs < rhs) { + return -1; + } + return (lhs > rhs); +} + +/** + * Same as COVER_cmp() except ties are broken by pointer value + * NOTE: g_coverCtx must be set to call this function. A global is required because + * qsort doesn't take an opaque pointer. + */ +static int WIN_CDECL COVER_strict_cmp(const void *lp, const void *rp) { + int result = COVER_cmp(g_coverCtx, lp, rp); + if (result == 0) { + result = lp < rp ? -1 : 1; + } + return result; +} +/** + * Faster version for d <= 8. + */ +static int WIN_CDECL COVER_strict_cmp8(const void *lp, const void *rp) { + int result = COVER_cmp8(g_coverCtx, lp, rp); + if (result == 0) { + result = lp < rp ? -1 : 1; + } + return result; +} + +/** + * Returns the first pointer in [first, last) whose element does not compare + * less than value. If no such element exists it returns last. + */ +static const size_t *COVER_lower_bound(const size_t *first, const size_t *last, + size_t value) { + size_t count = last - first; + while (count != 0) { + size_t step = count / 2; + const size_t *ptr = first; + ptr += step; + if (*ptr < value) { + first = ++ptr; + count -= step + 1; + } else { + count = step; + } + } + return first; +} + +/** + * Generic groupBy function. + * Groups an array sorted by cmp into groups with equivalent values. + * Calls grp for each group. + */ +static void +COVER_groupBy(const void *data, size_t count, size_t size, COVER_ctx_t *ctx, + int (*cmp)(COVER_ctx_t *, const void *, const void *), + void (*grp)(COVER_ctx_t *, const void *, const void *)) { + const BYTE *ptr = (const BYTE *)data; + size_t num = 0; + while (num < count) { + const BYTE *grpEnd = ptr + size; + ++num; + while (num < count && cmp(ctx, ptr, grpEnd) == 0) { + grpEnd += size; + ++num; + } + grp(ctx, ptr, grpEnd); + ptr = grpEnd; + } +} + +/*-************************************* +* Cover functions +***************************************/ + +/** + * Called on each group of positions with the same dmer. + * Counts the frequency of each dmer and saves it in the suffix array. + * Fills `ctx->dmerAt`. + */ +static void COVER_group(COVER_ctx_t *ctx, const void *group, + const void *groupEnd) { + /* The group consists of all the positions with the same first d bytes. */ + const U32 *grpPtr = (const U32 *)group; + const U32 *grpEnd = (const U32 *)groupEnd; + /* The dmerId is how we will reference this dmer. + * This allows us to map the whole dmer space to a much smaller space, the + * size of the suffix array. + */ + const U32 dmerId = (U32)(grpPtr - ctx->suffix); + /* Count the number of samples this dmer shows up in */ + U32 freq = 0; + /* Details */ + const size_t *curOffsetPtr = ctx->offsets; + const size_t *offsetsEnd = ctx->offsets + ctx->nbSamples; + /* Once *grpPtr >= curSampleEnd this occurrence of the dmer is in a + * different sample than the last. + */ + size_t curSampleEnd = ctx->offsets[0]; + for (; grpPtr != grpEnd; ++grpPtr) { + /* Save the dmerId for this position so we can get back to it. */ + ctx->dmerAt[*grpPtr] = dmerId; + /* Dictionaries only help for the first reference to the dmer. + * After that zstd can reference the match from the previous reference. + * So only count each dmer once for each sample it is in. 
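+ * For example, a dmer that occurs three times in one sample and once in
+ * another contributes a frequency of 2 (two samples), not 4 (four positions).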
+ */ + if (*grpPtr < curSampleEnd) { + continue; + } + freq += 1; + /* Binary search to find the end of the sample *grpPtr is in. + * In the common case that grpPtr + 1 == grpEnd we can skip the binary + * search because the loop is over. + */ + if (grpPtr + 1 != grpEnd) { + const size_t *sampleEndPtr = + COVER_lower_bound(curOffsetPtr, offsetsEnd, *grpPtr); + curSampleEnd = *sampleEndPtr; + curOffsetPtr = sampleEndPtr + 1; + } + } + /* At this point we are never going to look at this segment of the suffix + * array again. We take advantage of this fact to save memory. + * We store the frequency of the dmer in the first position of the group, + * which is dmerId. + */ + ctx->suffix[dmerId] = freq; +} + + +/** + * Selects the best segment in an epoch. + * Segments of are scored according to the function: + * + * Let F(d) be the frequency of dmer d. + * Let S_i be the dmer at position i of segment S which has length k. + * + * Score(S) = F(S_1) + F(S_2) + ... + F(S_{k-d+1}) + * + * Once the dmer d is in the dictionary we set F(d) = 0. + */ +static COVER_segment_t COVER_selectSegment(const COVER_ctx_t *ctx, U32 *freqs, + COVER_map_t *activeDmers, U32 begin, + U32 end, + ZDICT_cover_params_t parameters) { + /* Constants */ + const U32 k = parameters.k; + const U32 d = parameters.d; + const U32 dmersInK = k - d + 1; + /* Try each segment (activeSegment) and save the best (bestSegment) */ + COVER_segment_t bestSegment = {0, 0, 0}; + COVER_segment_t activeSegment; + /* Reset the activeDmers in the segment */ + COVER_map_clear(activeDmers); + /* The activeSegment starts at the beginning of the epoch. */ + activeSegment.begin = begin; + activeSegment.end = begin; + activeSegment.score = 0; + /* Slide the activeSegment through the whole epoch. + * Save the best segment in bestSegment. + */ + while (activeSegment.end < end) { + /* The dmerId for the dmer at the next position */ + U32 newDmer = ctx->dmerAt[activeSegment.end]; + /* The entry in activeDmers for this dmerId */ + U32 *newDmerOcc = COVER_map_at(activeDmers, newDmer); + /* If the dmer isn't already present in the segment add its score. */ + if (*newDmerOcc == 0) { + /* The paper suggest using the L-0.5 norm, but experiments show that it + * doesn't help. + */ + activeSegment.score += freqs[newDmer]; + } + /* Add the dmer to the segment */ + activeSegment.end += 1; + *newDmerOcc += 1; + + /* If the window is now too large, drop the first position */ + if (activeSegment.end - activeSegment.begin == dmersInK + 1) { + U32 delDmer = ctx->dmerAt[activeSegment.begin]; + U32 *delDmerOcc = COVER_map_at(activeDmers, delDmer); + activeSegment.begin += 1; + *delDmerOcc -= 1; + /* If this is the last occurrence of the dmer, subtract its score */ + if (*delDmerOcc == 0) { + COVER_map_remove(activeDmers, delDmer); + activeSegment.score -= freqs[delDmer]; + } + } + + /* If this segment is the best so far save it */ + if (activeSegment.score > bestSegment.score) { + bestSegment = activeSegment; + } + } + { + /* Trim off the zero frequency head and tail from the segment. */ + U32 newBegin = bestSegment.end; + U32 newEnd = bestSegment.begin; + U32 pos; + for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) { + U32 freq = freqs[ctx->dmerAt[pos]]; + if (freq != 0) { + newBegin = MIN(newBegin, pos); + newEnd = pos + 1; + } + } + bestSegment.begin = newBegin; + bestSegment.end = newEnd; + } + { + /* Zero out the frequency of each dmer covered by the chosen segment. 
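+ * This enforces the rule stated above (once a dmer is in the dictionary,
+ * F(d) = 0), so later segments and epochs are not rewarded for covering
+ * content the dictionary already contains.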
*/ + U32 pos; + for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) { + freqs[ctx->dmerAt[pos]] = 0; + } + } + return bestSegment; +} + +/** + * Check the validity of the parameters. + * Returns non-zero if the parameters are valid and 0 otherwise. + */ +static int COVER_checkParameters(ZDICT_cover_params_t parameters, + size_t maxDictSize) { + /* k and d are required parameters */ + if (parameters.d == 0 || parameters.k == 0) { + return 0; + } + /* k <= maxDictSize */ + if (parameters.k > maxDictSize) { + return 0; + } + /* d <= k */ + if (parameters.d > parameters.k) { + return 0; + } + /* 0 < splitPoint <= 1 */ + if (parameters.splitPoint <= 0 || parameters.splitPoint > 1){ + return 0; + } + return 1; +} + +/** + * Clean up a context initialized with `COVER_ctx_init()`. + */ +static void COVER_ctx_destroy(COVER_ctx_t *ctx) { + if (!ctx) { + return; + } + if (ctx->suffix) { + free(ctx->suffix); + ctx->suffix = NULL; + } + if (ctx->freqs) { + free(ctx->freqs); + ctx->freqs = NULL; + } + if (ctx->dmerAt) { + free(ctx->dmerAt); + ctx->dmerAt = NULL; + } + if (ctx->offsets) { + free(ctx->offsets); + ctx->offsets = NULL; + } +} + +/** + * Prepare a context for dictionary building. + * The context is only dependent on the parameter `d` and can used multiple + * times. + * Returns 0 on success or error code on error. + * The context must be destroyed with `COVER_ctx_destroy()`. + */ +static size_t COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer, + const size_t *samplesSizes, unsigned nbSamples, + unsigned d, double splitPoint) { + const BYTE *const samples = (const BYTE *)samplesBuffer; + const size_t totalSamplesSize = COVER_sum(samplesSizes, nbSamples); + /* Split samples into testing and training sets */ + const unsigned nbTrainSamples = splitPoint < 1.0 ? (unsigned)((double)nbSamples * splitPoint) : nbSamples; + const unsigned nbTestSamples = splitPoint < 1.0 ? nbSamples - nbTrainSamples : nbSamples; + const size_t trainingSamplesSize = splitPoint < 1.0 ? COVER_sum(samplesSizes, nbTrainSamples) : totalSamplesSize; + const size_t testSamplesSize = splitPoint < 1.0 ? 
COVER_sum(samplesSizes + nbTrainSamples, nbTestSamples) : totalSamplesSize; + /* Checks */ + if (totalSamplesSize < MAX(d, sizeof(U64)) || + totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) { + DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n", + (unsigned)(totalSamplesSize>>20), (COVER_MAX_SAMPLES_SIZE >> 20)); + return ERROR(srcSize_wrong); + } + /* Check if there are at least 5 training samples */ + if (nbTrainSamples < 5) { + DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid.", nbTrainSamples); + return ERROR(srcSize_wrong); + } + /* Check if there's testing sample */ + if (nbTestSamples < 1) { + DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.", nbTestSamples); + return ERROR(srcSize_wrong); + } + /* Zero the context */ + memset(ctx, 0, sizeof(*ctx)); + DISPLAYLEVEL(2, "Training on %u samples of total size %u\n", nbTrainSamples, + (unsigned)trainingSamplesSize); + DISPLAYLEVEL(2, "Testing on %u samples of total size %u\n", nbTestSamples, + (unsigned)testSamplesSize); + ctx->samples = samples; + ctx->samplesSizes = samplesSizes; + ctx->nbSamples = nbSamples; + ctx->nbTrainSamples = nbTrainSamples; + ctx->nbTestSamples = nbTestSamples; + /* Partial suffix array */ + ctx->suffixSize = trainingSamplesSize - MAX(d, sizeof(U64)) + 1; + ctx->suffix = (U32 *)malloc(ctx->suffixSize * sizeof(U32)); + /* Maps index to the dmerID */ + ctx->dmerAt = (U32 *)malloc(ctx->suffixSize * sizeof(U32)); + /* The offsets of each file */ + ctx->offsets = (size_t *)malloc((nbSamples + 1) * sizeof(size_t)); + if (!ctx->suffix || !ctx->dmerAt || !ctx->offsets) { + DISPLAYLEVEL(1, "Failed to allocate scratch buffers\n"); + COVER_ctx_destroy(ctx); + return ERROR(memory_allocation); + } + ctx->freqs = NULL; + ctx->d = d; + + /* Fill offsets from the samplesSizes */ + { + U32 i; + ctx->offsets[0] = 0; + for (i = 1; i <= nbSamples; ++i) { + ctx->offsets[i] = ctx->offsets[i - 1] + samplesSizes[i - 1]; + } + } + DISPLAYLEVEL(2, "Constructing partial suffix array\n"); + { + /* suffix is a partial suffix array. + * It only sorts suffixes by their first parameters.d bytes. + * The sort is stable, so each dmer group is sorted by position in input. + */ + U32 i; + for (i = 0; i < ctx->suffixSize; ++i) { + ctx->suffix[i] = i; + } + /* qsort doesn't take an opaque pointer, so pass as a global. + * On OpenBSD qsort() is not guaranteed to be stable, their mergesort() is. + */ + g_coverCtx = ctx; +#if defined(__OpenBSD__) + mergesort(ctx->suffix, ctx->suffixSize, sizeof(U32), + (ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp)); +#else + qsort(ctx->suffix, ctx->suffixSize, sizeof(U32), + (ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp)); +#endif + } + DISPLAYLEVEL(2, "Computing frequencies\n"); + /* For each dmer group (group of positions with the same first d bytes): + * 1. For each position we set dmerAt[position] = dmerID. The dmerID is + * (groupBeginPtr - suffix). This allows us to go from position to + * dmerID so we can look up values in freq. + * 2. We calculate how many samples the dmer occurs in and save it in + * freqs[dmerId]. + */ + COVER_groupBy(ctx->suffix, ctx->suffixSize, sizeof(U32), ctx, + (ctx->d <= 8 ? 
&COVER_cmp8 : &COVER_cmp), &COVER_group); + ctx->freqs = ctx->suffix; + ctx->suffix = NULL; + return 0; +} + +void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel) +{ + const double ratio = (double)nbDmers / maxDictSize; + if (ratio >= 10) { + return; + } + LOCALDISPLAYLEVEL(displayLevel, 1, + "WARNING: The maximum dictionary size %u is too large " + "compared to the source size %u! " + "size(source)/size(dictionary) = %f, but it should be >= " + "10! This may lead to a subpar dictionary! We recommend " + "training on sources at least 10x, and preferably 100x " + "the size of the dictionary! \n", (U32)maxDictSize, + (U32)nbDmers, ratio); +} + +COVER_epoch_info_t COVER_computeEpochs(U32 maxDictSize, + U32 nbDmers, U32 k, U32 passes) +{ + const U32 minEpochSize = k * 10; + COVER_epoch_info_t epochs; + epochs.num = MAX(1, maxDictSize / k / passes); + epochs.size = nbDmers / epochs.num; + if (epochs.size >= minEpochSize) { + assert(epochs.size * epochs.num <= nbDmers); + return epochs; + } + epochs.size = MIN(minEpochSize, nbDmers); + epochs.num = nbDmers / epochs.size; + assert(epochs.size * epochs.num <= nbDmers); + return epochs; +} + +/** + * Given the prepared context build the dictionary. + */ +static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs, + COVER_map_t *activeDmers, void *dictBuffer, + size_t dictBufferCapacity, + ZDICT_cover_params_t parameters) { + BYTE *const dict = (BYTE *)dictBuffer; + size_t tail = dictBufferCapacity; + /* Divide the data into epochs. We will select one segment from each epoch. */ + const COVER_epoch_info_t epochs = COVER_computeEpochs( + (U32)dictBufferCapacity, (U32)ctx->suffixSize, parameters.k, 4); + const size_t maxZeroScoreRun = MAX(10, MIN(100, epochs.num >> 3)); + size_t zeroScoreRun = 0; + size_t epoch; + DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n", + (U32)epochs.num, (U32)epochs.size); + /* Loop through the epochs until there are no more segments or the dictionary + * is full. + */ + for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs.num) { + const U32 epochBegin = (U32)(epoch * epochs.size); + const U32 epochEnd = epochBegin + epochs.size; + size_t segmentSize; + /* Select a segment */ + COVER_segment_t segment = COVER_selectSegment( + ctx, freqs, activeDmers, epochBegin, epochEnd, parameters); + /* If the segment covers no dmers, then we are out of content. + * There may be new content in other epochs, for continue for some time. + */ + if (segment.score == 0) { + if (++zeroScoreRun >= maxZeroScoreRun) { + break; + } + continue; + } + zeroScoreRun = 0; + /* Trim the segment if necessary and if it is too small then we are done */ + segmentSize = MIN(segment.end - segment.begin + parameters.d - 1, tail); + if (segmentSize < parameters.d) { + break; + } + /* We fill the dictionary from the back to allow the best segments to be + * referenced with the smallest offsets. 
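+ * Concretely: with a 1000-byte dictionary buffer, a first (highest scoring)
+ * 300-byte segment is copied to bytes [700, 1000) and the next 200-byte
+ * segment to bytes [500, 700), so the most useful content sits at the end of
+ * the dictionary where it can be referenced with the smallest offsets
+ * (the sizes in this example are made up for illustration).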
+ */ + tail -= segmentSize; + memcpy(dict + tail, ctx->samples + segment.begin, segmentSize); + DISPLAYUPDATE( + 2, "\r%u%% ", + (unsigned)(((dictBufferCapacity - tail) * 100) / dictBufferCapacity)); + } + DISPLAYLEVEL(2, "\r%79s\r", ""); + return tail; +} + +ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover( + void *dictBuffer, size_t dictBufferCapacity, + const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples, + ZDICT_cover_params_t parameters) +{ + BYTE* const dict = (BYTE*)dictBuffer; + COVER_ctx_t ctx; + COVER_map_t activeDmers; + parameters.splitPoint = 1.0; + /* Initialize global data */ + g_displayLevel = parameters.zParams.notificationLevel; + /* Checks */ + if (!COVER_checkParameters(parameters, dictBufferCapacity)) { + DISPLAYLEVEL(1, "Cover parameters incorrect\n"); + return ERROR(parameter_outOfBound); + } + if (nbSamples == 0) { + DISPLAYLEVEL(1, "Cover must have at least one input file\n"); + return ERROR(srcSize_wrong); + } + if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) { + DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n", + ZDICT_DICTSIZE_MIN); + return ERROR(dstSize_tooSmall); + } + /* Initialize context and activeDmers */ + { + size_t const initVal = COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, + parameters.d, parameters.splitPoint); + if (ZSTD_isError(initVal)) { + return initVal; + } + } + COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, g_displayLevel); + if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) { + DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n"); + COVER_ctx_destroy(&ctx); + return ERROR(memory_allocation); + } + + DISPLAYLEVEL(2, "Building dictionary\n"); + { + const size_t tail = + COVER_buildDictionary(&ctx, ctx.freqs, &activeDmers, dictBuffer, + dictBufferCapacity, parameters); + const size_t dictionarySize = ZDICT_finalizeDictionary( + dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail, + samplesBuffer, samplesSizes, nbSamples, parameters.zParams); + if (!ZSTD_isError(dictionarySize)) { + DISPLAYLEVEL(2, "Constructed dictionary of size %u\n", + (unsigned)dictionarySize); + } + COVER_ctx_destroy(&ctx); + COVER_map_destroy(&activeDmers); + return dictionarySize; + } +} + + + +size_t COVER_checkTotalCompressedSize(const ZDICT_cover_params_t parameters, + const size_t *samplesSizes, const BYTE *samples, + size_t *offsets, + size_t nbTrainSamples, size_t nbSamples, + BYTE *const dict, size_t dictBufferCapacity) { + size_t totalCompressedSize = ERROR(GENERIC); + /* Pointers */ + ZSTD_CCtx *cctx; + ZSTD_CDict *cdict; + void *dst; + /* Local variables */ + size_t dstCapacity; + size_t i; + /* Allocate dst with enough space to compress the maximum sized sample */ + { + size_t maxSampleSize = 0; + i = parameters.splitPoint < 1.0 ? nbTrainSamples : 0; + for (; i < nbSamples; ++i) { + maxSampleSize = MAX(samplesSizes[i], maxSampleSize); + } + dstCapacity = ZSTD_compressBound(maxSampleSize); + dst = malloc(dstCapacity); + } + /* Create the cctx and cdict */ + cctx = ZSTD_createCCtx(); + cdict = ZSTD_createCDict(dict, dictBufferCapacity, + parameters.zParams.compressionLevel); + if (!dst || !cctx || !cdict) { + goto _compressCleanup; + } + /* Compress each sample and sum their sizes (or error) */ + totalCompressedSize = dictBufferCapacity; + i = parameters.splitPoint < 1.0 ? 
nbTrainSamples : 0; + for (; i < nbSamples; ++i) { + const size_t size = ZSTD_compress_usingCDict( + cctx, dst, dstCapacity, samples + offsets[i], + samplesSizes[i], cdict); + if (ZSTD_isError(size)) { + totalCompressedSize = size; + goto _compressCleanup; + } + totalCompressedSize += size; + } +_compressCleanup: + ZSTD_freeCCtx(cctx); + ZSTD_freeCDict(cdict); + if (dst) { + free(dst); + } + return totalCompressedSize; +} + + +/** + * Initialize the `COVER_best_t`. + */ +void COVER_best_init(COVER_best_t *best) { + if (best==NULL) return; /* compatible with init on NULL */ + (void)ZSTD_pthread_mutex_init(&best->mutex, NULL); + (void)ZSTD_pthread_cond_init(&best->cond, NULL); + best->liveJobs = 0; + best->dict = NULL; + best->dictSize = 0; + best->compressedSize = (size_t)-1; + memset(&best->parameters, 0, sizeof(best->parameters)); +} + +/** + * Wait until liveJobs == 0. + */ +void COVER_best_wait(COVER_best_t *best) { + if (!best) { + return; + } + ZSTD_pthread_mutex_lock(&best->mutex); + while (best->liveJobs != 0) { + ZSTD_pthread_cond_wait(&best->cond, &best->mutex); + } + ZSTD_pthread_mutex_unlock(&best->mutex); +} + +/** + * Call COVER_best_wait() and then destroy the COVER_best_t. + */ +void COVER_best_destroy(COVER_best_t *best) { + if (!best) { + return; + } + COVER_best_wait(best); + if (best->dict) { + free(best->dict); + } + ZSTD_pthread_mutex_destroy(&best->mutex); + ZSTD_pthread_cond_destroy(&best->cond); +} + +/** + * Called when a thread is about to be launched. + * Increments liveJobs. + */ +void COVER_best_start(COVER_best_t *best) { + if (!best) { + return; + } + ZSTD_pthread_mutex_lock(&best->mutex); + ++best->liveJobs; + ZSTD_pthread_mutex_unlock(&best->mutex); +} + +/** + * Called when a thread finishes executing, both on error or success. + * Decrements liveJobs and signals any waiting threads if liveJobs == 0. + * If this dictionary is the best so far save it and its parameters. 
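+ *
+ * The intended lifecycle, as used by ZDICT_optimizeTrainFromBuffer_cover()
+ * below (sketch only, error handling omitted):
+ *
+ *     COVER_best_t best;
+ *     COVER_best_init(&best);
+ *     COVER_best_start(&best);           // once per parameter set tried
+ *     POOL_add(pool, &COVER_tryParameters, data); // worker ends in COVER_best_finish()
+ *     COVER_best_wait(&best);            // block until liveJobs == 0
+ *     memcpy(dictBuffer, best.dict, best.dictSize);
+ *     COVER_best_destroy(&best);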
+ */ +void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters, + COVER_dictSelection_t selection) { + void* dict = selection.dictContent; + size_t compressedSize = selection.totalCompressedSize; + size_t dictSize = selection.dictSize; + if (!best) { + return; + } + { + size_t liveJobs; + ZSTD_pthread_mutex_lock(&best->mutex); + --best->liveJobs; + liveJobs = best->liveJobs; + /* If the new dictionary is better */ + if (compressedSize < best->compressedSize) { + /* Allocate space if necessary */ + if (!best->dict || best->dictSize < dictSize) { + if (best->dict) { + free(best->dict); + } + best->dict = malloc(dictSize); + if (!best->dict) { + best->compressedSize = ERROR(GENERIC); + best->dictSize = 0; + ZSTD_pthread_cond_signal(&best->cond); + ZSTD_pthread_mutex_unlock(&best->mutex); + return; + } + } + /* Save the dictionary, parameters, and size */ + if (dict) { + memcpy(best->dict, dict, dictSize); + best->dictSize = dictSize; + best->parameters = parameters; + best->compressedSize = compressedSize; + } + } + if (liveJobs == 0) { + ZSTD_pthread_cond_broadcast(&best->cond); + } + ZSTD_pthread_mutex_unlock(&best->mutex); + } +} + +COVER_dictSelection_t COVER_dictSelectionError(size_t error) { + COVER_dictSelection_t selection = { NULL, 0, error }; + return selection; +} + +unsigned COVER_dictSelectionIsError(COVER_dictSelection_t selection) { + return (ZSTD_isError(selection.totalCompressedSize) || !selection.dictContent); +} + +void COVER_dictSelectionFree(COVER_dictSelection_t selection){ + free(selection.dictContent); +} + +COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, size_t dictBufferCapacity, + size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples, + size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize) { + + size_t largestDict = 0; + size_t largestCompressed = 0; + BYTE* customDictContentEnd = customDictContent + dictContentSize; + + BYTE * largestDictbuffer = (BYTE *)malloc(dictBufferCapacity); + BYTE * candidateDictBuffer = (BYTE *)malloc(dictBufferCapacity); + double regressionTolerance = ((double)params.shrinkDictMaxRegression / 100.0) + 1.00; + + if (!largestDictbuffer || !candidateDictBuffer) { + free(largestDictbuffer); + free(candidateDictBuffer); + return COVER_dictSelectionError(dictContentSize); + } + + /* Initial dictionary size and compressed size */ + memcpy(largestDictbuffer, customDictContent, dictContentSize); + dictContentSize = ZDICT_finalizeDictionary( + largestDictbuffer, dictBufferCapacity, customDictContent, dictContentSize, + samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams); + + if (ZDICT_isError(dictContentSize)) { + free(largestDictbuffer); + free(candidateDictBuffer); + return COVER_dictSelectionError(dictContentSize); + } + + totalCompressedSize = COVER_checkTotalCompressedSize(params, samplesSizes, + samplesBuffer, offsets, + nbCheckSamples, nbSamples, + largestDictbuffer, dictContentSize); + + if (ZSTD_isError(totalCompressedSize)) { + free(largestDictbuffer); + free(candidateDictBuffer); + return COVER_dictSelectionError(totalCompressedSize); + } + + if (params.shrinkDict == 0) { + COVER_dictSelection_t selection = { largestDictbuffer, dictContentSize, totalCompressedSize }; + free(candidateDictBuffer); + return selection; + } + + largestDict = dictContentSize; + largestCompressed = totalCompressedSize; + dictContentSize = ZDICT_DICTSIZE_MIN; + + /* Largest dict is initially at least 
ZDICT_DICTSIZE_MIN */ + while (dictContentSize < largestDict) { + memcpy(candidateDictBuffer, largestDictbuffer, largestDict); + dictContentSize = ZDICT_finalizeDictionary( + candidateDictBuffer, dictBufferCapacity, customDictContentEnd - dictContentSize, dictContentSize, + samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams); + + if (ZDICT_isError(dictContentSize)) { + free(largestDictbuffer); + free(candidateDictBuffer); + return COVER_dictSelectionError(dictContentSize); + + } + + totalCompressedSize = COVER_checkTotalCompressedSize(params, samplesSizes, + samplesBuffer, offsets, + nbCheckSamples, nbSamples, + candidateDictBuffer, dictContentSize); + + if (ZSTD_isError(totalCompressedSize)) { + free(largestDictbuffer); + free(candidateDictBuffer); + return COVER_dictSelectionError(totalCompressedSize); + } + + if (totalCompressedSize <= largestCompressed * regressionTolerance) { + COVER_dictSelection_t selection = { candidateDictBuffer, dictContentSize, totalCompressedSize }; + free(largestDictbuffer); + return selection; + } + dictContentSize *= 2; + } + dictContentSize = largestDict; + totalCompressedSize = largestCompressed; + { + COVER_dictSelection_t selection = { largestDictbuffer, dictContentSize, totalCompressedSize }; + free(candidateDictBuffer); + return selection; + } +} + +/** + * Parameters for COVER_tryParameters(). + */ +typedef struct COVER_tryParameters_data_s { + const COVER_ctx_t *ctx; + COVER_best_t *best; + size_t dictBufferCapacity; + ZDICT_cover_params_t parameters; +} COVER_tryParameters_data_t; + +/** + * Tries a set of parameters and updates the COVER_best_t with the results. + * This function is thread safe if zstd is compiled with multithreaded support. + * It takes its parameters as an *OWNING* opaque pointer to support threading. 
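+ * "Owning" means the caller malloc()s the COVER_tryParameters_data_t, hands
+ * it over (directly or via POOL_add()), and must not use or free it
+ * afterwards: this function free()s it on every path through _cleanup.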
+ */ +static void COVER_tryParameters(void *opaque) +{ + /* Save parameters as local variables */ + COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t*)opaque; + const COVER_ctx_t *const ctx = data->ctx; + const ZDICT_cover_params_t parameters = data->parameters; + size_t dictBufferCapacity = data->dictBufferCapacity; + size_t totalCompressedSize = ERROR(GENERIC); + /* Allocate space for hash table, dict, and freqs */ + COVER_map_t activeDmers; + BYTE* const dict = (BYTE*)malloc(dictBufferCapacity); + COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC)); + U32* const freqs = (U32*)malloc(ctx->suffixSize * sizeof(U32)); + if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) { + DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n"); + goto _cleanup; + } + if (!dict || !freqs) { + DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n"); + goto _cleanup; + } + /* Copy the frequencies because we need to modify them */ + memcpy(freqs, ctx->freqs, ctx->suffixSize * sizeof(U32)); + /* Build the dictionary */ + { + const size_t tail = COVER_buildDictionary(ctx, freqs, &activeDmers, dict, + dictBufferCapacity, parameters); + selection = COVER_selectDict(dict + tail, dictBufferCapacity, dictBufferCapacity - tail, + ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbTrainSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets, + totalCompressedSize); + + if (COVER_dictSelectionIsError(selection)) { + DISPLAYLEVEL(1, "Failed to select dictionary\n"); + goto _cleanup; + } + } +_cleanup: + free(dict); + COVER_best_finish(data->best, parameters, selection); + free(data); + COVER_map_destroy(&activeDmers); + COVER_dictSelectionFree(selection); + free(freqs); +} + +ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover( + void* dictBuffer, size_t dictBufferCapacity, const void* samplesBuffer, + const size_t* samplesSizes, unsigned nbSamples, + ZDICT_cover_params_t* parameters) +{ + /* constants */ + const unsigned nbThreads = parameters->nbThreads; + const double splitPoint = + parameters->splitPoint <= 0.0 ? COVER_DEFAULT_SPLITPOINT : parameters->splitPoint; + const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d; + const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d; + const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k; + const unsigned kMaxK = parameters->k == 0 ? 2000 : parameters->k; + const unsigned kSteps = parameters->steps == 0 ? 
40 : parameters->steps; + const unsigned kStepSize = MAX((kMaxK - kMinK) / kSteps, 1); + const unsigned kIterations = + (1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize); + const unsigned shrinkDict = 0; + /* Local variables */ + const int displayLevel = parameters->zParams.notificationLevel; + unsigned iteration = 1; + unsigned d; + unsigned k; + COVER_best_t best; + POOL_ctx *pool = NULL; + int warned = 0; + + /* Checks */ + if (splitPoint <= 0 || splitPoint > 1) { + LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n"); + return ERROR(parameter_outOfBound); + } + if (kMinK < kMaxD || kMaxK < kMinK) { + LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n"); + return ERROR(parameter_outOfBound); + } + if (nbSamples == 0) { + DISPLAYLEVEL(1, "Cover must have at least one input file\n"); + return ERROR(srcSize_wrong); + } + if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) { + DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n", + ZDICT_DICTSIZE_MIN); + return ERROR(dstSize_tooSmall); + } + if (nbThreads > 1) { + pool = POOL_create(nbThreads, 1); + if (!pool) { + return ERROR(memory_allocation); + } + } + /* Initialization */ + COVER_best_init(&best); + /* Turn down global display level to clean up display at level 2 and below */ + g_displayLevel = displayLevel == 0 ? 0 : displayLevel - 1; + /* Loop through d first because each new value needs a new context */ + LOCALDISPLAYLEVEL(displayLevel, 2, "Trying %u different sets of parameters\n", + kIterations); + for (d = kMinD; d <= kMaxD; d += 2) { + /* Initialize the context for this value of d */ + COVER_ctx_t ctx; + LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d); + { + const size_t initVal = COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint); + if (ZSTD_isError(initVal)) { + LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n"); + COVER_best_destroy(&best); + POOL_free(pool); + return initVal; + } + } + if (!warned) { + COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, displayLevel); + warned = 1; + } + /* Loop through k reusing the same context */ + for (k = kMinK; k <= kMaxK; k += kStepSize) { + /* Prepare the arguments */ + COVER_tryParameters_data_t *data = (COVER_tryParameters_data_t *)malloc( + sizeof(COVER_tryParameters_data_t)); + LOCALDISPLAYLEVEL(displayLevel, 3, "k=%u\n", k); + if (!data) { + LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to allocate parameters\n"); + COVER_best_destroy(&best); + COVER_ctx_destroy(&ctx); + POOL_free(pool); + return ERROR(memory_allocation); + } + data->ctx = &ctx; + data->best = &best; + data->dictBufferCapacity = dictBufferCapacity; + data->parameters = *parameters; + data->parameters.k = k; + data->parameters.d = d; + data->parameters.splitPoint = splitPoint; + data->parameters.steps = kSteps; + data->parameters.shrinkDict = shrinkDict; + data->parameters.zParams.notificationLevel = g_displayLevel; + /* Check the parameters */ + if (!COVER_checkParameters(data->parameters, dictBufferCapacity)) { + DISPLAYLEVEL(1, "Cover parameters incorrect\n"); + free(data); + continue; + } + /* Call the function and pass ownership of data to it */ + COVER_best_start(&best); + if (pool) { + POOL_add(pool, &COVER_tryParameters, data); + } else { + COVER_tryParameters(data); + } + /* Print status */ + LOCALDISPLAYUPDATE(displayLevel, 2, "\r%u%% ", + (unsigned)((iteration * 100) / kIterations)); + ++iteration; + } + COVER_best_wait(&best); + COVER_ctx_destroy(&ctx); + } + LOCALDISPLAYLEVEL(displayLevel, 2, "\r%79s\r", ""); + /* Fill the 
output buffer and parameters with output of the best parameters */ + { + const size_t dictSize = best.dictSize; + if (ZSTD_isError(best.compressedSize)) { + const size_t compressedSize = best.compressedSize; + COVER_best_destroy(&best); + POOL_free(pool); + return compressedSize; + } + *parameters = best.parameters; + memcpy(dictBuffer, best.dict, dictSize); + COVER_best_destroy(&best); + POOL_free(pool); + return dictSize; + } +} +/**** ended inlining dictBuilder/cover.c ****/ +/**** start inlining dictBuilder/divsufsort.c ****/ +/* + * divsufsort.c for libdivsufsort-lite + * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/*- Compiler specifics -*/ +#ifdef __clang__ +#pragma clang diagnostic ignored "-Wshorten-64-to-32" +#endif + +#if defined(_MSC_VER) +# pragma warning(disable : 4244) +# pragma warning(disable : 4127) /* C4127 : Condition expression is constant */ +#endif + + +/*- Dependencies -*/ +#include +#include +#include + +/**** start inlining divsufsort.h ****/ +/* + * divsufsort.h for libdivsufsort-lite + * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _DIVSUFSORT_H +#define _DIVSUFSORT_H 1 + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + + +/*- Prototypes -*/ + +/** + * Constructs the suffix array of a given string. + * @param T [0..n-1] The input string. 
+ * @param SA [0..n-1] The output array of suffixes. + * @param n The length of the given string. + * @param openMP enables OpenMP optimization. + * @return 0 if no error occurred, -1 or -2 otherwise. + */ +int +divsufsort(const unsigned char *T, int *SA, int n, int openMP); + +/** + * Constructs the burrows-wheeler transformed string of a given string. + * @param T [0..n-1] The input string. + * @param U [0..n-1] The output string. (can be T) + * @param A [0..n-1] The temporary array. (can be NULL) + * @param n The length of the given string. + * @param num_indexes The length of secondary indexes array. (can be NULL) + * @param indexes The secondary indexes array. (can be NULL) + * @param openMP enables OpenMP optimization. + * @return The primary index if no error occurred, -1 or -2 otherwise. + */ +int +divbwt(const unsigned char *T, unsigned char *U, int *A, int n, unsigned char * num_indexes, int * indexes, int openMP); + + +#ifdef __cplusplus +} /* extern "C" */ +#endif /* __cplusplus */ + +#endif /* _DIVSUFSORT_H */ +/**** ended inlining divsufsort.h ****/ + +/*- Constants -*/ +#if defined(INLINE) +# undef INLINE +#endif +#if !defined(INLINE) +# define INLINE __inline +#endif +#if defined(ALPHABET_SIZE) && (ALPHABET_SIZE < 1) +# undef ALPHABET_SIZE +#endif +#if !defined(ALPHABET_SIZE) +# define ALPHABET_SIZE (256) +#endif +#define BUCKET_A_SIZE (ALPHABET_SIZE) +#define BUCKET_B_SIZE (ALPHABET_SIZE * ALPHABET_SIZE) +#if defined(SS_INSERTIONSORT_THRESHOLD) +# if SS_INSERTIONSORT_THRESHOLD < 1 +# undef SS_INSERTIONSORT_THRESHOLD +# define SS_INSERTIONSORT_THRESHOLD (1) +# endif +#else +# define SS_INSERTIONSORT_THRESHOLD (8) +#endif +#if defined(SS_BLOCKSIZE) +# if SS_BLOCKSIZE < 0 +# undef SS_BLOCKSIZE +# define SS_BLOCKSIZE (0) +# elif 32768 <= SS_BLOCKSIZE +# undef SS_BLOCKSIZE +# define SS_BLOCKSIZE (32767) +# endif +#else +# define SS_BLOCKSIZE (1024) +#endif +/* minstacksize = log(SS_BLOCKSIZE) / log(3) * 2 */ +#if SS_BLOCKSIZE == 0 +# define SS_MISORT_STACKSIZE (96) +#elif SS_BLOCKSIZE <= 4096 +# define SS_MISORT_STACKSIZE (16) +#else +# define SS_MISORT_STACKSIZE (24) +#endif +#define SS_SMERGE_STACKSIZE (32) +#define TR_INSERTIONSORT_THRESHOLD (8) +#define TR_STACKSIZE (64) + + +/*- Macros -*/ +#ifndef SWAP +# define SWAP(_a, _b) do { t = (_a); (_a) = (_b); (_b) = t; } while(0) +#endif /* SWAP */ +#ifndef MIN +# define MIN(_a, _b) (((_a) < (_b)) ? (_a) : (_b)) +#endif /* MIN */ +#ifndef MAX +# define MAX(_a, _b) (((_a) > (_b)) ? 
(_a) : (_b)) +#endif /* MAX */ +#define STACK_PUSH(_a, _b, _c, _d)\ + do {\ + assert(ssize < STACK_SIZE);\ + stack[ssize].a = (_a), stack[ssize].b = (_b),\ + stack[ssize].c = (_c), stack[ssize++].d = (_d);\ + } while(0) +#define STACK_PUSH5(_a, _b, _c, _d, _e)\ + do {\ + assert(ssize < STACK_SIZE);\ + stack[ssize].a = (_a), stack[ssize].b = (_b),\ + stack[ssize].c = (_c), stack[ssize].d = (_d), stack[ssize++].e = (_e);\ + } while(0) +#define STACK_POP(_a, _b, _c, _d)\ + do {\ + assert(0 <= ssize);\ + if(ssize == 0) { return; }\ + (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\ + (_c) = stack[ssize].c, (_d) = stack[ssize].d;\ + } while(0) +#define STACK_POP5(_a, _b, _c, _d, _e)\ + do {\ + assert(0 <= ssize);\ + if(ssize == 0) { return; }\ + (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\ + (_c) = stack[ssize].c, (_d) = stack[ssize].d, (_e) = stack[ssize].e;\ + } while(0) +#define BUCKET_A(_c0) bucket_A[(_c0)] +#if ALPHABET_SIZE == 256 +#define BUCKET_B(_c0, _c1) (bucket_B[((_c1) << 8) | (_c0)]) +#define BUCKET_BSTAR(_c0, _c1) (bucket_B[((_c0) << 8) | (_c1)]) +#else +#define BUCKET_B(_c0, _c1) (bucket_B[(_c1) * ALPHABET_SIZE + (_c0)]) +#define BUCKET_BSTAR(_c0, _c1) (bucket_B[(_c0) * ALPHABET_SIZE + (_c1)]) +#endif + + +/*- Private Functions -*/ + +static const int lg_table[256]= { + -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, + 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7 +}; + +#if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) + +static INLINE +int +ss_ilg(int n) { +#if SS_BLOCKSIZE == 0 + return (n & 0xffff0000) ? + ((n & 0xff000000) ? + 24 + lg_table[(n >> 24) & 0xff] : + 16 + lg_table[(n >> 16) & 0xff]) : + ((n & 0x0000ff00) ? + 8 + lg_table[(n >> 8) & 0xff] : + 0 + lg_table[(n >> 0) & 0xff]); +#elif SS_BLOCKSIZE < 256 + return lg_table[n]; +#else + return (n & 0xff00) ? 
+ 8 + lg_table[(n >> 8) & 0xff] : + 0 + lg_table[(n >> 0) & 0xff]; +#endif +} + +#endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */ + +#if SS_BLOCKSIZE != 0 + +static const int sqq_table[256] = { + 0, 16, 22, 27, 32, 35, 39, 42, 45, 48, 50, 53, 55, 57, 59, 61, + 64, 65, 67, 69, 71, 73, 75, 76, 78, 80, 81, 83, 84, 86, 87, 89, + 90, 91, 93, 94, 96, 97, 98, 99, 101, 102, 103, 104, 106, 107, 108, 109, +110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, +128, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, +143, 144, 144, 145, 146, 147, 148, 149, 150, 150, 151, 152, 153, 154, 155, 155, +156, 157, 158, 159, 160, 160, 161, 162, 163, 163, 164, 165, 166, 167, 167, 168, +169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 178, 179, 180, +181, 181, 182, 183, 183, 184, 185, 185, 186, 187, 187, 188, 189, 189, 190, 191, +192, 192, 193, 193, 194, 195, 195, 196, 197, 197, 198, 199, 199, 200, 201, 201, +202, 203, 203, 204, 204, 205, 206, 206, 207, 208, 208, 209, 209, 210, 211, 211, +212, 212, 213, 214, 214, 215, 215, 216, 217, 217, 218, 218, 219, 219, 220, 221, +221, 222, 222, 223, 224, 224, 225, 225, 226, 226, 227, 227, 228, 229, 229, 230, +230, 231, 231, 232, 232, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 238, +239, 240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247, +247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255 +}; + +static INLINE +int +ss_isqrt(int x) { + int y, e; + + if(x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) { return SS_BLOCKSIZE; } + e = (x & 0xffff0000) ? + ((x & 0xff000000) ? + 24 + lg_table[(x >> 24) & 0xff] : + 16 + lg_table[(x >> 16) & 0xff]) : + ((x & 0x0000ff00) ? + 8 + lg_table[(x >> 8) & 0xff] : + 0 + lg_table[(x >> 0) & 0xff]); + + if(e >= 16) { + y = sqq_table[x >> ((e - 6) - (e & 1))] << ((e >> 1) - 7); + if(e >= 24) { y = (y + 1 + x / y) >> 1; } + y = (y + 1 + x / y) >> 1; + } else if(e >= 8) { + y = (sqq_table[x >> ((e - 6) - (e & 1))] >> (7 - (e >> 1))) + 1; + } else { + return sqq_table[x] >> 4; + } + + return (x < (y * y)) ? y - 1 : y; +} + +#endif /* SS_BLOCKSIZE != 0 */ + + +/*---------------------------------------------------------------------------*/ + +/* Compares two suffixes. */ +static INLINE +int +ss_compare(const unsigned char *T, + const int *p1, const int *p2, + int depth) { + const unsigned char *U1, *U2, *U1n, *U2n; + + for(U1 = T + depth + *p1, + U2 = T + depth + *p2, + U1n = T + *(p1 + 1) + 2, + U2n = T + *(p2 + 1) + 2; + (U1 < U1n) && (U2 < U2n) && (*U1 == *U2); + ++U1, ++U2) { + } + + return U1 < U1n ? + (U2 < U2n ? *U1 - *U2 : 1) : + (U2 < U2n ? 
-1 : 0); +} + + +/*---------------------------------------------------------------------------*/ + +#if (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) + +/* Insertionsort for small size groups */ +static +void +ss_insertionsort(const unsigned char *T, const int *PA, + int *first, int *last, int depth) { + int *i, *j; + int t; + int r; + + for(i = last - 2; first <= i; --i) { + for(t = *i, j = i + 1; 0 < (r = ss_compare(T, PA + t, PA + *j, depth));) { + do { *(j - 1) = *j; } while((++j < last) && (*j < 0)); + if(last <= j) { break; } + } + if(r == 0) { *j = ~*j; } + *(j - 1) = t; + } +} + +#endif /* (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) */ + + +/*---------------------------------------------------------------------------*/ + +#if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) + +static INLINE +void +ss_fixdown(const unsigned char *Td, const int *PA, + int *SA, int i, int size) { + int j, k; + int v; + int c, d, e; + + for(v = SA[i], c = Td[PA[v]]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) { + d = Td[PA[SA[k = j++]]]; + if(d < (e = Td[PA[SA[j]]])) { k = j; d = e; } + if(d <= c) { break; } + } + SA[i] = v; +} + +/* Simple top-down heapsort. */ +static +void +ss_heapsort(const unsigned char *Td, const int *PA, int *SA, int size) { + int i, m; + int t; + + m = size; + if((size % 2) == 0) { + m--; + if(Td[PA[SA[m / 2]]] < Td[PA[SA[m]]]) { SWAP(SA[m], SA[m / 2]); } + } + + for(i = m / 2 - 1; 0 <= i; --i) { ss_fixdown(Td, PA, SA, i, m); } + if((size % 2) == 0) { SWAP(SA[0], SA[m]); ss_fixdown(Td, PA, SA, 0, m); } + for(i = m - 1; 0 < i; --i) { + t = SA[0], SA[0] = SA[i]; + ss_fixdown(Td, PA, SA, 0, i); + SA[i] = t; + } +} + + +/*---------------------------------------------------------------------------*/ + +/* Returns the median of three elements. */ +static INLINE +int * +ss_median3(const unsigned char *Td, const int *PA, + int *v1, int *v2, int *v3) { + int *t; + if(Td[PA[*v1]] > Td[PA[*v2]]) { SWAP(v1, v2); } + if(Td[PA[*v2]] > Td[PA[*v3]]) { + if(Td[PA[*v1]] > Td[PA[*v3]]) { return v1; } + else { return v3; } + } + return v2; +} + +/* Returns the median of five elements. */ +static INLINE +int * +ss_median5(const unsigned char *Td, const int *PA, + int *v1, int *v2, int *v3, int *v4, int *v5) { + int *t; + if(Td[PA[*v2]] > Td[PA[*v3]]) { SWAP(v2, v3); } + if(Td[PA[*v4]] > Td[PA[*v5]]) { SWAP(v4, v5); } + if(Td[PA[*v2]] > Td[PA[*v4]]) { SWAP(v2, v4); SWAP(v3, v5); } + if(Td[PA[*v1]] > Td[PA[*v3]]) { SWAP(v1, v3); } + if(Td[PA[*v1]] > Td[PA[*v4]]) { SWAP(v1, v4); SWAP(v3, v5); } + if(Td[PA[*v3]] > Td[PA[*v4]]) { return v4; } + return v3; +} + +/* Returns the pivot element. */ +static INLINE +int * +ss_pivot(const unsigned char *Td, const int *PA, int *first, int *last) { + int *middle; + int t; + + t = last - first; + middle = first + t / 2; + + if(t <= 512) { + if(t <= 32) { + return ss_median3(Td, PA, first, middle, last - 1); + } else { + t >>= 2; + return ss_median5(Td, PA, first, first + t, middle, last - 1 - t, last - 1); + } + } + t >>= 3; + first = ss_median3(Td, PA, first, first + t, first + (t << 1)); + middle = ss_median3(Td, PA, middle - t, middle, middle + t); + last = ss_median3(Td, PA, last - 1 - (t << 1), last - 1 - t, last - 1); + return ss_median3(Td, PA, first, middle, last); +} + + +/*---------------------------------------------------------------------------*/ + +/* Binary partition for substrings. 
*/ +static INLINE +int * +ss_partition(const int *PA, + int *first, int *last, int depth) { + int *a, *b; + int t; + for(a = first - 1, b = last;;) { + for(; (++a < b) && ((PA[*a] + depth) >= (PA[*a + 1] + 1));) { *a = ~*a; } + for(; (a < --b) && ((PA[*b] + depth) < (PA[*b + 1] + 1));) { } + if(b <= a) { break; } + t = ~*b; + *b = *a; + *a = t; + } + if(first < a) { *first = ~*first; } + return a; +} + +/* Multikey introsort for medium size groups. */ +static +void +ss_mintrosort(const unsigned char *T, const int *PA, + int *first, int *last, + int depth) { +#define STACK_SIZE SS_MISORT_STACKSIZE + struct { int *a, *b, c; int d; } stack[STACK_SIZE]; + const unsigned char *Td; + int *a, *b, *c, *d, *e, *f; + int s, t; + int ssize; + int limit; + int v, x = 0; + + for(ssize = 0, limit = ss_ilg(last - first);;) { + + if((last - first) <= SS_INSERTIONSORT_THRESHOLD) { +#if 1 < SS_INSERTIONSORT_THRESHOLD + if(1 < (last - first)) { ss_insertionsort(T, PA, first, last, depth); } +#endif + STACK_POP(first, last, depth, limit); + continue; + } + + Td = T + depth; + if(limit-- == 0) { ss_heapsort(Td, PA, first, last - first); } + if(limit < 0) { + for(a = first + 1, v = Td[PA[*first]]; a < last; ++a) { + if((x = Td[PA[*a]]) != v) { + if(1 < (a - first)) { break; } + v = x; + first = a; + } + } + if(Td[PA[*first] - 1] < v) { + first = ss_partition(PA, first, a, depth); + } + if((a - first) <= (last - a)) { + if(1 < (a - first)) { + STACK_PUSH(a, last, depth, -1); + last = a, depth += 1, limit = ss_ilg(a - first); + } else { + first = a, limit = -1; + } + } else { + if(1 < (last - a)) { + STACK_PUSH(first, a, depth + 1, ss_ilg(a - first)); + first = a, limit = -1; + } else { + last = a, depth += 1, limit = ss_ilg(a - first); + } + } + continue; + } + + /* choose pivot */ + a = ss_pivot(Td, PA, first, last); + v = Td[PA[*a]]; + SWAP(*first, *a); + + /* partition */ + for(b = first; (++b < last) && ((x = Td[PA[*b]]) == v);) { } + if(((a = b) < last) && (x < v)) { + for(; (++b < last) && ((x = Td[PA[*b]]) <= v);) { + if(x == v) { SWAP(*b, *a); ++a; } + } + } + for(c = last; (b < --c) && ((x = Td[PA[*c]]) == v);) { } + if((b < (d = c)) && (x > v)) { + for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) { + if(x == v) { SWAP(*c, *d); --d; } + } + } + for(; b < c;) { + SWAP(*b, *c); + for(; (++b < c) && ((x = Td[PA[*b]]) <= v);) { + if(x == v) { SWAP(*b, *a); ++a; } + } + for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) { + if(x == v) { SWAP(*c, *d); --d; } + } + } + + if(a <= d) { + c = b - 1; + + if((s = a - first) > (t = b - a)) { s = t; } + for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } + if((s = d - c) > (t = last - d - 1)) { s = t; } + for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } + + a = first + (b - a), c = last - (d - c); + b = (v <= Td[PA[*a] - 1]) ? 
a : ss_partition(PA, a, c, depth); + + if((a - first) <= (last - c)) { + if((last - c) <= (c - b)) { + STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); + STACK_PUSH(c, last, depth, limit); + last = a; + } else if((a - first) <= (c - b)) { + STACK_PUSH(c, last, depth, limit); + STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); + last = a; + } else { + STACK_PUSH(c, last, depth, limit); + STACK_PUSH(first, a, depth, limit); + first = b, last = c, depth += 1, limit = ss_ilg(c - b); + } + } else { + if((a - first) <= (c - b)) { + STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); + STACK_PUSH(first, a, depth, limit); + first = c; + } else if((last - c) <= (c - b)) { + STACK_PUSH(first, a, depth, limit); + STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); + first = c; + } else { + STACK_PUSH(first, a, depth, limit); + STACK_PUSH(c, last, depth, limit); + first = b, last = c, depth += 1, limit = ss_ilg(c - b); + } + } + } else { + limit += 1; + if(Td[PA[*first] - 1] < v) { + first = ss_partition(PA, first, last, depth); + limit = ss_ilg(last - first); + } + depth += 1; + } + } +#undef STACK_SIZE +} + +#endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */ + + +/*---------------------------------------------------------------------------*/ + +#if SS_BLOCKSIZE != 0 + +static INLINE +void +ss_blockswap(int *a, int *b, int n) { + int t; + for(; 0 < n; --n, ++a, ++b) { + t = *a, *a = *b, *b = t; + } +} + +static INLINE +void +ss_rotate(int *first, int *middle, int *last) { + int *a, *b, t; + int l, r; + l = middle - first, r = last - middle; + for(; (0 < l) && (0 < r);) { + if(l == r) { ss_blockswap(first, middle, l); break; } + if(l < r) { + a = last - 1, b = middle - 1; + t = *a; + do { + *a-- = *b, *b-- = *a; + if(b < first) { + *a = t; + last = a; + if((r -= l + 1) <= l) { break; } + a -= 1, b = middle - 1; + t = *a; + } + } while(1); + } else { + a = first, b = middle; + t = *a; + do { + *a++ = *b, *b++ = *a; + if(last <= b) { + *a = t; + first = a + 1; + if((l -= r + 1) <= r) { break; } + a += 1, b = middle; + t = *a; + } + } while(1); + } + } +} + + +/*---------------------------------------------------------------------------*/ + +static +void +ss_inplacemerge(const unsigned char *T, const int *PA, + int *first, int *middle, int *last, + int depth) { + const int *p; + int *a, *b; + int len, half; + int q, r; + int x; + + for(;;) { + if(*(last - 1) < 0) { x = 1; p = PA + ~*(last - 1); } + else { x = 0; p = PA + *(last - 1); } + for(a = first, len = middle - first, half = len >> 1, r = -1; + 0 < len; + len = half, half >>= 1) { + b = a + half; + q = ss_compare(T, PA + ((0 <= *b) ? *b : ~*b), p, depth); + if(q < 0) { + a = b + 1; + half -= (len & 1) ^ 1; + } else { + r = q; + } + } + if(a < middle) { + if(r == 0) { *a = ~*a; } + ss_rotate(a, middle, last); + last -= middle - a; + middle = a; + if(first == middle) { break; } + } + --last; + if(x != 0) { while(*--last < 0) { } } + if(middle == last) { break; } + } +} + + +/*---------------------------------------------------------------------------*/ + +/* Merge-forward with internal buffer. 
*/ +static +void +ss_mergeforward(const unsigned char *T, const int *PA, + int *first, int *middle, int *last, + int *buf, int depth) { + int *a, *b, *c, *bufend; + int t; + int r; + + bufend = buf + (middle - first) - 1; + ss_blockswap(buf, first, middle - first); + + for(t = *(a = first), b = buf, c = middle;;) { + r = ss_compare(T, PA + *b, PA + *c, depth); + if(r < 0) { + do { + *a++ = *b; + if(bufend <= b) { *bufend = t; return; } + *b++ = *a; + } while(*b < 0); + } else if(r > 0) { + do { + *a++ = *c, *c++ = *a; + if(last <= c) { + while(b < bufend) { *a++ = *b, *b++ = *a; } + *a = *b, *b = t; + return; + } + } while(*c < 0); + } else { + *c = ~*c; + do { + *a++ = *b; + if(bufend <= b) { *bufend = t; return; } + *b++ = *a; + } while(*b < 0); + + do { + *a++ = *c, *c++ = *a; + if(last <= c) { + while(b < bufend) { *a++ = *b, *b++ = *a; } + *a = *b, *b = t; + return; + } + } while(*c < 0); + } + } +} + +/* Merge-backward with internal buffer. */ +static +void +ss_mergebackward(const unsigned char *T, const int *PA, + int *first, int *middle, int *last, + int *buf, int depth) { + const int *p1, *p2; + int *a, *b, *c, *bufend; + int t; + int r; + int x; + + bufend = buf + (last - middle) - 1; + ss_blockswap(buf, middle, last - middle); + + x = 0; + if(*bufend < 0) { p1 = PA + ~*bufend; x |= 1; } + else { p1 = PA + *bufend; } + if(*(middle - 1) < 0) { p2 = PA + ~*(middle - 1); x |= 2; } + else { p2 = PA + *(middle - 1); } + for(t = *(a = last - 1), b = bufend, c = middle - 1;;) { + r = ss_compare(T, p1, p2, depth); + if(0 < r) { + if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; } + *a-- = *b; + if(b <= buf) { *buf = t; break; } + *b-- = *a; + if(*b < 0) { p1 = PA + ~*b; x |= 1; } + else { p1 = PA + *b; } + } else if(r < 0) { + if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; } + *a-- = *c, *c-- = *a; + if(c < first) { + while(buf < b) { *a-- = *b, *b-- = *a; } + *a = *b, *b = t; + break; + } + if(*c < 0) { p2 = PA + ~*c; x |= 2; } + else { p2 = PA + *c; } + } else { + if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; } + *a-- = ~*b; + if(b <= buf) { *buf = t; break; } + *b-- = *a; + if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; } + *a-- = *c, *c-- = *a; + if(c < first) { + while(buf < b) { *a-- = *b, *b-- = *a; } + *a = *b, *b = t; + break; + } + if(*b < 0) { p1 = PA + ~*b; x |= 1; } + else { p1 = PA + *b; } + if(*c < 0) { p2 = PA + ~*c; x |= 2; } + else { p2 = PA + *c; } + } + } +} + +/* D&C based merge. */ +static +void +ss_swapmerge(const unsigned char *T, const int *PA, + int *first, int *middle, int *last, + int *buf, int bufsize, int depth) { +#define STACK_SIZE SS_SMERGE_STACKSIZE +#define GETIDX(a) ((0 <= (a)) ? 
(a) : (~(a))) +#define MERGE_CHECK(a, b, c)\ + do {\ + if(((c) & 1) ||\ + (((c) & 2) && (ss_compare(T, PA + GETIDX(*((a) - 1)), PA + *(a), depth) == 0))) {\ + *(a) = ~*(a);\ + }\ + if(((c) & 4) && ((ss_compare(T, PA + GETIDX(*((b) - 1)), PA + *(b), depth) == 0))) {\ + *(b) = ~*(b);\ + }\ + } while(0) + struct { int *a, *b, *c; int d; } stack[STACK_SIZE]; + int *l, *r, *lm, *rm; + int m, len, half; + int ssize; + int check, next; + + for(check = 0, ssize = 0;;) { + if((last - middle) <= bufsize) { + if((first < middle) && (middle < last)) { + ss_mergebackward(T, PA, first, middle, last, buf, depth); + } + MERGE_CHECK(first, last, check); + STACK_POP(first, middle, last, check); + continue; + } + + if((middle - first) <= bufsize) { + if(first < middle) { + ss_mergeforward(T, PA, first, middle, last, buf, depth); + } + MERGE_CHECK(first, last, check); + STACK_POP(first, middle, last, check); + continue; + } + + for(m = 0, len = MIN(middle - first, last - middle), half = len >> 1; + 0 < len; + len = half, half >>= 1) { + if(ss_compare(T, PA + GETIDX(*(middle + m + half)), + PA + GETIDX(*(middle - m - half - 1)), depth) < 0) { + m += half + 1; + half -= (len & 1) ^ 1; + } + } + + if(0 < m) { + lm = middle - m, rm = middle + m; + ss_blockswap(lm, middle, m); + l = r = middle, next = 0; + if(rm < last) { + if(*rm < 0) { + *rm = ~*rm; + if(first < lm) { for(; *--l < 0;) { } next |= 4; } + next |= 1; + } else if(first < lm) { + for(; *r < 0; ++r) { } + next |= 2; + } + } + + if((l - first) <= (last - r)) { + STACK_PUSH(r, rm, last, (next & 3) | (check & 4)); + middle = lm, last = l, check = (check & 3) | (next & 4); + } else { + if((next & 2) && (r == middle)) { next ^= 6; } + STACK_PUSH(first, lm, l, (check & 3) | (next & 4)); + first = r, middle = rm, check = (next & 3) | (check & 4); + } + } else { + if(ss_compare(T, PA + GETIDX(*(middle - 1)), PA + *middle, depth) == 0) { + *middle = ~*middle; + } + MERGE_CHECK(first, last, check); + STACK_POP(first, middle, last, check); + } + } +#undef STACK_SIZE +} + +#endif /* SS_BLOCKSIZE != 0 */ + + +/*---------------------------------------------------------------------------*/ + +/* Substring sort */ +static +void +sssort(const unsigned char *T, const int *PA, + int *first, int *last, + int *buf, int bufsize, + int depth, int n, int lastsuffix) { + int *a; +#if SS_BLOCKSIZE != 0 + int *b, *middle, *curbuf; + int j, k, curbufsize, limit; +#endif + int i; + + if(lastsuffix != 0) { ++first; } + +#if SS_BLOCKSIZE == 0 + ss_mintrosort(T, PA, first, last, depth); +#else + if((bufsize < SS_BLOCKSIZE) && + (bufsize < (last - first)) && + (bufsize < (limit = ss_isqrt(last - first)))) { + if(SS_BLOCKSIZE < limit) { limit = SS_BLOCKSIZE; } + buf = middle = last - limit, bufsize = limit; + } else { + middle = last, limit = 0; + } + for(a = first, i = 0; SS_BLOCKSIZE < (middle - a); a += SS_BLOCKSIZE, ++i) { +#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE + ss_mintrosort(T, PA, a, a + SS_BLOCKSIZE, depth); +#elif 1 < SS_BLOCKSIZE + ss_insertionsort(T, PA, a, a + SS_BLOCKSIZE, depth); +#endif + curbufsize = last - (a + SS_BLOCKSIZE); + curbuf = a + SS_BLOCKSIZE; + if(curbufsize <= bufsize) { curbufsize = bufsize, curbuf = buf; } + for(b = a, k = SS_BLOCKSIZE, j = i; j & 1; b -= k, k <<= 1, j >>= 1) { + ss_swapmerge(T, PA, b - k, b, b + k, curbuf, curbufsize, depth); + } + } +#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE + ss_mintrosort(T, PA, a, middle, depth); +#elif 1 < SS_BLOCKSIZE + ss_insertionsort(T, PA, a, middle, depth); +#endif + for(k = SS_BLOCKSIZE; i != 0; 
k <<= 1, i >>= 1) { + if(i & 1) { + ss_swapmerge(T, PA, a - k, a, middle, buf, bufsize, depth); + a -= k; + } + } + if(limit != 0) { +#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE + ss_mintrosort(T, PA, middle, last, depth); +#elif 1 < SS_BLOCKSIZE + ss_insertionsort(T, PA, middle, last, depth); +#endif + ss_inplacemerge(T, PA, first, middle, last, depth); + } +#endif + + if(lastsuffix != 0) { + /* Insert last type B* suffix. */ + int PAi[2]; PAi[0] = PA[*(first - 1)], PAi[1] = n - 2; + for(a = first, i = *(first - 1); + (a < last) && ((*a < 0) || (0 < ss_compare(T, &(PAi[0]), PA + *a, depth))); + ++a) { + *(a - 1) = *a; + } + *(a - 1) = i; + } +} + + +/*---------------------------------------------------------------------------*/ + +static INLINE +int +tr_ilg(int n) { + return (n & 0xffff0000) ? + ((n & 0xff000000) ? + 24 + lg_table[(n >> 24) & 0xff] : + 16 + lg_table[(n >> 16) & 0xff]) : + ((n & 0x0000ff00) ? + 8 + lg_table[(n >> 8) & 0xff] : + 0 + lg_table[(n >> 0) & 0xff]); +} + + +/*---------------------------------------------------------------------------*/ + +/* Simple insertionsort for small size groups. */ +static +void +tr_insertionsort(const int *ISAd, int *first, int *last) { + int *a, *b; + int t, r; + + for(a = first + 1; a < last; ++a) { + for(t = *a, b = a - 1; 0 > (r = ISAd[t] - ISAd[*b]);) { + do { *(b + 1) = *b; } while((first <= --b) && (*b < 0)); + if(b < first) { break; } + } + if(r == 0) { *b = ~*b; } + *(b + 1) = t; + } +} + + +/*---------------------------------------------------------------------------*/ + +static INLINE +void +tr_fixdown(const int *ISAd, int *SA, int i, int size) { + int j, k; + int v; + int c, d, e; + + for(v = SA[i], c = ISAd[v]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) { + d = ISAd[SA[k = j++]]; + if(d < (e = ISAd[SA[j]])) { k = j; d = e; } + if(d <= c) { break; } + } + SA[i] = v; +} + +/* Simple top-down heapsort. */ +static +void +tr_heapsort(const int *ISAd, int *SA, int size) { + int i, m; + int t; + + m = size; + if((size % 2) == 0) { + m--; + if(ISAd[SA[m / 2]] < ISAd[SA[m]]) { SWAP(SA[m], SA[m / 2]); } + } + + for(i = m / 2 - 1; 0 <= i; --i) { tr_fixdown(ISAd, SA, i, m); } + if((size % 2) == 0) { SWAP(SA[0], SA[m]); tr_fixdown(ISAd, SA, 0, m); } + for(i = m - 1; 0 < i; --i) { + t = SA[0], SA[0] = SA[i]; + tr_fixdown(ISAd, SA, 0, i); + SA[i] = t; + } +} + + +/*---------------------------------------------------------------------------*/ + +/* Returns the median of three elements. */ +static INLINE +int * +tr_median3(const int *ISAd, int *v1, int *v2, int *v3) { + int *t; + if(ISAd[*v1] > ISAd[*v2]) { SWAP(v1, v2); } + if(ISAd[*v2] > ISAd[*v3]) { + if(ISAd[*v1] > ISAd[*v3]) { return v1; } + else { return v3; } + } + return v2; +} + +/* Returns the median of five elements. */ +static INLINE +int * +tr_median5(const int *ISAd, + int *v1, int *v2, int *v3, int *v4, int *v5) { + int *t; + if(ISAd[*v2] > ISAd[*v3]) { SWAP(v2, v3); } + if(ISAd[*v4] > ISAd[*v5]) { SWAP(v4, v5); } + if(ISAd[*v2] > ISAd[*v4]) { SWAP(v2, v4); SWAP(v3, v5); } + if(ISAd[*v1] > ISAd[*v3]) { SWAP(v1, v3); } + if(ISAd[*v1] > ISAd[*v4]) { SWAP(v1, v4); SWAP(v3, v5); } + if(ISAd[*v3] > ISAd[*v4]) { return v4; } + return v3; +} + +/* Returns the pivot element. 
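+   As a reading aid for the code below: tr_pivot() picks the pivot adaptively
+   from the range size t. It uses a median of three for t <= 32, a median of
+   five for t <= 512, and otherwise a pseudo-median of nine (the median of
+   three median-of-three samples), which keeps the tandem-repeat introsort
+   partitions balanced without scanning every element of the range.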
*/ +static INLINE +int * +tr_pivot(const int *ISAd, int *first, int *last) { + int *middle; + int t; + + t = last - first; + middle = first + t / 2; + + if(t <= 512) { + if(t <= 32) { + return tr_median3(ISAd, first, middle, last - 1); + } else { + t >>= 2; + return tr_median5(ISAd, first, first + t, middle, last - 1 - t, last - 1); + } + } + t >>= 3; + first = tr_median3(ISAd, first, first + t, first + (t << 1)); + middle = tr_median3(ISAd, middle - t, middle, middle + t); + last = tr_median3(ISAd, last - 1 - (t << 1), last - 1 - t, last - 1); + return tr_median3(ISAd, first, middle, last); +} + + +/*---------------------------------------------------------------------------*/ + +typedef struct _trbudget_t trbudget_t; +struct _trbudget_t { + int chance; + int remain; + int incval; + int count; +}; + +static INLINE +void +trbudget_init(trbudget_t *budget, int chance, int incval) { + budget->chance = chance; + budget->remain = budget->incval = incval; +} + +static INLINE +int +trbudget_check(trbudget_t *budget, int size) { + if(size <= budget->remain) { budget->remain -= size; return 1; } + if(budget->chance == 0) { budget->count += size; return 0; } + budget->remain += budget->incval - size; + budget->chance -= 1; + return 1; +} + + +/*---------------------------------------------------------------------------*/ + +static INLINE +void +tr_partition(const int *ISAd, + int *first, int *middle, int *last, + int **pa, int **pb, int v) { + int *a, *b, *c, *d, *e, *f; + int t, s; + int x = 0; + + for(b = middle - 1; (++b < last) && ((x = ISAd[*b]) == v);) { } + if(((a = b) < last) && (x < v)) { + for(; (++b < last) && ((x = ISAd[*b]) <= v);) { + if(x == v) { SWAP(*b, *a); ++a; } + } + } + for(c = last; (b < --c) && ((x = ISAd[*c]) == v);) { } + if((b < (d = c)) && (x > v)) { + for(; (b < --c) && ((x = ISAd[*c]) >= v);) { + if(x == v) { SWAP(*c, *d); --d; } + } + } + for(; b < c;) { + SWAP(*b, *c); + for(; (++b < c) && ((x = ISAd[*b]) <= v);) { + if(x == v) { SWAP(*b, *a); ++a; } + } + for(; (b < --c) && ((x = ISAd[*c]) >= v);) { + if(x == v) { SWAP(*c, *d); --d; } + } + } + + if(a <= d) { + c = b - 1; + if((s = a - first) > (t = b - a)) { s = t; } + for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } + if((s = d - c) > (t = last - d - 1)) { s = t; } + for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } + first += (b - a), last -= (d - c); + } + *pa = first, *pb = last; +} + +static +void +tr_copy(int *ISA, const int *SA, + int *first, int *a, int *b, int *last, + int depth) { + /* sort suffixes of middle partition + by using sorted order of suffixes of left and right partition. 
*/ + int *c, *d, *e; + int s, v; + + v = b - SA - 1; + for(c = first, d = a - 1; c <= d; ++c) { + if((0 <= (s = *c - depth)) && (ISA[s] == v)) { + *++d = s; + ISA[s] = d - SA; + } + } + for(c = last - 1, e = d + 1, d = b; e < d; --c) { + if((0 <= (s = *c - depth)) && (ISA[s] == v)) { + *--d = s; + ISA[s] = d - SA; + } + } +} + +static +void +tr_partialcopy(int *ISA, const int *SA, + int *first, int *a, int *b, int *last, + int depth) { + int *c, *d, *e; + int s, v; + int rank, lastrank, newrank = -1; + + v = b - SA - 1; + lastrank = -1; + for(c = first, d = a - 1; c <= d; ++c) { + if((0 <= (s = *c - depth)) && (ISA[s] == v)) { + *++d = s; + rank = ISA[s + depth]; + if(lastrank != rank) { lastrank = rank; newrank = d - SA; } + ISA[s] = newrank; + } + } + + lastrank = -1; + for(e = d; first <= e; --e) { + rank = ISA[*e]; + if(lastrank != rank) { lastrank = rank; newrank = e - SA; } + if(newrank != rank) { ISA[*e] = newrank; } + } + + lastrank = -1; + for(c = last - 1, e = d + 1, d = b; e < d; --c) { + if((0 <= (s = *c - depth)) && (ISA[s] == v)) { + *--d = s; + rank = ISA[s + depth]; + if(lastrank != rank) { lastrank = rank; newrank = d - SA; } + ISA[s] = newrank; + } + } +} + +static +void +tr_introsort(int *ISA, const int *ISAd, + int *SA, int *first, int *last, + trbudget_t *budget) { +#define STACK_SIZE TR_STACKSIZE + struct { const int *a; int *b, *c; int d, e; }stack[STACK_SIZE]; + int *a, *b, *c; + int t; + int v, x = 0; + int incr = ISAd - ISA; + int limit, next; + int ssize, trlink = -1; + + for(ssize = 0, limit = tr_ilg(last - first);;) { + + if(limit < 0) { + if(limit == -1) { + /* tandem repeat partition */ + tr_partition(ISAd - incr, first, first, last, &a, &b, last - SA - 1); + + /* update ranks */ + if(a < last) { + for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; } + } + if(b < last) { + for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } + } + + /* push */ + if(1 < (b - a)) { + STACK_PUSH5(NULL, a, b, 0, 0); + STACK_PUSH5(ISAd - incr, first, last, -2, trlink); + trlink = ssize - 2; + } + if((a - first) <= (last - b)) { + if(1 < (a - first)) { + STACK_PUSH5(ISAd, b, last, tr_ilg(last - b), trlink); + last = a, limit = tr_ilg(a - first); + } else if(1 < (last - b)) { + first = b, limit = tr_ilg(last - b); + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } else { + if(1 < (last - b)) { + STACK_PUSH5(ISAd, first, a, tr_ilg(a - first), trlink); + first = b, limit = tr_ilg(last - b); + } else if(1 < (a - first)) { + last = a, limit = tr_ilg(a - first); + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } + } else if(limit == -2) { + /* tandem repeat copy */ + a = stack[--ssize].b, b = stack[ssize].c; + if(stack[ssize].d == 0) { + tr_copy(ISA, SA, first, a, b, last, ISAd - ISA); + } else { + if(0 <= trlink) { stack[trlink].d = -1; } + tr_partialcopy(ISA, SA, first, a, b, last, ISAd - ISA); + } + STACK_POP5(ISAd, first, last, limit, trlink); + } else { + /* sorted partition */ + if(0 <= *first) { + a = first; + do { ISA[*a] = a - SA; } while((++a < last) && (0 <= *a)); + first = a; + } + if(first < last) { + a = first; do { *a = ~*a; } while(*++a < 0); + next = (ISA[*a] != ISAd[*a]) ? 
tr_ilg(a - first + 1) : -1; + if(++a < last) { for(b = first, v = a - SA - 1; b < a; ++b) { ISA[*b] = v; } } + + /* push */ + if(trbudget_check(budget, a - first)) { + if((a - first) <= (last - a)) { + STACK_PUSH5(ISAd, a, last, -3, trlink); + ISAd += incr, last = a, limit = next; + } else { + if(1 < (last - a)) { + STACK_PUSH5(ISAd + incr, first, a, next, trlink); + first = a, limit = -3; + } else { + ISAd += incr, last = a, limit = next; + } + } + } else { + if(0 <= trlink) { stack[trlink].d = -1; } + if(1 < (last - a)) { + first = a, limit = -3; + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } + continue; + } + + if((last - first) <= TR_INSERTIONSORT_THRESHOLD) { + tr_insertionsort(ISAd, first, last); + limit = -3; + continue; + } + + if(limit-- == 0) { + tr_heapsort(ISAd, first, last - first); + for(a = last - 1; first < a; a = b) { + for(x = ISAd[*a], b = a - 1; (first <= b) && (ISAd[*b] == x); --b) { *b = ~*b; } + } + limit = -3; + continue; + } + + /* choose pivot */ + a = tr_pivot(ISAd, first, last); + SWAP(*first, *a); + v = ISAd[*first]; + + /* partition */ + tr_partition(ISAd, first, first + 1, last, &a, &b, v); + if((last - first) != (b - a)) { + next = (ISA[*a] != v) ? tr_ilg(b - a) : -1; + + /* update ranks */ + for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; } + if(b < last) { for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } } + + /* push */ + if((1 < (b - a)) && (trbudget_check(budget, b - a))) { + if((a - first) <= (last - b)) { + if((last - b) <= (b - a)) { + if(1 < (a - first)) { + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + STACK_PUSH5(ISAd, b, last, limit, trlink); + last = a; + } else if(1 < (last - b)) { + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + first = b; + } else { + ISAd += incr, first = a, last = b, limit = next; + } + } else if((a - first) <= (b - a)) { + if(1 < (a - first)) { + STACK_PUSH5(ISAd, b, last, limit, trlink); + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + last = a; + } else { + STACK_PUSH5(ISAd, b, last, limit, trlink); + ISAd += incr, first = a, last = b, limit = next; + } + } else { + STACK_PUSH5(ISAd, b, last, limit, trlink); + STACK_PUSH5(ISAd, first, a, limit, trlink); + ISAd += incr, first = a, last = b, limit = next; + } + } else { + if((a - first) <= (b - a)) { + if(1 < (last - b)) { + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + STACK_PUSH5(ISAd, first, a, limit, trlink); + first = b; + } else if(1 < (a - first)) { + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + last = a; + } else { + ISAd += incr, first = a, last = b, limit = next; + } + } else if((last - b) <= (b - a)) { + if(1 < (last - b)) { + STACK_PUSH5(ISAd, first, a, limit, trlink); + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + first = b; + } else { + STACK_PUSH5(ISAd, first, a, limit, trlink); + ISAd += incr, first = a, last = b, limit = next; + } + } else { + STACK_PUSH5(ISAd, first, a, limit, trlink); + STACK_PUSH5(ISAd, b, last, limit, trlink); + ISAd += incr, first = a, last = b, limit = next; + } + } + } else { + if((1 < (b - a)) && (0 <= trlink)) { stack[trlink].d = -1; } + if((a - first) <= (last - b)) { + if(1 < (a - first)) { + STACK_PUSH5(ISAd, b, last, limit, trlink); + last = a; + } else if(1 < (last - b)) { + first = b; + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } else { + if(1 < (last - b)) { + STACK_PUSH5(ISAd, first, a, limit, trlink); + first = b; + } else if(1 < (a - first)) { + last = a; + } else { + STACK_POP5(ISAd, 
first, last, limit, trlink); + } + } + } + } else { + if(trbudget_check(budget, last - first)) { + limit = tr_ilg(last - first), ISAd += incr; + } else { + if(0 <= trlink) { stack[trlink].d = -1; } + STACK_POP5(ISAd, first, last, limit, trlink); + } + } + } +#undef STACK_SIZE +} + + + +/*---------------------------------------------------------------------------*/ + +/* Tandem repeat sort */ +static +void +trsort(int *ISA, int *SA, int n, int depth) { + int *ISAd; + int *first, *last; + trbudget_t budget; + int t, skip, unsorted; + + trbudget_init(&budget, tr_ilg(n) * 2 / 3, n); +/* trbudget_init(&budget, tr_ilg(n) * 3 / 4, n); */ + for(ISAd = ISA + depth; -n < *SA; ISAd += ISAd - ISA) { + first = SA; + skip = 0; + unsorted = 0; + do { + if((t = *first) < 0) { first -= t; skip += t; } + else { + if(skip != 0) { *(first + skip) = skip; skip = 0; } + last = SA + ISA[t] + 1; + if(1 < (last - first)) { + budget.count = 0; + tr_introsort(ISA, ISAd, SA, first, last, &budget); + if(budget.count != 0) { unsorted += budget.count; } + else { skip = first - last; } + } else if((last - first) == 1) { + skip = -1; + } + first = last; + } + } while(first < (SA + n)); + if(skip != 0) { *(first + skip) = skip; } + if(unsorted == 0) { break; } + } +} + + +/*---------------------------------------------------------------------------*/ + +/* Sorts suffixes of type B*. */ +static +int +sort_typeBstar(const unsigned char *T, int *SA, + int *bucket_A, int *bucket_B, + int n, int openMP) { + int *PAb, *ISAb, *buf; +#ifdef LIBBSC_OPENMP + int *curbuf; + int l; +#endif + int i, j, k, t, m, bufsize; + int c0, c1; +#ifdef LIBBSC_OPENMP + int d0, d1; +#endif + (void)openMP; + + /* Initialize bucket arrays. */ + for(i = 0; i < BUCKET_A_SIZE; ++i) { bucket_A[i] = 0; } + for(i = 0; i < BUCKET_B_SIZE; ++i) { bucket_B[i] = 0; } + + /* Count the number of occurrences of the first one or two characters of each + type A, B and B* suffix. Moreover, store the beginning position of all + type B* suffixes into the array SA. */ + for(i = n - 1, m = n, c0 = T[n - 1]; 0 <= i;) { + /* type A suffix. */ + do { ++BUCKET_A(c1 = c0); } while((0 <= --i) && ((c0 = T[i]) >= c1)); + if(0 <= i) { + /* type B* suffix. */ + ++BUCKET_BSTAR(c0, c1); + SA[--m] = i; + /* type B suffix. */ + for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { + ++BUCKET_B(c0, c1); + } + } + } + m = n - m; +/* +note: + A type B* suffix is lexicographically smaller than a type B suffix that + begins with the same first two characters. +*/ + + /* Calculate the index of start/end point of each bucket. */ + for(c0 = 0, i = 0, j = 0; c0 < ALPHABET_SIZE; ++c0) { + t = i + BUCKET_A(c0); + BUCKET_A(c0) = i + j; /* start point */ + i = t + BUCKET_B(c0, c0); + for(c1 = c0 + 1; c1 < ALPHABET_SIZE; ++c1) { + j += BUCKET_BSTAR(c0, c1); + BUCKET_BSTAR(c0, c1) = j; /* end point */ + i += BUCKET_B(c0, c1); + } + } + + if(0 < m) { + /* Sort the type B* suffixes by their first two characters. */ + PAb = SA + n - m; ISAb = SA + m; + for(i = m - 2; 0 <= i; --i) { + t = PAb[i], c0 = T[t], c1 = T[t + 1]; + SA[--BUCKET_BSTAR(c0, c1)] = i; + } + t = PAb[m - 1], c0 = T[t], c1 = T[t + 1]; + SA[--BUCKET_BSTAR(c0, c1)] = m - 1; + + /* Sort the type B* substrings using sssort. 
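+       (Classification recap, as implemented by the counting scan above: moving
+       right to left, suffixes are type A while T[i] >= T[i+1] inside a
+       non-increasing run; the first position with T[i] < T[i+1] after such a
+       run is recorded as type B*, and the following non-decreasing positions
+       are plain type B. Only the m type B* suffixes, parked at the tail of SA,
+       are fully sorted here; the order of the A and B suffixes is induced from
+       them later in construct_SA()/construct_BWT().)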
*/ +#ifdef LIBBSC_OPENMP + if (openMP) + { + buf = SA + m; + c0 = ALPHABET_SIZE - 2, c1 = ALPHABET_SIZE - 1, j = m; +#pragma omp parallel default(shared) private(bufsize, curbuf, k, l, d0, d1) + { + bufsize = (n - (2 * m)) / omp_get_num_threads(); + curbuf = buf + omp_get_thread_num() * bufsize; + k = 0; + for(;;) { + #pragma omp critical(sssort_lock) + { + if(0 < (l = j)) { + d0 = c0, d1 = c1; + do { + k = BUCKET_BSTAR(d0, d1); + if(--d1 <= d0) { + d1 = ALPHABET_SIZE - 1; + if(--d0 < 0) { break; } + } + } while(((l - k) <= 1) && (0 < (l = k))); + c0 = d0, c1 = d1, j = k; + } + } + if(l == 0) { break; } + sssort(T, PAb, SA + k, SA + l, + curbuf, bufsize, 2, n, *(SA + k) == (m - 1)); + } + } + } + else + { + buf = SA + m, bufsize = n - (2 * m); + for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) { + for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) { + i = BUCKET_BSTAR(c0, c1); + if(1 < (j - i)) { + sssort(T, PAb, SA + i, SA + j, + buf, bufsize, 2, n, *(SA + i) == (m - 1)); + } + } + } + } +#else + buf = SA + m, bufsize = n - (2 * m); + for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) { + for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) { + i = BUCKET_BSTAR(c0, c1); + if(1 < (j - i)) { + sssort(T, PAb, SA + i, SA + j, + buf, bufsize, 2, n, *(SA + i) == (m - 1)); + } + } + } +#endif + + /* Compute ranks of type B* substrings. */ + for(i = m - 1; 0 <= i; --i) { + if(0 <= SA[i]) { + j = i; + do { ISAb[SA[i]] = i; } while((0 <= --i) && (0 <= SA[i])); + SA[i + 1] = i - j; + if(i <= 0) { break; } + } + j = i; + do { ISAb[SA[i] = ~SA[i]] = j; } while(SA[--i] < 0); + ISAb[SA[i]] = j; + } + + /* Construct the inverse suffix array of type B* suffixes using trsort. */ + trsort(ISAb, SA, m, 1); + + /* Set the sorted order of type B* suffixes. */ + for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) { + for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { } + if(0 <= i) { + t = i; + for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { } + SA[ISAb[--j]] = ((t == 0) || (1 < (t - i))) ? t : ~t; + } + } + + /* Calculate the index of start/end point of each bucket. */ + BUCKET_B(ALPHABET_SIZE - 1, ALPHABET_SIZE - 1) = n; /* end point */ + for(c0 = ALPHABET_SIZE - 2, k = m - 1; 0 <= c0; --c0) { + i = BUCKET_A(c0 + 1) - 1; + for(c1 = ALPHABET_SIZE - 1; c0 < c1; --c1) { + t = i - BUCKET_B(c0, c1); + BUCKET_B(c0, c1) = i; /* end point */ + + /* Move all type B* suffixes to the correct position. */ + for(i = t, j = BUCKET_BSTAR(c0, c1); + j <= k; + --i, --k) { SA[i] = SA[k]; } + } + BUCKET_BSTAR(c0, c0 + 1) = i - BUCKET_B(c0, c0) + 1; /* start point */ + BUCKET_B(c0, c0) = i; /* end point */ + } + } + + return m; +} + +/* Constructs the suffix array by using the sorted order of type B* suffixes. */ +static +void +construct_SA(const unsigned char *T, int *SA, + int *bucket_A, int *bucket_B, + int n, int m) { + int *i, *j, *k; + int s; + int c0, c1, c2; + + if(0 < m) { + /* Construct the sorted order of type B suffixes by using + the sorted order of type B* suffixes. */ + for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) { + /* Scan the suffix array from right to left. 
*/ + for(i = SA + BUCKET_BSTAR(c1, c1 + 1), + j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1; + i <= j; + --j) { + if(0 < (s = *j)) { + assert(T[s] == c1); + assert(((s + 1) < n) && (T[s] <= T[s + 1])); + assert(T[s - 1] <= T[s]); + *j = ~s; + c0 = T[--s]; + if((0 < s) && (T[s - 1] > c0)) { s = ~s; } + if(c0 != c2) { + if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; } + k = SA + BUCKET_B(c2 = c0, c1); + } + assert(k < j); assert(k != NULL); + *k-- = s; + } else { + assert(((s == 0) && (T[s] == c1)) || (s < 0)); + *j = ~s; + } + } + } + } + + /* Construct the suffix array by using + the sorted order of type B suffixes. */ + k = SA + BUCKET_A(c2 = T[n - 1]); + *k++ = (T[n - 2] < c2) ? ~(n - 1) : (n - 1); + /* Scan the suffix array from left to right. */ + for(i = SA, j = SA + n; i < j; ++i) { + if(0 < (s = *i)) { + assert(T[s - 1] >= T[s]); + c0 = T[--s]; + if((s == 0) || (T[s - 1] < c0)) { s = ~s; } + if(c0 != c2) { + BUCKET_A(c2) = k - SA; + k = SA + BUCKET_A(c2 = c0); + } + assert(i < k); + *k++ = s; + } else { + assert(s < 0); + *i = ~s; + } + } +} + +/* Constructs the burrows-wheeler transformed string directly + by using the sorted order of type B* suffixes. */ +static +int +construct_BWT(const unsigned char *T, int *SA, + int *bucket_A, int *bucket_B, + int n, int m) { + int *i, *j, *k, *orig; + int s; + int c0, c1, c2; + + if(0 < m) { + /* Construct the sorted order of type B suffixes by using + the sorted order of type B* suffixes. */ + for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) { + /* Scan the suffix array from right to left. */ + for(i = SA + BUCKET_BSTAR(c1, c1 + 1), + j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1; + i <= j; + --j) { + if(0 < (s = *j)) { + assert(T[s] == c1); + assert(((s + 1) < n) && (T[s] <= T[s + 1])); + assert(T[s - 1] <= T[s]); + c0 = T[--s]; + *j = ~((int)c0); + if((0 < s) && (T[s - 1] > c0)) { s = ~s; } + if(c0 != c2) { + if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; } + k = SA + BUCKET_B(c2 = c0, c1); + } + assert(k < j); assert(k != NULL); + *k-- = s; + } else if(s != 0) { + *j = ~s; +#ifndef NDEBUG + } else { + assert(T[s] == c1); +#endif + } + } + } + } + + /* Construct the BWTed string by using + the sorted order of type B suffixes. */ + k = SA + BUCKET_A(c2 = T[n - 1]); + *k++ = (T[n - 2] < c2) ? ~((int)T[n - 2]) : (n - 1); + /* Scan the suffix array from left to right. */ + for(i = SA, j = SA + n, orig = SA; i < j; ++i) { + if(0 < (s = *i)) { + assert(T[s - 1] >= T[s]); + c0 = T[--s]; + *i = c0; + if((0 < s) && (T[s - 1] < c0)) { s = ~((int)T[s - 1]); } + if(c0 != c2) { + BUCKET_A(c2) = k - SA; + k = SA + BUCKET_A(c2 = c0); + } + assert(i < k); + *k++ = s; + } else if(s != 0) { + *i = ~s; + } else { + orig = i; + } + } + + return orig - SA; +} + +/* Constructs the burrows-wheeler transformed string directly + by using the sorted order of type B* suffixes. */ +static +int +construct_BWT_indexes(const unsigned char *T, int *SA, + int *bucket_A, int *bucket_B, + int n, int m, + unsigned char * num_indexes, int * indexes) { + int *i, *j, *k, *orig; + int s; + int c0, c1, c2; + + int mod = n / 8; + { + mod |= mod >> 1; mod |= mod >> 2; + mod |= mod >> 4; mod |= mod >> 8; + mod |= mod >> 16; mod >>= 1; + + *num_indexes = (unsigned char)((n - 1) / (mod + 1)); + } + + if(0 < m) { + /* Construct the sorted order of type B suffixes by using + the sorted order of type B* suffixes. */ + for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) { + /* Scan the suffix array from right to left. 
*/ + for(i = SA + BUCKET_BSTAR(c1, c1 + 1), + j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1; + i <= j; + --j) { + if(0 < (s = *j)) { + assert(T[s] == c1); + assert(((s + 1) < n) && (T[s] <= T[s + 1])); + assert(T[s - 1] <= T[s]); + + if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = j - SA; + + c0 = T[--s]; + *j = ~((int)c0); + if((0 < s) && (T[s - 1] > c0)) { s = ~s; } + if(c0 != c2) { + if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; } + k = SA + BUCKET_B(c2 = c0, c1); + } + assert(k < j); assert(k != NULL); + *k-- = s; + } else if(s != 0) { + *j = ~s; +#ifndef NDEBUG + } else { + assert(T[s] == c1); +#endif + } + } + } + } + + /* Construct the BWTed string by using + the sorted order of type B suffixes. */ + k = SA + BUCKET_A(c2 = T[n - 1]); + if (T[n - 2] < c2) { + if (((n - 1) & mod) == 0) indexes[(n - 1) / (mod + 1) - 1] = k - SA; + *k++ = ~((int)T[n - 2]); + } + else { + *k++ = n - 1; + } + + /* Scan the suffix array from left to right. */ + for(i = SA, j = SA + n, orig = SA; i < j; ++i) { + if(0 < (s = *i)) { + assert(T[s - 1] >= T[s]); + + if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = i - SA; + + c0 = T[--s]; + *i = c0; + if(c0 != c2) { + BUCKET_A(c2) = k - SA; + k = SA + BUCKET_A(c2 = c0); + } + assert(i < k); + if((0 < s) && (T[s - 1] < c0)) { + if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = k - SA; + *k++ = ~((int)T[s - 1]); + } else + *k++ = s; + } else if(s != 0) { + *i = ~s; + } else { + orig = i; + } + } + + return orig - SA; +} + + +/*---------------------------------------------------------------------------*/ + +/*- Function -*/ + +int +divsufsort(const unsigned char *T, int *SA, int n, int openMP) { + int *bucket_A, *bucket_B; + int m; + int err = 0; + + /* Check arguments. */ + if((T == NULL) || (SA == NULL) || (n < 0)) { return -1; } + else if(n == 0) { return 0; } + else if(n == 1) { SA[0] = 0; return 0; } + else if(n == 2) { m = (T[0] < T[1]); SA[m ^ 1] = 0, SA[m] = 1; return 0; } + + bucket_A = (int *)malloc(BUCKET_A_SIZE * sizeof(int)); + bucket_B = (int *)malloc(BUCKET_B_SIZE * sizeof(int)); + + /* Suffixsort. */ + if((bucket_A != NULL) && (bucket_B != NULL)) { + m = sort_typeBstar(T, SA, bucket_A, bucket_B, n, openMP); + construct_SA(T, SA, bucket_A, bucket_B, n, m); + } else { + err = -2; + } + + free(bucket_B); + free(bucket_A); + + return err; +} + +int +divbwt(const unsigned char *T, unsigned char *U, int *A, int n, unsigned char * num_indexes, int * indexes, int openMP) { + int *B; + int *bucket_A, *bucket_B; + int m, pidx, i; + + /* Check arguments. */ + if((T == NULL) || (U == NULL) || (n < 0)) { return -1; } + else if(n <= 1) { if(n == 1) { U[0] = T[0]; } return n; } + + if((B = A) == NULL) { B = (int *)malloc((size_t)(n + 1) * sizeof(int)); } + bucket_A = (int *)malloc(BUCKET_A_SIZE * sizeof(int)); + bucket_B = (int *)malloc(BUCKET_B_SIZE * sizeof(int)); + + /* Burrows-Wheeler Transform. */ + if((B != NULL) && (bucket_A != NULL) && (bucket_B != NULL)) { + m = sort_typeBstar(T, B, bucket_A, bucket_B, n, openMP); + + if (num_indexes == NULL || indexes == NULL) { + pidx = construct_BWT(T, B, bucket_A, bucket_B, n, m); + } else { + pidx = construct_BWT_indexes(T, B, bucket_A, bucket_B, n, m, num_indexes, indexes); + } + + /* Copy to output string. 
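+      (In other words: construct_BWT()/construct_BWT_indexes() return the
+      position of the untransformed string inside B; the loop below emits
+      T[n-1] first, copies the remaining BWT bytes around that position, and
+      divbwt() returns the 1-based primary index that a decoder needs, or a
+      negative error code.)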
*/
+  U[0] = T[n - 1];
+  for(i = 0; i < pidx; ++i) { U[i + 1] = (unsigned char)B[i]; }
+  for(i += 1; i < n; ++i) { U[i] = (unsigned char)B[i]; }
+  pidx += 1;
+ } else {
+  pidx = -2;
+ }
+
+ free(bucket_B);
+ free(bucket_A);
+ if(A == NULL) { free(B); }
+
+ return pidx;
+}
+/**** ended inlining dictBuilder/divsufsort.c ****/
+/**** start inlining dictBuilder/fastcover.c ****/
+/*
+ * Copyright (c) Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/*-*************************************
+* Dependencies
+***************************************/
+#include <stdio.h>  /* fprintf */
+#include <stdlib.h> /* malloc, free, qsort */
+#include <string.h> /* memset */
+#include <time.h>   /* clock */
+
+#ifndef ZDICT_STATIC_LINKING_ONLY
+# define ZDICT_STATIC_LINKING_ONLY
+#endif
+
+/**** skipping file: ../common/mem.h ****/
+/**** skipping file: ../common/pool.h ****/
+/**** skipping file: ../common/threading.h ****/
+/**** skipping file: ../common/zstd_internal.h ****/
+/**** skipping file: ../compress/zstd_compress_internal.h ****/
+/**** skipping file: ../zdict.h ****/
+/**** skipping file: cover.h ****/
+
+
+/*-*************************************
+* Constants
+***************************************/
+#define FASTCOVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB))
+#define FASTCOVER_MAX_F 31
+#define FASTCOVER_MAX_ACCEL 10
+#define FASTCOVER_DEFAULT_SPLITPOINT 0.75
+#define DEFAULT_F 20
+#define DEFAULT_ACCEL 1
+
+
+/*-*************************************
+* Console display
+***************************************/
+#ifndef LOCALDISPLAYLEVEL
+static int g_displayLevel = 2;
+#endif
+#undef DISPLAY
+#define DISPLAY(...) \
+  { \
+    fprintf(stderr, __VA_ARGS__); \
+    fflush(stderr); \
+  }
+#undef LOCALDISPLAYLEVEL
+#define LOCALDISPLAYLEVEL(displayLevel, l, ...) \
+  if (displayLevel >= l) { \
+    DISPLAY(__VA_ARGS__); \
+  } /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */
+#undef DISPLAYLEVEL
+#define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__)
+
+#ifndef LOCALDISPLAYUPDATE
+static const clock_t g_refreshRate = CLOCKS_PER_SEC * 15 / 100;
+static clock_t g_time = 0;
+#endif
+#undef LOCALDISPLAYUPDATE
+#define LOCALDISPLAYUPDATE(displayLevel, l, ...) \
+  if (displayLevel >= l) { \
+    if ((clock() - g_time > g_refreshRate) || (displayLevel >= 4)) { \
+      g_time = clock(); \
+      DISPLAY(__VA_ARGS__); \
+    } \
+  }
+#undef DISPLAYUPDATE
+#define DISPLAYUPDATE(l, ...)
LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__) + + +/*-************************************* +* Hash Functions +***************************************/ +/** + * Hash the d-byte value pointed to by p and mod 2^f into the frequency vector + */ +static size_t FASTCOVER_hashPtrToIndex(const void* p, U32 f, unsigned d) { + if (d == 6) { + return ZSTD_hash6Ptr(p, f); + } + return ZSTD_hash8Ptr(p, f); +} + + +/*-************************************* +* Acceleration +***************************************/ +typedef struct { + unsigned finalize; /* Percentage of training samples used for ZDICT_finalizeDictionary */ + unsigned skip; /* Number of dmer skipped between each dmer counted in computeFrequency */ +} FASTCOVER_accel_t; + + +static const FASTCOVER_accel_t FASTCOVER_defaultAccelParameters[FASTCOVER_MAX_ACCEL+1] = { + { 100, 0 }, /* accel = 0, should not happen because accel = 0 defaults to accel = 1 */ + { 100, 0 }, /* accel = 1 */ + { 50, 1 }, /* accel = 2 */ + { 34, 2 }, /* accel = 3 */ + { 25, 3 }, /* accel = 4 */ + { 20, 4 }, /* accel = 5 */ + { 17, 5 }, /* accel = 6 */ + { 14, 6 }, /* accel = 7 */ + { 13, 7 }, /* accel = 8 */ + { 11, 8 }, /* accel = 9 */ + { 10, 9 }, /* accel = 10 */ +}; + + +/*-************************************* +* Context +***************************************/ +typedef struct { + const BYTE *samples; + size_t *offsets; + const size_t *samplesSizes; + size_t nbSamples; + size_t nbTrainSamples; + size_t nbTestSamples; + size_t nbDmers; + U32 *freqs; + unsigned d; + unsigned f; + FASTCOVER_accel_t accelParams; +} FASTCOVER_ctx_t; + + +/*-************************************* +* Helper functions +***************************************/ +/** + * Selects the best segment in an epoch. + * Segments of are scored according to the function: + * + * Let F(d) be the frequency of all dmers with hash value d. + * Let S_i be hash value of the dmer at position i of segment S which has length k. + * + * Score(S) = F(S_1) + F(S_2) + ... + F(S_{k-d+1}) + * + * Once the dmer with hash value d is in the dictionary we set F(d) = 0. + */ +static COVER_segment_t FASTCOVER_selectSegment(const FASTCOVER_ctx_t *ctx, + U32 *freqs, U32 begin, U32 end, + ZDICT_cover_params_t parameters, + U16* segmentFreqs) { + /* Constants */ + const U32 k = parameters.k; + const U32 d = parameters.d; + const U32 f = ctx->f; + const U32 dmersInK = k - d + 1; + + /* Try each segment (activeSegment) and save the best (bestSegment) */ + COVER_segment_t bestSegment = {0, 0, 0}; + COVER_segment_t activeSegment; + + /* Reset the activeDmers in the segment */ + /* The activeSegment starts at the beginning of the epoch. */ + activeSegment.begin = begin; + activeSegment.end = begin; + activeSegment.score = 0; + + /* Slide the activeSegment through the whole epoch. + * Save the best segment in bestSegment. 
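+   * For intuition, with small illustrative numbers: with d=8 and k=16 a
+   * segment spans k-d+1 = 9 dmers. Each step hashes the dmer at `end` and
+   * adds freqs[idx] to the score only when that hash is not already counted
+   * in the window (segmentFreqs[idx] == 0); once the window holds 10 starts,
+   * the dmer at `begin` is dropped and its frequency is subtracted when its
+   * last occurrence leaves. The whole epoch is therefore scanned with
+   * O(end - begin) hash evaluations.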
+ */ + while (activeSegment.end < end) { + /* Get hash value of current dmer */ + const size_t idx = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.end, f, d); + + /* Add frequency of this index to score if this is the first occurrence of index in active segment */ + if (segmentFreqs[idx] == 0) { + activeSegment.score += freqs[idx]; + } + /* Increment end of segment and segmentFreqs*/ + activeSegment.end += 1; + segmentFreqs[idx] += 1; + /* If the window is now too large, drop the first position */ + if (activeSegment.end - activeSegment.begin == dmersInK + 1) { + /* Get hash value of the dmer to be eliminated from active segment */ + const size_t delIndex = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.begin, f, d); + segmentFreqs[delIndex] -= 1; + /* Subtract frequency of this index from score if this is the last occurrence of this index in active segment */ + if (segmentFreqs[delIndex] == 0) { + activeSegment.score -= freqs[delIndex]; + } + /* Increment start of segment */ + activeSegment.begin += 1; + } + + /* If this segment is the best so far save it */ + if (activeSegment.score > bestSegment.score) { + bestSegment = activeSegment; + } + } + + /* Zero out rest of segmentFreqs array */ + while (activeSegment.begin < end) { + const size_t delIndex = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.begin, f, d); + segmentFreqs[delIndex] -= 1; + activeSegment.begin += 1; + } + + { + /* Zero the frequency of hash value of each dmer covered by the chosen segment. */ + U32 pos; + for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) { + const size_t i = FASTCOVER_hashPtrToIndex(ctx->samples + pos, f, d); + freqs[i] = 0; + } + } + + return bestSegment; +} + + +static int FASTCOVER_checkParameters(ZDICT_cover_params_t parameters, + size_t maxDictSize, unsigned f, + unsigned accel) { + /* k, d, and f are required parameters */ + if (parameters.d == 0 || parameters.k == 0) { + return 0; + } + /* d has to be 6 or 8 */ + if (parameters.d != 6 && parameters.d != 8) { + return 0; + } + /* k <= maxDictSize */ + if (parameters.k > maxDictSize) { + return 0; + } + /* d <= k */ + if (parameters.d > parameters.k) { + return 0; + } + /* 0 < f <= FASTCOVER_MAX_F*/ + if (f > FASTCOVER_MAX_F || f == 0) { + return 0; + } + /* 0 < splitPoint <= 1 */ + if (parameters.splitPoint <= 0 || parameters.splitPoint > 1) { + return 0; + } + /* 0 < accel <= 10 */ + if (accel > 10 || accel == 0) { + return 0; + } + return 1; +} + + +/** + * Clean up a context initialized with `FASTCOVER_ctx_init()`. + */ +static void +FASTCOVER_ctx_destroy(FASTCOVER_ctx_t* ctx) +{ + if (!ctx) return; + + free(ctx->freqs); + ctx->freqs = NULL; + + free(ctx->offsets); + ctx->offsets = NULL; +} + + +/** + * Calculate for frequency of hash value of each dmer in ctx->samples + */ +static void +FASTCOVER_computeFrequency(U32* freqs, const FASTCOVER_ctx_t* ctx) +{ + const unsigned f = ctx->f; + const unsigned d = ctx->d; + const unsigned skip = ctx->accelParams.skip; + const unsigned readLength = MAX(d, 8); + size_t i; + assert(ctx->nbTrainSamples >= 5); + assert(ctx->nbTrainSamples <= ctx->nbSamples); + for (i = 0; i < ctx->nbTrainSamples; i++) { + size_t start = ctx->offsets[i]; /* start of current dmer */ + size_t const currSampleEnd = ctx->offsets[i+1]; + while (start + readLength <= currSampleEnd) { + const size_t dmerIndex = FASTCOVER_hashPtrToIndex(ctx->samples + start, f, d); + freqs[dmerIndex]++; + start = start + skip + 1; + } + } +} + + +/** + * Prepare a context for dictionary building. 
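+ * A note on the acceleration table above: accel=1 counts every dmer
+ * (skip=0) and finalizes on 100% of the training samples, while accel=2
+ * skips every other dmer (skip=1) and finalizes on only 50% of them; higher
+ * accel values count progressively fewer dmers and finalize on fewer
+ * samples, trading dictionary quality for training speed.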
+ * The context is only dependent on the parameter `d` and can used multiple + * times. + * Returns 0 on success or error code on error. + * The context must be destroyed with `FASTCOVER_ctx_destroy()`. + */ +static size_t +FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx, + const void* samplesBuffer, + const size_t* samplesSizes, unsigned nbSamples, + unsigned d, double splitPoint, unsigned f, + FASTCOVER_accel_t accelParams) +{ + const BYTE* const samples = (const BYTE*)samplesBuffer; + const size_t totalSamplesSize = COVER_sum(samplesSizes, nbSamples); + /* Split samples into testing and training sets */ + const unsigned nbTrainSamples = splitPoint < 1.0 ? (unsigned)((double)nbSamples * splitPoint) : nbSamples; + const unsigned nbTestSamples = splitPoint < 1.0 ? nbSamples - nbTrainSamples : nbSamples; + const size_t trainingSamplesSize = splitPoint < 1.0 ? COVER_sum(samplesSizes, nbTrainSamples) : totalSamplesSize; + const size_t testSamplesSize = splitPoint < 1.0 ? COVER_sum(samplesSizes + nbTrainSamples, nbTestSamples) : totalSamplesSize; + + /* Checks */ + if (totalSamplesSize < MAX(d, sizeof(U64)) || + totalSamplesSize >= (size_t)FASTCOVER_MAX_SAMPLES_SIZE) { + DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n", + (unsigned)(totalSamplesSize >> 20), (FASTCOVER_MAX_SAMPLES_SIZE >> 20)); + return ERROR(srcSize_wrong); + } + + /* Check if there are at least 5 training samples */ + if (nbTrainSamples < 5) { + DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid\n", nbTrainSamples); + return ERROR(srcSize_wrong); + } + + /* Check if there's testing sample */ + if (nbTestSamples < 1) { + DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.\n", nbTestSamples); + return ERROR(srcSize_wrong); + } + + /* Zero the context */ + memset(ctx, 0, sizeof(*ctx)); + DISPLAYLEVEL(2, "Training on %u samples of total size %u\n", nbTrainSamples, + (unsigned)trainingSamplesSize); + DISPLAYLEVEL(2, "Testing on %u samples of total size %u\n", nbTestSamples, + (unsigned)testSamplesSize); + + ctx->samples = samples; + ctx->samplesSizes = samplesSizes; + ctx->nbSamples = nbSamples; + ctx->nbTrainSamples = nbTrainSamples; + ctx->nbTestSamples = nbTestSamples; + ctx->nbDmers = trainingSamplesSize - MAX(d, sizeof(U64)) + 1; + ctx->d = d; + ctx->f = f; + ctx->accelParams = accelParams; + + /* The offsets of each file */ + ctx->offsets = (size_t*)calloc((nbSamples + 1), sizeof(size_t)); + if (ctx->offsets == NULL) { + DISPLAYLEVEL(1, "Failed to allocate scratch buffers \n"); + FASTCOVER_ctx_destroy(ctx); + return ERROR(memory_allocation); + } + + /* Fill offsets from the samplesSizes */ + { U32 i; + ctx->offsets[0] = 0; + assert(nbSamples >= 5); + for (i = 1; i <= nbSamples; ++i) { + ctx->offsets[i] = ctx->offsets[i - 1] + samplesSizes[i - 1]; + } + } + + /* Initialize frequency array of size 2^f */ + ctx->freqs = (U32*)calloc(((U64)1 << f), sizeof(U32)); + if (ctx->freqs == NULL) { + DISPLAYLEVEL(1, "Failed to allocate frequency table \n"); + FASTCOVER_ctx_destroy(ctx); + return ERROR(memory_allocation); + } + + DISPLAYLEVEL(2, "Computing frequencies\n"); + FASTCOVER_computeFrequency(ctx->freqs, ctx); + + return 0; +} + + +/** + * Given the prepared context build the dictionary. 
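+ * Sizing note: the frequency table prepared by FASTCOVER_ctx_init() holds
+ * 2^f U32 counters (4 MiB at the default f=20), and each parameter trial adds
+ * a 2^f U16 segmentFreqs array (2 MiB at f=20) plus a working copy of freqs,
+ * so memory grows exponentially as f approaches FASTCOVER_MAX_F.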
+ */ +static size_t +FASTCOVER_buildDictionary(const FASTCOVER_ctx_t* ctx, + U32* freqs, + void* dictBuffer, size_t dictBufferCapacity, + ZDICT_cover_params_t parameters, + U16* segmentFreqs) +{ + BYTE *const dict = (BYTE *)dictBuffer; + size_t tail = dictBufferCapacity; + /* Divide the data into epochs. We will select one segment from each epoch. */ + const COVER_epoch_info_t epochs = COVER_computeEpochs( + (U32)dictBufferCapacity, (U32)ctx->nbDmers, parameters.k, 1); + const size_t maxZeroScoreRun = 10; + size_t zeroScoreRun = 0; + size_t epoch; + DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n", + (U32)epochs.num, (U32)epochs.size); + /* Loop through the epochs until there are no more segments or the dictionary + * is full. + */ + for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs.num) { + const U32 epochBegin = (U32)(epoch * epochs.size); + const U32 epochEnd = epochBegin + epochs.size; + size_t segmentSize; + /* Select a segment */ + COVER_segment_t segment = FASTCOVER_selectSegment( + ctx, freqs, epochBegin, epochEnd, parameters, segmentFreqs); + + /* If the segment covers no dmers, then we are out of content. + * There may be new content in other epochs, for continue for some time. + */ + if (segment.score == 0) { + if (++zeroScoreRun >= maxZeroScoreRun) { + break; + } + continue; + } + zeroScoreRun = 0; + + /* Trim the segment if necessary and if it is too small then we are done */ + segmentSize = MIN(segment.end - segment.begin + parameters.d - 1, tail); + if (segmentSize < parameters.d) { + break; + } + + /* We fill the dictionary from the back to allow the best segments to be + * referenced with the smallest offsets. + */ + tail -= segmentSize; + memcpy(dict + tail, ctx->samples + segment.begin, segmentSize); + DISPLAYUPDATE( + 2, "\r%u%% ", + (unsigned)(((dictBufferCapacity - tail) * 100) / dictBufferCapacity)); + } + DISPLAYLEVEL(2, "\r%79s\r", ""); + return tail; +} + +/** + * Parameters for FASTCOVER_tryParameters(). + */ +typedef struct FASTCOVER_tryParameters_data_s { + const FASTCOVER_ctx_t* ctx; + COVER_best_t* best; + size_t dictBufferCapacity; + ZDICT_cover_params_t parameters; +} FASTCOVER_tryParameters_data_t; + + +/** + * Tries a set of parameters and updates the COVER_best_t with the results. + * This function is thread safe if zstd is compiled with multithreaded support. + * It takes its parameters as an *OWNING* opaque pointer to support threading. 
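+ * The builder above fills the dictionary from the back of the buffer: `tail`
+ * starts at dictBufferCapacity, each selected segment is trimmed to
+ * MIN(segment.end - segment.begin + d - 1, tail) bytes and copied to
+ * dict + tail, and the loop stops once the buffer is full, a trimmed segment
+ * falls below d bytes, or 10 consecutive segments score zero.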
+ */ +static void FASTCOVER_tryParameters(void* opaque) +{ + /* Save parameters as local variables */ + FASTCOVER_tryParameters_data_t *const data = (FASTCOVER_tryParameters_data_t*)opaque; + const FASTCOVER_ctx_t *const ctx = data->ctx; + const ZDICT_cover_params_t parameters = data->parameters; + size_t dictBufferCapacity = data->dictBufferCapacity; + size_t totalCompressedSize = ERROR(GENERIC); + /* Initialize array to keep track of frequency of dmer within activeSegment */ + U16* segmentFreqs = (U16*)calloc(((U64)1 << ctx->f), sizeof(U16)); + /* Allocate space for hash table, dict, and freqs */ + BYTE *const dict = (BYTE*)malloc(dictBufferCapacity); + COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC)); + U32* freqs = (U32*) malloc(((U64)1 << ctx->f) * sizeof(U32)); + if (!segmentFreqs || !dict || !freqs) { + DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n"); + goto _cleanup; + } + /* Copy the frequencies because we need to modify them */ + memcpy(freqs, ctx->freqs, ((U64)1 << ctx->f) * sizeof(U32)); + /* Build the dictionary */ + { const size_t tail = FASTCOVER_buildDictionary(ctx, freqs, dict, dictBufferCapacity, + parameters, segmentFreqs); + + const unsigned nbFinalizeSamples = (unsigned)(ctx->nbTrainSamples * ctx->accelParams.finalize / 100); + selection = COVER_selectDict(dict + tail, dictBufferCapacity, dictBufferCapacity - tail, + ctx->samples, ctx->samplesSizes, nbFinalizeSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets, + totalCompressedSize); + + if (COVER_dictSelectionIsError(selection)) { + DISPLAYLEVEL(1, "Failed to select dictionary\n"); + goto _cleanup; + } + } +_cleanup: + free(dict); + COVER_best_finish(data->best, parameters, selection); + free(data); + free(segmentFreqs); + COVER_dictSelectionFree(selection); + free(freqs); +} + + +static void +FASTCOVER_convertToCoverParams(ZDICT_fastCover_params_t fastCoverParams, + ZDICT_cover_params_t* coverParams) +{ + coverParams->k = fastCoverParams.k; + coverParams->d = fastCoverParams.d; + coverParams->steps = fastCoverParams.steps; + coverParams->nbThreads = fastCoverParams.nbThreads; + coverParams->splitPoint = fastCoverParams.splitPoint; + coverParams->zParams = fastCoverParams.zParams; + coverParams->shrinkDict = fastCoverParams.shrinkDict; +} + + +static void +FASTCOVER_convertToFastCoverParams(ZDICT_cover_params_t coverParams, + ZDICT_fastCover_params_t* fastCoverParams, + unsigned f, unsigned accel) +{ + fastCoverParams->k = coverParams.k; + fastCoverParams->d = coverParams.d; + fastCoverParams->steps = coverParams.steps; + fastCoverParams->nbThreads = coverParams.nbThreads; + fastCoverParams->splitPoint = coverParams.splitPoint; + fastCoverParams->f = f; + fastCoverParams->accel = accel; + fastCoverParams->zParams = coverParams.zParams; + fastCoverParams->shrinkDict = coverParams.shrinkDict; +} + + +ZDICTLIB_API size_t +ZDICT_trainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity, + const void* samplesBuffer, + const size_t* samplesSizes, unsigned nbSamples, + ZDICT_fastCover_params_t parameters) +{ + BYTE* const dict = (BYTE*)dictBuffer; + FASTCOVER_ctx_t ctx; + ZDICT_cover_params_t coverParams; + FASTCOVER_accel_t accelParams; + /* Initialize global data */ + g_displayLevel = parameters.zParams.notificationLevel; + /* Assign splitPoint and f if not provided */ + parameters.splitPoint = 1.0; + parameters.f = parameters.f == 0 ? DEFAULT_F : parameters.f; + parameters.accel = parameters.accel == 0 ? 
DEFAULT_ACCEL : parameters.accel; + /* Convert to cover parameter */ + memset(&coverParams, 0 , sizeof(coverParams)); + FASTCOVER_convertToCoverParams(parameters, &coverParams); + /* Checks */ + if (!FASTCOVER_checkParameters(coverParams, dictBufferCapacity, parameters.f, + parameters.accel)) { + DISPLAYLEVEL(1, "FASTCOVER parameters incorrect\n"); + return ERROR(parameter_outOfBound); + } + if (nbSamples == 0) { + DISPLAYLEVEL(1, "FASTCOVER must have at least one input file\n"); + return ERROR(srcSize_wrong); + } + if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) { + DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n", + ZDICT_DICTSIZE_MIN); + return ERROR(dstSize_tooSmall); + } + /* Assign corresponding FASTCOVER_accel_t to accelParams*/ + accelParams = FASTCOVER_defaultAccelParameters[parameters.accel]; + /* Initialize context */ + { + size_t const initVal = FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, + coverParams.d, parameters.splitPoint, parameters.f, + accelParams); + if (ZSTD_isError(initVal)) { + DISPLAYLEVEL(1, "Failed to initialize context\n"); + return initVal; + } + } + COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers, g_displayLevel); + /* Build the dictionary */ + DISPLAYLEVEL(2, "Building dictionary\n"); + { + /* Initialize array to keep track of frequency of dmer within activeSegment */ + U16* segmentFreqs = (U16 *)calloc(((U64)1 << parameters.f), sizeof(U16)); + const size_t tail = FASTCOVER_buildDictionary(&ctx, ctx.freqs, dictBuffer, + dictBufferCapacity, coverParams, segmentFreqs); + const unsigned nbFinalizeSamples = (unsigned)(ctx.nbTrainSamples * ctx.accelParams.finalize / 100); + const size_t dictionarySize = ZDICT_finalizeDictionary( + dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail, + samplesBuffer, samplesSizes, nbFinalizeSamples, coverParams.zParams); + if (!ZSTD_isError(dictionarySize)) { + DISPLAYLEVEL(2, "Constructed dictionary of size %u\n", + (unsigned)dictionarySize); + } + FASTCOVER_ctx_destroy(&ctx); + free(segmentFreqs); + return dictionarySize; + } +} + + +ZDICTLIB_API size_t +ZDICT_optimizeTrainFromBuffer_fastCover( + void* dictBuffer, size_t dictBufferCapacity, + const void* samplesBuffer, + const size_t* samplesSizes, unsigned nbSamples, + ZDICT_fastCover_params_t* parameters) +{ + ZDICT_cover_params_t coverParams; + FASTCOVER_accel_t accelParams; + /* constants */ + const unsigned nbThreads = parameters->nbThreads; + const double splitPoint = + parameters->splitPoint <= 0.0 ? FASTCOVER_DEFAULT_SPLITPOINT : parameters->splitPoint; + const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d; + const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d; + const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k; + const unsigned kMaxK = parameters->k == 0 ? 2000 : parameters->k; + const unsigned kSteps = parameters->steps == 0 ? 40 : parameters->steps; + const unsigned kStepSize = MAX((kMaxK - kMinK) / kSteps, 1); + const unsigned kIterations = + (1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize); + const unsigned f = parameters->f == 0 ? DEFAULT_F : parameters->f; + const unsigned accel = parameters->accel == 0 ? 
DEFAULT_ACCEL : parameters->accel; + const unsigned shrinkDict = 0; + /* Local variables */ + const int displayLevel = parameters->zParams.notificationLevel; + unsigned iteration = 1; + unsigned d; + unsigned k; + COVER_best_t best; + POOL_ctx *pool = NULL; + int warned = 0; + /* Checks */ + if (splitPoint <= 0 || splitPoint > 1) { + LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect splitPoint\n"); + return ERROR(parameter_outOfBound); + } + if (accel == 0 || accel > FASTCOVER_MAX_ACCEL) { + LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect accel\n"); + return ERROR(parameter_outOfBound); + } + if (kMinK < kMaxD || kMaxK < kMinK) { + LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect k\n"); + return ERROR(parameter_outOfBound); + } + if (nbSamples == 0) { + LOCALDISPLAYLEVEL(displayLevel, 1, "FASTCOVER must have at least one input file\n"); + return ERROR(srcSize_wrong); + } + if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) { + LOCALDISPLAYLEVEL(displayLevel, 1, "dictBufferCapacity must be at least %u\n", + ZDICT_DICTSIZE_MIN); + return ERROR(dstSize_tooSmall); + } + if (nbThreads > 1) { + pool = POOL_create(nbThreads, 1); + if (!pool) { + return ERROR(memory_allocation); + } + } + /* Initialization */ + COVER_best_init(&best); + memset(&coverParams, 0 , sizeof(coverParams)); + FASTCOVER_convertToCoverParams(*parameters, &coverParams); + accelParams = FASTCOVER_defaultAccelParameters[accel]; + /* Turn down global display level to clean up display at level 2 and below */ + g_displayLevel = displayLevel == 0 ? 0 : displayLevel - 1; + /* Loop through d first because each new value needs a new context */ + LOCALDISPLAYLEVEL(displayLevel, 2, "Trying %u different sets of parameters\n", + kIterations); + for (d = kMinD; d <= kMaxD; d += 2) { + /* Initialize the context for this value of d */ + FASTCOVER_ctx_t ctx; + LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d); + { + size_t const initVal = FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint, f, accelParams); + if (ZSTD_isError(initVal)) { + LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n"); + COVER_best_destroy(&best); + POOL_free(pool); + return initVal; + } + } + if (!warned) { + COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers, displayLevel); + warned = 1; + } + /* Loop through k reusing the same context */ + for (k = kMinK; k <= kMaxK; k += kStepSize) { + /* Prepare the arguments */ + FASTCOVER_tryParameters_data_t *data = (FASTCOVER_tryParameters_data_t *)malloc( + sizeof(FASTCOVER_tryParameters_data_t)); + LOCALDISPLAYLEVEL(displayLevel, 3, "k=%u\n", k); + if (!data) { + LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to allocate parameters\n"); + COVER_best_destroy(&best); + FASTCOVER_ctx_destroy(&ctx); + POOL_free(pool); + return ERROR(memory_allocation); + } + data->ctx = &ctx; + data->best = &best; + data->dictBufferCapacity = dictBufferCapacity; + data->parameters = coverParams; + data->parameters.k = k; + data->parameters.d = d; + data->parameters.splitPoint = splitPoint; + data->parameters.steps = kSteps; + data->parameters.shrinkDict = shrinkDict; + data->parameters.zParams.notificationLevel = g_displayLevel; + /* Check the parameters */ + if (!FASTCOVER_checkParameters(data->parameters, dictBufferCapacity, + data->ctx->f, accel)) { + DISPLAYLEVEL(1, "FASTCOVER parameters incorrect\n"); + free(data); + continue; + } + /* Call the function and pass ownership of data to it */ + COVER_best_start(&best); + if (pool) { + POOL_add(pool, &FASTCOVER_tryParameters, data); + } else { + 
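+        /* Scale of the sweep with all-default parameters, for orientation:
+         * d is tried at 6 and 8, k runs from 50 to 2000 in steps of
+         * MAX((2000-50)/40, 1) = 48, i.e. 41 values per d, so kIterations =
+         * 2 * 41 = 82 trials, each dispatched to the thread pool when one was
+         * created or run synchronously right here. */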
FASTCOVER_tryParameters(data);
+      }
+      /* Print status */
+      LOCALDISPLAYUPDATE(displayLevel, 2, "\r%u%% ",
+                         (unsigned)((iteration * 100) / kIterations));
+      ++iteration;
+    }
+    COVER_best_wait(&best);
+    FASTCOVER_ctx_destroy(&ctx);
+  }
+  LOCALDISPLAYLEVEL(displayLevel, 2, "\r%79s\r", "");
+  /* Fill the output buffer and parameters with output of the best parameters */
+  {
+    const size_t dictSize = best.dictSize;
+    if (ZSTD_isError(best.compressedSize)) {
+      const size_t compressedSize = best.compressedSize;
+      COVER_best_destroy(&best);
+      POOL_free(pool);
+      return compressedSize;
+    }
+    FASTCOVER_convertToFastCoverParams(best.parameters, parameters, f, accel);
+    memcpy(dictBuffer, best.dict, dictSize);
+    COVER_best_destroy(&best);
+    POOL_free(pool);
+    return dictSize;
+  }
+
+}
+/**** ended inlining dictBuilder/fastcover.c ****/
+/**** start inlining dictBuilder/zdict.c ****/
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+
+/*-**************************************
+* Tuning parameters
+****************************************/
+#define MINRATIO 4   /* minimum nb of apparition to be selected in dictionary */
+#define ZDICT_MAX_SAMPLES_SIZE (2000U << 20)
+#define ZDICT_MIN_SAMPLES_SIZE (ZDICT_CONTENTSIZE_MIN * MINRATIO)
+
+
+/*-**************************************
+* Compiler Options
+****************************************/
+/* Unix Large Files support (>4GB) */
+#define _FILE_OFFSET_BITS 64
+#if (defined(__sun__) && (!defined(__LP64__)))   /* Sun Solaris 32-bits requires specific definitions */
+# ifndef _LARGEFILE_SOURCE
+# define _LARGEFILE_SOURCE
+# endif
+#elif ! defined(__LP64__)   /* No point defining Large file for 64 bit */
+# ifndef _LARGEFILE64_SOURCE
+# define _LARGEFILE64_SOURCE
+# endif
+#endif
+
+
+/*-*************************************
+* Dependencies
+***************************************/
+#include <stdlib.h>   /* malloc, free */
+#include <string.h>   /* memset */
+#include <stdio.h>    /* fprintf, fopen, ftello64 */
+#include <time.h>     /* clock */
+
+#ifndef ZDICT_STATIC_LINKING_ONLY
+# define ZDICT_STATIC_LINKING_ONLY
+#endif
+#define HUF_STATIC_LINKING_ONLY
+
+/**** skipping file: ../common/mem.h ****/
+/**** skipping file: ../common/fse.h ****/
+/**** skipping file: ../common/huf.h ****/
+/**** skipping file: ../common/zstd_internal.h ****/
+/**** skipping file: ../common/xxhash.h ****/
+/**** skipping file: ../compress/zstd_compress_internal.h ****/
+/**** skipping file: ../zdict.h ****/
+/**** skipping file: divsufsort.h ****/
+
+
+/*-*************************************
+* Constants
+***************************************/
+#define KB *(1 <<10)
+#define MB *(1 <<20)
+#define GB *(1U<<30)
+
+#define DICTLISTSIZE_DEFAULT 10000
+
+#define NOISELENGTH 32
+
+static const U32 g_selectivity_default = 9;
+
+
+/*-*************************************
+* Console display
+***************************************/
+#undef DISPLAY
+#define DISPLAY(...) { fprintf(stderr, __VA_ARGS__); fflush( stderr ); }
+#undef DISPLAYLEVEL
+#define DISPLAYLEVEL(l, ...) if (notificationLevel>=l) { DISPLAY(__VA_ARGS__); }    /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */
+
+static clock_t ZDICT_clockSpan(clock_t nPrevious) { return clock() - nPrevious; }
+
+static void ZDICT_printHex(const void* ptr, size_t length)
+{
+    const BYTE* const b = (const BYTE*)ptr;
+    size_t u;
+    for (u=0; u<length; u++) {
+        BYTE c = b[u];
+        if (c<32 || c>126) c = '.';   /* non-printable char */
+        DISPLAY("%c", c);
+    }
+}
+
+
+/*-********************************************************
+* Helper functions
+**********************************************************/
+unsigned ZDICT_isError(size_t errorCode) { return ERR_isError(errorCode); }
+
+const char* ZDICT_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCode); }
+
+unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize)
+{
+    if (dictSize < 8) return 0;
+    if (MEM_readLE32(dictBuffer) != ZSTD_MAGIC_DICTIONARY) return 0;
+    return MEM_readLE32((const char*)dictBuffer + 4);
+}
+
+size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize)
+{
+    size_t headerSize;
+    if (dictSize <= 8 || MEM_readLE32(dictBuffer) != ZSTD_MAGIC_DICTIONARY) return ERROR(dictionary_corrupted);
+
+    {   ZSTD_compressedBlockState_t* bs = (ZSTD_compressedBlockState_t*)malloc(sizeof(ZSTD_compressedBlockState_t));
+        U32* wksp = (U32*)malloc(HUF_WORKSPACE_SIZE);
+        if (!bs || !wksp) {
+            headerSize = ERROR(memory_allocation);
+        } else {
+            ZSTD_reset_compressedBlockState(bs);
+            headerSize = ZSTD_loadCEntropy(bs, wksp, dictBuffer, dictSize);
+        }
+
+        free(bs);
+        free(wksp);
+    }
+
+    return headerSize;
+}
+
+/*-********************************************************
+* Dictionary training functions
+**********************************************************/
+static unsigned ZDICT_NbCommonBytes (size_t val)
+{
+    if (MEM_isLittleEndian()) {
+        if (MEM_64bits()) {
+# if defined(_MSC_VER) && defined(_WIN64)
+            unsigned long r = 0;
+            _BitScanForward64( &r, (U64)val );
+            return (unsigned)(r>>3);
+# elif defined(__GNUC__) && (__GNUC__ >= 3)
+            return (__builtin_ctzll((U64)val) >> 3);
+# else
+            static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };
+            return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
+# endif
+        } else { /* 32 bits */
+# if defined(_MSC_VER)
+            unsigned long r=0;
+            _BitScanForward( &r, (U32)val );
+            return (unsigned)(r>>3);
+# elif defined(__GNUC__) && (__GNUC__ >= 3)
+            return (__builtin_ctz((U32)val) >> 3);
+# else
+            static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };
+            return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
+# endif
+        }
+    } else { /* Big Endian CPU */
+        if (MEM_64bits()) {
+# if defined(_MSC_VER) && defined(_WIN64)
+            unsigned long r = 0;
+            _BitScanReverse64( &r, val );
+            return (unsigned)(r>>3);
+# elif defined(__GNUC__) && (__GNUC__ >= 3)
+            return (__builtin_clzll(val) >> 3);
+# else
+            unsigned r;
+            const unsigned n32 = sizeof(size_t)*4;   /* calculate this way due to compiler complaining in 32-bits mode */
+            if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; }
+            if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
+            r += (!val);
+            return r;
+# endif
+        } else { /* 32 bits */
+# if defined(_MSC_VER)
+            unsigned long r = 0;
+            _BitScanReverse( &r, (unsigned long)val );
+            return (unsigned)(r>>3);
+# elif defined(__GNUC__) && (__GNUC__ >= 3)
>= 3) + return (__builtin_clz((U32)val) >> 3); +# else + unsigned r; + if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } + r += (!val); + return r; +# endif + } } +} + + +/*! ZDICT_count() : + Count the nb of common bytes between 2 pointers. + Note : this function presumes end of buffer followed by noisy guard band. +*/ +static size_t ZDICT_count(const void* pIn, const void* pMatch) +{ + const char* const pStart = (const char*)pIn; + for (;;) { + size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn); + if (!diff) { + pIn = (const char*)pIn+sizeof(size_t); + pMatch = (const char*)pMatch+sizeof(size_t); + continue; + } + pIn = (const char*)pIn+ZDICT_NbCommonBytes(diff); + return (size_t)((const char*)pIn - pStart); + } +} + + +typedef struct { + U32 pos; + U32 length; + U32 savings; +} dictItem; + +static void ZDICT_initDictItem(dictItem* d) +{ + d->pos = 1; + d->length = 0; + d->savings = (U32)(-1); +} + + +#define LLIMIT 64 /* heuristic determined experimentally */ +#define MINMATCHLENGTH 7 /* heuristic determined experimentally */ +static dictItem ZDICT_analyzePos( + BYTE* doneMarks, + const int* suffix, U32 start, + const void* buffer, U32 minRatio, U32 notificationLevel) +{ + U32 lengthList[LLIMIT] = {0}; + U32 cumulLength[LLIMIT] = {0}; + U32 savings[LLIMIT] = {0}; + const BYTE* b = (const BYTE*)buffer; + size_t maxLength = LLIMIT; + size_t pos = suffix[start]; + U32 end = start; + dictItem solution; + + /* init */ + memset(&solution, 0, sizeof(solution)); + doneMarks[pos] = 1; + + /* trivial repetition cases */ + if ( (MEM_read16(b+pos+0) == MEM_read16(b+pos+2)) + ||(MEM_read16(b+pos+1) == MEM_read16(b+pos+3)) + ||(MEM_read16(b+pos+2) == MEM_read16(b+pos+4)) ) { + /* skip and mark segment */ + U16 const pattern16 = MEM_read16(b+pos+4); + U32 u, patternEnd = 6; + while (MEM_read16(b+pos+patternEnd) == pattern16) patternEnd+=2 ; + if (b[pos+patternEnd] == b[pos+patternEnd-1]) patternEnd++; + for (u=1; u= MINMATCHLENGTH); + } + + /* look backward */ + { size_t length; + do { + length = ZDICT_count(b + pos, b + *(suffix+start-1)); + if (length >=MINMATCHLENGTH) start--; + } while(length >= MINMATCHLENGTH); + } + + /* exit if not found a minimum nb of repetitions */ + if (end-start < minRatio) { + U32 idx; + for(idx=start; idx= %i at pos %7u ", (unsigned)(end-start), MINMATCHLENGTH, (unsigned)pos); + DISPLAYLEVEL(4, "\n"); + + for (mml = MINMATCHLENGTH ; ; mml++) { + BYTE currentChar = 0; + U32 currentCount = 0; + U32 currentID = refinedStart; + U32 id; + U32 selectedCount = 0; + U32 selectedID = currentID; + for (id =refinedStart; id < refinedEnd; id++) { + if (b[suffix[id] + mml] != currentChar) { + if (currentCount > selectedCount) { + selectedCount = currentCount; + selectedID = currentID; + } + currentID = id; + currentChar = b[ suffix[id] + mml]; + currentCount = 0; + } + currentCount ++; + } + if (currentCount > selectedCount) { /* for last */ + selectedCount = currentCount; + selectedID = currentID; + } + + if (selectedCount < minRatio) + break; + refinedStart = selectedID; + refinedEnd = refinedStart + selectedCount; + } + + /* evaluate gain based on new dict */ + start = refinedStart; + pos = suffix[refinedStart]; + end = start; + memset(lengthList, 0, sizeof(lengthList)); + + /* look forward */ + { size_t length; + do { + end++; + length = ZDICT_count(b + pos, b + suffix[end]); + if (length >= LLIMIT) length = LLIMIT-1; + lengthList[length]++; + } while (length >=MINMATCHLENGTH); + } + + /* look backward */ + { size_t length = MINMATCHLENGTH; + while ((length >= 
MINMATCHLENGTH) & (start > 0)) { + length = ZDICT_count(b + pos, b + suffix[start - 1]); + if (length >= LLIMIT) length = LLIMIT - 1; + lengthList[length]++; + if (length >= MINMATCHLENGTH) start--; + } + } + + /* largest useful length */ + memset(cumulLength, 0, sizeof(cumulLength)); + cumulLength[maxLength-1] = lengthList[maxLength-1]; + for (i=(int)(maxLength-2); i>=0; i--) + cumulLength[i] = cumulLength[i+1] + lengthList[i]; + + for (i=LLIMIT-1; i>=MINMATCHLENGTH; i--) if (cumulLength[i]>=minRatio) break; + maxLength = i; + + /* reduce maxLength in case of final into repetitive data */ + { U32 l = (U32)maxLength; + BYTE const c = b[pos + maxLength-1]; + while (b[pos+l-2]==c) l--; + maxLength = l; + } + if (maxLength < MINMATCHLENGTH) return solution; /* skip : no long-enough solution */ + + /* calculate savings */ + savings[5] = 0; + for (i=MINMATCHLENGTH; i<=(int)maxLength; i++) + savings[i] = savings[i-1] + (lengthList[i] * (i-3)); + + DISPLAYLEVEL(4, "Selected dict at position %u, of length %u : saves %u (ratio: %.2f) \n", + (unsigned)pos, (unsigned)maxLength, (unsigned)savings[maxLength], (double)savings[maxLength] / maxLength); + + solution.pos = (U32)pos; + solution.length = (U32)maxLength; + solution.savings = savings[maxLength]; + + /* mark positions done */ + { U32 id; + for (id=start; id solution.length) length = solution.length; + } + pEnd = (U32)(testedPos + length); + for (p=testedPos; ppos; + const U32 eltEnd = elt.pos + elt.length; + const char* const buf = (const char*) buffer; + + /* tail overlap */ + U32 u; for (u=1; u elt.pos) && (table[u].pos <= eltEnd)) { /* overlap, existing > new */ + /* append */ + U32 const addedLength = table[u].pos - elt.pos; + table[u].length += addedLength; + table[u].pos = elt.pos; + table[u].savings += elt.savings * addedLength / elt.length; /* rough approx */ + table[u].savings += elt.length / 8; /* rough approx bonus */ + elt = table[u]; + /* sort : improve rank */ + while ((u>1) && (table[u-1].savings < elt.savings)) + table[u] = table[u-1], u--; + table[u] = elt; + return u; + } } + + /* front overlap */ + for (u=1; u= elt.pos) && (table[u].pos < elt.pos)) { /* overlap, existing < new */ + /* append */ + int const addedLength = (int)eltEnd - (table[u].pos + table[u].length); + table[u].savings += elt.length / 8; /* rough approx bonus */ + if (addedLength > 0) { /* otherwise, elt fully included into existing */ + table[u].length += addedLength; + table[u].savings += elt.savings * addedLength / elt.length; /* rough approx */ + } + /* sort : improve rank */ + elt = table[u]; + while ((u>1) && (table[u-1].savings < elt.savings)) + table[u] = table[u-1], u--; + table[u] = elt; + return u; + } + + if (MEM_read64(buf + table[u].pos) == MEM_read64(buf + elt.pos + 1)) { + if (isIncluded(buf + table[u].pos, buf + elt.pos + 1, table[u].length)) { + size_t const addedLength = MAX( (int)elt.length - (int)table[u].length , 1 ); + table[u].pos = elt.pos; + table[u].savings += (U32)(elt.savings * addedLength / elt.length); + table[u].length = MIN(elt.length, table[u].length + 1); + return u; + } + } + } + + return 0; +} + + +static void ZDICT_removeDictItem(dictItem* table, U32 id) +{ + /* convention : table[0].pos stores nb of elts */ + U32 const max = table[0].pos; + U32 u; + if (!id) return; /* protection, should never happen */ + for (u=id; upos--; +} + + +static void ZDICT_insertDictItem(dictItem* table, U32 maxSize, dictItem elt, const void* buffer) +{ + /* merge if possible */ + U32 mergeId = ZDICT_tryMerge(table, elt, 0, buffer); + if 
(mergeId) { + U32 newMerge = 1; + while (newMerge) { + newMerge = ZDICT_tryMerge(table, table[mergeId], mergeId, buffer); + if (newMerge) ZDICT_removeDictItem(table, mergeId); + mergeId = newMerge; + } + return; + } + + /* insert */ + { U32 current; + U32 nextElt = table->pos; + if (nextElt >= maxSize) nextElt = maxSize-1; + current = nextElt-1; + while (table[current].savings < elt.savings) { + table[current+1] = table[current]; + current--; + } + table[current+1] = elt; + table->pos = nextElt+1; + } +} + + +static U32 ZDICT_dictSize(const dictItem* dictList) +{ + U32 u, dictSize = 0; + for (u=1; u=l) { \ + if (ZDICT_clockSpan(displayClock) > refreshRate) \ + { displayClock = clock(); DISPLAY(__VA_ARGS__); \ + if (notificationLevel>=4) fflush(stderr); } } + + /* init */ + DISPLAYLEVEL(2, "\r%70s\r", ""); /* clean display line */ + if (!suffix0 || !reverseSuffix || !doneMarks || !filePos) { + result = ERROR(memory_allocation); + goto _cleanup; + } + if (minRatio < MINRATIO) minRatio = MINRATIO; + memset(doneMarks, 0, bufferSize+16); + + /* limit sample set size (divsufsort limitation)*/ + if (bufferSize > ZDICT_MAX_SAMPLES_SIZE) DISPLAYLEVEL(3, "sample set too large : reduced to %u MB ...\n", (unsigned)(ZDICT_MAX_SAMPLES_SIZE>>20)); + while (bufferSize > ZDICT_MAX_SAMPLES_SIZE) bufferSize -= fileSizes[--nbFiles]; + + /* sort */ + DISPLAYLEVEL(2, "sorting %u files of total size %u MB ...\n", nbFiles, (unsigned)(bufferSize>>20)); + { int const divSuftSortResult = divsufsort((const unsigned char*)buffer, suffix, (int)bufferSize, 0); + if (divSuftSortResult != 0) { result = ERROR(GENERIC); goto _cleanup; } + } + suffix[bufferSize] = (int)bufferSize; /* leads into noise */ + suffix0[0] = (int)bufferSize; /* leads into noise */ + /* build reverse suffix sort */ + { size_t pos; + for (pos=0; pos < bufferSize; pos++) + reverseSuffix[suffix[pos]] = (U32)pos; + /* note filePos tracks borders between samples. 
+ It's not used at this stage, but planned to become useful in a later update */ + filePos[0] = 0; + for (pos=1; pos> 21); + } +} + + +typedef struct +{ + ZSTD_CDict* dict; /* dictionary */ + ZSTD_CCtx* zc; /* working context */ + void* workPlace; /* must be ZSTD_BLOCKSIZE_MAX allocated */ +} EStats_ress_t; + +#define MAXREPOFFSET 1024 + +static void ZDICT_countEStats(EStats_ress_t esr, const ZSTD_parameters* params, + unsigned* countLit, unsigned* offsetcodeCount, unsigned* matchlengthCount, unsigned* litlengthCount, U32* repOffsets, + const void* src, size_t srcSize, + U32 notificationLevel) +{ + size_t const blockSizeMax = MIN (ZSTD_BLOCKSIZE_MAX, 1 << params->cParams.windowLog); + size_t cSize; + + if (srcSize > blockSizeMax) srcSize = blockSizeMax; /* protection vs large samples */ + { size_t const errorCode = ZSTD_compressBegin_usingCDict(esr.zc, esr.dict); + if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_compressBegin_usingCDict failed \n"); return; } + + } + cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_MAX, src, srcSize); + if (ZSTD_isError(cSize)) { DISPLAYLEVEL(3, "warning : could not compress sample size %u \n", (unsigned)srcSize); return; } + + if (cSize) { /* if == 0; block is not compressible */ + const seqStore_t* const seqStorePtr = ZSTD_getSeqStore(esr.zc); + + /* literals stats */ + { const BYTE* bytePtr; + for(bytePtr = seqStorePtr->litStart; bytePtr < seqStorePtr->lit; bytePtr++) + countLit[*bytePtr]++; + } + + /* seqStats */ + { U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); + ZSTD_seqToCodes(seqStorePtr); + + { const BYTE* codePtr = seqStorePtr->ofCode; + U32 u; + for (u=0; umlCode; + U32 u; + for (u=0; ullCode; + U32 u; + for (u=0; u= 2) { /* rep offsets */ + const seqDef* const seq = seqStorePtr->sequencesStart; + U32 offset1 = seq[0].offset - 3; + U32 offset2 = seq[1].offset - 3; + if (offset1 >= MAXREPOFFSET) offset1 = 0; + if (offset2 >= MAXREPOFFSET) offset2 = 0; + repOffsets[offset1] += 3; + repOffsets[offset2] += 1; + } } } +} + +static size_t ZDICT_totalSampleSize(const size_t* fileSizes, unsigned nbFiles) +{ + size_t total=0; + unsigned u; + for (u=0; u0; u--) { + offsetCount_t tmp; + if (table[u-1].count >= table[u].count) break; + tmp = table[u-1]; + table[u-1] = table[u]; + table[u] = tmp; + } +} + +/* ZDICT_flatLit() : + * rewrite `countLit` to contain a mostly flat but still compressible distribution of literals. + * necessary to avoid generating a non-compressible distribution that HUF_writeCTable() cannot encode. 
+ */ +static void ZDICT_flatLit(unsigned* countLit) +{ + int u; + for (u=1; u<256; u++) countLit[u] = 2; + countLit[0] = 4; + countLit[253] = 1; + countLit[254] = 1; +} + +#define OFFCODE_MAX 30 /* only applicable to first block */ +static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize, + int compressionLevel, + const void* srcBuffer, const size_t* fileSizes, unsigned nbFiles, + const void* dictBuffer, size_t dictBufferSize, + unsigned notificationLevel) +{ + unsigned countLit[256]; + HUF_CREATE_STATIC_CTABLE(hufTable, 255); + unsigned offcodeCount[OFFCODE_MAX+1]; + short offcodeNCount[OFFCODE_MAX+1]; + U32 offcodeMax = ZSTD_highbit32((U32)(dictBufferSize + 128 KB)); + unsigned matchLengthCount[MaxML+1]; + short matchLengthNCount[MaxML+1]; + unsigned litLengthCount[MaxLL+1]; + short litLengthNCount[MaxLL+1]; + U32 repOffset[MAXREPOFFSET]; + offsetCount_t bestRepOffset[ZSTD_REP_NUM+1]; + EStats_ress_t esr = { NULL, NULL, NULL }; + ZSTD_parameters params; + U32 u, huffLog = 11, Offlog = OffFSELog, mlLog = MLFSELog, llLog = LLFSELog, total; + size_t pos = 0, errorCode; + size_t eSize = 0; + size_t const totalSrcSize = ZDICT_totalSampleSize(fileSizes, nbFiles); + size_t const averageSampleSize = totalSrcSize / (nbFiles + !nbFiles); + BYTE* dstPtr = (BYTE*)dstBuffer; + + /* init */ + DEBUGLOG(4, "ZDICT_analyzeEntropy"); + if (offcodeMax>OFFCODE_MAX) { eSize = ERROR(dictionaryCreation_failed); goto _cleanup; } /* too large dictionary */ + for (u=0; u<256; u++) countLit[u] = 1; /* any character must be described */ + for (u=0; u<=offcodeMax; u++) offcodeCount[u] = 1; + for (u=0; u<=MaxML; u++) matchLengthCount[u] = 1; + for (u=0; u<=MaxLL; u++) litLengthCount[u] = 1; + memset(repOffset, 0, sizeof(repOffset)); + repOffset[1] = repOffset[4] = repOffset[8] = 1; + memset(bestRepOffset, 0, sizeof(bestRepOffset)); + if (compressionLevel==0) compressionLevel = ZSTD_CLEVEL_DEFAULT; + params = ZSTD_getParams(compressionLevel, averageSampleSize, dictBufferSize); + + esr.dict = ZSTD_createCDict_advanced(dictBuffer, dictBufferSize, ZSTD_dlm_byRef, ZSTD_dct_rawContent, params.cParams, ZSTD_defaultCMem); + esr.zc = ZSTD_createCCtx(); + esr.workPlace = malloc(ZSTD_BLOCKSIZE_MAX); + if (!esr.dict || !esr.zc || !esr.workPlace) { + eSize = ERROR(memory_allocation); + DISPLAYLEVEL(1, "Not enough memory \n"); + goto _cleanup; + } + + /* collect stats on all samples */ + for (u=0; u dictBufferCapacity) dictContentSize = dictBufferCapacity - hSize; + { size_t const dictSize = hSize + dictContentSize; + char* dictEnd = (char*)dictBuffer + dictSize; + memmove(dictEnd - dictContentSize, customDictContent, dictContentSize); + memcpy(dictBuffer, header, hSize); + return dictSize; + } +} + + +static size_t ZDICT_addEntropyTablesFromBuffer_advanced( + void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity, + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, + ZDICT_params_t params) +{ + int const compressionLevel = (params.compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : params.compressionLevel; + U32 const notificationLevel = params.notificationLevel; + size_t hSize = 8; + + /* calculate entropy tables */ + DISPLAYLEVEL(2, "\r%70s\r", ""); /* clean display line */ + DISPLAYLEVEL(2, "statistics ... 
\n"); + { size_t const eSize = ZDICT_analyzeEntropy((char*)dictBuffer+hSize, dictBufferCapacity-hSize, + compressionLevel, + samplesBuffer, samplesSizes, nbSamples, + (char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize, + notificationLevel); + if (ZDICT_isError(eSize)) return eSize; + hSize += eSize; + } + + /* add dictionary header (after entropy tables) */ + MEM_writeLE32(dictBuffer, ZSTD_MAGIC_DICTIONARY); + { U64 const randomID = XXH64((char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize, 0); + U32 const compliantID = (randomID % ((1U<<31)-32768)) + 32768; + U32 const dictID = params.dictID ? params.dictID : compliantID; + MEM_writeLE32((char*)dictBuffer+4, dictID); + } + + if (hSize + dictContentSize < dictBufferCapacity) + memmove((char*)dictBuffer + hSize, (char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize); + return MIN(dictBufferCapacity, hSize+dictContentSize); +} + +/*! ZDICT_trainFromBuffer_unsafe_legacy() : +* Warning : `samplesBuffer` must be followed by noisy guard band !!! +* @return : size of dictionary, or an error code which can be tested with ZDICT_isError() +*/ +static size_t ZDICT_trainFromBuffer_unsafe_legacy( + void* dictBuffer, size_t maxDictSize, + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, + ZDICT_legacy_params_t params) +{ + U32 const dictListSize = MAX(MAX(DICTLISTSIZE_DEFAULT, nbSamples), (U32)(maxDictSize/16)); + dictItem* const dictList = (dictItem*)malloc(dictListSize * sizeof(*dictList)); + unsigned const selectivity = params.selectivityLevel == 0 ? g_selectivity_default : params.selectivityLevel; + unsigned const minRep = (selectivity > 30) ? MINRATIO : nbSamples >> selectivity; + size_t const targetDictSize = maxDictSize; + size_t const samplesBuffSize = ZDICT_totalSampleSize(samplesSizes, nbSamples); + size_t dictSize = 0; + U32 const notificationLevel = params.zParams.notificationLevel; + + /* checks */ + if (!dictList) return ERROR(memory_allocation); + if (maxDictSize < ZDICT_DICTSIZE_MIN) { free(dictList); return ERROR(dstSize_tooSmall); } /* requested dictionary size is too small */ + if (samplesBuffSize < ZDICT_MIN_SAMPLES_SIZE) { free(dictList); return ERROR(dictionaryCreation_failed); } /* not enough source to create dictionary */ + + /* init */ + ZDICT_initDictItem(dictList); + + /* build dictionary */ + ZDICT_trainBuffer_legacy(dictList, dictListSize, + samplesBuffer, samplesBuffSize, + samplesSizes, nbSamples, + minRep, notificationLevel); + + /* display best matches */ + if (params.zParams.notificationLevel>= 3) { + unsigned const nb = MIN(25, dictList[0].pos); + unsigned const dictContentSize = ZDICT_dictSize(dictList); + unsigned u; + DISPLAYLEVEL(3, "\n %u segments found, of total size %u \n", (unsigned)dictList[0].pos-1, dictContentSize); + DISPLAYLEVEL(3, "list %u best segments \n", nb-1); + for (u=1; u samplesBuffSize) || ((pos + length) > samplesBuffSize)) { + free(dictList); + return ERROR(GENERIC); /* should never happen */ + } + DISPLAYLEVEL(3, "%3u:%3u bytes at pos %8u, savings %7u bytes |", + u, length, pos, (unsigned)dictList[u].savings); + ZDICT_printHex((const char*)samplesBuffer+pos, printedLength); + DISPLAYLEVEL(3, "| \n"); + } } + + + /* create dictionary */ + { unsigned dictContentSize = ZDICT_dictSize(dictList); + if (dictContentSize < ZDICT_CONTENTSIZE_MIN) { free(dictList); return ERROR(dictionaryCreation_failed); } /* dictionary content too small */ + if (dictContentSize < targetDictSize/4) { + DISPLAYLEVEL(2, "! 
warning : selected content significantly smaller than requested (%u < %u) \n", dictContentSize, (unsigned)maxDictSize); + if (samplesBuffSize < 10 * targetDictSize) + DISPLAYLEVEL(2, "! consider increasing the number of samples (total size : %u MB)\n", (unsigned)(samplesBuffSize>>20)); + if (minRep > MINRATIO) { + DISPLAYLEVEL(2, "! consider increasing selectivity to produce larger dictionary (-s%u) \n", selectivity+1); + DISPLAYLEVEL(2, "! note : larger dictionaries are not necessarily better, test its efficiency on samples \n"); + } + } + + if ((dictContentSize > targetDictSize*3) && (nbSamples > 2*MINRATIO) && (selectivity>1)) { + unsigned proposedSelectivity = selectivity-1; + while ((nbSamples >> proposedSelectivity) <= MINRATIO) { proposedSelectivity--; } + DISPLAYLEVEL(2, "! note : calculated dictionary significantly larger than requested (%u > %u) \n", dictContentSize, (unsigned)maxDictSize); + DISPLAYLEVEL(2, "! consider increasing dictionary size, or produce denser dictionary (-s%u) \n", proposedSelectivity); + DISPLAYLEVEL(2, "! always test dictionary efficiency on real samples \n"); + } + + /* limit dictionary size */ + { U32 const max = dictList->pos; /* convention : nb of useful elts within dictList */ + U32 currentSize = 0; + U32 n; for (n=1; n targetDictSize) { currentSize -= dictList[n].length; break; } + } + dictList->pos = n; + dictContentSize = currentSize; + } + + /* build dict content */ + { U32 u; + BYTE* ptr = (BYTE*)dictBuffer + maxDictSize; + for (u=1; upos; u++) { + U32 l = dictList[u].length; + ptr -= l; + if (ptr<(BYTE*)dictBuffer) { free(dictList); return ERROR(GENERIC); } /* should not happen */ + memcpy(ptr, (const char*)samplesBuffer+dictList[u].pos, l); + } } + + dictSize = ZDICT_addEntropyTablesFromBuffer_advanced(dictBuffer, dictContentSize, maxDictSize, + samplesBuffer, samplesSizes, nbSamples, + params.zParams); + } + + /* clean up */ + free(dictList); + return dictSize; +} + + +/* ZDICT_trainFromBuffer_legacy() : + * issue : samplesBuffer need to be followed by a noisy guard band. 
+ * work around : duplicate the buffer, and add the noise */ +size_t ZDICT_trainFromBuffer_legacy(void* dictBuffer, size_t dictBufferCapacity, + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, + ZDICT_legacy_params_t params) +{ + size_t result; + void* newBuff; + size_t const sBuffSize = ZDICT_totalSampleSize(samplesSizes, nbSamples); + if (sBuffSize < ZDICT_MIN_SAMPLES_SIZE) return 0; /* not enough content => no dictionary */ + + newBuff = malloc(sBuffSize + NOISELENGTH); + if (!newBuff) return ERROR(memory_allocation); + + memcpy(newBuff, samplesBuffer, sBuffSize); + ZDICT_fillNoise((char*)newBuff + sBuffSize, NOISELENGTH); /* guard band, for end of buffer condition */ + + result = + ZDICT_trainFromBuffer_unsafe_legacy(dictBuffer, dictBufferCapacity, newBuff, + samplesSizes, nbSamples, params); + free(newBuff); + return result; +} + + +size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity, + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples) +{ + ZDICT_fastCover_params_t params; + DEBUGLOG(3, "ZDICT_trainFromBuffer"); + memset(¶ms, 0, sizeof(params)); + params.d = 8; + params.steps = 4; + /* Use default level since no compression level information is available */ + params.zParams.compressionLevel = ZSTD_CLEVEL_DEFAULT; +#if defined(DEBUGLEVEL) && (DEBUGLEVEL>=1) + params.zParams.notificationLevel = DEBUGLEVEL; +#endif + return ZDICT_optimizeTrainFromBuffer_fastCover(dictBuffer, dictBufferCapacity, + samplesBuffer, samplesSizes, nbSamples, + ¶ms); +} + +size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity, + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples) +{ + ZDICT_params_t params; + memset(¶ms, 0, sizeof(params)); + return ZDICT_addEntropyTablesFromBuffer_advanced(dictBuffer, dictContentSize, dictBufferCapacity, + samplesBuffer, samplesSizes, nbSamples, + params); +} +/**** ended inlining dictBuilder/zdict.c ****/ diff --git a/libkram/zstd/zstd.h b/libkram/zstd/zstd.h new file mode 100644 index 00000000..4651e6c4 --- /dev/null +++ b/libkram/zstd/zstd.h @@ -0,0 +1,2532 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ +#if defined (__cplusplus) +extern "C" { +#endif + +#ifndef ZSTD_H_235446 +#define ZSTD_H_235446 + +/* ====== Dependency ======*/ +#include /* INT_MAX */ +#include /* size_t */ + + +/* ===== ZSTDLIB_API : control library symbols visibility ===== */ +#ifndef ZSTDLIB_VISIBILITY +# if defined(__GNUC__) && (__GNUC__ >= 4) +# define ZSTDLIB_VISIBILITY __attribute__ ((visibility ("default"))) +# else +# define ZSTDLIB_VISIBILITY +# endif +#endif +#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1) +# define ZSTDLIB_API __declspec(dllexport) ZSTDLIB_VISIBILITY +#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1) +# define ZSTDLIB_API __declspec(dllimport) ZSTDLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ +#else +# define ZSTDLIB_API ZSTDLIB_VISIBILITY +#endif + + +/******************************************************************************* + Introduction + + zstd, short for Zstandard, is a fast lossless compression algorithm, targeting + real-time compression scenarios at zlib-level and better compression ratios. + The zstd compression library provides in-memory compression and decompression + functions. + + The library supports regular compression levels from 1 up to ZSTD_maxCLevel(), + which is currently 22. Levels >= 20, labeled `--ultra`, should be used with + caution, as they require more memory. The library also offers negative + compression levels, which extend the range of speed vs. ratio preferences. + The lower the level, the faster the speed (at the cost of compression). + + Compression can be done in: + - a single step (described as Simple API) + - a single step, reusing a context (described as Explicit context) + - unbounded multiple steps (described as Streaming compression) + + The compression ratio achievable on small data can be highly improved using + a dictionary. Dictionary compression can be performed in: + - a single step (described as Simple dictionary API) + - a single step, reusing a dictionary (described as Bulk-processing + dictionary API) + + Advanced experimental functions can be accessed using + `#define ZSTD_STATIC_LINKING_ONLY` before including zstd.h. + + Advanced experimental APIs should never be used with a dynamically-linked + library. They are not "stable"; their definitions or signatures may change in + the future. Only static linking is allowed. +*******************************************************************************/ + +/*------ Version ------*/ +#define ZSTD_VERSION_MAJOR 1 +#define ZSTD_VERSION_MINOR 5 +#define ZSTD_VERSION_RELEASE 0 +#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE) + +/*! ZSTD_versionNumber() : + * Return runtime library version, the value is (MAJOR*100*100 + MINOR*100 + RELEASE). */ +ZSTDLIB_API unsigned ZSTD_versionNumber(void); + +#define ZSTD_LIB_VERSION ZSTD_VERSION_MAJOR.ZSTD_VERSION_MINOR.ZSTD_VERSION_RELEASE +#define ZSTD_QUOTE(str) #str +#define ZSTD_EXPAND_AND_QUOTE(str) ZSTD_QUOTE(str) +#define ZSTD_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LIB_VERSION) + +/*! ZSTD_versionString() : + * Return runtime library version, like "1.4.5". Requires v1.3.0+. 
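*
* A minimal version-check sketch (not from the zstd sources; the warning text is a placeholder),
* showing how the runtime value above can be compared against the compile-time macros:
*
*     #include <stdio.h>
*     static void checkZstdVersion(void)
*     {
*         if (ZSTD_versionNumber() != ZSTD_VERSION_NUMBER) {
*             printf("zstd header %s vs linked library %s\n",
*                    ZSTD_VERSION_STRING, ZSTD_versionString());
*         }
*     }
*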
*/ +ZSTDLIB_API const char* ZSTD_versionString(void); + +/* ************************************* + * Default constant + ***************************************/ +#ifndef ZSTD_CLEVEL_DEFAULT +# define ZSTD_CLEVEL_DEFAULT 3 +#endif + +/* ************************************* + * Constants + ***************************************/ + +/* All magic numbers are supposed read/written to/from files/memory using little-endian convention */ +#define ZSTD_MAGICNUMBER 0xFD2FB528 /* valid since v0.8.0 */ +#define ZSTD_MAGIC_DICTIONARY 0xEC30A437 /* valid since v0.7.0 */ +#define ZSTD_MAGIC_SKIPPABLE_START 0x184D2A50 /* all 16 values, from 0x184D2A50 to 0x184D2A5F, signal the beginning of a skippable frame */ +#define ZSTD_MAGIC_SKIPPABLE_MASK 0xFFFFFFF0 + +#define ZSTD_BLOCKSIZELOG_MAX 17 +#define ZSTD_BLOCKSIZE_MAX (1<= `ZSTD_compressBound(srcSize)`. + * @return : compressed size written into `dst` (<= `dstCapacity), + * or an error code if it fails (which can be tested using ZSTD_isError()). */ +ZSTDLIB_API size_t ZSTD_compress( void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + int compressionLevel); + +/*! ZSTD_decompress() : + * `compressedSize` : must be the _exact_ size of some number of compressed and/or skippable frames. + * `dstCapacity` is an upper bound of originalSize to regenerate. + * If user cannot imply a maximum upper bound, it's better to use streaming mode to decompress data. + * @return : the number of bytes decompressed into `dst` (<= `dstCapacity`), + * or an errorCode if it fails (which can be tested using ZSTD_isError()). */ +ZSTDLIB_API size_t ZSTD_decompress( void* dst, size_t dstCapacity, + const void* src, size_t compressedSize); + +/*! ZSTD_getFrameContentSize() : requires v1.3.0+ + * `src` should point to the start of a ZSTD encoded frame. + * `srcSize` must be at least as large as the frame header. + * hint : any size >= `ZSTD_frameHeaderSize_max` is large enough. + * @return : - decompressed size of `src` frame content, if known + * - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined + * - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small) + * note 1 : a 0 return value means the frame is valid but "empty". + * note 2 : decompressed size is an optional field, it may not be present, typically in streaming mode. + * When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size. + * In which case, it's necessary to use streaming mode to decompress data. + * Optionally, application can rely on some implicit limit, + * as ZSTD_decompress() only needs an upper bound of decompressed size. + * (For example, data could be necessarily cut into blocks <= 16 KB). + * note 3 : decompressed size is always present when compression is completed using single-pass functions, + * such as ZSTD_compress(), ZSTD_compressCCtx() ZSTD_compress_usingDict() or ZSTD_compress_usingCDict(). + * note 4 : decompressed size can be very large (64-bits value), + * potentially larger than what local system can handle as a single memory segment. + * In which case, it's necessary to use streaming mode to decompress data. + * note 5 : If source is untrusted, decompressed size could be wrong or intentionally modified. + * Always ensure return value fits within application's authorized limits. + * Each application can set its own limits. 
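*
* A minimal bounded one-shot decompression sketch (not from the zstd sources; `cSrc`,
* `cSrcSize` and the 64 MB cap are placeholders), following notes 2 and 5 above:
*
*     unsigned long long const rSize = ZSTD_getFrameContentSize(cSrc, cSrcSize);
*     if (rSize == ZSTD_CONTENTSIZE_ERROR) return;      /* not a zstd frame */
*     if (rSize == ZSTD_CONTENTSIZE_UNKNOWN) return;    /* use the streaming API instead */
*     if (rSize > (64ULL << 20)) return;                /* enforce application's own limit */
*     {   void* const rBuff = malloc((size_t)rSize);
*         size_t const dSize = ZSTD_decompress(rBuff, (size_t)rSize, cSrc, cSrcSize);
*         if (ZSTD_isError(dSize) || dSize != rSize) { /* handle error */ }
*         free(rBuff);
*     }
*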
+ * note 6 : This function replaces ZSTD_getDecompressedSize() */ +#define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1) +#define ZSTD_CONTENTSIZE_ERROR (0ULL - 2) +ZSTDLIB_API unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize); + +/*! ZSTD_getDecompressedSize() : + * NOTE: This function is now obsolete, in favor of ZSTD_getFrameContentSize(). + * Both functions work the same way, but ZSTD_getDecompressedSize() blends + * "empty", "unknown" and "error" results to the same return value (0), + * while ZSTD_getFrameContentSize() gives them separate return values. + * @return : decompressed size of `src` frame content _if known and not empty_, 0 otherwise. */ +ZSTDLIB_API unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize); + +/*! ZSTD_findFrameCompressedSize() : Requires v1.4.0+ + * `src` should point to the start of a ZSTD frame or skippable frame. + * `srcSize` must be >= first frame size + * @return : the compressed size of the first frame starting at `src`, + * suitable to pass as `srcSize` to `ZSTD_decompress` or similar, + * or an error code if input is invalid */ +ZSTDLIB_API size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize); + + +/*====== Helper functions ======*/ +#define ZSTD_COMPRESSBOUND(srcSize) ((srcSize) + ((srcSize)>>8) + (((srcSize) < (128<<10)) ? (((128<<10) - (srcSize)) >> 11) /* margin, from 64 to 0 */ : 0)) /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */ +ZSTDLIB_API size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case single-pass scenario */ +ZSTDLIB_API unsigned ZSTD_isError(size_t code); /*!< tells if a `size_t` function result is an error code */ +ZSTDLIB_API const char* ZSTD_getErrorName(size_t code); /*!< provides readable string from an error code */ +ZSTDLIB_API int ZSTD_minCLevel(void); /*!< minimum negative compression level allowed, requires v1.4.0+ */ +ZSTDLIB_API int ZSTD_maxCLevel(void); /*!< maximum compression level available */ +ZSTDLIB_API int ZSTD_defaultCLevel(void); /*!< default compression level, specified by ZSTD_CLEVEL_DEFAULT, requires v1.5.0+ */ + + +/*************************************** +* Explicit context +***************************************/ +/*= Compression context + * When compressing many times, + * it is recommended to allocate a context just once, + * and re-use it for each successive compression operation. + * This will make workload friendlier for system's memory. + * Note : re-using context is just a speed / resource optimization. + * It doesn't change the compression ratio, which remains identical. + * Note 2 : In multi-threaded environments, + * use one different context per thread for parallel execution. + */ +typedef struct ZSTD_CCtx_s ZSTD_CCtx; +ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void); +ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx); /* accept NULL pointer */ + +/*! ZSTD_compressCCtx() : + * Same as ZSTD_compress(), using an explicit ZSTD_CCtx. + * Important : in order to behave similarly to `ZSTD_compress()`, + * this function compresses at requested compression level, + * __ignoring any other parameter__ . + * If any advanced parameter was set using the advanced API, + * they will all be reset. Only `compressionLevel` remains. 
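*
* A minimal context-reuse sketch (not from the zstd sources; `buffers`, `sizes`, `count`,
* `dst` and `dstCapacity` are placeholders), compressing many buffers with one CCtx:
*
*     ZSTD_CCtx* const cctx = ZSTD_createCCtx();
*     size_t i;
*     for (i = 0; i < count; i++) {
*         size_t const cSize = ZSTD_compressCCtx(cctx, dst, dstCapacity,
*                                                buffers[i], sizes[i], 3);
*         if (ZSTD_isError(cSize)) { /* see ZSTD_getErrorName(cSize) */ }
*         /* ... store or write out cSize bytes of dst ... */
*     }
*     ZSTD_freeCCtx(cctx);
*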
+ */ +ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + int compressionLevel); + +/*= Decompression context + * When decompressing many times, + * it is recommended to allocate a context only once, + * and re-use it for each successive compression operation. + * This will make workload friendlier for system's memory. + * Use one context per thread for parallel execution. */ +typedef struct ZSTD_DCtx_s ZSTD_DCtx; +ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx(void); +ZSTDLIB_API size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx); /* accept NULL pointer */ + +/*! ZSTD_decompressDCtx() : + * Same as ZSTD_decompress(), + * requires an allocated ZSTD_DCtx. + * Compatible with sticky parameters. + */ +ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize); + + +/********************************************* +* Advanced compression API (Requires v1.4.0+) +**********************************************/ + +/* API design : + * Parameters are pushed one by one into an existing context, + * using ZSTD_CCtx_set*() functions. + * Pushed parameters are sticky : they are valid for next compressed frame, and any subsequent frame. + * "sticky" parameters are applicable to `ZSTD_compress2()` and `ZSTD_compressStream*()` ! + * __They do not apply to "simple" one-shot variants such as ZSTD_compressCCtx()__ . + * + * It's possible to reset all parameters to "default" using ZSTD_CCtx_reset(). + * + * This API supercedes all other "advanced" API entry points in the experimental section. + * In the future, we expect to remove from experimental API entry points which are redundant with this API. + */ + + +/* Compression strategies, listed from fastest to strongest */ +typedef enum { ZSTD_fast=1, + ZSTD_dfast=2, + ZSTD_greedy=3, + ZSTD_lazy=4, + ZSTD_lazy2=5, + ZSTD_btlazy2=6, + ZSTD_btopt=7, + ZSTD_btultra=8, + ZSTD_btultra2=9 + /* note : new strategies _might_ be added in the future. + Only the order (from fast to strong) is guaranteed */ +} ZSTD_strategy; + +typedef enum { + + /* compression parameters + * Note: When compressing with a ZSTD_CDict these parameters are superseded + * by the parameters used to construct the ZSTD_CDict. + * See ZSTD_CCtx_refCDict() for more info (superseded-by-cdict). */ + ZSTD_c_compressionLevel=100, /* Set compression parameters according to pre-defined cLevel table. + * Note that exact compression parameters are dynamically determined, + * depending on both compression level and srcSize (when known). + * Default level is ZSTD_CLEVEL_DEFAULT==3. + * Special: value 0 means default, which is controlled by ZSTD_CLEVEL_DEFAULT. + * Note 1 : it's possible to pass a negative compression level. + * Note 2 : setting a level does not automatically set all other compression parameters + * to default. Setting this will however eventually dynamically impact the compression + * parameters which have not been manually set. The manually set + * ones will 'stick'. */ + /* Advanced compression parameters : + * It's possible to pin down compression parameters to some specific values. + * In which case, these values are no longer dynamically selected by the compressor */ + ZSTD_c_windowLog=101, /* Maximum allowed back-reference distance, expressed as power of 2. + * This will set a memory budget for streaming decompression, + * with larger values requiring more memory + * and typically compressing more. + * Must be clamped between ZSTD_WINDOWLOG_MIN and ZSTD_WINDOWLOG_MAX. 
+ * Special: value 0 means "use default windowLog". + * Note: Using a windowLog greater than ZSTD_WINDOWLOG_LIMIT_DEFAULT + * requires explicitly allowing such size at streaming decompression stage. */ + ZSTD_c_hashLog=102, /* Size of the initial probe table, as a power of 2. + * Resulting memory usage is (1 << (hashLog+2)). + * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX. + * Larger tables improve compression ratio of strategies <= dFast, + * and improve speed of strategies > dFast. + * Special: value 0 means "use default hashLog". */ + ZSTD_c_chainLog=103, /* Size of the multi-probe search table, as a power of 2. + * Resulting memory usage is (1 << (chainLog+2)). + * Must be clamped between ZSTD_CHAINLOG_MIN and ZSTD_CHAINLOG_MAX. + * Larger tables result in better and slower compression. + * This parameter is useless for "fast" strategy. + * It's still useful when using "dfast" strategy, + * in which case it defines a secondary probe table. + * Special: value 0 means "use default chainLog". */ + ZSTD_c_searchLog=104, /* Number of search attempts, as a power of 2. + * More attempts result in better and slower compression. + * This parameter is useless for "fast" and "dFast" strategies. + * Special: value 0 means "use default searchLog". */ + ZSTD_c_minMatch=105, /* Minimum size of searched matches. + * Note that Zstandard can still find matches of smaller size, + * it just tweaks its search algorithm to look for this size and larger. + * Larger values increase compression and decompression speed, but decrease ratio. + * Must be clamped between ZSTD_MINMATCH_MIN and ZSTD_MINMATCH_MAX. + * Note that currently, for all strategies < btopt, effective minimum is 4. + * , for all strategies > fast, effective maximum is 6. + * Special: value 0 means "use default minMatchLength". */ + ZSTD_c_targetLength=106, /* Impact of this field depends on strategy. + * For strategies btopt, btultra & btultra2: + * Length of Match considered "good enough" to stop search. + * Larger values make compression stronger, and slower. + * For strategy fast: + * Distance between match sampling. + * Larger values make compression faster, and weaker. + * Special: value 0 means "use default targetLength". */ + ZSTD_c_strategy=107, /* See ZSTD_strategy enum definition. + * The higher the value of selected strategy, the more complex it is, + * resulting in stronger and slower compression. + * Special: value 0 means "use default strategy". */ + /* LDM mode parameters */ + ZSTD_c_enableLongDistanceMatching=160, /* Enable long distance matching. + * This parameter is designed to improve compression ratio + * for large inputs, by finding large matches at long distance. + * It increases memory usage and window size. + * Note: enabling this parameter increases default ZSTD_c_windowLog to 128 MB + * except when expressly set to a different value. + * Note: will be enabled by default if ZSTD_c_windowLog >= 128 MB and + * compression strategy >= ZSTD_btopt (== compression level 16+) */ + ZSTD_c_ldmHashLog=161, /* Size of the table for long distance matching, as a power of 2. + * Larger values increase memory usage and compression ratio, + * but decrease compression speed. + * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX + * default: windowlog - 7. + * Special: value 0 means "automatically determine hashlog". */ + ZSTD_c_ldmMinMatch=162, /* Minimum match size for long distance matcher. + * Larger/too small values usually decrease compression ratio. 
+ * Must be clamped between ZSTD_LDM_MINMATCH_MIN and ZSTD_LDM_MINMATCH_MAX. + * Special: value 0 means "use default value" (default: 64). */ + ZSTD_c_ldmBucketSizeLog=163, /* Log size of each bucket in the LDM hash table for collision resolution. + * Larger values improve collision resolution but decrease compression speed. + * The maximum value is ZSTD_LDM_BUCKETSIZELOG_MAX. + * Special: value 0 means "use default value" (default: 3). */ + ZSTD_c_ldmHashRateLog=164, /* Frequency of inserting/looking up entries into the LDM hash table. + * Must be clamped between 0 and (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN). + * Default is MAX(0, (windowLog - ldmHashLog)), optimizing hash table usage. + * Larger values improve compression speed. + * Deviating far from default value will likely result in a compression ratio decrease. + * Special: value 0 means "automatically determine hashRateLog". */ + + /* frame parameters */ + ZSTD_c_contentSizeFlag=200, /* Content size will be written into frame header _whenever known_ (default:1) + * Content size must be known at the beginning of compression. + * This is automatically the case when using ZSTD_compress2(), + * For streaming scenarios, content size must be provided with ZSTD_CCtx_setPledgedSrcSize() */ + ZSTD_c_checksumFlag=201, /* A 32-bits checksum of content is written at end of frame (default:0) */ + ZSTD_c_dictIDFlag=202, /* When applicable, dictionary's ID is written into frame header (default:1) */ + + /* multi-threading parameters */ + /* These parameters are only active if multi-threading is enabled (compiled with build macro ZSTD_MULTITHREAD). + * Otherwise, trying to set any other value than default (0) will be a no-op and return an error. + * In a situation where it's unknown if the linked library supports multi-threading or not, + * setting ZSTD_c_nbWorkers to any value >= 1 and consulting the return value provides a quick way to check this property. + */ + ZSTD_c_nbWorkers=400, /* Select how many threads will be spawned to compress in parallel. + * When nbWorkers >= 1, triggers asynchronous mode when invoking ZSTD_compressStream*() : + * ZSTD_compressStream*() consumes input and flush output if possible, but immediately gives back control to caller, + * while compression is performed in parallel, within worker thread(s). + * (note : a strong exception to this rule is when first invocation of ZSTD_compressStream2() sets ZSTD_e_end : + * in which case, ZSTD_compressStream2() delegates to ZSTD_compress2(), which is always a blocking call). + * More workers improve speed, but also increase memory usage. + * Default value is `0`, aka "single-threaded mode" : no worker is spawned, + * compression is performed inside Caller's thread, and all invocations are blocking */ + ZSTD_c_jobSize=401, /* Size of a compression job. This value is enforced only when nbWorkers >= 1. + * Each compression job is completed in parallel, so this value can indirectly impact the nb of active threads. + * 0 means default, which is dynamically determined based on compression parameters. + * Job size must be a minimum of overlap size, or ZSTDMT_JOBSIZE_MIN (= 512 KB), whichever is largest. + * The minimum size is automatically and transparently enforced. */ + ZSTD_c_overlapLog=402, /* Control the overlap size, as a fraction of window size. + * The overlap size is an amount of data reloaded from previous job at the beginning of a new job. + * It helps preserve compression ratio, while each job is compressed in parallel. 
+ * This value is enforced only when nbWorkers >= 1. + * Larger values increase compression ratio, but decrease speed. + * Possible values range from 0 to 9 : + * - 0 means "default" : value will be determined by the library, depending on strategy + * - 1 means "no overlap" + * - 9 means "full overlap", using a full window size. + * Each intermediate rank increases/decreases load size by a factor 2 : + * 9: full window; 8: w/2; 7: w/4; 6: w/8; 5:w/16; 4: w/32; 3:w/64; 2:w/128; 1:no overlap; 0:default + * default value varies between 6 and 9, depending on strategy */ + + /* note : additional experimental parameters are also available + * within the experimental section of the API. + * At the time of this writing, they include : + * ZSTD_c_rsyncable + * ZSTD_c_format + * ZSTD_c_forceMaxWindow + * ZSTD_c_forceAttachDict + * ZSTD_c_literalCompressionMode + * ZSTD_c_targetCBlockSize + * ZSTD_c_srcSizeHint + * ZSTD_c_enableDedicatedDictSearch + * ZSTD_c_stableInBuffer + * ZSTD_c_stableOutBuffer + * ZSTD_c_blockDelimiters + * ZSTD_c_validateSequences + * ZSTD_c_splitBlocks + * ZSTD_c_useRowMatchFinder + * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. + * note : never ever use experimentalParam? names directly; + * also, the enums values themselves are unstable and can still change. + */ + ZSTD_c_experimentalParam1=500, + ZSTD_c_experimentalParam2=10, + ZSTD_c_experimentalParam3=1000, + ZSTD_c_experimentalParam4=1001, + ZSTD_c_experimentalParam5=1002, + ZSTD_c_experimentalParam6=1003, + ZSTD_c_experimentalParam7=1004, + ZSTD_c_experimentalParam8=1005, + ZSTD_c_experimentalParam9=1006, + ZSTD_c_experimentalParam10=1007, + ZSTD_c_experimentalParam11=1008, + ZSTD_c_experimentalParam12=1009, + ZSTD_c_experimentalParam13=1010, + ZSTD_c_experimentalParam14=1011, + ZSTD_c_experimentalParam15=1012 +} ZSTD_cParameter; + +typedef struct { + size_t error; + int lowerBound; + int upperBound; +} ZSTD_bounds; + +/*! ZSTD_cParam_getBounds() : + * All parameters must belong to an interval with lower and upper bounds, + * otherwise they will either trigger an error or be automatically clamped. + * @return : a structure, ZSTD_bounds, which contains + * - an error status field, which must be tested using ZSTD_isError() + * - lower and upper bounds, both inclusive + */ +ZSTDLIB_API ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter cParam); + +/*! ZSTD_CCtx_setParameter() : + * Set one compression parameter, selected by enum ZSTD_cParameter. + * All parameters have valid bounds. Bounds can be queried using ZSTD_cParam_getBounds(). + * Providing a value beyond bound will either clamp it, or trigger an error (depending on parameter). + * Setting a parameter is generally only possible during frame initialization (before starting compression). + * Exception : when using multi-threading mode (nbWorkers >= 1), + * the following parameters can be updated _during_ compression (within same frame): + * => compressionLevel, hashLog, chainLog, searchLog, minMatch, targetLength and strategy. + * new parameters will be active for next job only (after a flush()). + * @return : an error code (which can be tested using ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value); + +/*! ZSTD_CCtx_setPledgedSrcSize() : + * Total input data size to be compressed as a single frame. + * Value will be written in frame header, unless if explicitly forbidden using ZSTD_c_contentSizeFlag. 
+ * This value will also be controlled at end of frame, and trigger an error if not respected. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Note 1 : pledgedSrcSize==0 actually means zero, aka an empty frame. + * In order to mean "unknown content size", pass constant ZSTD_CONTENTSIZE_UNKNOWN. + * ZSTD_CONTENTSIZE_UNKNOWN is default value for any new frame. + * Note 2 : pledgedSrcSize is only valid once, for the next frame. + * It's discarded at the end of the frame, and replaced by ZSTD_CONTENTSIZE_UNKNOWN. + * Note 3 : Whenever all input data is provided and consumed in a single round, + * for example with ZSTD_compress2(), + * or invoking immediately ZSTD_compressStream2(,,,ZSTD_e_end), + * this value is automatically overridden by srcSize instead. + */ +ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize); + +typedef enum { + ZSTD_reset_session_only = 1, + ZSTD_reset_parameters = 2, + ZSTD_reset_session_and_parameters = 3 +} ZSTD_ResetDirective; + +/*! ZSTD_CCtx_reset() : + * There are 2 different things that can be reset, independently or jointly : + * - The session : will stop compressing current frame, and make CCtx ready to start a new one. + * Useful after an error, or to interrupt any ongoing compression. + * Any internal data not yet flushed is cancelled. + * Compression parameters and dictionary remain unchanged. + * They will be used to compress next frame. + * Resetting session never fails. + * - The parameters : changes all parameters back to "default". + * This removes any reference to any dictionary too. + * Parameters can only be changed between 2 sessions (i.e. no compression is currently ongoing) + * otherwise the reset fails, and function returns an error value (which can be tested using ZSTD_isError()) + * - Both : similar to resetting the session, followed by resetting parameters. + */ +ZSTDLIB_API size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset); + +/*! ZSTD_compress2() : + * Behave the same as ZSTD_compressCCtx(), but compression parameters are set using the advanced API. + * ZSTD_compress2() always starts a new frame. + * Should cctx hold data from a previously unfinished frame, everything about it is forgotten. + * - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*() + * - The function is always blocking, returns when compression is completed. + * Hint : compression runs faster if `dstCapacity` >= `ZSTD_compressBound(srcSize)`. + * @return : compressed size written into `dst` (<= `dstCapacity), + * or an error code if it fails (which can be tested using ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_compress2( ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize); + + +/*********************************************** +* Advanced decompression API (Requires v1.4.0+) +************************************************/ + +/* The advanced API pushes parameters one by one into an existing DCtx context. + * Parameters are sticky, and remain valid for all following frames + * using the same DCtx context. + * It's possible to reset parameters to default values using ZSTD_DCtx_reset(). + * Note : This API is compatible with existing ZSTD_decompressDCtx() and ZSTD_decompressStream(). + * Therefore, no new decompression function is necessary. 
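*
* A minimal advanced-API sketch (not from the zstd sources; buffer names, the level,
* worker count and window limit are placeholders), setting sticky parameters once and
* then calling the one-shot entry points:
*
*     ZSTD_CCtx* const cctx = ZSTD_createCCtx();
*     ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 19);
*     ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1);
*     ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 4);    /* errors on single-threaded builds; check return */
*     size_t const cSize = ZSTD_compress2(cctx, dst, dstCapacity, src, srcSize);
*
*     ZSTD_DCtx* const dctx = ZSTD_createDCtx();
*     ZSTD_DCtx_setParameter(dctx, ZSTD_d_windowLogMax, 27); /* refuse frames with larger windows */
*     size_t const rSize = ZSTD_decompressDCtx(dctx, rBuff, rCapacity, dst, cSize);
*
*     ZSTD_freeCCtx(cctx); ZSTD_freeDCtx(dctx);
*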
+ */ + +typedef enum { + + ZSTD_d_windowLogMax=100, /* Select a size limit (in power of 2) beyond which + * the streaming API will refuse to allocate memory buffer + * in order to protect the host from unreasonable memory requirements. + * This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode. + * By default, a decompression context accepts window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT). + * Special: value 0 means "use default maximum windowLog". */ + + /* note : additional experimental parameters are also available + * within the experimental section of the API. + * At the time of this writing, they include : + * ZSTD_d_format + * ZSTD_d_stableOutBuffer + * ZSTD_d_forceIgnoreChecksum + * ZSTD_d_refMultipleDDicts + * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. + * note : never ever use experimentalParam? names directly + */ + ZSTD_d_experimentalParam1=1000, + ZSTD_d_experimentalParam2=1001, + ZSTD_d_experimentalParam3=1002, + ZSTD_d_experimentalParam4=1003 + +} ZSTD_dParameter; + +/*! ZSTD_dParam_getBounds() : + * All parameters must belong to an interval with lower and upper bounds, + * otherwise they will either trigger an error or be automatically clamped. + * @return : a structure, ZSTD_bounds, which contains + * - an error status field, which must be tested using ZSTD_isError() + * - both lower and upper bounds, inclusive + */ +ZSTDLIB_API ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam); + +/*! ZSTD_DCtx_setParameter() : + * Set one compression parameter, selected by enum ZSTD_dParameter. + * All parameters have valid bounds. Bounds can be queried using ZSTD_dParam_getBounds(). + * Providing a value beyond bound will either clamp it, or trigger an error (depending on parameter). + * Setting a parameter is only possible during frame initialization (before starting decompression). + * @return : 0, or an error code (which can be tested using ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int value); + +/*! ZSTD_DCtx_reset() : + * Return a DCtx to clean state. + * Session and parameters can be reset jointly or separately. + * Parameters can only be reset when no active frame is being decompressed. + * @return : 0, or an error code, which can be tested with ZSTD_isError() + */ +ZSTDLIB_API size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset); + + +/**************************** +* Streaming +****************************/ + +typedef struct ZSTD_inBuffer_s { + const void* src; /**< start of input buffer */ + size_t size; /**< size of input buffer */ + size_t pos; /**< position where reading stopped. Will be updated. Necessarily 0 <= pos <= size */ +} ZSTD_inBuffer; + +typedef struct ZSTD_outBuffer_s { + void* dst; /**< start of output buffer */ + size_t size; /**< size of output buffer */ + size_t pos; /**< position where writing stopped. Will be updated. Necessarily 0 <= pos <= size */ +} ZSTD_outBuffer; + + + +/*-*********************************************************************** +* Streaming compression - HowTo +* +* A ZSTD_CStream object is required to track streaming operation. +* Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources. +* ZSTD_CStream objects can be reused multiple times on consecutive compression operations. +* It is recommended to re-use ZSTD_CStream since it will play nicer with system's memory, by re-using already allocated memory. 
+* +* For parallel execution, use one separate ZSTD_CStream per thread. +* +* note : since v1.3.0, ZSTD_CStream and ZSTD_CCtx are the same thing. +* +* Parameters are sticky : when starting a new compression on the same context, +* it will re-use the same sticky parameters as previous compression session. +* When in doubt, it's recommended to fully initialize the context before usage. +* Use ZSTD_CCtx_reset() to reset the context and ZSTD_CCtx_setParameter(), +* ZSTD_CCtx_setPledgedSrcSize(), or ZSTD_CCtx_loadDictionary() and friends to +* set more specific parameters, the pledged source size, or load a dictionary. +* +* Use ZSTD_compressStream2() with ZSTD_e_continue as many times as necessary to +* consume input stream. The function will automatically update both `pos` +* fields within `input` and `output`. +* Note that the function may not consume the entire input, for example, because +* the output buffer is already full, in which case `input.pos < input.size`. +* The caller must check if input has been entirely consumed. +* If not, the caller must make some room to receive more compressed data, +* and then present again remaining input data. +* note: ZSTD_e_continue is guaranteed to make some forward progress when called, +* but doesn't guarantee maximal forward progress. This is especially relevant +* when compressing with multiple threads. The call won't block if it can +* consume some input, but if it can't it will wait for some, but not all, +* output to be flushed. +* @return : provides a minimum amount of data remaining to be flushed from internal buffers +* or an error code, which can be tested using ZSTD_isError(). +* +* At any moment, it's possible to flush whatever data might remain stuck within internal buffer, +* using ZSTD_compressStream2() with ZSTD_e_flush. `output->pos` will be updated. +* Note that, if `output->size` is too small, a single invocation with ZSTD_e_flush might not be enough (return code > 0). +* In which case, make some room to receive more compressed data, and call again ZSTD_compressStream2() with ZSTD_e_flush. +* You must continue calling ZSTD_compressStream2() with ZSTD_e_flush until it returns 0, at which point you can change the +* operation. +* note: ZSTD_e_flush will flush as much output as possible, meaning when compressing with multiple threads, it will +* block until the flush is complete or the output buffer is full. +* @return : 0 if internal buffers are entirely flushed, +* >0 if some data still present within internal buffer (the value is minimal estimation of remaining size), +* or an error code, which can be tested using ZSTD_isError(). +* +* Calling ZSTD_compressStream2() with ZSTD_e_end instructs to finish a frame. +* It will perform a flush and write frame epilogue. +* The epilogue is required for decoders to consider a frame completed. +* flush operation is the same, and follows same rules as calling ZSTD_compressStream2() with ZSTD_e_flush. +* You must continue calling ZSTD_compressStream2() with ZSTD_e_end until it returns 0, at which point you are free to +* start a new frame. +* note: ZSTD_e_end will flush as much output as possible, meaning when compressing with multiple threads, it will +* block until the flush is complete or the output buffer is full. +* @return : 0 if frame fully completed and fully flushed, +* >0 if some data still present within internal buffer (the value is minimal estimation of remaining size), +* or an error code, which can be tested using ZSTD_isError(). 
+*
+* *******************************************************************/
+
+typedef ZSTD_CCtx ZSTD_CStream; /**< CCtx and CStream are now effectively same object (>= v1.3.0) */
+ /* Continue to distinguish them for compatibility with older versions <= v1.2.0 */
+/*===== ZSTD_CStream management functions =====*/
+ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream(void);
+ZSTDLIB_API size_t ZSTD_freeCStream(ZSTD_CStream* zcs); /* accept NULL pointer */
+
+/*===== Streaming compression functions =====*/
+typedef enum {
+ ZSTD_e_continue=0, /* collect more data, encoder decides when to output compressed result, for optimal compression ratio */
+ ZSTD_e_flush=1, /* flush any data provided so far,
+ * it creates (at least) one new block, that can be decoded immediately on reception;
+ * frame will continue: any future data can still reference previously compressed data, improving compression.
+ * note : multithreaded compression will block to flush as much output as possible. */
+ ZSTD_e_end=2 /* flush any remaining data _and_ close current frame.
+ * note that frame is only closed after compressed data is fully flushed (return value == 0).
+ * After that point, any additional data starts a new frame.
+ * note : each frame is independent (does not reference any content from previous frame).
+ * note : multithreaded compression will block to flush as much output as possible. */
+} ZSTD_EndDirective;
+
+/*! ZSTD_compressStream2() : Requires v1.4.0+
+ * Behaves about the same as ZSTD_compressStream, with additional control on end directive.
+ * - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*()
+ * - Compression parameters cannot be changed once compression is started (save a list of exceptions in multi-threading mode)
+ * - output->pos must be <= dstCapacity, input->pos must be <= srcSize
+ * - output->pos and input->pos will be updated. They are guaranteed to remain below their respective limit.
+ * - endOp must be a valid directive
+ * - When nbWorkers==0 (default), function is blocking : it completes its job before returning to caller.
+ * - When nbWorkers>=1, function is non-blocking : it copies a portion of input, distributes jobs to internal worker threads, flushes to output whatever is available,
+ * and then immediately returns, just indicating that there is some data remaining to be flushed.
+ * The function nonetheless guarantees forward progress : it will return only after it reads or writes at least 1+ byte.
+ * - Exception : if the first call requests a ZSTD_e_end directive and provides enough dstCapacity, the function delegates to ZSTD_compress2() which is always blocking.
+ * - @return provides a minimum amount of data remaining to be flushed from internal buffers
+ * or an error code, which can be tested using ZSTD_isError().
+ * if @return != 0, flush is not fully completed, there is still some data left within internal buffers.
+ * This is useful for ZSTD_e_flush, since in this case more flushes are necessary to empty all buffers.
+ * For ZSTD_e_end, @return == 0 when internal buffers are fully flushed and frame is completed.
+ * - after a ZSTD_e_end directive, if internal buffer is not fully flushed (@return != 0),
+ * only ZSTD_e_end or ZSTD_e_flush operations are allowed.
+ * Before starting a new compression job, or changing compression parameters,
+ * it is required to fully flush internal buffers.
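+ *
+ * As a small illustration of the "push parameters first" rule above (editor's sketch,
+ * not part of the upstream documentation):
+ *
+ *     ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only);
+ *     ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 19);
+ *     ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1);
+ *     ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 4);   // only effective when built with ZSTD_MULTITHREAD
+ *     // ... then drive ZSTD_compressStream2() with ZSTD_e_continue / ZSTD_e_end as described above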
+ */ +ZSTDLIB_API size_t ZSTD_compressStream2( ZSTD_CCtx* cctx, + ZSTD_outBuffer* output, + ZSTD_inBuffer* input, + ZSTD_EndDirective endOp); + + +/* These buffer sizes are softly recommended. + * They are not required : ZSTD_compressStream*() happily accepts any buffer size, for both input and output. + * Respecting the recommended size just makes it a bit easier for ZSTD_compressStream*(), + * reducing the amount of memory shuffling and buffering, resulting in minor performance savings. + * + * However, note that these recommendations are from the perspective of a C caller program. + * If the streaming interface is invoked from some other language, + * especially managed ones such as Java or Go, through a foreign function interface such as jni or cgo, + * a major performance rule is to reduce crossing such interface to an absolute minimum. + * It's not rare that performance ends being spent more into the interface, rather than compression itself. + * In which cases, prefer using large buffers, as large as practical, + * for both input and output, to reduce the nb of roundtrips. + */ +ZSTDLIB_API size_t ZSTD_CStreamInSize(void); /**< recommended size for input buffer */ +ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block. */ + + +/* ***************************************************************************** + * This following is a legacy streaming API, available since v1.0+ . + * It can be replaced by ZSTD_CCtx_reset() and ZSTD_compressStream2(). + * It is redundant, but remains fully supported. + * Streaming in combination with advanced parameters and dictionary compression + * can only be used through the new API. + ******************************************************************************/ + +/*! + * Equivalent to: + * + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any) + * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); + */ +ZSTDLIB_API size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel); +/*! + * Alternative for ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue). + * NOTE: The return value is different. ZSTD_compressStream() returns a hint for + * the next read size (if non-zero and not an error). ZSTD_compressStream2() + * returns the minimum nb of bytes left to flush (if non-zero and not an error). + */ +ZSTDLIB_API size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input); +/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_flush). */ +ZSTDLIB_API size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); +/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_end). */ +ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); + + +/*-*************************************************************************** +* Streaming decompression - HowTo +* +* A ZSTD_DStream object is required to track streaming operations. +* Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources. +* ZSTD_DStream objects can be re-used multiple times. +* +* Use ZSTD_initDStream() to start a new decompression operation. +* @return : recommended first input size +* Alternatively, use advanced API to set specific properties. +* +* Use ZSTD_decompressStream() repetitively to consume your input. +* The function will update both `pos` fields. 
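+*
+* A minimal consumption loop might look like this (editor's sketch, not part of the
+* upstream documentation; `dctx`, `inBuf`/`inSize` and `outBuf`/`outCapacity` are assumed
+* to be provided by the caller, and `consume()` is a hypothetical sink for decoded bytes):
+*
+*     ZSTD_inBuffer input = { inBuf, inSize, 0 };
+*     while (input.pos < input.size) {
+*         ZSTD_outBuffer output = { outBuf, outCapacity, 0 };
+*         size_t const ret = ZSTD_decompressStream(dctx, &output, &input);
+*         if (ZSTD_isError(ret)) return ret;   // handle error
+*         consume(outBuf, output.pos);
+*     }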
+* If `input.pos < input.size`, some input has not been consumed. +* It's up to the caller to present again remaining data. +* The function tries to flush all data decoded immediately, respecting output buffer size. +* If `output.pos < output.size`, decoder has flushed everything it could. +* But if `output.pos == output.size`, there might be some data left within internal buffers., +* In which case, call ZSTD_decompressStream() again to flush whatever remains in the buffer. +* Note : with no additional input provided, amount of data flushed is necessarily <= ZSTD_BLOCKSIZE_MAX. +* @return : 0 when a frame is completely decoded and fully flushed, +* or an error code, which can be tested using ZSTD_isError(), +* or any other value > 0, which means there is still some decoding or flushing to do to complete current frame : +* the return value is a suggested next input size (just a hint for better latency) +* that will never request more than the remaining frame size. +* *******************************************************************************/ + +typedef ZSTD_DCtx ZSTD_DStream; /**< DCtx and DStream are now effectively same object (>= v1.3.0) */ + /* For compatibility with versions <= v1.2.0, prefer differentiating them. */ +/*===== ZSTD_DStream management functions =====*/ +ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream(void); +ZSTDLIB_API size_t ZSTD_freeDStream(ZSTD_DStream* zds); /* accept NULL pointer */ + +/*===== Streaming decompression functions =====*/ + +/* This function is redundant with the advanced API and equivalent to: + * + * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); + * ZSTD_DCtx_refDDict(zds, NULL); + */ +ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds); + +ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input); + +ZSTDLIB_API size_t ZSTD_DStreamInSize(void); /*!< recommended size for input buffer */ +ZSTDLIB_API size_t ZSTD_DStreamOutSize(void); /*!< recommended size for output buffer. Guarantee to successfully flush at least one complete block in all circumstances. */ + + +/************************** +* Simple dictionary API +***************************/ +/*! ZSTD_compress_usingDict() : + * Compression at an explicit compression level using a Dictionary. + * A dictionary can be any arbitrary data segment (also called a prefix), + * or a buffer with specified information (see zdict.h). + * Note : This function loads the dictionary, resulting in significant startup delay. + * It's intended for a dictionary used only once. + * Note 2 : When `dict == NULL || dictSize < 8` no dictionary is used. */ +ZSTDLIB_API size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + int compressionLevel); + +/*! ZSTD_decompress_usingDict() : + * Decompression using a known Dictionary. + * Dictionary must be identical to the one used during compression. + * Note : This function loads the dictionary, resulting in significant startup delay. + * It's intended for a dictionary used only once. + * Note : When `dict == NULL || dictSize < 8` no dictionary is used. */ +ZSTDLIB_API size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize); + + +/*********************************** + * Bulk processing dictionary API + **********************************/ +typedef struct ZSTD_CDict_s ZSTD_CDict; + +/*! 
ZSTD_createCDict() : + * When compressing multiple messages or blocks using the same dictionary, + * it's recommended to digest the dictionary only once, since it's a costly operation. + * ZSTD_createCDict() will create a state from digesting a dictionary. + * The resulting state can be used for future compression operations with very limited startup cost. + * ZSTD_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only. + * @dictBuffer can be released after ZSTD_CDict creation, because its content is copied within CDict. + * Note 1 : Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate @dictBuffer content. + * Note 2 : A ZSTD_CDict can be created from an empty @dictBuffer, + * in which case the only thing that it transports is the @compressionLevel. + * This can be useful in a pipeline featuring ZSTD_compress_usingCDict() exclusively, + * expecting a ZSTD_CDict parameter with any data, including those without a known dictionary. */ +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize, + int compressionLevel); + +/*! ZSTD_freeCDict() : + * Function frees memory allocated by ZSTD_createCDict(). + * If a NULL pointer is passed, no operation is performed. */ +ZSTDLIB_API size_t ZSTD_freeCDict(ZSTD_CDict* CDict); + +/*! ZSTD_compress_usingCDict() : + * Compression using a digested Dictionary. + * Recommended when same dictionary is used multiple times. + * Note : compression level is _decided at dictionary creation time_, + * and frame parameters are hardcoded (dictID=yes, contentSize=yes, checksum=no) */ +ZSTDLIB_API size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict); + + +typedef struct ZSTD_DDict_s ZSTD_DDict; + +/*! ZSTD_createDDict() : + * Create a digested dictionary, ready to start decompression operation without startup delay. + * dictBuffer can be released after DDict creation, as its content is copied inside DDict. */ +ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dictBuffer, size_t dictSize); + +/*! ZSTD_freeDDict() : + * Function frees memory allocated with ZSTD_createDDict() + * If a NULL pointer is passed, no operation is performed. */ +ZSTDLIB_API size_t ZSTD_freeDDict(ZSTD_DDict* ddict); + +/*! ZSTD_decompress_usingDDict() : + * Decompression using a digested Dictionary. + * Recommended when same dictionary is used multiple times. */ +ZSTDLIB_API size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_DDict* ddict); + + +/******************************** + * Dictionary helper functions + *******************************/ + +/*! ZSTD_getDictID_fromDict() : Requires v1.4.0+ + * Provides the dictID stored within dictionary. + * if @return == 0, the dictionary is not conformant with Zstandard specification. + * It can still be loaded, but as a content-only dictionary. */ +ZSTDLIB_API unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize); + +/*! ZSTD_getDictID_fromCDict() : Requires v1.5.0+ + * Provides the dictID of the dictionary loaded into `cdict`. + * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. + * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ +ZSTDLIB_API unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict); + +/*! 
 ZSTD_getDictID_fromDDict() : Requires v1.4.0+
+ * Provides the dictID of the dictionary loaded into `ddict`.
+ * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
+ * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
+ZSTDLIB_API unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict);
+
+/*! ZSTD_getDictID_fromFrame() : Requires v1.4.0+
+ * Provides the dictID required to decompress the frame stored within `src`.
+ * If @return == 0, the dictID could not be decoded.
+ * This could be for one of the following reasons :
+ * - The frame does not require a dictionary to be decoded (most common case).
+ * - The frame was built with dictID intentionally removed. Whatever dictionary is necessary is hidden information.
+ * Note : this use case also happens when using a non-conformant dictionary.
+ * - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`).
+ * - This is not a Zstandard frame.
+ * When identifying the exact failure cause, it's possible to use ZSTD_getFrameHeader(), which will provide a more precise error code. */
+ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize);
+
+
+/*******************************************************************************
+ * Advanced dictionary and prefix API (Requires v1.4.0+)
+ *
+ * This API allows dictionaries to be used with ZSTD_compress2(),
+ * ZSTD_compressStream2(), and ZSTD_decompress(). Dictionaries are sticky, and
+ * only reset when the context is reset with ZSTD_reset_parameters or
+ * ZSTD_reset_session_and_parameters. Prefixes are single-use.
+ ******************************************************************************/
+
+
+/*! ZSTD_CCtx_loadDictionary() : Requires v1.4.0+
+ * Create an internal CDict from `dict` buffer.
+ * Decompression will have to use same dictionary.
+ * @result : 0, or an error code (which can be tested with ZSTD_isError()).
+ * Special: Loading a NULL (or 0-size) dictionary invalidates previous dictionary,
+ * meaning "return to no-dictionary mode".
+ * Note 1 : Dictionary is sticky, it will be used for all future compressed frames.
+ * To return to "no-dictionary" situation, load a NULL dictionary (or reset parameters).
+ * Note 2 : Loading a dictionary involves building tables.
+ * It's also a CPU consuming operation, with non-negligible impact on latency.
+ * Tables are dependent on compression parameters, and for this reason,
+ * compression parameters can no longer be changed after loading a dictionary.
+ * Note 3 : `dict` content will be copied internally.
+ * Use experimental ZSTD_CCtx_loadDictionary_byReference() to reference content instead.
+ * In such a case, dictionary buffer must outlive its users.
+ * Note 4 : Use ZSTD_CCtx_loadDictionary_advanced()
+ * to precisely select how dictionary content must be interpreted. */
+ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize);
+
+/*! ZSTD_CCtx_refCDict() : Requires v1.4.0+
+ * Reference a prepared dictionary, to be used for all next compressed frames.
+ * Note that compression parameters are enforced from within CDict,
+ * and supersede any compression parameter previously set within CCtx.
+ * The parameters ignored are labelled as "superseded-by-cdict" in the ZSTD_cParameter enum docs.
+ * The ignored parameters will be used again if the CCtx is returned to no-dictionary mode.
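+ *
+ *  (Editor's sketch, for illustration only and not part of the upstream documentation)
+ *  the usual digest-once pattern looks like:
+ *
+ *      ZSTD_CDict* const cdict = ZSTD_createCDict(dictBuf, dictSize, level);   // digest once
+ *      ...
+ *      ZSTD_CCtx_refCDict(cctx, cdict);
+ *      size_t const csize = ZSTD_compress2(cctx, dst, dstCapacity, src, srcSize);
+ *      ...
+ *      ZSTD_freeCDict(cdict);                                                  // at shutdown
+ *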
+ * The dictionary will remain valid for future compressed frames using same CCtx. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Special : Referencing a NULL CDict means "return to no-dictionary mode". + * Note 1 : Currently, only one dictionary can be managed. + * Referencing a new dictionary effectively "discards" any previous one. + * Note 2 : CDict is just referenced, its lifetime must outlive its usage within CCtx. */ +ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); + +/*! ZSTD_CCtx_refPrefix() : Requires v1.4.0+ + * Reference a prefix (single-usage dictionary) for next compressed frame. + * A prefix is **only used once**. Tables are discarded at end of frame (ZSTD_e_end). + * Decompression will need same prefix to properly regenerate data. + * Compressing with a prefix is similar in outcome as performing a diff and compressing it, + * but performs much faster, especially during decompression (compression speed is tunable with compression level). + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Special: Adding any prefix (including NULL) invalidates any previous prefix or dictionary + * Note 1 : Prefix buffer is referenced. It **must** outlive compression. + * Its content must remain unmodified during compression. + * Note 2 : If the intention is to diff some large src data blob with some prior version of itself, + * ensure that the window size is large enough to contain the entire source. + * See ZSTD_c_windowLog. + * Note 3 : Referencing a prefix involves building tables, which are dependent on compression parameters. + * It's a CPU consuming operation, with non-negligible impact on latency. + * If there is a need to use the same prefix multiple times, consider loadDictionary instead. + * Note 4 : By default, the prefix is interpreted as raw content (ZSTD_dct_rawContent). + * Use experimental ZSTD_CCtx_refPrefix_advanced() to alter dictionary interpretation. */ +ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, + const void* prefix, size_t prefixSize); + +/*! ZSTD_DCtx_loadDictionary() : Requires v1.4.0+ + * Create an internal DDict from dict buffer, + * to be used to decompress next frames. + * The dictionary remains valid for all future frames, until explicitly invalidated. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Special : Adding a NULL (or 0-size) dictionary invalidates any previous dictionary, + * meaning "return to no-dictionary mode". + * Note 1 : Loading a dictionary involves building tables, + * which has a non-negligible impact on CPU usage and latency. + * It's recommended to "load once, use many times", to amortize the cost + * Note 2 :`dict` content will be copied internally, so `dict` can be released after loading. + * Use ZSTD_DCtx_loadDictionary_byReference() to reference dictionary content instead. + * Note 3 : Use ZSTD_DCtx_loadDictionary_advanced() to take control of + * how dictionary content is loaded and interpreted. + */ +ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); + +/*! ZSTD_DCtx_refDDict() : Requires v1.4.0+ + * Reference a prepared dictionary, to be used to decompress next frames. + * The dictionary remains active for decompression of future frames using same DCtx. 
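+ *
+ *  (Editor's sketch, for illustration only and not part of the upstream documentation)
+ *  the decompression-side counterpart:
+ *
+ *      ZSTD_DDict* const ddict = ZSTD_createDDict(dictBuf, dictSize);   // digest once
+ *      ZSTD_DCtx_refDDict(dctx, ddict);
+ *      size_t const dsize = ZSTD_decompressDCtx(dctx, dst, dstCapacity, src, srcSize);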
+ * + * If called with ZSTD_d_refMultipleDDicts enabled, repeated calls of this function + * will store the DDict references in a table, and the DDict used for decompression + * will be determined at decompression time, as per the dict ID in the frame. + * The memory for the table is allocated on the first call to refDDict, and can be + * freed with ZSTD_freeDCtx(). + * + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Note 1 : Currently, only one dictionary can be managed. + * Referencing a new dictionary effectively "discards" any previous one. + * Special: referencing a NULL DDict means "return to no-dictionary mode". + * Note 2 : DDict is just referenced, its lifetime must outlive its usage from DCtx. + */ +ZSTDLIB_API size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); + +/*! ZSTD_DCtx_refPrefix() : Requires v1.4.0+ + * Reference a prefix (single-usage dictionary) to decompress next frame. + * This is the reverse operation of ZSTD_CCtx_refPrefix(), + * and must use the same prefix as the one used during compression. + * Prefix is **only used once**. Reference is discarded at end of frame. + * End of frame is reached when ZSTD_decompressStream() returns 0. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Note 1 : Adding any prefix (including NULL) invalidates any previously set prefix or dictionary + * Note 2 : Prefix buffer is referenced. It **must** outlive decompression. + * Prefix buffer must remain unmodified up to the end of frame, + * reached when ZSTD_decompressStream() returns 0. + * Note 3 : By default, the prefix is treated as raw content (ZSTD_dct_rawContent). + * Use ZSTD_CCtx_refPrefix_advanced() to alter dictMode (Experimental section) + * Note 4 : Referencing a raw content prefix has almost no cpu nor memory cost. + * A full dictionary is more costly, as it requires building tables. + */ +ZSTDLIB_API size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, + const void* prefix, size_t prefixSize); + +/* === Memory management === */ + +/*! ZSTD_sizeof_*() : Requires v1.4.0+ + * These functions give the _current_ memory usage of selected object. + * Note that object memory usage can evolve (increase or decrease) over time. */ +ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx); +ZSTDLIB_API size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx); +ZSTDLIB_API size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs); +ZSTDLIB_API size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds); +ZSTDLIB_API size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict); +ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict); + +#endif /* ZSTD_H_235446 */ + + +/* ************************************************************************************** + * ADVANCED AND EXPERIMENTAL FUNCTIONS + **************************************************************************************** + * The definitions in the following section are considered experimental. + * They are provided for advanced scenarios. + * They should never be used with a dynamic library, as prototypes may change in the future. + * Use them only in association with static linking. 
 + * ***************************************************************************************/
+
+#if defined(ZSTD_STATIC_LINKING_ONLY) && !defined(ZSTD_H_ZSTD_STATIC_LINKING_ONLY)
+#define ZSTD_H_ZSTD_STATIC_LINKING_ONLY
+
+/* Deprecation warnings :
+ * Should these warnings be a problem, it is generally possible to disable them,
+ * typically with -Wno-deprecated-declarations for gcc or _CRT_SECURE_NO_WARNINGS in Visual.
+ * Otherwise, it's also possible to define ZSTD_DISABLE_DEPRECATE_WARNINGS.
+ */
+#ifdef ZSTD_DISABLE_DEPRECATE_WARNINGS
+# define ZSTD_DEPRECATED(message) ZSTDLIB_API /* disable deprecation warnings */
+#else
+# if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
+# define ZSTD_DEPRECATED(message) [[deprecated(message)]] ZSTDLIB_API
+# elif (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5))) || defined(__clang__)
+# define ZSTD_DEPRECATED(message) ZSTDLIB_API __attribute__((deprecated(message)))
+# elif defined(__GNUC__) && (__GNUC__ >= 3)
+# define ZSTD_DEPRECATED(message) ZSTDLIB_API __attribute__((deprecated))
+# elif defined(_MSC_VER)
+# define ZSTD_DEPRECATED(message) ZSTDLIB_API __declspec(deprecated(message))
+# else
+# pragma message("WARNING: You need to implement ZSTD_DEPRECATED for this compiler")
+# define ZSTD_DEPRECATED(message) ZSTDLIB_API
+# endif
+#endif /* ZSTD_DISABLE_DEPRECATE_WARNINGS */
+
+/****************************************************************************************
+ * experimental API (static linking only)
+ ****************************************************************************************
+ * The following symbols and constants
+ * are not planned to join "stable API" status in the near future.
+ * They can still change in future versions.
+ * Some of them are planned to remain in the static_only section indefinitely.
+ * Some of them might be removed in the future (especially when redundant with existing stable functions)
+ * ***************************************************************************************/
+
+#define ZSTD_FRAMEHEADERSIZE_PREFIX(format) ((format) == ZSTD_f_zstd1 ? 5 : 1) /* minimum input size required to query frame header size */
+#define ZSTD_FRAMEHEADERSIZE_MIN(format) ((format) == ZSTD_f_zstd1 ? 6 : 2)
+#define ZSTD_FRAMEHEADERSIZE_MAX 18 /* can be useful for static allocation */
+#define ZSTD_SKIPPABLEHEADERSIZE 8
+
+/* compression parameter bounds */
+#define ZSTD_WINDOWLOG_MAX_32 30
+#define ZSTD_WINDOWLOG_MAX_64 31
+#define ZSTD_WINDOWLOG_MAX ((int)(sizeof(size_t) == 4 ? ZSTD_WINDOWLOG_MAX_32 : ZSTD_WINDOWLOG_MAX_64))
+#define ZSTD_WINDOWLOG_MIN 10
+#define ZSTD_HASHLOG_MAX ((ZSTD_WINDOWLOG_MAX < 30) ? ZSTD_WINDOWLOG_MAX : 30)
+#define ZSTD_HASHLOG_MIN 6
+#define ZSTD_CHAINLOG_MAX_32 29
+#define ZSTD_CHAINLOG_MAX_64 30
+#define ZSTD_CHAINLOG_MAX ((int)(sizeof(size_t) == 4 ?
 ZSTD_CHAINLOG_MAX_32 : ZSTD_CHAINLOG_MAX_64))
+#define ZSTD_CHAINLOG_MIN ZSTD_HASHLOG_MIN
+#define ZSTD_SEARCHLOG_MAX (ZSTD_WINDOWLOG_MAX-1)
+#define ZSTD_SEARCHLOG_MIN 1
+#define ZSTD_MINMATCH_MAX 7 /* only for ZSTD_fast, other strategies are limited to 6 */
+#define ZSTD_MINMATCH_MIN 3 /* only for ZSTD_btopt+, faster strategies are limited to 4 */
+#define ZSTD_TARGETLENGTH_MAX ZSTD_BLOCKSIZE_MAX
+#define ZSTD_TARGETLENGTH_MIN 0 /* note : comparing this constant to an unsigned results in a tautological test */
+#define ZSTD_STRATEGY_MIN ZSTD_fast
+#define ZSTD_STRATEGY_MAX ZSTD_btultra2
+
+
+#define ZSTD_OVERLAPLOG_MIN 0
+#define ZSTD_OVERLAPLOG_MAX 9
+
+#define ZSTD_WINDOWLOG_LIMIT_DEFAULT 27 /* by default, the streaming decoder will refuse any frame
+ * requiring larger than (1<<ZSTD_WINDOWLOG_LIMIT_DEFAULT) window size,
+ * to preserve host's memory from unreasonable requirements.
+ * This limit can be overridden using ZSTD_DCtx_setParameter(,ZSTD_d_windowLogMax,).
+ * The limit does not apply for one-pass decoders (such as ZSTD_decompress()), since no additional memory is allocated */
+
+/* LDM parameter bounds */
+#define ZSTD_LDM_HASHLOG_MIN ZSTD_HASHLOG_MIN
+#define ZSTD_LDM_HASHLOG_MAX ZSTD_HASHLOG_MAX
+#define ZSTD_LDM_MINMATCH_MIN 4
+#define ZSTD_LDM_MINMATCH_MAX 4096
+#define ZSTD_LDM_BUCKETSIZELOG_MIN 1
+#define ZSTD_LDM_BUCKETSIZELOG_MAX 8
+#define ZSTD_LDM_HASHRATELOG_MIN 0
+#define ZSTD_LDM_HASHRATELOG_MAX (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN)
+
+/* Advanced parameter bounds */
+#define ZSTD_TARGETCBLOCKSIZE_MIN 64
+#define ZSTD_TARGETCBLOCKSIZE_MAX ZSTD_BLOCKSIZE_MAX
+#define ZSTD_SRCSIZEHINT_MIN 0
+#define ZSTD_SRCSIZEHINT_MAX INT_MAX
+
+/* internal */
+#define ZSTD_HASHLOG3_MAX 17
+
+
+/* --- Advanced types --- */
+
+typedef struct ZSTD_CCtx_params_s ZSTD_CCtx_params;
+
+typedef struct {
+ unsigned int offset; /* The offset of the match. (NOT the same as the offset code)
+ * If offset == 0 and matchLength == 0, this sequence represents the last
+ * literals in the block of litLength size.
+ */
+
+ unsigned int litLength; /* Literal length of the sequence. */
+ unsigned int matchLength; /* Match length of the sequence. */
+
+ /* Note: Users of this API may provide a sequence with matchLength == litLength == offset == 0.
+ * In this case, we will treat the sequence as a marker for a block boundary.
+ */
+
+ unsigned int rep; /* Represents which repeat offset is represented by the field 'offset'.
+ * Ranges from [0, 3].
+ *
+ * Repeat offsets are essentially previous offsets from previous sequences sorted in
+ * recency order. For more detail, see doc/zstd_compression_format.md
+ *
+ * If rep == 0, then 'offset' does not contain a repeat offset.
+ * If rep > 0:
+ * If litLength != 0:
+ * rep == 1 --> offset == repeat_offset_1
+ * rep == 2 --> offset == repeat_offset_2
+ * rep == 3 --> offset == repeat_offset_3
+ * If litLength == 0:
+ * rep == 1 --> offset == repeat_offset_2
+ * rep == 2 --> offset == repeat_offset_3
+ * rep == 3 --> offset == repeat_offset_1 - 1
+ *
+ * Note: This field is optional. ZSTD_generateSequences() will calculate the value of
+ * 'rep', but repeat offsets do not necessarily need to be calculated from an external
+ * sequence provider's perspective. For example, ZSTD_compressSequences() does not
+ * use this 'rep' field at all (as of now).
+ */
+} ZSTD_Sequence;
+
+typedef struct {
+ unsigned windowLog; /**< largest match distance : larger == more compression, more memory needed during decompression */
+ unsigned chainLog; /**< fully searched segment : larger == more compression, slower, more memory (useless for fast) */
+ unsigned hashLog; /**< dispatch table : larger == faster, more memory */
+ unsigned searchLog; /**< nb of searches : larger == more compression, slower */
+ unsigned minMatch; /**< match length searched : larger == faster decompression, sometimes less compression */
+ unsigned targetLength; /**< acceptable match size for optimal parser (only) : larger == more compression, slower */
+ ZSTD_strategy strategy; /**< see ZSTD_strategy definition above */
+} ZSTD_compressionParameters;
+
+typedef struct {
+ int contentSizeFlag; /**< 1: content size will be in frame header (when known) */
+ int checksumFlag; /**< 1: generate a 32-bits checksum using XXH64 algorithm at end of frame, for error detection */
+ int noDictIDFlag; /**< 1: no dictID will be saved into frame header (dictID is only useful for dictionary compression) */
+} ZSTD_frameParameters;
+
+typedef struct {
+ ZSTD_compressionParameters cParams;
+ ZSTD_frameParameters fParams;
+} ZSTD_parameters;
+
+typedef enum {
+ ZSTD_dct_auto = 0, /* dictionary is "full" when starting with ZSTD_MAGIC_DICTIONARY, otherwise it is "rawContent" */
+ ZSTD_dct_rawContent = 1, /* ensures dictionary is always loaded as rawContent, even if it starts with ZSTD_MAGIC_DICTIONARY */
+ ZSTD_dct_fullDict = 2 /* refuses to load a dictionary if it does not respect Zstandard's specification, starting with ZSTD_MAGIC_DICTIONARY */
+} ZSTD_dictContentType_e;
+
+typedef enum {
+ ZSTD_dlm_byCopy = 0, /**< Copy dictionary content internally */
+ ZSTD_dlm_byRef = 1 /**< Reference dictionary content -- the dictionary buffer must outlive its users. */
+} ZSTD_dictLoadMethod_e;
+
+typedef enum {
+ ZSTD_f_zstd1 = 0, /* zstd frame format, specified in zstd_compression_format.md (default) */
+ ZSTD_f_zstd1_magicless = 1 /* Variant of zstd frame format, without initial 4-bytes magic number.
+ * Useful to save 4 bytes per generated frame. + * Decoder cannot recognise automatically this format, requiring this instruction. */ +} ZSTD_format_e; + +typedef enum { + /* Note: this enum controls ZSTD_d_forceIgnoreChecksum */ + ZSTD_d_validateChecksum = 0, + ZSTD_d_ignoreChecksum = 1 +} ZSTD_forceIgnoreChecksum_e; + +typedef enum { + /* Note: this enum controls ZSTD_d_refMultipleDDicts */ + ZSTD_rmd_refSingleDDict = 0, + ZSTD_rmd_refMultipleDDicts = 1 +} ZSTD_refMultipleDDicts_e; + +typedef enum { + /* Note: this enum and the behavior it controls are effectively internal + * implementation details of the compressor. They are expected to continue + * to evolve and should be considered only in the context of extremely + * advanced performance tuning. + * + * Zstd currently supports the use of a CDict in three ways: + * + * - The contents of the CDict can be copied into the working context. This + * means that the compression can search both the dictionary and input + * while operating on a single set of internal tables. This makes + * the compression faster per-byte of input. However, the initial copy of + * the CDict's tables incurs a fixed cost at the beginning of the + * compression. For small compressions (< 8 KB), that copy can dominate + * the cost of the compression. + * + * - The CDict's tables can be used in-place. In this model, compression is + * slower per input byte, because the compressor has to search two sets of + * tables. However, this model incurs no start-up cost (as long as the + * working context's tables can be reused). For small inputs, this can be + * faster than copying the CDict's tables. + * + * - The CDict's tables are not used at all, and instead we use the working + * context alone to reload the dictionary and use params based on the source + * size. See ZSTD_compress_insertDictionary() and ZSTD_compress_usingDict(). + * This method is effective when the dictionary sizes are very small relative + * to the input size, and the input size is fairly large to begin with. + * + * Zstd has a simple internal heuristic that selects which strategy to use + * at the beginning of a compression. However, if experimentation shows that + * Zstd is making poor choices, it is possible to override that choice with + * this enum. + */ + ZSTD_dictDefaultAttach = 0, /* Use the default heuristic. */ + ZSTD_dictForceAttach = 1, /* Never copy the dictionary. */ + ZSTD_dictForceCopy = 2, /* Always copy the dictionary. */ + ZSTD_dictForceLoad = 3 /* Always reload the dictionary */ +} ZSTD_dictAttachPref_e; + +typedef enum { + ZSTD_lcm_auto = 0, /**< Automatically determine the compression mode based on the compression level. + * Negative compression levels will be uncompressed, and positive compression + * levels will be compressed. */ + ZSTD_lcm_huffman = 1, /**< Always attempt Huffman compression. Uncompressed literals will still be + * emitted if Huffman compression is not profitable. */ + ZSTD_lcm_uncompressed = 2 /**< Always emit uncompressed literals. */ +} ZSTD_literalCompressionMode_e; + +typedef enum { + ZSTD_urm_auto = 0, /* Automatically determine whether or not we use row matchfinder */ + ZSTD_urm_disableRowMatchFinder = 1, /* Never use row matchfinder */ + ZSTD_urm_enableRowMatchFinder = 2 /* Always use row matchfinder when applicable */ +} ZSTD_useRowMatchFinderMode_e; + +/*************************************** +* Frame size functions +***************************************/ + +/*! 
ZSTD_findDecompressedSize() : + * `src` should point to the start of a series of ZSTD encoded and/or skippable frames + * `srcSize` must be the _exact_ size of this series + * (i.e. there should be a frame boundary at `src + srcSize`) + * @return : - decompressed size of all data in all successive frames + * - if the decompressed size cannot be determined: ZSTD_CONTENTSIZE_UNKNOWN + * - if an error occurred: ZSTD_CONTENTSIZE_ERROR + * + * note 1 : decompressed size is an optional field, that may not be present, especially in streaming mode. + * When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size. + * In which case, it's necessary to use streaming mode to decompress data. + * note 2 : decompressed size is always present when compression is done with ZSTD_compress() + * note 3 : decompressed size can be very large (64-bits value), + * potentially larger than what local system can handle as a single memory segment. + * In which case, it's necessary to use streaming mode to decompress data. + * note 4 : If source is untrusted, decompressed size could be wrong or intentionally modified. + * Always ensure result fits within application's authorized limits. + * Each application can set its own limits. + * note 5 : ZSTD_findDecompressedSize handles multiple frames, and so it must traverse the input to + * read each contained frame header. This is fast as most of the data is skipped, + * however it does mean that all frame data must be present and valid. */ +ZSTDLIB_API unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize); + +/*! ZSTD_decompressBound() : + * `src` should point to the start of a series of ZSTD encoded and/or skippable frames + * `srcSize` must be the _exact_ size of this series + * (i.e. there should be a frame boundary at `src + srcSize`) + * @return : - upper-bound for the decompressed size of all data in all successive frames + * - if an error occurred: ZSTD_CONTENTSIZE_ERROR + * + * note 1 : an error can occur if `src` contains an invalid or incorrectly formatted frame. + * note 2 : the upper-bound is exact when the decompressed size field is available in every ZSTD encoded frame of `src`. + * in this case, `ZSTD_findDecompressedSize` and `ZSTD_decompressBound` return the same value. + * note 3 : when the decompressed size field isn't available, the upper-bound for that frame is calculated by: + * upper-bound = # blocks * min(128 KB, Window_Size) + */ +ZSTDLIB_API unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize); + +/*! ZSTD_frameHeaderSize() : + * srcSize must be >= ZSTD_FRAMEHEADERSIZE_PREFIX. + * @return : size of the Frame Header, + * or an error code (if srcSize is too small) */ +ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize); + +typedef enum { + ZSTD_sf_noBlockDelimiters = 0, /* Representation of ZSTD_Sequence has no block delimiters, sequences only */ + ZSTD_sf_explicitBlockDelimiters = 1 /* Representation of ZSTD_Sequence contains explicit block delimiters */ +} ZSTD_sequenceFormat_e; + +/*! ZSTD_generateSequences() : + * Generate sequences using ZSTD_compress2, given a source buffer. + * + * Each block will end with a dummy sequence + * with offset == 0, matchLength == 0, and litLength == length of last literals. + * litLength may be == 0, and if so, then the sequence of (of: 0 ml: 0 ll: 0) + * simply acts as a block delimiter. + * + * zc can be used to insert custom compression params. 
 + * This function invokes ZSTD_compress2.
+ *
+ * The output of this function can be fed into ZSTD_compressSequences() with CCtx
+ * setting of ZSTD_c_blockDelimiters as ZSTD_sf_explicitBlockDelimiters
+ * @return : number of sequences generated
+ */
+
+ZSTDLIB_API size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
+ size_t outSeqsSize, const void* src, size_t srcSize);
+
+/*! ZSTD_mergeBlockDelimiters() :
+ * Given an array of ZSTD_Sequence, remove all sequences that represent block delimiters/last literals
+ * by merging them into the literals of the next sequence.
+ *
+ * As such, the final generated result has no explicit representation of block boundaries,
+ * and the final last literals segment is not represented in the sequences.
+ *
+ * The output of this function can be fed into ZSTD_compressSequences() with CCtx
+ * setting of ZSTD_c_blockDelimiters as ZSTD_sf_noBlockDelimiters
+ * @return : number of sequences left after merging
+ */
+ZSTDLIB_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize);
+
+/*! ZSTD_compressSequences() :
+ * Compress an array of ZSTD_Sequence, generated from the original source buffer, into dst.
+ * If a dictionary is included, then the cctx should reference the dict. (see: ZSTD_CCtx_refCDict(), ZSTD_CCtx_loadDictionary(), etc.)
+ * The entire source is compressed into a single frame.
+ *
+ * The compression behavior changes based on cctx params. In particular:
+ * If ZSTD_c_blockDelimiters == ZSTD_sf_noBlockDelimiters, the array of ZSTD_Sequence is expected to contain
+ * no block delimiters (defined in ZSTD_Sequence). Block boundaries are roughly determined based on
+ * the block size derived from the cctx, and sequences may be split. This is the default setting.
+ *
+ * If ZSTD_c_blockDelimiters == ZSTD_sf_explicitBlockDelimiters, the array of ZSTD_Sequence is expected to contain
+ * block delimiters (defined in ZSTD_Sequence). Behavior is undefined if no block delimiters are provided.
+ *
+ * If ZSTD_c_validateSequences == 0, this function will blindly accept the sequences provided. Invalid sequences cause undefined
+ * behavior. If ZSTD_c_validateSequences == 1, and a sequence is invalid (see doc/zstd_compression_format.md for
+ * specifics regarding offset/matchlength requirements), then the function will bail out and return an error.
+ *
+ * In addition to the two adjustable experimental params, there are other important cctx params.
+ * - ZSTD_c_minMatch MUST be set to a value less than or equal to the smallest match generated by the match finder. It has a minimum value of ZSTD_MINMATCH_MIN.
+ * - ZSTD_c_compressionLevel accordingly adjusts the strength of the entropy coder, as it would in typical compression.
+ * - ZSTD_c_windowLog affects offset validation: this function will return an error at higher debug levels if a provided offset
+ * is larger than what the spec allows for a given window log and dictionary (if present). See: doc/zstd_compression_format.md
+ *
+ * Note: Repcodes are, as of now, always re-calculated within this function, so ZSTD_Sequence::rep is unused.
+ * Note 2: Once we integrate the ability to ingest repcodes, the explicit block delims mode must respect those repcodes exactly,
+ * and cannot emit an RLE block that disagrees with the repcode history.
+ * @return : final compressed size or a ZSTD error.
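+ *
+ * (Editor's sketch, for illustration only and not part of the upstream documentation)
+ * a round trip through the sequence API, with error checks omitted and `seqs`/`seqsCapacity`
+ * assumed to be sized by the caller:
+ *
+ *     size_t const nbSeqs = ZSTD_generateSequences(cctx, seqs, seqsCapacity, src, srcSize);
+ *     ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockDelimiters, ZSTD_sf_explicitBlockDelimiters);
+ *     size_t const csize = ZSTD_compressSequences(cctx, dst, dstCapacity, seqs, nbSeqs, src, srcSize);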
+ */ +ZSTDLIB_API size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstSize, + const ZSTD_Sequence* inSeqs, size_t inSeqsSize, + const void* src, size_t srcSize); + + +/*! ZSTD_writeSkippableFrame() : + * Generates a zstd skippable frame containing data given by src, and writes it to dst buffer. + * + * Skippable frames begin with a a 4-byte magic number. There are 16 possible choices of magic number, + * ranging from ZSTD_MAGIC_SKIPPABLE_START to ZSTD_MAGIC_SKIPPABLE_START+15. + * As such, the parameter magicVariant controls the exact skippable frame magic number variant used, so + * the magic number used will be ZSTD_MAGIC_SKIPPABLE_START + magicVariant. + * + * Returns an error if destination buffer is not large enough, if the source size is not representable + * with a 4-byte unsigned int, or if the parameter magicVariant is greater than 15 (and therefore invalid). + * + * @return : number of bytes written or a ZSTD error. + */ +ZSTDLIB_API size_t ZSTD_writeSkippableFrame(void* dst, size_t dstCapacity, + const void* src, size_t srcSize, unsigned magicVariant); + + +/*************************************** +* Memory management +***************************************/ + +/*! ZSTD_estimate*() : + * These functions make it possible to estimate memory usage + * of a future {D,C}Ctx, before its creation. + * + * ZSTD_estimateCCtxSize() will provide a memory budget large enough + * for any compression level up to selected one. + * Note : Unlike ZSTD_estimateCStreamSize*(), this estimate + * does not include space for a window buffer. + * Therefore, the estimation is only guaranteed for single-shot compressions, not streaming. + * The estimate will assume the input may be arbitrarily large, + * which is the worst case. + * + * When srcSize can be bound by a known and rather "small" value, + * this fact can be used to provide a tighter estimation + * because the CCtx compression context will need less memory. + * This tighter estimation can be provided by more advanced functions + * ZSTD_estimateCCtxSize_usingCParams(), which can be used in tandem with ZSTD_getCParams(), + * and ZSTD_estimateCCtxSize_usingCCtxParams(), which can be used in tandem with ZSTD_CCtxParams_setParameter(). + * Both can be used to estimate memory using custom compression parameters and arbitrary srcSize limits. + * + * Note 2 : only single-threaded compression is supported. + * ZSTD_estimateCCtxSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1. + */ +ZSTDLIB_API size_t ZSTD_estimateCCtxSize(int compressionLevel); +ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams); +ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params); +ZSTDLIB_API size_t ZSTD_estimateDCtxSize(void); + +/*! ZSTD_estimateCStreamSize() : + * ZSTD_estimateCStreamSize() will provide a budget large enough for any compression level up to selected one. + * It will also consider src size to be arbitrarily "large", which is worst case. + * If srcSize is known to always be small, ZSTD_estimateCStreamSize_usingCParams() can provide a tighter estimation. + * ZSTD_estimateCStreamSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel. + * ZSTD_estimateCStreamSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParams_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1. 
 + * Note : CStream size estimation is only correct for single-threaded compression.
+ * ZSTD_DStream memory budget depends on window size.
+ * This information can be passed manually, using ZSTD_estimateDStreamSize,
+ * or deduced from a valid frame header, using ZSTD_estimateDStreamSize_fromFrame();
+ * Note : if streaming is initialized with ZSTD_init?Stream_usingDict(),
+ * an internal ?Dict will be created, whose additional size is not estimated here.
+ * In this case, get total size by adding ZSTD_estimate?DictSize */
+ZSTDLIB_API size_t ZSTD_estimateCStreamSize(int compressionLevel);
+ZSTDLIB_API size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams);
+ZSTDLIB_API size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params);
+ZSTDLIB_API size_t ZSTD_estimateDStreamSize(size_t windowSize);
+ZSTDLIB_API size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize);
+
+/*! ZSTD_estimate?DictSize() :
+ * ZSTD_estimateCDictSize() will bet that src size is relatively "small", and content is copied, like ZSTD_createCDict().
+ * ZSTD_estimateCDictSize_advanced() makes it possible to control compression parameters precisely, like ZSTD_createCDict_advanced().
+ * Note : dictionaries created by reference (`ZSTD_dlm_byRef`) are logically smaller.
+ */
+ZSTDLIB_API size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel);
+ZSTDLIB_API size_t ZSTD_estimateCDictSize_advanced(size_t dictSize, ZSTD_compressionParameters cParams, ZSTD_dictLoadMethod_e dictLoadMethod);
+ZSTDLIB_API size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod);
+
+/*! ZSTD_initStatic*() :
+ * Initialize an object using a pre-allocated fixed-size buffer.
+ * workspace: The memory area to emplace the object into.
+ * Provided pointer *must be 8-byte aligned*.
+ * Buffer must outlive object.
+ * workspaceSize: Use ZSTD_estimate*Size() to determine
+ * how large workspace must be to support target scenario.
+ * @return : pointer to object (same address as workspace, just different type),
+ * or NULL if error (size too small, incorrect alignment, etc.)
+ * Note : zstd will never resize nor malloc() when using a static buffer.
+ * If the object requires more memory than available,
+ * zstd will just error out (typically ZSTD_error_memory_allocation).
+ * Note 2 : there is no corresponding "free" function.
+ * Since workspace is allocated externally, it must be freed externally too.
+ * Note 3 : cParams : use ZSTD_getCParams() to convert a compression level
+ * into its associated cParams.
+ * Limitation 1 : currently not compatible with internal dictionary creation, triggered by
+ * ZSTD_CCtx_loadDictionary(), ZSTD_initCStream_usingDict() or ZSTD_initDStream_usingDict().
+ * Limitation 2 : static cctx currently not compatible with multi-threading.
+ * Limitation 3 : static dctx is incompatible with legacy support.
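+ *
+ *  (Editor's sketch, for illustration only and not part of the upstream documentation)
+ *  pairing an estimate with a static init:
+ *
+ *      size_t const wkspSize = ZSTD_estimateCCtxSize(compressionLevel);
+ *      void* const wksp = malloc(wkspSize);                  // any 8-byte aligned arena works
+ *      ZSTD_CCtx* const cctx = ZSTD_initStaticCCtx(wksp, wkspSize);
+ *      if (cctx == NULL) { ... }                             // size too small, bad alignment, ...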
+ */ +ZSTDLIB_API ZSTD_CCtx* ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize); +ZSTDLIB_API ZSTD_CStream* ZSTD_initStaticCStream(void* workspace, size_t workspaceSize); /**< same as ZSTD_initStaticCCtx() */ + +ZSTDLIB_API ZSTD_DCtx* ZSTD_initStaticDCtx(void* workspace, size_t workspaceSize); +ZSTDLIB_API ZSTD_DStream* ZSTD_initStaticDStream(void* workspace, size_t workspaceSize); /**< same as ZSTD_initStaticDCtx() */ + +ZSTDLIB_API const ZSTD_CDict* ZSTD_initStaticCDict( + void* workspace, size_t workspaceSize, + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_compressionParameters cParams); + +ZSTDLIB_API const ZSTD_DDict* ZSTD_initStaticDDict( + void* workspace, size_t workspaceSize, + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType); + + +/*! Custom memory allocation : + * These prototypes make it possible to pass your own allocation/free functions. + * ZSTD_customMem is provided at creation time, using ZSTD_create*_advanced() variants listed below. + * All allocation/free operations will be completed using these custom variants instead of regular ones. + */ +typedef void* (*ZSTD_allocFunction) (void* opaque, size_t size); +typedef void (*ZSTD_freeFunction) (void* opaque, void* address); +typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; void* opaque; } ZSTD_customMem; +static +#ifdef __GNUC__ +__attribute__((__unused__)) +#endif +ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL }; /**< this constant defers to stdlib's functions */ + +ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem); +ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem); +ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem); +ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem); + +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_compressionParameters cParams, + ZSTD_customMem customMem); + +/* ! Thread pool : + * These prototypes make it possible to share a thread pool among multiple compression contexts. + * This can limit resources for applications with multiple threads where each one uses + * a threaded compression mode (via ZSTD_c_nbWorkers parameter). + * ZSTD_createThreadPool creates a new thread pool with a given number of threads. + * Note that the lifetime of such pool must exist while being used. + * ZSTD_CCtx_refThreadPool assigns a thread pool to a context (use NULL argument value + * to use an internal thread pool). + * ZSTD_freeThreadPool frees a thread pool, accepts NULL pointer. + */ +typedef struct POOL_ctx_s ZSTD_threadPool; +ZSTDLIB_API ZSTD_threadPool* ZSTD_createThreadPool(size_t numThreads); +ZSTDLIB_API void ZSTD_freeThreadPool (ZSTD_threadPool* pool); /* accept NULL pointer */ +ZSTDLIB_API size_t ZSTD_CCtx_refThreadPool(ZSTD_CCtx* cctx, ZSTD_threadPool* pool); + + +/* + * This API is temporary and is expected to change or disappear in the future! 
+ */ +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced2( + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + const ZSTD_CCtx_params* cctxParams, + ZSTD_customMem customMem); + +ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced( + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_customMem customMem); + + +/*************************************** +* Advanced compression functions +***************************************/ + +/*! ZSTD_createCDict_byReference() : + * Create a digested dictionary for compression + * Dictionary content is just referenced, not duplicated. + * As a consequence, `dictBuffer` **must** outlive CDict, + * and its content must remain unmodified throughout the lifetime of CDict. + * note: equivalent to ZSTD_createCDict_advanced(), with dictLoadMethod==ZSTD_dlm_byRef */ +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel); + +/*! ZSTD_getCParams() : + * @return ZSTD_compressionParameters structure for a selected compression level and estimated srcSize. + * `estimatedSrcSize` value is optional, select 0 if not known */ +ZSTDLIB_API ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize); + +/*! ZSTD_getParams() : + * same as ZSTD_getCParams(), but @return a full `ZSTD_parameters` object instead of sub-component `ZSTD_compressionParameters`. + * All fields of `ZSTD_frameParameters` are set to default : contentSize=1, checksum=0, noDictID=0 */ +ZSTDLIB_API ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize); + +/*! ZSTD_checkCParams() : + * Ensure param values remain within authorized range. + * @return 0 on success, or an error code (can be checked with ZSTD_isError()) */ +ZSTDLIB_API size_t ZSTD_checkCParams(ZSTD_compressionParameters params); + +/*! ZSTD_adjustCParams() : + * optimize params for a given `srcSize` and `dictSize`. + * `srcSize` can be unknown, in which case use ZSTD_CONTENTSIZE_UNKNOWN. + * `dictSize` must be `0` when there is no dictionary. + * cPar can be invalid : all parameters will be clamped within valid range in the @return struct. + * This function never fails (wide contract) */ +ZSTDLIB_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize); + +/*! ZSTD_compress_advanced() : + * Note : this function is now DEPRECATED. + * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_setParameter() and other parameter setters. + * This prototype will generate compilation warnings. */ +ZSTD_DEPRECATED("use ZSTD_compress2") +size_t ZSTD_compress_advanced(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + ZSTD_parameters params); + +/*! ZSTD_compress_usingCDict_advanced() : + * Note : this function is now DEPRECATED. + * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_loadDictionary() and other parameter setters. + * This prototype will generate compilation warnings. */ +ZSTD_DEPRECATED("use ZSTD_compress2 with ZSTD_CCtx_loadDictionary") +size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict, + ZSTD_frameParameters fParams); + + +/*! 
ZSTD_CCtx_loadDictionary_byReference() : + * Same as ZSTD_CCtx_loadDictionary(), but dictionary content is referenced, instead of being copied into CCtx. + * It saves some memory, but also requires that `dict` outlives its usage within `cctx` */ +ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_byReference(ZSTD_CCtx* cctx, const void* dict, size_t dictSize); + +/*! ZSTD_CCtx_loadDictionary_advanced() : + * Same as ZSTD_CCtx_loadDictionary(), but gives finer control over + * how to load the dictionary (by copy ? by reference ?) + * and how to interpret it (automatic ? force raw mode ? full mode only ?) */ +ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType); + +/*! ZSTD_CCtx_refPrefix_advanced() : + * Same as ZSTD_CCtx_refPrefix(), but gives finer control over + * how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) */ +ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType); + +/* === experimental parameters === */ +/* these parameters can be used with ZSTD_setParameter() + * they are not guaranteed to remain supported in the future */ + + /* Enables rsyncable mode, + * which makes compressed files more rsync friendly + * by adding periodic synchronization points to the compressed data. + * The target average block size is ZSTD_c_jobSize / 2. + * It's possible to modify the job size to increase or decrease + * the granularity of the synchronization point. + * Once the jobSize is smaller than the window size, + * it will result in compression ratio degradation. + * NOTE 1: rsyncable mode only works when multithreading is enabled. + * NOTE 2: rsyncable performs poorly in combination with long range mode, + * since it will decrease the effectiveness of synchronization points, + * though mileage may vary. + * NOTE 3: Rsyncable mode limits maximum compression speed to ~400 MB/s. + * If the selected compression level is already running significantly slower, + * the overall speed won't be significantly impacted. + */ + #define ZSTD_c_rsyncable ZSTD_c_experimentalParam1 + +/* Select a compression format. + * The value must be of type ZSTD_format_e. + * See ZSTD_format_e enum definition for details */ +#define ZSTD_c_format ZSTD_c_experimentalParam2 + +/* Force back-reference distances to remain < windowSize, + * even when referencing into Dictionary content (default:0) */ +#define ZSTD_c_forceMaxWindow ZSTD_c_experimentalParam3 + +/* Controls whether the contents of a CDict + * are used in place, or copied into the working context. + * Accepts values from the ZSTD_dictAttachPref_e enum. + * See the comments on that enum for an explanation of the feature. */ +#define ZSTD_c_forceAttachDict ZSTD_c_experimentalParam4 + +/* Controls how the literals are compressed (default is auto). + * The value must be of type ZSTD_literalCompressionMode_e. + * See ZSTD_literalCompressionMode_e enum definition for details. + */ +#define ZSTD_c_literalCompressionMode ZSTD_c_experimentalParam5 + +/* Tries to fit compressed block size to be around targetCBlockSize. + * No target when targetCBlockSize == 0. + * There is no guarantee on compressed block size (default:0) */ +#define ZSTD_c_targetCBlockSize ZSTD_c_experimentalParam6 + +/* User's best guess of source size. + * Hint is not valid when srcSizeHint == 0. 
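+ * (Editor's note, for illustration only: a caller might set it as
+ *  ZSTD_CCtx_setParameter(cctx, ZSTD_c_srcSizeHint, (int)expectedSrcSize); where
+ *  `expectedSrcSize` is a hypothetical estimate held by the caller.)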
+ * There is no guarantee that hint is close to actual source size, + * but compression ratio may regress significantly if guess considerably underestimates */ +#define ZSTD_c_srcSizeHint ZSTD_c_experimentalParam7 + +/* Controls whether the new and experimental "dedicated dictionary search + * structure" can be used. This feature is still rough around the edges, be + * prepared for surprising behavior! + * + * How to use it: + * + * When using a CDict, whether to use this feature or not is controlled at + * CDict creation, and it must be set in a CCtxParams set passed into that + * construction (via ZSTD_createCDict_advanced2()). A compression will then + * use the feature or not based on how the CDict was constructed; the value of + * this param, set in the CCtx, will have no effect. + * + * However, when a dictionary buffer is passed into a CCtx, such as via + * ZSTD_CCtx_loadDictionary(), this param can be set on the CCtx to control + * whether the CDict that is created internally can use the feature or not. + * + * What it does: + * + * Normally, the internal data structures of the CDict are analogous to what + * would be stored in a CCtx after compressing the contents of a dictionary. + * To an approximation, a compression using a dictionary can then use those + * data structures to simply continue what is effectively a streaming + * compression where the simulated compression of the dictionary left off. + * Which is to say, the search structures in the CDict are normally the same + * format as in the CCtx. + * + * It is possible to do better, since the CDict is not like a CCtx: the search + * structures are written once during CDict creation, and then are only read + * after that, while the search structures in the CCtx are both read and + * written as the compression goes along. This means we can choose a search + * structure for the dictionary that is read-optimized. + * + * This feature enables the use of that different structure. + * + * Note that some of the members of the ZSTD_compressionParameters struct have + * different semantics and constraints in the dedicated search structure. It is + * highly recommended that you simply set a compression level in the CCtxParams + * you pass into the CDict creation call, and avoid messing with the cParams + * directly. + * + * Effects: + * + * This will only have any effect when the selected ZSTD_strategy + * implementation supports this feature. Currently, that's limited to + * ZSTD_greedy, ZSTD_lazy, and ZSTD_lazy2. + * + * Note that this means that the CDict tables can no longer be copied into the + * CCtx, so the dict attachment mode ZSTD_dictForceCopy will no longer be + * useable. The dictionary can only be attached or reloaded. + * + * In general, you should expect compression to be faster--sometimes very much + * so--and CDict creation to be slightly slower. Eventually, we will probably + * make this mode the default. + */ +#define ZSTD_c_enableDedicatedDictSearch ZSTD_c_experimentalParam8 + +/* ZSTD_c_stableInBuffer + * Experimental parameter. + * Default is 0 == disabled. Set to 1 to enable. + * + * Tells the compressor that the ZSTD_inBuffer will ALWAYS be the same + * between calls, except for the modifications that zstd makes to pos (the + * caller must not modify pos). This is checked by the compressor, and + * compression will fail if it ever changes. This means the only flush + * mode that makes sense is ZSTD_e_end, so zstd will error if ZSTD_e_end + * is not used. 
The data in the ZSTD_inBuffer in the range [src, src + pos)
+ * MUST not be modified during compression or you will get data corruption.
+ *
+ * When this flag is enabled zstd won't allocate an input window buffer,
+ * because the user guarantees it can reference the ZSTD_inBuffer until
+ * the frame is complete. But, it will still allocate an output buffer
+ * large enough to fit a block (see ZSTD_c_stableOutBuffer). This will also
+ * avoid the memcpy() from the input buffer to the input window buffer.
+ *
+ * NOTE: ZSTD_compressStream2() will error if ZSTD_e_end is not used.
+ * That means this flag cannot be used with ZSTD_compressStream().
+ *
+ * NOTE: So long as the ZSTD_inBuffer always points to valid memory, using
+ * this flag is ALWAYS memory safe, and will never access out-of-bounds
+ * memory. However, compression WILL fail if you violate the preconditions.
+ *
+ * WARNING: The data in the ZSTD_inBuffer in the range [src, src + pos) MUST
+ * not be modified during compression or you will get data corruption. This
+ * is because zstd needs to reference data in the ZSTD_inBuffer to find
+ * matches. Normally zstd maintains its own window buffer for this purpose,
+ * but passing this flag tells zstd to use the user provided buffer.
+ */
+#define ZSTD_c_stableInBuffer ZSTD_c_experimentalParam9
+
+/* ZSTD_c_stableOutBuffer
+ * Experimental parameter.
+ * Default is 0 == disabled. Set to 1 to enable.
+ *
+ * Tells the compressor that the ZSTD_outBuffer will not be resized between
+ * calls. Specifically: (out.size - out.pos) will never grow. This gives the
+ * compressor the freedom to say: If the compressed data doesn't fit in the
+ * output buffer then return ZSTD_error_dstSizeTooSmall. This allows us to
+ * always compress directly into the output buffer, instead of compressing
+ * into an internal buffer and copying to the output buffer.
+ *
+ * When this flag is enabled zstd won't allocate an output buffer, because
+ * it can write directly to the ZSTD_outBuffer. It will still allocate the
+ * input window buffer (see ZSTD_c_stableInBuffer).
+ *
+ * Zstd will check that (out.size - out.pos) never grows and return an error
+ * if it does. While not strictly necessary, this should prevent surprises.
+ */
+#define ZSTD_c_stableOutBuffer ZSTD_c_experimentalParam10
+
+/* ZSTD_c_blockDelimiters
+ * Default is 0 == ZSTD_sf_noBlockDelimiters.
+ *
+ * For use with sequence compression API: ZSTD_compressSequences().
+ *
+ * Designates whether or not the given array of ZSTD_Sequence contains block delimiters
+ * and last literals, which are defined as sequences with offset == 0 and matchLength == 0.
+ * See the definition of ZSTD_Sequence for more specifics.
+ */
+#define ZSTD_c_blockDelimiters ZSTD_c_experimentalParam11
+
+/* ZSTD_c_validateSequences
+ * Default is 0 == disabled. Set to 1 to enable sequence validation.
+ *
+ * For use with sequence compression API: ZSTD_compressSequences().
+ * Designates whether or not we validate sequences provided to ZSTD_compressSequences()
+ * during function execution.
+ *
+ * Without validation, providing a sequence that does not conform to the zstd spec will cause
+ * undefined behavior, and may produce a corrupted block.
+ *
+ * With validation enabled, if a sequence is invalid (see doc/zstd_compression_format.md for
+ * specifics regarding offset/matchlength requirements) then the function will bail out and
+ * return an error.
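+ *
+ * A minimal illustrative sketch (error handling omitted; the context name
+ * `cctx` is just a placeholder): validation is enabled with a single
+ * parameter set on the compression context before ZSTD_compressSequences()
+ * is called.
+ *
+ *   ZSTD_CCtx* const cctx = ZSTD_createCCtx();
+ *   ZSTD_CCtx_setParameter(cctx, ZSTD_c_validateSequences, 1);
+ *   // ZSTD_compressSequences(cctx, ...) now fails with an error on a
+ *   // malformed sequence instead of emitting a corrupted block.
+ *   ZSTD_freeCCtx(cctx);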
+ * + */ +#define ZSTD_c_validateSequences ZSTD_c_experimentalParam12 + +/* ZSTD_c_splitBlocks + * Default is 0 == disabled. Set to 1 to enable block splitting. + * + * Will attempt to split blocks in order to improve compression ratio at the cost of speed. + */ +#define ZSTD_c_splitBlocks ZSTD_c_experimentalParam13 + +/* ZSTD_c_useRowMatchFinder + * Default is ZSTD_urm_auto. + * Controlled with ZSTD_useRowMatchFinderMode_e enum. + * + * By default, in ZSTD_urm_auto, when finalizing the compression parameters, the library + * will decide at runtime whether to use the row-based matchfinder based on support for SIMD + * instructions as well as the windowLog. + * + * Set to ZSTD_urm_disableRowMatchFinder to never use row-based matchfinder. + * Set to ZSTD_urm_enableRowMatchFinder to force usage of row-based matchfinder. + */ +#define ZSTD_c_useRowMatchFinder ZSTD_c_experimentalParam14 + +/* ZSTD_c_deterministicRefPrefix + * Default is 0 == disabled. Set to 1 to enable. + * + * Zstd produces different results for prefix compression when the prefix is + * directly adjacent to the data about to be compressed vs. when it isn't. + * This is because zstd detects that the two buffers are contiguous and it can + * use a more efficient match finding algorithm. However, this produces different + * results than when the two buffers are non-contiguous. This flag forces zstd + * to always load the prefix in non-contiguous mode, even if it happens to be + * adjacent to the data, to guarantee determinism. + * + * If you really care about determinism when using a dictionary or prefix, + * like when doing delta compression, you should select this option. It comes + * at a speed penalty of about ~2.5% if the dictionary and data happened to be + * contiguous, and is free if they weren't contiguous. We don't expect that + * intentionally making the dictionary and data contiguous will be worth the + * cost to memcpy() the data. + */ +#define ZSTD_c_deterministicRefPrefix ZSTD_c_experimentalParam15 + +/*! ZSTD_CCtx_getParameter() : + * Get the requested compression parameter value, selected by enum ZSTD_cParameter, + * and store it into int* value. + * @return : 0, or an error code (which can be tested with ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_CCtx_getParameter(const ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value); + + +/*! ZSTD_CCtx_params : + * Quick howto : + * - ZSTD_createCCtxParams() : Create a ZSTD_CCtx_params structure + * - ZSTD_CCtxParams_setParameter() : Push parameters one by one into + * an existing ZSTD_CCtx_params structure. + * This is similar to + * ZSTD_CCtx_setParameter(). + * - ZSTD_CCtx_setParametersUsingCCtxParams() : Apply parameters to + * an existing CCtx. + * These parameters will be applied to + * all subsequent frames. + * - ZSTD_compressStream2() : Do compression using the CCtx. + * - ZSTD_freeCCtxParams() : Free the memory, accept NULL pointer. + * + * This can be used with ZSTD_estimateCCtxSize_advanced_usingCCtxParams() + * for static allocation of CCtx for single-threaded compression. + */ +ZSTDLIB_API ZSTD_CCtx_params* ZSTD_createCCtxParams(void); +ZSTDLIB_API size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params); /* accept NULL pointer */ + +/*! ZSTD_CCtxParams_reset() : + * Reset params to default values. + */ +ZSTDLIB_API size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params); + +/*! ZSTD_CCtxParams_init() : + * Initializes the compression parameters of cctxParams according to + * compression level. All other parameters are reset to their default values. 
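+ *
+ * A minimal sketch of the quick howto above (illustrative only; `cctx` is
+ * assumed to be an existing ZSTD_CCtx*, and error checks are omitted):
+ *
+ *   ZSTD_CCtx_params* const params = ZSTD_createCCtxParams();
+ *   ZSTD_CCtxParams_init(params, 3);                        // level-3 defaults
+ *   ZSTD_CCtxParams_setParameter(params, ZSTD_c_checksumFlag, 1);
+ *   ZSTD_CCtx_setParametersUsingCCtxParams(cctx, params);   // applies to subsequent frames
+ *   // ... compress with ZSTD_compressStream2(cctx, ...) ...
+ *   ZSTD_freeCCtxParams(params);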
+ */ +ZSTDLIB_API size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel); + +/*! ZSTD_CCtxParams_init_advanced() : + * Initializes the compression and frame parameters of cctxParams according to + * params. All other parameters are reset to their default values. + */ +ZSTDLIB_API size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params); + +/*! ZSTD_CCtxParams_setParameter() : Requires v1.4.0+ + * Similar to ZSTD_CCtx_setParameter. + * Set one compression parameter, selected by enum ZSTD_cParameter. + * Parameters must be applied to a ZSTD_CCtx using + * ZSTD_CCtx_setParametersUsingCCtxParams(). + * @result : a code representing success or failure (which can be tested with + * ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value); + +/*! ZSTD_CCtxParams_getParameter() : + * Similar to ZSTD_CCtx_getParameter. + * Get the requested value of one compression parameter, selected by enum ZSTD_cParameter. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_CCtxParams_getParameter(const ZSTD_CCtx_params* params, ZSTD_cParameter param, int* value); + +/*! ZSTD_CCtx_setParametersUsingCCtxParams() : + * Apply a set of ZSTD_CCtx_params to the compression context. + * This can be done even after compression is started, + * if nbWorkers==0, this will have no impact until a new compression is started. + * if nbWorkers>=1, new parameters will be picked up at next job, + * with a few restrictions (windowLog, pledgedSrcSize, nbWorkers, jobSize, and overlapLog are not updated). + */ +ZSTDLIB_API size_t ZSTD_CCtx_setParametersUsingCCtxParams( + ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params); + +/*! ZSTD_compressStream2_simpleArgs() : + * Same as ZSTD_compressStream2(), + * but using only integral types as arguments. + * This variant might be helpful for binders from dynamic languages + * which have troubles handling structures containing memory pointers. + */ +ZSTDLIB_API size_t ZSTD_compressStream2_simpleArgs ( + ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, size_t* dstPos, + const void* src, size_t srcSize, size_t* srcPos, + ZSTD_EndDirective endOp); + + +/*************************************** +* Advanced decompression functions +***************************************/ + +/*! ZSTD_isFrame() : + * Tells if the content of `buffer` starts with a valid Frame Identifier. + * Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0. + * Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled. + * Note 3 : Skippable Frame Identifiers are considered valid. */ +ZSTDLIB_API unsigned ZSTD_isFrame(const void* buffer, size_t size); + +/*! ZSTD_createDDict_byReference() : + * Create a digested dictionary, ready to start decompression operation without startup delay. + * Dictionary content is referenced, and therefore stays in dictBuffer. + * It is important that dictBuffer outlives DDict, + * it must remain read accessible throughout the lifetime of DDict */ +ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize); + +/*! ZSTD_DCtx_loadDictionary_byReference() : + * Same as ZSTD_DCtx_loadDictionary(), + * but references `dict` content instead of copying it into `dctx`. 
+ * This saves memory if `dict` remains around., + * However, it's imperative that `dict` remains accessible (and unmodified) while being used, so it must outlive decompression. */ +ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); + +/*! ZSTD_DCtx_loadDictionary_advanced() : + * Same as ZSTD_DCtx_loadDictionary(), + * but gives direct control over + * how to load the dictionary (by copy ? by reference ?) + * and how to interpret it (automatic ? force raw mode ? full mode only ?). */ +ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType); + +/*! ZSTD_DCtx_refPrefix_advanced() : + * Same as ZSTD_DCtx_refPrefix(), but gives finer control over + * how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) */ +ZSTDLIB_API size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType); + +/*! ZSTD_DCtx_setMaxWindowSize() : + * Refuses allocating internal buffers for frames requiring a window size larger than provided limit. + * This protects a decoder context from reserving too much memory for itself (potential attack scenario). + * This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode. + * By default, a decompression context accepts all window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT) + * @return : 0, or an error code (which can be tested using ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize); + +/*! ZSTD_DCtx_getParameter() : + * Get the requested decompression parameter value, selected by enum ZSTD_dParameter, + * and store it into int* value. + * @return : 0, or an error code (which can be tested with ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int* value); + +/* ZSTD_d_format + * experimental parameter, + * allowing selection between ZSTD_format_e input compression formats + */ +#define ZSTD_d_format ZSTD_d_experimentalParam1 +/* ZSTD_d_stableOutBuffer + * Experimental parameter. + * Default is 0 == disabled. Set to 1 to enable. + * + * Tells the decompressor that the ZSTD_outBuffer will ALWAYS be the same + * between calls, except for the modifications that zstd makes to pos (the + * caller must not modify pos). This is checked by the decompressor, and + * decompression will fail if it ever changes. Therefore the ZSTD_outBuffer + * MUST be large enough to fit the entire decompressed frame. This will be + * checked when the frame content size is known. The data in the ZSTD_outBuffer + * in the range [dst, dst + pos) MUST not be modified during decompression + * or you will get data corruption. + * + * When this flags is enabled zstd won't allocate an output buffer, because + * it can write directly to the ZSTD_outBuffer, but it will still allocate + * an input buffer large enough to fit any compressed block. This will also + * avoid the memcpy() from the internal output buffer to the ZSTD_outBuffer. + * If you need to avoid the input buffer allocation use the buffer-less + * streaming API. + * + * NOTE: So long as the ZSTD_outBuffer always points to valid memory, using + * this flag is ALWAYS memory safe, and will never access out-of-bounds + * memory. However, decompression WILL fail if you violate the preconditions. 
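+ *
+ * A rough usage sketch (illustrative; `dctx`, `dstBuffer`, `dstCapacity`,
+ * `srcBuffer` and `srcSize` are assumed to exist, and error checks are
+ * omitted). The caller commits to a single output buffer large enough for
+ * the whole frame and never moves or resizes it:
+ *
+ *   ZSTD_DCtx_setParameter(dctx, ZSTD_d_stableOutBuffer, 1);
+ *   ZSTD_outBuffer out = { dstBuffer, dstCapacity, 0 };  // reused for every call
+ *   ZSTD_inBuffer  in  = { srcBuffer, srcSize, 0 };
+ *   while (in.pos < in.size) { ZSTD_decompressStream(dctx, &out, &in); }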
+ * + * WARNING: The data in the ZSTD_outBuffer in the range [dst, dst + pos) MUST + * not be modified during decompression or you will get data corruption. This + * is because zstd needs to reference data in the ZSTD_outBuffer to regenerate + * matches. Normally zstd maintains its own buffer for this purpose, but passing + * this flag tells zstd to use the user provided buffer. + */ +#define ZSTD_d_stableOutBuffer ZSTD_d_experimentalParam2 + +/* ZSTD_d_forceIgnoreChecksum + * Experimental parameter. + * Default is 0 == disabled. Set to 1 to enable + * + * Tells the decompressor to skip checksum validation during decompression, regardless + * of whether checksumming was specified during compression. This offers some + * slight performance benefits, and may be useful for debugging. + * Param has values of type ZSTD_forceIgnoreChecksum_e + */ +#define ZSTD_d_forceIgnoreChecksum ZSTD_d_experimentalParam3 + +/* ZSTD_d_refMultipleDDicts + * Experimental parameter. + * Default is 0 == disabled. Set to 1 to enable + * + * If enabled and dctx is allocated on the heap, then additional memory will be allocated + * to store references to multiple ZSTD_DDict. That is, multiple calls of ZSTD_refDDict() + * using a given ZSTD_DCtx, rather than overwriting the previous DDict reference, will instead + * store all references. At decompression time, the appropriate dictID is selected + * from the set of DDicts based on the dictID in the frame. + * + * Usage is simply calling ZSTD_refDDict() on multiple dict buffers. + * + * Param has values of byte ZSTD_refMultipleDDicts_e + * + * WARNING: Enabling this parameter and calling ZSTD_DCtx_refDDict(), will trigger memory + * allocation for the hash table. ZSTD_freeDCtx() also frees this memory. + * Memory is allocated as per ZSTD_DCtx::customMem. + * + * Although this function allocates memory for the table, the user is still responsible for + * memory management of the underlying ZSTD_DDict* themselves. + */ +#define ZSTD_d_refMultipleDDicts ZSTD_d_experimentalParam4 + + +/*! ZSTD_DCtx_setFormat() : + * This function is REDUNDANT. Prefer ZSTD_DCtx_setParameter(). + * Instruct the decoder context about what kind of data to decode next. + * This instruction is mandatory to decode data without a fully-formed header, + * such ZSTD_f_zstd1_magicless for example. + * @return : 0, or an error code (which can be tested using ZSTD_isError()). */ +ZSTD_DEPRECATED("use ZSTD_DCtx_setParameter() instead") +size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format); + +/*! ZSTD_decompressStream_simpleArgs() : + * Same as ZSTD_decompressStream(), + * but using only integral types as arguments. + * This can be helpful for binders from dynamic languages + * which have troubles handling structures containing memory pointers. + */ +ZSTDLIB_API size_t ZSTD_decompressStream_simpleArgs ( + ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, size_t* dstPos, + const void* src, size_t srcSize, size_t* srcPos); + + +/******************************************************************** +* Advanced streaming functions +* Warning : most of these functions are now redundant with the Advanced API. +* Once Advanced API reaches "stable" status, +* redundant functions will be deprecated, and then at some point removed. +********************************************************************/ + +/*===== Advanced Streaming compression functions =====*/ + +/*! 
ZSTD_initCStream_srcSize() : + * This function is DEPRECATED, and equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any) + * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); + * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); + * + * pledgedSrcSize must be correct. If it is not known at init time, use + * ZSTD_CONTENTSIZE_UNKNOWN. Note that, for compatibility with older programs, + * "0" also disables frame content size field. It may be enabled in the future. + * This prototype will generate compilation warnings. + */ +ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions") +size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, + int compressionLevel, + unsigned long long pledgedSrcSize); + +/*! ZSTD_initCStream_usingDict() : + * This function is DEPRECATED, and is equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); + * ZSTD_CCtx_loadDictionary(zcs, dict, dictSize); + * + * Creates of an internal CDict (incompatible with static CCtx), except if + * dict == NULL or dictSize < 8, in which case no dict is used. + * Note: dict is loaded with ZSTD_dct_auto (treated as a full zstd dictionary if + * it begins with ZSTD_MAGIC_DICTIONARY, else as raw content) and ZSTD_dlm_byCopy. + * This prototype will generate compilation warnings. + */ +ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions") +size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, + const void* dict, size_t dictSize, + int compressionLevel); + +/*! ZSTD_initCStream_advanced() : + * This function is DEPRECATED, and is approximately equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * // Pseudocode: Set each zstd parameter and leave the rest as-is. + * for ((param, value) : params) { + * ZSTD_CCtx_setParameter(zcs, param, value); + * } + * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); + * ZSTD_CCtx_loadDictionary(zcs, dict, dictSize); + * + * dict is loaded with ZSTD_dct_auto and ZSTD_dlm_byCopy. + * pledgedSrcSize must be correct. + * If srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN. + * This prototype will generate compilation warnings. + */ +ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions") +size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, + const void* dict, size_t dictSize, + ZSTD_parameters params, + unsigned long long pledgedSrcSize); + +/*! ZSTD_initCStream_usingCDict() : + * This function is DEPRECATED, and equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_refCDict(zcs, cdict); + * + * note : cdict will just be referenced, and must outlive compression session + * This prototype will generate compilation warnings. + */ +ZSTD_DEPRECATED("use ZSTD_CCtx_reset and ZSTD_CCtx_refCDict, see zstd.h for detailed instructions") +size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict); + +/*! ZSTD_initCStream_usingCDict_advanced() : + * This function is DEPRECATED, and is approximately equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * // Pseudocode: Set each zstd frame parameter and leave the rest as-is. 
+ * for ((fParam, value) : fParams) { + * ZSTD_CCtx_setParameter(zcs, fParam, value); + * } + * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); + * ZSTD_CCtx_refCDict(zcs, cdict); + * + * same as ZSTD_initCStream_usingCDict(), with control over frame parameters. + * pledgedSrcSize must be correct. If srcSize is not known at init time, use + * value ZSTD_CONTENTSIZE_UNKNOWN. + * This prototype will generate compilation warnings. + */ +ZSTD_DEPRECATED("use ZSTD_CCtx_reset and ZSTD_CCtx_refCDict, see zstd.h for detailed instructions") +size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, + const ZSTD_CDict* cdict, + ZSTD_frameParameters fParams, + unsigned long long pledgedSrcSize); + +/*! ZSTD_resetCStream() : + * This function is DEPRECATED, and is equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); + * Note: ZSTD_resetCStream() interprets pledgedSrcSize == 0 as ZSTD_CONTENTSIZE_UNKNOWN, but + * ZSTD_CCtx_setPledgedSrcSize() does not do the same, so ZSTD_CONTENTSIZE_UNKNOWN must be + * explicitly specified. + * + * start a new frame, using same parameters from previous frame. + * This is typically useful to skip dictionary loading stage, since it will re-use it in-place. + * Note that zcs must be init at least once before using ZSTD_resetCStream(). + * If pledgedSrcSize is not known at reset time, use macro ZSTD_CONTENTSIZE_UNKNOWN. + * If pledgedSrcSize > 0, its value must be correct, as it will be written in header, and controlled at the end. + * For the time being, pledgedSrcSize==0 is interpreted as "srcSize unknown" for compatibility with older programs, + * but it will change to mean "empty" in future version, so use macro ZSTD_CONTENTSIZE_UNKNOWN instead. + * @return : 0, or an error code (which can be tested using ZSTD_isError()) + * This prototype will generate compilation warnings. + */ +ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions") +size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize); + + +typedef struct { + unsigned long long ingested; /* nb input bytes read and buffered */ + unsigned long long consumed; /* nb input bytes actually compressed */ + unsigned long long produced; /* nb of compressed bytes generated and buffered */ + unsigned long long flushed; /* nb of compressed bytes flushed : not provided; can be tracked from caller side */ + unsigned currentJobID; /* MT only : latest started job nb */ + unsigned nbActiveWorkers; /* MT only : nb of workers actively compressing at probe time */ +} ZSTD_frameProgression; + +/* ZSTD_getFrameProgression() : + * tells how much data has been ingested (read from input) + * consumed (input actually compressed) and produced (output) for current frame. + * Note : (ingested - consumed) is amount of input data buffered internally, not yet compressed. + * Aggregates progression inside active worker threads. + */ +ZSTDLIB_API ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx); + +/*! ZSTD_toFlushNow() : + * Tell how many bytes are ready to be flushed immediately. + * Useful for multithreading scenarios (nbWorkers >= 1). + * Probe the oldest active job, defined as oldest job not yet entirely flushed, + * and check its output buffer. + * @return : amount of data stored in oldest job and ready to be flushed immediately. 
+ * if @return == 0, it means either : + * + there is no active job (could be checked with ZSTD_frameProgression()), or + * + oldest job is still actively compressing data, + * but everything it has produced has also been flushed so far, + * therefore flush speed is limited by production speed of oldest job + * irrespective of the speed of concurrent (and newer) jobs. + */ +ZSTDLIB_API size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx); + + +/*===== Advanced Streaming decompression functions =====*/ + +/*! + * This function is deprecated, and is equivalent to: + * + * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); + * ZSTD_DCtx_loadDictionary(zds, dict, dictSize); + * + * note: no dictionary will be used if dict == NULL or dictSize < 8 + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); + +/*! + * This function is deprecated, and is equivalent to: + * + * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); + * ZSTD_DCtx_refDDict(zds, ddict); + * + * note : ddict is referenced, it must outlive decompression session + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict); + +/*! + * This function is deprecated, and is equivalent to: + * + * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); + * + * re-use decompression parameters from previous init; saves dictionary loading + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); + + +/********************************************************************* +* Buffer-less and synchronous inner streaming functions +* +* This is an advanced API, giving full control over buffer management, for users which need direct control over memory. +* But it's also a complex one, with several restrictions, documented below. +* Prefer normal streaming API for an easier experience. +********************************************************************* */ + +/** + Buffer-less streaming compression (synchronous mode) + + A ZSTD_CCtx object is required to track streaming operations. + Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage resource. + ZSTD_CCtx object can be re-used multiple times within successive compression operations. + + Start by initializing a context. + Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression. + It's also possible to duplicate a reference context which has already been initialized, using ZSTD_copyCCtx() + + Then, consume your input using ZSTD_compressContinue(). + There are some important considerations to keep in mind when using this advanced function : + - ZSTD_compressContinue() has no internal buffer. It uses externally provided buffers only. + - Interface is synchronous : input is consumed entirely and produces 1+ compressed blocks. + - Caller must ensure there is enough space in `dst` to store compressed data under worst case scenario. + Worst case evaluation is provided by ZSTD_compressBound(). + ZSTD_compressContinue() doesn't guarantee recover after a failed compression. + - ZSTD_compressContinue() presumes prior input ***is still accessible and unmodified*** (up to maximum distance size, see WindowLog). 
+ It remembers all previous contiguous blocks, plus one separated memory segment (which can itself consists of multiple contiguous blocks) + - ZSTD_compressContinue() detects that prior input has been overwritten when `src` buffer overlaps. + In which case, it will "discard" the relevant memory section from its history. + + Finish a frame with ZSTD_compressEnd(), which will write the last block(s) and optional checksum. + It's possible to use srcSize==0, in which case, it will write a final empty block to end the frame. + Without last block mark, frames are considered unfinished (hence corrupted) by compliant decoders. + + `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress again. +*/ + +/*===== Buffer-less streaming compression functions =====*/ +ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel); +ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel); +ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); /**< note: fails if cdict==NULL */ +ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); /**< note: if pledgedSrcSize is not known, use ZSTD_CONTENTSIZE_UNKNOWN */ + +ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + +/* The ZSTD_compressBegin_advanced() and ZSTD_compressBegin_usingCDict_advanced() are now DEPRECATED and will generate a compiler warning */ +ZSTD_DEPRECATED("use advanced API to access custom parameters") +size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize : If srcSize is not known at init time, use ZSTD_CONTENTSIZE_UNKNOWN */ +ZSTD_DEPRECATED("use advanced API to access custom parameters") +size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize); /* compression parameters are already set within cdict. pledgedSrcSize must be correct. If srcSize is not known, use macro ZSTD_CONTENTSIZE_UNKNOWN */ +/** + Buffer-less streaming decompression (synchronous mode) + + A ZSTD_DCtx object is required to track streaming operations. + Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it. + A ZSTD_DCtx object can be re-used multiple times. + + First typical operation is to retrieve frame parameters, using ZSTD_getFrameHeader(). + Frame header is extracted from the beginning of compressed frame, so providing only the frame's beginning is enough. + Data fragment must be large enough to ensure successful decoding. + `ZSTD_frameHeaderSize_max` bytes is guaranteed to always be large enough. + @result : 0 : successful decoding, the `ZSTD_frameHeader` structure is correctly filled. + >0 : `srcSize` is too small, please provide at least @result bytes on next attempt. + errorCode, which can be tested using ZSTD_isError(). + + It fills a ZSTD_frameHeader structure with important information to correctly decode the frame, + such as the dictionary ID, content size, or maximum back-reference distance (`windowSize`). + Note that these values could be wrong, either because of data corruption, or because a 3rd party deliberately spoofs false information. 
+ As a consequence, check that values remain within valid application range. + For example, do not allocate memory blindly, check that `windowSize` is within expectation. + Each application can set its own limits, depending on local restrictions. + For extended interoperability, it is recommended to support `windowSize` of at least 8 MB. + + ZSTD_decompressContinue() needs previous data blocks during decompression, up to `windowSize` bytes. + ZSTD_decompressContinue() is very sensitive to contiguity, + if 2 blocks don't follow each other, make sure that either the compressor breaks contiguity at the same place, + or that previous contiguous segment is large enough to properly handle maximum back-reference distance. + There are multiple ways to guarantee this condition. + + The most memory efficient way is to use a round buffer of sufficient size. + Sufficient size is determined by invoking ZSTD_decodingBufferSize_min(), + which can @return an error code if required value is too large for current system (in 32-bits mode). + In a round buffer methodology, ZSTD_decompressContinue() decompresses each block next to previous one, + up to the moment there is not enough room left in the buffer to guarantee decoding another full block, + which maximum size is provided in `ZSTD_frameHeader` structure, field `blockSizeMax`. + At which point, decoding can resume from the beginning of the buffer. + Note that already decoded data stored in the buffer should be flushed before being overwritten. + + There are alternatives possible, for example using two or more buffers of size `windowSize` each, though they consume more memory. + + Finally, if you control the compression process, you can also ignore all buffer size rules, + as long as the encoder and decoder progress in "lock-step", + aka use exactly the same buffer sizes, break contiguity at the same place, etc. + + Once buffers are setup, start decompression, with ZSTD_decompressBegin(). + If decompression requires a dictionary, use ZSTD_decompressBegin_usingDict() or ZSTD_decompressBegin_usingDDict(). + + Then use ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue() alternatively. + ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTD_decompressContinue(). + ZSTD_decompressContinue() requires this _exact_ amount of bytes, or it will fail. + + @result of ZSTD_decompressContinue() is the number of bytes regenerated within 'dst' (necessarily <= dstCapacity). + It can be zero : it just means ZSTD_decompressContinue() has decoded some metadata item. + It can also be an error code, which can be tested with ZSTD_isError(). + + A frame is fully decoded when ZSTD_nextSrcSizeToDecompress() returns zero. + Context can then be reset to start a new decompression. + + Note : it's possible to know if next input to present is a header or a block, using ZSTD_nextInputType(). + This information is not required to properly decode a frame. + + == Special case : skippable frames == + + Skippable frames allow integration of user-defined data into a flow of concatenated frames. + Skippable frames will be ignored (skipped) by decompressor. + The format of skippable frames is as follows : + a) Skippable frame ID - 4 Bytes, Little endian format, any value from 0x184D2A50 to 0x184D2A5F + b) Frame Size - 4 Bytes, Little endian format, unsigned 32-bits + c) Frame Content - any content (User Data) of length equal to Frame Size + For skippable frames ZSTD_getFrameHeader() returns zfhPtr->frameType==ZSTD_skippableFrame. 
+ For skippable frames ZSTD_decompressContinue() always returns 0 : it only skips the content. +*/ + +/*===== Buffer-less streaming decompression functions =====*/ +typedef enum { ZSTD_frame, ZSTD_skippableFrame } ZSTD_frameType_e; +typedef struct { + unsigned long long frameContentSize; /* if == ZSTD_CONTENTSIZE_UNKNOWN, it means this field is not available. 0 means "empty" */ + unsigned long long windowSize; /* can be very large, up to <= frameContentSize */ + unsigned blockSizeMax; + ZSTD_frameType_e frameType; /* if == ZSTD_skippableFrame, frameContentSize is the size of skippable content */ + unsigned headerSize; + unsigned dictID; + unsigned checksumFlag; +} ZSTD_frameHeader; + +/*! ZSTD_getFrameHeader() : + * decode Frame Header, or requires larger `srcSize`. + * @return : 0, `zfhPtr` is correctly filled, + * >0, `srcSize` is too small, value is wanted `srcSize` amount, + * or an error code, which can be tested using ZSTD_isError() */ +ZSTDLIB_API size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize); /**< doesn't consume input */ +/*! ZSTD_getFrameHeader_advanced() : + * same as ZSTD_getFrameHeader(), + * with added capability to select a format (like ZSTD_f_zstd1_magicless) */ +ZSTDLIB_API size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format); +ZSTDLIB_API size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize); /**< when frame content size is not known, pass in frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN */ + +ZSTDLIB_API size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx); +ZSTDLIB_API size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); +ZSTDLIB_API size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); + +ZSTDLIB_API size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx); +ZSTDLIB_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + +/* misc */ +ZSTDLIB_API void ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx); +typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e; +ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx); + + + + +/* ============================ */ +/** Block level API */ +/* ============================ */ + +/*! + Block functions produce and decode raw zstd blocks, without frame metadata. + Frame metadata cost is typically ~12 bytes, which can be non-negligible for very small blocks (< 100 bytes). + But users will have to take in charge needed metadata to regenerate data, such as compressed and content sizes. + + A few rules to respect : + - Compressing and decompressing require a context structure + + Use ZSTD_createCCtx() and ZSTD_createDCtx() + - It is necessary to init context before starting + + compression : any ZSTD_compressBegin*() variant, including with dictionary + + decompression : any ZSTD_decompressBegin*() variant, including with dictionary + + copyCCtx() and copyDCtx() can be used too + - Block size is limited, it must be <= ZSTD_getBlockSize() <= ZSTD_BLOCKSIZE_MAX == 128 KB + + If input is larger than a block size, it's necessary to split input data into multiple blocks + + For inputs larger than a single block, consider using regular ZSTD_compress() instead. 
+ Frame metadata is not that costly, and quickly becomes negligible as source size grows larger than a block. + - When a block is considered not compressible enough, ZSTD_compressBlock() result will be 0 (zero) ! + ===> In which case, nothing is produced into `dst` ! + + User __must__ test for such outcome and deal directly with uncompressed data + + A block cannot be declared incompressible if ZSTD_compressBlock() return value was != 0. + Doing so would mess up with statistics history, leading to potential data corruption. + + ZSTD_decompressBlock() _doesn't accept uncompressed data as input_ !! + + In case of multiple successive blocks, should some of them be uncompressed, + decoder must be informed of their existence in order to follow proper history. + Use ZSTD_insertBlock() for such a case. +*/ + +/*===== Raw zstd block functions =====*/ +ZSTDLIB_API size_t ZSTD_getBlockSize (const ZSTD_CCtx* cctx); +ZSTDLIB_API size_t ZSTD_compressBlock (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +ZSTDLIB_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +ZSTDLIB_API size_t ZSTD_insertBlock (ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize); /**< insert uncompressed block into `dctx` history. Useful for multi-blocks decompression. */ + + +#endif /* ZSTD_H_ZSTD_STATIC_LINKING_ONLY */ + +#if defined (__cplusplus) +} +#endif diff --git a/libkram/zstd/zstddeclib.cpp b/libkram/zstd/zstddeclib.cpp index 7d8cf975..c4f292fb 100644 --- a/libkram/zstd/zstddeclib.cpp +++ b/libkram/zstd/zstddeclib.cpp @@ -1,47 +1,166 @@ /** * \file zstddeclib.c * Single-file Zstandard decompressor. - * + * * Generate using: * \code - * combine.sh -r ../../lib -r ../../lib/common -r ../../lib/decompress -o zstddeclib.c zstddeclib-in.c + * combine.sh -r ../../lib -o zstddeclib.c zstddeclib-in.c * \endcode */ -/* - * BSD License - * - * For Zstandard software - * - * Copyright 2016-present, Facebook, Inc. All rights reserved. - * SPDX-License-Identifier: BSD-3-Clause +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. */ /* * Settings to bake for the standalone decompressor. - * + * * Note: It's important that none of these affects 'zstd.h' (only the * implementation files we're amalgamating). - * + * * Note: MEM_MODULE stops xxhash redefining BYTE, U16, etc., which are also * defined in mem.h (breaking C99 compatibility). + * + * Note: the undefs for xxHash allow Zstd's implementation to coinside with with + * standalone xxHash usage (with global defines). */ #define DEBUGLEVEL 0 #define MEM_MODULE +#undef XXH_NAMESPACE #define XXH_NAMESPACE ZSTD_ +#undef XXH_PRIVATE_API #define XXH_PRIVATE_API +#undef XXH_INLINE_ALL #define XXH_INLINE_ALL #define ZSTD_LEGACY_SUPPORT 0 -#define ZSTD_LIB_COMPRESSION 0 -#define ZSTD_LIB_DEPRECATED 0 -#define ZSTD_NOBENCH #define ZSTD_STRIP_ERROR_STRINGS +#define ZSTD_TRACE 0 + +/* Include zstd_deps.h first with all the options we need enabled. */ +#define ZSTD_DEPS_NEED_MALLOC +/**** start inlining common/zstd_deps.h ****/ +/* + * Copyright (c) Facebook, Inc. + * All rights reserved. 
+ * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/* This file provides common libc dependencies that zstd requires. + * The purpose is to allow replacing this file with a custom implementation + * to compile zstd without libc support. + */ + +/* Need: + * NULL + * INT_MAX + * UINT_MAX + * ZSTD_memcpy() + * ZSTD_memset() + * ZSTD_memmove() + */ +#ifndef ZSTD_DEPS_COMMON +#define ZSTD_DEPS_COMMON + +#include +#include +#include + +#if defined(__GNUC__) && __GNUC__ >= 4 +# define ZSTD_memcpy(d,s,l) __builtin_memcpy((d),(s),(l)) +# define ZSTD_memmove(d,s,l) __builtin_memmove((d),(s),(l)) +# define ZSTD_memset(p,v,l) __builtin_memset((p),(v),(l)) +#else +# define ZSTD_memcpy(d,s,l) memcpy((d),(s),(l)) +# define ZSTD_memmove(d,s,l) memmove((d),(s),(l)) +# define ZSTD_memset(p,v,l) memset((p),(v),(l)) +#endif + +#endif /* ZSTD_DEPS_COMMON */ + +/* Need: + * ZSTD_malloc() + * ZSTD_free() + * ZSTD_calloc() + */ +#ifdef ZSTD_DEPS_NEED_MALLOC +#ifndef ZSTD_DEPS_MALLOC +#define ZSTD_DEPS_MALLOC + +#include + +#define ZSTD_malloc(s) malloc(s) +#define ZSTD_calloc(n,s) calloc((n), (s)) +#define ZSTD_free(p) free((p)) + +#endif /* ZSTD_DEPS_MALLOC */ +#endif /* ZSTD_DEPS_NEED_MALLOC */ + +/* + * Provides 64-bit math support. + * Need: + * U64 ZSTD_div64(U64 dividend, U32 divisor) + */ +#ifdef ZSTD_DEPS_NEED_MATH64 +#ifndef ZSTD_DEPS_MATH64 +#define ZSTD_DEPS_MATH64 + +#define ZSTD_div64(dividend, divisor) ((dividend) / (divisor)) + +#endif /* ZSTD_DEPS_MATH64 */ +#endif /* ZSTD_DEPS_NEED_MATH64 */ + +/* Need: + * assert() + */ +#ifdef ZSTD_DEPS_NEED_ASSERT +#ifndef ZSTD_DEPS_ASSERT +#define ZSTD_DEPS_ASSERT -/**** start inlining debug.c ****/ +#include + +#endif /* ZSTD_DEPS_ASSERT */ +#endif /* ZSTD_DEPS_NEED_ASSERT */ + +/* Need: + * ZSTD_DEBUG_PRINT() + */ +#ifdef ZSTD_DEPS_NEED_IO +#ifndef ZSTD_DEPS_IO +#define ZSTD_DEPS_IO + +#include +#define ZSTD_DEBUG_PRINT(...) fprintf(stderr, __VA_ARGS__) + +#endif /* ZSTD_DEPS_IO */ +#endif /* ZSTD_DEPS_NEED_IO */ + +/* Only requested when is known to be present. + * Need: + * intptr_t + */ +#ifdef ZSTD_DEPS_NEED_STDINT +#ifndef ZSTD_DEPS_STDINT +#define ZSTD_DEPS_STDINT + +#include + +#endif /* ZSTD_DEPS_STDINT */ +#endif /* ZSTD_DEPS_NEED_STDINT */ +/**** ended inlining common/zstd_deps.h ****/ + +/**** start inlining common/debug.c ****/ /* ****************************************************************** * debug * Part of FSE library - * - * Copyright 2013-2020, Yann Collet, Facebook, Inc. - * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0-or-later + * Copyright (c) Yann Collet, Facebook, Inc. * * You can contact the author at : * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy @@ -63,7 +182,7 @@ /* ****************************************************************** * debug * Part of FSE library - * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * * You can contact the author at : * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy @@ -113,15 +232,6 @@ extern "C" { #endif -/* DEBUGFILE can be defined externally, - * typically through compiler command line. - * note : currently useless. 
- * Value must be stderr or stdout */ -#ifndef DEBUGFILE -# define DEBUGFILE stderr -#endif - - /* recommended values for DEBUGLEVEL : * 0 : release mode, no debug, all run-time checks disabled * 1 : enables assert() only, no display @@ -138,7 +248,8 @@ extern "C" { */ #if (DEBUGLEVEL>=1) -# include +# define ZSTD_DEPS_NEED_ASSERT +/**** skipping file: zstd_deps.h ****/ #else # ifndef assert /* assert may be already defined, due to prior #include */ # define assert(condition) ((void)0) /* disable assert (default) */ @@ -146,7 +257,8 @@ extern "C" { #endif #if (DEBUGLEVEL>=2) -# include +# define ZSTD_DEPS_NEED_IO +/**** skipping file: zstd_deps.h ****/ extern int g_debuglevel; /* the variable is only declared, it actually lives in debug.c, and is shared by the whole process. @@ -154,14 +266,14 @@ extern int g_debuglevel; /* the variable is only declared, It's useful when enabling very verbose levels on selective conditions (such as position in src) */ -# define RAWLOG(l, ...) { \ - if (l<=g_debuglevel) { \ - fprintf(stderr, __VA_ARGS__); \ +# define RAWLOG(l, ...) { \ + if (l<=g_debuglevel) { \ + ZSTD_DEBUG_PRINT(__VA_ARGS__); \ } } -# define DEBUGLOG(l, ...) { \ - if (l<=g_debuglevel) { \ - fprintf(stderr, __FILE__ ": " __VA_ARGS__); \ - fprintf(stderr, " \n"); \ +# define DEBUGLOG(l, ...) { \ + if (l<=g_debuglevel) { \ + ZSTD_DEBUG_PRINT(__FILE__ ": " __VA_ARGS__); \ + ZSTD_DEBUG_PRINT(" \n"); \ } } #else # define RAWLOG(l, ...) {} /* disabled */ @@ -177,11 +289,11 @@ extern int g_debuglevel; /* the variable is only declared, /**** ended inlining debug.h ****/ int g_debuglevel = DEBUGLEVEL; -/**** ended inlining debug.c ****/ -/**** start inlining entropy_common.c ****/ +/**** ended inlining common/debug.c ****/ +/**** start inlining common/entropy_common.c ****/ /* ****************************************************************** * Common functions of New Generation Entropy library - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * * You can contact the author at : * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy @@ -198,7 +310,7 @@ int g_debuglevel = DEBUGLEVEL; ***************************************/ /**** start inlining mem.h ****/ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -217,48 +329,233 @@ extern "C" { /*-**************************************** * Dependencies ******************************************/ -#include /* size_t, ptrdiff_t */ -#include /* memcpy */ +#include /* size_t, ptrdiff_t */ +/**** start inlining compiler.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ +#ifndef ZSTD_COMPILER_H +#define ZSTD_COMPILER_H -/*-**************************************** +/*-******************************************************* * Compiler specifics -******************************************/ -#if defined(_MSC_VER) /* Visual Studio */ -# include /* _byteswap_ulong */ -# include /* _byteswap_* */ +*********************************************************/ +/* force inlining */ + +#if !defined(ZSTD_NO_INLINE) +#if (defined(__GNUC__) && !defined(__STRICT_ANSI__)) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ +# define INLINE_KEYWORD inline +#else +# define INLINE_KEYWORD #endif -#if defined(__GNUC__) -# define MEM_STATIC static __inline __attribute__((unused)) -#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) -# define MEM_STATIC static inline + +#if defined(__GNUC__) || defined(__ICCARM__) +# define FORCE_INLINE_ATTR __attribute__((always_inline)) #elif defined(_MSC_VER) -# define MEM_STATIC static __inline +# define FORCE_INLINE_ATTR __forceinline #else -# define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */ +# define FORCE_INLINE_ATTR +#endif + +#else + +#define INLINE_KEYWORD +#define FORCE_INLINE_ATTR + +#endif + +/** + On MSVC qsort requires that functions passed into it use the __cdecl calling conversion(CC). + This explictly marks such functions as __cdecl so that the code will still compile + if a CC other than __cdecl has been made the default. +*/ +#if defined(_MSC_VER) +# define WIN_CDECL __cdecl +#else +# define WIN_CDECL +#endif + +/** + * FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant + * parameters. They must be inlined for the compiler to eliminate the constant + * branches. + */ +#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR +/** + * HINT_INLINE is used to help the compiler generate better code. It is *not* + * used for "templates", so it can be tweaked based on the compilers + * performance. + * + * gcc-4.8 and gcc-4.9 have been shown to benefit from leaving off the + * always_inline attribute. + * + * clang up to 5.0.0 (trunk) benefit tremendously from the always_inline + * attribute. + */ +#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 4 && __GNUC_MINOR__ >= 8 && __GNUC__ < 5 +# define HINT_INLINE static INLINE_KEYWORD +#else +# define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR +#endif + +/* UNUSED_ATTR tells the compiler it is okay if the function is unused. */ +#if defined(__GNUC__) +# define UNUSED_ATTR __attribute__((unused)) +#else +# define UNUSED_ATTR +#endif + +/* force no inlining */ +#ifdef _MSC_VER +# define FORCE_NOINLINE static __declspec(noinline) +#else +# if defined(__GNUC__) || defined(__ICCARM__) +# define FORCE_NOINLINE static __attribute__((__noinline__)) +# else +# define FORCE_NOINLINE static +# endif +#endif + + +/* target attribute */ +#ifndef __has_attribute + #define __has_attribute(x) 0 /* Compatibility with non-clang compilers. */ +#endif +#if defined(__GNUC__) || defined(__ICCARM__) +# define TARGET_ATTRIBUTE(target) __attribute__((__target__(target))) +#else +# define TARGET_ATTRIBUTE(target) +#endif + +/* Enable runtime BMI2 dispatch based on the CPU. + * Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default. 
+ */ +#ifndef DYNAMIC_BMI2 + #if ((defined(__clang__) && __has_attribute(__target__)) \ + || (defined(__GNUC__) \ + && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \ + && (defined(__x86_64__) || defined(_M_X86)) \ + && !defined(__BMI2__) + # define DYNAMIC_BMI2 1 + #else + # define DYNAMIC_BMI2 0 + #endif +#endif + +/* prefetch + * can be disabled, by declaring NO_PREFETCH build macro */ +#if defined(NO_PREFETCH) +# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */ +# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */ +#else +# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */ +# include /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */ +# define PREFETCH_L1(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0) +# define PREFETCH_L2(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1) +# elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) ) +# define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */) +# define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */) +# elif defined(__aarch64__) +# define PREFETCH_L1(ptr) __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr))) +# define PREFETCH_L2(ptr) __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr))) +# else +# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */ +# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */ +# endif +#endif /* NO_PREFETCH */ + +#define CACHELINE_SIZE 64 + +#define PREFETCH_AREA(p, s) { \ + const char* const _ptr = (const char*)(p); \ + size_t const _size = (size_t)(s); \ + size_t _pos; \ + for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) { \ + PREFETCH_L2(_ptr + _pos); \ + } \ +} + +/* vectorization + * older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax */ +#if !defined(__INTEL_COMPILER) && !defined(__clang__) && defined(__GNUC__) +# if (__GNUC__ == 4 && __GNUC_MINOR__ > 3) || (__GNUC__ >= 5) +# define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize"))) +# else +# define DONT_VECTORIZE _Pragma("GCC optimize(\"no-tree-vectorize\")") +# endif +#else +# define DONT_VECTORIZE +#endif + +/* Tell the compiler that a branch is likely or unlikely. + * Only use these macros if it causes the compiler to generate better code. + * If you can remove a LIKELY/UNLIKELY annotation without speed changes in gcc + * and clang, please do. + */ +#if defined(__GNUC__) +#define LIKELY(x) (__builtin_expect((x), 1)) +#define UNLIKELY(x) (__builtin_expect((x), 0)) +#else +#define LIKELY(x) (x) +#define UNLIKELY(x) (x) +#endif + +/* disable warnings */ +#ifdef _MSC_VER /* Visual Studio */ +# include /* For Visual 2005 */ +# pragma warning(disable : 4100) /* disable: C4100: unreferenced formal parameter */ +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +# pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */ +# pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */ +# pragma warning(disable : 4324) /* disable: C4324: padded structure */ +#endif + +/*Like DYNAMIC_BMI2 but for compile time determination of BMI2 support*/ +#ifndef STATIC_BMI2 +# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) +# ifdef __AVX2__ //MSVC does not have a BMI2 specific flag, but every CPU that supports AVX2 also supports BMI2 +# define STATIC_BMI2 1 +# endif +# endif +#endif + +#ifndef STATIC_BMI2 + #define STATIC_BMI2 0 #endif +/* compat. 
with non-clang compilers */ #ifndef __has_builtin -# define __has_builtin(x) 0 /* compat. with non-clang compilers */ +# define __has_builtin(x) 0 #endif -/* code only tested on 32 and 64 bits systems */ -#define MEM_STATIC_ASSERT(c) { enum { MEM_static_assert = 1/(int)(!!(c)) }; } -MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); } +/* compat. with non-clang compilers */ +#ifndef __has_feature +# define __has_feature(x) 0 +#endif /* detects whether we are being compiled under msan */ -#if defined (__has_feature) +#ifndef ZSTD_MEMORY_SANITIZER # if __has_feature(memory_sanitizer) -# define MEMORY_SANITIZER 1 +# define ZSTD_MEMORY_SANITIZER 1 +# else +# define ZSTD_MEMORY_SANITIZER 0 # endif #endif -#if defined (MEMORY_SANITIZER) +#if ZSTD_MEMORY_SANITIZER /* Not all platforms that support msan provide sanitizers/msan_interface.h. * We therefore declare the functions we need ourselves, rather than trying to * include the header file... */ - -#include /* intptr_t */ +#include /* size_t */ +#define ZSTD_DEPS_NEED_STDINT +/**** skipping file: zstd_deps.h ****/ /* Make memory region fully initialized (without changing its contents). */ void __msan_unpoison(const volatile void *a, size_t size); @@ -274,18 +571,21 @@ intptr_t __msan_test_shadow(const volatile void *x, size_t size); #endif /* detects whether we are being compiled under asan */ -#if defined (__has_feature) +#ifndef ZSTD_ADDRESS_SANITIZER # if __has_feature(address_sanitizer) -# define ADDRESS_SANITIZER 1 +# define ZSTD_ADDRESS_SANITIZER 1 +# elif defined(__SANITIZE_ADDRESS__) +# define ZSTD_ADDRESS_SANITIZER 1 +# else +# define ZSTD_ADDRESS_SANITIZER 0 # endif -#elif defined(__SANITIZE_ADDRESS__) -# define ADDRESS_SANITIZER 1 #endif -#if defined (ADDRESS_SANITIZER) +#if ZSTD_ADDRESS_SANITIZER /* Not all platforms that support asan provide sanitizers/asan_interface.h. * We therefore declare the functions we need ourselves, rather than trying to * include the header file... */ +#include /* size_t */ /** * Marks a memory region ([addr, addr+size)) as unaddressable. 
@@ -319,12 +619,38 @@ void __asan_poison_memory_region(void const volatile *addr, size_t size); void __asan_unpoison_memory_region(void const volatile *addr, size_t size); #endif +#endif /* ZSTD_COMPILER_H */ +/**** ended inlining compiler.h ****/ +/**** skipping file: debug.h ****/ +/**** skipping file: zstd_deps.h ****/ -/*-************************************************************** + +/*-**************************************** +* Compiler specifics +******************************************/ +#if defined(_MSC_VER) /* Visual Studio */ +# include /* _byteswap_ulong */ +# include /* _byteswap_* */ +#endif +#if defined(__GNUC__) +# define MEM_STATIC static __inline __attribute__((unused)) +#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# define MEM_STATIC static inline +#elif defined(_MSC_VER) +# define MEM_STATIC static __inline +#else +# define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */ +#endif + +/*-************************************************************** * Basic Types *****************************************************************/ #if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) -# include +# if defined(_AIX) +# include +# else +# include /* intptr_t */ +# endif typedef uint8_t BYTE; typedef uint16_t U16; typedef int16_t S16; @@ -356,7 +682,53 @@ void __asan_unpoison_memory_region(void const volatile *addr, size_t size); /*-************************************************************** -* Memory I/O +* Memory I/O API +*****************************************************************/ +/*=== Static platform detection ===*/ +MEM_STATIC unsigned MEM_32bits(void); +MEM_STATIC unsigned MEM_64bits(void); +MEM_STATIC unsigned MEM_isLittleEndian(void); + +/*=== Native unaligned read/write ===*/ +MEM_STATIC U16 MEM_read16(const void* memPtr); +MEM_STATIC U32 MEM_read32(const void* memPtr); +MEM_STATIC U64 MEM_read64(const void* memPtr); +MEM_STATIC size_t MEM_readST(const void* memPtr); + +MEM_STATIC void MEM_write16(void* memPtr, U16 value); +MEM_STATIC void MEM_write32(void* memPtr, U32 value); +MEM_STATIC void MEM_write64(void* memPtr, U64 value); + +/*=== Little endian unaligned read/write ===*/ +MEM_STATIC U16 MEM_readLE16(const void* memPtr); +MEM_STATIC U32 MEM_readLE24(const void* memPtr); +MEM_STATIC U32 MEM_readLE32(const void* memPtr); +MEM_STATIC U64 MEM_readLE64(const void* memPtr); +MEM_STATIC size_t MEM_readLEST(const void* memPtr); + +MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val); +MEM_STATIC void MEM_writeLE24(void* memPtr, U32 val); +MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32); +MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64); +MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val); + +/*=== Big endian unaligned read/write ===*/ +MEM_STATIC U32 MEM_readBE32(const void* memPtr); +MEM_STATIC U64 MEM_readBE64(const void* memPtr); +MEM_STATIC size_t MEM_readBEST(const void* memPtr); + +MEM_STATIC void MEM_writeBE32(void* memPtr, U32 val32); +MEM_STATIC void MEM_writeBE64(void* memPtr, U64 val64); +MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val); + +/*=== Byteswap ===*/ +MEM_STATIC U32 MEM_swap32(U32 in); +MEM_STATIC U64 MEM_swap64(U64 in); +MEM_STATIC size_t MEM_swapST(size_t in); + + +/*-************************************************************** +* Memory I/O Implementation 
*****************************************************************/ /* MEM_FORCE_MEMORY_ACCESS : * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. @@ -372,9 +744,7 @@ void __asan_unpoison_memory_region(void const volatile *addr, size_t size); * Prefer these methods in priority order (0 > 1 > 2) */ #ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ -# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) -# define MEM_FORCE_MEMORY_ACCESS 2 -# elif defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__) +# if defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__) # define MEM_FORCE_MEMORY_ACCESS 1 # endif #endif @@ -435,37 +805,37 @@ MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign64*)memPtr)->v = MEM_STATIC U16 MEM_read16(const void* memPtr) { - U16 val; memcpy(&val, memPtr, sizeof(val)); return val; + U16 val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val; } MEM_STATIC U32 MEM_read32(const void* memPtr) { - U32 val; memcpy(&val, memPtr, sizeof(val)); return val; + U32 val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val; } MEM_STATIC U64 MEM_read64(const void* memPtr) { - U64 val; memcpy(&val, memPtr, sizeof(val)); return val; + U64 val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val; } MEM_STATIC size_t MEM_readST(const void* memPtr) { - size_t val; memcpy(&val, memPtr, sizeof(val)); return val; + size_t val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val; } MEM_STATIC void MEM_write16(void* memPtr, U16 value) { - memcpy(memPtr, &value, sizeof(value)); + ZSTD_memcpy(memPtr, &value, sizeof(value)); } MEM_STATIC void MEM_write32(void* memPtr, U32 value) { - memcpy(memPtr, &value, sizeof(value)); + ZSTD_memcpy(memPtr, &value, sizeof(value)); } MEM_STATIC void MEM_write64(void* memPtr, U64 value) { - memcpy(memPtr, &value, sizeof(value)); + ZSTD_memcpy(memPtr, &value, sizeof(value)); } #endif /* MEM_FORCE_MEMORY_ACCESS */ @@ -537,7 +907,7 @@ MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val) MEM_STATIC U32 MEM_readLE24(const void* memPtr) { - return MEM_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16); + return (U32)MEM_readLE16(memPtr) + ((U32)(((const BYTE*)memPtr)[2]) << 16); } MEM_STATIC void MEM_writeLE24(void* memPtr, U32 val) @@ -644,6 +1014,9 @@ MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val) MEM_writeBE64(memPtr, (U64)val); } +/* code only tested on 32 and 64 bits systems */ +MEM_STATIC void MEM_check(void) { DEBUG_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); } + #if defined (__cplusplus) } @@ -653,7 +1026,7 @@ MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val) /**** ended inlining mem.h ****/ /**** start inlining error_private.h ****/ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -675,10 +1048,9 @@ extern "C" { /* **************************************** * Dependencies ******************************************/ -#include /* size_t */ -/**** start inlining zstd_errors.h ****/ +/**** start inlining ../zstd_errors.h ****/ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the @@ -755,6 +1127,8 @@ typedef enum { /* following error codes are __NOT STABLE__, they can be removed or changed in future versions */ ZSTD_error_frameIndex_tooLarge = 100, ZSTD_error_seekableIO = 102, + ZSTD_error_dstBuffer_wrong = 104, + ZSTD_error_srcBuffer_wrong = 105, ZSTD_error_maxCode = 120 /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */ } ZSTD_ErrorCode; @@ -770,7 +1144,8 @@ ZSTDERRORLIB_API const char* ZSTD_getErrorString(ZSTD_ErrorCode code); /**< Sa #endif #endif /* ZSTD_ERRORS_H_398273423 */ -/**** ended inlining zstd_errors.h ****/ +/**** ended inlining ../zstd_errors.h ****/ +/**** skipping file: zstd_deps.h ****/ /* **************************************** @@ -797,7 +1172,7 @@ typedef ZSTD_ErrorCode ERR_enum; /*-**************************************** * Error codes handling ******************************************/ -#undef ERROR /* reported already defined on VS 2015 (Rich Geldreich) */ +#undef ERROR /* already defined on Visual Studio */ #define ERROR(name) ZSTD_ERROR(name) #define ZSTD_ERROR(name) ((size_t)-PREFIX(name)) @@ -805,6 +1180,10 @@ ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); } ERR_STATIC ERR_enum ERR_getErrorCode(size_t code) { if (!ERR_isError(code)) return (ERR_enum)0; return (ERR_enum) (0-code); } +/* check and forward error code */ +#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e +#define CHECK_F(f) { CHECK_V_F(_var_err__, f); } + /*-**************************************** * Error Strings @@ -828,7 +1207,7 @@ ERR_STATIC const char* ERR_getErrorName(size_t code) /* ****************************************************************** * FSE : Finite State Entropy codec * Public Prototypes declaration - * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * * You can contact the author at : * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy @@ -850,7 +1229,7 @@ extern "C" { /*-***************************************** * Dependencies ******************************************/ -#include /* size_t, ptrdiff_t */ +/**** skipping file: zstd_deps.h ****/ /*-***************************************** @@ -964,10 +1343,16 @@ FSE_PUBLIC_API unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize /*! FSE_normalizeCount(): normalize counts so that sum(count[]) == Power_of_2 (2^tableLog) 'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1). + useLowProbCount is a boolean parameter which trades off compressed size for + faster header decoding. When it is set to 1, the compressed data will be slightly + smaller. And when it is set to 0, FSE_readNCount() and FSE_buildDTable() will be + faster. If you are compressing a small amount of data (< 2 KB) then useLowProbCount=0 + is a good default, since header deserialization makes a big speed difference. + Otherwise, useLowProbCount=1 is a good default, since the speed difference is small. @return : tableLog, or an errorCode, which can be tested using FSE_isError() */ FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog, - const unsigned* count, size_t srcSize, unsigned maxSymbolValue); + const unsigned* count, size_t srcSize, unsigned maxSymbolValue, unsigned useLowProbCount); /*! FSE_NCountWriteBound(): Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'. 
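The mem.h hunks earlier in this section switch MEM_read*/MEM_write* over to ZSTD_memcpy and add explicit U32 casts in MEM_readLE24. Below is a minimal, self-contained sketch of the same portable unaligned-access idiom, shown outside the amalgamation for clarity; the demo_* names are illustrative and not part of the patch.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Portable unaligned 32-bit read: memcpy is folded into a single load by
 * compilers that allow unaligned access, and stays well-defined elsewhere. */
static uint32_t demo_read32(const void* ptr)
{
    uint32_t v;
    memcpy(&v, ptr, sizeof(v));
    return v;
}

/* 24-bit little-endian read with explicit uint32_t casts, so the shift is
 * never performed on a promoted (possibly narrower) operand. */
static uint32_t demo_readLE24(const void* ptr)
{
    const uint8_t* p = (const uint8_t*)ptr;
    return (uint32_t)p[0] | ((uint32_t)p[1] << 8) | ((uint32_t)p[2] << 16);
}

int main(void)
{
    uint8_t buf[5] = { 0x01, 0x02, 0x03, 0x04, 0x05 };
    /* demo_read32 result depends on host byte order; demo_readLE24 does not */
    printf("%08x %06x\n", (unsigned)demo_read32(buf + 1), (unsigned)demo_readLE24(buf));
    return 0;
}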
@@ -1055,6 +1440,13 @@ FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, const void* rBuffer, size_t rBuffSize); +/*! FSE_readNCount_bmi2(): + * Same as FSE_readNCount() but pass bmi2=1 when your CPU supports BMI2 and 0 otherwise. + */ +FSE_PUBLIC_API size_t FSE_readNCount_bmi2(short* normalizedCounter, + unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, + const void* rBuffer, size_t rBuffSize, int bmi2); + /*! Constructor and Destructor of FSE_DTable. Note that its size depends on 'tableLog' */ typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */ @@ -1111,7 +1503,7 @@ If there is an error, the function will return an error code, which can be teste /* ****************************************************************** * bitstream * Part of FSE library - * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * * You can contact the author at : * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy @@ -1127,7 +1519,6 @@ If there is an error, the function will return an error code, which can be teste #if defined (__cplusplus) extern "C" { #endif - /* * This API consists of small unitary functions, which must be inlined for best performance. * Since link-time-optimization is not available for all compilers, @@ -1138,180 +1529,7 @@ extern "C" { * Dependencies ******************************************/ /**** skipping file: mem.h ****/ -/**** start inlining compiler.h ****/ -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - -#ifndef ZSTD_COMPILER_H -#define ZSTD_COMPILER_H - -/*-******************************************************* -* Compiler specifics -*********************************************************/ -/* force inlining */ - -#if !defined(ZSTD_NO_INLINE) -#if defined (__GNUC__) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ -# define INLINE_KEYWORD inline -#else -# define INLINE_KEYWORD -#endif - -#if defined(__GNUC__) || defined(__ICCARM__) -# define FORCE_INLINE_ATTR __attribute__((always_inline)) -#elif defined(_MSC_VER) -# define FORCE_INLINE_ATTR __forceinline -#else -# define FORCE_INLINE_ATTR -#endif - -#else - -#define INLINE_KEYWORD -#define FORCE_INLINE_ATTR - -#endif - -/** - * FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant - * parameters. They must be inlined for the compiler to eliminate the constant - * branches. - */ -#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR -/** - * HINT_INLINE is used to help the compiler generate better code. It is *not* - * used for "templates", so it can be tweaked based on the compilers - * performance. - * - * gcc-4.8 and gcc-4.9 have been shown to benefit from leaving off the - * always_inline attribute. - * - * clang up to 5.0.0 (trunk) benefit tremendously from the always_inline - * attribute. 
- */ -#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 4 && __GNUC_MINOR__ >= 8 && __GNUC__ < 5 -# define HINT_INLINE static INLINE_KEYWORD -#else -# define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR -#endif - -/* UNUSED_ATTR tells the compiler it is okay if the function is unused. */ -#if defined(__GNUC__) -# define UNUSED_ATTR __attribute__((unused)) -#else -# define UNUSED_ATTR -#endif - -/* force no inlining */ -#ifdef _MSC_VER -# define FORCE_NOINLINE static __declspec(noinline) -#else -# if defined(__GNUC__) || defined(__ICCARM__) -# define FORCE_NOINLINE static __attribute__((__noinline__)) -# else -# define FORCE_NOINLINE static -# endif -#endif - -/* target attribute */ -#ifndef __has_attribute - #define __has_attribute(x) 0 /* Compatibility with non-clang compilers. */ -#endif -#if defined(__GNUC__) || defined(__ICCARM__) -# define TARGET_ATTRIBUTE(target) __attribute__((__target__(target))) -#else -# define TARGET_ATTRIBUTE(target) -#endif - -/* Enable runtime BMI2 dispatch based on the CPU. - * Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default. - */ -#ifndef DYNAMIC_BMI2 - #if ((defined(__clang__) && __has_attribute(__target__)) \ - || (defined(__GNUC__) \ - && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \ - && (defined(__x86_64__) || defined(_M_X86)) \ - && !defined(__BMI2__) - # define DYNAMIC_BMI2 1 - #else - # define DYNAMIC_BMI2 0 - #endif -#endif - -/* prefetch - * can be disabled, by declaring NO_PREFETCH build macro */ -#if defined(NO_PREFETCH) -# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */ -# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */ -#else -# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */ -# include /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */ -# define PREFETCH_L1(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0) -# define PREFETCH_L2(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1) -# elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) ) -# define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */) -# define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */) -# else -# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */ -# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */ -# endif -#endif /* NO_PREFETCH */ - -#define CACHELINE_SIZE 64 - -#define PREFETCH_AREA(p, s) { \ - const char* const _ptr = (const char*)(p); \ - size_t const _size = (size_t)(s); \ - size_t _pos; \ - for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) { \ - PREFETCH_L2(_ptr + _pos); \ - } \ -} - -/* vectorization - * older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax */ -#if !defined(__INTEL_COMPILER) && !defined(__clang__) && defined(__GNUC__) -# if (__GNUC__ == 4 && __GNUC_MINOR__ > 3) || (__GNUC__ >= 5) -# define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize"))) -# else -# define DONT_VECTORIZE _Pragma("GCC optimize(\"no-tree-vectorize\")") -# endif -#else -# define DONT_VECTORIZE -#endif - -/* Tell the compiler that a branch is likely or unlikely. - * Only use these macros if it causes the compiler to generate better code. - * If you can remove a LIKELY/UNLIKELY annotation without speed changes in gcc - * and clang, please do. 
- */ -#if defined(__GNUC__) -#define LIKELY(x) (__builtin_expect((x), 1)) -#define UNLIKELY(x) (__builtin_expect((x), 0)) -#else -#define LIKELY(x) (x) -#define UNLIKELY(x) (x) -#endif - -/* disable warnings */ -#ifdef _MSC_VER /* Visual Studio */ -# include /* For Visual 2005 */ -# pragma warning(disable : 4100) /* disable: C4100: unreferenced formal parameter */ -# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ -# pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */ -# pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */ -# pragma warning(disable : 4324) /* disable: C4324: padded structure */ -#endif - -#endif /* ZSTD_COMPILER_H */ -/**** ended inlining compiler.h ****/ +/**** skipping file: compiler.h ****/ /**** skipping file: debug.h ****/ /**** skipping file: error_private.h ****/ @@ -1319,10 +1537,12 @@ extern "C" { /*========================================= * Target specific =========================================*/ -#if defined(__BMI__) && defined(__GNUC__) -# include /* support for bextr (experimental) */ -#elif defined(__ICCARM__) -# include +#ifndef ZSTD_NO_INTRINSICS +# if defined(__BMI__) && defined(__GNUC__) +# include /* support for bextr (experimental) */ +# elif defined(__ICCARM__) +# include +# endif #endif #define STREAM_ACCUMULATOR_MIN_32 25 @@ -1424,8 +1644,12 @@ MEM_STATIC unsigned BIT_highbit32 (U32 val) assert(val != 0); { # if defined(_MSC_VER) /* Visual */ - unsigned long r=0; - return _BitScanReverse ( &r, val ) ? (unsigned)r : 0; +# if STATIC_BMI2 == 1 + return _lzcnt_u32(val) ^ 31; +# else + unsigned long r = 0; + return _BitScanReverse(&r, val) ? (unsigned)r : 0; +# endif # elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */ return __builtin_clz (val) ^ 31; # elif defined(__ICCARM__) /* IAR Intrinsic */ @@ -1481,7 +1705,7 @@ MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits) { - MEM_STATIC_ASSERT(BIT_MASK_SIZE == 32); + DEBUG_STATIC_ASSERT(BIT_MASK_SIZE == 32); assert(nbBits < BIT_MASK_SIZE); assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8); bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos; @@ -1554,7 +1778,7 @@ MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC) */ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize) { - if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); } + if (srcSize < 1) { ZSTD_memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); } bitD->start = (const char*)srcBuffer; bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer); @@ -1600,12 +1824,12 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si return srcSize; } -MEM_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start) +MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getUpperBits(size_t bitContainer, U32 const start) { return bitContainer >> start; } -MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits) +MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits) { U32 const regMask = sizeof(bitContainer)*8 - 1; /* if start > regMask, bitstream is corrupted, and result is undefined */ @@ -1613,10 +1837,14 @@ MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 co return (bitContainer >> (start & regMask)) & BIT_mask[nbBits]; } 
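The BIT_highbit32() hunk above adds a compile-time STATIC_BMI2 path that returns _lzcnt_u32(val) ^ 31 instead of calling _BitScanReverse. A small standalone sketch (assuming GCC/Clang builtins; names are illustrative, not from the patch) showing why a leading-zero count XOR 31 yields the same highest-set-bit index as a plain software scan:

#include <assert.h>
#include <stdint.h>

/* Software fallback: 0-based position of the highest set bit. */
static unsigned highbit32_soft(uint32_t val)
{
    unsigned r = 0;
    assert(val != 0);
    while (val >>= 1) r++;
    return r;
}

#if defined(__GNUC__)
/* __builtin_clz counts leading zeros; XOR 31 converts that count into the
 * bit index, which is the same trick the patch uses with _lzcnt_u32. */
static unsigned highbit32_clz(uint32_t val)
{
    assert(val != 0);
    return (unsigned)__builtin_clz(val) ^ 31;
}
#endif

int main(void)
{
#if defined(__GNUC__)
    uint32_t v;
    for (v = 1; v != 0; v <<= 1)
        assert(highbit32_soft(v) == highbit32_clz(v));
#endif
    return 0;
}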
-MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits) +MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits) { +#if defined(STATIC_BMI2) && STATIC_BMI2 == 1 + return _bzhi_u64(bitContainer, nbBits); +#else assert(nbBits < BIT_MASK_SIZE); return bitContainer & BIT_mask[nbBits]; +#endif } /*! BIT_lookBits() : @@ -1625,7 +1853,7 @@ MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits) * On 32-bits, maxNbBits==24. * On 64-bits, maxNbBits==56. * @return : value extracted */ -MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits) +MEM_STATIC FORCE_INLINE_ATTR size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits) { /* arbitrate between double-shift and shift+mask */ #if 1 @@ -1648,7 +1876,7 @@ MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits) return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask); } -MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits) +MEM_STATIC FORCE_INLINE_ATTR void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits) { bitD->bitsConsumed += nbBits; } @@ -1657,7 +1885,7 @@ MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits) * Read (consume) next n bits from local register and update. * Pay attention to not read more than nbBits contained into local register. * @return : extracted value. */ -MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits) +MEM_STATIC FORCE_INLINE_ATTR size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits) { size_t const value = BIT_lookBits(bitD, nbBits); BIT_skipBits(bitD, nbBits); @@ -1743,12 +1971,12 @@ MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream) *******************************************/ /* FSE buffer bounds */ #define FSE_NCOUNTBOUND 512 -#define FSE_BLOCKBOUND(size) (size + (size>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */) +#define FSE_BLOCKBOUND(size) ((size) + ((size)>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */) #define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */ /* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */ -#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<(maxTableLog-1)) + ((maxSymbolValue+1)*2)) -#define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1< 12) ? (1 << (maxTableLog - 2)) : 1024) ) +#define FSE_COMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ( FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) + ((maxTableLog > 12) ? (1 << (maxTableLog - 2)) : 1024) ) size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits); @@ -1777,18 +2005,30 @@ size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue); /* FSE_buildCTable_wksp() : * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`). - * `wkspSize` must be >= `(1<= `FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog)` of `unsigned`. 
*/ +#define FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog) (maxSymbolValue + 2 + (1ull << (tableLog - 2))) +#define FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) (sizeof(unsigned) * FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog)) size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); +#define FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) (sizeof(short) * (maxSymbolValue + 1) + (1ULL << maxTableLog) + 8) +#define FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ((FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) + sizeof(unsigned) - 1) / sizeof(unsigned)) +FSE_PUBLIC_API size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); +/**< Same as FSE_buildDTable(), using an externally allocated `workspace` produced with `FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxSymbolValue)` */ + size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits); /**< build a fake FSE_DTable, designed to read a flat distribution where each symbol uses nbBits */ size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue); /**< build a fake FSE_DTable, designed to always generate the same symbolValue */ -size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, FSE_DTable* workSpace, unsigned maxLog); -/**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DTABLE_SIZE_U32(maxLog)` */ +#define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) (FSE_DTABLE_SIZE_U32(maxTableLog) + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) + (FSE_MAX_SYMBOL_VALUE + 1) / 2 + 1) +#define FSE_DECOMPRESS_WKSP_SIZE(maxTableLog, maxSymbolValue) (FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(unsigned)) +size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize); +/**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DECOMPRESS_WKSP_SIZE_U32(maxLog, maxSymbolValue)` */ + +size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2); +/**< Same as FSE_decompress_wksp() but with dynamic BMI2 support. Pass 1 if your CPU supports BMI2 or 0 if it doesn't. */ typedef enum { FSE_repeat_none, /**< Cannot use the previous table */ @@ -2099,6 +2339,9 @@ MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr) #ifndef FSE_DEFAULT_MEMORY_USAGE # define FSE_DEFAULT_MEMORY_USAGE 13 #endif +#if (FSE_DEFAULT_MEMORY_USAGE > FSE_MAX_MEMORY_USAGE) +# error "FSE_DEFAULT_MEMORY_USAGE must be <= FSE_MAX_MEMORY_USAGE" +#endif /*!FSE_MAX_SYMBOL_VALUE : * Maximum symbol value authorized. 
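The fse.h additions above replace FSE_decompress_wksp()'s FSE_DTable parameter with a caller-provided workspace sized by FSE_DECOMPRESS_WKSP_SIZE_U32(), plus a _bmi2 variant. A hedged usage sketch of the new entry point, modeled on the patch's own FSE_decompress(); the "fse.h" include path assumes a non-amalgamated build and the wrapper name is hypothetical.

#define FSE_STATIC_LINKING_ONLY
#include "fse.h"   /* zstd-internal header, inlined elsewhere in this amalgamation */

/* Decompress one FSE-compressed block without heap allocation: the decode
 * table and scratch space live in a stack workspace whose size comes from
 * the largest table log and symbol value we are willing to accept. */
static size_t decompress_fse_block(void* dst, size_t dstCapacity,
                                   const void* src, size_t srcSize)
{
    unsigned wksp[FSE_DECOMPRESS_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)];
    /* last argument: pass 1 only when CPU detection says BMI2 is available */
    return FSE_decompress_wksp_bmi2(dst, dstCapacity, src, srcSize,
                                    FSE_MAX_TABLELOG, wksp, sizeof(wksp), /* bmi2 */ 0);
}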
@@ -2132,7 +2375,7 @@ MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr) # error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported" #endif -#define FSE_TABLESTEP(tableSize) ((tableSize>>1) + (tableSize>>3) + 3) +#define FSE_TABLESTEP(tableSize) (((tableSize)>>1) + ((tableSize)>>3) + 3) #endif /* FSE_STATIC_LINKING_ONLY */ @@ -2147,7 +2390,7 @@ MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr) /* ****************************************************************** * huff0 huffman codec, * part of Finite State Entropy library - * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * * You can contact the author at : * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy @@ -2166,7 +2409,7 @@ extern "C" { #define HUF_H_298734234 /* *** Dependencies *** */ -#include /* size_t */ +/**** skipping file: zstd_deps.h ****/ /* *** library symbols visibility *** */ @@ -2236,7 +2479,7 @@ HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity, /** HUF_compress4X_wksp() : * Same as HUF_compress2(), but uses externally allocated `workSpace`. * `workspace` must have minimum alignment of 4, and be at least as large as HUF_WORKSPACE_SIZE */ -#define HUF_WORKSPACE_SIZE (6 << 10) +#define HUF_WORKSPACE_SIZE ((6 << 10) + 256) #define HUF_WORKSPACE_SIZE_U32 (HUF_WORKSPACE_SIZE / sizeof(U32)) HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity, const void* src, size_t srcSize, @@ -2257,6 +2500,8 @@ HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity, /* *** Dependencies *** */ /**** skipping file: mem.h ****/ +#define FSE_STATIC_LINKING_ONLY +/**** skipping file: fse.h ****/ /* *** Constants *** */ @@ -2279,12 +2524,16 @@ HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity, #define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size)) /* Macro version, useful for static allocation */ /* static allocation of HUF's Compression Table */ +/* this is a private definition, just exposed for allocation and strict aliasing purpose. never EVER access its members directly */ +struct HUF_CElt_s { + U16 val; + BYTE nbBits; +}; /* typedef'd to HUF_CElt */ +typedef struct HUF_CElt_s HUF_CElt; /* consider it an incomplete type */ #define HUF_CTABLE_SIZE_U32(maxSymbolValue) ((maxSymbolValue)+1) /* Use tables of U32, for proper alignment */ #define HUF_CTABLE_SIZE(maxSymbolValue) (HUF_CTABLE_SIZE_U32(maxSymbolValue) * sizeof(U32)) #define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \ - U32 name##hb[HUF_CTABLE_SIZE_U32(maxSymbolValue)]; \ - void* name##hv = &(name##hb); \ - HUF_CElt* name = (HUF_CElt*)(name##hv) /* no final ; */ + HUF_CElt name[HUF_CTABLE_SIZE_U32(maxSymbolValue)] /* no final ; */ /* static allocation of HUF's DTable */ typedef U32 HUF_DTable; @@ -2330,11 +2579,12 @@ size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, * or to save and regenerate 'CTable' using external methods. */ unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue); -typedef struct HUF_CElt_s HUF_CElt; /* incomplete type */ size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits); /* @return : maxNbBits; CTable and count can overlap. 
In which case, CTable will overwrite count content */ size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog); +size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog, void* workspace, size_t workspaceSize); size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable); size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue); +int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue); typedef enum { HUF_repeat_none, /**< Cannot use the previous table */ @@ -2371,6 +2621,19 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr, const void* src, size_t srcSize); +/*! HUF_readStats_wksp() : + * Same as HUF_readStats() but takes an external workspace which must be + * 4-byte aligned and its size must be >= HUF_READ_STATS_WORKSPACE_SIZE. + * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0. + */ +#define HUF_READ_STATS_WORKSPACE_SIZE_U32 FSE_DECOMPRESS_WKSP_SIZE_U32(6, HUF_TABLELOG_MAX-1) +#define HUF_READ_STATS_WORKSPACE_SIZE (HUF_READ_STATS_WORKSPACE_SIZE_U32 * sizeof(unsigned)) +size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, + U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize, + void* workspace, size_t wkspSize, + int bmi2); + /** HUF_readCTable() : * Loading a CTable saved with HUF_writeCTable() */ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned *hasZeroWeights); @@ -2405,7 +2668,7 @@ U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize); * a required workspace size greater than that specified in the following * macro. */ -#define HUF_DECOMPRESS_WORKSPACE_SIZE (2 << 10) +#define HUF_DECOMPRESS_WORKSPACE_SIZE ((2 << 10) + (1 << 9)) #define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32)) #ifndef HUF_FORCE_DECOMPRESS_X2 @@ -2477,6 +2740,9 @@ size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstS #endif size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2); size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2); +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2); +#endif #endif /* HUF_STATIC_LINKING_ONLY */ @@ -2501,8 +2767,31 @@ const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); } /*-************************************************************** * FSE NCount encoding-decoding ****************************************************************/ -size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, - const void* headerBuffer, size_t hbSize) +static U32 FSE_ctz(U32 val) +{ + assert(val != 0); + { +# if defined(_MSC_VER) /* Visual */ + unsigned long r=0; + return _BitScanForward(&r, val) ? 
(unsigned)r : 0; +# elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */ + return __builtin_ctz(val); +# elif defined(__ICCARM__) /* IAR Intrinsic */ + return __CTZ(val); +# else /* Software version */ + U32 count = 0; + while ((val & 1) == 0) { + val >>= 1; + ++count; + } + return count; +# endif + } +} + +FORCE_INLINE_TEMPLATE +size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, + const void* headerBuffer, size_t hbSize) { const BYTE* const istart = (const BYTE*) headerBuffer; const BYTE* const iend = istart + hbSize; @@ -2513,23 +2802,23 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t U32 bitStream; int bitCount; unsigned charnum = 0; + unsigned const maxSV1 = *maxSVPtr + 1; int previous0 = 0; - if (hbSize < 4) { - /* This function only works when hbSize >= 4 */ - char buffer[4]; - memset(buffer, 0, sizeof(buffer)); - memcpy(buffer, headerBuffer, hbSize); + if (hbSize < 8) { + /* This function only works when hbSize >= 8 */ + char buffer[8] = {0}; + ZSTD_memcpy(buffer, headerBuffer, hbSize); { size_t const countSize = FSE_readNCount(normalizedCounter, maxSVPtr, tableLogPtr, buffer, sizeof(buffer)); if (FSE_isError(countSize)) return countSize; if (countSize > hbSize) return ERROR(corruption_detected); return countSize; } } - assert(hbSize >= 4); + assert(hbSize >= 8); /* init */ - memset(normalizedCounter, 0, (*maxSVPtr+1) * sizeof(normalizedCounter[0])); /* all symbols not present in NCount have a frequency of 0 */ + ZSTD_memset(normalizedCounter, 0, (*maxSVPtr+1) * sizeof(normalizedCounter[0])); /* all symbols not present in NCount have a frequency of 0 */ bitStream = MEM_readLE32(ip); nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */ if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge); @@ -2540,36 +2829,58 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t threshold = 1<1) & (charnum<=*maxSVPtr)) { + for (;;) { if (previous0) { - unsigned n0 = charnum; - while ((bitStream & 0xFFFF) == 0xFFFF) { - n0 += 24; - if (ip < iend-5) { - ip += 2; - bitStream = MEM_readLE32(ip) >> bitCount; + /* Count the number of repeats. Each time the + * 2-bit repeat code is 0b11 there is another + * repeat. + * Avoid UB by setting the high bit to 1. + */ + int repeats = FSE_ctz(~bitStream | 0x80000000) >> 1; + while (repeats >= 12) { + charnum += 3 * 12; + if (LIKELY(ip <= iend-7)) { + ip += 3; } else { - bitStream >>= 16; - bitCount += 16; - } } - while ((bitStream & 3) == 3) { - n0 += 3; - bitStream >>= 2; - bitCount += 2; + bitCount -= (int)(8 * (iend - 7 - ip)); + bitCount &= 31; + ip = iend - 4; + } + bitStream = MEM_readLE32(ip) >> bitCount; + repeats = FSE_ctz(~bitStream | 0x80000000) >> 1; } - n0 += bitStream & 3; + charnum += 3 * repeats; + bitStream >>= 2 * repeats; + bitCount += 2 * repeats; + + /* Add the final repeat which isn't 0b11. */ + assert((bitStream & 3) < 3); + charnum += bitStream & 3; bitCount += 2; - if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall); - while (charnum < n0) normalizedCounter[charnum++] = 0; - if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) { + + /* This is an error, but break and return an error + * at the end, because returning out of a loop makes + * it harder for the compiler to optimize. + */ + if (charnum >= maxSV1) break; + + /* We don't need to set the normalized count to 0 + * because we already memset the whole buffer to 0. 
+ */ + + if (LIKELY(ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) { assert((bitCount >> 3) <= 3); /* For first condition to work */ ip += bitCount>>3; bitCount &= 7; - bitStream = MEM_readLE32(ip) >> bitCount; } else { - bitStream >>= 2; - } } - { int const max = (2*threshold-1) - remaining; + bitCount -= (int)(8 * (iend - 4 - ip)); + bitCount &= 31; + ip = iend - 4; + } + bitStream = MEM_readLE32(ip) >> bitCount; + } + { + int const max = (2*threshold-1) - remaining; int count; if ((bitStream & (threshold-1)) < (U32)max) { @@ -2582,24 +2893,43 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t } count--; /* extra accuracy */ - remaining -= count < 0 ? -count : count; /* -1 means +1 */ + /* When it matters (small blocks), this is a + * predictable branch, because we don't use -1. + */ + if (count >= 0) { + remaining -= count; + } else { + assert(count == -1); + remaining += count; + } normalizedCounter[charnum++] = (short)count; previous0 = !count; - while (remaining < threshold) { - nbBits--; - threshold >>= 1; + + assert(threshold > 1); + if (remaining < threshold) { + /* This branch can be folded into the + * threshold update condition because we + * know that threshold > 1. + */ + if (remaining <= 1) break; + nbBits = BIT_highbit32(remaining) + 1; + threshold = 1 << (nbBits - 1); } + if (charnum >= maxSV1) break; - if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) { + if (LIKELY(ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) { ip += bitCount>>3; bitCount &= 7; } else { bitCount -= (int)(8 * (iend - 4 - ip)); + bitCount &= 31; ip = iend - 4; } - bitStream = MEM_readLE32(ip) >> (bitCount & 31); - } } /* while ((remaining>1) & (charnum<=*maxSVPtr)) */ + bitStream = MEM_readLE32(ip) >> bitCount; + } } if (remaining != 1) return ERROR(corruption_detected); + /* Only possible when there are too many zeros. */ + if (charnum > maxSV1) return ERROR(maxSymbolValue_tooSmall); if (bitCount > 32) return ERROR(corruption_detected); *maxSVPtr = charnum-1; @@ -2607,6 +2937,43 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t return ip-istart; } +/* Avoids the FORCE_INLINE of the _body() function. */ +static size_t FSE_readNCount_body_default( + short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, + const void* headerBuffer, size_t hbSize) +{ + return FSE_readNCount_body(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize); +} + +#if DYNAMIC_BMI2 +TARGET_ATTRIBUTE("bmi2") static size_t FSE_readNCount_body_bmi2( + short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, + const void* headerBuffer, size_t hbSize) +{ + return FSE_readNCount_body(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize); +} +#endif + +size_t FSE_readNCount_bmi2( + short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, + const void* headerBuffer, size_t hbSize, int bmi2) +{ +#if DYNAMIC_BMI2 + if (bmi2) { + return FSE_readNCount_body_bmi2(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize); + } +#endif + (void)bmi2; + return FSE_readNCount_body_default(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize); +} + +size_t FSE_readNCount( + short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, + const void* headerBuffer, size_t hbSize) +{ + return FSE_readNCount_bmi2(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize, /* bmi2 */ 0); +} + /*! HUF_readStats() : Read compact Huffman tree, saved by HUF_writeCTable(). 
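The rewritten FSE_readNCount_body() above replaces the old per-pair `while ((bitStream & 3) == 3)` scan with `FSE_ctz(~bitStream | 0x80000000) >> 1`. A standalone sketch (software ctz so it runs anywhere; names are illustrative) checking that the two ways of counting a leading run of 0b11 repeat codes agree:

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

/* Software count-trailing-zeros, mirroring the fallback in the patch. */
static unsigned ctz32(uint32_t val)
{
    unsigned n = 0;
    assert(val != 0);
    while ((val & 1) == 0) { val >>= 1; n++; }
    return n;
}

/* Reference: walk 2-bit codes from the bottom until one is not 0b11. */
static unsigned count_repeats_slow(uint32_t bits)
{
    unsigned n = 0;
    while ((bits & 3) == 3) { bits >>= 2; n++; }
    return n;
}

/* Fast path from the patch: complementing turns the run of 1-bits into a run
 * of 0-bits, the forced high bit guarantees ctz has something to find, and
 * >> 1 converts a bit count into a count of 2-bit repeat codes. */
static unsigned count_repeats_fast(uint32_t bits)
{
    return ctz32(~bits | 0x80000000u) >> 1;
}

int main(void)
{
    /* An all-ones container is left out on purpose: the forced high bit caps
     * the ctz result, and the real decoder refills before that matters. */
    uint32_t samples[] = { 0x0u, 0x3u, 0xFu, 0x3Fu, 0xFFFFu, 0xABCDFF0Fu };
    size_t i;
    for (i = 0; i < sizeof(samples)/sizeof(samples[0]); i++)
        assert(count_repeats_slow(samples[i]) == count_repeats_fast(samples[i]));
    return 0;
}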
@@ -2618,6 +2985,17 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr, const void* src, size_t srcSize) +{ + U32 wksp[HUF_READ_STATS_WORKSPACE_SIZE_U32]; + return HUF_readStats_wksp(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, wksp, sizeof(wksp), /* bmi2 */ 0); +} + +FORCE_INLINE_TEMPLATE size_t +HUF_readStats_body(BYTE* huffWeight, size_t hwSize, U32* rankStats, + U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize, + void* workSpace, size_t wkspSize, + int bmi2) { U32 weightTotal; const BYTE* ip = (const BYTE*) src; @@ -2626,7 +3004,7 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, if (!srcSize) return ERROR(srcSize_wrong); iSize = ip[0]; - /* memset(huffWeight, 0, hwSize); *//* is not necessary, even though some analyzer complain ... */ + /* ZSTD_memset(huffWeight, 0, hwSize); *//* is not necessary, even though some analyzer complain ... */ if (iSize >= 128) { /* special header */ oSize = iSize - 127; @@ -2640,14 +3018,14 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, huffWeight[n+1] = ip[n/2] & 15; } } } else { /* header compressed with FSE (normal case) */ - FSE_DTable fseWorkspace[FSE_DTABLE_SIZE_U32(6)]; /* 6 is max possible tableLog for HUF header (maybe even 5, to be tested) */ if (iSize+1 > srcSize) return ERROR(srcSize_wrong); - oSize = FSE_decompress_wksp(huffWeight, hwSize-1, ip+1, iSize, fseWorkspace, 6); /* max (hwSize-1) values decoded, as last one is implied */ + /* max (hwSize-1) values decoded, as last one is implied */ + oSize = FSE_decompress_wksp_bmi2(huffWeight, hwSize-1, ip+1, iSize, 6, workSpace, wkspSize, bmi2); if (FSE_isError(oSize)) return oSize; } /* collect weight stats */ - memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32)); + ZSTD_memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32)); weightTotal = 0; { U32 n; for (n=0; n= HUF_TABLELOG_MAX) return ERROR(corruption_detected); @@ -2677,10 +3055,44 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, *nbSymbolsPtr = (U32)(oSize+1); return iSize+1; } -/**** ended inlining entropy_common.c ****/ -/**** start inlining error_private.c ****/ + +/* Avoids the FORCE_INLINE of the _body() function. 
*/ +static size_t HUF_readStats_body_default(BYTE* huffWeight, size_t hwSize, U32* rankStats, + U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize, + void* workSpace, size_t wkspSize) +{ + return HUF_readStats_body(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize, 0); +} + +#if DYNAMIC_BMI2 +static TARGET_ATTRIBUTE("bmi2") size_t HUF_readStats_body_bmi2(BYTE* huffWeight, size_t hwSize, U32* rankStats, + U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize, + void* workSpace, size_t wkspSize) +{ + return HUF_readStats_body(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize, 1); +} +#endif + +size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, U32* rankStats, + U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize, + void* workSpace, size_t wkspSize, + int bmi2) +{ +#if DYNAMIC_BMI2 + if (bmi2) { + return HUF_readStats_body_bmi2(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize); + } +#endif + (void)bmi2; + return HUF_readStats_body_default(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize); +} +/**** ended inlining common/entropy_common.c ****/ +/**** start inlining common/error_private.c ****/ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -2728,16 +3140,18 @@ const char* ERR_getErrorString(ERR_enum code) /* following error codes are not stable and may be removed or changed in a future version */ case PREFIX(frameIndex_tooLarge): return "Frame index is too large"; case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking"; + case PREFIX(dstBuffer_wrong): return "Destination buffer is wrong"; + case PREFIX(srcBuffer_wrong): return "Source buffer is wrong"; case PREFIX(maxCode): default: return notErrorCode; } #endif } -/**** ended inlining error_private.c ****/ -/**** start inlining fse_decompress.c ****/ +/**** ended inlining common/error_private.c ****/ +/**** start inlining common/fse_decompress.c ****/ /* ****************************************************************** * FSE : Finite State Entropy decoder - * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. 
* * You can contact the author at : * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy @@ -2753,13 +3167,14 @@ const char* ERR_getErrorString(ERR_enum code) /* ************************************************************** * Includes ****************************************************************/ -#include /* malloc, free, qsort */ -#include /* memcpy, memset */ +/**** skipping file: debug.h ****/ /**** skipping file: bitstream.h ****/ /**** skipping file: compiler.h ****/ #define FSE_STATIC_LINKING_ONLY /**** skipping file: fse.h ****/ /**** skipping file: error_private.h ****/ +#define ZSTD_DEPS_NEED_MALLOC +/**** skipping file: zstd_deps.h ****/ /* ************************************************************** @@ -2768,11 +3183,6 @@ const char* ERR_getErrorString(ERR_enum code) #define FSE_isError ERR_isError #define FSE_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */ -/* check and forward error code */ -#ifndef CHECK_F -#define CHECK_F(f) { size_t const e = f; if (FSE_isError(e)) return e; } -#endif - /* ************************************************************** * Templates @@ -2801,25 +3211,27 @@ const char* ERR_getErrorString(ERR_enum code) FSE_DTable* FSE_createDTable (unsigned tableLog) { if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX; - return (FSE_DTable*)malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) ); + return (FSE_DTable*)ZSTD_malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) ); } void FSE_freeDTable (FSE_DTable* dt) { - free(dt); + ZSTD_free(dt); } -size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) +static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize) { void* const tdPtr = dt+1; /* because *dt is unsigned, 32-bits aligned on 32-bits */ FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (tdPtr); - U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1]; + U16* symbolNext = (U16*)workSpace; + BYTE* spread = (BYTE*)(symbolNext + maxSymbolValue + 1); U32 const maxSV1 = maxSymbolValue + 1; U32 const tableSize = 1 << tableLog; U32 highThreshold = tableSize-1; /* Sanity Checks */ + if (FSE_BUILD_DTABLE_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(maxSymbolValue_tooLarge); if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge); if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); @@ -2837,11 +3249,57 @@ size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0; symbolNext[s] = normalizedCounter[s]; } } } - memcpy(dt, &DTableH, sizeof(DTableH)); + ZSTD_memcpy(dt, &DTableH, sizeof(DTableH)); } /* Spread symbols */ - { U32 const tableMask = tableSize-1; + if (highThreshold == tableSize - 1) { + size_t const tableMask = tableSize-1; + size_t const step = FSE_TABLESTEP(tableSize); + /* First lay down the symbols in order. + * We use a uint64_t to lay down 8 bytes at a time. This reduces branch + * misses since small blocks generally have small table logs, so nearly + * all symbols have counts <= 8. We ensure we have 8 bytes at the end of + * our buffer to handle the over-write. 
+ */ + { + U64 const add = 0x0101010101010101ull; + size_t pos = 0; + U64 sv = 0; + U32 s; + for (s=0; s= cSrcSize) return ERROR(srcSize_wrong); /* too small input size; supposed to be already checked in NCountLength, only remaining case : NCountLength==cSrcSize */ - if (tableLog > maxLog) return ERROR(tableLog_tooLarge); - ip += NCountLength; - cSrcSize -= NCountLength; + { + size_t const NCountLength = FSE_readNCount_bmi2(wksp->ncount, &maxSymbolValue, &tableLog, istart, cSrcSize, bmi2); + if (FSE_isError(NCountLength)) return NCountLength; + if (tableLog > maxLog) return ERROR(tableLog_tooLarge); + assert(NCountLength <= cSrcSize); + ip += NCountLength; + cSrcSize -= NCountLength; + } + + if (FSE_DECOMPRESS_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(tableLog_tooLarge); + workSpace = wksp->dtable + FSE_DTABLE_SIZE_U32(tableLog); + wkspSize -= sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog); + + CHECK_F( FSE_buildDTable_internal(wksp->dtable, wksp->ncount, maxSymbolValue, tableLog, workSpace, wkspSize) ); + + { + const void* ptr = wksp->dtable; + const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr; + const U32 fastMode = DTableH->fastMode; + + /* select fast mode (static) */ + if (fastMode) return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 1); + return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 0); + } +} + +/* Avoids the FORCE_INLINE of the _body() function. */ +static size_t FSE_decompress_wksp_body_default(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize) +{ + return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 0); +} - CHECK_F( FSE_buildDTable (workSpace, counting, maxSymbolValue, tableLog) ); +#if DYNAMIC_BMI2 +TARGET_ATTRIBUTE("bmi2") static size_t FSE_decompress_wksp_body_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize) +{ + return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 1); +} +#endif - return FSE_decompress_usingDTable (dst, dstCapacity, ip, cSrcSize, workSpace); /* always return, even if it is an error code */ +size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2) +{ +#if DYNAMIC_BMI2 + if (bmi2) { + return FSE_decompress_wksp_body_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize); + } +#endif + (void)bmi2; + return FSE_decompress_wksp_body_default(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize); } typedef FSE_DTable DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)]; +#ifndef ZSTD_NO_UNUSED_FUNCTIONS +size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) { + U32 wksp[FSE_BUILD_DTABLE_WKSP_SIZE_U32(FSE_TABLELOG_ABSOLUTE_MAX, FSE_MAX_SYMBOL_VALUE)]; + return FSE_buildDTable_wksp(dt, normalizedCounter, maxSymbolValue, tableLog, wksp, sizeof(wksp)); +} + size_t FSE_decompress(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize) { - DTable_max_t dt; /* Static analyzer seems unable to understand this table will be properly initialized later */ - return FSE_decompress_wksp(dst, dstCapacity, cSrc, cSrcSize, dt, FSE_MAX_TABLELOG); + /* Static analyzer seems unable to understand this table will be properly initialized later */ + U32 
wksp[FSE_DECOMPRESS_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)]; + return FSE_decompress_wksp(dst, dstCapacity, cSrc, cSrcSize, FSE_MAX_TABLELOG, wksp, sizeof(wksp)); } - +#endif #endif /* FSE_COMMONDEFS_ONLY */ -/**** ended inlining fse_decompress.c ****/ -/**** start inlining xxhash.c ****/ +/**** ended inlining common/fse_decompress.c ****/ +/**** start inlining common/zstd_common.c ****/ /* - * xxHash - Fast Hash algorithm - * Copyright (c) 2012-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. * - * You can contact the author at : - * - xxHash homepage: http://www.xxhash.com - * - xxHash source repository : https://github.com/Cyan4973/xxHash - * * This source code is licensed under both the BSD-style license (found in the * LICENSE file in the root directory of this source tree) and the GPLv2 (found * in the COPYING file in the root directory of this source tree). * You may select, at your option, one of the above-listed licenses. -*/ - - -/* ************************************* -* Tuning parameters -***************************************/ -/*!XXH_FORCE_MEMORY_ACCESS : - * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. - * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. - * The below switch allow to select different access method for improved performance. - * Method 0 (default) : use `memcpy()`. Safe and portable. - * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). - * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. - * Method 2 : direct access. This method doesn't depend on compiler but violate C standard. - * It can generate buggy code on targets which do not support unaligned memory accesses. - * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) - * See http://stackoverflow.com/a/32095106/646947 for details. - * Prefer these methods in priority order (0 > 1 > 2) */ -#ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ -# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) -# define XXH_FORCE_MEMORY_ACCESS 2 -# elif (defined(__INTEL_COMPILER) && !defined(WIN32)) || \ - (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) || \ - defined(__ICCARM__) -# define XXH_FORCE_MEMORY_ACCESS 1 -# endif -#endif -/*!XXH_ACCEPT_NULL_INPUT_POINTER : - * If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer. - * When this option is enabled, xxHash output for null input pointers will be the same as a null-length input. - * By default, this option is disabled. To enable it, uncomment below define : - */ -/* #define XXH_ACCEPT_NULL_INPUT_POINTER 1 */ -/*!XXH_FORCE_NATIVE_FORMAT : - * By default, xxHash library provides endian-independent Hash values, based on little-endian convention. - * Results are therefore identical for little-endian and big-endian CPU. - * This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format. 
- * Should endian-independence be of no importance for your application, you may set the #define below to 1, - * to improve speed for Big-endian CPU. - * This option has no impact on Little_Endian CPU. - */ -#ifndef XXH_FORCE_NATIVE_FORMAT /* can be defined externally */ -# define XXH_FORCE_NATIVE_FORMAT 0 -#endif -/*!XXH_FORCE_ALIGN_CHECK : - * This is a minor performance trick, only useful with lots of very small keys. - * It means : check for aligned/unaligned input. - * The check costs one initial branch per hash; set to 0 when the input data - * is guaranteed to be aligned. +/*-************************************* +* Dependencies +***************************************/ +#define ZSTD_DEPS_NEED_MALLOC +/**** skipping file: zstd_deps.h ****/ +/**** skipping file: error_private.h ****/ +/**** start inlining zstd_internal.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. */ -#ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */ -# if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64) -# define XXH_FORCE_ALIGN_CHECK 0 -# else -# define XXH_FORCE_ALIGN_CHECK 1 -# endif -#endif +#ifndef ZSTD_CCOMMON_H_MODULE +#define ZSTD_CCOMMON_H_MODULE -/* ************************************* -* Includes & Memory related functions -***************************************/ -/* Modify the local functions below should you wish to use some other memory routines */ -/* for malloc(), free() */ -#include -#include /* size_t */ -static void* XXH_malloc(size_t s) { return malloc(s); } -static void XXH_free (void* p) { free(p); } -/* for memcpy() */ -#include -static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); } +/* this module contains definitions which must be identical + * across compression, decompression and dictBuilder. + * It also contains a few functions useful to at least 2 of them + * and which benefit from being inlined */ -#ifndef XXH_STATIC_LINKING_ONLY -# define XXH_STATIC_LINKING_ONLY +/*-************************************* +* Dependencies +***************************************/ +#if !defined(ZSTD_NO_INTRINSICS) && defined(__ARM_NEON) +#include #endif -/**** start inlining xxhash.h ****/ +/**** skipping file: compiler.h ****/ +/**** skipping file: mem.h ****/ +/**** skipping file: debug.h ****/ +/**** skipping file: error_private.h ****/ +#define ZSTD_STATIC_LINKING_ONLY +/**** start inlining ../zstd.h ****/ /* - * xxHash - Extremely Fast Hash algorithm - * Header File - * Copyright (c) 2012-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. * - * You can contact the author at : - * - xxHash source repository : https://github.com/Cyan4973/xxHash - * * This source code is licensed under both the BSD-style license (found in the * LICENSE file in the root directory of this source tree) and the GPLv2 (found * in the COPYING file in the root directory of this source tree). * You may select, at your option, one of the above-listed licenses. -*/ - -/* Notice extracted from xxHash homepage : - -xxHash is an extremely fast Hash algorithm, running at RAM speed limits. -It also successfully passes all tests from the SMHasher suite. 
- -Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz) - -Name Speed Q.Score Author -xxHash 5.4 GB/s 10 -CrapWow 3.2 GB/s 2 Andrew -MumurHash 3a 2.7 GB/s 10 Austin Appleby -SpookyHash 2.0 GB/s 10 Bob Jenkins -SBox 1.4 GB/s 9 Bret Mulvey -Lookup3 1.2 GB/s 9 Bob Jenkins -SuperFastHash 1.2 GB/s 1 Paul Hsieh -CityHash64 1.05 GB/s 10 Pike & Alakuijala -FNV 0.55 GB/s 5 Fowler, Noll, Vo -CRC32 0.43 GB/s 9 -MD5-32 0.33 GB/s 10 Ronald L. Rivest -SHA1-32 0.28 GB/s 10 - -Q.Score is a measure of quality of the hash function. -It depends on successfully passing SMHasher test set. -10 is a perfect score. - -A 64-bits version, named XXH64, is available since r35. -It offers much better speed, but for 64-bits applications only. -Name Speed on 64 bits Speed on 32 bits -XXH64 13.8 GB/s 1.9 GB/s -XXH32 6.8 GB/s 6.0 GB/s -*/ - + */ #if defined (__cplusplus) extern "C" { #endif -#ifndef XXHASH_H_5627135585666179 -#define XXHASH_H_5627135585666179 1 - +#ifndef ZSTD_H_235446 +#define ZSTD_H_235446 -/* **************************** -* Definitions -******************************/ +/* ====== Dependency ======*/ +#include /* INT_MAX */ #include /* size_t */ -typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode; -/* **************************** -* API modifier -******************************/ -/** XXH_PRIVATE_API -* This is useful if you want to include xxhash functions in `static` mode -* in order to inline them, and remove their symbol from the public list. -* Methodology : -* #define XXH_PRIVATE_API -* #include "xxhash.h" -* `xxhash.c` is automatically included. -* It's not useful to compile and link it as a separate module anymore. -*/ -#ifdef XXH_PRIVATE_API -# ifndef XXH_STATIC_LINKING_ONLY -# define XXH_STATIC_LINKING_ONLY -# endif -# if defined(__GNUC__) -# define XXH_PUBLIC_API static __inline __attribute__((unused)) -# elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) -# define XXH_PUBLIC_API static inline -# elif defined(_MSC_VER) -# define XXH_PUBLIC_API static __inline -# else -# define XXH_PUBLIC_API static /* this version may generate warnings for unused static functions; disable the relevant warning */ +/* ===== ZSTDLIB_API : control library symbols visibility ===== */ +#ifndef ZSTDLIB_VISIBILITY +# if defined(__GNUC__) && (__GNUC__ >= 4) +# define ZSTDLIB_VISIBILITY __attribute__ ((visibility ("default"))) +# else +# define ZSTDLIB_VISIBILITY # endif +#endif +#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1) +# define ZSTDLIB_API __declspec(dllexport) ZSTDLIB_VISIBILITY +#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1) +# define ZSTDLIB_API __declspec(dllimport) ZSTDLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ #else -# define XXH_PUBLIC_API /* do nothing */ -#endif /* XXH_PRIVATE_API */ - -/*!XXH_NAMESPACE, aka Namespace Emulation : - -If you want to include _and expose_ xxHash functions from within your own library, -but also want to avoid symbol collisions with another library which also includes xxHash, - -you can use XXH_NAMESPACE, to automatically prefix any public symbol from xxhash library -with the value of XXH_NAMESPACE (so avoid to keep it NULL and avoid numeric values). - -Note that no change is required within the calling program as long as it includes `xxhash.h` : -regular symbol name will be automatically translated by this header. 
-*/ -#ifdef XXH_NAMESPACE -# define XXH_CAT(A,B) A##B -# define XXH_NAME2(A,B) XXH_CAT(A,B) -# define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32) -# define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64) -# define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber) -# define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState) -# define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState) -# define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState) -# define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState) -# define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset) -# define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset) -# define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update) -# define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update) -# define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest) -# define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest) -# define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState) -# define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState) -# define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash) -# define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash) -# define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical) -# define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical) +# define ZSTDLIB_API ZSTDLIB_VISIBILITY #endif -/* ************************************* -* Version -***************************************/ -#define XXH_VERSION_MAJOR 0 -#define XXH_VERSION_MINOR 6 -#define XXH_VERSION_RELEASE 2 -#define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE) -XXH_PUBLIC_API unsigned XXH_versionNumber (void); - +/******************************************************************************* + Introduction -/* **************************** -* Simple Hash Functions -******************************/ -typedef unsigned int XXH32_hash_t; -typedef unsigned long long XXH64_hash_t; + zstd, short for Zstandard, is a fast lossless compression algorithm, targeting + real-time compression scenarios at zlib-level and better compression ratios. + The zstd compression library provides in-memory compression and decompression + functions. -XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, unsigned int seed); -XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, unsigned long long seed); + The library supports regular compression levels from 1 up to ZSTD_maxCLevel(), + which is currently 22. Levels >= 20, labeled `--ultra`, should be used with + caution, as they require more memory. The library also offers negative + compression levels, which extend the range of speed vs. ratio preferences. + The lower the level, the faster the speed (at the cost of compression). -/*! -XXH32() : - Calculate the 32-bits hash of sequence "length" bytes stored at memory address "input". - The memory between input & input+length must be valid (allocated and read-accessible). - "seed" can be used to alter the result predictably. - Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s -XXH64() : - Calculate the 64-bits hash of sequence of length "len" stored at memory address "input". - "seed" can be used to alter the result predictably. - This function runs 2x faster on 64-bits systems, but slower on 32-bits systems (see benchmark). 
-*/ + Compression can be done in: + - a single step (described as Simple API) + - a single step, reusing a context (described as Explicit context) + - unbounded multiple steps (described as Streaming compression) + The compression ratio achievable on small data can be highly improved using + a dictionary. Dictionary compression can be performed in: + - a single step (described as Simple dictionary API) + - a single step, reusing a dictionary (described as Bulk-processing + dictionary API) -/* **************************** -* Streaming Hash Functions -******************************/ -typedef struct XXH32_state_s XXH32_state_t; /* incomplete type */ -typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */ + Advanced experimental functions can be accessed using + `#define ZSTD_STATIC_LINKING_ONLY` before including zstd.h. -/*! State allocation, compatible with dynamic libraries */ + Advanced experimental APIs should never be used with a dynamically-linked + library. They are not "stable"; their definitions or signatures may change in + the future. Only static linking is allowed. +*******************************************************************************/ -XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void); -XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr); +/*------ Version ------*/ +#define ZSTD_VERSION_MAJOR 1 +#define ZSTD_VERSION_MINOR 5 +#define ZSTD_VERSION_RELEASE 0 +#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE) -XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void); -XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr); +/*! ZSTD_versionNumber() : + * Return runtime library version, the value is (MAJOR*100*100 + MINOR*100 + RELEASE). */ +ZSTDLIB_API unsigned ZSTD_versionNumber(void); +#define ZSTD_LIB_VERSION ZSTD_VERSION_MAJOR.ZSTD_VERSION_MINOR.ZSTD_VERSION_RELEASE +#define ZSTD_QUOTE(str) #str +#define ZSTD_EXPAND_AND_QUOTE(str) ZSTD_QUOTE(str) +#define ZSTD_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LIB_VERSION) -/* hash streaming */ +/*! ZSTD_versionString() : + * Return runtime library version, like "1.4.5". Requires v1.3.0+. */ +ZSTDLIB_API const char* ZSTD_versionString(void); -XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, unsigned int seed); -XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length); -XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr); +/* ************************************* + * Default constant + ***************************************/ +#ifndef ZSTD_CLEVEL_DEFAULT +# define ZSTD_CLEVEL_DEFAULT 3 +#endif -XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, unsigned long long seed); -XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length); -XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* statePtr); +/* ************************************* + * Constants + ***************************************/ -/* -These functions generate the xxHash of an input provided in multiple segments. -Note that, for small input, they are slower than single-call functions, due to state management. -For small input, prefer `XXH32()` and `XXH64()` . 
+/* All magic numbers are supposed read/written to/from files/memory using little-endian convention */ +#define ZSTD_MAGICNUMBER 0xFD2FB528 /* valid since v0.8.0 */ +#define ZSTD_MAGIC_DICTIONARY 0xEC30A437 /* valid since v0.7.0 */ +#define ZSTD_MAGIC_SKIPPABLE_START 0x184D2A50 /* all 16 values, from 0x184D2A50 to 0x184D2A5F, signal the beginning of a skippable frame */ +#define ZSTD_MAGIC_SKIPPABLE_MASK 0xFFFFFFF0 -XXH state must first be allocated, using XXH*_createState() . +#define ZSTD_BLOCKSIZELOG_MAX 17 +#define ZSTD_BLOCKSIZE_MAX (1<= `ZSTD_compressBound(srcSize)`. + * @return : compressed size written into `dst` (<= `dstCapacity), + * or an error code if it fails (which can be tested using ZSTD_isError()). */ +ZSTDLIB_API size_t ZSTD_compress( void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + int compressionLevel); -Finally, a hash value can be produced anytime, by using XXH*_digest(). -This function returns the nn-bits hash as an int or long long. +/*! ZSTD_decompress() : + * `compressedSize` : must be the _exact_ size of some number of compressed and/or skippable frames. + * `dstCapacity` is an upper bound of originalSize to regenerate. + * If user cannot imply a maximum upper bound, it's better to use streaming mode to decompress data. + * @return : the number of bytes decompressed into `dst` (<= `dstCapacity`), + * or an errorCode if it fails (which can be tested using ZSTD_isError()). */ +ZSTDLIB_API size_t ZSTD_decompress( void* dst, size_t dstCapacity, + const void* src, size_t compressedSize); -It's still possible to continue inserting input into the hash state after a digest, -and generate some new hashes later on, by calling again XXH*_digest(). +/*! ZSTD_getFrameContentSize() : requires v1.3.0+ + * `src` should point to the start of a ZSTD encoded frame. + * `srcSize` must be at least as large as the frame header. + * hint : any size >= `ZSTD_frameHeaderSize_max` is large enough. + * @return : - decompressed size of `src` frame content, if known + * - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined + * - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small) + * note 1 : a 0 return value means the frame is valid but "empty". + * note 2 : decompressed size is an optional field, it may not be present, typically in streaming mode. + * When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size. + * In which case, it's necessary to use streaming mode to decompress data. + * Optionally, application can rely on some implicit limit, + * as ZSTD_decompress() only needs an upper bound of decompressed size. + * (For example, data could be necessarily cut into blocks <= 16 KB). + * note 3 : decompressed size is always present when compression is completed using single-pass functions, + * such as ZSTD_compress(), ZSTD_compressCCtx() ZSTD_compress_usingDict() or ZSTD_compress_usingCDict(). + * note 4 : decompressed size can be very large (64-bits value), + * potentially larger than what local system can handle as a single memory segment. + * In which case, it's necessary to use streaming mode to decompress data. + * note 5 : If source is untrusted, decompressed size could be wrong or intentionally modified. + * Always ensure return value fits within application's authorized limits. + * Each application can set its own limits. 
+ * note 6 : This function replaces ZSTD_getDecompressedSize() */ +#define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1) +#define ZSTD_CONTENTSIZE_ERROR (0ULL - 2) +ZSTDLIB_API unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize); -When done, free XXH state space if it was allocated dynamically. -*/ +/*! ZSTD_getDecompressedSize() : + * NOTE: This function is now obsolete, in favor of ZSTD_getFrameContentSize(). + * Both functions work the same way, but ZSTD_getDecompressedSize() blends + * "empty", "unknown" and "error" results to the same return value (0), + * while ZSTD_getFrameContentSize() gives them separate return values. + * @return : decompressed size of `src` frame content _if known and not empty_, 0 otherwise. */ +ZSTDLIB_API unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize); +/*! ZSTD_findFrameCompressedSize() : Requires v1.4.0+ + * `src` should point to the start of a ZSTD frame or skippable frame. + * `srcSize` must be >= first frame size + * @return : the compressed size of the first frame starting at `src`, + * suitable to pass as `srcSize` to `ZSTD_decompress` or similar, + * or an error code if input is invalid */ +ZSTDLIB_API size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize); -/* ************************** -* Utils -****************************/ -#if !(defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) /* ! C99 */ -# define restrict /* disable restrict */ -#endif -XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* restrict dst_state, const XXH32_state_t* restrict src_state); -XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* restrict dst_state, const XXH64_state_t* restrict src_state); +/*====== Helper functions ======*/ +#define ZSTD_COMPRESSBOUND(srcSize) ((srcSize) + ((srcSize)>>8) + (((srcSize) < (128<<10)) ? (((128<<10) - (srcSize)) >> 11) /* margin, from 64 to 0 */ : 0)) /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */ +ZSTDLIB_API size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case single-pass scenario */ +ZSTDLIB_API unsigned ZSTD_isError(size_t code); /*!< tells if a `size_t` function result is an error code */ +ZSTDLIB_API const char* ZSTD_getErrorName(size_t code); /*!< provides readable string from an error code */ +ZSTDLIB_API int ZSTD_minCLevel(void); /*!< minimum negative compression level allowed, requires v1.4.0+ */ +ZSTDLIB_API int ZSTD_maxCLevel(void); /*!< maximum compression level available */ +ZSTDLIB_API int ZSTD_defaultCLevel(void); /*!< default compression level, specified by ZSTD_CLEVEL_DEFAULT, requires v1.5.0+ */ -/* ************************** -* Canonical representation -****************************/ -/* Default result type for XXH functions are primitive unsigned 32 and 64 bits. -* The canonical representation uses human-readable write convention, aka big-endian (large digits first). -* These functions allow transformation of hash result into and from its canonical format. -* This way, hash values can be written into a file / memory, and remain comparable on different systems and programs. -*/ -typedef struct { unsigned char digest[4]; } XXH32_canonical_t; -typedef struct { unsigned char digest[8]; } XXH64_canonical_t; +/*************************************** +* Explicit context +***************************************/ +/*= Compression context + * When compressing many times, + * it is recommended to allocate a context just once, + * and re-use it for each successive compression operation. 
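For reference, a minimal sketch of the one-shot round trip documented above: size the destination with ZSTD_compressBound(), compress with ZSTD_compress(), recover the original size with ZSTD_getFrameContentSize(), then ZSTD_decompress(). The buffer names are hypothetical, zstd.h is assumed to be on the include path, and error handling is kept terse.

    #include <stdlib.h>
    #include <string.h>
    #include "zstd.h"

    /* Round-trip one buffer through the one-shot API; returns 0 on success. */
    static int roundTripOneShot(const void* src, size_t srcSize)
    {
        size_t const bound = ZSTD_compressBound(srcSize);
        void* const cBuf = malloc(bound);
        if (cBuf == NULL) return -1;

        size_t const cSize = ZSTD_compress(cBuf, bound, src, srcSize, ZSTD_CLEVEL_DEFAULT);
        if (ZSTD_isError(cSize)) { free(cBuf); return -1; }  /* see ZSTD_getErrorName(cSize) */

        /* single-pass compression always records the content size in the frame header (note 3 above) */
        unsigned long long const rSize = ZSTD_getFrameContentSize(cBuf, cSize);
        if (rSize == ZSTD_CONTENTSIZE_ERROR || rSize == ZSTD_CONTENTSIZE_UNKNOWN) { free(cBuf); return -1; }

        void* const dBuf = malloc(rSize ? (size_t)rSize : 1);
        if (dBuf == NULL) { free(cBuf); return -1; }
        size_t const dSize = ZSTD_decompress(dBuf, (size_t)rSize, cBuf, cSize);

        int const ok = !ZSTD_isError(dSize) && dSize == srcSize && memcmp(dBuf, src, srcSize) == 0;
        free(dBuf);
        free(cBuf);
        return ok ? 0 : -1;
    }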
+ * This will make workload friendlier for system's memory. + * Note : re-using context is just a speed / resource optimization. + * It doesn't change the compression ratio, which remains identical. + * Note 2 : In multi-threaded environments, + * use one different context per thread for parallel execution. + */ +typedef struct ZSTD_CCtx_s ZSTD_CCtx; +ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void); +ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx); /* accept NULL pointer */ -XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash); -XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash); +/*! ZSTD_compressCCtx() : + * Same as ZSTD_compress(), using an explicit ZSTD_CCtx. + * Important : in order to behave similarly to `ZSTD_compress()`, + * this function compresses at requested compression level, + * __ignoring any other parameter__ . + * If any advanced parameter was set using the advanced API, + * they will all be reset. Only `compressionLevel` remains. + */ +ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + int compressionLevel); -XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src); -XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src); +/*= Decompression context + * When decompressing many times, + * it is recommended to allocate a context only once, + * and re-use it for each successive compression operation. + * This will make workload friendlier for system's memory. + * Use one context per thread for parallel execution. */ +typedef struct ZSTD_DCtx_s ZSTD_DCtx; +ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx(void); +ZSTDLIB_API size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx); /* accept NULL pointer */ -#endif /* XXHASH_H_5627135585666179 */ +/*! ZSTD_decompressDCtx() : + * Same as ZSTD_decompress(), + * requires an allocated ZSTD_DCtx. + * Compatible with sticky parameters. + */ +ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize); +/********************************************* +* Advanced compression API (Requires v1.4.0+) +**********************************************/ -/* ================================================================================================ - This section contains definitions which are not guaranteed to remain stable. - They may change in future versions, becoming incompatible with a different version of the library. - They shall only be used with static linking. - Never use these definitions in association with dynamic linking ! -=================================================================================================== */ -#if defined(XXH_STATIC_LINKING_ONLY) && !defined(XXH_STATIC_H_3543687687345) -#define XXH_STATIC_H_3543687687345 +/* API design : + * Parameters are pushed one by one into an existing context, + * using ZSTD_CCtx_set*() functions. + * Pushed parameters are sticky : they are valid for next compressed frame, and any subsequent frame. + * "sticky" parameters are applicable to `ZSTD_compress2()` and `ZSTD_compressStream*()` ! + * __They do not apply to "simple" one-shot variants such as ZSTD_compressCCtx()__ . + * + * It's possible to reset all parameters to "default" using ZSTD_CCtx_reset(). + * + * This API supercedes all other "advanced" API entry points in the experimental section. 
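A minimal sketch of the context-reuse pattern described above: allocate one ZSTD_CCtx, reuse it via ZSTD_compressCCtx() for several independent inputs, and free it once. The buffers/sizes arrays are hypothetical caller data.

    #include <stdlib.h>
    #include "zstd.h"

    /* Compress `count` independent buffers, reusing a single compression context. */
    static int compressMany(const void* const* buffers, const size_t* sizes, size_t count, int level)
    {
        ZSTD_CCtx* const cctx = ZSTD_createCCtx();
        if (cctx == NULL) return -1;

        int result = 0;
        for (size_t i = 0; i < count && result == 0; ++i) {
            size_t const bound = ZSTD_compressBound(sizes[i]);
            void* const dst = malloc(bound);
            if (dst == NULL) { result = -1; break; }

            /* behaves like ZSTD_compress(), but reuses cctx's internal workspace between calls */
            size_t const cSize = ZSTD_compressCCtx(cctx, dst, bound, buffers[i], sizes[i], level);
            if (ZSTD_isError(cSize)) result = -1;
            /* ... hand `dst`/`cSize` to the caller here ... */
            free(dst);
        }
        ZSTD_freeCCtx(cctx);  /* accepts NULL, frees all internal state */
        return result;
    }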
+ * In the future, we expect to remove from experimental API entry points which are redundant with this API. + */ -/* These definitions are only meant to allow allocation of XXH state - statically, on stack, or in a struct for example. - Do not use members directly. */ - struct XXH32_state_s { - unsigned total_len_32; - unsigned large_len; - unsigned v1; - unsigned v2; - unsigned v3; - unsigned v4; - unsigned mem32[4]; /* buffer defined as U32 for alignment */ - unsigned memsize; - unsigned reserved; /* never read nor write, will be removed in a future version */ - }; /* typedef'd to XXH32_state_t */ +/* Compression strategies, listed from fastest to strongest */ +typedef enum { ZSTD_fast=1, + ZSTD_dfast=2, + ZSTD_greedy=3, + ZSTD_lazy=4, + ZSTD_lazy2=5, + ZSTD_btlazy2=6, + ZSTD_btopt=7, + ZSTD_btultra=8, + ZSTD_btultra2=9 + /* note : new strategies _might_ be added in the future. + Only the order (from fast to strong) is guaranteed */ +} ZSTD_strategy; - struct XXH64_state_s { - unsigned long long total_len; - unsigned long long v1; - unsigned long long v2; - unsigned long long v3; - unsigned long long v4; - unsigned long long mem64[4]; /* buffer defined as U64 for alignment */ - unsigned memsize; - unsigned reserved[2]; /* never read nor write, will be removed in a future version */ - }; /* typedef'd to XXH64_state_t */ +typedef enum { + /* compression parameters + * Note: When compressing with a ZSTD_CDict these parameters are superseded + * by the parameters used to construct the ZSTD_CDict. + * See ZSTD_CCtx_refCDict() for more info (superseded-by-cdict). */ + ZSTD_c_compressionLevel=100, /* Set compression parameters according to pre-defined cLevel table. + * Note that exact compression parameters are dynamically determined, + * depending on both compression level and srcSize (when known). + * Default level is ZSTD_CLEVEL_DEFAULT==3. + * Special: value 0 means default, which is controlled by ZSTD_CLEVEL_DEFAULT. + * Note 1 : it's possible to pass a negative compression level. + * Note 2 : setting a level does not automatically set all other compression parameters + * to default. Setting this will however eventually dynamically impact the compression + * parameters which have not been manually set. The manually set + * ones will 'stick'. */ + /* Advanced compression parameters : + * It's possible to pin down compression parameters to some specific values. + * In which case, these values are no longer dynamically selected by the compressor */ + ZSTD_c_windowLog=101, /* Maximum allowed back-reference distance, expressed as power of 2. + * This will set a memory budget for streaming decompression, + * with larger values requiring more memory + * and typically compressing more. + * Must be clamped between ZSTD_WINDOWLOG_MIN and ZSTD_WINDOWLOG_MAX. + * Special: value 0 means "use default windowLog". + * Note: Using a windowLog greater than ZSTD_WINDOWLOG_LIMIT_DEFAULT + * requires explicitly allowing such size at streaming decompression stage. */ + ZSTD_c_hashLog=102, /* Size of the initial probe table, as a power of 2. + * Resulting memory usage is (1 << (hashLog+2)). + * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX. + * Larger tables improve compression ratio of strategies <= dFast, + * and improve speed of strategies > dFast. + * Special: value 0 means "use default hashLog". */ + ZSTD_c_chainLog=103, /* Size of the multi-probe search table, as a power of 2. + * Resulting memory usage is (1 << (chainLog+2)). 
+ * Must be clamped between ZSTD_CHAINLOG_MIN and ZSTD_CHAINLOG_MAX. + * Larger tables result in better and slower compression. + * This parameter is useless for "fast" strategy. + * It's still useful when using "dfast" strategy, + * in which case it defines a secondary probe table. + * Special: value 0 means "use default chainLog". */ + ZSTD_c_searchLog=104, /* Number of search attempts, as a power of 2. + * More attempts result in better and slower compression. + * This parameter is useless for "fast" and "dFast" strategies. + * Special: value 0 means "use default searchLog". */ + ZSTD_c_minMatch=105, /* Minimum size of searched matches. + * Note that Zstandard can still find matches of smaller size, + * it just tweaks its search algorithm to look for this size and larger. + * Larger values increase compression and decompression speed, but decrease ratio. + * Must be clamped between ZSTD_MINMATCH_MIN and ZSTD_MINMATCH_MAX. + * Note that currently, for all strategies < btopt, effective minimum is 4. + * , for all strategies > fast, effective maximum is 6. + * Special: value 0 means "use default minMatchLength". */ + ZSTD_c_targetLength=106, /* Impact of this field depends on strategy. + * For strategies btopt, btultra & btultra2: + * Length of Match considered "good enough" to stop search. + * Larger values make compression stronger, and slower. + * For strategy fast: + * Distance between match sampling. + * Larger values make compression faster, and weaker. + * Special: value 0 means "use default targetLength". */ + ZSTD_c_strategy=107, /* See ZSTD_strategy enum definition. + * The higher the value of selected strategy, the more complex it is, + * resulting in stronger and slower compression. + * Special: value 0 means "use default strategy". */ + /* LDM mode parameters */ + ZSTD_c_enableLongDistanceMatching=160, /* Enable long distance matching. + * This parameter is designed to improve compression ratio + * for large inputs, by finding large matches at long distance. + * It increases memory usage and window size. + * Note: enabling this parameter increases default ZSTD_c_windowLog to 128 MB + * except when expressly set to a different value. + * Note: will be enabled by default if ZSTD_c_windowLog >= 128 MB and + * compression strategy >= ZSTD_btopt (== compression level 16+) */ + ZSTD_c_ldmHashLog=161, /* Size of the table for long distance matching, as a power of 2. + * Larger values increase memory usage and compression ratio, + * but decrease compression speed. + * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX + * default: windowlog - 7. + * Special: value 0 means "automatically determine hashlog". */ + ZSTD_c_ldmMinMatch=162, /* Minimum match size for long distance matcher. + * Larger/too small values usually decrease compression ratio. + * Must be clamped between ZSTD_LDM_MINMATCH_MIN and ZSTD_LDM_MINMATCH_MAX. + * Special: value 0 means "use default value" (default: 64). */ + ZSTD_c_ldmBucketSizeLog=163, /* Log size of each bucket in the LDM hash table for collision resolution. + * Larger values improve collision resolution but decrease compression speed. + * The maximum value is ZSTD_LDM_BUCKETSIZELOG_MAX. + * Special: value 0 means "use default value" (default: 3). */ + ZSTD_c_ldmHashRateLog=164, /* Frequency of inserting/looking up entries into the LDM hash table. + * Must be clamped between 0 and (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN). + * Default is MAX(0, (windowLog - ldmHashLog)), optimizing hash table usage. + * Larger values improve compression speed. 
+ * Deviating far from default value will likely result in a compression ratio decrease. + * Special: value 0 means "automatically determine hashRateLog". */ -# ifdef XXH_PRIVATE_API -/**** skipping file: xxhash.c ****/ -# endif + /* frame parameters */ + ZSTD_c_contentSizeFlag=200, /* Content size will be written into frame header _whenever known_ (default:1) + * Content size must be known at the beginning of compression. + * This is automatically the case when using ZSTD_compress2(), + * For streaming scenarios, content size must be provided with ZSTD_CCtx_setPledgedSrcSize() */ + ZSTD_c_checksumFlag=201, /* A 32-bits checksum of content is written at end of frame (default:0) */ + ZSTD_c_dictIDFlag=202, /* When applicable, dictionary's ID is written into frame header (default:1) */ -#endif /* XXH_STATIC_LINKING_ONLY && XXH_STATIC_H_3543687687345 */ + /* multi-threading parameters */ + /* These parameters are only active if multi-threading is enabled (compiled with build macro ZSTD_MULTITHREAD). + * Otherwise, trying to set any other value than default (0) will be a no-op and return an error. + * In a situation where it's unknown if the linked library supports multi-threading or not, + * setting ZSTD_c_nbWorkers to any value >= 1 and consulting the return value provides a quick way to check this property. + */ + ZSTD_c_nbWorkers=400, /* Select how many threads will be spawned to compress in parallel. + * When nbWorkers >= 1, triggers asynchronous mode when invoking ZSTD_compressStream*() : + * ZSTD_compressStream*() consumes input and flush output if possible, but immediately gives back control to caller, + * while compression is performed in parallel, within worker thread(s). + * (note : a strong exception to this rule is when first invocation of ZSTD_compressStream2() sets ZSTD_e_end : + * in which case, ZSTD_compressStream2() delegates to ZSTD_compress2(), which is always a blocking call). + * More workers improve speed, but also increase memory usage. + * Default value is `0`, aka "single-threaded mode" : no worker is spawned, + * compression is performed inside Caller's thread, and all invocations are blocking */ + ZSTD_c_jobSize=401, /* Size of a compression job. This value is enforced only when nbWorkers >= 1. + * Each compression job is completed in parallel, so this value can indirectly impact the nb of active threads. + * 0 means default, which is dynamically determined based on compression parameters. + * Job size must be a minimum of overlap size, or ZSTDMT_JOBSIZE_MIN (= 512 KB), whichever is largest. + * The minimum size is automatically and transparently enforced. */ + ZSTD_c_overlapLog=402, /* Control the overlap size, as a fraction of window size. + * The overlap size is an amount of data reloaded from previous job at the beginning of a new job. + * It helps preserve compression ratio, while each job is compressed in parallel. + * This value is enforced only when nbWorkers >= 1. + * Larger values increase compression ratio, but decrease speed. + * Possible values range from 0 to 9 : + * - 0 means "default" : value will be determined by the library, depending on strategy + * - 1 means "no overlap" + * - 9 means "full overlap", using a full window size. 
+ * Each intermediate rank increases/decreases load size by a factor 2 : + * 9: full window; 8: w/2; 7: w/4; 6: w/8; 5:w/16; 4: w/32; 3:w/64; 2:w/128; 1:no overlap; 0:default + * default value varies between 6 and 9, depending on strategy */ + /* note : additional experimental parameters are also available + * within the experimental section of the API. + * At the time of this writing, they include : + * ZSTD_c_rsyncable + * ZSTD_c_format + * ZSTD_c_forceMaxWindow + * ZSTD_c_forceAttachDict + * ZSTD_c_literalCompressionMode + * ZSTD_c_targetCBlockSize + * ZSTD_c_srcSizeHint + * ZSTD_c_enableDedicatedDictSearch + * ZSTD_c_stableInBuffer + * ZSTD_c_stableOutBuffer + * ZSTD_c_blockDelimiters + * ZSTD_c_validateSequences + * ZSTD_c_splitBlocks + * ZSTD_c_useRowMatchFinder + * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. + * note : never ever use experimentalParam? names directly; + * also, the enums values themselves are unstable and can still change. + */ + ZSTD_c_experimentalParam1=500, + ZSTD_c_experimentalParam2=10, + ZSTD_c_experimentalParam3=1000, + ZSTD_c_experimentalParam4=1001, + ZSTD_c_experimentalParam5=1002, + ZSTD_c_experimentalParam6=1003, + ZSTD_c_experimentalParam7=1004, + ZSTD_c_experimentalParam8=1005, + ZSTD_c_experimentalParam9=1006, + ZSTD_c_experimentalParam10=1007, + ZSTD_c_experimentalParam11=1008, + ZSTD_c_experimentalParam12=1009, + ZSTD_c_experimentalParam13=1010, + ZSTD_c_experimentalParam14=1011, + ZSTD_c_experimentalParam15=1012 +} ZSTD_cParameter; -#if defined (__cplusplus) -} -#endif -/**** ended inlining xxhash.h ****/ +typedef struct { + size_t error; + int lowerBound; + int upperBound; +} ZSTD_bounds; +/*! ZSTD_cParam_getBounds() : + * All parameters must belong to an interval with lower and upper bounds, + * otherwise they will either trigger an error or be automatically clamped. + * @return : a structure, ZSTD_bounds, which contains + * - an error status field, which must be tested using ZSTD_isError() + * - lower and upper bounds, both inclusive + */ +ZSTDLIB_API ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter cParam); -/* ************************************* -* Compiler Specific Options -***************************************/ -#if defined (__GNUC__) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ -# define INLINE_KEYWORD inline -#else -# define INLINE_KEYWORD -#endif - -#if defined(__GNUC__) || defined(__ICCARM__) -# define FORCE_INLINE_ATTR __attribute__((always_inline)) -#elif defined(_MSC_VER) -# define FORCE_INLINE_ATTR __forceinline -#else -# define FORCE_INLINE_ATTR -#endif - -#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR - - -#ifdef _MSC_VER -# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ -#endif - - -/* ************************************* -* Basic Types -***************************************/ -#ifndef MEM_MODULE -# define MEM_MODULE -# if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) -# include - typedef uint8_t BYTE; - typedef uint16_t U16; - typedef uint32_t U32; - typedef int32_t S32; - typedef uint64_t U64; -# else - typedef unsigned char BYTE; - typedef unsigned short U16; - typedef unsigned int U32; - typedef signed int S32; - typedef unsigned long long U64; /* if your compiler doesn't support unsigned long long, replace by another 64-bit type here. Note that xxhash.h will also need to be updated. 
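A small sketch of querying parameter bounds with ZSTD_cParam_getBounds(), declared above, and clamping a caller-supplied compression level into the advertised range; the `requested` value is a hypothetical input.

    /* Clamp a requested compression level to the range advertised by the library. */
    static int clampCompressionLevel(int requested)
    {
        ZSTD_bounds const bounds = ZSTD_cParam_getBounds(ZSTD_c_compressionLevel);
        if (ZSTD_isError(bounds.error)) return ZSTD_CLEVEL_DEFAULT;  /* fall back to the default level */
        if (requested < bounds.lowerBound) return bounds.lowerBound; /* bounds are inclusive */
        if (requested > bounds.upperBound) return bounds.upperBound;
        return requested;
    }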
*/ -# endif -#endif - - -#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2)) - -/* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */ -static U32 XXH_read32(const void* memPtr) { return *(const U32*) memPtr; } -static U64 XXH_read64(const void* memPtr) { return *(const U64*) memPtr; } - -#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1)) - -/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ -/* currently only defined for gcc and icc */ -typedef union { U32 u32; U64 u64; } __attribute__((packed)) unalign; - -static U32 XXH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } -static U64 XXH_read64(const void* ptr) { return ((const unalign*)ptr)->u64; } - -#else +/*! ZSTD_CCtx_setParameter() : + * Set one compression parameter, selected by enum ZSTD_cParameter. + * All parameters have valid bounds. Bounds can be queried using ZSTD_cParam_getBounds(). + * Providing a value beyond bound will either clamp it, or trigger an error (depending on parameter). + * Setting a parameter is generally only possible during frame initialization (before starting compression). + * Exception : when using multi-threading mode (nbWorkers >= 1), + * the following parameters can be updated _during_ compression (within same frame): + * => compressionLevel, hashLog, chainLog, searchLog, minMatch, targetLength and strategy. + * new parameters will be active for next job only (after a flush()). + * @return : an error code (which can be tested using ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value); -/* portable and safe solution. Generally efficient. - * see : http://stackoverflow.com/a/32095106/646947 +/*! ZSTD_CCtx_setPledgedSrcSize() : + * Total input data size to be compressed as a single frame. + * Value will be written in frame header, unless if explicitly forbidden using ZSTD_c_contentSizeFlag. + * This value will also be controlled at end of frame, and trigger an error if not respected. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Note 1 : pledgedSrcSize==0 actually means zero, aka an empty frame. + * In order to mean "unknown content size", pass constant ZSTD_CONTENTSIZE_UNKNOWN. + * ZSTD_CONTENTSIZE_UNKNOWN is default value for any new frame. + * Note 2 : pledgedSrcSize is only valid once, for the next frame. + * It's discarded at the end of the frame, and replaced by ZSTD_CONTENTSIZE_UNKNOWN. + * Note 3 : Whenever all input data is provided and consumed in a single round, + * for example with ZSTD_compress2(), + * or invoking immediately ZSTD_compressStream2(,,,ZSTD_e_end), + * this value is automatically overridden by srcSize instead. */ +ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize); -static U32 XXH_read32(const void* memPtr) -{ - U32 val; - memcpy(&val, memPtr, sizeof(val)); - return val; -} +typedef enum { + ZSTD_reset_session_only = 1, + ZSTD_reset_parameters = 2, + ZSTD_reset_session_and_parameters = 3 +} ZSTD_ResetDirective; -static U64 XXH_read64(const void* memPtr) -{ - U64 val; - memcpy(&val, memPtr, sizeof(val)); - return val; -} +/*! ZSTD_CCtx_reset() : + * There are 2 different things that can be reset, independently or jointly : + * - The session : will stop compressing current frame, and make CCtx ready to start a new one. 
+ * Useful after an error, or to interrupt any ongoing compression. + * Any internal data not yet flushed is cancelled. + * Compression parameters and dictionary remain unchanged. + * They will be used to compress next frame. + * Resetting session never fails. + * - The parameters : changes all parameters back to "default". + * This removes any reference to any dictionary too. + * Parameters can only be changed between 2 sessions (i.e. no compression is currently ongoing) + * otherwise the reset fails, and function returns an error value (which can be tested using ZSTD_isError()) + * - Both : similar to resetting the session, followed by resetting parameters. + */ +ZSTDLIB_API size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset); -#endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */ +/*! ZSTD_compress2() : + * Behave the same as ZSTD_compressCCtx(), but compression parameters are set using the advanced API. + * ZSTD_compress2() always starts a new frame. + * Should cctx hold data from a previously unfinished frame, everything about it is forgotten. + * - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*() + * - The function is always blocking, returns when compression is completed. + * Hint : compression runs faster if `dstCapacity` >= `ZSTD_compressBound(srcSize)`. + * @return : compressed size written into `dst` (<= `dstCapacity), + * or an error code if it fails (which can be tested using ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_compress2( ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize); -/* **************************************** -* Compiler-specific Functions and Macros -******************************************/ -#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) +/*********************************************** +* Advanced decompression API (Requires v1.4.0+) +************************************************/ -/* Note : although _rotl exists for minGW (GCC under windows), performance seems poor */ -#if defined(_MSC_VER) -# define XXH_rotl32(x,r) _rotl(x,r) -# define XXH_rotl64(x,r) _rotl64(x,r) -#else -#if defined(__ICCARM__) -# include -# define XXH_rotl32(x,r) __ROR(x,(32 - r)) -#else -# define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r))) -#endif -# define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r))) -#endif +/* The advanced API pushes parameters one by one into an existing DCtx context. + * Parameters are sticky, and remain valid for all following frames + * using the same DCtx context. + * It's possible to reset parameters to default values using ZSTD_DCtx_reset(). + * Note : This API is compatible with existing ZSTD_decompressDCtx() and ZSTD_decompressStream(). + * Therefore, no new decompression function is necessary. 
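A hedged sketch of the advanced compression flow above: reset the context, push sticky parameters with ZSTD_CCtx_setParameter(), then compress a whole frame with ZSTD_compress2(). Parameter values are arbitrary examples, and the setParameter return codes are elided for brevity (check them with ZSTD_isError() in real code).

    #include "zstd.h"

    /* Compress with sticky parameters via the advanced API; returns compressed size or an error code. */
    static size_t compressAdvanced(ZSTD_CCtx* cctx,
                                   void* dst, size_t dstCapacity,
                                   const void* src, size_t srcSize)
    {
        /* start from a clean slate: drop any sticky parameters left over from earlier frames */
        ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);

        ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 19);
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1);  /* append a 32-bit content checksum */
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 4);     /* no-op/error unless built with ZSTD_MULTITHREAD */

        /* one blocking call; the parameters set above apply to this frame */
        return ZSTD_compress2(cctx, dst, dstCapacity, src, srcSize);
    }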
+ */ -#if defined(_MSC_VER) /* Visual Studio */ -# define XXH_swap32 _byteswap_ulong -# define XXH_swap64 _byteswap_uint64 -#elif GCC_VERSION >= 403 -# define XXH_swap32 __builtin_bswap32 -# define XXH_swap64 __builtin_bswap64 -#else -static U32 XXH_swap32 (U32 x) -{ - return ((x << 24) & 0xff000000 ) | - ((x << 8) & 0x00ff0000 ) | - ((x >> 8) & 0x0000ff00 ) | - ((x >> 24) & 0x000000ff ); -} -static U64 XXH_swap64 (U64 x) -{ - return ((x << 56) & 0xff00000000000000ULL) | - ((x << 40) & 0x00ff000000000000ULL) | - ((x << 24) & 0x0000ff0000000000ULL) | - ((x << 8) & 0x000000ff00000000ULL) | - ((x >> 8) & 0x00000000ff000000ULL) | - ((x >> 24) & 0x0000000000ff0000ULL) | - ((x >> 40) & 0x000000000000ff00ULL) | - ((x >> 56) & 0x00000000000000ffULL); -} -#endif +typedef enum { + ZSTD_d_windowLogMax=100, /* Select a size limit (in power of 2) beyond which + * the streaming API will refuse to allocate memory buffer + * in order to protect the host from unreasonable memory requirements. + * This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode. + * By default, a decompression context accepts window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT). + * Special: value 0 means "use default maximum windowLog". */ -/* ************************************* -* Architecture Macros -***************************************/ -typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess; + /* note : additional experimental parameters are also available + * within the experimental section of the API. + * At the time of this writing, they include : + * ZSTD_d_format + * ZSTD_d_stableOutBuffer + * ZSTD_d_forceIgnoreChecksum + * ZSTD_d_refMultipleDDicts + * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. + * note : never ever use experimentalParam? names directly + */ + ZSTD_d_experimentalParam1=1000, + ZSTD_d_experimentalParam2=1001, + ZSTD_d_experimentalParam3=1002, + ZSTD_d_experimentalParam4=1003 -/* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line */ -#ifndef XXH_CPU_LITTLE_ENDIAN - static const int g_one = 1; -# define XXH_CPU_LITTLE_ENDIAN (*(const char*)(&g_one)) -#endif +} ZSTD_dParameter; +/*! ZSTD_dParam_getBounds() : + * All parameters must belong to an interval with lower and upper bounds, + * otherwise they will either trigger an error or be automatically clamped. + * @return : a structure, ZSTD_bounds, which contains + * - an error status field, which must be tested using ZSTD_isError() + * - both lower and upper bounds, inclusive + */ +ZSTDLIB_API ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam); -/* *************************** -* Memory reads -*****************************/ -typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment; +/*! ZSTD_DCtx_setParameter() : + * Set one compression parameter, selected by enum ZSTD_dParameter. + * All parameters have valid bounds. Bounds can be queried using ZSTD_dParam_getBounds(). + * Providing a value beyond bound will either clamp it, or trigger an error (depending on parameter). + * Setting a parameter is only possible during frame initialization (before starting decompression). + * @return : 0, or an error code (which can be tested using ZSTD_isError()). 
+ */ +ZSTDLIB_API size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int value); -FORCE_INLINE_TEMPLATE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align) -{ - if (align==XXH_unaligned) - return endian==XXH_littleEndian ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr)); - else - return endian==XXH_littleEndian ? *(const U32*)ptr : XXH_swap32(*(const U32*)ptr); -} +/*! ZSTD_DCtx_reset() : + * Return a DCtx to clean state. + * Session and parameters can be reset jointly or separately. + * Parameters can only be reset when no active frame is being decompressed. + * @return : 0, or an error code, which can be tested with ZSTD_isError() + */ +ZSTDLIB_API size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset); -FORCE_INLINE_TEMPLATE U32 XXH_readLE32(const void* ptr, XXH_endianess endian) -{ - return XXH_readLE32_align(ptr, endian, XXH_unaligned); -} -static U32 XXH_readBE32(const void* ptr) -{ - return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr); -} +/**************************** +* Streaming +****************************/ -FORCE_INLINE_TEMPLATE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align) -{ - if (align==XXH_unaligned) - return endian==XXH_littleEndian ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr)); - else - return endian==XXH_littleEndian ? *(const U64*)ptr : XXH_swap64(*(const U64*)ptr); -} +typedef struct ZSTD_inBuffer_s { + const void* src; /**< start of input buffer */ + size_t size; /**< size of input buffer */ + size_t pos; /**< position where reading stopped. Will be updated. Necessarily 0 <= pos <= size */ +} ZSTD_inBuffer; -FORCE_INLINE_TEMPLATE U64 XXH_readLE64(const void* ptr, XXH_endianess endian) -{ - return XXH_readLE64_align(ptr, endian, XXH_unaligned); -} +typedef struct ZSTD_outBuffer_s { + void* dst; /**< start of output buffer */ + size_t size; /**< size of output buffer */ + size_t pos; /**< position where writing stopped. Will be updated. Necessarily 0 <= pos <= size */ +} ZSTD_outBuffer; -static U64 XXH_readBE64(const void* ptr) -{ - return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr); -} -/* ************************************* -* Macros -***************************************/ -#define XXH_STATIC_ASSERT(c) { enum { XXH_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ - - -/* ************************************* -* Constants -***************************************/ -static const U32 PRIME32_1 = 2654435761U; -static const U32 PRIME32_2 = 2246822519U; -static const U32 PRIME32_3 = 3266489917U; -static const U32 PRIME32_4 = 668265263U; -static const U32 PRIME32_5 = 374761393U; +/*-*********************************************************************** +* Streaming compression - HowTo +* +* A ZSTD_CStream object is required to track streaming operation. +* Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources. +* ZSTD_CStream objects can be reused multiple times on consecutive compression operations. +* It is recommended to re-use ZSTD_CStream since it will play nicer with system's memory, by re-using already allocated memory. +* +* For parallel execution, use one separate ZSTD_CStream per thread. +* +* note : since v1.3.0, ZSTD_CStream and ZSTD_CCtx are the same thing. +* +* Parameters are sticky : when starting a new compression on the same context, +* it will re-use the same sticky parameters as previous compression session. 
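A hedged sketch of the decompression-side parameter API above: reset a reusable ZSTD_DCtx, cap the accepted window with ZSTD_d_windowLogMax, then decompress with ZSTD_decompressDCtx() as declared earlier. The value 27 (a 128 MB window) is an arbitrary example, and per the note above the cap primarily protects the streaming path.

    /* Decompress with a capped window size; returns decompressed size or an error code. */
    static size_t decompressCapped(ZSTD_DCtx* dctx,
                                   void* dst, size_t dstCapacity,
                                   const void* src, size_t srcSize)
    {
        ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters);
        /* refuse frames requiring more than a 2^27-byte window (enforced by the streaming decoder) */
        ZSTD_DCtx_setParameter(dctx, ZSTD_d_windowLogMax, 27);
        return ZSTD_decompressDCtx(dctx, dst, dstCapacity, src, srcSize);
    }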
+* When in doubt, it's recommended to fully initialize the context before usage. +* Use ZSTD_CCtx_reset() to reset the context and ZSTD_CCtx_setParameter(), +* ZSTD_CCtx_setPledgedSrcSize(), or ZSTD_CCtx_loadDictionary() and friends to +* set more specific parameters, the pledged source size, or load a dictionary. +* +* Use ZSTD_compressStream2() with ZSTD_e_continue as many times as necessary to +* consume input stream. The function will automatically update both `pos` +* fields within `input` and `output`. +* Note that the function may not consume the entire input, for example, because +* the output buffer is already full, in which case `input.pos < input.size`. +* The caller must check if input has been entirely consumed. +* If not, the caller must make some room to receive more compressed data, +* and then present again remaining input data. +* note: ZSTD_e_continue is guaranteed to make some forward progress when called, +* but doesn't guarantee maximal forward progress. This is especially relevant +* when compressing with multiple threads. The call won't block if it can +* consume some input, but if it can't it will wait for some, but not all, +* output to be flushed. +* @return : provides a minimum amount of data remaining to be flushed from internal buffers +* or an error code, which can be tested using ZSTD_isError(). +* +* At any moment, it's possible to flush whatever data might remain stuck within internal buffer, +* using ZSTD_compressStream2() with ZSTD_e_flush. `output->pos` will be updated. +* Note that, if `output->size` is too small, a single invocation with ZSTD_e_flush might not be enough (return code > 0). +* In which case, make some room to receive more compressed data, and call again ZSTD_compressStream2() with ZSTD_e_flush. +* You must continue calling ZSTD_compressStream2() with ZSTD_e_flush until it returns 0, at which point you can change the +* operation. +* note: ZSTD_e_flush will flush as much output as possible, meaning when compressing with multiple threads, it will +* block until the flush is complete or the output buffer is full. +* @return : 0 if internal buffers are entirely flushed, +* >0 if some data still present within internal buffer (the value is minimal estimation of remaining size), +* or an error code, which can be tested using ZSTD_isError(). +* +* Calling ZSTD_compressStream2() with ZSTD_e_end instructs to finish a frame. +* It will perform a flush and write frame epilogue. +* The epilogue is required for decoders to consider a frame completed. +* flush operation is the same, and follows same rules as calling ZSTD_compressStream2() with ZSTD_e_flush. +* You must continue calling ZSTD_compressStream2() with ZSTD_e_end until it returns 0, at which point you are free to +* start a new frame. +* note: ZSTD_e_end will flush as much output as possible, meaning when compressing with multiple threads, it will +* block until the flush is complete or the output buffer is full. +* @return : 0 if frame fully completed and fully flushed, +* >0 if some data still present within internal buffer (the value is minimal estimation of remaining size), +* or an error code, which can be tested using ZSTD_isError(). 
+* +* *******************************************************************/ -static const U64 PRIME64_1 = 11400714785074694791ULL; -static const U64 PRIME64_2 = 14029467366897019727ULL; -static const U64 PRIME64_3 = 1609587929392839161ULL; -static const U64 PRIME64_4 = 9650029242287828579ULL; -static const U64 PRIME64_5 = 2870177450012600261ULL; +typedef ZSTD_CCtx ZSTD_CStream; /**< CCtx and CStream are now effectively same object (>= v1.3.0) */ + /* Continue to distinguish them for compatibility with older versions <= v1.2.0 */ +/*===== ZSTD_CStream management functions =====*/ +ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream(void); +ZSTDLIB_API size_t ZSTD_freeCStream(ZSTD_CStream* zcs); /* accept NULL pointer */ -XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; } +/*===== Streaming compression functions =====*/ +typedef enum { + ZSTD_e_continue=0, /* collect more data, encoder decides when to output compressed result, for optimal compression ratio */ + ZSTD_e_flush=1, /* flush any data provided so far, + * it creates (at least) one new block, that can be decoded immediately on reception; + * frame will continue: any future data can still reference previously compressed data, improving compression. + * note : multithreaded compression will block to flush as much output as possible. */ + ZSTD_e_end=2 /* flush any remaining data _and_ close current frame. + * note that frame is only closed after compressed data is fully flushed (return value == 0). + * After that point, any additional data starts a new frame. + * note : each frame is independent (does not reference any content from previous frame). + : note : multithreaded compression will block to flush as much output as possible. */ +} ZSTD_EndDirective; +/*! ZSTD_compressStream2() : Requires v1.4.0+ + * Behaves about the same as ZSTD_compressStream, with additional control on end directive. + * - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*() + * - Compression parameters cannot be changed once compression is started (save a list of exceptions in multi-threading mode) + * - output->pos must be <= dstCapacity, input->pos must be <= srcSize + * - output->pos and input->pos will be updated. They are guaranteed to remain below their respective limit. + * - endOp must be a valid directive + * - When nbWorkers==0 (default), function is blocking : it completes its job before returning to caller. + * - When nbWorkers>=1, function is non-blocking : it copies a portion of input, distributes jobs to internal worker threads, flush to output whatever is available, + * and then immediately returns, just indicating that there is some data remaining to be flushed. + * The function nonetheless guarantees forward progress : it will return only after it reads or write at least 1+ byte. + * - Exception : if the first call requests a ZSTD_e_end directive and provides enough dstCapacity, the function delegates to ZSTD_compress2() which is always blocking. + * - @return provides a minimum amount of data remaining to be flushed from internal buffers + * or an error code, which can be tested using ZSTD_isError(). + * if @return != 0, flush is not fully completed, there is still some data left within internal buffers. + * This is useful for ZSTD_e_flush, since in this case more flushes are necessary to empty all buffers. + * For ZSTD_e_end, @return == 0 when internal buffers are fully flushed and frame is completed. 
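A condensed, hedged sketch of the streaming loop described in the HowTo above: feed input with ZSTD_e_continue, then drive ZSTD_e_end until it returns 0, using the recommended buffer sizes. `fin`/`fout` are hypothetical open FILE pointers and cleanup on the error paths is elided.

    #include <stdio.h>
    #include <stdlib.h>
    #include "zstd.h"

    /* Stream-compress fin to fout; returns 0 on success. */
    static int streamCompress(FILE* fin, FILE* fout, int level)
    {
        size_t const inSize  = ZSTD_CStreamInSize();
        size_t const outSize = ZSTD_CStreamOutSize();
        void* const inBuf  = malloc(inSize);
        void* const outBuf = malloc(outSize);
        ZSTD_CCtx* const cctx = ZSTD_createCCtx();
        if (!inBuf || !outBuf || !cctx) { /* cleanup elided */ return -1; }

        ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, level);

        for (;;) {
            size_t const readBytes = fread(inBuf, 1, inSize, fin);
            int const lastChunk = (readBytes < inSize);
            ZSTD_EndDirective const mode = lastChunk ? ZSTD_e_end : ZSTD_e_continue;
            ZSTD_inBuffer input = { inBuf, readBytes, 0 };
            int finished;
            do {
                ZSTD_outBuffer output = { outBuf, outSize, 0 };
                size_t const remaining = ZSTD_compressStream2(cctx, &output, &input, mode);
                if (ZSTD_isError(remaining)) { /* cleanup elided */ return -1; }
                fwrite(outBuf, 1, output.pos, fout);
                /* ZSTD_e_end: loop until the frame is fully flushed (remaining == 0);
                 * ZSTD_e_continue: loop until this chunk of input has been fully consumed. */
                finished = lastChunk ? (remaining == 0) : (input.pos == input.size);
            } while (!finished);
            if (lastChunk) break;
        }

        ZSTD_freeCCtx(cctx);
        free(inBuf); free(outBuf);
        return 0;
    }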
+ * - after a ZSTD_e_end directive, if internal buffer is not fully flushed (@return != 0), + * only ZSTD_e_end or ZSTD_e_flush operations are allowed. + * Before starting a new compression job, or changing compression parameters, + * it is required to fully flush internal buffers. + */ +ZSTDLIB_API size_t ZSTD_compressStream2( ZSTD_CCtx* cctx, + ZSTD_outBuffer* output, + ZSTD_inBuffer* input, + ZSTD_EndDirective endOp); -/* ************************** -* Utils -****************************/ -XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* restrict dstState, const XXH32_state_t* restrict srcState) -{ - memcpy(dstState, srcState, sizeof(*dstState)); -} -XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* restrict dstState, const XXH64_state_t* restrict srcState) -{ - memcpy(dstState, srcState, sizeof(*dstState)); -} +/* These buffer sizes are softly recommended. + * They are not required : ZSTD_compressStream*() happily accepts any buffer size, for both input and output. + * Respecting the recommended size just makes it a bit easier for ZSTD_compressStream*(), + * reducing the amount of memory shuffling and buffering, resulting in minor performance savings. + * + * However, note that these recommendations are from the perspective of a C caller program. + * If the streaming interface is invoked from some other language, + * especially managed ones such as Java or Go, through a foreign function interface such as jni or cgo, + * a major performance rule is to reduce crossing such interface to an absolute minimum. + * It's not rare that performance ends being spent more into the interface, rather than compression itself. + * In which cases, prefer using large buffers, as large as practical, + * for both input and output, to reduce the nb of roundtrips. + */ +ZSTDLIB_API size_t ZSTD_CStreamInSize(void); /**< recommended size for input buffer */ +ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block. */ -/* *************************** -* Simple Hash Functions -*****************************/ +/* ***************************************************************************** + * This following is a legacy streaming API, available since v1.0+ . + * It can be replaced by ZSTD_CCtx_reset() and ZSTD_compressStream2(). + * It is redundant, but remains fully supported. + * Streaming in combination with advanced parameters and dictionary compression + * can only be used through the new API. + ******************************************************************************/ -static U32 XXH32_round(U32 seed, U32 input) -{ - seed += input * PRIME32_2; - seed = XXH_rotl32(seed, 13); - seed *= PRIME32_1; - return seed; -} +/*! + * Equivalent to: + * + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any) + * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); + */ +ZSTDLIB_API size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel); +/*! + * Alternative for ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue). + * NOTE: The return value is different. ZSTD_compressStream() returns a hint for + * the next read size (if non-zero and not an error). ZSTD_compressStream2() + * returns the minimum nb of bytes left to flush (if non-zero and not an error). + */ +ZSTDLIB_API size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input); +/*! 
Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_flush). */ +ZSTDLIB_API size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); +/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_end). */ +ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); -FORCE_INLINE_TEMPLATE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_endianess endian, XXH_alignment align) -{ - const BYTE* p = (const BYTE*)input; - const BYTE* bEnd = p + len; - U32 h32; -#define XXH_get32bits(p) XXH_readLE32_align(p, endian, align) -#ifdef XXH_ACCEPT_NULL_INPUT_POINTER - if (p==NULL) { - len=0; - bEnd=p=(const BYTE*)(size_t)16; - } -#endif +/*-*************************************************************************** +* Streaming decompression - HowTo +* +* A ZSTD_DStream object is required to track streaming operations. +* Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources. +* ZSTD_DStream objects can be re-used multiple times. +* +* Use ZSTD_initDStream() to start a new decompression operation. +* @return : recommended first input size +* Alternatively, use advanced API to set specific properties. +* +* Use ZSTD_decompressStream() repetitively to consume your input. +* The function will update both `pos` fields. +* If `input.pos < input.size`, some input has not been consumed. +* It's up to the caller to present again remaining data. +* The function tries to flush all data decoded immediately, respecting output buffer size. +* If `output.pos < output.size`, decoder has flushed everything it could. +* But if `output.pos == output.size`, there might be some data left within internal buffers., +* In which case, call ZSTD_decompressStream() again to flush whatever remains in the buffer. +* Note : with no additional input provided, amount of data flushed is necessarily <= ZSTD_BLOCKSIZE_MAX. +* @return : 0 when a frame is completely decoded and fully flushed, +* or an error code, which can be tested using ZSTD_isError(), +* or any other value > 0, which means there is still some decoding or flushing to do to complete current frame : +* the return value is a suggested next input size (just a hint for better latency) +* that will never request more than the remaining frame size. +* *******************************************************************************/ - if (len>=16) { - const BYTE* const limit = bEnd - 16; - U32 v1 = seed + PRIME32_1 + PRIME32_2; - U32 v2 = seed + PRIME32_2; - U32 v3 = seed + 0; - U32 v4 = seed - PRIME32_1; +typedef ZSTD_DCtx ZSTD_DStream; /**< DCtx and DStream are now effectively same object (>= v1.3.0) */ + /* For compatibility with versions <= v1.2.0, prefer differentiating them. 
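For comparison, a sketch of the legacy streaming calls above (ZSTD_compressStream / ZSTD_endStream). It assumes dst is large enough for the whole frame (for example ZSTD_compressBound(srcSize)), so a single pass suffices; otherwise loop exactly as in the ZSTD_compressStream2() example. Function and variable names are illustrative.

    #include <zstd.h>

    /* Legacy interface: same result as the ZSTD_compressStream2() loop,
     * but configuration is limited to a compression level via ZSTD_initCStream(). */
    static size_t compress_buffer_legacy(ZSTD_CStream* zcs,
                                         void* dst, size_t dstCapacity,
                                         const void* src, size_t srcSize)
    {
        ZSTD_outBuffer output = { dst, dstCapacity, 0 };
        ZSTD_inBuffer  input  = { src, srcSize, 0 };

        /* push input; the return value (if not an error) is a hint for the next read size */
        size_t const hint = ZSTD_compressStream(zcs, &output, &input);
        if (ZSTD_isError(hint)) return hint;

        /* close the frame; with a small dstCapacity this must be repeated until it returns 0 */
        size_t const remaining = ZSTD_endStream(zcs, &output);
        if (ZSTD_isError(remaining)) return remaining;

        return output.pos;   /* number of compressed bytes written into dst */
    }

Before calling, create and initialize the stream once: ZSTD_CStream* zcs = ZSTD_createCStream(); ZSTD_initCStream(zcs, level); and release it with ZSTD_freeCStream(zcs) when done.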
*/ +/*===== ZSTD_DStream management functions =====*/ +ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream(void); +ZSTDLIB_API size_t ZSTD_freeDStream(ZSTD_DStream* zds); /* accept NULL pointer */ - do { - v1 = XXH32_round(v1, XXH_get32bits(p)); p+=4; - v2 = XXH32_round(v2, XXH_get32bits(p)); p+=4; - v3 = XXH32_round(v3, XXH_get32bits(p)); p+=4; - v4 = XXH32_round(v4, XXH_get32bits(p)); p+=4; - } while (p<=limit); +/*===== Streaming decompression functions =====*/ - h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18); - } else { - h32 = seed + PRIME32_5; - } +/* This function is redundant with the advanced API and equivalent to: + * + * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); + * ZSTD_DCtx_refDDict(zds, NULL); + */ +ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds); - h32 += (U32) len; +ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input); - while (p+4<=bEnd) { - h32 += XXH_get32bits(p) * PRIME32_3; - h32 = XXH_rotl32(h32, 17) * PRIME32_4 ; - p+=4; - } +ZSTDLIB_API size_t ZSTD_DStreamInSize(void); /*!< recommended size for input buffer */ +ZSTDLIB_API size_t ZSTD_DStreamOutSize(void); /*!< recommended size for output buffer. Guarantee to successfully flush at least one complete block in all circumstances. */ - while (p> 15; - h32 *= PRIME32_2; - h32 ^= h32 >> 13; - h32 *= PRIME32_3; - h32 ^= h32 >> 16; - - return h32; -} - - -XXH_PUBLIC_API unsigned int XXH32 (const void* input, size_t len, unsigned int seed) -{ -#if 0 - /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ - XXH32_CREATESTATE_STATIC(state); - XXH32_reset(state, seed); - XXH32_update(state, input, len); - return XXH32_digest(state); -#else - XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; - - if (XXH_FORCE_ALIGN_CHECK) { - if ((((size_t)input) & 3) == 0) { /* Input is 4-bytes aligned, leverage the speed benefit */ - if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); - else - return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); - } } +/************************** +* Simple dictionary API +***************************/ +/*! ZSTD_compress_usingDict() : + * Compression at an explicit compression level using a Dictionary. + * A dictionary can be any arbitrary data segment (also called a prefix), + * or a buffer with specified information (see zdict.h). + * Note : This function loads the dictionary, resulting in significant startup delay. + * It's intended for a dictionary used only once. + * Note 2 : When `dict == NULL || dictSize < 8` no dictionary is used. */ +ZSTDLIB_API size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + int compressionLevel); - if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned); - else - return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); -#endif -} +/*! ZSTD_decompress_usingDict() : + * Decompression using a known Dictionary. + * Dictionary must be identical to the one used during compression. + * Note : This function loads the dictionary, resulting in significant startup delay. + * It's intended for a dictionary used only once. + * Note : When `dict == NULL || dictSize < 8` no dictionary is used. 
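The decompression side mirrors the compression loop. A minimal sketch using the functions declared above, again assuming stdio file I/O; decompress_file and the terse error handling are illustrative.

    #include <stdio.h>
    #include <stdlib.h>
    #include <zstd.h>

    /* Decompress a zstd stream from fin to fout, one chunk at a time. */
    static int decompress_file(FILE* fin, FILE* fout)
    {
        ZSTD_DStream* const dstream = ZSTD_createDStream();
        if (dstream == NULL) return 1;
        ZSTD_initDStream(dstream);   /* equivalent to resetting the session, per the note above */

        size_t const inSize  = ZSTD_DStreamInSize();
        size_t const outSize = ZSTD_DStreamOutSize();
        void* const inBuf  = malloc(inSize);
        void* const outBuf = malloc(outSize);

        size_t read;
        size_t lastRet = 0;
        while ((read = fread(inBuf, 1, inSize, fin)) != 0) {
            ZSTD_inBuffer input = { inBuf, read, 0 };
            while (input.pos < input.size) {
                ZSTD_outBuffer output = { outBuf, outSize, 0 };
                size_t const ret = ZSTD_decompressStream(dstream, &output, &input);
                if (ZSTD_isError(ret)) return 1;     /* error handling abbreviated */
                fwrite(outBuf, 1, output.pos, fout);
                lastRet = ret;   /* 0 means a frame was completely decoded and flushed */
            }
        }

        free(inBuf); free(outBuf);
        ZSTD_freeDStream(dstream);
        return (lastRet == 0) ? 0 : 1;   /* non-zero: input ended in the middle of a frame */
    }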
*/ +ZSTDLIB_API size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize); -static U64 XXH64_round(U64 acc, U64 input) -{ - acc += input * PRIME64_2; - acc = XXH_rotl64(acc, 31); - acc *= PRIME64_1; - return acc; -} +/*********************************** + * Bulk processing dictionary API + **********************************/ +typedef struct ZSTD_CDict_s ZSTD_CDict; -static U64 XXH64_mergeRound(U64 acc, U64 val) -{ - val = XXH64_round(0, val); - acc ^= val; - acc = acc * PRIME64_1 + PRIME64_4; - return acc; -} +/*! ZSTD_createCDict() : + * When compressing multiple messages or blocks using the same dictionary, + * it's recommended to digest the dictionary only once, since it's a costly operation. + * ZSTD_createCDict() will create a state from digesting a dictionary. + * The resulting state can be used for future compression operations with very limited startup cost. + * ZSTD_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only. + * @dictBuffer can be released after ZSTD_CDict creation, because its content is copied within CDict. + * Note 1 : Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate @dictBuffer content. + * Note 2 : A ZSTD_CDict can be created from an empty @dictBuffer, + * in which case the only thing that it transports is the @compressionLevel. + * This can be useful in a pipeline featuring ZSTD_compress_usingCDict() exclusively, + * expecting a ZSTD_CDict parameter with any data, including those without a known dictionary. */ +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize, + int compressionLevel); -FORCE_INLINE_TEMPLATE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align) -{ - const BYTE* p = (const BYTE*)input; - const BYTE* const bEnd = p + len; - U64 h64; -#define XXH_get64bits(p) XXH_readLE64_align(p, endian, align) +/*! ZSTD_freeCDict() : + * Function frees memory allocated by ZSTD_createCDict(). + * If a NULL pointer is passed, no operation is performed. */ +ZSTDLIB_API size_t ZSTD_freeCDict(ZSTD_CDict* CDict); -#ifdef XXH_ACCEPT_NULL_INPUT_POINTER - if (p==NULL) { - len=0; - bEnd=p=(const BYTE*)(size_t)32; - } -#endif +/*! ZSTD_compress_usingCDict() : + * Compression using a digested Dictionary. + * Recommended when same dictionary is used multiple times. + * Note : compression level is _decided at dictionary creation time_, + * and frame parameters are hardcoded (dictID=yes, contentSize=yes, checksum=no) */ +ZSTDLIB_API size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict); - if (len>=32) { - const BYTE* const limit = bEnd - 32; - U64 v1 = seed + PRIME64_1 + PRIME64_2; - U64 v2 = seed + PRIME64_2; - U64 v3 = seed + 0; - U64 v4 = seed - PRIME64_1; - do { - v1 = XXH64_round(v1, XXH_get64bits(p)); p+=8; - v2 = XXH64_round(v2, XXH_get64bits(p)); p+=8; - v3 = XXH64_round(v3, XXH_get64bits(p)); p+=8; - v4 = XXH64_round(v4, XXH_get64bits(p)); p+=8; - } while (p<=limit); +typedef struct ZSTD_DDict_s ZSTD_DDict; - h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); - h64 = XXH64_mergeRound(h64, v1); - h64 = XXH64_mergeRound(h64, v2); - h64 = XXH64_mergeRound(h64, v3); - h64 = XXH64_mergeRound(h64, v4); +/*! 
ZSTD_createDDict() : + * Create a digested dictionary, ready to start decompression operation without startup delay. + * dictBuffer can be released after DDict creation, as its content is copied inside DDict. */ +ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dictBuffer, size_t dictSize); - } else { - h64 = seed + PRIME64_5; - } +/*! ZSTD_freeDDict() : + * Function frees memory allocated with ZSTD_createDDict() + * If a NULL pointer is passed, no operation is performed. */ +ZSTDLIB_API size_t ZSTD_freeDDict(ZSTD_DDict* ddict); - h64 += (U64) len; +/*! ZSTD_decompress_usingDDict() : + * Decompression using a digested Dictionary. + * Recommended when same dictionary is used multiple times. */ +ZSTDLIB_API size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_DDict* ddict); - while (p+8<=bEnd) { - U64 const k1 = XXH64_round(0, XXH_get64bits(p)); - h64 ^= k1; - h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; - p+=8; - } - if (p+4<=bEnd) { - h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1; - h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; - p+=4; - } +/******************************** + * Dictionary helper functions + *******************************/ - while (p> 33; - h64 *= PRIME64_2; - h64 ^= h64 >> 29; - h64 *= PRIME64_3; - h64 ^= h64 >> 32; +/*! ZSTD_getDictID_fromCDict() : Requires v1.5.0+ + * Provides the dictID of the dictionary loaded into `cdict`. + * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. + * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ +ZSTDLIB_API unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict); - return h64; -} +/*! ZSTD_getDictID_fromDDict() : Requires v1.4.0+ + * Provides the dictID of the dictionary loaded into `ddict`. + * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. + * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ +ZSTDLIB_API unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict); +/*! ZSTD_getDictID_fromFrame() : Requires v1.4.0+ + * Provides the dictID required to decompressed the frame stored within `src`. + * If @return == 0, the dictID could not be decoded. + * This could for one of the following reasons : + * - The frame does not require a dictionary to be decoded (most common case). + * - The frame was built with dictID intentionally removed. Whatever dictionary is necessary is a hidden information. + * Note : this use case also happens when using a non-conformant dictionary. + * - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`). + * - This is not a Zstandard frame. + * When identifying the exact failure cause, it's possible to use ZSTD_getFrameHeader(), which will provide a more precise error code. 
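A small sketch of the bulk-processing dictionary API above: digest the dictionary once into a CDict/DDict, then reuse it for many messages. The wrapper names are invented; in real code the CDict and DDict would be kept alive and shared across calls (and threads) instead of being created and freed each time.

    #include <zstd.h>

    static size_t compress_with_cdict(void* dst, size_t dstCapacity,
                                      const void* src, size_t srcSize,
                                      const void* dictBuf, size_t dictSize, int level)
    {
        ZSTD_CDict* const cdict = ZSTD_createCDict(dictBuf, dictSize, level);
        ZSTD_CCtx*  const cctx  = ZSTD_createCCtx();
        size_t const cSize = ZSTD_compress_usingCDict(cctx, dst, dstCapacity, src, srcSize, cdict);
        ZSTD_freeCCtx(cctx);
        ZSTD_freeCDict(cdict);   /* keep it around in real code: creation is the costly part */
        return cSize;            /* compressed size, or an error code (test with ZSTD_isError) */
    }

    static size_t decompress_with_ddict(void* dst, size_t dstCapacity,
                                        const void* src, size_t srcSize,
                                        const void* dictBuf, size_t dictSize)
    {
        ZSTD_DDict* const ddict = ZSTD_createDDict(dictBuf, dictSize);
        ZSTD_DCtx*  const dctx  = ZSTD_createDCtx();
        size_t const dSize = ZSTD_decompress_usingDDict(dctx, dst, dstCapacity, src, srcSize, ddict);
        ZSTD_freeDCtx(dctx);
        ZSTD_freeDDict(ddict);
        return dSize;
    }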
*/ +ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize); -XXH_PUBLIC_API unsigned long long XXH64 (const void* input, size_t len, unsigned long long seed) -{ -#if 0 - /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ - XXH64_CREATESTATE_STATIC(state); - XXH64_reset(state, seed); - XXH64_update(state, input, len); - return XXH64_digest(state); -#else - XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; - if (XXH_FORCE_ALIGN_CHECK) { - if ((((size_t)input) & 7)==0) { /* Input is aligned, let's leverage the speed advantage */ - if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); - else - return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); - } } +/******************************************************************************* + * Advanced dictionary and prefix API (Requires v1.4.0+) + * + * This API allows dictionaries to be used with ZSTD_compress2(), + * ZSTD_compressStream2(), and ZSTD_decompress(). Dictionaries are sticky, and + * only reset with the context is reset with ZSTD_reset_parameters or + * ZSTD_reset_session_and_parameters. Prefixes are single-use. + ******************************************************************************/ - if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned); - else - return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); -#endif -} +/*! ZSTD_CCtx_loadDictionary() : Requires v1.4.0+ + * Create an internal CDict from `dict` buffer. + * Decompression will have to use same dictionary. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Special: Loading a NULL (or 0-size) dictionary invalidates previous dictionary, + * meaning "return to no-dictionary mode". + * Note 1 : Dictionary is sticky, it will be used for all future compressed frames. + * To return to "no-dictionary" situation, load a NULL dictionary (or reset parameters). + * Note 2 : Loading a dictionary involves building tables. + * It's also a CPU consuming operation, with non-negligible impact on latency. + * Tables are dependent on compression parameters, and for this reason, + * compression parameters can no longer be changed after loading a dictionary. + * Note 3 :`dict` content will be copied internally. + * Use experimental ZSTD_CCtx_loadDictionary_byReference() to reference content instead. + * In such a case, dictionary buffer must outlive its users. + * Note 4 : Use ZSTD_CCtx_loadDictionary_advanced() + * to precisely select how dictionary content must be interpreted. */ +ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize); -/* ************************************************** -* Advanced Hash Functions -****************************************************/ +/*! ZSTD_CCtx_refCDict() : Requires v1.4.0+ + * Reference a prepared dictionary, to be used for all next compressed frames. + * Note that compression parameters are enforced from within CDict, + * and supersede any compression parameter previously set within CCtx. + * The parameters ignored are labelled as "superseded-by-cdict" in the ZSTD_cParameter enum docs. + * The ignored parameters will be used again if the CCtx is returned to no-dictionary mode. + * The dictionary will remain valid for future compressed frames using same CCtx. 
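A sketch of the sticky-dictionary behaviour described for ZSTD_CCtx_loadDictionary(): parameters are set first (they cannot change after the dictionary is loaded), the dictionary is copied internally, and every following frame uses it until the context is reset. Function name and the fixed level 19 are illustrative.

    #include <zstd.h>

    static size_t compress_two_frames_with_dict(ZSTD_CCtx* cctx,
                                                void* dst1, size_t cap1, const void* src1, size_t size1,
                                                void* dst2, size_t cap2, const void* src2, size_t size2,
                                                const void* dict, size_t dictSize)
    {
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 19);
        ZSTD_CCtx_loadDictionary(cctx, dict, dictSize);   /* copied internally, dict may be freed */

        size_t const c1 = ZSTD_compress2(cctx, dst1, cap1, src1, size1);   /* uses the dictionary */
        size_t const c2 = ZSTD_compress2(cctx, dst2, cap2, src2, size2);   /* still uses it (sticky) */
        return ZSTD_isError(c1) ? c1 : c2;
    }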
+ * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Special : Referencing a NULL CDict means "return to no-dictionary mode". + * Note 1 : Currently, only one dictionary can be managed. + * Referencing a new dictionary effectively "discards" any previous one. + * Note 2 : CDict is just referenced, its lifetime must outlive its usage within CCtx. */ +ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); -XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void) -{ - return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t)); -} -XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr) -{ - XXH_free(statePtr); - return XXH_OK; -} - -XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void) -{ - return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t)); -} -XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr) -{ - XXH_free(statePtr); - return XXH_OK; -} +/*! ZSTD_CCtx_refPrefix() : Requires v1.4.0+ + * Reference a prefix (single-usage dictionary) for next compressed frame. + * A prefix is **only used once**. Tables are discarded at end of frame (ZSTD_e_end). + * Decompression will need same prefix to properly regenerate data. + * Compressing with a prefix is similar in outcome as performing a diff and compressing it, + * but performs much faster, especially during decompression (compression speed is tunable with compression level). + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Special: Adding any prefix (including NULL) invalidates any previous prefix or dictionary + * Note 1 : Prefix buffer is referenced. It **must** outlive compression. + * Its content must remain unmodified during compression. + * Note 2 : If the intention is to diff some large src data blob with some prior version of itself, + * ensure that the window size is large enough to contain the entire source. + * See ZSTD_c_windowLog. + * Note 3 : Referencing a prefix involves building tables, which are dependent on compression parameters. + * It's a CPU consuming operation, with non-negligible impact on latency. + * If there is a need to use the same prefix multiple times, consider loadDictionary instead. + * Note 4 : By default, the prefix is interpreted as raw content (ZSTD_dct_rawContent). + * Use experimental ZSTD_CCtx_refPrefix_advanced() to alter dictionary interpretation. */ +ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, + const void* prefix, size_t prefixSize); +/*! ZSTD_DCtx_loadDictionary() : Requires v1.4.0+ + * Create an internal DDict from dict buffer, + * to be used to decompress next frames. + * The dictionary remains valid for all future frames, until explicitly invalidated. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Special : Adding a NULL (or 0-size) dictionary invalidates any previous dictionary, + * meaning "return to no-dictionary mode". + * Note 1 : Loading a dictionary involves building tables, + * which has a non-negligible impact on CPU usage and latency. + * It's recommended to "load once, use many times", to amortize the cost + * Note 2 :`dict` content will be copied internally, so `dict` can be released after loading. + * Use ZSTD_DCtx_loadDictionary_byReference() to reference dictionary content instead. + * Note 3 : Use ZSTD_DCtx_loadDictionary_advanced() to take control of + * how dictionary content is loaded and interpreted. 
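A sketch of the prefix ("diff-like") usage described for ZSTD_CCtx_refPrefix(): an older version of a blob is referenced as a single-use prefix, and the decompression side must supply the same prefix via ZSTD_DCtx_refPrefix() (documented just below). ZSTD_decompressDCtx() is the single-shot decompression entry point from the stable API; the wrapper names are invented, and the prefix buffers must outlive the calls.

    #include <zstd.h>

    static size_t compress_against_prefix(ZSTD_CCtx* cctx,
                                          void* dst, size_t dstCapacity,
                                          const void* newVersion, size_t newSize,
                                          const void* oldVersion, size_t oldSize)
    {
        /* the window (ZSTD_c_windowLog) must be large enough to cover prefix + input */
        ZSTD_CCtx_refPrefix(cctx, oldVersion, oldSize);   /* referenced, not copied */
        return ZSTD_compress2(cctx, dst, dstCapacity, newVersion, newSize);
    }

    static size_t decompress_against_prefix(ZSTD_DCtx* dctx,
                                            void* dst, size_t dstCapacity,
                                            const void* cSrc, size_t cSize,
                                            const void* oldVersion, size_t oldSize)
    {
        ZSTD_DCtx_refPrefix(dctx, oldVersion, oldSize);   /* same prefix as on the compression side */
        return ZSTD_decompressDCtx(dctx, dst, dstCapacity, cSrc, cSize);
    }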
+ */ +ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); -/*** Hash feed ***/ +/*! ZSTD_DCtx_refDDict() : Requires v1.4.0+ + * Reference a prepared dictionary, to be used to decompress next frames. + * The dictionary remains active for decompression of future frames using same DCtx. + * + * If called with ZSTD_d_refMultipleDDicts enabled, repeated calls of this function + * will store the DDict references in a table, and the DDict used for decompression + * will be determined at decompression time, as per the dict ID in the frame. + * The memory for the table is allocated on the first call to refDDict, and can be + * freed with ZSTD_freeDCtx(). + * + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Note 1 : Currently, only one dictionary can be managed. + * Referencing a new dictionary effectively "discards" any previous one. + * Special: referencing a NULL DDict means "return to no-dictionary mode". + * Note 2 : DDict is just referenced, its lifetime must outlive its usage from DCtx. + */ +ZSTDLIB_API size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); -XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, unsigned int seed) -{ - XXH32_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ - memset(&state, 0, sizeof(state)-4); /* do not write into reserved, for future removal */ - state.v1 = seed + PRIME32_1 + PRIME32_2; - state.v2 = seed + PRIME32_2; - state.v3 = seed + 0; - state.v4 = seed - PRIME32_1; - memcpy(statePtr, &state, sizeof(state)); - return XXH_OK; -} +/*! ZSTD_DCtx_refPrefix() : Requires v1.4.0+ + * Reference a prefix (single-usage dictionary) to decompress next frame. + * This is the reverse operation of ZSTD_CCtx_refPrefix(), + * and must use the same prefix as the one used during compression. + * Prefix is **only used once**. Reference is discarded at end of frame. + * End of frame is reached when ZSTD_decompressStream() returns 0. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Note 1 : Adding any prefix (including NULL) invalidates any previously set prefix or dictionary + * Note 2 : Prefix buffer is referenced. It **must** outlive decompression. + * Prefix buffer must remain unmodified up to the end of frame, + * reached when ZSTD_decompressStream() returns 0. + * Note 3 : By default, the prefix is treated as raw content (ZSTD_dct_rawContent). + * Use ZSTD_CCtx_refPrefix_advanced() to alter dictMode (Experimental section) + * Note 4 : Referencing a raw content prefix has almost no cpu nor memory cost. + * A full dictionary is more costly, as it requires building tables. + */ +ZSTDLIB_API size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, + const void* prefix, size_t prefixSize); +/* === Memory management === */ -XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, unsigned long long seed) -{ - XXH64_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ - memset(&state, 0, sizeof(state)-8); /* do not write into reserved, for future removal */ - state.v1 = seed + PRIME64_1 + PRIME64_2; - state.v2 = seed + PRIME64_2; - state.v3 = seed + 0; - state.v4 = seed - PRIME64_1; - memcpy(statePtr, &state, sizeof(state)); - return XXH_OK; -} +/*! ZSTD_sizeof_*() : Requires v1.4.0+ + * These functions give the _current_ memory usage of selected object. + * Note that object memory usage can evolve (increase or decrease) over time. 
*/ +ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx); +ZSTDLIB_API size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx); +ZSTDLIB_API size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs); +ZSTDLIB_API size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds); +ZSTDLIB_API size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict); +ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict); +#endif /* ZSTD_H_235446 */ -FORCE_INLINE_TEMPLATE XXH_errorcode XXH32_update_endian (XXH32_state_t* state, const void* input, size_t len, XXH_endianess endian) -{ - const BYTE* p = (const BYTE*)input; - const BYTE* const bEnd = p + len; -#ifdef XXH_ACCEPT_NULL_INPUT_POINTER - if (input==NULL) return XXH_ERROR; -#endif +/* ************************************************************************************** + * ADVANCED AND EXPERIMENTAL FUNCTIONS + **************************************************************************************** + * The definitions in the following section are considered experimental. + * They are provided for advanced scenarios. + * They should never be used with a dynamic library, as prototypes may change in the future. + * Use them only in association with static linking. + * ***************************************************************************************/ - state->total_len_32 += (unsigned)len; - state->large_len |= (len>=16) | (state->total_len_32>=16); +#if defined(ZSTD_STATIC_LINKING_ONLY) && !defined(ZSTD_H_ZSTD_STATIC_LINKING_ONLY) +#define ZSTD_H_ZSTD_STATIC_LINKING_ONLY - if (state->memsize + len < 16) { /* fill in tmp buffer */ - XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len); - state->memsize += (unsigned)len; - return XXH_OK; - } +/* Deprecation warnings : + * Should these warnings be a problem, it is generally possible to disable them, + * typically with -Wno-deprecated-declarations for gcc or _CRT_SECURE_NO_WARNINGS in Visual. + * Otherwise, it's also possible to define ZSTD_DISABLE_DEPRECATE_WARNINGS. 
+ */ +#ifdef ZSTD_DISABLE_DEPRECATE_WARNINGS +# define ZSTD_DEPRECATED(message) ZSTDLIB_API /* disable deprecation warnings */ +#else +# if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */ +# define ZSTD_DEPRECATED(message) [[deprecated(message)]] ZSTDLIB_API +# elif (defined(GNUC) && (GNUC > 4 || (GNUC == 4 && GNUC_MINOR >= 5))) || defined(__clang__) +# define ZSTD_DEPRECATED(message) ZSTDLIB_API __attribute__((deprecated(message))) +# elif defined(__GNUC__) && (__GNUC__ >= 3) +# define ZSTD_DEPRECATED(message) ZSTDLIB_API __attribute__((deprecated)) +# elif defined(_MSC_VER) +# define ZSTD_DEPRECATED(message) ZSTDLIB_API __declspec(deprecated(message)) +# else +# pragma message("WARNING: You need to implement ZSTD_DEPRECATED for this compiler") +# define ZSTD_DEPRECATED(message) ZSTDLIB_API +# endif +#endif /* ZSTD_DISABLE_DEPRECATE_WARNINGS */ - if (state->memsize) { /* some data left from previous update */ - XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize); - { const U32* p32 = state->mem32; - state->v1 = XXH32_round(state->v1, XXH_readLE32(p32, endian)); p32++; - state->v2 = XXH32_round(state->v2, XXH_readLE32(p32, endian)); p32++; - state->v3 = XXH32_round(state->v3, XXH_readLE32(p32, endian)); p32++; - state->v4 = XXH32_round(state->v4, XXH_readLE32(p32, endian)); p32++; - } - p += 16-state->memsize; - state->memsize = 0; - } +/**************************************************************************************** + * experimental API (static linking only) + **************************************************************************************** + * The following symbols and constants + * are not planned to join "stable API" status in the near future. + * They can still change in future versions. + * Some of them are planned to remain in the static_only section indefinitely. + * Some of them might be removed in the future (especially when redundant with existing stable functions) + * ***************************************************************************************/ - if (p <= bEnd-16) { - const BYTE* const limit = bEnd - 16; - U32 v1 = state->v1; - U32 v2 = state->v2; - U32 v3 = state->v3; - U32 v4 = state->v4; +#define ZSTD_FRAMEHEADERSIZE_PREFIX(format) ((format) == ZSTD_f_zstd1 ? 5 : 1) /* minimum input size required to query frame header size */ +#define ZSTD_FRAMEHEADERSIZE_MIN(format) ((format) == ZSTD_f_zstd1 ? 6 : 2) +#define ZSTD_FRAMEHEADERSIZE_MAX 18 /* can be useful for static allocation */ +#define ZSTD_SKIPPABLEHEADERSIZE 8 - do { - v1 = XXH32_round(v1, XXH_readLE32(p, endian)); p+=4; - v2 = XXH32_round(v2, XXH_readLE32(p, endian)); p+=4; - v3 = XXH32_round(v3, XXH_readLE32(p, endian)); p+=4; - v4 = XXH32_round(v4, XXH_readLE32(p, endian)); p+=4; - } while (p<=limit); +/* compression parameter bounds */ +#define ZSTD_WINDOWLOG_MAX_32 30 +#define ZSTD_WINDOWLOG_MAX_64 31 +#define ZSTD_WINDOWLOG_MAX ((int)(sizeof(size_t) == 4 ? ZSTD_WINDOWLOG_MAX_32 : ZSTD_WINDOWLOG_MAX_64)) +#define ZSTD_WINDOWLOG_MIN 10 +#define ZSTD_HASHLOG_MAX ((ZSTD_WINDOWLOG_MAX < 30) ? ZSTD_WINDOWLOG_MAX : 30) +#define ZSTD_HASHLOG_MIN 6 +#define ZSTD_CHAINLOG_MAX_32 29 +#define ZSTD_CHAINLOG_MAX_64 30 +#define ZSTD_CHAINLOG_MAX ((int)(sizeof(size_t) == 4 ? 
ZSTD_CHAINLOG_MAX_32 : ZSTD_CHAINLOG_MAX_64)) +#define ZSTD_CHAINLOG_MIN ZSTD_HASHLOG_MIN +#define ZSTD_SEARCHLOG_MAX (ZSTD_WINDOWLOG_MAX-1) +#define ZSTD_SEARCHLOG_MIN 1 +#define ZSTD_MINMATCH_MAX 7 /* only for ZSTD_fast, other strategies are limited to 6 */ +#define ZSTD_MINMATCH_MIN 3 /* only for ZSTD_btopt+, faster strategies are limited to 4 */ +#define ZSTD_TARGETLENGTH_MAX ZSTD_BLOCKSIZE_MAX +#define ZSTD_TARGETLENGTH_MIN 0 /* note : comparing this constant to an unsigned results in a tautological test */ +#define ZSTD_STRATEGY_MIN ZSTD_fast +#define ZSTD_STRATEGY_MAX ZSTD_btultra2 - state->v1 = v1; - state->v2 = v2; - state->v3 = v3; - state->v4 = v4; - } - if (p < bEnd) { - XXH_memcpy(state->mem32, p, (size_t)(bEnd-p)); - state->memsize = (unsigned)(bEnd-p); - } +#define ZSTD_OVERLAPLOG_MIN 0 +#define ZSTD_OVERLAPLOG_MAX 9 - return XXH_OK; -} +#define ZSTD_WINDOWLOG_LIMIT_DEFAULT 27 /* by default, the streaming decoder will refuse any frame + * requiring larger than (1<mem32; - const BYTE* const bEnd = (const BYTE*)(state->mem32) + state->memsize; - U32 h32; - if (state->large_len) { - h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18); - } else { - h32 = state->v3 /* == seed */ + PRIME32_5; - } +/* --- Advanced types --- */ - h32 += state->total_len_32; +typedef struct ZSTD_CCtx_params_s ZSTD_CCtx_params; - while (p+4<=bEnd) { - h32 += XXH_readLE32(p, endian) * PRIME32_3; - h32 = XXH_rotl32(h32, 17) * PRIME32_4; - p+=4; - } +typedef struct { + unsigned int offset; /* The offset of the match. (NOT the same as the offset code) + * If offset == 0 and matchLength == 0, this sequence represents the last + * literals in the block of litLength size. + */ + + unsigned int litLength; /* Literal length of the sequence. */ + unsigned int matchLength; /* Match length of the sequence. */ + + /* Note: Users of this API may provide a sequence with matchLength == litLength == offset == 0. + * In this case, we will treat the sequence as a marker for a block boundary. + */ + + unsigned int rep; /* Represents which repeat offset is represented by the field 'offset'. + * Ranges from [0, 3]. + * + * Repeat offsets are essentially previous offsets from previous sequences sorted in + * recency order. For more detail, see doc/zstd_compression_format.md + * + * If rep == 0, then 'offset' does not contain a repeat offset. + * If rep > 0: + * If litLength != 0: + * rep == 1 --> offset == repeat_offset_1 + * rep == 2 --> offset == repeat_offset_2 + * rep == 3 --> offset == repeat_offset_3 + * If litLength == 0: + * rep == 1 --> offset == repeat_offset_2 + * rep == 2 --> offset == repeat_offset_3 + * rep == 3 --> offset == repeat_offset_1 - 1 + * + * Note: This field is optional. ZSTD_generateSequences() will calculate the value of + * 'rep', but repeat offsets do not necessarily need to be calculated from an external + * sequence provider's perspective. For example, ZSTD_compressSequences() does not + * use this 'rep' field at all (as of now). 
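The rep field of ZSTD_Sequence encodes the repeat-offset table spelled out in the comment above. The tiny helper below only illustrates that mapping; the names resolve_rep_offset and repeat_offsets are invented, and, as the note says, ZSTD_compressSequences() does not currently consume this field at all.

    #include <assert.h>

    /* repeat_offsets[0..2] are the three most recent offsets, most recent first. */
    static unsigned resolve_rep_offset(unsigned rep, unsigned litLength,
                                       const unsigned repeat_offsets[3], unsigned rawOffset)
    {
        if (rep == 0) return rawOffset;        /* 'offset' already holds a real offset */
        assert(rep <= 3);
        if (litLength != 0) {
            return repeat_offsets[rep - 1];    /* rep 1/2/3 -> repeat_offset_1/2/3 */
        } else {
            /* with no literals the mapping shifts, and rep==3 means repeat_offset_1 - 1 */
            if (rep == 1) return repeat_offsets[1];
            if (rep == 2) return repeat_offsets[2];
            return repeat_offsets[0] - 1;
        }
    }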
+ */ +} ZSTD_Sequence; - while (p> 15; - h32 *= PRIME32_2; - h32 ^= h32 >> 13; - h32 *= PRIME32_3; - h32 ^= h32 >> 16; - - return h32; -} +typedef struct { + unsigned windowLog; /**< largest match distance : larger == more compression, more memory needed during decompression */ + unsigned chainLog; /**< fully searched segment : larger == more compression, slower, more memory (useless for fast) */ + unsigned hashLog; /**< dispatch table : larger == faster, more memory */ + unsigned searchLog; /**< nb of searches : larger == more compression, slower */ + unsigned minMatch; /**< match length searched : larger == faster decompression, sometimes less compression */ + unsigned targetLength; /**< acceptable match size for optimal parser (only) : larger == more compression, slower */ + ZSTD_strategy strategy; /**< see ZSTD_strategy definition above */ +} ZSTD_compressionParameters; +typedef struct { + int contentSizeFlag; /**< 1: content size will be in frame header (when known) */ + int checksumFlag; /**< 1: generate a 32-bits checksum using XXH64 algorithm at end of frame, for error detection */ + int noDictIDFlag; /**< 1: no dictID will be saved into frame header (dictID is only useful for dictionary compression) */ +} ZSTD_frameParameters; -XXH_PUBLIC_API unsigned int XXH32_digest (const XXH32_state_t* state_in) -{ - XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; +typedef struct { + ZSTD_compressionParameters cParams; + ZSTD_frameParameters fParams; +} ZSTD_parameters; - if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH32_digest_endian(state_in, XXH_littleEndian); - else - return XXH32_digest_endian(state_in, XXH_bigEndian); -} +typedef enum { + ZSTD_dct_auto = 0, /* dictionary is "full" when starting with ZSTD_MAGIC_DICTIONARY, otherwise it is "rawContent" */ + ZSTD_dct_rawContent = 1, /* ensures dictionary is always loaded as rawContent, even if it starts with ZSTD_MAGIC_DICTIONARY */ + ZSTD_dct_fullDict = 2 /* refuses to load a dictionary if it does not respect Zstandard's specification, starting with ZSTD_MAGIC_DICTIONARY */ +} ZSTD_dictContentType_e; +typedef enum { + ZSTD_dlm_byCopy = 0, /**< Copy dictionary content internally */ + ZSTD_dlm_byRef = 1 /**< Reference dictionary content -- the dictionary buffer must outlive its users. */ +} ZSTD_dictLoadMethod_e; +typedef enum { + ZSTD_f_zstd1 = 0, /* zstd frame format, specified in zstd_compression_format.md (default) */ + ZSTD_f_zstd1_magicless = 1 /* Variant of zstd frame format, without initial 4-bytes magic number. + * Useful to save 4 bytes per generated frame. + * Decoder cannot recognise automatically this format, requiring this instruction. */ +} ZSTD_format_e; -/* **** XXH64 **** */ +typedef enum { + /* Note: this enum controls ZSTD_d_forceIgnoreChecksum */ + ZSTD_d_validateChecksum = 0, + ZSTD_d_ignoreChecksum = 1 +} ZSTD_forceIgnoreChecksum_e; -FORCE_INLINE_TEMPLATE XXH_errorcode XXH64_update_endian (XXH64_state_t* state, const void* input, size_t len, XXH_endianess endian) -{ - const BYTE* p = (const BYTE*)input; - const BYTE* const bEnd = p + len; +typedef enum { + /* Note: this enum controls ZSTD_d_refMultipleDDicts */ + ZSTD_rmd_refSingleDDict = 0, + ZSTD_rmd_refMultipleDDicts = 1 +} ZSTD_refMultipleDDicts_e; -#ifdef XXH_ACCEPT_NULL_INPUT_POINTER - if (input==NULL) return XXH_ERROR; -#endif +typedef enum { + /* Note: this enum and the behavior it controls are effectively internal + * implementation details of the compressor. 
They are expected to continue + * to evolve and should be considered only in the context of extremely + * advanced performance tuning. + * + * Zstd currently supports the use of a CDict in three ways: + * + * - The contents of the CDict can be copied into the working context. This + * means that the compression can search both the dictionary and input + * while operating on a single set of internal tables. This makes + * the compression faster per-byte of input. However, the initial copy of + * the CDict's tables incurs a fixed cost at the beginning of the + * compression. For small compressions (< 8 KB), that copy can dominate + * the cost of the compression. + * + * - The CDict's tables can be used in-place. In this model, compression is + * slower per input byte, because the compressor has to search two sets of + * tables. However, this model incurs no start-up cost (as long as the + * working context's tables can be reused). For small inputs, this can be + * faster than copying the CDict's tables. + * + * - The CDict's tables are not used at all, and instead we use the working + * context alone to reload the dictionary and use params based on the source + * size. See ZSTD_compress_insertDictionary() and ZSTD_compress_usingDict(). + * This method is effective when the dictionary sizes are very small relative + * to the input size, and the input size is fairly large to begin with. + * + * Zstd has a simple internal heuristic that selects which strategy to use + * at the beginning of a compression. However, if experimentation shows that + * Zstd is making poor choices, it is possible to override that choice with + * this enum. + */ + ZSTD_dictDefaultAttach = 0, /* Use the default heuristic. */ + ZSTD_dictForceAttach = 1, /* Never copy the dictionary. */ + ZSTD_dictForceCopy = 2, /* Always copy the dictionary. */ + ZSTD_dictForceLoad = 3 /* Always reload the dictionary */ +} ZSTD_dictAttachPref_e; - state->total_len += len; +typedef enum { + ZSTD_lcm_auto = 0, /**< Automatically determine the compression mode based on the compression level. + * Negative compression levels will be uncompressed, and positive compression + * levels will be compressed. */ + ZSTD_lcm_huffman = 1, /**< Always attempt Huffman compression. Uncompressed literals will still be + * emitted if Huffman compression is not profitable. */ + ZSTD_lcm_uncompressed = 2 /**< Always emit uncompressed literals. 
*/ +} ZSTD_literalCompressionMode_e; - if (state->memsize + len < 32) { /* fill in tmp buffer */ - XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len); - state->memsize += (U32)len; - return XXH_OK; - } +typedef enum { + ZSTD_urm_auto = 0, /* Automatically determine whether or not we use row matchfinder */ + ZSTD_urm_disableRowMatchFinder = 1, /* Never use row matchfinder */ + ZSTD_urm_enableRowMatchFinder = 2 /* Always use row matchfinder when applicable */ +} ZSTD_useRowMatchFinderMode_e; - if (state->memsize) { /* tmp buffer is full */ - XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize); - state->v1 = XXH64_round(state->v1, XXH_readLE64(state->mem64+0, endian)); - state->v2 = XXH64_round(state->v2, XXH_readLE64(state->mem64+1, endian)); - state->v3 = XXH64_round(state->v3, XXH_readLE64(state->mem64+2, endian)); - state->v4 = XXH64_round(state->v4, XXH_readLE64(state->mem64+3, endian)); - p += 32-state->memsize; - state->memsize = 0; - } +/*************************************** +* Frame size functions +***************************************/ - if (p+32 <= bEnd) { - const BYTE* const limit = bEnd - 32; - U64 v1 = state->v1; - U64 v2 = state->v2; - U64 v3 = state->v3; - U64 v4 = state->v4; +/*! ZSTD_findDecompressedSize() : + * `src` should point to the start of a series of ZSTD encoded and/or skippable frames + * `srcSize` must be the _exact_ size of this series + * (i.e. there should be a frame boundary at `src + srcSize`) + * @return : - decompressed size of all data in all successive frames + * - if the decompressed size cannot be determined: ZSTD_CONTENTSIZE_UNKNOWN + * - if an error occurred: ZSTD_CONTENTSIZE_ERROR + * + * note 1 : decompressed size is an optional field, that may not be present, especially in streaming mode. + * When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size. + * In which case, it's necessary to use streaming mode to decompress data. + * note 2 : decompressed size is always present when compression is done with ZSTD_compress() + * note 3 : decompressed size can be very large (64-bits value), + * potentially larger than what local system can handle as a single memory segment. + * In which case, it's necessary to use streaming mode to decompress data. + * note 4 : If source is untrusted, decompressed size could be wrong or intentionally modified. + * Always ensure result fits within application's authorized limits. + * Each application can set its own limits. + * note 5 : ZSTD_findDecompressedSize handles multiple frames, and so it must traverse the input to + * read each contained frame header. This is fast as most of the data is skipped, + * however it does mean that all frame data must be present and valid. */ +ZSTDLIB_API unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize); - do { - v1 = XXH64_round(v1, XXH_readLE64(p, endian)); p+=8; - v2 = XXH64_round(v2, XXH_readLE64(p, endian)); p+=8; - v3 = XXH64_round(v3, XXH_readLE64(p, endian)); p+=8; - v4 = XXH64_round(v4, XXH_readLE64(p, endian)); p+=8; - } while (p<=limit); +/*! ZSTD_decompressBound() : + * `src` should point to the start of a series of ZSTD encoded and/or skippable frames + * `srcSize` must be the _exact_ size of this series + * (i.e. 
there should be a frame boundary at `src + srcSize`) + * @return : - upper-bound for the decompressed size of all data in all successive frames + * - if an error occurred: ZSTD_CONTENTSIZE_ERROR + * + * note 1 : an error can occur if `src` contains an invalid or incorrectly formatted frame. + * note 2 : the upper-bound is exact when the decompressed size field is available in every ZSTD encoded frame of `src`. + * in this case, `ZSTD_findDecompressedSize` and `ZSTD_decompressBound` return the same value. + * note 3 : when the decompressed size field isn't available, the upper-bound for that frame is calculated by: + * upper-bound = # blocks * min(128 KB, Window_Size) + */ +ZSTDLIB_API unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize); - state->v1 = v1; - state->v2 = v2; - state->v3 = v3; - state->v4 = v4; - } +/*! ZSTD_frameHeaderSize() : + * srcSize must be >= ZSTD_FRAMEHEADERSIZE_PREFIX. + * @return : size of the Frame Header, + * or an error code (if srcSize is too small) */ +ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize); - if (p < bEnd) { - XXH_memcpy(state->mem64, p, (size_t)(bEnd-p)); - state->memsize = (unsigned)(bEnd-p); - } +typedef enum { + ZSTD_sf_noBlockDelimiters = 0, /* Representation of ZSTD_Sequence has no block delimiters, sequences only */ + ZSTD_sf_explicitBlockDelimiters = 1 /* Representation of ZSTD_Sequence contains explicit block delimiters */ +} ZSTD_sequenceFormat_e; - return XXH_OK; -} +/*! ZSTD_generateSequences() : + * Generate sequences using ZSTD_compress2, given a source buffer. + * + * Each block will end with a dummy sequence + * with offset == 0, matchLength == 0, and litLength == length of last literals. + * litLength may be == 0, and if so, then the sequence of (of: 0 ml: 0 ll: 0) + * simply acts as a block delimiter. + * + * zc can be used to insert custom compression params. + * This function invokes ZSTD_compress2 + * + * The output of this function can be fed into ZSTD_compressSequences() with CCtx + * setting of ZSTD_c_blockDelimiters as ZSTD_sf_explicitBlockDelimiters + * @return : number of sequences generated + */ -XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t len) -{ - XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; +ZSTDLIB_API size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, + size_t outSeqsSize, const void* src, size_t srcSize); - if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH64_update_endian(state_in, input, len, XXH_littleEndian); - else - return XXH64_update_endian(state_in, input, len, XXH_bigEndian); -} +/*! ZSTD_mergeBlockDelimiters() : + * Given an array of ZSTD_Sequence, remove all sequences that represent block delimiters/last literals + * by merging them into into the literals of the next sequence. + * + * As such, the final generated result has no explicit representation of block boundaries, + * and the final last literals segment is not represented in the sequences. + * + * The output of this function can be fed into ZSTD_compressSequences() with CCtx + * setting of ZSTD_c_blockDelimiters as ZSTD_sf_noBlockDelimiters + * @return : number of sequences left after merging + */ +ZSTDLIB_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize); +/*! ZSTD_compressSequences() : + * Compress an array of ZSTD_Sequence, generated from the original source buffer, into dst. + * If a dictionary is included, then the cctx should reference the dict. 
(see: ZSTD_CCtx_refCDict(), ZSTD_CCtx_loadDictionary(), etc.) + * The entire source is compressed into a single frame. + * + * The compression behavior changes based on cctx params. In particular: + * If ZSTD_c_blockDelimiters == ZSTD_sf_noBlockDelimiters, the array of ZSTD_Sequence is expected to contain + * no block delimiters (defined in ZSTD_Sequence). Block boundaries are roughly determined based on + * the block size derived from the cctx, and sequences may be split. This is the default setting. + * + * If ZSTD_c_blockDelimiters == ZSTD_sf_explicitBlockDelimiters, the array of ZSTD_Sequence is expected to contain + * block delimiters (defined in ZSTD_Sequence). Behavior is undefined if no block delimiters are provided. + * + * If ZSTD_c_validateSequences == 0, this function will blindly accept the sequences provided. Invalid sequences cause undefined + * behavior. If ZSTD_c_validateSequences == 1, then if sequence is invalid (see doc/zstd_compression_format.md for + * specifics regarding offset/matchlength requirements) then the function will bail out and return an error. + * + * In addition to the two adjustable experimental params, there are other important cctx params. + * - ZSTD_c_minMatch MUST be set as less than or equal to the smallest match generated by the match finder. It has a minimum value of ZSTD_MINMATCH_MIN. + * - ZSTD_c_compressionLevel accordingly adjusts the strength of the entropy coder, as it would in typical compression. + * - ZSTD_c_windowLog affects offset validation: this function will return an error at higher debug levels if a provided offset + * is larger than what the spec allows for a given window log and dictionary (if present). See: doc/zstd_compression_format.md + * + * Note: Repcodes are, as of now, always re-calculated within this function, so ZSTD_Sequence::rep is unused. + * Note 2: Once we integrate ability to ingest repcodes, the explicit block delims mode must respect those repcodes exactly, + * and cannot emit an RLE block that disagrees with the repcode history + * @return : final compressed size or a ZSTD error. + */ +ZSTDLIB_API size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstSize, + const ZSTD_Sequence* inSeqs, size_t inSeqsSize, + const void* src, size_t srcSize); -FORCE_INLINE_TEMPLATE U64 XXH64_digest_endian (const XXH64_state_t* state, XXH_endianess endian) -{ - const BYTE * p = (const BYTE*)state->mem64; - const BYTE* const bEnd = (const BYTE*)state->mem64 + state->memsize; - U64 h64; +/*! ZSTD_writeSkippableFrame() : + * Generates a zstd skippable frame containing data given by src, and writes it to dst buffer. + * + * Skippable frames begin with a a 4-byte magic number. There are 16 possible choices of magic number, + * ranging from ZSTD_MAGIC_SKIPPABLE_START to ZSTD_MAGIC_SKIPPABLE_START+15. + * As such, the parameter magicVariant controls the exact skippable frame magic number variant used, so + * the magic number used will be ZSTD_MAGIC_SKIPPABLE_START + magicVariant. + * + * Returns an error if destination buffer is not large enough, if the source size is not representable + * with a 4-byte unsigned int, or if the parameter magicVariant is greater than 15 (and therefore invalid). + * + * @return : number of bytes written or a ZSTD error. 
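A sketch of the pipeline described above: extract sequences with ZSTD_generateSequences(), then feed them back to ZSTD_compressSequences() with explicit block delimiters. The experimental symbols require ZSTD_STATIC_LINKING_ONLY; the function name and the deliberately generous maxSeqs bound are illustrative assumptions.

    #define ZSTD_STATIC_LINKING_ONLY
    #include <stdlib.h>
    #include <zstd.h>

    static size_t recompress_via_sequences(void* dst, size_t dstCapacity,
                                           const void* src, size_t srcSize)
    {
        ZSTD_CCtx* const cctx = ZSTD_createCCtx();
        size_t const maxSeqs = srcSize + 1;   /* loose upper bound, fine for a sketch */
        ZSTD_Sequence* const seqs = (ZSTD_Sequence*)malloc(maxSeqs * sizeof(ZSTD_Sequence));

        size_t const nbSeqs = ZSTD_generateSequences(cctx, seqs, maxSeqs, src, srcSize);
        if (ZSTD_isError(nbSeqs)) { free(seqs); ZSTD_freeCCtx(cctx); return nbSeqs; }

        /* generateSequences emits block delimiters, so tell compressSequences to expect them */
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockDelimiters, ZSTD_sf_explicitBlockDelimiters);
        size_t const cSize = ZSTD_compressSequences(cctx, dst, dstCapacity,
                                                    seqs, nbSeqs, src, srcSize);
        free(seqs);
        ZSTD_freeCCtx(cctx);
        return cSize;   /* final compressed size, or an error code */
    }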
+ */ +ZSTDLIB_API size_t ZSTD_writeSkippableFrame(void* dst, size_t dstCapacity, + const void* src, size_t srcSize, unsigned magicVariant); - if (state->total_len >= 32) { - U64 const v1 = state->v1; - U64 const v2 = state->v2; - U64 const v3 = state->v3; - U64 const v4 = state->v4; - h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); - h64 = XXH64_mergeRound(h64, v1); - h64 = XXH64_mergeRound(h64, v2); - h64 = XXH64_mergeRound(h64, v3); - h64 = XXH64_mergeRound(h64, v4); - } else { - h64 = state->v3 + PRIME64_5; - } +/*************************************** +* Memory management +***************************************/ - h64 += (U64) state->total_len; +/*! ZSTD_estimate*() : + * These functions make it possible to estimate memory usage + * of a future {D,C}Ctx, before its creation. + * + * ZSTD_estimateCCtxSize() will provide a memory budget large enough + * for any compression level up to selected one. + * Note : Unlike ZSTD_estimateCStreamSize*(), this estimate + * does not include space for a window buffer. + * Therefore, the estimation is only guaranteed for single-shot compressions, not streaming. + * The estimate will assume the input may be arbitrarily large, + * which is the worst case. + * + * When srcSize can be bound by a known and rather "small" value, + * this fact can be used to provide a tighter estimation + * because the CCtx compression context will need less memory. + * This tighter estimation can be provided by more advanced functions + * ZSTD_estimateCCtxSize_usingCParams(), which can be used in tandem with ZSTD_getCParams(), + * and ZSTD_estimateCCtxSize_usingCCtxParams(), which can be used in tandem with ZSTD_CCtxParams_setParameter(). + * Both can be used to estimate memory using custom compression parameters and arbitrary srcSize limits. + * + * Note 2 : only single-threaded compression is supported. + * ZSTD_estimateCCtxSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1. + */ +ZSTDLIB_API size_t ZSTD_estimateCCtxSize(int compressionLevel); +ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams); +ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params); +ZSTDLIB_API size_t ZSTD_estimateDCtxSize(void); - while (p+8<=bEnd) { - U64 const k1 = XXH64_round(0, XXH_readLE64(p, endian)); - h64 ^= k1; - h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; - p+=8; - } - - if (p+4<=bEnd) { - h64 ^= (U64)(XXH_readLE32(p, endian)) * PRIME64_1; - h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; - p+=4; - } - - while (p= 1. + * Note : CStream size estimation is only correct for single-threaded compression. + * ZSTD_DStream memory budget depends on window Size. + * This information can be passed manually, using ZSTD_estimateDStreamSize, + * or deducted from a valid frame Header, using ZSTD_estimateDStreamSize_fromFrame(); + * Note : if streaming is init with function ZSTD_init?Stream_usingDict(), + * an internal ?Dict will be created, which additional size is not estimated here. 
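A minimal sketch of ZSTD_writeSkippableFrame(): wrap arbitrary metadata in a frame that conforming decoders simply skip. The wrapper name and the choice of magicVariant 0 are illustrative; this symbol lives in the static-linking-only section.

    #define ZSTD_STATIC_LINKING_ONLY
    #include <zstd.h>

    static size_t write_metadata_frame(void* dst, size_t dstCapacity,
                                       const void* metadata, size_t metadataSize)
    {
        unsigned const magicVariant = 0;   /* frame magic = ZSTD_MAGIC_SKIPPABLE_START + 0 */
        /* returns metadataSize + ZSTD_SKIPPABLEHEADERSIZE bytes written, or an error code */
        return ZSTD_writeSkippableFrame(dst, dstCapacity, metadata, metadataSize, magicVariant);
    }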
+ * In this case, get total size by adding ZSTD_estimate?DictSize */ +ZSTDLIB_API size_t ZSTD_estimateCStreamSize(int compressionLevel); +ZSTDLIB_API size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams); +ZSTDLIB_API size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params); +ZSTDLIB_API size_t ZSTD_estimateDStreamSize(size_t windowSize); +ZSTDLIB_API size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize); - h64 ^= h64 >> 33; - h64 *= PRIME64_2; - h64 ^= h64 >> 29; - h64 *= PRIME64_3; - h64 ^= h64 >> 32; +/*! ZSTD_estimate?DictSize() : + * ZSTD_estimateCDictSize() will bet that src size is relatively "small", and content is copied, like ZSTD_createCDict(). + * ZSTD_estimateCDictSize_advanced() makes it possible to control compression parameters precisely, like ZSTD_createCDict_advanced(). + * Note : dictionaries created by reference (`ZSTD_dlm_byRef`) are logically smaller. + */ +ZSTDLIB_API size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel); +ZSTDLIB_API size_t ZSTD_estimateCDictSize_advanced(size_t dictSize, ZSTD_compressionParameters cParams, ZSTD_dictLoadMethod_e dictLoadMethod); +ZSTDLIB_API size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod); - return h64; -} +/*! ZSTD_initStatic*() : + * Initialize an object using a pre-allocated fixed-size buffer. + * workspace: The memory area to emplace the object into. + * Provided pointer *must be 8-bytes aligned*. + * Buffer must outlive object. + * workspaceSize: Use ZSTD_estimate*Size() to determine + * how large workspace must be to support target scenario. + * @return : pointer to object (same address as workspace, just different type), + * or NULL if error (size too small, incorrect alignment, etc.) + * Note : zstd will never resize nor malloc() when using a static buffer. + * If the object requires more memory than available, + * zstd will just error out (typically ZSTD_error_memory_allocation). + * Note 2 : there is no corresponding "free" function. + * Since workspace is allocated externally, it must be freed externally too. + * Note 3 : cParams : use ZSTD_getCParams() to convert a compression level + * into its associated cParams. + * Limitation 1 : currently not compatible with internal dictionary creation, triggered by + * ZSTD_CCtx_loadDictionary(), ZSTD_initCStream_usingDict() or ZSTD_initDStream_usingDict(). + * Limitation 2 : static cctx currently not compatible with multi-threading. + * Limitation 3 : static dctx is incompatible with legacy support. 
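A sketch combining ZSTD_estimateCCtxSize() with ZSTD_initStaticCCtx(): the context is emplaced in a caller-provided workspace and zstd performs no further allocation. The workspace here comes from malloc only for brevity; any 8-byte aligned buffer works, and since the estimate only covers single-shot compression, the sketch sticks to ZSTD_compressCCtx().

    #define ZSTD_STATIC_LINKING_ONLY
    #include <stdlib.h>
    #include <zstd.h>

    static size_t compress_in_static_workspace(void* dst, size_t dstCapacity,
                                               const void* src, size_t srcSize, int level)
    {
        size_t const workspaceSize = ZSTD_estimateCCtxSize(level);
        void* const workspace = malloc(workspaceSize);
        ZSTD_CCtx* const cctx = ZSTD_initStaticCCtx(workspace, workspaceSize);
        if (cctx == NULL) { free(workspace); return (size_t)-1; }   /* too small or misaligned */

        size_t const cSize = ZSTD_compressCCtx(cctx, dst, dstCapacity, src, srcSize, level);

        /* no ZSTD_freeCCtx() here: the object lives inside 'workspace', release that instead */
        free(workspace);
        return cSize;
    }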
+ */ +ZSTDLIB_API ZSTD_CCtx* ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize); +ZSTDLIB_API ZSTD_CStream* ZSTD_initStaticCStream(void* workspace, size_t workspaceSize); /**< same as ZSTD_initStaticCCtx() */ +ZSTDLIB_API ZSTD_DCtx* ZSTD_initStaticDCtx(void* workspace, size_t workspaceSize); +ZSTDLIB_API ZSTD_DStream* ZSTD_initStaticDStream(void* workspace, size_t workspaceSize); /**< same as ZSTD_initStaticDCtx() */ -XXH_PUBLIC_API unsigned long long XXH64_digest (const XXH64_state_t* state_in) -{ - XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; +ZSTDLIB_API const ZSTD_CDict* ZSTD_initStaticCDict( + void* workspace, size_t workspaceSize, + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_compressionParameters cParams); - if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH64_digest_endian(state_in, XXH_littleEndian); - else - return XXH64_digest_endian(state_in, XXH_bigEndian); -} +ZSTDLIB_API const ZSTD_DDict* ZSTD_initStaticDDict( + void* workspace, size_t workspaceSize, + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType); -/* ************************** -* Canonical representation -****************************/ +/*! Custom memory allocation : + * These prototypes make it possible to pass your own allocation/free functions. + * ZSTD_customMem is provided at creation time, using ZSTD_create*_advanced() variants listed below. + * All allocation/free operations will be completed using these custom variants instead of regular ones. + */ +typedef void* (*ZSTD_allocFunction) (void* opaque, size_t size); +typedef void (*ZSTD_freeFunction) (void* opaque, void* address); +typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; void* opaque; } ZSTD_customMem; +static +#ifdef __GNUC__ +__attribute__((__unused__)) +#endif +ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL }; /**< this constant defers to stdlib's functions */ -/*! Default XXH result types are basic unsigned 32 and 64 bits. -* The canonical representation follows human-readable write convention, aka big-endian (large digits first). -* These functions allow transformation of hash result into and from its canonical format. -* This way, hash values can be written into a file or buffer, and remain comparable across different systems and programs. -*/ +ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem); +ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem); +ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem); +ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem); -XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash) -{ - XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t)); - if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash); - memcpy(dst, &hash, sizeof(*dst)); -} +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_compressionParameters cParams, + ZSTD_customMem customMem); -XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash) -{ - XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t)); - if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash); - memcpy(dst, &hash, sizeof(*dst)); -} +/* ! 
Thread pool : + * These prototypes make it possible to share a thread pool among multiple compression contexts. + * This can limit resources for applications with multiple threads where each one uses + * a threaded compression mode (via ZSTD_c_nbWorkers parameter). + * ZSTD_createThreadPool creates a new thread pool with a given number of threads. + * Note that the lifetime of such pool must exist while being used. + * ZSTD_CCtx_refThreadPool assigns a thread pool to a context (use NULL argument value + * to use an internal thread pool). + * ZSTD_freeThreadPool frees a thread pool, accepts NULL pointer. + */ +typedef struct POOL_ctx_s ZSTD_threadPool; +ZSTDLIB_API ZSTD_threadPool* ZSTD_createThreadPool(size_t numThreads); +ZSTDLIB_API void ZSTD_freeThreadPool (ZSTD_threadPool* pool); /* accept NULL pointer */ +ZSTDLIB_API size_t ZSTD_CCtx_refThreadPool(ZSTD_CCtx* cctx, ZSTD_threadPool* pool); -XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src) -{ - return XXH_readBE32(src); -} -XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src) -{ - return XXH_readBE64(src); -} -/**** ended inlining xxhash.c ****/ -/**** start inlining zstd_common.c ****/ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. + * This API is temporary and is expected to change or disappear in the future! */ +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced2( + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + const ZSTD_CCtx_params* cctxParams, + ZSTD_customMem customMem); +ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced( + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_customMem customMem); -/*-************************************* -* Dependencies +/*************************************** +* Advanced compression functions ***************************************/ -#include /* malloc, calloc, free */ -#include /* memset */ -/**** skipping file: error_private.h ****/ -/**** start inlining zstd_internal.h ****/ -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ -#ifndef ZSTD_CCOMMON_H_MODULE -#define ZSTD_CCOMMON_H_MODULE +/*! ZSTD_createCDict_byReference() : + * Create a digested dictionary for compression + * Dictionary content is just referenced, not duplicated. + * As a consequence, `dictBuffer` **must** outlive CDict, + * and its content must remain unmodified throughout the lifetime of CDict. + * note: equivalent to ZSTD_createCDict_advanced(), with dictLoadMethod==ZSTD_dlm_byRef */ +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel); -/* this module contains definitions which must be identical - * across compression, decompression and dictBuilder. 
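A hedged sketch of sharing one thread pool between two compression contexts, as described in the thread-pool note above; the worker count and helper name are assumptions, and this only has an effect in a ZSTD_MULTITHREAD build:

#define ZSTD_STATIC_LINKING_ONLY
#include "zstd.h"

static int setupSharedPool(ZSTD_CCtx* cctxA, ZSTD_CCtx* cctxB, ZSTD_threadPool** poolOut)
{
    ZSTD_threadPool* pool = ZSTD_createThreadPool(4);     /* 4 worker threads shared by both contexts */
    if (pool == NULL) return -1;
    /* each context still needs nbWorkers >= 1 to submit jobs to the pool */
    ZSTD_CCtx_setParameter(cctxA, ZSTD_c_nbWorkers, 4);
    ZSTD_CCtx_setParameter(cctxB, ZSTD_c_nbWorkers, 4);
    if (ZSTD_isError(ZSTD_CCtx_refThreadPool(cctxA, pool))
     || ZSTD_isError(ZSTD_CCtx_refThreadPool(cctxB, pool))) {
        ZSTD_freeThreadPool(pool);
        return -1;
    }
    *poolOut = pool;   /* the pool must outlive both contexts' use; free with ZSTD_freeThreadPool() */
    return 0;
}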
- * It also contains a few functions useful to at least 2 of them - * and which benefit from being inlined */ +/*! ZSTD_getCParams() : + * @return ZSTD_compressionParameters structure for a selected compression level and estimated srcSize. + * `estimatedSrcSize` value is optional, select 0 if not known */ +ZSTDLIB_API ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize); -/*-************************************* -* Dependencies -***************************************/ -/**** skipping file: compiler.h ****/ -/**** skipping file: mem.h ****/ -/**** skipping file: debug.h ****/ -/**** skipping file: error_private.h ****/ -#define ZSTD_STATIC_LINKING_ONLY -/**** start inlining zstd.h ****/ -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ -#if defined (__cplusplus) -extern "C" { -#endif +/*! ZSTD_getParams() : + * same as ZSTD_getCParams(), but @return a full `ZSTD_parameters` object instead of sub-component `ZSTD_compressionParameters`. + * All fields of `ZSTD_frameParameters` are set to default : contentSize=1, checksum=0, noDictID=0 */ +ZSTDLIB_API ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize); -#ifndef ZSTD_H_235446 -#define ZSTD_H_235446 +/*! ZSTD_checkCParams() : + * Ensure param values remain within authorized range. + * @return 0 on success, or an error code (can be checked with ZSTD_isError()) */ +ZSTDLIB_API size_t ZSTD_checkCParams(ZSTD_compressionParameters params); -/* ====== Dependency ======*/ -#include /* INT_MAX */ -#include /* size_t */ +/*! ZSTD_adjustCParams() : + * optimize params for a given `srcSize` and `dictSize`. + * `srcSize` can be unknown, in which case use ZSTD_CONTENTSIZE_UNKNOWN. + * `dictSize` must be `0` when there is no dictionary. + * cPar can be invalid : all parameters will be clamped within valid range in the @return struct. + * This function never fails (wide contract) */ +ZSTDLIB_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize); +/*! ZSTD_compress_advanced() : + * Note : this function is now DEPRECATED. + * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_setParameter() and other parameter setters. + * This prototype will generate compilation warnings. 
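One way the ZSTD_getCParams()/ZSTD_adjustCParams() pair above can feed an advanced CDict; the helper and the byCopy/auto choices are illustrative assumptions, not mandated by the header:

#define ZSTD_STATIC_LINKING_ONLY
#include "zstd.h"

static ZSTD_CDict* makeCDictForLevel(const void* dict, size_t dictSize,
                                     int level, unsigned long long typicalSrcSize)
{
    /* derive parameters for the expected input size, then clamp them into valid range */
    ZSTD_compressionParameters cParams = ZSTD_getCParams(level, typicalSrcSize, dictSize);
    cParams = ZSTD_adjustCParams(cParams, typicalSrcSize, dictSize);   /* never fails */
    return ZSTD_createCDict_advanced(dict, dictSize,
                                     ZSTD_dlm_byCopy, ZSTD_dct_auto,
                                     cParams, ZSTD_defaultCMem);
}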
*/ +ZSTD_DEPRECATED("use ZSTD_compress2") +size_t ZSTD_compress_advanced(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + ZSTD_parameters params); -/* ===== ZSTDLIB_API : control library symbols visibility ===== */ -#ifndef ZSTDLIB_VISIBILITY -# if defined(__GNUC__) && (__GNUC__ >= 4) -# define ZSTDLIB_VISIBILITY __attribute__ ((visibility ("default"))) -# else -# define ZSTDLIB_VISIBILITY -# endif -#endif -#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1) -# define ZSTDLIB_API __declspec(dllexport) ZSTDLIB_VISIBILITY -#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1) -# define ZSTDLIB_API __declspec(dllimport) ZSTDLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ -#else -# define ZSTDLIB_API ZSTDLIB_VISIBILITY -#endif +/*! ZSTD_compress_usingCDict_advanced() : + * Note : this function is now DEPRECATED. + * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_loadDictionary() and other parameter setters. + * This prototype will generate compilation warnings. */ +ZSTD_DEPRECATED("use ZSTD_compress2 with ZSTD_CCtx_loadDictionary") +size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict, + ZSTD_frameParameters fParams); -/******************************************************************************* - Introduction +/*! ZSTD_CCtx_loadDictionary_byReference() : + * Same as ZSTD_CCtx_loadDictionary(), but dictionary content is referenced, instead of being copied into CCtx. + * It saves some memory, but also requires that `dict` outlives its usage within `cctx` */ +ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_byReference(ZSTD_CCtx* cctx, const void* dict, size_t dictSize); - zstd, short for Zstandard, is a fast lossless compression algorithm, targeting - real-time compression scenarios at zlib-level and better compression ratios. - The zstd compression library provides in-memory compression and decompression - functions. +/*! ZSTD_CCtx_loadDictionary_advanced() : + * Same as ZSTD_CCtx_loadDictionary(), but gives finer control over + * how to load the dictionary (by copy ? by reference ?) + * and how to interpret it (automatic ? force raw mode ? full mode only ?) */ +ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType); - The library supports regular compression levels from 1 up to ZSTD_maxCLevel(), - which is currently 22. Levels >= 20, labeled `--ultra`, should be used with - caution, as they require more memory. The library also offers negative - compression levels, which extend the range of speed vs. ratio preferences. - The lower the level, the faster the speed (at the cost of compression). - - Compression can be done in: - - a single step (described as Simple API) - - a single step, reusing a context (described as Explicit context) - - unbounded multiple steps (described as Streaming compression) +/*! ZSTD_CCtx_refPrefix_advanced() : + * Same as ZSTD_CCtx_refPrefix(), but gives finer control over + * how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) 
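A minimal sketch of dictionary loading by reference, per the note above that `dict` must outlive its use inside the cctx; the compression level and helper name are assumptions:

#define ZSTD_STATIC_LINKING_ONLY
#include "zstd.h"

static size_t compressWithRefDict(ZSTD_CCtx* cctx,
                                  void* dst, size_t dstCap,
                                  const void* src, size_t srcSize,
                                  const void* dictBuf, size_t dictSize)
{
    ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 19);
    /* the dictionary content is only referenced, so dictBuf must stay valid and unmodified */
    size_t const err = ZSTD_CCtx_loadDictionary_byReference(cctx, dictBuf, dictSize);
    if (ZSTD_isError(err)) return err;
    return ZSTD_compress2(cctx, dst, dstCap, src, srcSize);
}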
*/ +ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType); - The compression ratio achievable on small data can be highly improved using - a dictionary. Dictionary compression can be performed in: - - a single step (described as Simple dictionary API) - - a single step, reusing a dictionary (described as Bulk-processing - dictionary API) +/* === experimental parameters === */ +/* these parameters can be used with ZSTD_setParameter() + * they are not guaranteed to remain supported in the future */ - Advanced experimental functions can be accessed using - `#define ZSTD_STATIC_LINKING_ONLY` before including zstd.h. + /* Enables rsyncable mode, + * which makes compressed files more rsync friendly + * by adding periodic synchronization points to the compressed data. + * The target average block size is ZSTD_c_jobSize / 2. + * It's possible to modify the job size to increase or decrease + * the granularity of the synchronization point. + * Once the jobSize is smaller than the window size, + * it will result in compression ratio degradation. + * NOTE 1: rsyncable mode only works when multithreading is enabled. + * NOTE 2: rsyncable performs poorly in combination with long range mode, + * since it will decrease the effectiveness of synchronization points, + * though mileage may vary. + * NOTE 3: Rsyncable mode limits maximum compression speed to ~400 MB/s. + * If the selected compression level is already running significantly slower, + * the overall speed won't be significantly impacted. + */ + #define ZSTD_c_rsyncable ZSTD_c_experimentalParam1 - Advanced experimental APIs should never be used with a dynamically-linked - library. They are not "stable"; their definitions or signatures may change in - the future. Only static linking is allowed. -*******************************************************************************/ +/* Select a compression format. + * The value must be of type ZSTD_format_e. + * See ZSTD_format_e enum definition for details */ +#define ZSTD_c_format ZSTD_c_experimentalParam2 -/*------ Version ------*/ -#define ZSTD_VERSION_MAJOR 1 -#define ZSTD_VERSION_MINOR 4 -#define ZSTD_VERSION_RELEASE 5 +/* Force back-reference distances to remain < windowSize, + * even when referencing into Dictionary content (default:0) */ +#define ZSTD_c_forceMaxWindow ZSTD_c_experimentalParam3 -#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE) -ZSTDLIB_API unsigned ZSTD_versionNumber(void); /**< to check runtime library version */ +/* Controls whether the contents of a CDict + * are used in place, or copied into the working context. + * Accepts values from the ZSTD_dictAttachPref_e enum. + * See the comments on that enum for an explanation of the feature. */ +#define ZSTD_c_forceAttachDict ZSTD_c_experimentalParam4 -#define ZSTD_LIB_VERSION ZSTD_VERSION_MAJOR.ZSTD_VERSION_MINOR.ZSTD_VERSION_RELEASE -#define ZSTD_QUOTE(str) #str -#define ZSTD_EXPAND_AND_QUOTE(str) ZSTD_QUOTE(str) -#define ZSTD_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LIB_VERSION) -ZSTDLIB_API const char* ZSTD_versionString(void); /* requires v1.3.0+ */ +/* Controls how the literals are compressed (default is auto). + * The value must be of type ZSTD_literalCompressionMode_e. + * See ZSTD_literalCompressionMode_e enum definition for details. 
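A short sketch of driving the experimental parameters above through the regular setter; the level and worker count are assumptions, and ZSTD_c_rsyncable only takes effect with multithreading enabled (ZSTD_MULTITHREAD build, nbWorkers >= 1):

#define ZSTD_STATIC_LINKING_ONLY
#include "zstd.h"

static void configureRsyncable(ZSTD_CCtx* cctx)
{
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 2);
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 3);
    /* experimental parameters are ordinary ZSTD_cParameter values behind
       ZSTD_STATIC_LINKING_ONLY, so they go through the same setter */
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_rsyncable, 1);
}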
+ */ +#define ZSTD_c_literalCompressionMode ZSTD_c_experimentalParam5 -/* ************************************* - * Default constant - ***************************************/ -#ifndef ZSTD_CLEVEL_DEFAULT -# define ZSTD_CLEVEL_DEFAULT 3 -#endif +/* Tries to fit compressed block size to be around targetCBlockSize. + * No target when targetCBlockSize == 0. + * There is no guarantee on compressed block size (default:0) */ +#define ZSTD_c_targetCBlockSize ZSTD_c_experimentalParam6 -/* ************************************* - * Constants - ***************************************/ +/* User's best guess of source size. + * Hint is not valid when srcSizeHint == 0. + * There is no guarantee that hint is close to actual source size, + * but compression ratio may regress significantly if guess considerably underestimates */ +#define ZSTD_c_srcSizeHint ZSTD_c_experimentalParam7 -/* All magic numbers are supposed read/written to/from files/memory using little-endian convention */ -#define ZSTD_MAGICNUMBER 0xFD2FB528 /* valid since v0.8.0 */ -#define ZSTD_MAGIC_DICTIONARY 0xEC30A437 /* valid since v0.7.0 */ -#define ZSTD_MAGIC_SKIPPABLE_START 0x184D2A50 /* all 16 values, from 0x184D2A50 to 0x184D2A5F, signal the beginning of a skippable frame */ -#define ZSTD_MAGIC_SKIPPABLE_MASK 0xFFFFFFF0 +/* Controls whether the new and experimental "dedicated dictionary search + * structure" can be used. This feature is still rough around the edges, be + * prepared for surprising behavior! + * + * How to use it: + * + * When using a CDict, whether to use this feature or not is controlled at + * CDict creation, and it must be set in a CCtxParams set passed into that + * construction (via ZSTD_createCDict_advanced2()). A compression will then + * use the feature or not based on how the CDict was constructed; the value of + * this param, set in the CCtx, will have no effect. + * + * However, when a dictionary buffer is passed into a CCtx, such as via + * ZSTD_CCtx_loadDictionary(), this param can be set on the CCtx to control + * whether the CDict that is created internally can use the feature or not. + * + * What it does: + * + * Normally, the internal data structures of the CDict are analogous to what + * would be stored in a CCtx after compressing the contents of a dictionary. + * To an approximation, a compression using a dictionary can then use those + * data structures to simply continue what is effectively a streaming + * compression where the simulated compression of the dictionary left off. + * Which is to say, the search structures in the CDict are normally the same + * format as in the CCtx. + * + * It is possible to do better, since the CDict is not like a CCtx: the search + * structures are written once during CDict creation, and then are only read + * after that, while the search structures in the CCtx are both read and + * written as the compression goes along. This means we can choose a search + * structure for the dictionary that is read-optimized. + * + * This feature enables the use of that different structure. + * + * Note that some of the members of the ZSTD_compressionParameters struct have + * different semantics and constraints in the dedicated search structure. It is + * highly recommended that you simply set a compression level in the CCtxParams + * you pass into the CDict creation call, and avoid messing with the cParams + * directly. + * + * Effects: + * + * This will only have any effect when the selected ZSTD_strategy + * implementation supports this feature. 
Currently, that's limited to + * ZSTD_greedy, ZSTD_lazy, and ZSTD_lazy2. + * + * Note that this means that the CDict tables can no longer be copied into the + * CCtx, so the dict attachment mode ZSTD_dictForceCopy will no longer be + * useable. The dictionary can only be attached or reloaded. + * + * In general, you should expect compression to be faster--sometimes very much + * so--and CDict creation to be slightly slower. Eventually, we will probably + * make this mode the default. + */ +#define ZSTD_c_enableDedicatedDictSearch ZSTD_c_experimentalParam8 -#define ZSTD_BLOCKSIZELOG_MAX 17 -#define ZSTD_BLOCKSIZE_MAX (1<= `ZSTD_compressBound(srcSize)`. - * @return : compressed size written into `dst` (<= `dstCapacity), - * or an error code if it fails (which can be tested using ZSTD_isError()). */ -ZSTDLIB_API size_t ZSTD_compress( void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - int compressionLevel); +/* ZSTD_c_validateSequences + * Default is 0 == disabled. Set to 1 to enable sequence validation. + * + * For use with sequence compression API: ZSTD_compressSequences(). + * Designates whether or not we validate sequences provided to ZSTD_compressSequences() + * during function execution. + * + * Without validation, providing a sequence that does not conform to the zstd spec will cause + * undefined behavior, and may produce a corrupted block. + * + * With validation enabled, a if sequence is invalid (see doc/zstd_compression_format.md for + * specifics regarding offset/matchlength requirements) then the function will bail out and + * return an error. + * + */ +#define ZSTD_c_validateSequences ZSTD_c_experimentalParam12 -/*! ZSTD_decompress() : - * `compressedSize` : must be the _exact_ size of some number of compressed and/or skippable frames. - * `dstCapacity` is an upper bound of originalSize to regenerate. - * If user cannot imply a maximum upper bound, it's better to use streaming mode to decompress data. - * @return : the number of bytes decompressed into `dst` (<= `dstCapacity`), - * or an errorCode if it fails (which can be tested using ZSTD_isError()). */ -ZSTDLIB_API size_t ZSTD_decompress( void* dst, size_t dstCapacity, - const void* src, size_t compressedSize); +/* ZSTD_c_splitBlocks + * Default is 0 == disabled. Set to 1 to enable block splitting. + * + * Will attempt to split blocks in order to improve compression ratio at the cost of speed. + */ +#define ZSTD_c_splitBlocks ZSTD_c_experimentalParam13 -/*! ZSTD_getFrameContentSize() : requires v1.3.0+ - * `src` should point to the start of a ZSTD encoded frame. - * `srcSize` must be at least as large as the frame header. - * hint : any size >= `ZSTD_frameHeaderSize_max` is large enough. - * @return : - decompressed size of `src` frame content, if known - * - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined - * - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small) - * note 1 : a 0 return value means the frame is valid but "empty". - * note 2 : decompressed size is an optional field, it may not be present, typically in streaming mode. - * When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size. - * In which case, it's necessary to use streaming mode to decompress data. - * Optionally, application can rely on some implicit limit, - * as ZSTD_decompress() only needs an upper bound of decompressed size. - * (For example, data could be necessarily cut into blocks <= 16 KB). 
- * note 3 : decompressed size is always present when compression is completed using single-pass functions, - * such as ZSTD_compress(), ZSTD_compressCCtx() ZSTD_compress_usingDict() or ZSTD_compress_usingCDict(). - * note 4 : decompressed size can be very large (64-bits value), - * potentially larger than what local system can handle as a single memory segment. - * In which case, it's necessary to use streaming mode to decompress data. - * note 5 : If source is untrusted, decompressed size could be wrong or intentionally modified. - * Always ensure return value fits within application's authorized limits. - * Each application can set its own limits. - * note 6 : This function replaces ZSTD_getDecompressedSize() */ -#define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1) -#define ZSTD_CONTENTSIZE_ERROR (0ULL - 2) -ZSTDLIB_API unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize); +/* ZSTD_c_useRowMatchFinder + * Default is ZSTD_urm_auto. + * Controlled with ZSTD_useRowMatchFinderMode_e enum. + * + * By default, in ZSTD_urm_auto, when finalizing the compression parameters, the library + * will decide at runtime whether to use the row-based matchfinder based on support for SIMD + * instructions as well as the windowLog. + * + * Set to ZSTD_urm_disableRowMatchFinder to never use row-based matchfinder. + * Set to ZSTD_urm_enableRowMatchFinder to force usage of row-based matchfinder. + */ +#define ZSTD_c_useRowMatchFinder ZSTD_c_experimentalParam14 -/*! ZSTD_getDecompressedSize() : - * NOTE: This function is now obsolete, in favor of ZSTD_getFrameContentSize(). - * Both functions work the same way, but ZSTD_getDecompressedSize() blends - * "empty", "unknown" and "error" results to the same return value (0), - * while ZSTD_getFrameContentSize() gives them separate return values. - * @return : decompressed size of `src` frame content _if known and not empty_, 0 otherwise. */ -ZSTDLIB_API unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize); +/* ZSTD_c_deterministicRefPrefix + * Default is 0 == disabled. Set to 1 to enable. + * + * Zstd produces different results for prefix compression when the prefix is + * directly adjacent to the data about to be compressed vs. when it isn't. + * This is because zstd detects that the two buffers are contiguous and it can + * use a more efficient match finding algorithm. However, this produces different + * results than when the two buffers are non-contiguous. This flag forces zstd + * to always load the prefix in non-contiguous mode, even if it happens to be + * adjacent to the data, to guarantee determinism. + * + * If you really care about determinism when using a dictionary or prefix, + * like when doing delta compression, you should select this option. It comes + * at a speed penalty of about ~2.5% if the dictionary and data happened to be + * contiguous, and is free if they weren't contiguous. We don't expect that + * intentionally making the dictionary and data contiguous will be worth the + * cost to memcpy() the data. + */ +#define ZSTD_c_deterministicRefPrefix ZSTD_c_experimentalParam15 -/*! ZSTD_findFrameCompressedSize() : - * `src` should point to the start of a ZSTD frame or skippable frame. - * `srcSize` must be >= first frame size - * @return : the compressed size of the first frame starting at `src`, - * suitable to pass as `srcSize` to `ZSTD_decompress` or similar, - * or an error code if input is invalid */ -ZSTDLIB_API size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize); +/*! 
ZSTD_CCtx_getParameter() : + * Get the requested compression parameter value, selected by enum ZSTD_cParameter, + * and store it into int* value. + * @return : 0, or an error code (which can be tested with ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_CCtx_getParameter(const ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value); -/*====== Helper functions ======*/ -#define ZSTD_COMPRESSBOUND(srcSize) ((srcSize) + ((srcSize)>>8) + (((srcSize) < (128<<10)) ? (((128<<10) - (srcSize)) >> 11) /* margin, from 64 to 0 */ : 0)) /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */ -ZSTDLIB_API size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case single-pass scenario */ -ZSTDLIB_API unsigned ZSTD_isError(size_t code); /*!< tells if a `size_t` function result is an error code */ -ZSTDLIB_API const char* ZSTD_getErrorName(size_t code); /*!< provides readable string from an error code */ -ZSTDLIB_API int ZSTD_minCLevel(void); /*!< minimum negative compression level allowed */ -ZSTDLIB_API int ZSTD_maxCLevel(void); /*!< maximum compression level available */ +/*! ZSTD_CCtx_params : + * Quick howto : + * - ZSTD_createCCtxParams() : Create a ZSTD_CCtx_params structure + * - ZSTD_CCtxParams_setParameter() : Push parameters one by one into + * an existing ZSTD_CCtx_params structure. + * This is similar to + * ZSTD_CCtx_setParameter(). + * - ZSTD_CCtx_setParametersUsingCCtxParams() : Apply parameters to + * an existing CCtx. + * These parameters will be applied to + * all subsequent frames. + * - ZSTD_compressStream2() : Do compression using the CCtx. + * - ZSTD_freeCCtxParams() : Free the memory, accept NULL pointer. + * + * This can be used with ZSTD_estimateCCtxSize_advanced_usingCCtxParams() + * for static allocation of CCtx for single-threaded compression. + */ +ZSTDLIB_API ZSTD_CCtx_params* ZSTD_createCCtxParams(void); +ZSTDLIB_API size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params); /* accept NULL pointer */ +/*! ZSTD_CCtxParams_reset() : + * Reset params to default values. + */ +ZSTDLIB_API size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params); -/*************************************** -* Explicit context -***************************************/ -/*= Compression context - * When compressing many times, - * it is recommended to allocate a context just once, - * and re-use it for each successive compression operation. - * This will make workload friendlier for system's memory. - * Note : re-using context is just a speed / resource optimization. - * It doesn't change the compression ratio, which remains identical. - * Note 2 : In multi-threaded environments, - * use one different context per thread for parallel execution. +/*! ZSTD_CCtxParams_init() : + * Initializes the compression parameters of cctxParams according to + * compression level. All other parameters are reset to their default values. */ -typedef struct ZSTD_CCtx_s ZSTD_CCtx; -ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void); -ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx); +ZSTDLIB_API size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel); -/*! ZSTD_compressCCtx() : - * Same as ZSTD_compress(), using an explicit ZSTD_CCtx. - * Important : in order to behave similarly to `ZSTD_compress()`, - * this function compresses at requested compression level, - * __ignoring any other parameter__ . - * If any advanced parameter was set using the advanced API, - * they will all be reset. Only `compressionLevel` remains. +/*! 
ZSTD_CCtxParams_init_advanced() : + * Initializes the compression and frame parameters of cctxParams according to + * params. All other parameters are reset to their default values. */ -ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - int compressionLevel); +ZSTDLIB_API size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params); -/*= Decompression context - * When decompressing many times, - * it is recommended to allocate a context only once, - * and re-use it for each successive compression operation. - * This will make workload friendlier for system's memory. - * Use one context per thread for parallel execution. */ -typedef struct ZSTD_DCtx_s ZSTD_DCtx; -ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx(void); -ZSTDLIB_API size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx); +/*! ZSTD_CCtxParams_setParameter() : Requires v1.4.0+ + * Similar to ZSTD_CCtx_setParameter. + * Set one compression parameter, selected by enum ZSTD_cParameter. + * Parameters must be applied to a ZSTD_CCtx using + * ZSTD_CCtx_setParametersUsingCCtxParams(). + * @result : a code representing success or failure (which can be tested with + * ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value); -/*! ZSTD_decompressDCtx() : - * Same as ZSTD_decompress(), - * requires an allocated ZSTD_DCtx. - * Compatible with sticky parameters. +/*! ZSTD_CCtxParams_getParameter() : + * Similar to ZSTD_CCtx_getParameter. + * Get the requested value of one compression parameter, selected by enum ZSTD_cParameter. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). */ -ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize); +ZSTDLIB_API size_t ZSTD_CCtxParams_getParameter(const ZSTD_CCtx_params* params, ZSTD_cParameter param, int* value); + +/*! ZSTD_CCtx_setParametersUsingCCtxParams() : + * Apply a set of ZSTD_CCtx_params to the compression context. + * This can be done even after compression is started, + * if nbWorkers==0, this will have no impact until a new compression is started. + * if nbWorkers>=1, new parameters will be picked up at next job, + * with a few restrictions (windowLog, pledgedSrcSize, nbWorkers, jobSize, and overlapLog are not updated). + */ +ZSTDLIB_API size_t ZSTD_CCtx_setParametersUsingCCtxParams( + ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params); + +/*! ZSTD_compressStream2_simpleArgs() : + * Same as ZSTD_compressStream2(), + * but using only integral types as arguments. + * This variant might be helpful for binders from dynamic languages + * which have troubles handling structures containing memory pointers. + */ +ZSTDLIB_API size_t ZSTD_compressStream2_simpleArgs ( + ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, size_t* dstPos, + const void* src, size_t srcSize, size_t* srcPos, + ZSTD_EndDirective endOp); /*************************************** -* Advanced compression API +* Advanced decompression functions ***************************************/ -/* API design : - * Parameters are pushed one by one into an existing context, - * using ZSTD_CCtx_set*() functions. - * Pushed parameters are sticky : they are valid for next compressed frame, and any subsequent frame. - * "sticky" parameters are applicable to `ZSTD_compress2()` and `ZSTD_compressStream*()` ! - * __They do not apply to "simple" one-shot variants such as ZSTD_compressCCtx()__ . +/*! 
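A possible rendering of the ZSTD_CCtx_params "quick howto" above: build a parameter set once, apply it to a context, then free the set (level and parameter choices are illustrative assumptions):

#define ZSTD_STATIC_LINKING_ONLY
#include "zstd.h"

static size_t applySharedParams(ZSTD_CCtx* cctx)
{
    ZSTD_CCtx_params* params = ZSTD_createCCtxParams();
    if (params == NULL) return (size_t)-1;
    ZSTD_CCtxParams_init(params, 6);                                 /* level-6 defaults */
    ZSTD_CCtxParams_setParameter(params, ZSTD_c_checksumFlag, 1);
    ZSTD_CCtxParams_setParameter(params, ZSTD_c_windowLog, 24);
    size_t const err = ZSTD_CCtx_setParametersUsingCCtxParams(cctx, params);
    ZSTD_freeCCtxParams(params);   /* the cctx keeps its own copy of the requested values */
    return err;                    /* subsequent ZSTD_compressStream2() calls use these parameters */
}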
ZSTD_isFrame() : + * Tells if the content of `buffer` starts with a valid Frame Identifier. + * Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0. + * Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled. + * Note 3 : Skippable Frame Identifiers are considered valid. */ +ZSTDLIB_API unsigned ZSTD_isFrame(const void* buffer, size_t size); + +/*! ZSTD_createDDict_byReference() : + * Create a digested dictionary, ready to start decompression operation without startup delay. + * Dictionary content is referenced, and therefore stays in dictBuffer. + * It is important that dictBuffer outlives DDict, + * it must remain read accessible throughout the lifetime of DDict */ +ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize); + +/*! ZSTD_DCtx_loadDictionary_byReference() : + * Same as ZSTD_DCtx_loadDictionary(), + * but references `dict` content instead of copying it into `dctx`. + * This saves memory if `dict` remains around., + * However, it's imperative that `dict` remains accessible (and unmodified) while being used, so it must outlive decompression. */ +ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); + +/*! ZSTD_DCtx_loadDictionary_advanced() : + * Same as ZSTD_DCtx_loadDictionary(), + * but gives direct control over + * how to load the dictionary (by copy ? by reference ?) + * and how to interpret it (automatic ? force raw mode ? full mode only ?). */ +ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType); + +/*! ZSTD_DCtx_refPrefix_advanced() : + * Same as ZSTD_DCtx_refPrefix(), but gives finer control over + * how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) */ +ZSTDLIB_API size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType); + +/*! ZSTD_DCtx_setMaxWindowSize() : + * Refuses allocating internal buffers for frames requiring a window size larger than provided limit. + * This protects a decoder context from reserving too much memory for itself (potential attack scenario). + * This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode. + * By default, a decompression context accepts all window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT) + * @return : 0, or an error code (which can be tested using ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize); + +/*! ZSTD_DCtx_getParameter() : + * Get the requested decompression parameter value, selected by enum ZSTD_dParameter, + * and store it into int* value. + * @return : 0, or an error code (which can be tested with ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int* value); + +/* ZSTD_d_format + * experimental parameter, + * allowing selection between ZSTD_format_e input compression formats + */ +#define ZSTD_d_format ZSTD_d_experimentalParam1 +/* ZSTD_d_stableOutBuffer + * Experimental parameter. + * Default is 0 == disabled. Set to 1 to enable. * - * It's possible to reset all parameters to "default" using ZSTD_CCtx_reset(). 
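A small sketch of capping the decoder's window, following the ZSTD_DCtx_setMaxWindowSize() note above; the 16 MB limit and helper name are assumptions:

#define ZSTD_STATIC_LINKING_ONLY
#include "zstd.h"

static ZSTD_DCtx* createBoundedDCtx(void)
{
    ZSTD_DCtx* dctx = ZSTD_createDCtx();
    if (dctx != NULL) {
        /* refuse frames whose window would exceed 16 MB (streaming mode only) */
        ZSTD_DCtx_setMaxWindowSize(dctx, (size_t)16 << 20);
    }
    return dctx;
}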
+ * Tells the decompressor that the ZSTD_outBuffer will ALWAYS be the same + * between calls, except for the modifications that zstd makes to pos (the + * caller must not modify pos). This is checked by the decompressor, and + * decompression will fail if it ever changes. Therefore the ZSTD_outBuffer + * MUST be large enough to fit the entire decompressed frame. This will be + * checked when the frame content size is known. The data in the ZSTD_outBuffer + * in the range [dst, dst + pos) MUST not be modified during decompression + * or you will get data corruption. * - * This API supercedes all other "advanced" API entry points in the experimental section. - * In the future, we expect to remove from experimental API entry points which are redundant with this API. + * When this flags is enabled zstd won't allocate an output buffer, because + * it can write directly to the ZSTD_outBuffer, but it will still allocate + * an input buffer large enough to fit any compressed block. This will also + * avoid the memcpy() from the internal output buffer to the ZSTD_outBuffer. + * If you need to avoid the input buffer allocation use the buffer-less + * streaming API. + * + * NOTE: So long as the ZSTD_outBuffer always points to valid memory, using + * this flag is ALWAYS memory safe, and will never access out-of-bounds + * memory. However, decompression WILL fail if you violate the preconditions. + * + * WARNING: The data in the ZSTD_outBuffer in the range [dst, dst + pos) MUST + * not be modified during decompression or you will get data corruption. This + * is because zstd needs to reference data in the ZSTD_outBuffer to regenerate + * matches. Normally zstd maintains its own buffer for this purpose, but passing + * this flag tells zstd to use the user provided buffer. */ +#define ZSTD_d_stableOutBuffer ZSTD_d_experimentalParam2 +/* ZSTD_d_forceIgnoreChecksum + * Experimental parameter. + * Default is 0 == disabled. Set to 1 to enable + * + * Tells the decompressor to skip checksum validation during decompression, regardless + * of whether checksumming was specified during compression. This offers some + * slight performance benefits, and may be useful for debugging. + * Param has values of type ZSTD_forceIgnoreChecksum_e + */ +#define ZSTD_d_forceIgnoreChecksum ZSTD_d_experimentalParam3 -/* Compression strategies, listed from fastest to strongest */ -typedef enum { ZSTD_fast=1, - ZSTD_dfast=2, - ZSTD_greedy=3, - ZSTD_lazy=4, - ZSTD_lazy2=5, - ZSTD_btlazy2=6, - ZSTD_btopt=7, - ZSTD_btultra=8, - ZSTD_btultra2=9 - /* note : new strategies _might_ be added in the future. - Only the order (from fast to strong) is guaranteed */ -} ZSTD_strategy; +/* ZSTD_d_refMultipleDDicts + * Experimental parameter. + * Default is 0 == disabled. Set to 1 to enable + * + * If enabled and dctx is allocated on the heap, then additional memory will be allocated + * to store references to multiple ZSTD_DDict. That is, multiple calls of ZSTD_refDDict() + * using a given ZSTD_DCtx, rather than overwriting the previous DDict reference, will instead + * store all references. At decompression time, the appropriate dictID is selected + * from the set of DDicts based on the dictID in the frame. + * + * Usage is simply calling ZSTD_refDDict() on multiple dict buffers. + * + * Param has values of byte ZSTD_refMultipleDDicts_e + * + * WARNING: Enabling this parameter and calling ZSTD_DCtx_refDDict(), will trigger memory + * allocation for the hash table. ZSTD_freeDCtx() also frees this memory. 
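A hedged sketch of the stable-output-buffer mode described above, streaming a single frame into one fixed destination buffer that must already be large enough for the whole decompressed frame:

#define ZSTD_STATIC_LINKING_ONLY
#include "zstd.h"

static size_t decompressStable(ZSTD_DCtx* dctx,
                               void* dst, size_t dstCapacity,
                               const void* src, size_t srcSize)
{
    ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only);
    ZSTD_DCtx_setParameter(dctx, ZSTD_d_stableOutBuffer, 1);
    ZSTD_outBuffer out = { dst, dstCapacity, 0 };   /* the same buffer for every call */
    ZSTD_inBuffer  in  = { src, srcSize, 0 };
    while (in.pos < in.size) {
        size_t const ret = ZSTD_decompressStream(dctx, &out, &in);
        if (ZSTD_isError(ret)) return ret;
        if (ret == 0) break;    /* frame fully decoded */
    }
    return out.pos;             /* bytes written to dst */
}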
+ * Memory is allocated as per ZSTD_DCtx::customMem. + * + * Although this function allocates memory for the table, the user is still responsible for + * memory management of the underlying ZSTD_DDict* themselves. + */ +#define ZSTD_d_refMultipleDDicts ZSTD_d_experimentalParam4 -typedef enum { +/*! ZSTD_DCtx_setFormat() : + * This function is REDUNDANT. Prefer ZSTD_DCtx_setParameter(). + * Instruct the decoder context about what kind of data to decode next. + * This instruction is mandatory to decode data without a fully-formed header, + * such ZSTD_f_zstd1_magicless for example. + * @return : 0, or an error code (which can be tested using ZSTD_isError()). */ +ZSTD_DEPRECATED("use ZSTD_DCtx_setParameter() instead") +size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format); - /* compression parameters - * Note: When compressing with a ZSTD_CDict these parameters are superseded - * by the parameters used to construct the ZSTD_CDict. - * See ZSTD_CCtx_refCDict() for more info (superseded-by-cdict). */ - ZSTD_c_compressionLevel=100, /* Set compression parameters according to pre-defined cLevel table. - * Note that exact compression parameters are dynamically determined, - * depending on both compression level and srcSize (when known). - * Default level is ZSTD_CLEVEL_DEFAULT==3. - * Special: value 0 means default, which is controlled by ZSTD_CLEVEL_DEFAULT. - * Note 1 : it's possible to pass a negative compression level. - * Note 2 : setting a level does not automatically set all other compression parameters - * to default. Setting this will however eventually dynamically impact the compression - * parameters which have not been manually set. The manually set - * ones will 'stick'. */ - /* Advanced compression parameters : - * It's possible to pin down compression parameters to some specific values. - * In which case, these values are no longer dynamically selected by the compressor */ - ZSTD_c_windowLog=101, /* Maximum allowed back-reference distance, expressed as power of 2. - * This will set a memory budget for streaming decompression, - * with larger values requiring more memory - * and typically compressing more. - * Must be clamped between ZSTD_WINDOWLOG_MIN and ZSTD_WINDOWLOG_MAX. - * Special: value 0 means "use default windowLog". - * Note: Using a windowLog greater than ZSTD_WINDOWLOG_LIMIT_DEFAULT - * requires explicitly allowing such size at streaming decompression stage. */ - ZSTD_c_hashLog=102, /* Size of the initial probe table, as a power of 2. - * Resulting memory usage is (1 << (hashLog+2)). - * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX. - * Larger tables improve compression ratio of strategies <= dFast, - * and improve speed of strategies > dFast. - * Special: value 0 means "use default hashLog". */ - ZSTD_c_chainLog=103, /* Size of the multi-probe search table, as a power of 2. - * Resulting memory usage is (1 << (chainLog+2)). - * Must be clamped between ZSTD_CHAINLOG_MIN and ZSTD_CHAINLOG_MAX. - * Larger tables result in better and slower compression. - * This parameter is useless for "fast" strategy. - * It's still useful when using "dfast" strategy, - * in which case it defines a secondary probe table. - * Special: value 0 means "use default chainLog". */ - ZSTD_c_searchLog=104, /* Number of search attempts, as a power of 2. - * More attempts result in better and slower compression. - * This parameter is useless for "fast" and "dFast" strategies. - * Special: value 0 means "use default searchLog". 
*/ - ZSTD_c_minMatch=105, /* Minimum size of searched matches. - * Note that Zstandard can still find matches of smaller size, - * it just tweaks its search algorithm to look for this size and larger. - * Larger values increase compression and decompression speed, but decrease ratio. - * Must be clamped between ZSTD_MINMATCH_MIN and ZSTD_MINMATCH_MAX. - * Note that currently, for all strategies < btopt, effective minimum is 4. - * , for all strategies > fast, effective maximum is 6. - * Special: value 0 means "use default minMatchLength". */ - ZSTD_c_targetLength=106, /* Impact of this field depends on strategy. - * For strategies btopt, btultra & btultra2: - * Length of Match considered "good enough" to stop search. - * Larger values make compression stronger, and slower. - * For strategy fast: - * Distance between match sampling. - * Larger values make compression faster, and weaker. - * Special: value 0 means "use default targetLength". */ - ZSTD_c_strategy=107, /* See ZSTD_strategy enum definition. - * The higher the value of selected strategy, the more complex it is, - * resulting in stronger and slower compression. - * Special: value 0 means "use default strategy". */ +/*! ZSTD_decompressStream_simpleArgs() : + * Same as ZSTD_decompressStream(), + * but using only integral types as arguments. + * This can be helpful for binders from dynamic languages + * which have troubles handling structures containing memory pointers. + */ +ZSTDLIB_API size_t ZSTD_decompressStream_simpleArgs ( + ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, size_t* dstPos, + const void* src, size_t srcSize, size_t* srcPos); - /* LDM mode parameters */ - ZSTD_c_enableLongDistanceMatching=160, /* Enable long distance matching. - * This parameter is designed to improve compression ratio - * for large inputs, by finding large matches at long distance. - * It increases memory usage and window size. - * Note: enabling this parameter increases default ZSTD_c_windowLog to 128 MB - * except when expressly set to a different value. */ - ZSTD_c_ldmHashLog=161, /* Size of the table for long distance matching, as a power of 2. - * Larger values increase memory usage and compression ratio, - * but decrease compression speed. - * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX - * default: windowlog - 7. - * Special: value 0 means "automatically determine hashlog". */ - ZSTD_c_ldmMinMatch=162, /* Minimum match size for long distance matcher. - * Larger/too small values usually decrease compression ratio. - * Must be clamped between ZSTD_LDM_MINMATCH_MIN and ZSTD_LDM_MINMATCH_MAX. - * Special: value 0 means "use default value" (default: 64). */ - ZSTD_c_ldmBucketSizeLog=163, /* Log size of each bucket in the LDM hash table for collision resolution. - * Larger values improve collision resolution but decrease compression speed. - * The maximum value is ZSTD_LDM_BUCKETSIZELOG_MAX. - * Special: value 0 means "use default value" (default: 3). */ - ZSTD_c_ldmHashRateLog=164, /* Frequency of inserting/looking up entries into the LDM hash table. - * Must be clamped between 0 and (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN). - * Default is MAX(0, (windowLog - ldmHashLog)), optimizing hash table usage. - * Larger values improve compression speed. - * Deviating far from default value will likely result in a compression ratio decrease. - * Special: value 0 means "automatically determine hashRateLog". 
*/ - /* frame parameters */ - ZSTD_c_contentSizeFlag=200, /* Content size will be written into frame header _whenever known_ (default:1) - * Content size must be known at the beginning of compression. - * This is automatically the case when using ZSTD_compress2(), - * For streaming scenarios, content size must be provided with ZSTD_CCtx_setPledgedSrcSize() */ - ZSTD_c_checksumFlag=201, /* A 32-bits checksum of content is written at end of frame (default:0) */ - ZSTD_c_dictIDFlag=202, /* When applicable, dictionary's ID is written into frame header (default:1) */ +/******************************************************************** +* Advanced streaming functions +* Warning : most of these functions are now redundant with the Advanced API. +* Once Advanced API reaches "stable" status, +* redundant functions will be deprecated, and then at some point removed. +********************************************************************/ - /* multi-threading parameters */ - /* These parameters are only useful if multi-threading is enabled (compiled with build macro ZSTD_MULTITHREAD). - * They return an error otherwise. */ - ZSTD_c_nbWorkers=400, /* Select how many threads will be spawned to compress in parallel. - * When nbWorkers >= 1, triggers asynchronous mode when used with ZSTD_compressStream*() : - * ZSTD_compressStream*() consumes input and flush output if possible, but immediately gives back control to caller, - * while compression work is performed in parallel, within worker threads. - * (note : a strong exception to this rule is when first invocation of ZSTD_compressStream2() sets ZSTD_e_end : - * in which case, ZSTD_compressStream2() delegates to ZSTD_compress2(), which is always a blocking call). - * More workers improve speed, but also increase memory usage. - * Default value is `0`, aka "single-threaded mode" : no worker is spawned, compression is performed inside Caller's thread, all invocations are blocking */ - ZSTD_c_jobSize=401, /* Size of a compression job. This value is enforced only when nbWorkers >= 1. - * Each compression job is completed in parallel, so this value can indirectly impact the nb of active threads. - * 0 means default, which is dynamically determined based on compression parameters. - * Job size must be a minimum of overlap size, or 1 MB, whichever is largest. - * The minimum size is automatically and transparently enforced. */ - ZSTD_c_overlapLog=402, /* Control the overlap size, as a fraction of window size. - * The overlap size is an amount of data reloaded from previous job at the beginning of a new job. - * It helps preserve compression ratio, while each job is compressed in parallel. - * This value is enforced only when nbWorkers >= 1. - * Larger values increase compression ratio, but decrease speed. - * Possible values range from 0 to 9 : - * - 0 means "default" : value will be determined by the library, depending on strategy - * - 1 means "no overlap" - * - 9 means "full overlap", using a full window size. - * Each intermediate rank increases/decreases load size by a factor 2 : - * 9: full window; 8: w/2; 7: w/4; 6: w/8; 5:w/16; 4: w/32; 3:w/64; 2:w/128; 1:no overlap; 0:default - * default value varies between 6 and 9, depending on strategy */ +/*===== Advanced Streaming compression functions =====*/ - /* note : additional experimental parameters are also available - * within the experimental section of the API. 
- * At the time of this writing, they include : - * ZSTD_c_rsyncable - * ZSTD_c_format - * ZSTD_c_forceMaxWindow - * ZSTD_c_forceAttachDict - * ZSTD_c_literalCompressionMode - * ZSTD_c_targetCBlockSize - * ZSTD_c_srcSizeHint - * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. - * note : never ever use experimentalParam? names directly; - * also, the enums values themselves are unstable and can still change. - */ - ZSTD_c_experimentalParam1=500, - ZSTD_c_experimentalParam2=10, - ZSTD_c_experimentalParam3=1000, - ZSTD_c_experimentalParam4=1001, - ZSTD_c_experimentalParam5=1002, - ZSTD_c_experimentalParam6=1003, - ZSTD_c_experimentalParam7=1004 -} ZSTD_cParameter; +/*! ZSTD_initCStream_srcSize() : + * This function is DEPRECATED, and equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any) + * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); + * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); + * + * pledgedSrcSize must be correct. If it is not known at init time, use + * ZSTD_CONTENTSIZE_UNKNOWN. Note that, for compatibility with older programs, + * "0" also disables frame content size field. It may be enabled in the future. + * This prototype will generate compilation warnings. + */ +ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions") +size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, + int compressionLevel, + unsigned long long pledgedSrcSize); -typedef struct { - size_t error; - int lowerBound; - int upperBound; -} ZSTD_bounds; +/*! ZSTD_initCStream_usingDict() : + * This function is DEPRECATED, and is equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); + * ZSTD_CCtx_loadDictionary(zcs, dict, dictSize); + * + * Creates of an internal CDict (incompatible with static CCtx), except if + * dict == NULL or dictSize < 8, in which case no dict is used. + * Note: dict is loaded with ZSTD_dct_auto (treated as a full zstd dictionary if + * it begins with ZSTD_MAGIC_DICTIONARY, else as raw content) and ZSTD_dlm_byCopy. + * This prototype will generate compilation warnings. + */ +ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions") +size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, + const void* dict, size_t dictSize, + int compressionLevel); -/*! ZSTD_cParam_getBounds() : - * All parameters must belong to an interval with lower and upper bounds, - * otherwise they will either trigger an error or be automatically clamped. - * @return : a structure, ZSTD_bounds, which contains - * - an error status field, which must be tested using ZSTD_isError() - * - lower and upper bounds, both inclusive +/*! ZSTD_initCStream_advanced() : + * This function is DEPRECATED, and is approximately equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * // Pseudocode: Set each zstd parameter and leave the rest as-is. + * for ((param, value) : params) { + * ZSTD_CCtx_setParameter(zcs, param, value); + * } + * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); + * ZSTD_CCtx_loadDictionary(zcs, dict, dictSize); + * + * dict is loaded with ZSTD_dct_auto and ZSTD_dlm_byCopy. + * pledgedSrcSize must be correct. + * If srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN. + * This prototype will generate compilation warnings. 
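The deprecation note for ZSTD_initCStream_usingDict() above spells out its replacement; a minimal transcription of that sequence (helper name is an assumption, ZSTD_CStream is the same type as ZSTD_CCtx):

#define ZSTD_STATIC_LINKING_ONLY
#include "zstd.h"

static size_t initStreamWithDict(ZSTD_CStream* zcs,
                                 const void* dict, size_t dictSize, int level)
{
    size_t err = ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
    if (ZSTD_isError(err)) return err;
    err = ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, level);
    if (ZSTD_isError(err)) return err;
    /* dict == NULL or dictSize < 8 simply means "no dictionary" */
    return ZSTD_CCtx_loadDictionary(zcs, dict, dictSize);
}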
*/ -ZSTDLIB_API ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter cParam); +ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions") +size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, + const void* dict, size_t dictSize, + ZSTD_parameters params, + unsigned long long pledgedSrcSize); -/*! ZSTD_CCtx_setParameter() : - * Set one compression parameter, selected by enum ZSTD_cParameter. - * All parameters have valid bounds. Bounds can be queried using ZSTD_cParam_getBounds(). - * Providing a value beyond bound will either clamp it, or trigger an error (depending on parameter). - * Setting a parameter is generally only possible during frame initialization (before starting compression). - * Exception : when using multi-threading mode (nbWorkers >= 1), - * the following parameters can be updated _during_ compression (within same frame): - * => compressionLevel, hashLog, chainLog, searchLog, minMatch, targetLength and strategy. - * new parameters will be active for next job only (after a flush()). - * @return : an error code (which can be tested using ZSTD_isError()). +/*! ZSTD_initCStream_usingCDict() : + * This function is DEPRECATED, and equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_refCDict(zcs, cdict); + * + * note : cdict will just be referenced, and must outlive compression session + * This prototype will generate compilation warnings. */ -ZSTDLIB_API size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value); +ZSTD_DEPRECATED("use ZSTD_CCtx_reset and ZSTD_CCtx_refCDict, see zstd.h for detailed instructions") +size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict); -/*! ZSTD_CCtx_setPledgedSrcSize() : - * Total input data size to be compressed as a single frame. - * Value will be written in frame header, unless if explicitly forbidden using ZSTD_c_contentSizeFlag. - * This value will also be controlled at end of frame, and trigger an error if not respected. - * @result : 0, or an error code (which can be tested with ZSTD_isError()). - * Note 1 : pledgedSrcSize==0 actually means zero, aka an empty frame. - * In order to mean "unknown content size", pass constant ZSTD_CONTENTSIZE_UNKNOWN. - * ZSTD_CONTENTSIZE_UNKNOWN is default value for any new frame. - * Note 2 : pledgedSrcSize is only valid once, for the next frame. - * It's discarded at the end of the frame, and replaced by ZSTD_CONTENTSIZE_UNKNOWN. - * Note 3 : Whenever all input data is provided and consumed in a single round, - * for example with ZSTD_compress2(), - * or invoking immediately ZSTD_compressStream2(,,,ZSTD_e_end), - * this value is automatically overridden by srcSize instead. +/*! ZSTD_initCStream_usingCDict_advanced() : + * This function is DEPRECATED, and is approximately equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * // Pseudocode: Set each zstd frame parameter and leave the rest as-is. + * for ((fParam, value) : fParams) { + * ZSTD_CCtx_setParameter(zcs, fParam, value); + * } + * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); + * ZSTD_CCtx_refCDict(zcs, cdict); + * + * same as ZSTD_initCStream_usingCDict(), with control over frame parameters. + * pledgedSrcSize must be correct. If srcSize is not known at init time, use + * value ZSTD_CONTENTSIZE_UNKNOWN. + * This prototype will generate compilation warnings. 
*/ -ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize); +ZSTD_DEPRECATED("use ZSTD_CCtx_reset and ZSTD_CCtx_refCDict, see zstd.h for detailed instructions") +size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, + const ZSTD_CDict* cdict, + ZSTD_frameParameters fParams, + unsigned long long pledgedSrcSize); -typedef enum { - ZSTD_reset_session_only = 1, - ZSTD_reset_parameters = 2, - ZSTD_reset_session_and_parameters = 3 -} ZSTD_ResetDirective; +/*! ZSTD_resetCStream() : + * This function is DEPRECATED, and is equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); + * Note: ZSTD_resetCStream() interprets pledgedSrcSize == 0 as ZSTD_CONTENTSIZE_UNKNOWN, but + * ZSTD_CCtx_setPledgedSrcSize() does not do the same, so ZSTD_CONTENTSIZE_UNKNOWN must be + * explicitly specified. + * + * start a new frame, using same parameters from previous frame. + * This is typically useful to skip dictionary loading stage, since it will re-use it in-place. + * Note that zcs must be init at least once before using ZSTD_resetCStream(). + * If pledgedSrcSize is not known at reset time, use macro ZSTD_CONTENTSIZE_UNKNOWN. + * If pledgedSrcSize > 0, its value must be correct, as it will be written in header, and controlled at the end. + * For the time being, pledgedSrcSize==0 is interpreted as "srcSize unknown" for compatibility with older programs, + * but it will change to mean "empty" in future version, so use macro ZSTD_CONTENTSIZE_UNKNOWN instead. + * @return : 0, or an error code (which can be tested using ZSTD_isError()) + * This prototype will generate compilation warnings. + */ +ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions") +size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize); -/*! ZSTD_CCtx_reset() : - * There are 2 different things that can be reset, independently or jointly : - * - The session : will stop compressing current frame, and make CCtx ready to start a new one. - * Useful after an error, or to interrupt any ongoing compression. - * Any internal data not yet flushed is cancelled. - * Compression parameters and dictionary remain unchanged. - * They will be used to compress next frame. - * Resetting session never fails. - * - The parameters : changes all parameters back to "default". - * This removes any reference to any dictionary too. - * Parameters can only be changed between 2 sessions (i.e. no compression is currently ongoing) - * otherwise the reset fails, and function returns an error value (which can be tested using ZSTD_isError()) - * - Both : similar to resetting the session, followed by resetting parameters. + +typedef struct { + unsigned long long ingested; /* nb input bytes read and buffered */ + unsigned long long consumed; /* nb input bytes actually compressed */ + unsigned long long produced; /* nb of compressed bytes generated and buffered */ + unsigned long long flushed; /* nb of compressed bytes flushed : not provided; can be tracked from caller side */ + unsigned currentJobID; /* MT only : latest started job nb */ + unsigned nbActiveWorkers; /* MT only : nb of workers actively compressing at probe time */ +} ZSTD_frameProgression; + +/* ZSTD_getFrameProgression() : + * tells how much data has been ingested (read from input) + * consumed (input actually compressed) and produced (output) for current frame. + * Note : (ingested - consumed) is amount of input data buffered internally, not yet compressed. 
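A small sketch of polling the frame-progression counters documented above from the producing thread during a multithreaded compression; the reporting helper is illustrative:

#include <stdio.h>
#define ZSTD_STATIC_LINKING_ONLY
#include "zstd.h"

static void reportProgress(const ZSTD_CCtx* cctx)
{
    ZSTD_frameProgression const fp = ZSTD_getFrameProgression(cctx);
    fprintf(stderr, "ingested=%llu consumed=%llu produced=%llu (active workers: %u)\n",
            fp.ingested, fp.consumed, fp.produced, fp.nbActiveWorkers);
    /* (ingested - consumed) is the amount of input buffered but not yet compressed */
}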
+ * Aggregates progression inside active worker threads. */ -ZSTDLIB_API size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset); +ZSTDLIB_API ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx); -/*! ZSTD_compress2() : - * Behave the same as ZSTD_compressCCtx(), but compression parameters are set using the advanced API. - * ZSTD_compress2() always starts a new frame. - * Should cctx hold data from a previously unfinished frame, everything about it is forgotten. - * - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*() - * - The function is always blocking, returns when compression is completed. - * Hint : compression runs faster if `dstCapacity` >= `ZSTD_compressBound(srcSize)`. - * @return : compressed size written into `dst` (<= `dstCapacity), - * or an error code if it fails (which can be tested using ZSTD_isError()). +/*! ZSTD_toFlushNow() : + * Tell how many bytes are ready to be flushed immediately. + * Useful for multithreading scenarios (nbWorkers >= 1). + * Probe the oldest active job, defined as oldest job not yet entirely flushed, + * and check its output buffer. + * @return : amount of data stored in oldest job and ready to be flushed immediately. + * if @return == 0, it means either : + * + there is no active job (could be checked with ZSTD_frameProgression()), or + * + oldest job is still actively compressing data, + * but everything it has produced has also been flushed so far, + * therefore flush speed is limited by production speed of oldest job + * irrespective of the speed of concurrent (and newer) jobs. */ -ZSTDLIB_API size_t ZSTD_compress2( ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize); +ZSTDLIB_API size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx); -/*************************************** -* Advanced decompression API -***************************************/ +/*===== Advanced Streaming decompression functions =====*/ -/* The advanced API pushes parameters one by one into an existing DCtx context. - * Parameters are sticky, and remain valid for all following frames - * using the same DCtx context. - * It's possible to reset parameters to default values using ZSTD_DCtx_reset(). - * Note : This API is compatible with existing ZSTD_decompressDCtx() and ZSTD_decompressStream(). - * Therefore, no new decompression function is necessary. +/*! + * This function is deprecated, and is equivalent to: + * + * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); + * ZSTD_DCtx_loadDictionary(zds, dict, dictSize); + * + * note: no dictionary will be used if dict == NULL or dictSize < 8 + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); + +/*! + * This function is deprecated, and is equivalent to: + * + * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); + * ZSTD_DCtx_refDDict(zds, ddict); + * + * note : ddict is referenced, it must outlive decompression session + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict); + +/*! 
+ * This function is deprecated, and is equivalent to: + * + * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); + * + * re-use decompression parameters from previous init; saves dictionary loading + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x */ +ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); + + +/********************************************************************* +* Buffer-less and synchronous inner streaming functions +* +* This is an advanced API, giving full control over buffer management, for users which need direct control over memory. +* But it's also a complex one, with several restrictions, documented below. +* Prefer normal streaming API for an easier experience. +********************************************************************* */ + +/** + Buffer-less streaming compression (synchronous mode) + + A ZSTD_CCtx object is required to track streaming operations. + Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage resource. + ZSTD_CCtx object can be re-used multiple times within successive compression operations. + + Start by initializing a context. + Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression. + It's also possible to duplicate a reference context which has already been initialized, using ZSTD_copyCCtx() + + Then, consume your input using ZSTD_compressContinue(). + There are some important considerations to keep in mind when using this advanced function : + - ZSTD_compressContinue() has no internal buffer. It uses externally provided buffers only. + - Interface is synchronous : input is consumed entirely and produces 1+ compressed blocks. + - Caller must ensure there is enough space in `dst` to store compressed data under worst case scenario. + Worst case evaluation is provided by ZSTD_compressBound(). + ZSTD_compressContinue() doesn't guarantee recover after a failed compression. + - ZSTD_compressContinue() presumes prior input ***is still accessible and unmodified*** (up to maximum distance size, see WindowLog). + It remembers all previous contiguous blocks, plus one separated memory segment (which can itself consists of multiple contiguous blocks) + - ZSTD_compressContinue() detects that prior input has been overwritten when `src` buffer overlaps. + In which case, it will "discard" the relevant memory section from its history. + + Finish a frame with ZSTD_compressEnd(), which will write the last block(s) and optional checksum. + It's possible to use srcSize==0, in which case, it will write a final empty block to end the frame. + Without last block mark, frames are considered unfinished (hence corrupted) by compliant decoders. + + `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress again. 
+*/ + +/*===== Buffer-less streaming compression functions =====*/ +ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel); +ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel); +ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); /**< note: fails if cdict==NULL */ +ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); /**< note: if pledgedSrcSize is not known, use ZSTD_CONTENTSIZE_UNKNOWN */ + +ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + +/* The ZSTD_compressBegin_advanced() and ZSTD_compressBegin_usingCDict_advanced() are now DEPRECATED and will generate a compiler warning */ +ZSTD_DEPRECATED("use advanced API to access custom parameters") +size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize : If srcSize is not known at init time, use ZSTD_CONTENTSIZE_UNKNOWN */ +ZSTD_DEPRECATED("use advanced API to access custom parameters") +size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize); /* compression parameters are already set within cdict. pledgedSrcSize must be correct. If srcSize is not known, use macro ZSTD_CONTENTSIZE_UNKNOWN */ +/** + Buffer-less streaming decompression (synchronous mode) + + A ZSTD_DCtx object is required to track streaming operations. + Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it. + A ZSTD_DCtx object can be re-used multiple times. + + First typical operation is to retrieve frame parameters, using ZSTD_getFrameHeader(). + Frame header is extracted from the beginning of compressed frame, so providing only the frame's beginning is enough. + Data fragment must be large enough to ensure successful decoding. + `ZSTD_frameHeaderSize_max` bytes is guaranteed to always be large enough. + @result : 0 : successful decoding, the `ZSTD_frameHeader` structure is correctly filled. + >0 : `srcSize` is too small, please provide at least @result bytes on next attempt. + errorCode, which can be tested using ZSTD_isError(). + + It fills a ZSTD_frameHeader structure with important information to correctly decode the frame, + such as the dictionary ID, content size, or maximum back-reference distance (`windowSize`). + Note that these values could be wrong, either because of data corruption, or because a 3rd party deliberately spoofs false information. + As a consequence, check that values remain within valid application range. + For example, do not allocate memory blindly, check that `windowSize` is within expectation. + Each application can set its own limits, depending on local restrictions. + For extended interoperability, it is recommended to support `windowSize` of at least 8 MB. + + ZSTD_decompressContinue() needs previous data blocks during decompression, up to `windowSize` bytes. 
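A minimal sketch (not from this patch) of the buffer-less compression entry points declared just above, assuming the whole input fits in one segment so the ZSTD_compressContinue() chunking loop is elided; the helper name is illustrative:

#define ZSTD_STATIC_LINKING_ONLY
#include <zstd.h>

/* Single-segment use of the buffer-less compression calls above.
 * dstCapacity is assumed to be >= ZSTD_compressBound(srcSize). */
static size_t compress_bufferless(ZSTD_CCtx* cctx,
                                  void* dst, size_t dstCapacity,
                                  const void* src, size_t srcSize,
                                  int level)
{
    size_t const initRet = ZSTD_compressBegin(cctx, level);
    if (ZSTD_isError(initRet)) return initRet;
    /* With one segment, ZSTD_compressEnd() consumes the input and writes the
     * last block(s) plus the frame epilogue in one call. Multi-segment input
     * would go through ZSTD_compressContinue() per segment first. */
    return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
}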
+ ZSTD_decompressContinue() is very sensitive to contiguity, + if 2 blocks don't follow each other, make sure that either the compressor breaks contiguity at the same place, + or that previous contiguous segment is large enough to properly handle maximum back-reference distance. + There are multiple ways to guarantee this condition. + + The most memory efficient way is to use a round buffer of sufficient size. + Sufficient size is determined by invoking ZSTD_decodingBufferSize_min(), + which can @return an error code if required value is too large for current system (in 32-bits mode). + In a round buffer methodology, ZSTD_decompressContinue() decompresses each block next to previous one, + up to the moment there is not enough room left in the buffer to guarantee decoding another full block, + which maximum size is provided in `ZSTD_frameHeader` structure, field `blockSizeMax`. + At which point, decoding can resume from the beginning of the buffer. + Note that already decoded data stored in the buffer should be flushed before being overwritten. + + There are alternatives possible, for example using two or more buffers of size `windowSize` each, though they consume more memory. + + Finally, if you control the compression process, you can also ignore all buffer size rules, + as long as the encoder and decoder progress in "lock-step", + aka use exactly the same buffer sizes, break contiguity at the same place, etc. + + Once buffers are setup, start decompression, with ZSTD_decompressBegin(). + If decompression requires a dictionary, use ZSTD_decompressBegin_usingDict() or ZSTD_decompressBegin_usingDDict(). + + Then use ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue() alternatively. + ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTD_decompressContinue(). + ZSTD_decompressContinue() requires this _exact_ amount of bytes, or it will fail. + + @result of ZSTD_decompressContinue() is the number of bytes regenerated within 'dst' (necessarily <= dstCapacity). + It can be zero : it just means ZSTD_decompressContinue() has decoded some metadata item. + It can also be an error code, which can be tested with ZSTD_isError(). + + A frame is fully decoded when ZSTD_nextSrcSizeToDecompress() returns zero. + Context can then be reset to start a new decompression. + + Note : it's possible to know if next input to present is a header or a block, using ZSTD_nextInputType(). + This information is not required to properly decode a frame. + + == Special case : skippable frames == + + Skippable frames allow integration of user-defined data into a flow of concatenated frames. + Skippable frames will be ignored (skipped) by decompressor. + The format of skippable frames is as follows : + a) Skippable frame ID - 4 Bytes, Little endian format, any value from 0x184D2A50 to 0x184D2A5F + b) Frame Size - 4 Bytes, Little endian format, unsigned 32-bits + c) Frame Content - any content (User Data) of length equal to Frame Size + For skippable frames ZSTD_getFrameHeader() returns zfhPtr->frameType==ZSTD_skippableFrame. + For skippable frames ZSTD_decompressContinue() always returns 0 : it only skips the content. +*/ + +/*===== Buffer-less streaming decompression functions =====*/ +typedef enum { ZSTD_frame, ZSTD_skippableFrame } ZSTD_frameType_e; +typedef struct { + unsigned long long frameContentSize; /* if == ZSTD_CONTENTSIZE_UNKNOWN, it means this field is not available. 
0 means "empty" */ + unsigned long long windowSize; /* can be very large, up to <= frameContentSize */ + unsigned blockSizeMax; + ZSTD_frameType_e frameType; /* if == ZSTD_skippableFrame, frameContentSize is the size of skippable content */ + unsigned headerSize; + unsigned dictID; + unsigned checksumFlag; +} ZSTD_frameHeader; + +/*! ZSTD_getFrameHeader() : + * decode Frame Header, or requires larger `srcSize`. + * @return : 0, `zfhPtr` is correctly filled, + * >0, `srcSize` is too small, value is wanted `srcSize` amount, + * or an error code, which can be tested using ZSTD_isError() */ +ZSTDLIB_API size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize); /**< doesn't consume input */ +/*! ZSTD_getFrameHeader_advanced() : + * same as ZSTD_getFrameHeader(), + * with added capability to select a format (like ZSTD_f_zstd1_magicless) */ +ZSTDLIB_API size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format); +ZSTDLIB_API size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize); /**< when frame content size is not known, pass in frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN */ + +ZSTDLIB_API size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx); +ZSTDLIB_API size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); +ZSTDLIB_API size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); + +ZSTDLIB_API size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx); +ZSTDLIB_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + +/* misc */ +ZSTDLIB_API void ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx); +typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e; +ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx); + + + + +/* ============================ */ +/** Block level API */ +/* ============================ */ + +/*! + Block functions produce and decode raw zstd blocks, without frame metadata. + Frame metadata cost is typically ~12 bytes, which can be non-negligible for very small blocks (< 100 bytes). + But users will have to take in charge needed metadata to regenerate data, such as compressed and content sizes. + + A few rules to respect : + - Compressing and decompressing require a context structure + + Use ZSTD_createCCtx() and ZSTD_createDCtx() + - It is necessary to init context before starting + + compression : any ZSTD_compressBegin*() variant, including with dictionary + + decompression : any ZSTD_decompressBegin*() variant, including with dictionary + + copyCCtx() and copyDCtx() can be used too + - Block size is limited, it must be <= ZSTD_getBlockSize() <= ZSTD_BLOCKSIZE_MAX == 128 KB + + If input is larger than a block size, it's necessary to split input data into multiple blocks + + For inputs larger than a single block, consider using regular ZSTD_compress() instead. + Frame metadata is not that costly, and quickly becomes negligible as source size grows larger than a block. + - When a block is considered not compressible enough, ZSTD_compressBlock() result will be 0 (zero) ! + ===> In which case, nothing is produced into `dst` ! + + User __must__ test for such outcome and deal directly with uncompressed data + + A block cannot be declared incompressible if ZSTD_compressBlock() return value was != 0. 
+ Doing so would mess up with statistics history, leading to potential data corruption. + + ZSTD_decompressBlock() _doesn't accept uncompressed data as input_ !! + + In case of multiple successive blocks, should some of them be uncompressed, + decoder must be informed of their existence in order to follow proper history. + Use ZSTD_insertBlock() for such a case. +*/ + +/*===== Raw zstd block functions =====*/ +ZSTDLIB_API size_t ZSTD_getBlockSize (const ZSTD_CCtx* cctx); +ZSTDLIB_API size_t ZSTD_compressBlock (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +ZSTDLIB_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +ZSTDLIB_API size_t ZSTD_insertBlock (ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize); /**< insert uncompressed block into `dctx` history. Useful for multi-blocks decompression. */ + + +#endif /* ZSTD_H_ZSTD_STATIC_LINKING_ONLY */ + +#if defined (__cplusplus) +} +#endif +/**** ended inlining ../zstd.h ****/ +#define FSE_STATIC_LINKING_ONLY +/**** skipping file: fse.h ****/ +#define HUF_STATIC_LINKING_ONLY +/**** skipping file: huf.h ****/ +#ifndef XXH_STATIC_LINKING_ONLY +# define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */ +#endif +/**** start inlining xxhash.h ****/ +/* + * xxHash - Extremely Fast Hash algorithm + * Header File + * Copyright (c) Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - xxHash source repository : https://github.com/Cyan4973/xxHash + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +*/ + +/* Notice extracted from xxHash homepage : + +xxHash is an extremely fast Hash algorithm, running at RAM speed limits. +It also successfully passes all tests from the SMHasher suite. + +Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz) + +Name Speed Q.Score Author +xxHash 5.4 GB/s 10 +CrapWow 3.2 GB/s 2 Andrew +MumurHash 3a 2.7 GB/s 10 Austin Appleby +SpookyHash 2.0 GB/s 10 Bob Jenkins +SBox 1.4 GB/s 9 Bret Mulvey +Lookup3 1.2 GB/s 9 Bob Jenkins +SuperFastHash 1.2 GB/s 1 Paul Hsieh +CityHash64 1.05 GB/s 10 Pike & Alakuijala +FNV 0.55 GB/s 5 Fowler, Noll, Vo +CRC32 0.43 GB/s 9 +MD5-32 0.33 GB/s 10 Ronald L. Rivest +SHA1-32 0.28 GB/s 10 + +Q.Score is a measure of quality of the hash function. +It depends on successfully passing SMHasher test set. +10 is a perfect score. + +A 64-bits version, named XXH64, is available since r35. +It offers much better speed, but for 64-bits applications only. +Name Speed on 64 bits Speed on 32 bits +XXH64 13.8 GB/s 1.9 GB/s +XXH32 6.8 GB/s 6.0 GB/s +*/ + +#if defined (__cplusplus) +extern "C" { +#endif + +#ifndef XXHASH_H_5627135585666179 +#define XXHASH_H_5627135585666179 1 + + +/* **************************** +* Definitions +******************************/ +/**** skipping file: zstd_deps.h ****/ +typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode; + + +/* **************************** +* API modifier +******************************/ +/** XXH_PRIVATE_API +* This is useful if you want to include xxhash functions in `static` mode +* in order to inline them, and remove their symbol from the public list. +* Methodology : +* #define XXH_PRIVATE_API +* #include "xxhash.h" +* `xxhash.c` is automatically included. 
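A minimal sketch (not from this patch) of the raw block rules documented above, placed here just before the inlined xxhash.h; the helper name, compression level, and buffer sizing are illustrative:

#define ZSTD_STATIC_LINKING_ONLY
#include <zstd.h>
#include <string.h>

/* Compress one block; a 0 return means "not compressible", in which case the
 * raw bytes are kept as-is and the decoder is informed via ZSTD_insertBlock().
 * srcSize must be <= ZSTD_getBlockSize(cctx); assumes dstCapacity >= srcSize. */
static size_t roundtrip_one_block(ZSTD_CCtx* cctx, ZSTD_DCtx* dctx,
                                  const void* src, size_t srcSize,
                                  void* scratch, size_t scratchSize,
                                  void* dst, size_t dstCapacity)
{
    size_t cSize;
    size_t ret = ZSTD_compressBegin(cctx, 3);        /* contexts must be initialized first */
    if (ZSTD_isError(ret)) return ret;
    ret = ZSTD_decompressBegin(dctx);
    if (ZSTD_isError(ret)) return ret;

    cSize = ZSTD_compressBlock(cctx, scratch, scratchSize, src, srcSize);
    if (ZSTD_isError(cSize)) return cSize;

    if (cSize == 0) {                                /* incompressible block */
        memcpy(dst, src, srcSize);                   /* store/transmit raw bytes */
        return ZSTD_insertBlock(dctx, dst, srcSize); /* keep decoder history consistent */
    }
    return ZSTD_decompressBlock(dctx, dst, dstCapacity, scratch, cSize);
}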
+* It's not useful to compile and link it as a separate module anymore. +*/ +#ifdef XXH_PRIVATE_API +# ifndef XXH_STATIC_LINKING_ONLY +# define XXH_STATIC_LINKING_ONLY +# endif +# if defined(__GNUC__) +# define XXH_PUBLIC_API static __inline __attribute__((unused)) +# elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# define XXH_PUBLIC_API static inline +# elif defined(_MSC_VER) +# define XXH_PUBLIC_API static __inline +# else +# define XXH_PUBLIC_API static /* this version may generate warnings for unused static functions; disable the relevant warning */ +# endif +#else +# define XXH_PUBLIC_API /* do nothing */ +#endif /* XXH_PRIVATE_API */ + +/*!XXH_NAMESPACE, aka Namespace Emulation : + +If you want to include _and expose_ xxHash functions from within your own library, +but also want to avoid symbol collisions with another library which also includes xxHash, + +you can use XXH_NAMESPACE, to automatically prefix any public symbol from xxhash library +with the value of XXH_NAMESPACE (so avoid to keep it NULL and avoid numeric values). + +Note that no change is required within the calling program as long as it includes `xxhash.h` : +regular symbol name will be automatically translated by this header. +*/ +#ifdef XXH_NAMESPACE +# define XXH_CAT(A,B) A##B +# define XXH_NAME2(A,B) XXH_CAT(A,B) +# define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32) +# define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64) +# define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber) +# define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState) +# define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState) +# define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState) +# define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState) +# define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset) +# define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset) +# define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update) +# define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update) +# define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest) +# define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest) +# define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState) +# define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState) +# define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash) +# define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash) +# define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical) +# define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical) +#endif + + +/* ************************************* +* Version +***************************************/ +#define XXH_VERSION_MAJOR 0 +#define XXH_VERSION_MINOR 6 +#define XXH_VERSION_RELEASE 2 +#define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE) +XXH_PUBLIC_API unsigned XXH_versionNumber (void); + + +/* **************************** +* Simple Hash Functions +******************************/ +typedef unsigned int XXH32_hash_t; +typedef unsigned long long XXH64_hash_t; + +XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, unsigned int seed); +XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, unsigned long long seed); + +/*! +XXH32() : + Calculate the 32-bits hash of sequence "length" bytes stored at memory address "input". 
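A minimal sketch (not from this patch) of the one-shot hash entry points declared above; the message and seed are illustrative, and a standalone program would include "xxhash.h" rather than rely on this inlined copy:

#include <stdio.h>
#include <string.h>

static void hash_one_shot(void)
{
    const char msg[] = "hello xxhash";
    XXH32_hash_t const h32 = XXH32(msg, strlen(msg), 0 /* seed */);
    XXH64_hash_t const h64 = XXH64(msg, strlen(msg), 0 /* seed */);
    printf("XXH32 = %08x, XXH64 = %016llx\n", h32, h64);
}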
+ The memory between input & input+length must be valid (allocated and read-accessible). + "seed" can be used to alter the result predictably. + Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s +XXH64() : + Calculate the 64-bits hash of sequence of length "len" stored at memory address "input". + "seed" can be used to alter the result predictably. + This function runs 2x faster on 64-bits systems, but slower on 32-bits systems (see benchmark). +*/ + + +/* **************************** +* Streaming Hash Functions +******************************/ +typedef struct XXH32_state_s XXH32_state_t; /* incomplete type */ +typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */ + +/*! State allocation, compatible with dynamic libraries */ + +XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void); +XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr); + +XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void); +XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr); + + +/* hash streaming */ + +XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, unsigned int seed); +XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length); +XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr); + +XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, unsigned long long seed); +XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length); +XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* statePtr); + +/* +These functions generate the xxHash of an input provided in multiple segments. +Note that, for small input, they are slower than single-call functions, due to state management. +For small input, prefer `XXH32()` and `XXH64()` . + +XXH state must first be allocated, using XXH*_createState() . + +Start a new hash by initializing state with a seed, using XXH*_reset(). + +Then, feed the hash state by calling XXH*_update() as many times as necessary. +Obviously, input must be allocated and read accessible. +The function returns an error code, with 0 meaning OK, and any other value meaning there is an error. + +Finally, a hash value can be produced anytime, by using XXH*_digest(). +This function returns the nn-bits hash as an int or long long. + +It's still possible to continue inserting input into the hash state after a digest, +and generate some new hashes later on, by calling again XXH*_digest(). + +When done, free XXH state space if it was allocated dynamically. +*/ + + +/* ************************** +* Utils +****************************/ +#if !(defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) /* ! C99 */ +# define restrict /* disable restrict */ +#endif + +XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* restrict dst_state, const XXH32_state_t* restrict src_state); +XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* restrict dst_state, const XXH64_state_t* restrict src_state); + + +/* ************************** +* Canonical representation +****************************/ +/* Default result type for XXH functions are primitive unsigned 32 and 64 bits. +* The canonical representation uses human-readable write convention, aka big-endian (large digits first). +* These functions allow transformation of hash result into and from its canonical format. +* This way, hash values can be written into a file / memory, and remain comparable on different systems and programs. 
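A minimal sketch (not from this patch) of the streaming and canonical-representation flow described above, shown for XXH64; the helper name and error handling are illustrative:

#include <stddef.h>

/* Feed two segments, take the digest, then serialize it in the portable
 * big-endian canonical form. Returns 0 on success, -1 on failure. */
static int hash_streaming(const void* part1, size_t len1,
                          const void* part2, size_t len2,
                          XXH64_canonical_t* out)
{
    XXH64_state_t* const state = XXH64_createState();
    if (state == NULL) return -1;
    if (XXH64_reset(state, 0 /* seed */) != XXH_OK
     || XXH64_update(state, part1, len1) != XXH_OK
     || XXH64_update(state, part2, len2) != XXH_OK) {
        XXH64_freeState(state);
        return -1;
    }
    {   XXH64_hash_t const h = XXH64_digest(state);
        XXH64_canonicalFromHash(out, h);   /* big-endian, comparable across systems */
    }
    XXH64_freeState(state);
    return 0;
}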
+*/ +typedef struct { unsigned char digest[4]; } XXH32_canonical_t; +typedef struct { unsigned char digest[8]; } XXH64_canonical_t; + +XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash); +XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash); + +XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src); +XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src); + +#endif /* XXHASH_H_5627135585666179 */ + + + +/* ================================================================================================ + This section contains definitions which are not guaranteed to remain stable. + They may change in future versions, becoming incompatible with a different version of the library. + They shall only be used with static linking. + Never use these definitions in association with dynamic linking ! +=================================================================================================== */ +#if defined(XXH_STATIC_LINKING_ONLY) && !defined(XXH_STATIC_H_3543687687345) +#define XXH_STATIC_H_3543687687345 + +/* These definitions are only meant to allow allocation of XXH state + statically, on stack, or in a struct for example. + Do not use members directly. */ + + struct XXH32_state_s { + unsigned total_len_32; + unsigned large_len; + unsigned v1; + unsigned v2; + unsigned v3; + unsigned v4; + unsigned mem32[4]; /* buffer defined as U32 for alignment */ + unsigned memsize; + unsigned reserved; /* never read nor write, will be removed in a future version */ + }; /* typedef'd to XXH32_state_t */ + + struct XXH64_state_s { + unsigned long long total_len; + unsigned long long v1; + unsigned long long v2; + unsigned long long v3; + unsigned long long v4; + unsigned long long mem64[4]; /* buffer defined as U64 for alignment */ + unsigned memsize; + unsigned reserved[2]; /* never read nor write, will be removed in a future version */ + }; /* typedef'd to XXH64_state_t */ + + +# ifdef XXH_PRIVATE_API +/**** start inlining xxhash.c ****/ +/* + * xxHash - Fast Hash algorithm + * Copyright (c) Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - xxHash homepage: http://www.xxhash.com + * - xxHash source repository : https://github.com/Cyan4973/xxHash + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +*/ + + +/* ************************************* +* Tuning parameters +***************************************/ +/*!XXH_FORCE_MEMORY_ACCESS : + * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. + * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. + * The below switch allow to select different access method for improved performance. + * Method 0 (default) : use `memcpy()`. Safe and portable. + * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). + * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. + * Method 2 : direct access. This method doesn't depend on compiler but violate C standard. + * It can generate buggy code on targets which do not support unaligned memory accesses. 
+ * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) + * See http://stackoverflow.com/a/32095106/646947 for details. + * Prefer these methods in priority order (0 > 1 > 2) + */ +#ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ +# if (defined(__INTEL_COMPILER) && !defined(WIN32)) || \ + (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) || \ + defined(__ICCARM__) +# define XXH_FORCE_MEMORY_ACCESS 1 +# endif +#endif + +/*!XXH_ACCEPT_NULL_INPUT_POINTER : + * If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer. + * When this option is enabled, xxHash output for null input pointers will be the same as a null-length input. + * By default, this option is disabled. To enable it, uncomment below define : + */ +/* #define XXH_ACCEPT_NULL_INPUT_POINTER 1 */ + +/*!XXH_FORCE_NATIVE_FORMAT : + * By default, xxHash library provides endian-independent Hash values, based on little-endian convention. + * Results are therefore identical for little-endian and big-endian CPU. + * This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format. + * Should endian-independence be of no importance for your application, you may set the #define below to 1, + * to improve speed for Big-endian CPU. + * This option has no impact on Little_Endian CPU. + */ +#ifndef XXH_FORCE_NATIVE_FORMAT /* can be defined externally */ +# define XXH_FORCE_NATIVE_FORMAT 0 +#endif + +/*!XXH_FORCE_ALIGN_CHECK : + * This is a minor performance trick, only useful with lots of very small keys. + * It means : check for aligned/unaligned input. + * The check costs one initial branch per hash; set to 0 when the input data + * is guaranteed to be aligned. + */ +#ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */ +# if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64) +# define XXH_FORCE_ALIGN_CHECK 0 +# else +# define XXH_FORCE_ALIGN_CHECK 1 +# endif +#endif + + +/* ************************************* +* Includes & Memory related functions +***************************************/ +/* Modify the local functions below should you wish to use some other memory routines */ +/* for ZSTD_malloc(), ZSTD_free() */ +#define ZSTD_DEPS_NEED_MALLOC +/**** skipping file: zstd_deps.h ****/ +static void* XXH_malloc(size_t s) { return ZSTD_malloc(s); } +static void XXH_free (void* p) { ZSTD_free(p); } +static void* XXH_memcpy(void* dest, const void* src, size_t size) { return ZSTD_memcpy(dest,src,size); } + +#ifndef XXH_STATIC_LINKING_ONLY +# define XXH_STATIC_LINKING_ONLY +#endif +/**** skipping file: xxhash.h ****/ + + +/* ************************************* +* Compiler Specific Options +***************************************/ +/**** skipping file: compiler.h ****/ + + +/* ************************************* +* Basic Types +***************************************/ +/**** skipping file: mem.h ****/ + +#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2)) + +/* Force direct memory access. 
Only works on CPU which support unaligned memory access in hardware */ +static U32 XXH_read32(const void* memPtr) { return *(const U32*) memPtr; } +static U64 XXH_read64(const void* memPtr) { return *(const U64*) memPtr; } + +#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1)) + +/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ +/* currently only defined for gcc and icc */ +typedef union { U32 u32; U64 u64; } __attribute__((packed)) unalign; + +static U32 XXH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } +static U64 XXH_read64(const void* ptr) { return ((const unalign*)ptr)->u64; } + +#else + +/* portable and safe solution. Generally efficient. + * see : http://stackoverflow.com/a/32095106/646947 + */ + +static U32 XXH_read32(const void* memPtr) +{ + U32 val; + ZSTD_memcpy(&val, memPtr, sizeof(val)); + return val; +} + +static U64 XXH_read64(const void* memPtr) +{ + U64 val; + ZSTD_memcpy(&val, memPtr, sizeof(val)); + return val; +} + +#endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */ + + +/* **************************************** +* Compiler-specific Functions and Macros +******************************************/ +#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) + +/* Note : although _rotl exists for minGW (GCC under windows), performance seems poor */ +#if defined(_MSC_VER) +# define XXH_rotl32(x,r) _rotl(x,r) +# define XXH_rotl64(x,r) _rotl64(x,r) +#else +#if defined(__ICCARM__) +# include +# define XXH_rotl32(x,r) __ROR(x,(32 - r)) +#else +# define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r))) +#endif +# define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r))) +#endif + +#if defined(_MSC_VER) /* Visual Studio */ +# define XXH_swap32 _byteswap_ulong +# define XXH_swap64 _byteswap_uint64 +#elif GCC_VERSION >= 403 +# define XXH_swap32 __builtin_bswap32 +# define XXH_swap64 __builtin_bswap64 +#else +static U32 XXH_swap32 (U32 x) +{ + return ((x << 24) & 0xff000000 ) | + ((x << 8) & 0x00ff0000 ) | + ((x >> 8) & 0x0000ff00 ) | + ((x >> 24) & 0x000000ff ); +} +static U64 XXH_swap64 (U64 x) +{ + return ((x << 56) & 0xff00000000000000ULL) | + ((x << 40) & 0x00ff000000000000ULL) | + ((x << 24) & 0x0000ff0000000000ULL) | + ((x << 8) & 0x000000ff00000000ULL) | + ((x >> 8) & 0x00000000ff000000ULL) | + ((x >> 24) & 0x0000000000ff0000ULL) | + ((x >> 40) & 0x000000000000ff00ULL) | + ((x >> 56) & 0x00000000000000ffULL); +} +#endif + + +/* ************************************* +* Architecture Macros +***************************************/ +typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess; + +/* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line */ +#ifndef XXH_CPU_LITTLE_ENDIAN + static const int g_one = 1; +# define XXH_CPU_LITTLE_ENDIAN (*(const char*)(&g_one)) +#endif + + +/* *************************** +* Memory reads +*****************************/ +typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment; + +FORCE_INLINE_TEMPLATE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align) +{ + if (align==XXH_unaligned) + return endian==XXH_littleEndian ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr)); + else + return endian==XXH_littleEndian ? *(const U32*)ptr : XXH_swap32(*(const U32*)ptr); +} + +FORCE_INLINE_TEMPLATE U32 XXH_readLE32(const void* ptr, XXH_endianess endian) +{ + return XXH_readLE32_align(ptr, endian, XXH_unaligned); +} + +static U32 XXH_readBE32(const void* ptr) +{ + return XXH_CPU_LITTLE_ENDIAN ? 
XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr); +} + +FORCE_INLINE_TEMPLATE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align) +{ + if (align==XXH_unaligned) + return endian==XXH_littleEndian ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr)); + else + return endian==XXH_littleEndian ? *(const U64*)ptr : XXH_swap64(*(const U64*)ptr); +} + +FORCE_INLINE_TEMPLATE U64 XXH_readLE64(const void* ptr, XXH_endianess endian) +{ + return XXH_readLE64_align(ptr, endian, XXH_unaligned); +} + +static U64 XXH_readBE64(const void* ptr) +{ + return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr); +} + + +/* ************************************* +* Macros +***************************************/ +#define XXH_STATIC_ASSERT(c) { enum { XXH_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ + + +/* ************************************* +* Constants +***************************************/ +static const U32 PRIME32_1 = 2654435761U; +static const U32 PRIME32_2 = 2246822519U; +static const U32 PRIME32_3 = 3266489917U; +static const U32 PRIME32_4 = 668265263U; +static const U32 PRIME32_5 = 374761393U; + +static const U64 PRIME64_1 = 11400714785074694791ULL; +static const U64 PRIME64_2 = 14029467366897019727ULL; +static const U64 PRIME64_3 = 1609587929392839161ULL; +static const U64 PRIME64_4 = 9650029242287828579ULL; +static const U64 PRIME64_5 = 2870177450012600261ULL; + +XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; } + + +/* ************************** +* Utils +****************************/ +XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* restrict dstState, const XXH32_state_t* restrict srcState) +{ + ZSTD_memcpy(dstState, srcState, sizeof(*dstState)); +} + +XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* restrict dstState, const XXH64_state_t* restrict srcState) +{ + ZSTD_memcpy(dstState, srcState, sizeof(*dstState)); +} + + +/* *************************** +* Simple Hash Functions +*****************************/ + +static U32 XXH32_round(U32 seed, U32 input) +{ + seed += input * PRIME32_2; + seed = XXH_rotl32(seed, 13); + seed *= PRIME32_1; + return seed; +} + +FORCE_INLINE_TEMPLATE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_endianess endian, XXH_alignment align) +{ + const BYTE* p = (const BYTE*)input; + const BYTE* bEnd = p + len; + U32 h32; +#define XXH_get32bits(p) XXH_readLE32_align(p, endian, align) + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (p==NULL) { + len=0; + bEnd=p=(const BYTE*)(size_t)16; + } +#endif + + if (len>=16) { + const BYTE* const limit = bEnd - 16; + U32 v1 = seed + PRIME32_1 + PRIME32_2; + U32 v2 = seed + PRIME32_2; + U32 v3 = seed + 0; + U32 v4 = seed - PRIME32_1; + + do { + v1 = XXH32_round(v1, XXH_get32bits(p)); p+=4; + v2 = XXH32_round(v2, XXH_get32bits(p)); p+=4; + v3 = XXH32_round(v3, XXH_get32bits(p)); p+=4; + v4 = XXH32_round(v4, XXH_get32bits(p)); p+=4; + } while (p<=limit); + + h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18); + } else { + h32 = seed + PRIME32_5; + } + + h32 += (U32) len; + + while (p+4<=bEnd) { + h32 += XXH_get32bits(p) * PRIME32_3; + h32 = XXH_rotl32(h32, 17) * PRIME32_4 ; + p+=4; + } + + while (p> 15; + h32 *= PRIME32_2; + h32 ^= h32 >> 13; + h32 *= PRIME32_3; + h32 ^= h32 >> 16; + + return h32; +} + + +XXH_PUBLIC_API unsigned int XXH32 (const void* input, size_t len, unsigned int seed) +{ +#if 0 + /* Simple version, good for code maintenance, but unfortunately slow for small 
inputs */ + XXH32_CREATESTATE_STATIC(state); + XXH32_reset(state, seed); + XXH32_update(state, input, len); + return XXH32_digest(state); +#else + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if (XXH_FORCE_ALIGN_CHECK) { + if ((((size_t)input) & 3) == 0) { /* Input is 4-bytes aligned, leverage the speed benefit */ + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); + else + return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); + } } + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned); + else + return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); +#endif +} + + +static U64 XXH64_round(U64 acc, U64 input) +{ + acc += input * PRIME64_2; + acc = XXH_rotl64(acc, 31); + acc *= PRIME64_1; + return acc; +} + +static U64 XXH64_mergeRound(U64 acc, U64 val) +{ + val = XXH64_round(0, val); + acc ^= val; + acc = acc * PRIME64_1 + PRIME64_4; + return acc; +} + +FORCE_INLINE_TEMPLATE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align) +{ + const BYTE* p = (const BYTE*)input; + const BYTE* const bEnd = p + len; + U64 h64; +#define XXH_get64bits(p) XXH_readLE64_align(p, endian, align) + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (p==NULL) { + len=0; + bEnd=p=(const BYTE*)(size_t)32; + } +#endif + + if (len>=32) { + const BYTE* const limit = bEnd - 32; + U64 v1 = seed + PRIME64_1 + PRIME64_2; + U64 v2 = seed + PRIME64_2; + U64 v3 = seed + 0; + U64 v4 = seed - PRIME64_1; + + do { + v1 = XXH64_round(v1, XXH_get64bits(p)); p+=8; + v2 = XXH64_round(v2, XXH_get64bits(p)); p+=8; + v3 = XXH64_round(v3, XXH_get64bits(p)); p+=8; + v4 = XXH64_round(v4, XXH_get64bits(p)); p+=8; + } while (p<=limit); + + h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); + h64 = XXH64_mergeRound(h64, v1); + h64 = XXH64_mergeRound(h64, v2); + h64 = XXH64_mergeRound(h64, v3); + h64 = XXH64_mergeRound(h64, v4); + + } else { + h64 = seed + PRIME64_5; + } + + h64 += (U64) len; + + while (p+8<=bEnd) { + U64 const k1 = XXH64_round(0, XXH_get64bits(p)); + h64 ^= k1; + h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; + p+=8; + } + + if (p+4<=bEnd) { + h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1; + h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; + p+=4; + } + + while (p> 33; + h64 *= PRIME64_2; + h64 ^= h64 >> 29; + h64 *= PRIME64_3; + h64 ^= h64 >> 32; + + return h64; +} + + +XXH_PUBLIC_API unsigned long long XXH64 (const void* input, size_t len, unsigned long long seed) +{ +#if 0 + /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ + XXH64_CREATESTATE_STATIC(state); + XXH64_reset(state, seed); + XXH64_update(state, input, len); + return XXH64_digest(state); +#else + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if (XXH_FORCE_ALIGN_CHECK) { + if ((((size_t)input) & 7)==0) { /* Input is aligned, let's leverage the speed advantage */ + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); + else + return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); + } } + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned); + else + return 
XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); +#endif +} + + +/* ************************************************** +* Advanced Hash Functions +****************************************************/ + +XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void) +{ + return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t)); +} +XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr) +{ + XXH_free(statePtr); + return XXH_OK; +} + +XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void) +{ + return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t)); +} +XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr) +{ + XXH_free(statePtr); + return XXH_OK; +} + + +/*** Hash feed ***/ + +XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, unsigned int seed) +{ + XXH32_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ + ZSTD_memset(&state, 0, sizeof(state)-4); /* do not write into reserved, for future removal */ + state.v1 = seed + PRIME32_1 + PRIME32_2; + state.v2 = seed + PRIME32_2; + state.v3 = seed + 0; + state.v4 = seed - PRIME32_1; + ZSTD_memcpy(statePtr, &state, sizeof(state)); + return XXH_OK; +} + + +XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, unsigned long long seed) +{ + XXH64_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ + ZSTD_memset(&state, 0, sizeof(state)-8); /* do not write into reserved, for future removal */ + state.v1 = seed + PRIME64_1 + PRIME64_2; + state.v2 = seed + PRIME64_2; + state.v3 = seed + 0; + state.v4 = seed - PRIME64_1; + ZSTD_memcpy(statePtr, &state, sizeof(state)); + return XXH_OK; +} + + +FORCE_INLINE_TEMPLATE XXH_errorcode XXH32_update_endian (XXH32_state_t* state, const void* input, size_t len, XXH_endianess endian) +{ + const BYTE* p = (const BYTE*)input; + const BYTE* const bEnd = p + len; + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (input==NULL) return XXH_ERROR; +#endif + + state->total_len_32 += (unsigned)len; + state->large_len |= (len>=16) | (state->total_len_32>=16); + + if (state->memsize + len < 16) { /* fill in tmp buffer */ + XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len); + state->memsize += (unsigned)len; + return XXH_OK; + } + + if (state->memsize) { /* some data left from previous update */ + XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize); + { const U32* p32 = state->mem32; + state->v1 = XXH32_round(state->v1, XXH_readLE32(p32, endian)); p32++; + state->v2 = XXH32_round(state->v2, XXH_readLE32(p32, endian)); p32++; + state->v3 = XXH32_round(state->v3, XXH_readLE32(p32, endian)); p32++; + state->v4 = XXH32_round(state->v4, XXH_readLE32(p32, endian)); p32++; + } + p += 16-state->memsize; + state->memsize = 0; + } + + if (p <= bEnd-16) { + const BYTE* const limit = bEnd - 16; + U32 v1 = state->v1; + U32 v2 = state->v2; + U32 v3 = state->v3; + U32 v4 = state->v4; + + do { + v1 = XXH32_round(v1, XXH_readLE32(p, endian)); p+=4; + v2 = XXH32_round(v2, XXH_readLE32(p, endian)); p+=4; + v3 = XXH32_round(v3, XXH_readLE32(p, endian)); p+=4; + v4 = XXH32_round(v4, XXH_readLE32(p, endian)); p+=4; + } while (p<=limit); + + state->v1 = v1; + state->v2 = v2; + state->v3 = v3; + state->v4 = v4; + } + + if (p < bEnd) { + XXH_memcpy(state->mem32, p, (size_t)(bEnd-p)); + state->memsize = (unsigned)(bEnd-p); + } + + return XXH_OK; +} + +XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t len) +{ + XXH_endianess 
endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_update_endian(state_in, input, len, XXH_littleEndian); + else + return XXH32_update_endian(state_in, input, len, XXH_bigEndian); +} -typedef enum { - ZSTD_d_windowLogMax=100, /* Select a size limit (in power of 2) beyond which - * the streaming API will refuse to allocate memory buffer - * in order to protect the host from unreasonable memory requirements. - * This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode. - * By default, a decompression context accepts window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT). - * Special: value 0 means "use default maximum windowLog". */ - /* note : additional experimental parameters are also available - * within the experimental section of the API. - * At the time of this writing, they include : - * ZSTD_c_format - * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. - * note : never ever use experimentalParam? names directly - */ - ZSTD_d_experimentalParam1=1000 +FORCE_INLINE_TEMPLATE U32 XXH32_digest_endian (const XXH32_state_t* state, XXH_endianess endian) +{ + const BYTE * p = (const BYTE*)state->mem32; + const BYTE* const bEnd = (const BYTE*)(state->mem32) + state->memsize; + U32 h32; -} ZSTD_dParameter; + if (state->large_len) { + h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18); + } else { + h32 = state->v3 /* == seed */ + PRIME32_5; + } -/*! ZSTD_dParam_getBounds() : - * All parameters must belong to an interval with lower and upper bounds, - * otherwise they will either trigger an error or be automatically clamped. - * @return : a structure, ZSTD_bounds, which contains - * - an error status field, which must be tested using ZSTD_isError() - * - both lower and upper bounds, inclusive - */ -ZSTDLIB_API ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam); + h32 += state->total_len_32; -/*! ZSTD_DCtx_setParameter() : - * Set one compression parameter, selected by enum ZSTD_dParameter. - * All parameters have valid bounds. Bounds can be queried using ZSTD_dParam_getBounds(). - * Providing a value beyond bound will either clamp it, or trigger an error (depending on parameter). - * Setting a parameter is only possible during frame initialization (before starting decompression). - * @return : 0, or an error code (which can be tested using ZSTD_isError()). - */ -ZSTDLIB_API size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int value); + while (p+4<=bEnd) { + h32 += XXH_readLE32(p, endian) * PRIME32_3; + h32 = XXH_rotl32(h32, 17) * PRIME32_4; + p+=4; + } -/*! ZSTD_DCtx_reset() : - * Return a DCtx to clean state. - * Session and parameters can be reset jointly or separately. - * Parameters can only be reset when no active frame is being decompressed. - * @return : 0, or an error code, which can be tested with ZSTD_isError() - */ -ZSTDLIB_API size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset); + while (p> 15; + h32 *= PRIME32_2; + h32 ^= h32 >> 13; + h32 *= PRIME32_3; + h32 ^= h32 >> 16; -/**************************** -* Streaming -****************************/ + return h32; +} -typedef struct ZSTD_inBuffer_s { - const void* src; /**< start of input buffer */ - size_t size; /**< size of input buffer */ - size_t pos; /**< position where reading stopped. Will be updated. 
Necessarily 0 <= pos <= size */ -} ZSTD_inBuffer; -typedef struct ZSTD_outBuffer_s { - void* dst; /**< start of output buffer */ - size_t size; /**< size of output buffer */ - size_t pos; /**< position where writing stopped. Will be updated. Necessarily 0 <= pos <= size */ -} ZSTD_outBuffer; +XXH_PUBLIC_API unsigned int XXH32_digest (const XXH32_state_t* state_in) +{ + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_digest_endian(state_in, XXH_littleEndian); + else + return XXH32_digest_endian(state_in, XXH_bigEndian); +} -/*-*********************************************************************** -* Streaming compression - HowTo -* -* A ZSTD_CStream object is required to track streaming operation. -* Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources. -* ZSTD_CStream objects can be reused multiple times on consecutive compression operations. -* It is recommended to re-use ZSTD_CStream since it will play nicer with system's memory, by re-using already allocated memory. -* -* For parallel execution, use one separate ZSTD_CStream per thread. -* -* note : since v1.3.0, ZSTD_CStream and ZSTD_CCtx are the same thing. -* -* Parameters are sticky : when starting a new compression on the same context, -* it will re-use the same sticky parameters as previous compression session. -* When in doubt, it's recommended to fully initialize the context before usage. -* Use ZSTD_CCtx_reset() to reset the context and ZSTD_CCtx_setParameter(), -* ZSTD_CCtx_setPledgedSrcSize(), or ZSTD_CCtx_loadDictionary() and friends to -* set more specific parameters, the pledged source size, or load a dictionary. -* -* Use ZSTD_compressStream2() with ZSTD_e_continue as many times as necessary to -* consume input stream. The function will automatically update both `pos` -* fields within `input` and `output`. -* Note that the function may not consume the entire input, for example, because -* the output buffer is already full, in which case `input.pos < input.size`. -* The caller must check if input has been entirely consumed. -* If not, the caller must make some room to receive more compressed data, -* and then present again remaining input data. -* note: ZSTD_e_continue is guaranteed to make some forward progress when called, -* but doesn't guarantee maximal forward progress. This is especially relevant -* when compressing with multiple threads. The call won't block if it can -* consume some input, but if it can't it will wait for some, but not all, -* output to be flushed. -* @return : provides a minimum amount of data remaining to be flushed from internal buffers -* or an error code, which can be tested using ZSTD_isError(). -* -* At any moment, it's possible to flush whatever data might remain stuck within internal buffer, -* using ZSTD_compressStream2() with ZSTD_e_flush. `output->pos` will be updated. -* Note that, if `output->size` is too small, a single invocation with ZSTD_e_flush might not be enough (return code > 0). -* In which case, make some room to receive more compressed data, and call again ZSTD_compressStream2() with ZSTD_e_flush. -* You must continue calling ZSTD_compressStream2() with ZSTD_e_flush until it returns 0, at which point you can change the -* operation. -* note: ZSTD_e_flush will flush as much output as possible, meaning when compressing with multiple threads, it will -* block until the flush is complete or the output buffer is full. 
-* @return : 0 if internal buffers are entirely flushed, -* >0 if some data still present within internal buffer (the value is minimal estimation of remaining size), -* or an error code, which can be tested using ZSTD_isError(). -* -* Calling ZSTD_compressStream2() with ZSTD_e_end instructs to finish a frame. -* It will perform a flush and write frame epilogue. -* The epilogue is required for decoders to consider a frame completed. -* flush operation is the same, and follows same rules as calling ZSTD_compressStream2() with ZSTD_e_flush. -* You must continue calling ZSTD_compressStream2() with ZSTD_e_end until it returns 0, at which point you are free to -* start a new frame. -* note: ZSTD_e_end will flush as much output as possible, meaning when compressing with multiple threads, it will -* block until the flush is complete or the output buffer is full. -* @return : 0 if frame fully completed and fully flushed, -* >0 if some data still present within internal buffer (the value is minimal estimation of remaining size), -* or an error code, which can be tested using ZSTD_isError(). -* -* *******************************************************************/ -typedef ZSTD_CCtx ZSTD_CStream; /**< CCtx and CStream are now effectively same object (>= v1.3.0) */ - /* Continue to distinguish them for compatibility with older versions <= v1.2.0 */ -/*===== ZSTD_CStream management functions =====*/ -ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream(void); -ZSTDLIB_API size_t ZSTD_freeCStream(ZSTD_CStream* zcs); +/* **** XXH64 **** */ -/*===== Streaming compression functions =====*/ -typedef enum { - ZSTD_e_continue=0, /* collect more data, encoder decides when to output compressed result, for optimal compression ratio */ - ZSTD_e_flush=1, /* flush any data provided so far, - * it creates (at least) one new block, that can be decoded immediately on reception; - * frame will continue: any future data can still reference previously compressed data, improving compression. - * note : multithreaded compression will block to flush as much output as possible. */ - ZSTD_e_end=2 /* flush any remaining data _and_ close current frame. - * note that frame is only closed after compressed data is fully flushed (return value == 0). - * After that point, any additional data starts a new frame. - * note : each frame is independent (does not reference any content from previous frame). - : note : multithreaded compression will block to flush as much output as possible. */ -} ZSTD_EndDirective; +FORCE_INLINE_TEMPLATE XXH_errorcode XXH64_update_endian (XXH64_state_t* state, const void* input, size_t len, XXH_endianess endian) +{ + const BYTE* p = (const BYTE*)input; + const BYTE* const bEnd = p + len; -/*! ZSTD_compressStream2() : - * Behaves about the same as ZSTD_compressStream, with additional control on end directive. - * - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*() - * - Compression parameters cannot be changed once compression is started (save a list of exceptions in multi-threading mode) - * - output->pos must be <= dstCapacity, input->pos must be <= srcSize - * - output->pos and input->pos will be updated. They are guaranteed to remain below their respective limit. - * - When nbWorkers==0 (default), function is blocking : it completes its job before returning to caller. 
- * - When nbWorkers>=1, function is non-blocking : it just acquires a copy of input, and distributes jobs to internal worker threads, flush whatever is available, - * and then immediately returns, just indicating that there is some data remaining to be flushed. - * The function nonetheless guarantees forward progress : it will return only after it reads or write at least 1+ byte. - * - Exception : if the first call requests a ZSTD_e_end directive and provides enough dstCapacity, the function delegates to ZSTD_compress2() which is always blocking. - * - @return provides a minimum amount of data remaining to be flushed from internal buffers - * or an error code, which can be tested using ZSTD_isError(). - * if @return != 0, flush is not fully completed, there is still some data left within internal buffers. - * This is useful for ZSTD_e_flush, since in this case more flushes are necessary to empty all buffers. - * For ZSTD_e_end, @return == 0 when internal buffers are fully flushed and frame is completed. - * - after a ZSTD_e_end directive, if internal buffer is not fully flushed (@return != 0), - * only ZSTD_e_end or ZSTD_e_flush operations are allowed. - * Before starting a new compression job, or changing compression parameters, - * it is required to fully flush internal buffers. - */ -ZSTDLIB_API size_t ZSTD_compressStream2( ZSTD_CCtx* cctx, - ZSTD_outBuffer* output, - ZSTD_inBuffer* input, - ZSTD_EndDirective endOp); +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (input==NULL) return XXH_ERROR; +#endif + state->total_len += len; -/* These buffer sizes are softly recommended. - * They are not required : ZSTD_compressStream*() happily accepts any buffer size, for both input and output. - * Respecting the recommended size just makes it a bit easier for ZSTD_compressStream*(), - * reducing the amount of memory shuffling and buffering, resulting in minor performance savings. - * - * However, note that these recommendations are from the perspective of a C caller program. - * If the streaming interface is invoked from some other language, - * especially managed ones such as Java or Go, through a foreign function interface such as jni or cgo, - * a major performance rule is to reduce crossing such interface to an absolute minimum. - * It's not rare that performance ends being spent more into the interface, rather than compression itself. - * In which cases, prefer using large buffers, as large as practical, - * for both input and output, to reduce the nb of roundtrips. - */ -ZSTDLIB_API size_t ZSTD_CStreamInSize(void); /**< recommended size for input buffer */ -ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block. 
*/ + if (state->memsize + len < 32) { /* fill in tmp buffer */ + if (input != NULL) { + XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len); + } + state->memsize += (U32)len; + return XXH_OK; + } + if (state->memsize) { /* tmp buffer is full */ + XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize); + state->v1 = XXH64_round(state->v1, XXH_readLE64(state->mem64+0, endian)); + state->v2 = XXH64_round(state->v2, XXH_readLE64(state->mem64+1, endian)); + state->v3 = XXH64_round(state->v3, XXH_readLE64(state->mem64+2, endian)); + state->v4 = XXH64_round(state->v4, XXH_readLE64(state->mem64+3, endian)); + p += 32-state->memsize; + state->memsize = 0; + } -/* ***************************************************************************** - * This following is a legacy streaming API. - * It can be replaced by ZSTD_CCtx_reset() and ZSTD_compressStream2(). - * It is redundant, but remains fully supported. - * Advanced parameters and dictionary compression can only be used through the - * new API. - ******************************************************************************/ + if (p+32 <= bEnd) { + const BYTE* const limit = bEnd - 32; + U64 v1 = state->v1; + U64 v2 = state->v2; + U64 v3 = state->v3; + U64 v4 = state->v4; -/*! - * Equivalent to: - * - * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); - * ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any) - * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); - */ -ZSTDLIB_API size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel); -/*! - * Alternative for ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue). - * NOTE: The return value is different. ZSTD_compressStream() returns a hint for - * the next read size (if non-zero and not an error). ZSTD_compressStream2() - * returns the minimum nb of bytes left to flush (if non-zero and not an error). - */ -ZSTDLIB_API size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input); -/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_flush). */ -ZSTDLIB_API size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); -/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_end). */ -ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); + do { + v1 = XXH64_round(v1, XXH_readLE64(p, endian)); p+=8; + v2 = XXH64_round(v2, XXH_readLE64(p, endian)); p+=8; + v3 = XXH64_round(v3, XXH_readLE64(p, endian)); p+=8; + v4 = XXH64_round(v4, XXH_readLE64(p, endian)); p+=8; + } while (p<=limit); + state->v1 = v1; + state->v2 = v2; + state->v3 = v3; + state->v4 = v4; + } -/*-*************************************************************************** -* Streaming decompression - HowTo -* -* A ZSTD_DStream object is required to track streaming operations. -* Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources. -* ZSTD_DStream objects can be re-used multiple times. -* -* Use ZSTD_initDStream() to start a new decompression operation. -* @return : recommended first input size -* Alternatively, use advanced API to set specific properties. -* -* Use ZSTD_decompressStream() repetitively to consume your input. -* The function will update both `pos` fields. -* If `input.pos < input.size`, some input has not been consumed. -* It's up to the caller to present again remaining data. -* The function tries to flush all data decoded immediately, respecting output buffer size. 
-* If `output.pos < output.size`, decoder has flushed everything it could. -* But if `output.pos == output.size`, there might be some data left within internal buffers., -* In which case, call ZSTD_decompressStream() again to flush whatever remains in the buffer. -* Note : with no additional input provided, amount of data flushed is necessarily <= ZSTD_BLOCKSIZE_MAX. -* @return : 0 when a frame is completely decoded and fully flushed, -* or an error code, which can be tested using ZSTD_isError(), -* or any other value > 0, which means there is still some decoding or flushing to do to complete current frame : -* the return value is a suggested next input size (just a hint for better latency) -* that will never request more than the remaining frame size. -* *******************************************************************************/ + if (p < bEnd) { + XXH_memcpy(state->mem64, p, (size_t)(bEnd-p)); + state->memsize = (unsigned)(bEnd-p); + } -typedef ZSTD_DCtx ZSTD_DStream; /**< DCtx and DStream are now effectively same object (>= v1.3.0) */ - /* For compatibility with versions <= v1.2.0, prefer differentiating them. */ -/*===== ZSTD_DStream management functions =====*/ -ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream(void); -ZSTDLIB_API size_t ZSTD_freeDStream(ZSTD_DStream* zds); + return XXH_OK; +} -/*===== Streaming decompression functions =====*/ +XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t len) +{ + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; -/* This function is redundant with the advanced API and equivalent to: - * - * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); - * ZSTD_DCtx_refDDict(zds, NULL); - */ -ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds); + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_update_endian(state_in, input, len, XXH_littleEndian); + else + return XXH64_update_endian(state_in, input, len, XXH_bigEndian); +} -ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input); -ZSTDLIB_API size_t ZSTD_DStreamInSize(void); /*!< recommended size for input buffer */ -ZSTDLIB_API size_t ZSTD_DStreamOutSize(void); /*!< recommended size for output buffer. Guarantee to successfully flush at least one complete block in all circumstances. */ +FORCE_INLINE_TEMPLATE U64 XXH64_digest_endian (const XXH64_state_t* state, XXH_endianess endian) +{ + const BYTE * p = (const BYTE*)state->mem64; + const BYTE* const bEnd = (const BYTE*)state->mem64 + state->memsize; + U64 h64; -/************************** -* Simple dictionary API -***************************/ -/*! ZSTD_compress_usingDict() : - * Compression at an explicit compression level using a Dictionary. - * A dictionary can be any arbitrary data segment (also called a prefix), - * or a buffer with specified information (see dictBuilder/zdict.h). - * Note : This function loads the dictionary, resulting in significant startup delay. - * It's intended for a dictionary used only once. - * Note 2 : When `dict == NULL || dictSize < 8` no dictionary is used. */ -ZSTDLIB_API size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const void* dict,size_t dictSize, - int compressionLevel); + if (state->total_len >= 32) { + U64 const v1 = state->v1; + U64 const v2 = state->v2; + U64 const v3 = state->v3; + U64 const v4 = state->v4; -/*! ZSTD_decompress_usingDict() : - * Decompression using a known Dictionary. 
- * Dictionary must be identical to the one used during compression.
- * Note : This function loads the dictionary, resulting in significant startup delay.
- * It's intended for a dictionary used only once.
- * Note : When `dict == NULL || dictSize < 8` no dictionary is used. */
-ZSTDLIB_API size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx,
-                                             void* dst, size_t dstCapacity,
-                                             const void* src, size_t srcSize,
-                                             const void* dict,size_t dictSize);
+    h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
+    h64 = XXH64_mergeRound(h64, v1);
+    h64 = XXH64_mergeRound(h64, v2);
+    h64 = XXH64_mergeRound(h64, v3);
+    h64 = XXH64_mergeRound(h64, v4);
+    } else {
+        h64 = state->v3 + PRIME64_5;
+    }
+    h64 += (U64) state->total_len;
-/***********************************
- * Bulk processing dictionary API
- **********************************/
-typedef struct ZSTD_CDict_s ZSTD_CDict;
+    while (p+8<=bEnd) {
+        U64 const k1 = XXH64_round(0, XXH_readLE64(p, endian));
+        h64 ^= k1;
+        h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4;
+        p+=8;
+    }
-/*! ZSTD_createCDict() :
- * When compressing multiple messages or blocks using the same dictionary,
- * it's recommended to digest the dictionary only once, since it's a costly operation.
- * ZSTD_createCDict() will create a state from digesting a dictionary.
- * The resulting state can be used for future compression operations with very limited startup cost.
- * ZSTD_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only.
- * @dictBuffer can be released after ZSTD_CDict creation, because its content is copied within CDict.
- * Note 1 : Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate @dictBuffer content.
- * Note 2 : A ZSTD_CDict can be created from an empty @dictBuffer,
- * in which case the only thing that it transports is the @compressionLevel.
- * This can be useful in a pipeline featuring ZSTD_compress_usingCDict() exclusively,
- * expecting a ZSTD_CDict parameter with any data, including those without a known dictionary. */
-ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize,
-                                         int compressionLevel);
+    if (p+4<=bEnd) {
+        h64 ^= (U64)(XXH_readLE32(p, endian)) * PRIME64_1;
+        h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
+        p+=4;
+    }
-/*! ZSTD_freeCDict() :
- * Function frees memory allocated by ZSTD_createCDict(). */
-ZSTDLIB_API size_t ZSTD_freeCDict(ZSTD_CDict* CDict);
+    while (p<bEnd) {
+        h64 ^= (*p) * PRIME64_5;
+        h64 = XXH_rotl64(h64, 11) * PRIME64_1;
+        p++;
+    }
-/*! ZSTD_compress_usingCDict() :
- * Compression using a digested Dictionary.
- * Recommended when same dictionary is to be used multiple times.
- * Note : compression level is _decided at dictionary creation time_,
- * and frame parameters are hardcoded (dictID=yes, contentSize=yes, checksum=no) */
-ZSTDLIB_API size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx,
-                                            void* dst, size_t dstCapacity,
-                                            const void* src, size_t srcSize,
-                                            const ZSTD_CDict* cdict);
+    h64 ^= h64 >> 33;
+    h64 *= PRIME64_2;
+    h64 ^= h64 >> 29;
+    h64 *= PRIME64_3;
+    h64 ^= h64 >> 32;
+    return h64;
+}
-typedef struct ZSTD_DDict_s ZSTD_DDict;
-/*! ZSTD_createDDict() :
- * Create a digested dictionary, ready to start decompression operation without startup delay.
- * dictBuffer can be released after DDict creation, as its content is copied inside DDict. */
-ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dictBuffer, size_t dictSize);
+XXH_PUBLIC_API unsigned long long XXH64_digest (const XXH64_state_t* state_in)
+{
+    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
-/*! ZSTD_freeDDict() :
- * Function frees memory allocated with ZSTD_createDDict() */
-ZSTDLIB_API size_t ZSTD_freeDDict(ZSTD_DDict* ddict);
+    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+        return XXH64_digest_endian(state_in, XXH_littleEndian);
+    else
+        return XXH64_digest_endian(state_in, XXH_bigEndian);
+}
-/*! ZSTD_decompress_usingDDict() :
- * Decompression using a digested Dictionary.
- * Recommended when same dictionary is used multiple times. */ -ZSTDLIB_API size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const ZSTD_DDict* ddict); +/* ************************** +* Canonical representation +****************************/ -/******************************** - * Dictionary helper functions - *******************************/ +/*! Default XXH result types are basic unsigned 32 and 64 bits. +* The canonical representation follows human-readable write convention, aka big-endian (large digits first). +* These functions allow transformation of hash result into and from its canonical format. +* This way, hash values can be written into a file or buffer, and remain comparable across different systems and programs. +*/ -/*! ZSTD_getDictID_fromDict() : - * Provides the dictID stored within dictionary. - * if @return == 0, the dictionary is not conformant with Zstandard specification. - * It can still be loaded, but as a content-only dictionary. */ -ZSTDLIB_API unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize); +XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash) +{ + XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t)); + if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash); + ZSTD_memcpy(dst, &hash, sizeof(*dst)); +} -/*! ZSTD_getDictID_fromDDict() : - * Provides the dictID of the dictionary loaded into `ddict`. - * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. - * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ -ZSTDLIB_API unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict); +XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash) +{ + XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t)); + if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash); + ZSTD_memcpy(dst, &hash, sizeof(*dst)); +} -/*! ZSTD_getDictID_fromFrame() : - * Provides the dictID required to decompressed the frame stored within `src`. - * If @return == 0, the dictID could not be decoded. - * This could for one of the following reasons : - * - The frame does not require a dictionary to be decoded (most common case). - * - The frame was built with dictID intentionally removed. Whatever dictionary is necessary is a hidden information. - * Note : this use case also happens when using a non-conformant dictionary. - * - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`). - * - This is not a Zstandard frame. - * When identifying the exact failure cause, it's possible to use ZSTD_getFrameHeader(), which will provide a more precise error code. */ -ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize); +XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src) +{ + return XXH_readBE32(src); +} + +XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src) +{ + return XXH_readBE64(src); +} +/**** ended inlining xxhash.c ****/ +# endif +#endif /* XXH_STATIC_LINKING_ONLY && XXH_STATIC_H_3543687687345 */ -/******************************************************************************* - * Advanced dictionary and prefix API + +#if defined (__cplusplus) +} +#endif +/**** ended inlining xxhash.h ****/ +#ifndef ZSTD_NO_TRACE +/**** start inlining zstd_trace.h ****/ +/* + * Copyright (c) Facebook, Inc. 
+ * All rights reserved. * - * This API allows dictionaries to be used with ZSTD_compress2(), - * ZSTD_compressStream2(), and ZSTD_decompress(). Dictionaries are sticky, and - * only reset with the context is reset with ZSTD_reset_parameters or - * ZSTD_reset_session_and_parameters. Prefixes are single-use. - ******************************************************************************/ + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_TRACE_H +#define ZSTD_TRACE_H + +#if defined (__cplusplus) +extern "C" { +#endif +#include -/*! ZSTD_CCtx_loadDictionary() : - * Create an internal CDict from `dict` buffer. - * Decompression will have to use same dictionary. - * @result : 0, or an error code (which can be tested with ZSTD_isError()). - * Special: Loading a NULL (or 0-size) dictionary invalidates previous dictionary, - * meaning "return to no-dictionary mode". - * Note 1 : Dictionary is sticky, it will be used for all future compressed frames. - * To return to "no-dictionary" situation, load a NULL dictionary (or reset parameters). - * Note 2 : Loading a dictionary involves building tables. - * It's also a CPU consuming operation, with non-negligible impact on latency. - * Tables are dependent on compression parameters, and for this reason, - * compression parameters can no longer be changed after loading a dictionary. - * Note 3 :`dict` content will be copied internally. - * Use experimental ZSTD_CCtx_loadDictionary_byReference() to reference content instead. - * In such a case, dictionary buffer must outlive its users. - * Note 4 : Use ZSTD_CCtx_loadDictionary_advanced() - * to precisely select how dictionary content must be interpreted. */ -ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize); +/* weak symbol support */ +#if !defined(ZSTD_HAVE_WEAK_SYMBOLS) && defined(__GNUC__) && \ + !defined(__APPLE__) && !defined(_WIN32) && !defined(__MINGW32__) && \ + !defined(__CYGWIN__) +# define ZSTD_HAVE_WEAK_SYMBOLS 1 +#else +# define ZSTD_HAVE_WEAK_SYMBOLS 0 +#endif +#if ZSTD_HAVE_WEAK_SYMBOLS +# define ZSTD_WEAK_ATTR __attribute__((__weak__)) +#else +# define ZSTD_WEAK_ATTR +#endif -/*! ZSTD_CCtx_refCDict() : - * Reference a prepared dictionary, to be used for all next compressed frames. - * Note that compression parameters are enforced from within CDict, - * and supersede any compression parameter previously set within CCtx. - * The parameters ignored are labled as "superseded-by-cdict" in the ZSTD_cParameter enum docs. - * The ignored parameters will be used again if the CCtx is returned to no-dictionary mode. - * The dictionary will remain valid for future compressed frames using same CCtx. - * @result : 0, or an error code (which can be tested with ZSTD_isError()). - * Special : Referencing a NULL CDict means "return to no-dictionary mode". - * Note 1 : Currently, only one dictionary can be managed. - * Referencing a new dictionary effectively "discards" any previous one. - * Note 2 : CDict is just referenced, its lifetime must outlive its usage within CCtx. */ -ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); +/* Only enable tracing when weak symbols are available. */ +#ifndef ZSTD_TRACE +# define ZSTD_TRACE ZSTD_HAVE_WEAK_SYMBOLS +#endif -/*! 
ZSTD_CCtx_refPrefix() : - * Reference a prefix (single-usage dictionary) for next compressed frame. - * A prefix is **only used once**. Tables are discarded at end of frame (ZSTD_e_end). - * Decompression will need same prefix to properly regenerate data. - * Compressing with a prefix is similar in outcome as performing a diff and compressing it, - * but performs much faster, especially during decompression (compression speed is tunable with compression level). - * @result : 0, or an error code (which can be tested with ZSTD_isError()). - * Special: Adding any prefix (including NULL) invalidates any previous prefix or dictionary - * Note 1 : Prefix buffer is referenced. It **must** outlive compression. - * Its content must remain unmodified during compression. - * Note 2 : If the intention is to diff some large src data blob with some prior version of itself, - * ensure that the window size is large enough to contain the entire source. - * See ZSTD_c_windowLog. - * Note 3 : Referencing a prefix involves building tables, which are dependent on compression parameters. - * It's a CPU consuming operation, with non-negligible impact on latency. - * If there is a need to use the same prefix multiple times, consider loadDictionary instead. - * Note 4 : By default, the prefix is interpreted as raw content (ZSTD_dct_rawContent). - * Use experimental ZSTD_CCtx_refPrefix_advanced() to alter dictionary interpretation. */ -ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, - const void* prefix, size_t prefixSize); +#if ZSTD_TRACE -/*! ZSTD_DCtx_loadDictionary() : - * Create an internal DDict from dict buffer, - * to be used to decompress next frames. - * The dictionary remains valid for all future frames, until explicitly invalidated. - * @result : 0, or an error code (which can be tested with ZSTD_isError()). - * Special : Adding a NULL (or 0-size) dictionary invalidates any previous dictionary, - * meaning "return to no-dictionary mode". - * Note 1 : Loading a dictionary involves building tables, - * which has a non-negligible impact on CPU usage and latency. - * It's recommended to "load once, use many times", to amortize the cost - * Note 2 :`dict` content will be copied internally, so `dict` can be released after loading. - * Use ZSTD_DCtx_loadDictionary_byReference() to reference dictionary content instead. - * Note 3 : Use ZSTD_DCtx_loadDictionary_advanced() to take control of - * how dictionary content is loaded and interpreted. - */ -ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); +struct ZSTD_CCtx_s; +struct ZSTD_DCtx_s; +struct ZSTD_CCtx_params_s; -/*! ZSTD_DCtx_refDDict() : - * Reference a prepared dictionary, to be used to decompress next frames. - * The dictionary remains active for decompression of future frames using same DCtx. - * @result : 0, or an error code (which can be tested with ZSTD_isError()). - * Note 1 : Currently, only one dictionary can be managed. - * Referencing a new dictionary effectively "discards" any previous one. - * Special: referencing a NULL DDict means "return to no-dictionary mode". - * Note 2 : DDict is just referenced, its lifetime must outlive its usage from DCtx. +typedef struct { + /** + * ZSTD_VERSION_NUMBER + * + * This is guaranteed to be the first member of ZSTD_trace. + * Otherwise, this struct is not stable between versions. If + * the version number does not match your expectation, you + * should not interpret the rest of the struct. 
+ */ + unsigned version; + /** + * Non-zero if streaming (de)compression is used. + */ + unsigned streaming; + /** + * The dictionary ID. + */ + unsigned dictionaryID; + /** + * Is the dictionary cold? + * Only set on decompression. + */ + unsigned dictionaryIsCold; + /** + * The dictionary size or zero if no dictionary. + */ + size_t dictionarySize; + /** + * The uncompressed size of the data. + */ + size_t uncompressedSize; + /** + * The compressed size of the data. + */ + size_t compressedSize; + /** + * The fully resolved CCtx parameters (NULL on decompression). + */ + struct ZSTD_CCtx_params_s const* params; + /** + * The ZSTD_CCtx pointer (NULL on decompression). + */ + struct ZSTD_CCtx_s const* cctx; + /** + * The ZSTD_DCtx pointer (NULL on compression). + */ + struct ZSTD_DCtx_s const* dctx; +} ZSTD_Trace; + +/** + * A tracing context. It must be 0 when tracing is disabled. + * Otherwise, any non-zero value returned by a tracing begin() + * function is presented to any subsequent calls to end(). + * + * Any non-zero value is treated as tracing is enabled and not + * interpreted by the library. + * + * Two possible uses are: + * * A timestamp for when the begin() function was called. + * * A unique key identifying the (de)compression, like the + * address of the [dc]ctx pointer if you need to track + * more information than just a timestamp. */ -ZSTDLIB_API size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); +typedef unsigned long long ZSTD_TraceCtx; -/*! ZSTD_DCtx_refPrefix() : - * Reference a prefix (single-usage dictionary) to decompress next frame. - * This is the reverse operation of ZSTD_CCtx_refPrefix(), - * and must use the same prefix as the one used during compression. - * Prefix is **only used once**. Reference is discarded at end of frame. - * End of frame is reached when ZSTD_decompressStream() returns 0. - * @result : 0, or an error code (which can be tested with ZSTD_isError()). - * Note 1 : Adding any prefix (including NULL) invalidates any previously set prefix or dictionary - * Note 2 : Prefix buffer is referenced. It **must** outlive decompression. - * Prefix buffer must remain unmodified up to the end of frame, - * reached when ZSTD_decompressStream() returns 0. - * Note 3 : By default, the prefix is treated as raw content (ZSTD_dct_rawContent). - * Use ZSTD_CCtx_refPrefix_advanced() to alter dictMode (Experimental section) - * Note 4 : Referencing a raw content prefix has almost no cpu nor memory cost. - * A full dictionary is more costly, as it requires building tables. +/** + * Trace the beginning of a compression call. + * @param cctx The dctx pointer for the compression. + * It can be used as a key to map begin() to end(). + * @returns Non-zero if tracing is enabled. The return value is + * passed to ZSTD_trace_compress_end(). */ -ZSTDLIB_API size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, - const void* prefix, size_t prefixSize); +ZSTD_WEAK_ATTR ZSTD_TraceCtx ZSTD_trace_compress_begin( + struct ZSTD_CCtx_s const* cctx); -/* === Memory management === */ +/** + * Trace the end of a compression call. + * @param ctx The return value of ZSTD_trace_compress_begin(). + * @param trace The zstd tracing info. + */ +ZSTD_WEAK_ATTR void ZSTD_trace_compress_end( + ZSTD_TraceCtx ctx, + ZSTD_Trace const* trace); -/*! ZSTD_sizeof_*() : - * These functions give the _current_ memory usage of selected object. - * Note that object memory usage can evolve (increase or decrease) over time. 
*/ -ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx); -ZSTDLIB_API size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx); -ZSTDLIB_API size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs); -ZSTDLIB_API size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds); -ZSTDLIB_API size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict); -ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict); +/** + * Trace the beginning of a decompression call. + * @param dctx The dctx pointer for the decompression. + * It can be used as a key to map begin() to end(). + * @returns Non-zero if tracing is enabled. The return value is + * passed to ZSTD_trace_compress_end(). + */ +ZSTD_WEAK_ATTR ZSTD_TraceCtx ZSTD_trace_decompress_begin( + struct ZSTD_DCtx_s const* dctx); -#endif /* ZSTD_H_235446 */ +/** + * Trace the end of a decompression call. + * @param ctx The return value of ZSTD_trace_decompress_begin(). + * @param trace The zstd tracing info. + */ +ZSTD_WEAK_ATTR void ZSTD_trace_decompress_end( + ZSTD_TraceCtx ctx, + ZSTD_Trace const* trace); +#endif /* ZSTD_TRACE */ -/* ************************************************************************************** - * ADVANCED AND EXPERIMENTAL FUNCTIONS - **************************************************************************************** - * The definitions in the following section are considered experimental. - * They are provided for advanced scenarios. - * They should never be used with a dynamic library, as prototypes may change in the future. - * Use them only in association with static linking. - * ***************************************************************************************/ +#if defined (__cplusplus) +} +#endif -#if defined(ZSTD_STATIC_LINKING_ONLY) && !defined(ZSTD_H_ZSTD_STATIC_LINKING_ONLY) -#define ZSTD_H_ZSTD_STATIC_LINKING_ONLY +#endif /* ZSTD_TRACE_H */ +/**** ended inlining zstd_trace.h ****/ +#else +# define ZSTD_TRACE 0 +#endif -/**************************************************************************************** - * experimental API (static linking only) - **************************************************************************************** - * The following symbols and constants - * are not planned to join "stable API" status in the near future. - * They can still change in future versions. - * Some of them are planned to remain in the static_only section indefinitely. - * Some of them might be removed in the future (especially when redundant with existing stable functions) - * ***************************************************************************************/ +#if defined (__cplusplus) +extern "C" { +#endif -#define ZSTD_FRAMEHEADERSIZE_PREFIX(format) ((format) == ZSTD_f_zstd1 ? 5 : 1) /* minimum input size required to query frame header size */ -#define ZSTD_FRAMEHEADERSIZE_MIN(format) ((format) == ZSTD_f_zstd1 ? 6 : 2) -#define ZSTD_FRAMEHEADERSIZE_MAX 18 /* can be useful for static allocation */ -#define ZSTD_SKIPPABLEHEADERSIZE 8 +/* ---- static assert (debug) --- */ +#define ZSTD_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) +#define ZSTD_isError ERR_isError /* for inlining */ +#define FSE_isError ERR_isError +#define HUF_isError ERR_isError -/* compression parameter bounds */ -#define ZSTD_WINDOWLOG_MAX_32 30 -#define ZSTD_WINDOWLOG_MAX_64 31 -#define ZSTD_WINDOWLOG_MAX ((int)(sizeof(size_t) == 4 ? ZSTD_WINDOWLOG_MAX_32 : ZSTD_WINDOWLOG_MAX_64)) -#define ZSTD_WINDOWLOG_MIN 10 -#define ZSTD_HASHLOG_MAX ((ZSTD_WINDOWLOG_MAX < 30) ? 
ZSTD_WINDOWLOG_MAX : 30) -#define ZSTD_HASHLOG_MIN 6 -#define ZSTD_CHAINLOG_MAX_32 29 -#define ZSTD_CHAINLOG_MAX_64 30 -#define ZSTD_CHAINLOG_MAX ((int)(sizeof(size_t) == 4 ? ZSTD_CHAINLOG_MAX_32 : ZSTD_CHAINLOG_MAX_64)) -#define ZSTD_CHAINLOG_MIN ZSTD_HASHLOG_MIN -#define ZSTD_SEARCHLOG_MAX (ZSTD_WINDOWLOG_MAX-1) -#define ZSTD_SEARCHLOG_MIN 1 -#define ZSTD_MINMATCH_MAX 7 /* only for ZSTD_fast, other strategies are limited to 6 */ -#define ZSTD_MINMATCH_MIN 3 /* only for ZSTD_btopt+, faster strategies are limited to 4 */ -#define ZSTD_TARGETLENGTH_MAX ZSTD_BLOCKSIZE_MAX -#define ZSTD_TARGETLENGTH_MIN 0 /* note : comparing this constant to an unsigned results in a tautological test */ -#define ZSTD_STRATEGY_MIN ZSTD_fast -#define ZSTD_STRATEGY_MAX ZSTD_btultra2 +/*-************************************* +* shared macros +***************************************/ +#undef MIN +#undef MAX +#define MIN(a,b) ((a)<(b) ? (a) : (b)) +#define MAX(a,b) ((a)>(b) ? (a) : (b)) -#define ZSTD_OVERLAPLOG_MIN 0 -#define ZSTD_OVERLAPLOG_MAX 9 +/** + * Ignore: this is an internal helper. + * + * This is a helper function to help force C99-correctness during compilation. + * Under strict compilation modes, variadic macro arguments can't be empty. + * However, variadic function arguments can be. Using a function therefore lets + * us statically check that at least one (string) argument was passed, + * independent of the compilation flags. + */ +static INLINE_KEYWORD UNUSED_ATTR +void _force_has_format_string(const char *format, ...) { + (void)format; +} -#define ZSTD_WINDOWLOG_LIMIT_DEFAULT 27 /* by default, the streaming decoder will refuse any frame - * requiring larger than (1< 3, then this is seqDef.offset - 3 - * If seqDef.offset < 3, then this is the corresponding repeat offset - * But if seqDef.offset < 3 and litLength == 0, this is the - * repeat offset before the corresponding repeat offset - * And if seqDef.offset == 3 and litLength == 0, this is the - * most recent repeat offset - 1 - */ - unsigned int offset; - unsigned int litLength; /* Literal length */ - unsigned int matchLength; /* Match length */ - /* 0 when seq not rep and seqDef.offset otherwise - * when litLength == 0 this will be <= 4, otherwise <= 3 like normal - */ - unsigned int rep; -} ZSTD_Sequence; +#define BIT7 128 +#define BIT6 64 +#define BIT5 32 +#define BIT4 16 +#define BIT1 2 +#define BIT0 1 -typedef struct { - unsigned windowLog; /**< largest match distance : larger == more compression, more memory needed during decompression */ - unsigned chainLog; /**< fully searched segment : larger == more compression, slower, more memory (useless for fast) */ - unsigned hashLog; /**< dispatch table : larger == faster, more memory */ - unsigned searchLog; /**< nb of searches : larger == more compression, slower */ - unsigned minMatch; /**< match length searched : larger == faster decompression, sometimes less compression */ - unsigned targetLength; /**< acceptable match size for optimal parser (only) : larger == more compression, slower */ - ZSTD_strategy strategy; /**< see ZSTD_strategy definition above */ -} ZSTD_compressionParameters; +#define ZSTD_WINDOWLOG_ABSOLUTEMIN 10 +static UNUSED_ATTR const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 }; +static UNUSED_ATTR const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 }; -typedef struct { - int contentSizeFlag; /**< 1: content size will be in frame header (when known) */ - int checksumFlag; /**< 1: generate a 32-bits checksum using XXH64 algorithm at end of frame, for error detection */ - int 
noDictIDFlag; /**< 1: no dictID will be saved into frame header (dictID is only useful for dictionary compression) */ -} ZSTD_frameParameters; +#define ZSTD_FRAMEIDSIZE 4 /* magic number size */ -typedef struct { - ZSTD_compressionParameters cParams; - ZSTD_frameParameters fParams; -} ZSTD_parameters; +#define ZSTD_BLOCKHEADERSIZE 3 /* C standard doesn't allow `static const` variable to be init using another `static const` variable */ +static UNUSED_ATTR const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE; +typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e; -typedef enum { - ZSTD_dct_auto = 0, /* dictionary is "full" when starting with ZSTD_MAGIC_DICTIONARY, otherwise it is "rawContent" */ - ZSTD_dct_rawContent = 1, /* ensures dictionary is always loaded as rawContent, even if it starts with ZSTD_MAGIC_DICTIONARY */ - ZSTD_dct_fullDict = 2 /* refuses to load a dictionary if it does not respect Zstandard's specification, starting with ZSTD_MAGIC_DICTIONARY */ -} ZSTD_dictContentType_e; +#define ZSTD_FRAMECHECKSUMSIZE 4 -typedef enum { - ZSTD_dlm_byCopy = 0, /**< Copy dictionary content internally */ - ZSTD_dlm_byRef = 1 /**< Reference dictionary content -- the dictionary buffer must outlive its users. */ -} ZSTD_dictLoadMethod_e; +#define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */ +#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */) /* for a non-null block */ -typedef enum { - ZSTD_f_zstd1 = 0, /* zstd frame format, specified in zstd_compression_format.md (default) */ - ZSTD_f_zstd1_magicless = 1 /* Variant of zstd frame format, without initial 4-bytes magic number. - * Useful to save 4 bytes per generated frame. - * Decoder cannot recognise automatically this format, requiring this instruction. */ -} ZSTD_format_e; +#define HufLog 12 +typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e; -typedef enum { - /* Note: this enum and the behavior it controls are effectively internal - * implementation details of the compressor. They are expected to continue - * to evolve and should be considered only in the context of extremely - * advanced performance tuning. - * - * Zstd currently supports the use of a CDict in three ways: - * - * - The contents of the CDict can be copied into the working context. This - * means that the compression can search both the dictionary and input - * while operating on a single set of internal tables. This makes - * the compression faster per-byte of input. However, the initial copy of - * the CDict's tables incurs a fixed cost at the beginning of the - * compression. For small compressions (< 8 KB), that copy can dominate - * the cost of the compression. - * - * - The CDict's tables can be used in-place. In this model, compression is - * slower per input byte, because the compressor has to search two sets of - * tables. However, this model incurs no start-up cost (as long as the - * working context's tables can be reused). For small inputs, this can be - * faster than copying the CDict's tables. - * - * - The CDict's tables are not used at all, and instead we use the working - * context alone to reload the dictionary and use params based on the source - * size. See ZSTD_compress_insertDictionary() and ZSTD_compress_usingDict(). - * This method is effective when the dictionary sizes are very small relative - * to the input size, and the input size is fairly large to begin with. 
- * - * Zstd has a simple internal heuristic that selects which strategy to use - * at the beginning of a compression. However, if experimentation shows that - * Zstd is making poor choices, it is possible to override that choice with - * this enum. - */ - ZSTD_dictDefaultAttach = 0, /* Use the default heuristic. */ - ZSTD_dictForceAttach = 1, /* Never copy the dictionary. */ - ZSTD_dictForceCopy = 2, /* Always copy the dictionary. */ - ZSTD_dictForceLoad = 3 /* Always reload the dictionary */ -} ZSTD_dictAttachPref_e; +#define LONGNBSEQ 0x7F00 -typedef enum { - ZSTD_lcm_auto = 0, /**< Automatically determine the compression mode based on the compression level. - * Negative compression levels will be uncompressed, and positive compression - * levels will be compressed. */ - ZSTD_lcm_huffman = 1, /**< Always attempt Huffman compression. Uncompressed literals will still be - * emitted if Huffman compression is not profitable. */ - ZSTD_lcm_uncompressed = 2 /**< Always emit uncompressed literals. */ -} ZSTD_literalCompressionMode_e; +#define MINMATCH 3 +#define Litbits 8 +#define MaxLit ((1<= ZSTD_FRAMEHEADERSIZE_PREFIX. - * @return : size of the Frame Header, - * or an error code (if srcSize is too small) */ -ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize); -/*! ZSTD_getSequences() : - * Extract sequences from the sequence store - * zc can be used to insert custom compression params. - * This function invokes ZSTD_compress2 - * @return : number of sequences extracted - */ -ZSTDLIB_API size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, - size_t outSeqsSize, const void* src, size_t srcSize); +/*-******************************************* +* Shared functions to include for inlining +*********************************************/ +static void ZSTD_copy8(void* dst, const void* src) { +#if !defined(ZSTD_NO_INTRINSICS) && defined(__ARM_NEON) + vst1_u8((uint8_t*)dst, vld1_u8((const uint8_t*)src)); +#else + ZSTD_memcpy(dst, src, 8); +#endif +} +#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; } +static void ZSTD_copy16(void* dst, const void* src) { +#if !defined(ZSTD_NO_INTRINSICS) && defined(__ARM_NEON) + vst1q_u8((uint8_t*)dst, vld1q_u8((const uint8_t*)src)); +#else + ZSTD_memcpy(dst, src, 16); +#endif +} +#define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; } -/*************************************** -* Memory management -***************************************/ +#define WILDCOPY_OVERLENGTH 32 +#define WILDCOPY_VECLEN 16 -/*! ZSTD_estimate*() : - * These functions make it possible to estimate memory usage of a future - * {D,C}Ctx, before its creation. - * - * ZSTD_estimateCCtxSize() will provide a budget large enough for any - * compression level up to selected one. Unlike ZSTD_estimateCStreamSize*(), - * this estimate does not include space for a window buffer, so this estimate - * is guaranteed to be enough for single-shot compressions, but not streaming - * compressions. It will however assume the input may be arbitrarily large, - * which is the worst case. If srcSize is known to always be small, - * ZSTD_estimateCCtxSize_usingCParams() can provide a tighter estimation. - * ZSTD_estimateCCtxSize_usingCParams() can be used in tandem with - * ZSTD_getCParams() to create cParams from compressionLevel. - * ZSTD_estimateCCtxSize_usingCCtxParams() can be used in tandem with - * ZSTD_CCtxParams_setParameter(). - * - * Note: only single-threaded compression is supported. This function will - * return an error code if ZSTD_c_nbWorkers is >= 1. 
*/ -ZSTDLIB_API size_t ZSTD_estimateCCtxSize(int compressionLevel); -ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams); -ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params); -ZSTDLIB_API size_t ZSTD_estimateDCtxSize(void); +typedef enum { + ZSTD_no_overlap, + ZSTD_overlap_src_before_dst + /* ZSTD_overlap_dst_before_src, */ +} ZSTD_overlap_e; -/*! ZSTD_estimateCStreamSize() : - * ZSTD_estimateCStreamSize() will provide a budget large enough for any compression level up to selected one. - * It will also consider src size to be arbitrarily "large", which is worst case. - * If srcSize is known to always be small, ZSTD_estimateCStreamSize_usingCParams() can provide a tighter estimation. - * ZSTD_estimateCStreamSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel. - * ZSTD_estimateCStreamSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParams_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1. - * Note : CStream size estimation is only correct for single-threaded compression. - * ZSTD_DStream memory budget depends on window Size. - * This information can be passed manually, using ZSTD_estimateDStreamSize, - * or deducted from a valid frame Header, using ZSTD_estimateDStreamSize_fromFrame(); - * Note : if streaming is init with function ZSTD_init?Stream_usingDict(), - * an internal ?Dict will be created, which additional size is not estimated here. - * In this case, get total size by adding ZSTD_estimate?DictSize */ -ZSTDLIB_API size_t ZSTD_estimateCStreamSize(int compressionLevel); -ZSTDLIB_API size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams); -ZSTDLIB_API size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params); -ZSTDLIB_API size_t ZSTD_estimateDStreamSize(size_t windowSize); -ZSTDLIB_API size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize); +/*! ZSTD_wildcopy() : + * Custom version of ZSTD_memcpy(), can over read/write up to WILDCOPY_OVERLENGTH bytes (if length==0) + * @param ovtype controls the overlap detection + * - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart. + * - ZSTD_overlap_src_before_dst: The src and dst may overlap, but they MUST be at least 8 bytes apart. + * The src buffer must be before the dst buffer. + */ +MEM_STATIC FORCE_INLINE_ATTR +void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e const ovtype) +{ + ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src; + const BYTE* ip = (const BYTE*)src; + BYTE* op = (BYTE*)dst; + BYTE* const oend = op + length; + + assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff <= -WILDCOPY_VECLEN)); + + if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) { + /* Handle short offset copies. */ + do { + COPY8(op, ip) + } while (op < oend); + } else { + assert(diff >= WILDCOPY_VECLEN || diff <= -WILDCOPY_VECLEN); + /* Separate out the first COPY16() call because the copy length is + * almost certain to be short, so the branches have different + * probabilities. Since it is almost certain to be short, only do + * one COPY16() in the first call. Then, do two calls per loop since + * at that point it is more likely to have a high trip count. 
+ */ +#ifdef __aarch64__ + do { + COPY16(op, ip); + } + while (op < oend); +#else + ZSTD_copy16(op, ip); + if (16 >= length) return; + op += 16; + ip += 16; + do { + COPY16(op, ip); + COPY16(op, ip); + } + while (op < oend); +#endif + } +} -/*! ZSTD_estimate?DictSize() : - * ZSTD_estimateCDictSize() will bet that src size is relatively "small", and content is copied, like ZSTD_createCDict(). - * ZSTD_estimateCDictSize_advanced() makes it possible to control compression parameters precisely, like ZSTD_createCDict_advanced(). - * Note : dictionaries created by reference (`ZSTD_dlm_byRef`) are logically smaller. - */ -ZSTDLIB_API size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel); -ZSTDLIB_API size_t ZSTD_estimateCDictSize_advanced(size_t dictSize, ZSTD_compressionParameters cParams, ZSTD_dictLoadMethod_e dictLoadMethod); -ZSTDLIB_API size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod); +MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + size_t const length = MIN(dstCapacity, srcSize); + if (length > 0) { + ZSTD_memcpy(dst, src, length); + } + return length; +} -/*! ZSTD_initStatic*() : - * Initialize an object using a pre-allocated fixed-size buffer. - * workspace: The memory area to emplace the object into. - * Provided pointer *must be 8-bytes aligned*. - * Buffer must outlive object. - * workspaceSize: Use ZSTD_estimate*Size() to determine - * how large workspace must be to support target scenario. - * @return : pointer to object (same address as workspace, just different type), - * or NULL if error (size too small, incorrect alignment, etc.) - * Note : zstd will never resize nor malloc() when using a static buffer. - * If the object requires more memory than available, - * zstd will just error out (typically ZSTD_error_memory_allocation). - * Note 2 : there is no corresponding "free" function. - * Since workspace is allocated externally, it must be freed externally too. - * Note 3 : cParams : use ZSTD_getCParams() to convert a compression level - * into its associated cParams. - * Limitation 1 : currently not compatible with internal dictionary creation, triggered by - * ZSTD_CCtx_loadDictionary(), ZSTD_initCStream_usingDict() or ZSTD_initDStream_usingDict(). - * Limitation 2 : static cctx currently not compatible with multi-threading. - * Limitation 3 : static dctx is incompatible with legacy support. - */ -ZSTDLIB_API ZSTD_CCtx* ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize); -ZSTDLIB_API ZSTD_CStream* ZSTD_initStaticCStream(void* workspace, size_t workspaceSize); /**< same as ZSTD_initStaticCCtx() */ +/* define "workspace is too large" as this number of times larger than needed */ +#define ZSTD_WORKSPACETOOLARGE_FACTOR 3 -ZSTDLIB_API ZSTD_DCtx* ZSTD_initStaticDCtx(void* workspace, size_t workspaceSize); -ZSTDLIB_API ZSTD_DStream* ZSTD_initStaticDStream(void* workspace, size_t workspaceSize); /**< same as ZSTD_initStaticDCtx() */ +/* when workspace is continuously too large + * during at least this number of times, + * context's memory usage is considered wasteful, + * because it's sized to handle a worst case scenario which rarely happens. 
+ * In which case, resize it down to free some memory */ +#define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128 -ZSTDLIB_API const ZSTD_CDict* ZSTD_initStaticCDict( - void* workspace, size_t workspaceSize, - const void* dict, size_t dictSize, - ZSTD_dictLoadMethod_e dictLoadMethod, - ZSTD_dictContentType_e dictContentType, - ZSTD_compressionParameters cParams); +/* Controls whether the input/output buffer is buffered or stable. */ +typedef enum { + ZSTD_bm_buffered = 0, /* Buffer the input/output */ + ZSTD_bm_stable = 1 /* ZSTD_inBuffer/ZSTD_outBuffer is stable */ +} ZSTD_bufferMode_e; -ZSTDLIB_API const ZSTD_DDict* ZSTD_initStaticDDict( - void* workspace, size_t workspaceSize, - const void* dict, size_t dictSize, - ZSTD_dictLoadMethod_e dictLoadMethod, - ZSTD_dictContentType_e dictContentType); +/*-******************************************* +* Private declarations +*********************************************/ +typedef struct seqDef_s { + U32 offset; /* offset == rawOffset + ZSTD_REP_NUM, or equivalently, offCode + 1 */ + U16 litLength; + U16 matchLength; +} seqDef; -/*! Custom memory allocation : - * These prototypes make it possible to pass your own allocation/free functions. - * ZSTD_customMem is provided at creation time, using ZSTD_create*_advanced() variants listed below. - * All allocation/free operations will be completed using these custom variants instead of regular ones. - */ -typedef void* (*ZSTD_allocFunction) (void* opaque, size_t size); -typedef void (*ZSTD_freeFunction) (void* opaque, void* address); -typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; void* opaque; } ZSTD_customMem; -static ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL }; /**< this constant defers to stdlib's functions */ +/* Controls whether seqStore has a single "long" litLength or matchLength. See seqStore_t. */ +typedef enum { + ZSTD_llt_none = 0, /* no longLengthType */ + ZSTD_llt_literalLength = 1, /* represents a long literal */ + ZSTD_llt_matchLength = 2 /* represents a long match */ +} ZSTD_longLengthType_e; -ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem); -ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem); -ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem); -ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem); +typedef struct { + seqDef* sequencesStart; + seqDef* sequences; /* ptr to end of sequences */ + BYTE* litStart; + BYTE* lit; /* ptr to end of literals */ + BYTE* llCode; + BYTE* mlCode; + BYTE* ofCode; + size_t maxNbSeq; + size_t maxNbLit; -ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, - ZSTD_dictLoadMethod_e dictLoadMethod, - ZSTD_dictContentType_e dictContentType, - ZSTD_compressionParameters cParams, - ZSTD_customMem customMem); + /* longLengthPos and longLengthType to allow us to represent either a single litLength or matchLength + * in the seqStore that has a value larger than U16 (if it exists). To do so, we increment + * the existing value of the litLength or matchLength by 0x10000. 
+ */ + ZSTD_longLengthType_e longLengthType; + U32 longLengthPos; /* Index of the sequence to apply long length modification to */ +} seqStore_t; -ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, - ZSTD_dictLoadMethod_e dictLoadMethod, - ZSTD_dictContentType_e dictContentType, - ZSTD_customMem customMem); +typedef struct { + U32 litLength; + U32 matchLength; +} ZSTD_sequenceLength; +/** + * Returns the ZSTD_sequenceLength for the given sequences. It handles the decoding of long sequences + * indicated by longLengthPos and longLengthType, and adds MINMATCH back to matchLength. + */ +MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore, seqDef const* seq) +{ + ZSTD_sequenceLength seqLen; + seqLen.litLength = seq->litLength; + seqLen.matchLength = seq->matchLength + MINMATCH; + if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) { + if (seqStore->longLengthType == ZSTD_llt_literalLength) { + seqLen.litLength += 0xFFFF; + } + if (seqStore->longLengthType == ZSTD_llt_matchLength) { + seqLen.matchLength += 0xFFFF; + } + } + return seqLen; +} +/** + * Contains the compressed frame size and an upper-bound for the decompressed frame size. + * Note: before using `compressedSize`, check for errors using ZSTD_isError(). + * similarly, before using `decompressedBound`, check for errors using: + * `decompressedBound != ZSTD_CONTENTSIZE_ERROR` + */ +typedef struct { + size_t compressedSize; + unsigned long long decompressedBound; +} ZSTD_frameSizeInfo; /* decompress & legacy */ -/*************************************** -* Advanced compression functions -***************************************/ +const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); /* compress & dictBuilder */ +void ZSTD_seqToCodes(const seqStore_t* seqStorePtr); /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */ -/*! ZSTD_createCDict_byReference() : - * Create a digested dictionary for compression - * Dictionary content is just referenced, not duplicated. - * As a consequence, `dictBuffer` **must** outlive CDict, - * and its content must remain unmodified throughout the lifetime of CDict. - * note: equivalent to ZSTD_createCDict_advanced(), with dictLoadMethod==ZSTD_dlm_byRef */ -ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel); +/* custom memory allocation functions */ +void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem); +void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem); +void ZSTD_customFree(void* ptr, ZSTD_customMem customMem); -/*! ZSTD_getCParams() : - * @return ZSTD_compressionParameters structure for a selected compression level and estimated srcSize. - * `estimatedSrcSize` value is optional, select 0 if not known */ -ZSTDLIB_API ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize); -/*! ZSTD_getParams() : - * same as ZSTD_getCParams(), but @return a full `ZSTD_parameters` object instead of sub-component `ZSTD_compressionParameters`. 
- * All fields of `ZSTD_frameParameters` are set to default : contentSize=1, checksum=0, noDictID=0 */ -ZSTDLIB_API ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize); +MEM_STATIC U32 ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus */ +{ + assert(val != 0); + { +# if defined(_MSC_VER) /* Visual */ +# if STATIC_BMI2 == 1 + return _lzcnt_u32(val)^31; +# else + unsigned long r=0; + return _BitScanReverse(&r, val) ? (unsigned)r : 0; +# endif +# elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */ + return __builtin_clz (val) ^ 31; +# elif defined(__ICCARM__) /* IAR Intrinsic */ + return 31 - __CLZ(val); +# else /* Software version */ + static const U32 DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; + U32 v = val; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + return DeBruijnClz[(v * 0x07C4ACDDU) >> 27]; +# endif + } +} -/*! ZSTD_checkCParams() : - * Ensure param values remain within authorized range. - * @return 0 on success, or an error code (can be checked with ZSTD_isError()) */ -ZSTDLIB_API size_t ZSTD_checkCParams(ZSTD_compressionParameters params); -/*! ZSTD_adjustCParams() : - * optimize params for a given `srcSize` and `dictSize`. - * `srcSize` can be unknown, in which case use ZSTD_CONTENTSIZE_UNKNOWN. - * `dictSize` must be `0` when there is no dictionary. - * cPar can be invalid : all parameters will be clamped within valid range in the @return struct. - * This function never fails (wide contract) */ -ZSTDLIB_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize); +/* ZSTD_invalidateRepCodes() : + * ensures next compression will not use repcodes from previous block. + * Note : only works with regular variant; + * do not use with extDict variant ! */ +void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx); /* zstdmt, adaptive_compression (shouldn't get this definition from here) */ -/*! ZSTD_compress_advanced() : - * Note : this function is now DEPRECATED. - * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_setParameter() and other parameter setters. - * This prototype will be marked as deprecated and generate compilation warning on reaching v1.5.x */ -ZSTDLIB_API size_t ZSTD_compress_advanced(ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const void* dict,size_t dictSize, - ZSTD_parameters params); -/*! ZSTD_compress_usingCDict_advanced() : - * Note : this function is now REDUNDANT. - * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_loadDictionary() and other parameter setters. - * This prototype will be marked as deprecated and generate compilation warning in some future version */ -ZSTDLIB_API size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const ZSTD_CDict* cdict, - ZSTD_frameParameters fParams); +typedef struct { + blockType_e blockType; + U32 lastBlock; + U32 origSize; +} blockProperties_t; /* declared here for decompress and fullbench */ + +/*! ZSTD_getcBlockSize() : + * Provides the size of compressed block from block header `src` */ +/* Used by: decompress, fullbench (does not get its definition from here) */ +size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, + blockProperties_t* bpPtr); +/*! 
ZSTD_decodeSeqHeaders() : + * decode sequence header from src */ +/* Used by: decompress, fullbench (does not get its definition from here) */ +size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, + const void* src, size_t srcSize); -/*! ZSTD_CCtx_loadDictionary_byReference() : - * Same as ZSTD_CCtx_loadDictionary(), but dictionary content is referenced, instead of being copied into CCtx. - * It saves some memory, but also requires that `dict` outlives its usage within `cctx` */ -ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_byReference(ZSTD_CCtx* cctx, const void* dict, size_t dictSize); -/*! ZSTD_CCtx_loadDictionary_advanced() : - * Same as ZSTD_CCtx_loadDictionary(), but gives finer control over - * how to load the dictionary (by copy ? by reference ?) - * and how to interpret it (automatic ? force raw mode ? full mode only ?) */ -ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType); +#if defined (__cplusplus) +} +#endif -/*! ZSTD_CCtx_refPrefix_advanced() : - * Same as ZSTD_CCtx_refPrefix(), but gives finer control over - * how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) */ -ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType); +#endif /* ZSTD_CCOMMON_H_MODULE */ +/**** ended inlining zstd_internal.h ****/ -/* === experimental parameters === */ -/* these parameters can be used with ZSTD_setParameter() - * they are not guaranteed to remain supported in the future */ - /* Enables rsyncable mode, - * which makes compressed files more rsync friendly - * by adding periodic synchronization points to the compressed data. - * The target average block size is ZSTD_c_jobSize / 2. - * It's possible to modify the job size to increase or decrease - * the granularity of the synchronization point. - * Once the jobSize is smaller than the window size, - * it will result in compression ratio degradation. - * NOTE 1: rsyncable mode only works when multithreading is enabled. - * NOTE 2: rsyncable performs poorly in combination with long range mode, - * since it will decrease the effectiveness of synchronization points, - * though mileage may vary. - * NOTE 3: Rsyncable mode limits maximum compression speed to ~400 MB/s. - * If the selected compression level is already running significantly slower, - * the overall speed won't be significantly impacted. - */ - #define ZSTD_c_rsyncable ZSTD_c_experimentalParam1 +/*-**************************************** +* Version +******************************************/ +unsigned ZSTD_versionNumber(void) { return ZSTD_VERSION_NUMBER; } -/* Select a compression format. - * The value must be of type ZSTD_format_e. - * See ZSTD_format_e enum definition for details */ -#define ZSTD_c_format ZSTD_c_experimentalParam2 +const char* ZSTD_versionString(void) { return ZSTD_VERSION_STRING; } + + +/*-**************************************** +* ZSTD Error Management +******************************************/ +#undef ZSTD_isError /* defined within zstd_internal.h */ +/*! 
ZSTD_isError() : + * tells if a return value is an error code + * symbol is required for external callers */ +unsigned ZSTD_isError(size_t code) { return ERR_isError(code); } -/* Force back-reference distances to remain < windowSize, - * even when referencing into Dictionary content (default:0) */ -#define ZSTD_c_forceMaxWindow ZSTD_c_experimentalParam3 +/*! ZSTD_getErrorName() : + * provides error code string from function result (useful for debugging) */ +const char* ZSTD_getErrorName(size_t code) { return ERR_getErrorName(code); } -/* Controls whether the contents of a CDict - * are used in place, or copied into the working context. - * Accepts values from the ZSTD_dictAttachPref_e enum. - * See the comments on that enum for an explanation of the feature. */ -#define ZSTD_c_forceAttachDict ZSTD_c_experimentalParam4 +/*! ZSTD_getError() : + * convert a `size_t` function result into a proper ZSTD_errorCode enum */ +ZSTD_ErrorCode ZSTD_getErrorCode(size_t code) { return ERR_getErrorCode(code); } -/* Controls how the literals are compressed (default is auto). - * The value must be of type ZSTD_literalCompressionMode_e. - * See ZSTD_literalCompressionMode_t enum definition for details. - */ -#define ZSTD_c_literalCompressionMode ZSTD_c_experimentalParam5 +/*! ZSTD_getErrorString() : + * provides error code string from enum */ +const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorString(code); } -/* Tries to fit compressed block size to be around targetCBlockSize. - * No target when targetCBlockSize == 0. - * There is no guarantee on compressed block size (default:0) */ -#define ZSTD_c_targetCBlockSize ZSTD_c_experimentalParam6 -/* User's best guess of source size. - * Hint is not valid when srcSizeHint == 0. - * There is no guarantee that hint is close to actual source size, - * but compression ratio may regress significantly if guess considerably underestimates */ -#define ZSTD_c_srcSizeHint ZSTD_c_experimentalParam7 -/*! ZSTD_CCtx_getParameter() : - * Get the requested compression parameter value, selected by enum ZSTD_cParameter, - * and store it into int* value. - * @return : 0, or an error code (which can be tested with ZSTD_isError()). - */ -ZSTDLIB_API size_t ZSTD_CCtx_getParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value); +/*=************************************************************** +* Custom allocator +****************************************************************/ +void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem) +{ + if (customMem.customAlloc) + return customMem.customAlloc(customMem.opaque, size); + return ZSTD_malloc(size); +} +void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem) +{ + if (customMem.customAlloc) { + /* calloc implemented as malloc+memset; + * not as efficient as calloc, but next best guess for custom malloc */ + void* const ptr = customMem.customAlloc(customMem.opaque, size); + ZSTD_memset(ptr, 0, size); + return ptr; + } + return ZSTD_calloc(1, size); +} -/*! ZSTD_CCtx_params : - * Quick howto : - * - ZSTD_createCCtxParams() : Create a ZSTD_CCtx_params structure - * - ZSTD_CCtxParams_setParameter() : Push parameters one by one into - * an existing ZSTD_CCtx_params structure. - * This is similar to - * ZSTD_CCtx_setParameter(). - * - ZSTD_CCtx_setParametersUsingCCtxParams() : Apply parameters to - * an existing CCtx. - * These parameters will be applied to - * all subsequent frames. - * - ZSTD_compressStream2() : Do compression using the CCtx. - * - ZSTD_freeCCtxParams() : Free the memory. 
+void ZSTD_customFree(void* ptr, ZSTD_customMem customMem) +{ + if (ptr!=NULL) { + if (customMem.customFree) + customMem.customFree(customMem.opaque, ptr); + else + ZSTD_free(ptr); + } +} +/**** ended inlining common/zstd_common.c ****/ + +/**** start inlining decompress/huf_decompress.c ****/ +/* ****************************************************************** + * huff0 huffman decoder, + * part of Finite State Entropy library + * Copyright (c) Yann Collet, Facebook, Inc. * - * This can be used with ZSTD_estimateCCtxSize_advanced_usingCCtxParams() - * for static allocation of CCtx for single-threaded compression. - */ -ZSTDLIB_API ZSTD_CCtx_params* ZSTD_createCCtxParams(void); -ZSTDLIB_API size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params); + * You can contact the author at : + * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +****************************************************************** */ -/*! ZSTD_CCtxParams_reset() : - * Reset params to default values. - */ -ZSTDLIB_API size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params); +/* ************************************************************** +* Dependencies +****************************************************************/ +/**** skipping file: ../common/zstd_deps.h ****/ +/**** skipping file: ../common/compiler.h ****/ +/**** skipping file: ../common/bitstream.h ****/ +/**** skipping file: ../common/fse.h ****/ +#define HUF_STATIC_LINKING_ONLY +/**** skipping file: ../common/huf.h ****/ +/**** skipping file: ../common/error_private.h ****/ -/*! ZSTD_CCtxParams_init() : - * Initializes the compression parameters of cctxParams according to - * compression level. All other parameters are reset to their default values. - */ -ZSTDLIB_API size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel); +/* ************************************************************** +* Macros +****************************************************************/ -/*! ZSTD_CCtxParams_init_advanced() : - * Initializes the compression and frame parameters of cctxParams according to - * params. All other parameters are reset to their default values. +/* These two optional macros force the use one way or another of the two + * Huffman decompression implementations. You can't force in both directions + * at the same time. */ -ZSTDLIB_API size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params); +#if defined(HUF_FORCE_DECOMPRESS_X1) && \ + defined(HUF_FORCE_DECOMPRESS_X2) +#error "Cannot force the use of the X1 and X2 decoders at the same time!" +#endif -/*! ZSTD_CCtxParams_setParameter() : - * Similar to ZSTD_CCtx_setParameter. - * Set one compression parameter, selected by enum ZSTD_cParameter. - * Parameters must be applied to a ZSTD_CCtx using ZSTD_CCtx_setParametersUsingCCtxParams(). - * @result : 0, or an error code (which can be tested with ZSTD_isError()). - */ -ZSTDLIB_API size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value); -/*! ZSTD_CCtxParams_getParameter() : - * Similar to ZSTD_CCtx_getParameter. - * Get the requested value of one compression parameter, selected by enum ZSTD_cParameter. 
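As the comment above notes, HUF_FORCE_DECOMPRESS_X1 and HUF_FORCE_DECOMPRESS_X2 are mutually exclusive compile-time switches: defining exactly one of them restricts huf_decompress to that single Huffman decoder variant (the other variant's section is excluded by the matching #ifndef guards further below), and defining both trips the #error. A minimal sketch of selecting the single-symbol decoder; the define below is illustrative only and is not part of the vendored file — it could equally be passed as -DHUF_FORCE_DECOMPRESS_X1 by the build system:

/* Sketch only: keep the single-symbol (X1) Huffman decoder and compile out X2.
   Defining HUF_FORCE_DECOMPRESS_X2 as well would trigger the #error above. */
#define HUF_FORCE_DECOMPRESS_X1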
- * @result : 0, or an error code (which can be tested with ZSTD_isError()). - */ -ZSTDLIB_API size_t ZSTD_CCtxParams_getParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int* value); +/* ************************************************************** +* Error Management +****************************************************************/ +#define HUF_isError ERR_isError -/*! ZSTD_CCtx_setParametersUsingCCtxParams() : - * Apply a set of ZSTD_CCtx_params to the compression context. - * This can be done even after compression is started, - * if nbWorkers==0, this will have no impact until a new compression is started. - * if nbWorkers>=1, new parameters will be picked up at next job, - * with a few restrictions (windowLog, pledgedSrcSize, nbWorkers, jobSize, and overlapLog are not updated). - */ -ZSTDLIB_API size_t ZSTD_CCtx_setParametersUsingCCtxParams( - ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params); -/*! ZSTD_compressStream2_simpleArgs() : - * Same as ZSTD_compressStream2(), - * but using only integral types as arguments. - * This variant might be helpful for binders from dynamic languages - * which have troubles handling structures containing memory pointers. - */ -ZSTDLIB_API size_t ZSTD_compressStream2_simpleArgs ( - ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, size_t* dstPos, - const void* src, size_t srcSize, size_t* srcPos, - ZSTD_EndDirective endOp); +/* ************************************************************** +* Byte alignment for workSpace management +****************************************************************/ +#define HUF_ALIGN(x, a) HUF_ALIGN_MASK((x), (a) - 1) +#define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask)) -/*************************************** -* Advanced decompression functions -***************************************/ +/* ************************************************************** +* BMI2 Variant Wrappers +****************************************************************/ +#if DYNAMIC_BMI2 -/*! ZSTD_isFrame() : - * Tells if the content of `buffer` starts with a valid Frame Identifier. - * Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0. - * Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled. - * Note 3 : Skippable Frame Identifiers are considered valid. */ -ZSTDLIB_API unsigned ZSTD_isFrame(const void* buffer, size_t size); +#define HUF_DGEN(fn) \ + \ + static size_t fn##_default( \ + void* dst, size_t dstSize, \ + const void* cSrc, size_t cSrcSize, \ + const HUF_DTable* DTable) \ + { \ + return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ + } \ + \ + static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2( \ + void* dst, size_t dstSize, \ + const void* cSrc, size_t cSrcSize, \ + const HUF_DTable* DTable) \ + { \ + return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ + } \ + \ + static size_t fn(void* dst, size_t dstSize, void const* cSrc, \ + size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \ + { \ + if (bmi2) { \ + return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); \ + } \ + return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable); \ + } -/*! ZSTD_createDDict_byReference() : - * Create a digested dictionary, ready to start decompression operation without startup delay. - * Dictionary content is referenced, and therefore stays in dictBuffer. 
- * It is important that dictBuffer outlives DDict, - * it must remain read accessible throughout the lifetime of DDict */ -ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize); +#else -/*! ZSTD_DCtx_loadDictionary_byReference() : - * Same as ZSTD_DCtx_loadDictionary(), - * but references `dict` content instead of copying it into `dctx`. - * This saves memory if `dict` remains around., - * However, it's imperative that `dict` remains accessible (and unmodified) while being used, so it must outlive decompression. */ -ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); +#define HUF_DGEN(fn) \ + static size_t fn(void* dst, size_t dstSize, void const* cSrc, \ + size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \ + { \ + (void)bmi2; \ + return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ + } -/*! ZSTD_DCtx_loadDictionary_advanced() : - * Same as ZSTD_DCtx_loadDictionary(), - * but gives direct control over - * how to load the dictionary (by copy ? by reference ?) - * and how to interpret it (automatic ? force raw mode ? full mode only ?). */ -ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType); +#endif -/*! ZSTD_DCtx_refPrefix_advanced() : - * Same as ZSTD_DCtx_refPrefix(), but gives finer control over - * how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) */ -ZSTDLIB_API size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType); -/*! ZSTD_DCtx_setMaxWindowSize() : - * Refuses allocating internal buffers for frames requiring a window size larger than provided limit. - * This protects a decoder context from reserving too much memory for itself (potential attack scenario). - * This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode. - * By default, a decompression context accepts all window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT) - * @return : 0, or an error code (which can be tested using ZSTD_isError()). - */ -ZSTDLIB_API size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize); +/*-***************************/ +/* generic DTableDesc */ +/*-***************************/ +typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc; -/* ZSTD_d_format - * experimental parameter, - * allowing selection between ZSTD_format_e input compression formats +static DTableDesc HUF_getDTableDesc(const HUF_DTable* table) +{ + DTableDesc dtd; + ZSTD_memcpy(&dtd, table, sizeof(dtd)); + return dtd; +} + + +#ifndef HUF_FORCE_DECOMPRESS_X2 + +/*-***************************/ +/* single-symbol decoding */ +/*-***************************/ +typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX1; /* single-symbol decoding */ + +/** + * Packs 4 HUF_DEltX1 structs into a U64. This is used to lay down 4 entries at + * a time. */ -#define ZSTD_d_format ZSTD_d_experimentalParam1 +static U64 HUF_DEltX1_set4(BYTE symbol, BYTE nbBits) { + U64 D4; + if (MEM_isLittleEndian()) { + D4 = symbol + (nbBits << 8); + } else { + D4 = (symbol << 8) + nbBits; + } + D4 *= 0x0001000100010001ULL; + return D4; +} -/*! ZSTD_DCtx_setFormat() : - * Instruct the decoder context about what kind of data to decode next. 
- * This instruction is mandatory to decode data without a fully-formed header, - * such ZSTD_f_zstd1_magicless for example. - * @return : 0, or an error code (which can be tested using ZSTD_isError()). */ -ZSTDLIB_API size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format); +typedef struct { + U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; + U32 rankStart[HUF_TABLELOG_ABSOLUTEMAX + 1]; + U32 statsWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32]; + BYTE symbols[HUF_SYMBOLVALUE_MAX + 1]; + BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; +} HUF_ReadDTableX1_Workspace; -/*! ZSTD_decompressStream_simpleArgs() : - * Same as ZSTD_decompressStream(), - * but using only integral types as arguments. - * This can be helpful for binders from dynamic languages - * which have troubles handling structures containing memory pointers. - */ -ZSTDLIB_API size_t ZSTD_decompressStream_simpleArgs ( - ZSTD_DCtx* dctx, - void* dst, size_t dstCapacity, size_t* dstPos, - const void* src, size_t srcSize, size_t* srcPos); +size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize) +{ + return HUF_readDTableX1_wksp_bmi2(DTable, src, srcSize, workSpace, wkspSize, /* bmi2 */ 0); +} -/******************************************************************** -* Advanced streaming functions -* Warning : most of these functions are now redundant with the Advanced API. -* Once Advanced API reaches "stable" status, -* redundant functions will be deprecated, and then at some point removed. -********************************************************************/ +size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2) +{ + U32 tableLog = 0; + U32 nbSymbols = 0; + size_t iSize; + void* const dtPtr = DTable + 1; + HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr; + HUF_ReadDTableX1_Workspace* wksp = (HUF_ReadDTableX1_Workspace*)workSpace; -/*===== Advanced Streaming compression functions =====*/ -/**! ZSTD_initCStream_srcSize() : - * This function is deprecated, and equivalent to: - * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); - * ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any) - * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); - * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); - * - * pledgedSrcSize must be correct. If it is not known at init time, use - * ZSTD_CONTENTSIZE_UNKNOWN. Note that, for compatibility with older programs, - * "0" also disables frame content size field. It may be enabled in the future. - * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x - */ -ZSTDLIB_API size_t -ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, - int compressionLevel, - unsigned long long pledgedSrcSize); + DEBUG_STATIC_ASSERT(HUF_DECOMPRESS_WORKSPACE_SIZE >= sizeof(*wksp)); + if (sizeof(*wksp) > wkspSize) return ERROR(tableLog_tooLarge); -/**! ZSTD_initCStream_usingDict() : - * This function is deprecated, and is equivalent to: - * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); - * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); - * ZSTD_CCtx_loadDictionary(zcs, dict, dictSize); - * - * Creates of an internal CDict (incompatible with static CCtx), except if - * dict == NULL or dictSize < 8, in which case no dict is used. - * Note: dict is loaded with ZSTD_dct_auto (treated as a full zstd dictionary if - * it begins with ZSTD_MAGIC_DICTIONARY, else as raw content) and ZSTD_dlm_byCopy. 
- * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x - */ -ZSTDLIB_API size_t -ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, - const void* dict, size_t dictSize, - int compressionLevel); + DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable)); + /* ZSTD_memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */ -/**! ZSTD_initCStream_advanced() : - * This function is deprecated, and is approximately equivalent to: - * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); - * // Pseudocode: Set each zstd parameter and leave the rest as-is. - * for ((param, value) : params) { - * ZSTD_CCtx_setParameter(zcs, param, value); - * } - * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); - * ZSTD_CCtx_loadDictionary(zcs, dict, dictSize); - * - * dict is loaded with ZSTD_dct_auto and ZSTD_dlm_byCopy. - * pledgedSrcSize must be correct. - * If srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN. - * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x - */ -ZSTDLIB_API size_t -ZSTD_initCStream_advanced(ZSTD_CStream* zcs, - const void* dict, size_t dictSize, - ZSTD_parameters params, - unsigned long long pledgedSrcSize); + iSize = HUF_readStats_wksp(wksp->huffWeight, HUF_SYMBOLVALUE_MAX + 1, wksp->rankVal, &nbSymbols, &tableLog, src, srcSize, wksp->statsWksp, sizeof(wksp->statsWksp), bmi2); + if (HUF_isError(iSize)) return iSize; -/**! ZSTD_initCStream_usingCDict() : - * This function is deprecated, and equivalent to: - * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); - * ZSTD_CCtx_refCDict(zcs, cdict); - * - * note : cdict will just be referenced, and must outlive compression session - * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x - */ -ZSTDLIB_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict); + /* Table header */ + { DTableDesc dtd = HUF_getDTableDesc(DTable); + if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge); /* DTable too small, Huffman tree cannot fit in */ + dtd.tableType = 0; + dtd.tableLog = (BYTE)tableLog; + ZSTD_memcpy(DTable, &dtd, sizeof(dtd)); + } -/**! ZSTD_initCStream_usingCDict_advanced() : - * This function is DEPRECATED, and is approximately equivalent to: - * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); - * // Pseudocode: Set each zstd frame parameter and leave the rest as-is. - * for ((fParam, value) : fParams) { - * ZSTD_CCtx_setParameter(zcs, fParam, value); - * } - * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); - * ZSTD_CCtx_refCDict(zcs, cdict); - * - * same as ZSTD_initCStream_usingCDict(), with control over frame parameters. - * pledgedSrcSize must be correct. If srcSize is not known at init time, use - * value ZSTD_CONTENTSIZE_UNKNOWN. - * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x - */ -ZSTDLIB_API size_t -ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, - const ZSTD_CDict* cdict, - ZSTD_frameParameters fParams, - unsigned long long pledgedSrcSize); + /* Compute symbols and rankStart given rankVal: + * + * rankVal already contains the number of values of each weight. + * + * symbols contains the symbols ordered by weight. First are the rankVal[0] + * weight 0 symbols, followed by the rankVal[1] weight 1 symbols, and so on. + * symbols[0] is filled (but unused) to avoid a branch. 
+ * + * rankStart contains the offset where each rank belongs in the DTable. + * rankStart[0] is not filled because there are no entries in the table for + * weight 0. + */ + { + int n; + int nextRankStart = 0; + int const unroll = 4; + int const nLimit = (int)nbSymbols - unroll + 1; + for (n=0; n<(int)tableLog+1; n++) { + U32 const curr = nextRankStart; + nextRankStart += wksp->rankVal[n]; + wksp->rankStart[n] = curr; + } + for (n=0; n < nLimit; n += unroll) { + int u; + for (u=0; u < unroll; ++u) { + size_t const w = wksp->huffWeight[n+u]; + wksp->symbols[wksp->rankStart[w]++] = (BYTE)(n+u); + } + } + for (; n < (int)nbSymbols; ++n) { + size_t const w = wksp->huffWeight[n]; + wksp->symbols[wksp->rankStart[w]++] = (BYTE)n; + } + } -/*! ZSTD_resetCStream() : - * This function is deprecated, and is equivalent to: - * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); - * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); - * - * start a new frame, using same parameters from previous frame. - * This is typically useful to skip dictionary loading stage, since it will re-use it in-place. - * Note that zcs must be init at least once before using ZSTD_resetCStream(). - * If pledgedSrcSize is not known at reset time, use macro ZSTD_CONTENTSIZE_UNKNOWN. - * If pledgedSrcSize > 0, its value must be correct, as it will be written in header, and controlled at the end. - * For the time being, pledgedSrcSize==0 is interpreted as "srcSize unknown" for compatibility with older programs, - * but it will change to mean "empty" in future version, so use macro ZSTD_CONTENTSIZE_UNKNOWN instead. - * @return : 0, or an error code (which can be tested using ZSTD_isError()) - * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x - */ -ZSTDLIB_API size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize); + /* fill DTable + * We fill all entries of each weight in order. + * That way length is a constant for each iteration of the outter loop. + * We can switch based on the length to a different inner loop which is + * optimized for that particular case. 
+     */
+    {
+        U32 w;
+        int symbol=wksp->rankVal[0];
+        int rankStart=0;
+        for (w=1; w<tableLog+1; ++w) {
+            int const symbolCount = wksp->rankVal[w];
+            int const length = (1 << w) >> 1;
+            int uStart = rankStart;
+            BYTE const nbBits = (BYTE)(tableLog + 1 - w);
+            int s;
+            int u;
+            switch (length) {
+            case 1:
+                for (s=0; s<symbolCount; ++s) {
+                    HUF_DEltX1 D;
+                    D.byte = wksp->symbols[symbol + s];
+                    D.nbBits = nbBits;
+                    dt[uStart] = D;
+                    uStart += 1;
+                }
+                break;
+            case 2:
+                for (s=0; s<symbolCount; ++s) {
+                    HUF_DEltX1 D;
+                    D.byte = wksp->symbols[symbol + s];
+                    D.nbBits = nbBits;
+                    dt[uStart+0] = D;
+                    dt[uStart+1] = D;
+                    uStart += 2;
+                }
+                break;
+            case 4:
+                for (s=0; s<symbolCount; ++s) {
+                    U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
+                    MEM_write64(dt + uStart, D4);
+                    uStart += 4;
+                }
+                break;
+            case 8:
+                for (s=0; s<symbolCount; ++s) {
+                    U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
+                    MEM_write64(dt + uStart, D4);
+                    MEM_write64(dt + uStart + 4, D4);
+                    uStart += 8;
+                }
+                break;
+            default:
+                for (s=0; s<symbolCount; ++s) {
+                    U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
+                    for (u=0; u < length; u += 16) {
+                        MEM_write64(dt + uStart + u + 0, D4);
+                        MEM_write64(dt + uStart + u + 4, D4);
+                        MEM_write64(dt + uStart + u + 8, D4);
+                        MEM_write64(dt + uStart + u + 12, D4);
+                    }
+                    assert(u == length);
+                    uStart += length;
+                }
+                break;
+            }
+            symbol += symbolCount;
+            rankStart += symbolCount * length;
+        }
+    }
+    return iSize;
+}
+FORCE_INLINE_TEMPLATE BYTE
+HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog)
+{
+    size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
+    BYTE const c = dt[val].byte;
+    BIT_skipBits(Dstream, dt[val].nbBits);
+    return c;
+}
-typedef struct {
-    unsigned long long ingested;   /* nb input bytes read and buffered */
-    unsigned long long consumed;   /* nb input bytes actually compressed */
-    unsigned long long produced;   /* nb of compressed bytes generated and buffered */
-    unsigned long long flushed;    /* nb of compressed bytes flushed : not provided; can be tracked from caller side */
-    unsigned currentJobID;         /* MT only : latest started job nb */
-    unsigned nbActiveWorkers;      /* MT only : nb of workers actively compressing at probe time */
-} ZSTD_frameProgression;
+#define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \
+    *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog)
-/* ZSTD_getFrameProgression() :
- * tells how much data has been ingested (read from input)
- * consumed (input actually compressed) and produced (output) for current frame.
- * Note : (ingested - consumed) is amount of input data buffered internally, not yet compressed.
- * Aggregates progression inside active worker threads.
- */
-ZSTDLIB_API ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx);
+#define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr) \
+    if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
+        HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
-/*! ZSTD_toFlushNow() :
- *  Tell how many bytes are ready to be flushed immediately.
- *  Useful for multithreading scenarios (nbWorkers >= 1).
- *  Probe the oldest active job, defined as oldest job not yet entirely flushed,
- *  and check its output buffer.
- *  @return : amount of data stored in oldest job and ready to be flushed immediately.
- *  if @return == 0, it means either :
- *  + there is no active job (could be checked with ZSTD_frameProgression()), or
- *  + oldest job is still actively compressing data,
- *    but everything it has produced has also been flushed so far,
- *    therefore flush speed is limited by production speed of oldest job
- *    irrespective of the speed of concurrent (and newer) jobs.
- */ -ZSTDLIB_API size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx); +#define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \ + if (MEM_64bits()) \ + HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) +HINT_INLINE size_t +HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog) +{ + BYTE* const pStart = p; -/*===== Advanced Streaming decompression functions =====*/ -/** - * This function is deprecated, and is equivalent to: - * - * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); - * ZSTD_DCtx_loadDictionary(zds, dict, dictSize); - * - * note: no dictionary will be used if dict == NULL or dictSize < 8 - * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x - */ -ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); + /* up to 4 symbols at a time */ + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) { + HUF_DECODE_SYMBOLX1_2(p, bitDPtr); + HUF_DECODE_SYMBOLX1_1(p, bitDPtr); + HUF_DECODE_SYMBOLX1_2(p, bitDPtr); + HUF_DECODE_SYMBOLX1_0(p, bitDPtr); + } -/** - * This function is deprecated, and is equivalent to: - * - * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); - * ZSTD_DCtx_refDDict(zds, ddict); - * - * note : ddict is referenced, it must outlive decompression session - * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x - */ -ZSTDLIB_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict); + /* [0-3] symbols remaining */ + if (MEM_32bits()) + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd)) + HUF_DECODE_SYMBOLX1_0(p, bitDPtr); -/** - * This function is deprecated, and is equivalent to: - * - * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); - * - * re-use decompression parameters from previous init; saves dictionary loading - * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x - */ -ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); + /* no more data to retrieve from bitstream, no need to reload */ + while (p < pEnd) + HUF_DECODE_SYMBOLX1_0(p, bitDPtr); + + return pEnd-pStart; +} + +FORCE_INLINE_TEMPLATE size_t +HUF_decompress1X1_usingDTable_internal_body( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + BYTE* op = (BYTE*)dst; + BYTE* const oend = op + dstSize; + const void* dtPtr = DTable + 1; + const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr; + BIT_DStream_t bitD; + DTableDesc const dtd = HUF_getDTableDesc(DTable); + U32 const dtLog = dtd.tableLog; + CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) ); -/********************************************************************* -* Buffer-less and synchronous inner streaming functions -* -* This is an advanced API, giving full control over buffer management, for users which need direct control over memory. -* But it's also a complex one, with several restrictions, documented below. -* Prefer normal streaming API for an easier experience. -********************************************************************* */ + HUF_decodeStreamX1(op, &bitD, oend, dt, dtLog); -/** - Buffer-less streaming compression (synchronous mode) + if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected); - A ZSTD_CCtx object is required to track streaming operations. - Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage resource. 
- ZSTD_CCtx object can be re-used multiple times within successive compression operations. + return dstSize; +} - Start by initializing a context. - Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression, - or ZSTD_compressBegin_advanced(), for finer parameter control. - It's also possible to duplicate a reference context which has already been initialized, using ZSTD_copyCCtx() +FORCE_INLINE_TEMPLATE size_t +HUF_decompress4X1_usingDTable_internal_body( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + /* Check */ + if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ - Then, consume your input using ZSTD_compressContinue(). - There are some important considerations to keep in mind when using this advanced function : - - ZSTD_compressContinue() has no internal buffer. It uses externally provided buffers only. - - Interface is synchronous : input is consumed entirely and produces 1+ compressed blocks. - - Caller must ensure there is enough space in `dst` to store compressed data under worst case scenario. - Worst case evaluation is provided by ZSTD_compressBound(). - ZSTD_compressContinue() doesn't guarantee recover after a failed compression. - - ZSTD_compressContinue() presumes prior input ***is still accessible and unmodified*** (up to maximum distance size, see WindowLog). - It remembers all previous contiguous blocks, plus one separated memory segment (which can itself consists of multiple contiguous blocks) - - ZSTD_compressContinue() detects that prior input has been overwritten when `src` buffer overlaps. - In which case, it will "discard" the relevant memory section from its history. + { const BYTE* const istart = (const BYTE*) cSrc; + BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = ostart + dstSize; + BYTE* const olimit = oend - 3; + const void* const dtPtr = DTable + 1; + const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr; - Finish a frame with ZSTD_compressEnd(), which will write the last block(s) and optional checksum. - It's possible to use srcSize==0, in which case, it will write a final empty block to end the frame. - Without last block mark, frames are considered unfinished (hence corrupted) by compliant decoders. + /* Init */ + BIT_DStream_t bitD1; + BIT_DStream_t bitD2; + BIT_DStream_t bitD3; + BIT_DStream_t bitD4; + size_t const length1 = MEM_readLE16(istart); + size_t const length2 = MEM_readLE16(istart+2); + size_t const length3 = MEM_readLE16(istart+4); + size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6); + const BYTE* const istart1 = istart + 6; /* jumpTable */ + const BYTE* const istart2 = istart1 + length1; + const BYTE* const istart3 = istart2 + length2; + const BYTE* const istart4 = istart3 + length3; + const size_t segmentSize = (dstSize+3) / 4; + BYTE* const opStart2 = ostart + segmentSize; + BYTE* const opStart3 = opStart2 + segmentSize; + BYTE* const opStart4 = opStart3 + segmentSize; + BYTE* op1 = ostart; + BYTE* op2 = opStart2; + BYTE* op3 = opStart3; + BYTE* op4 = opStart4; + DTableDesc const dtd = HUF_getDTableDesc(DTable); + U32 const dtLog = dtd.tableLog; + U32 endSignal = 1; - `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress again. 
-*/ + if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ + CHECK_F( BIT_initDStream(&bitD1, istart1, length1) ); + CHECK_F( BIT_initDStream(&bitD2, istart2, length2) ); + CHECK_F( BIT_initDStream(&bitD3, istart3, length3) ); + CHECK_F( BIT_initDStream(&bitD4, istart4, length4) ); -/*===== Buffer-less streaming compression functions =====*/ -ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel); -ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel); -ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize : If srcSize is not known at init time, use ZSTD_CONTENTSIZE_UNKNOWN */ -ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); /**< note: fails if cdict==NULL */ -ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize); /* compression parameters are already set within cdict. pledgedSrcSize must be correct. If srcSize is not known, use macro ZSTD_CONTENTSIZE_UNKNOWN */ -ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); /**< note: if pledgedSrcSize is not known, use ZSTD_CONTENTSIZE_UNKNOWN */ + /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */ + for ( ; (endSignal) & (op4 < olimit) ; ) { + HUF_DECODE_SYMBOLX1_2(op1, &bitD1); + HUF_DECODE_SYMBOLX1_2(op2, &bitD2); + HUF_DECODE_SYMBOLX1_2(op3, &bitD3); + HUF_DECODE_SYMBOLX1_2(op4, &bitD4); + HUF_DECODE_SYMBOLX1_1(op1, &bitD1); + HUF_DECODE_SYMBOLX1_1(op2, &bitD2); + HUF_DECODE_SYMBOLX1_1(op3, &bitD3); + HUF_DECODE_SYMBOLX1_1(op4, &bitD4); + HUF_DECODE_SYMBOLX1_2(op1, &bitD1); + HUF_DECODE_SYMBOLX1_2(op2, &bitD2); + HUF_DECODE_SYMBOLX1_2(op3, &bitD3); + HUF_DECODE_SYMBOLX1_2(op4, &bitD4); + HUF_DECODE_SYMBOLX1_0(op1, &bitD1); + HUF_DECODE_SYMBOLX1_0(op2, &bitD2); + HUF_DECODE_SYMBOLX1_0(op3, &bitD3); + HUF_DECODE_SYMBOLX1_0(op4, &bitD4); + endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished; + } -ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); -ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + /* check corruption */ + /* note : should not be necessary : op# advance in lock step, and we control op4. 
+ * but curiously, binary generated by gcc 7.2 & 7.3 with -mbmi2 runs faster when >=1 test is present */ + if (op1 > opStart2) return ERROR(corruption_detected); + if (op2 > opStart3) return ERROR(corruption_detected); + if (op3 > opStart4) return ERROR(corruption_detected); + /* note : op4 supposed already verified within main loop */ + /* finish bitStreams one by one */ + HUF_decodeStreamX1(op1, &bitD1, opStart2, dt, dtLog); + HUF_decodeStreamX1(op2, &bitD2, opStart3, dt, dtLog); + HUF_decodeStreamX1(op3, &bitD3, opStart4, dt, dtLog); + HUF_decodeStreamX1(op4, &bitD4, oend, dt, dtLog); -/*- - Buffer-less streaming decompression (synchronous mode) + /* check */ + { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); + if (!endCheck) return ERROR(corruption_detected); } - A ZSTD_DCtx object is required to track streaming operations. - Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it. - A ZSTD_DCtx object can be re-used multiple times. + /* decoded size */ + return dstSize; + } +} - First typical operation is to retrieve frame parameters, using ZSTD_getFrameHeader(). - Frame header is extracted from the beginning of compressed frame, so providing only the frame's beginning is enough. - Data fragment must be large enough to ensure successful decoding. - `ZSTD_frameHeaderSize_max` bytes is guaranteed to always be large enough. - @result : 0 : successful decoding, the `ZSTD_frameHeader` structure is correctly filled. - >0 : `srcSize` is too small, please provide at least @result bytes on next attempt. - errorCode, which can be tested using ZSTD_isError(). - It fills a ZSTD_frameHeader structure with important information to correctly decode the frame, - such as the dictionary ID, content size, or maximum back-reference distance (`windowSize`). - Note that these values could be wrong, either because of data corruption, or because a 3rd party deliberately spoofs false information. - As a consequence, check that values remain within valid application range. - For example, do not allocate memory blindly, check that `windowSize` is within expectation. - Each application can set its own limits, depending on local restrictions. - For extended interoperability, it is recommended to support `windowSize` of at least 8 MB. +typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize, + const void *cSrc, + size_t cSrcSize, + const HUF_DTable *DTable); - ZSTD_decompressContinue() needs previous data blocks during decompression, up to `windowSize` bytes. - ZSTD_decompressContinue() is very sensitive to contiguity, - if 2 blocks don't follow each other, make sure that either the compressor breaks contiguity at the same place, - or that previous contiguous segment is large enough to properly handle maximum back-reference distance. - There are multiple ways to guarantee this condition. +HUF_DGEN(HUF_decompress1X1_usingDTable_internal) +HUF_DGEN(HUF_decompress4X1_usingDTable_internal) - The most memory efficient way is to use a round buffer of sufficient size. - Sufficient size is determined by invoking ZSTD_decodingBufferSize_min(), - which can @return an error code if required value is too large for current system (in 32-bits mode). 
- In a round buffer methodology, ZSTD_decompressContinue() decompresses each block next to previous one, - up to the moment there is not enough room left in the buffer to guarantee decoding another full block, - which maximum size is provided in `ZSTD_frameHeader` structure, field `blockSizeMax`. - At which point, decoding can resume from the beginning of the buffer. - Note that already decoded data stored in the buffer should be flushed before being overwritten. - There are alternatives possible, for example using two or more buffers of size `windowSize` each, though they consume more memory. - Finally, if you control the compression process, you can also ignore all buffer size rules, - as long as the encoder and decoder progress in "lock-step", - aka use exactly the same buffer sizes, break contiguity at the same place, etc. +size_t HUF_decompress1X1_usingDTable( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc dtd = HUF_getDTableDesc(DTable); + if (dtd.tableType != 0) return ERROR(GENERIC); + return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +} - Once buffers are setup, start decompression, with ZSTD_decompressBegin(). - If decompression requires a dictionary, use ZSTD_decompressBegin_usingDict() or ZSTD_decompressBegin_usingDDict(). +size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize) +{ + const BYTE* ip = (const BYTE*) cSrc; - Then use ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue() alternatively. - ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTD_decompressContinue(). - ZSTD_decompressContinue() requires this _exact_ amount of bytes, or it will fail. + size_t const hSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; - @result of ZSTD_decompressContinue() is the number of bytes regenerated within 'dst' (necessarily <= dstCapacity). - It can be zero : it just means ZSTD_decompressContinue() has decoded some metadata item. - It can also be an error code, which can be tested with ZSTD_isError(). + return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0); +} - A frame is fully decoded when ZSTD_nextSrcSizeToDecompress() returns zero. - Context can then be reset to start a new decompression. - Note : it's possible to know if next input to present is a header or a block, using ZSTD_nextInputType(). - This information is not required to properly decode a frame. +size_t HUF_decompress4X1_usingDTable( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc dtd = HUF_getDTableDesc(DTable); + if (dtd.tableType != 0) return ERROR(GENERIC); + return HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +} - == Special case : skippable frames == +static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize, int bmi2) +{ + const BYTE* ip = (const BYTE*) cSrc; - Skippable frames allow integration of user-defined data into a flow of concatenated frames. - Skippable frames will be ignored (skipped) by decompressor. 
- The format of skippable frames is as follows : - a) Skippable frame ID - 4 Bytes, Little endian format, any value from 0x184D2A50 to 0x184D2A5F - b) Frame Size - 4 Bytes, Little endian format, unsigned 32-bits - c) Frame Content - any content (User Data) of length equal to Frame Size - For skippable frames ZSTD_getFrameHeader() returns zfhPtr->frameType==ZSTD_skippableFrame. - For skippable frames ZSTD_decompressContinue() always returns 0 : it only skips the content. -*/ + size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; -/*===== Buffer-less streaming decompression functions =====*/ -typedef enum { ZSTD_frame, ZSTD_skippableFrame } ZSTD_frameType_e; -typedef struct { - unsigned long long frameContentSize; /* if == ZSTD_CONTENTSIZE_UNKNOWN, it means this field is not available. 0 means "empty" */ - unsigned long long windowSize; /* can be very large, up to <= frameContentSize */ - unsigned blockSizeMax; - ZSTD_frameType_e frameType; /* if == ZSTD_skippableFrame, frameContentSize is the size of skippable content */ - unsigned headerSize; - unsigned dictID; - unsigned checksumFlag; -} ZSTD_frameHeader; + return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); +} -/*! ZSTD_getFrameHeader() : - * decode Frame Header, or requires larger `srcSize`. - * @return : 0, `zfhPtr` is correctly filled, - * >0, `srcSize` is too small, value is wanted `srcSize` amount, - * or an error code, which can be tested using ZSTD_isError() */ -ZSTDLIB_API size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize); /**< doesn't consume input */ -/*! 
ZSTD_getFrameHeader_advanced() : - * same as ZSTD_getFrameHeader(), - * with added capability to select a format (like ZSTD_f_zstd1_magicless) */ -ZSTDLIB_API size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format); -ZSTDLIB_API size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize); /**< when frame content size is not known, pass in frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN */ +size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize) +{ + return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0); +} -ZSTDLIB_API size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx); -ZSTDLIB_API size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); -ZSTDLIB_API size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); -ZSTDLIB_API size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx); -ZSTDLIB_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +#endif /* HUF_FORCE_DECOMPRESS_X2 */ -/* misc */ -ZSTDLIB_API void ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx); -typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e; -ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx); +#ifndef HUF_FORCE_DECOMPRESS_X1 +/* *************************/ +/* double-symbols decoding */ +/* *************************/ +typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2; /* double-symbols decoding */ +typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t; +typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1]; +typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX]; -/* ============================ */ -/** Block level API */ -/* ============================ */ -/*! - Block functions produce and decode raw zstd blocks, without frame metadata. - Frame metadata cost is typically ~12 bytes, which can be non-negligible for very small blocks (< 100 bytes). - But users will have to take in charge needed metadata to regenerate data, such as compressed and content sizes. +/* HUF_fillDTableX2Level2() : + * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */ +static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 sizeLog, const U32 consumed, + const U32* rankValOrigin, const int minWeight, + const sortedSymbol_t* sortedSymbols, const U32 sortedListSize, + U32 nbBitsBaseline, U16 baseSeq, U32* wksp, size_t wkspSize) +{ + HUF_DEltX2 DElt; + U32* rankVal = wksp; - A few rules to respect : - - Compressing and decompressing require a context structure - + Use ZSTD_createCCtx() and ZSTD_createDCtx() - - It is necessary to init context before starting - + compression : any ZSTD_compressBegin*() variant, including with dictionary - + decompression : any ZSTD_decompressBegin*() variant, including with dictionary - + copyCCtx() and copyDCtx() can be used too - - Block size is limited, it must be <= ZSTD_getBlockSize() <= ZSTD_BLOCKSIZE_MAX == 128 KB - + If input is larger than a block size, it's necessary to split input data into multiple blocks - + For inputs larger than a single block, consider using regular ZSTD_compress() instead. 
-      Frame metadata is not that costly, and quickly becomes negligible as source size grows larger than a block.
-    - When a block is considered not compressible enough, ZSTD_compressBlock() result will be 0 (zero) !
-      ===> In which case, nothing is produced into `dst` !
-      + User __must__ test for such outcome and deal directly with uncompressed data
-      + A block cannot be declared incompressible if ZSTD_compressBlock() return value was != 0.
-        Doing so would mess up with statistics history, leading to potential data corruption.
-      + ZSTD_decompressBlock() _doesn't accept uncompressed data as input_ !!
-      + In case of multiple successive blocks, should some of them be uncompressed,
-        decoder must be informed of their existence in order to follow proper history.
-        Use ZSTD_insertBlock() for such a case.
-*/
+    assert(wkspSize >= HUF_TABLELOG_MAX + 1);
+    (void)wkspSize;
+    /* get pre-calculated rankVal */
+    ZSTD_memcpy(rankVal, rankValOrigin, sizeof(U32) * (HUF_TABLELOG_MAX + 1));
-/*===== Raw zstd block functions =====*/
-ZSTDLIB_API size_t ZSTD_getBlockSize   (const ZSTD_CCtx* cctx);
-ZSTDLIB_API size_t ZSTD_compressBlock  (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
-ZSTDLIB_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
-ZSTDLIB_API size_t ZSTD_insertBlock    (ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize);  /**< insert uncompressed block into `dctx` history. Useful for multi-blocks decompression. */
+    /* fill skipped values */
+    if (minWeight>1) {
+        U32 i, skipSize = rankVal[minWeight];
+        MEM_writeLE16(&(DElt.sequence), baseSeq);
+        DElt.nbBits   = (BYTE)(consumed);
+        DElt.length   = 1;
+        for (i = 0; i < skipSize; i++)
+            DTable[i] = DElt;
+    }
+    /* fill DTable */
+    { U32 s; for (s=0; s<sortedListSize; s++) {   /* note : sortedSymbols already skipped */
+        const U32 symbol = sortedSymbols[s].symbol;
+        const U32 weight = sortedSymbols[s].weight;
+        const U32 nbBits = nbBitsBaseline - weight;
+        const U32 length = 1 << (sizeLog-nbBits);
+        const U32 start = rankVal[weight];
+        U32 i = start;
+        const U32 end = start + length;
+        MEM_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8)));
+        DElt.nbBits   = (BYTE)(nbBits + consumed);
+        DElt.length   = 2;
+        do { DTable[i++] = DElt; } while (i<end);   /* since length >= 1 */
-#if defined (__cplusplus)
+        rankVal[weight] += length;
+    }   }   }
-#endif
-/**** ended inlining zstd.h ****/
-#define FSE_STATIC_LINKING_ONLY
-/**** skipping file: fse.h ****/
-#define HUF_STATIC_LINKING_ONLY
-/**** skipping file: huf.h ****/
-#ifndef XXH_STATIC_LINKING_ONLY
-#  define XXH_STATIC_LINKING_ONLY   /* XXH64_state_t */
-#endif
-/**** skipping file: xxhash.h ****/
-#if defined (__cplusplus)
-extern "C" {
-#endif
-/* ---- static assert (debug) --- */
-#define ZSTD_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c)
-#define ZSTD_isError ERR_isError /* for inlining */
-#define FSE_isError ERR_isError
-#define HUF_isError ERR_isError
+static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
+                           const sortedSymbol_t* sortedList, const U32 sortedListSize,
+                           const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
+                           const U32 nbBitsBaseline, U32* wksp, size_t wkspSize)
+{
+    U32* rankVal = wksp;
+    const int scaleLog = nbBitsBaseline - targetLog;   /* note : targetLog >= srcLog, hence scaleLog <= 1 */
+    const U32 minBits  = nbBitsBaseline - maxWeight;
+    U32 s;
+    assert(wkspSize >= HUF_TABLELOG_MAX + 1);
+    wksp += HUF_TABLELOG_MAX + 1;
+    wkspSize -= HUF_TABLELOG_MAX + 1;
-/*-*************************************
-*  shared macros
-***************************************/
-#undef MIN
-#undef MAX
-#define MIN(a,b) ((a)<(b) ? (a) : (b))
-#define MAX(a,b) ((a)>(b) ? (a) : (b))
+    ZSTD_memcpy(rankVal, rankValOrigin, sizeof(U32) * (HUF_TABLELOG_MAX + 1));
-/**
- * Return the specified error if the condition evaluates to true.
- *
- * In debug modes, prints additional information.
- * In order to do that (particularly, printing the conditional that failed),
- * this can't just wrap RETURN_ERROR().
- */
-#define RETURN_ERROR_IF(cond, err, ...) \
-  if (cond) { \
-    RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", __FILE__, __LINE__, ZSTD_QUOTE(cond), ZSTD_QUOTE(ERROR(err))); \
-    RAWLOG(3, ": " __VA_ARGS__); \
-    RAWLOG(3, "\n"); \
-    return ERROR(err); \
-  }
+    /* fill DTable */
+    for (s=0; s<sortedListSize; s++) {
+        const U16 symbol = sortedList[s].symbol;
+        const U32 weight = sortedList[s].weight;
+        const U32 nbBits = nbBitsBaseline - weight;
+        const U32 start = rankVal[weight];
+        const U32 length = 1 << (targetLog-nbBits);
+        if (targetLog-nbBits >= minBits) {   /* enough room for a second symbol */
+            U32 sortedRank;
+            int minWeight = nbBits + scaleLog;
+            if (minWeight < 1) minWeight = 1;
+            sortedRank = rankStart[minWeight];
+            HUF_fillDTableX2Level2(DTable+start, targetLog-nbBits, nbBits,
+                           rankValOrigin[nbBits], minWeight,
+                           sortedList+sortedRank, sortedListSize-sortedRank,
+                           nbBitsBaseline, symbol, wksp, wkspSize);
+        } else {
+            HUF_DEltX2 DElt;
+            MEM_writeLE16(&(DElt.sequence), symbol);
+            DElt.nbBits = (BYTE)(nbBits);
+            DElt.length = 1;
+            { U32 const end = start + length;
+                U32 u;
+                for (u = start; u < end; u++) DTable[u] = DElt;
+        }   }
+        rankVal[weight] += length;
+    }
+}
-/**
- * If the provided expression evaluates to an error code, returns that error code.
- *
- * In debug modes, prints additional information.
- */
-#define FORWARD_IF_ERROR(err, ...) \
-  do { \
-    size_t const err_code = (err); \
-    if (ERR_isError(err_code)) { \
-      RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", __FILE__, __LINE__, ZSTD_QUOTE(err), ERR_getErrorName(err_code)); \
-      RAWLOG(3, ": " __VA_ARGS__); \
-      RAWLOG(3, "\n"); \
-      return err_code; \
-    } \
-  } while(0);
+typedef struct {
+    rankValCol_t rankVal[HUF_TABLELOG_MAX];
+    U32 rankStats[HUF_TABLELOG_MAX + 1];
+    U32 rankStart0[HUF_TABLELOG_MAX + 2];
+    sortedSymbol_t sortedSymbol[HUF_SYMBOLVALUE_MAX + 1];
+    BYTE weightList[HUF_SYMBOLVALUE_MAX + 1];
+    U32 calleeWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
+} HUF_ReadDTableX2_Workspace;
+size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
+                       const void* src, size_t srcSize,
+                       void* workSpace, size_t wkspSize)
+{
+    U32 tableLog, maxW, sizeOfSort, nbSymbols;
+    DTableDesc dtd = HUF_getDTableDesc(DTable);
+    U32 const maxTableLog = dtd.maxTableLog;
+    size_t iSize;
+    void* dtPtr = DTable+1;   /* force compiler to avoid strict-aliasing */
+    HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
+    U32 *rankStart;
+    HUF_ReadDTableX2_Workspace* const wksp = (HUF_ReadDTableX2_Workspace*)workSpace;
-/*-*************************************
-*  Common constants
-***************************************/
-#define ZSTD_OPT_NUM    (1<<12)
-#define ZSTD_REP_NUM      3                 /* number of repcodes */
-#define ZSTD_REP_MOVE     (ZSTD_REP_NUM-1)
-static const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 };
+    if (sizeof(*wksp) > wkspSize) return ERROR(GENERIC);
-#define KB *(1 <<10)
-#define MB *(1 <<20)
-#define GB *(1U<<30)
+    rankStart = wksp->rankStart0 + 1;
+    ZSTD_memset(wksp->rankStats, 0, sizeof(wksp->rankStats));
+    ZSTD_memset(wksp->rankStart0, 0, sizeof(wksp->rankStart0));
+    DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable));   /* if compiler fails here, assertion is wrong */
+    if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
+    /* ZSTD_memset(weightList, 0, sizeof(weightList)); */  /* is not necessary, even though some analyzer complain ... */
-#define BIT7 128
-#define BIT6 64
-#define BIT5 32
-#define BIT4 16
-#define BIT1 2
-#define BIT0 1
+    iSize = HUF_readStats_wksp(wksp->weightList, HUF_SYMBOLVALUE_MAX + 1, wksp->rankStats, &nbSymbols, &tableLog, src, srcSize, wksp->calleeWksp, sizeof(wksp->calleeWksp), /* bmi2 */ 0);
+    if (HUF_isError(iSize)) return iSize;
-#define ZSTD_WINDOWLOG_ABSOLUTEMIN 10
-static const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 };
-static const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 };
+    /* check result */
+    if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge);   /* DTable can't fit code depth */
-#define ZSTD_FRAMEIDSIZE 4   /* magic number size */
+    /* find maxWeight */
+    for (maxW = tableLog; wksp->rankStats[maxW]==0; maxW--) {}  /* necessarily finds a solution before 0 */
-#define ZSTD_BLOCKHEADERSIZE 3   /* C standard doesn't allow `static const` variable to be init using another `static const` variable */
-static const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE;
-typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e;
+    /* Get start index of each weight */
+    { U32 w, nextRankStart = 0;
+        for (w=1; w<maxW+1; w++) {
+            U32 curr = nextRankStart;
+            nextRankStart += wksp->rankStats[w];
+            rankStart[w] = curr;
+        }
+        rankStart[0] = nextRankStart;  /* put all 0w symbols at the end of sorted list*/
+        sizeOfSort = nextRankStart;
+    }
-#define ZSTD_FRAMECHECKSUMSIZE 4
+    /* sort symbols by weight */
+    { U32 s;
+        for (s=0; s<nbSymbols; s++) {
+            U32 const w = wksp->weightList[s];
+            U32 const r = rankStart[w]++;
+            wksp->sortedSymbol[r].symbol = (BYTE)s;
+            wksp->sortedSymbol[r].weight = (BYTE)w;
+        }
+        rankStart[0] = 0;   /* forget 0w symbols; this is beginning of weight(1) */
+    }
+    /* Build rankVal */
+    { U32* const rankVal0 = wksp->rankVal[0];
+        { int const rescale = (maxTableLog-tableLog) - 1;   /* tableLog <= maxTableLog */
+            U32 nextRankVal = 0;
+            U32 w;
+            for (w=1; w<maxW+1; w++) {
+                U32 curr = nextRankVal;
+                nextRankVal += wksp->rankStats[w] << (w+rescale);
+                rankVal0[w] = curr;
+        }   }
+        { U32 const minBits = tableLog+1 - maxW;
+            U32 consumed;
+            for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) {
+                U32* const rankValPtr = wksp->rankVal[consumed];
+                U32 w;
+                for (w = 1; w < maxW+1; w++) {
+                    rankValPtr[w] = rankVal0[w] >> consumed;
+        }   }   }   }
+    HUF_fillDTableX2(dt, maxTableLog,
+                   wksp->sortedSymbol, sizeOfSort,
+                   wksp->rankStart0, wksp->rankVal, maxW,
+                   tableLog+1,
+                   wksp->calleeWksp, sizeof(wksp->calleeWksp) / sizeof(U32));
+    dtd.tableLog = (BYTE)maxTableLog;
+    dtd.tableType = 1;
+    ZSTD_memcpy(DTable, &dtd, sizeof(dtd));
+    return iSize;
+}
-#define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */
-#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */)   /* for a non-null block */
-#define HufLog 12
-typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e;
+FORCE_INLINE_TEMPLATE U32
+HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
+{
+    size_t const val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
+    ZSTD_memcpy(op, dt+val, 2);
+    BIT_skipBits(DStream, dt[val].nbBits);
+    return dt[val].length;
+}
-#define LONGNBSEQ 0x7F00
+FORCE_INLINE_TEMPLATE U32
+HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
+{
+    size_t const val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
+    ZSTD_memcpy(op, dt+val, 1);
+    if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
+    else {
+        if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
+            BIT_skipBits(DStream, dt[val].nbBits);
+            if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
+                /* ugly hack; 
works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */ + DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8); + } } + return 1; +} -#define MINMATCH 3 +#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \ + ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) -#define Litbits 8 -#define MaxLit ((1<bitContainer)-1))) { + HUF_DECODE_SYMBOLX2_2(p, bitDPtr); + HUF_DECODE_SYMBOLX2_1(p, bitDPtr); + HUF_DECODE_SYMBOLX2_2(p, bitDPtr); + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); + } -/*-******************************************* -* Shared functions to include for inlining -*********************************************/ -static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); } + /* closer to end : up to 2 symbols at a time */ + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2)) + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); -#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; } -static void ZSTD_copy16(void* dst, const void* src) { memcpy(dst, src, 16); } -#define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; } + while (p <= pEnd-2) + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); /* no need to reload : reached the end of DStream */ -#define WILDCOPY_OVERLENGTH 32 -#define WILDCOPY_VECLEN 16 + if (p < pEnd) + p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog); -typedef enum { - ZSTD_no_overlap, - ZSTD_overlap_src_before_dst - /* ZSTD_overlap_dst_before_src, */ -} ZSTD_overlap_e; + return p-pStart; +} -/*! ZSTD_wildcopy() : - * Custom version of memcpy(), can over read/write up to WILDCOPY_OVERLENGTH bytes (if length==0) - * @param ovtype controls the overlap detection - * - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart. - * - ZSTD_overlap_src_before_dst: The src and dst may overlap, but they MUST be at least 8 bytes apart. - * The src buffer must be before the dst buffer. - */ -MEM_STATIC FORCE_INLINE_ATTR -void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e const ovtype) +FORCE_INLINE_TEMPLATE size_t +HUF_decompress1X2_usingDTable_internal_body( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) { - ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src; - const BYTE* ip = (const BYTE*)src; - BYTE* op = (BYTE*)dst; - BYTE* const oend = op + length; + BIT_DStream_t bitD; - assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff <= -WILDCOPY_VECLEN)); + /* Init */ + CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) ); - if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) { - /* Handle short offset copies. */ - do { - COPY8(op, ip) - } while (op < oend); - } else { - assert(diff >= WILDCOPY_VECLEN || diff <= -WILDCOPY_VECLEN); - /* Separate out the first COPY16() call because the copy length is - * almost certain to be short, so the branches have different - * probabilities. Since it is almost certain to be short, only do - * one COPY16() in the first call. Then, do two calls per loop since - * at that point it is more likely to have a high trip count. 
- */ - COPY16(op, ip); - if (op >= oend) return; - do { - COPY16(op, ip); - COPY16(op, ip); - } - while (op < oend); + /* decode */ + { BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = ostart + dstSize; + const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */ + const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr; + DTableDesc const dtd = HUF_getDTableDesc(DTable); + HUF_decodeStreamX2(ostart, &bitD, oend, dt, dtd.tableLog); } + + /* check */ + if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected); + + /* decoded size */ + return dstSize; } +FORCE_INLINE_TEMPLATE size_t +HUF_decompress4X2_usingDTable_internal_body( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ -/*-******************************************* -* Private declarations -*********************************************/ -typedef struct seqDef_s { - U32 offset; - U16 litLength; - U16 matchLength; -} seqDef; + { const BYTE* const istart = (const BYTE*) cSrc; + BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = ostart + dstSize; + BYTE* const olimit = oend - (sizeof(size_t)-1); + const void* const dtPtr = DTable+1; + const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr; -typedef struct { - seqDef* sequencesStart; - seqDef* sequences; - BYTE* litStart; - BYTE* lit; - BYTE* llCode; - BYTE* mlCode; - BYTE* ofCode; - size_t maxNbSeq; - size_t maxNbLit; - U32 longLengthID; /* 0 == no longLength; 1 == Lit.longLength; 2 == Match.longLength; */ - U32 longLengthPos; -} seqStore_t; + /* Init */ + BIT_DStream_t bitD1; + BIT_DStream_t bitD2; + BIT_DStream_t bitD3; + BIT_DStream_t bitD4; + size_t const length1 = MEM_readLE16(istart); + size_t const length2 = MEM_readLE16(istart+2); + size_t const length3 = MEM_readLE16(istart+4); + size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6); + const BYTE* const istart1 = istart + 6; /* jumpTable */ + const BYTE* const istart2 = istart1 + length1; + const BYTE* const istart3 = istart2 + length2; + const BYTE* const istart4 = istart3 + length3; + size_t const segmentSize = (dstSize+3) / 4; + BYTE* const opStart2 = ostart + segmentSize; + BYTE* const opStart3 = opStart2 + segmentSize; + BYTE* const opStart4 = opStart3 + segmentSize; + BYTE* op1 = ostart; + BYTE* op2 = opStart2; + BYTE* op3 = opStart3; + BYTE* op4 = opStart4; + U32 endSignal = 1; + DTableDesc const dtd = HUF_getDTableDesc(DTable); + U32 const dtLog = dtd.tableLog; -/** - * Contains the compressed frame size and an upper-bound for the decompressed frame size. - * Note: before using `compressedSize`, check for errors using ZSTD_isError(). 
- * similarly, before using `decompressedBound`, check for errors using: - * `decompressedBound != ZSTD_CONTENTSIZE_ERROR` - */ -typedef struct { - size_t compressedSize; - unsigned long long decompressedBound; -} ZSTD_frameSizeInfo; /* decompress & legacy */ + if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ + CHECK_F( BIT_initDStream(&bitD1, istart1, length1) ); + CHECK_F( BIT_initDStream(&bitD2, istart2, length2) ); + CHECK_F( BIT_initDStream(&bitD3, istart3, length3) ); + CHECK_F( BIT_initDStream(&bitD4, istart4, length4) ); -const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); /* compress & dictBuilder */ -void ZSTD_seqToCodes(const seqStore_t* seqStorePtr); /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */ + /* 16-32 symbols per loop (4-8 symbols per stream) */ + for ( ; (endSignal) & (op4 < olimit); ) { +#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__)) + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_1(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_0(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_1(op2, &bitD2); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_0(op2, &bitD2); + endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished; + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_1(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_0(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_1(op4, &bitD4); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_0(op4, &bitD4); + endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished; +#else + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_1(op1, &bitD1); + HUF_DECODE_SYMBOLX2_1(op2, &bitD2); + HUF_DECODE_SYMBOLX2_1(op3, &bitD3); + HUF_DECODE_SYMBOLX2_1(op4, &bitD4); + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_0(op1, &bitD1); + HUF_DECODE_SYMBOLX2_0(op2, &bitD2); + HUF_DECODE_SYMBOLX2_0(op3, &bitD3); + HUF_DECODE_SYMBOLX2_0(op4, &bitD4); + endSignal = (U32)LIKELY( + (BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished) + & (BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished) + & (BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished) + & (BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished)); +#endif + } -/* custom memory allocation functions */ -void* ZSTD_malloc(size_t size, ZSTD_customMem customMem); -void* ZSTD_calloc(size_t size, ZSTD_customMem customMem); -void ZSTD_free(void* ptr, ZSTD_customMem customMem); + /* check corruption */ + if (op1 > opStart2) return ERROR(corruption_detected); + if (op2 > opStart3) return ERROR(corruption_detected); + if (op3 > opStart4) return ERROR(corruption_detected); + /* note : op4 already verified within main loop */ + + /* finish bitStreams one by one */ + HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog); + HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog); + HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog); + HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog); + /* check */ + { U32 const endCheck = BIT_endOfDStream(&bitD1) & 
BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); + if (!endCheck) return ERROR(corruption_detected); } -MEM_STATIC U32 ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus */ -{ - assert(val != 0); - { -# if defined(_MSC_VER) /* Visual */ - unsigned long r=0; - return _BitScanReverse(&r, val) ? (unsigned)r : 0; -# elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */ - return __builtin_clz (val) ^ 31; -# elif defined(__ICCARM__) /* IAR Intrinsic */ - return 31 - __CLZ(val); -# else /* Software version */ - static const U32 DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; - U32 v = val; - v |= v >> 1; - v |= v >> 2; - v |= v >> 4; - v |= v >> 8; - v |= v >> 16; - return DeBruijnClz[(v * 0x07C4ACDDU) >> 27]; -# endif + /* decoded size */ + return dstSize; } } +HUF_DGEN(HUF_decompress1X2_usingDTable_internal) +HUF_DGEN(HUF_decompress4X2_usingDTable_internal) -/* ZSTD_invalidateRepCodes() : - * ensures next compression will not use repcodes from previous block. - * Note : only works with regular variant; - * do not use with extDict variant ! */ -void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx); /* zstdmt, adaptive_compression (shouldn't get this definition from here) */ - - -typedef struct { - blockType_e blockType; - U32 lastBlock; - U32 origSize; -} blockProperties_t; /* declared here for decompress and fullbench */ - -/*! ZSTD_getcBlockSize() : - * Provides the size of compressed block from block header `src` */ -/* Used by: decompress, fullbench (does not get its definition from here) */ -size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, - blockProperties_t* bpPtr); +size_t HUF_decompress1X2_usingDTable( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc dtd = HUF_getDTableDesc(DTable); + if (dtd.tableType != 1) return ERROR(GENERIC); + return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +} -/*! 
ZSTD_decodeSeqHeaders() : - * decode sequence header from src */ -/* Used by: decompress, fullbench (does not get its definition from here) */ -size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, - const void* src, size_t srcSize); +size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize) +{ + const BYTE* ip = (const BYTE*) cSrc; + size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize, + workSpace, wkspSize); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; -#if defined (__cplusplus) + return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0); } -#endif - -#endif /* ZSTD_CCOMMON_H_MODULE */ -/**** ended inlining zstd_internal.h ****/ - -/*-**************************************** -* Version -******************************************/ -unsigned ZSTD_versionNumber(void) { return ZSTD_VERSION_NUMBER; } -const char* ZSTD_versionString(void) { return ZSTD_VERSION_STRING; } +size_t HUF_decompress4X2_usingDTable( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc dtd = HUF_getDTableDesc(DTable); + if (dtd.tableType != 1) return ERROR(GENERIC); + return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +} +static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize, int bmi2) +{ + const BYTE* ip = (const BYTE*) cSrc; -/*-**************************************** -* ZSTD Error Management -******************************************/ -#undef ZSTD_isError /* defined within zstd_internal.h */ -/*! ZSTD_isError() : - * tells if a return value is an error code - * symbol is required for external callers */ -unsigned ZSTD_isError(size_t code) { return ERR_isError(code); } + size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize, + workSpace, wkspSize); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; -/*! ZSTD_getErrorName() : - * provides error code string from function result (useful for debugging) */ -const char* ZSTD_getErrorName(size_t code) { return ERR_getErrorName(code); } + return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); +} -/*! ZSTD_getError() : - * convert a `size_t` function result into a proper ZSTD_errorCode enum */ -ZSTD_ErrorCode ZSTD_getErrorCode(size_t code) { return ERR_getErrorCode(code); } +size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize) +{ + return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0); +} -/*! 
ZSTD_getErrorString() : - * provides error code string from enum */ -const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorString(code); } +#endif /* HUF_FORCE_DECOMPRESS_X1 */ -/*=************************************************************** -* Custom allocator -****************************************************************/ -void* ZSTD_malloc(size_t size, ZSTD_customMem customMem) -{ - if (customMem.customAlloc) - return customMem.customAlloc(customMem.opaque, size); - return malloc(size); -} +/* ***********************************/ +/* Universal decompression selectors */ +/* ***********************************/ -void* ZSTD_calloc(size_t size, ZSTD_customMem customMem) +size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) { - if (customMem.customAlloc) { - /* calloc implemented as malloc+memset; - * not as efficient as calloc, but next best guess for custom malloc */ - void* const ptr = customMem.customAlloc(customMem.opaque, size); - memset(ptr, 0, size); - return ptr; - } - return calloc(1, size); + DTableDesc const dtd = HUF_getDTableDesc(DTable); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)dtd; + assert(dtd.tableType == 0); + return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)dtd; + assert(dtd.tableType == 1); + return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +#else + return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) : + HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +#endif } -void ZSTD_free(void* ptr, ZSTD_customMem customMem) +size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) { - if (ptr!=NULL) { - if (customMem.customFree) - customMem.customFree(customMem.opaque, ptr); - else - free(ptr); - } + DTableDesc const dtd = HUF_getDTableDesc(DTable); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)dtd; + assert(dtd.tableType == 0); + return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)dtd; + assert(dtd.tableType == 1); + return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +#else + return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) : + HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +#endif } -/**** ended inlining zstd_common.c ****/ -/**** start inlining huf_decompress.c ****/ -/* ****************************************************************** - * huff0 huffman decoder, - * part of Finite State Entropy library - * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. - * - * You can contact the author at : - * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. 
-****************************************************************** */ - -/* ************************************************************** -* Dependencies -****************************************************************/ -#include /* memcpy, memset */ -/**** skipping file: compiler.h ****/ -/**** skipping file: bitstream.h ****/ -/**** skipping file: fse.h ****/ -#define HUF_STATIC_LINKING_ONLY -/**** skipping file: huf.h ****/ -/**** skipping file: error_private.h ****/ -/* ************************************************************** -* Macros -****************************************************************/ -/* These two optional macros force the use one way or another of the two - * Huffman decompression implementations. You can't force in both directions - * at the same time. - */ -#if defined(HUF_FORCE_DECOMPRESS_X1) && \ - defined(HUF_FORCE_DECOMPRESS_X2) -#error "Cannot force the use of the X1 and X2 decoders at the same time!" +#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2) +typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t; +static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] = +{ + /* single, double, quad */ + {{0,0}, {1,1}, {2,2}}, /* Q==0 : impossible */ + {{0,0}, {1,1}, {2,2}}, /* Q==1 : impossible */ + {{ 38,130}, {1313, 74}, {2151, 38}}, /* Q == 2 : 12-18% */ + {{ 448,128}, {1353, 74}, {2238, 41}}, /* Q == 3 : 18-25% */ + {{ 556,128}, {1353, 74}, {2238, 47}}, /* Q == 4 : 25-32% */ + {{ 714,128}, {1418, 74}, {2436, 53}}, /* Q == 5 : 32-38% */ + {{ 883,128}, {1437, 74}, {2464, 61}}, /* Q == 6 : 38-44% */ + {{ 897,128}, {1515, 75}, {2622, 68}}, /* Q == 7 : 44-50% */ + {{ 926,128}, {1613, 75}, {2730, 75}}, /* Q == 8 : 50-56% */ + {{ 947,128}, {1729, 77}, {3359, 77}}, /* Q == 9 : 56-62% */ + {{1107,128}, {2083, 81}, {4006, 84}}, /* Q ==10 : 62-69% */ + {{1177,128}, {2379, 87}, {4785, 88}}, /* Q ==11 : 69-75% */ + {{1242,128}, {2415, 93}, {5155, 84}}, /* Q ==12 : 75-81% */ + {{1349,128}, {2644,106}, {5260,106}}, /* Q ==13 : 81-87% */ + {{1455,128}, {2422,124}, {4174,124}}, /* Q ==14 : 87-93% */ + {{ 722,128}, {1891,145}, {1936,146}}, /* Q ==15 : 93-99% */ +}; #endif - -/* ************************************************************** -* Error Management -****************************************************************/ -#define HUF_isError ERR_isError -#ifndef CHECK_F -#define CHECK_F(f) { size_t const err_ = (f); if (HUF_isError(err_)) return err_; } +/** HUF_selectDecoder() : + * Tells which decoder is likely to decode faster, + * based on a set of pre-computed metrics. + * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 . + * Assumption : 0 < dstSize <= 128 KB */ +U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize) +{ + assert(dstSize > 0); + assert(dstSize <= 128*1024); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)dstSize; + (void)cSrcSize; + return 0; +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)dstSize; + (void)cSrcSize; + return 1; +#else + /* decoder timing evaluation */ + { U32 const Q = (cSrcSize >= dstSize) ? 
15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */ + U32 const D256 = (U32)(dstSize >> 8); + U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256); + U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256); + DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, to reduce cache eviction */ + return DTime1 < DTime0; + } #endif +} -/* ************************************************************** -* Byte alignment for workSpace management -****************************************************************/ -#define HUF_ALIGN(x, a) HUF_ALIGN_MASK((x), (a) - 1) -#define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask)) - - -/* ************************************************************** -* BMI2 Variant Wrappers -****************************************************************/ -#if DYNAMIC_BMI2 - -#define HUF_DGEN(fn) \ - \ - static size_t fn##_default( \ - void* dst, size_t dstSize, \ - const void* cSrc, size_t cSrcSize, \ - const HUF_DTable* DTable) \ - { \ - return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ - } \ - \ - static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2( \ - void* dst, size_t dstSize, \ - const void* cSrc, size_t cSrcSize, \ - const HUF_DTable* DTable) \ - { \ - return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ - } \ - \ - static size_t fn(void* dst, size_t dstSize, void const* cSrc, \ - size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \ - { \ - if (bmi2) { \ - return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); \ - } \ - return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable); \ - } +size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, + size_t dstSize, const void* cSrc, + size_t cSrcSize, void* workSpace, + size_t wkspSize) +{ + /* validation checks */ + if (dstSize == 0) return ERROR(dstSize_tooSmall); + if (cSrcSize == 0) return ERROR(corruption_detected); + { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)algoNb; + assert(algoNb == 0); + return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)algoNb; + assert(algoNb == 1); + return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); #else - -#define HUF_DGEN(fn) \ - static size_t fn(void* dst, size_t dstSize, void const* cSrc, \ - size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \ - { \ - (void)bmi2; \ - return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ + return algoNb ? 
HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, + cSrcSize, workSpace, wkspSize): + HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); +#endif } +} -#endif +size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize) +{ + /* validation checks */ + if (dstSize == 0) return ERROR(dstSize_tooSmall); + if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ + if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ + if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ + { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)algoNb; + assert(algoNb == 0); + return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc, + cSrcSize, workSpace, wkspSize); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)algoNb; + assert(algoNb == 1); + return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, + cSrcSize, workSpace, wkspSize); +#else + return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, + cSrcSize, workSpace, wkspSize): + HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc, + cSrcSize, workSpace, wkspSize); +#endif + } +} -/*-***************************/ -/* generic DTableDesc */ -/*-***************************/ -typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc; -static DTableDesc HUF_getDTableDesc(const HUF_DTable* table) +size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2) { - DTableDesc dtd; - memcpy(&dtd, table, sizeof(dtd)); - return dtd; + DTableDesc const dtd = HUF_getDTableDesc(DTable); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)dtd; + assert(dtd.tableType == 0); + return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)dtd; + assert(dtd.tableType == 1); + return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +#else + return dtd.tableType ? 
HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) : + HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +#endif } - #ifndef HUF_FORCE_DECOMPRESS_X2 - -/*-***************************/ -/* single-symbol decoding */ -/*-***************************/ -typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX1; /* single-symbol decoding */ - -size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize) +size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2) { - U32 tableLog = 0; - U32 nbSymbols = 0; - size_t iSize; - void* const dtPtr = DTable + 1; - HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr; - - U32* rankVal; - BYTE* huffWeight; - size_t spaceUsed32 = 0; + const BYTE* ip = (const BYTE*) cSrc; - rankVal = (U32 *)workSpace + spaceUsed32; - spaceUsed32 += HUF_TABLELOG_ABSOLUTEMAX + 1; - huffWeight = (BYTE *)((U32 *)workSpace + spaceUsed32); - spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2; + size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; - if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge); + return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); +} +#endif - DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable)); - /* memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */ +size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2) +{ + DTableDesc const dtd = HUF_getDTableDesc(DTable); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)dtd; + assert(dtd.tableType == 0); + return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)dtd; + assert(dtd.tableType == 1); + return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +#else + return dtd.tableType ? 
HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) : + HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +#endif +} - iSize = HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize); - if (HUF_isError(iSize)) return iSize; +size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2) +{ + /* validation checks */ + if (dstSize == 0) return ERROR(dstSize_tooSmall); + if (cSrcSize == 0) return ERROR(corruption_detected); - /* Table header */ - { DTableDesc dtd = HUF_getDTableDesc(DTable); - if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge); /* DTable too small, Huffman tree cannot fit in */ - dtd.tableType = 0; - dtd.tableLog = (BYTE)tableLog; - memcpy(DTable, &dtd, sizeof(dtd)); + { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)algoNb; + assert(algoNb == 0); + return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)algoNb; + assert(algoNb == 1); + return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); +#else + return algoNb ? HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) : + HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); +#endif } - - /* Calculate starting value for each rank */ - { U32 n, nextRankStart = 0; - for (n=1; n> 1; - size_t const uStart = rankVal[w]; - size_t const uEnd = uStart + length; - size_t u; - HUF_DEltX1 D; - D.byte = (BYTE)n; - D.nbBits = (BYTE)(tableLog + 1 - w); - rankVal[w] = (U32)uEnd; - if (length < 4) { - /* Use length in the loop bound so the compiler knows it is short. */ - for (u = 0; u < length; ++u) - dt[uStart + u] = D; - } else { - /* Unroll the loop 4 times, we know it is a power of 2. 
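 *                  length equals (1 << w) >> 1 here, so whenever it is >= 4 it is
 *                  also a multiple of 4 and stepping u by 4 lands exactly on uEnd.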
*/ - for (u = uStart; u < uEnd; u += 4) { - dt[u + 0] = D; - dt[u + 1] = D; - dt[u + 2] = D; - dt[u + 3] = D; - } } } } - return iSize; } +#ifndef ZSTD_NO_UNUSED_FUNCTIONS +#ifndef HUF_FORCE_DECOMPRESS_X2 size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize) { U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; @@ -6863,1065 +9145,1108 @@ size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize) workSpace, sizeof(workSpace)); } -FORCE_INLINE_TEMPLATE BYTE -HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog) +size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize) { - size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */ - BYTE const c = dt[val].byte; - BIT_skipBits(Dstream, dt[val].nbBits); - return c; + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); } -#define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \ - *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog) +size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX); + return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize); +} +#endif -#define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr) \ - if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \ - HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_readDTableX2_wksp(DTable, src, srcSize, + workSpace, sizeof(workSpace)); +} + +size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} + +size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX); + return HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); +} +#endif + +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} +size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX); + return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); +} +#endif -#define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \ - if (MEM_64bits()) \ - HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} -HINT_INLINE size_t -HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog) +size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) { - BYTE* const pStart = p; + HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX); + return 
HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); +} +#endif - /* up to 4 symbols at a time */ - while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) { - HUF_DECODE_SYMBOLX1_2(p, bitDPtr); - HUF_DECODE_SYMBOLX1_1(p, bitDPtr); - HUF_DECODE_SYMBOLX1_2(p, bitDPtr); - HUF_DECODE_SYMBOLX1_0(p, bitDPtr); - } +typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); - /* [0-3] symbols remaining */ - if (MEM_32bits()) - while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd)) - HUF_DECODE_SYMBOLX1_0(p, bitDPtr); +size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ +#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2) + static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 }; +#endif - /* no more data to retrieve from bitstream, no need to reload */ - while (p < pEnd) - HUF_DECODE_SYMBOLX1_0(p, bitDPtr); + /* validation checks */ + if (dstSize == 0) return ERROR(dstSize_tooSmall); + if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ + if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ + if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ - return pEnd-pStart; + { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)algoNb; + assert(algoNb == 0); + return HUF_decompress4X1(dst, dstSize, cSrc, cSrcSize); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)algoNb; + assert(algoNb == 1); + return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize); +#else + return decompress[algoNb](dst, dstSize, cSrc, cSrcSize); +#endif + } } -FORCE_INLINE_TEMPLATE size_t -HUF_decompress1X1_usingDTable_internal_body( - void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - const HUF_DTable* DTable) +size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) { - BYTE* op = (BYTE*)dst; - BYTE* const oend = op + dstSize; - const void* dtPtr = DTable + 1; - const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr; - BIT_DStream_t bitD; - DTableDesc const dtd = HUF_getDTableDesc(DTable); - U32 const dtLog = dtd.tableLog; - - CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) ); - - HUF_decodeStreamX1(op, &bitD, oend, dt, dtLog); + /* validation checks */ + if (dstSize == 0) return ERROR(dstSize_tooSmall); + if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ + if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ + if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ - if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected); + { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)algoNb; + assert(algoNb == 0); + return HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)algoNb; + assert(algoNb == 1); + return HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize); +#else + return algoNb ? 
HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) : + HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ; +#endif + } +} - return dstSize; +size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); } -FORCE_INLINE_TEMPLATE size_t -HUF_decompress4X1_usingDTable_internal_body( - void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - const HUF_DTable* DTable) +size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize) { - /* Check */ - if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} +#endif +/**** ended inlining decompress/huf_decompress.c ****/ +/**** start inlining decompress/zstd_ddict.c ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ - { const BYTE* const istart = (const BYTE*) cSrc; - BYTE* const ostart = (BYTE*) dst; - BYTE* const oend = ostart + dstSize; - BYTE* const olimit = oend - 3; - const void* const dtPtr = DTable + 1; - const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr; +/* zstd_ddict.c : + * concentrates all logic that needs to know the internals of ZSTD_DDict object */ - /* Init */ - BIT_DStream_t bitD1; - BIT_DStream_t bitD2; - BIT_DStream_t bitD3; - BIT_DStream_t bitD4; - size_t const length1 = MEM_readLE16(istart); - size_t const length2 = MEM_readLE16(istart+2); - size_t const length3 = MEM_readLE16(istart+4); - size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6); - const BYTE* const istart1 = istart + 6; /* jumpTable */ - const BYTE* const istart2 = istart1 + length1; - const BYTE* const istart3 = istart2 + length2; - const BYTE* const istart4 = istart3 + length3; - const size_t segmentSize = (dstSize+3) / 4; - BYTE* const opStart2 = ostart + segmentSize; - BYTE* const opStart3 = opStart2 + segmentSize; - BYTE* const opStart4 = opStart3 + segmentSize; - BYTE* op1 = ostart; - BYTE* op2 = opStart2; - BYTE* op3 = opStart3; - BYTE* op4 = opStart4; - DTableDesc const dtd = HUF_getDTableDesc(DTable); - U32 const dtLog = dtd.tableLog; - U32 endSignal = 1; +/*-******************************************************* +* Dependencies +*********************************************************/ +/**** skipping file: ../common/zstd_deps.h ****/ +/**** start inlining ../common/cpu.h ****/ +/* + * Copyright (c) Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ - if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ - CHECK_F( BIT_initDStream(&bitD1, istart1, length1) ); - CHECK_F( BIT_initDStream(&bitD2, istart2, length2) ); - CHECK_F( BIT_initDStream(&bitD3, istart3, length3) ); - CHECK_F( BIT_initDStream(&bitD4, istart4, length4) ); +#ifndef ZSTD_COMMON_CPU_H +#define ZSTD_COMMON_CPU_H - /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */ - for ( ; (endSignal) & (op4 < olimit) ; ) { - HUF_DECODE_SYMBOLX1_2(op1, &bitD1); - HUF_DECODE_SYMBOLX1_2(op2, &bitD2); - HUF_DECODE_SYMBOLX1_2(op3, &bitD3); - HUF_DECODE_SYMBOLX1_2(op4, &bitD4); - HUF_DECODE_SYMBOLX1_1(op1, &bitD1); - HUF_DECODE_SYMBOLX1_1(op2, &bitD2); - HUF_DECODE_SYMBOLX1_1(op3, &bitD3); - HUF_DECODE_SYMBOLX1_1(op4, &bitD4); - HUF_DECODE_SYMBOLX1_2(op1, &bitD1); - HUF_DECODE_SYMBOLX1_2(op2, &bitD2); - HUF_DECODE_SYMBOLX1_2(op3, &bitD3); - HUF_DECODE_SYMBOLX1_2(op4, &bitD4); - HUF_DECODE_SYMBOLX1_0(op1, &bitD1); - HUF_DECODE_SYMBOLX1_0(op2, &bitD2); - HUF_DECODE_SYMBOLX1_0(op3, &bitD3); - HUF_DECODE_SYMBOLX1_0(op4, &bitD4); - endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished; - endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished; - endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished; - endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished; - } +/** + * Implementation taken from folly/CpuId.h + * https://github.com/facebook/folly/blob/master/folly/CpuId.h + */ - /* check corruption */ - /* note : should not be necessary : op# advance in lock step, and we control op4. - * but curiously, binary generated by gcc 7.2 & 7.3 with -mbmi2 runs faster when >=1 test is present */ - if (op1 > opStart2) return ERROR(corruption_detected); - if (op2 > opStart3) return ERROR(corruption_detected); - if (op3 > opStart4) return ERROR(corruption_detected); - /* note : op4 supposed already verified within main loop */ +/**** skipping file: mem.h ****/ - /* finish bitStreams one by one */ - HUF_decodeStreamX1(op1, &bitD1, opStart2, dt, dtLog); - HUF_decodeStreamX1(op2, &bitD2, opStart3, dt, dtLog); - HUF_decodeStreamX1(op3, &bitD3, opStart4, dt, dtLog); - HUF_decodeStreamX1(op4, &bitD4, oend, dt, dtLog); +#ifdef _MSC_VER +#include +#endif - /* check */ - { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); - if (!endCheck) return ERROR(corruption_detected); } +typedef struct { + U32 f1c; + U32 f1d; + U32 f7b; + U32 f7c; +} ZSTD_cpuid_t; - /* decoded size */ - return dstSize; +MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) { + U32 f1c = 0; + U32 f1d = 0; + U32 f7b = 0; + U32 f7c = 0; +#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86)) + int reg[4]; + __cpuid((int*)reg, 0); + { + int const n = reg[0]; + if (n >= 1) { + __cpuid((int*)reg, 1); + f1c = (U32)reg[2]; + f1d = (U32)reg[3]; + } + if (n >= 7) { + __cpuidex((int*)reg, 7, 0); + f7b = (U32)reg[1]; + f7c = (U32)reg[2]; + } + } +#elif defined(__i386__) && defined(__PIC__) && !defined(__clang__) && defined(__GNUC__) + /* The following block like the normal cpuid branch below, but gcc + * reserves ebx for use of its pic register so we must specially + * handle the save and restore to avoid clobbering the register + */ + U32 n; + __asm__( + "pushl %%ebx\n\t" + "cpuid\n\t" + "popl %%ebx\n\t" + : "=a"(n) + : "a"(0) + : "ecx", "edx"); + if (n >= 1) { + U32 f1a; + __asm__( + "pushl %%ebx\n\t" + "cpuid\n\t" + "popl %%ebx\n\t" + : "=a"(f1a), "=c"(f1c), "=d"(f1d) + : "a"(1)); + 
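        /* note: leaf 1 only needs the ECX/EDX outputs, so its EBX result is simply
         * discarded; the leaf-7 query below instead moves EBX into EAX before
         * restoring the PIC register. */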
} + if (n >= 7) { + __asm__( + "pushl %%ebx\n\t" + "cpuid\n\t" + "movl %%ebx, %%eax\n\t" + "popl %%ebx" + : "=a"(f7b), "=c"(f7c) + : "a"(7), "c"(0) + : "edx"); + } +#elif defined(__x86_64__) || defined(_M_X64) || defined(__i386__) + U32 n; + __asm__("cpuid" : "=a"(n) : "a"(0) : "ebx", "ecx", "edx"); + if (n >= 1) { + U32 f1a; + __asm__("cpuid" : "=a"(f1a), "=c"(f1c), "=d"(f1d) : "a"(1) : "ebx"); + } + if (n >= 7) { + U32 f7a; + __asm__("cpuid" + : "=a"(f7a), "=b"(f7b), "=c"(f7c) + : "a"(7), "c"(0) + : "edx"); + } +#endif + { + ZSTD_cpuid_t cpuid; + cpuid.f1c = f1c; + cpuid.f1d = f1d; + cpuid.f7b = f7b; + cpuid.f7c = f7c; + return cpuid; } } +#define X(name, r, bit) \ + MEM_STATIC int ZSTD_cpuid_##name(ZSTD_cpuid_t const cpuid) { \ + return ((cpuid.r) & (1U << bit)) != 0; \ + } -typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize, - const void *cSrc, - size_t cSrcSize, - const HUF_DTable *DTable); +/* cpuid(1): Processor Info and Feature Bits. */ +#define C(name, bit) X(name, f1c, bit) + C(sse3, 0) + C(pclmuldq, 1) + C(dtes64, 2) + C(monitor, 3) + C(dscpl, 4) + C(vmx, 5) + C(smx, 6) + C(eist, 7) + C(tm2, 8) + C(ssse3, 9) + C(cnxtid, 10) + C(fma, 12) + C(cx16, 13) + C(xtpr, 14) + C(pdcm, 15) + C(pcid, 17) + C(dca, 18) + C(sse41, 19) + C(sse42, 20) + C(x2apic, 21) + C(movbe, 22) + C(popcnt, 23) + C(tscdeadline, 24) + C(aes, 25) + C(xsave, 26) + C(osxsave, 27) + C(avx, 28) + C(f16c, 29) + C(rdrand, 30) +#undef C +#define D(name, bit) X(name, f1d, bit) + D(fpu, 0) + D(vme, 1) + D(de, 2) + D(pse, 3) + D(tsc, 4) + D(msr, 5) + D(pae, 6) + D(mce, 7) + D(cx8, 8) + D(apic, 9) + D(sep, 11) + D(mtrr, 12) + D(pge, 13) + D(mca, 14) + D(cmov, 15) + D(pat, 16) + D(pse36, 17) + D(psn, 18) + D(clfsh, 19) + D(ds, 21) + D(acpi, 22) + D(mmx, 23) + D(fxsr, 24) + D(sse, 25) + D(sse2, 26) + D(ss, 27) + D(htt, 28) + D(tm, 29) + D(pbe, 31) +#undef D -HUF_DGEN(HUF_decompress1X1_usingDTable_internal) -HUF_DGEN(HUF_decompress4X1_usingDTable_internal) +/* cpuid(7): Extended Features. */ +#define B(name, bit) X(name, f7b, bit) + B(bmi1, 3) + B(hle, 4) + B(avx2, 5) + B(smep, 7) + B(bmi2, 8) + B(erms, 9) + B(invpcid, 10) + B(rtm, 11) + B(mpx, 14) + B(avx512f, 16) + B(avx512dq, 17) + B(rdseed, 18) + B(adx, 19) + B(smap, 20) + B(avx512ifma, 21) + B(pcommit, 22) + B(clflushopt, 23) + B(clwb, 24) + B(avx512pf, 26) + B(avx512er, 27) + B(avx512cd, 28) + B(sha, 29) + B(avx512bw, 30) + B(avx512vl, 31) +#undef B +#define C(name, bit) X(name, f7c, bit) + C(prefetchwt1, 0) + C(avx512vbmi, 1) +#undef C +#undef X +#endif /* ZSTD_COMMON_CPU_H */ +/**** ended inlining ../common/cpu.h ****/ +/**** skipping file: ../common/mem.h ****/ +#define FSE_STATIC_LINKING_ONLY +/**** skipping file: ../common/fse.h ****/ +#define HUF_STATIC_LINKING_ONLY +/**** skipping file: ../common/huf.h ****/ +/**** start inlining zstd_decompress_internal.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ -size_t HUF_decompress1X1_usingDTable( - void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - const HUF_DTable* DTable) -{ - DTableDesc dtd = HUF_getDTableDesc(DTable); - if (dtd.tableType != 0) return ERROR(GENERIC); - return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); -} -size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - void* workSpace, size_t wkspSize) -{ - const BYTE* ip = (const BYTE*) cSrc; +/* zstd_decompress_internal: + * objects and definitions shared within lib/decompress modules */ - size_t const hSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize); - if (HUF_isError(hSize)) return hSize; - if (hSize >= cSrcSize) return ERROR(srcSize_wrong); - ip += hSize; cSrcSize -= hSize; + #ifndef ZSTD_DECOMPRESS_INTERNAL_H + #define ZSTD_DECOMPRESS_INTERNAL_H - return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0); -} +/*-******************************************************* + * Dependencies + *********************************************************/ +/**** skipping file: ../common/mem.h ****/ +/**** skipping file: ../common/zstd_internal.h ****/ -size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize) -{ - U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; - return HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize, - workSpace, sizeof(workSpace)); -} -size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) -{ - HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX); - return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize); -} -size_t HUF_decompress4X1_usingDTable( - void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - const HUF_DTable* DTable) -{ - DTableDesc dtd = HUF_getDTableDesc(DTable); - if (dtd.tableType != 0) return ERROR(GENERIC); - return HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); -} +/*-******************************************************* + * Constants + *********************************************************/ +static UNUSED_ATTR const U32 LL_base[MaxLL+1] = { + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 18, 20, 22, 24, 28, 32, 40, + 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, + 0x2000, 0x4000, 0x8000, 0x10000 }; -static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - void* workSpace, size_t wkspSize, int bmi2) -{ - const BYTE* ip = (const BYTE*) cSrc; +static UNUSED_ATTR const U32 OF_base[MaxOff+1] = { + 0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D, + 0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD, + 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD, + 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD }; - size_t const hSize = HUF_readDTableX1_wksp (dctx, cSrc, cSrcSize, - workSpace, wkspSize); - if (HUF_isError(hSize)) return hSize; - if (hSize >= cSrcSize) return ERROR(srcSize_wrong); - ip += hSize; cSrcSize -= hSize; +static UNUSED_ATTR const U32 OF_bits[MaxOff+1] = { + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31 }; - return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); -} +static UNUSED_ATTR const U32 ML_base[MaxML+1] = { + 3, 4, 5, 
6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 26, + 27, 28, 29, 30, 31, 32, 33, 34, + 35, 37, 39, 41, 43, 47, 51, 59, + 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803, + 0x1003, 0x2003, 0x4003, 0x8003, 0x10003 }; -size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - void* workSpace, size_t wkspSize) -{ - return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0); -} +/*-******************************************************* + * Decompression types + *********************************************************/ + typedef struct { + U32 fastMode; + U32 tableLog; + } ZSTD_seqSymbol_header; + + typedef struct { + U16 nextState; + BYTE nbAdditionalBits; + BYTE nbBits; + U32 baseValue; + } ZSTD_seqSymbol; -size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) -{ - U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; - return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, - workSpace, sizeof(workSpace)); -} -size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) -{ - HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX); - return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); -} + #define SEQSYMBOL_TABLE_SIZE(log) (1 + (1 << (log))) -#endif /* HUF_FORCE_DECOMPRESS_X2 */ +#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE (sizeof(S16) * (MaxSeq + 1) + (1u << MaxFSELog) + sizeof(U64)) +#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32 ((ZSTD_BUILD_FSE_TABLE_WKSP_SIZE + sizeof(U32) - 1) / sizeof(U32)) +typedef struct { + ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */ + ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */ + ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */ + HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */ + U32 rep[ZSTD_REP_NUM]; + U32 workspace[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32]; +} ZSTD_entropyDTables_t; -#ifndef HUF_FORCE_DECOMPRESS_X1 +typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader, + ZSTDds_decodeBlockHeader, ZSTDds_decompressBlock, + ZSTDds_decompressLastBlock, ZSTDds_checkChecksum, + ZSTDds_decodeSkippableHeader, ZSTDds_skipFrame } ZSTD_dStage; -/* *************************/ -/* double-symbols decoding */ -/* *************************/ +typedef enum { zdss_init=0, zdss_loadHeader, + zdss_read, zdss_load, zdss_flush } ZSTD_dStreamStage; -typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2; /* double-symbols decoding */ -typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t; -typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1]; -typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX]; +typedef enum { + ZSTD_use_indefinitely = -1, /* Use the dictionary indefinitely */ + ZSTD_dont_use = 0, /* Do not use the dictionary (if one exists free it) */ + ZSTD_use_once = 1 /* Use the dictionary once and set to ZSTD_dont_use */ +} ZSTD_dictUses_e; +/* Hashset for storing references to multiple ZSTD_DDict within ZSTD_DCtx */ +typedef struct { + const ZSTD_DDict** ddictPtrTable; + size_t ddictPtrTableSize; + size_t ddictPtrCount; +} ZSTD_DDictHashSet; -/* HUF_fillDTableX2Level2() : - * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */ -static void 
HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 sizeLog, const U32 consumed, - const U32* rankValOrigin, const int minWeight, - const sortedSymbol_t* sortedSymbols, const U32 sortedListSize, - U32 nbBitsBaseline, U16 baseSeq) +struct ZSTD_DCtx_s { - HUF_DEltX2 DElt; - U32 rankVal[HUF_TABLELOG_MAX + 1]; + const ZSTD_seqSymbol* LLTptr; + const ZSTD_seqSymbol* MLTptr; + const ZSTD_seqSymbol* OFTptr; + const HUF_DTable* HUFptr; + ZSTD_entropyDTables_t entropy; + U32 workspace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; /* space needed when building huffman tables */ + const void* previousDstEnd; /* detect continuity */ + const void* prefixStart; /* start of current segment */ + const void* virtualStart; /* virtual start of previous segment if it was just before current one */ + const void* dictEnd; /* end of previous segment */ + size_t expected; + ZSTD_frameHeader fParams; + U64 processedCSize; + U64 decodedSize; + blockType_e bType; /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */ + ZSTD_dStage stage; + U32 litEntropy; + U32 fseEntropy; + XXH64_state_t xxhState; + size_t headerSize; + ZSTD_format_e format; + ZSTD_forceIgnoreChecksum_e forceIgnoreChecksum; /* User specified: if == 1, will ignore checksums in compressed frame. Default == 0 */ + U32 validateChecksum; /* if == 1, will validate checksum. Is == 1 if (fParams.checksumFlag == 1) and (forceIgnoreChecksum == 0). */ + const BYTE* litPtr; + ZSTD_customMem customMem; + size_t litSize; + size_t rleSize; + size_t staticSize; + int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */ - /* get pre-calculated rankVal */ - memcpy(rankVal, rankValOrigin, sizeof(rankVal)); + /* dictionary */ + ZSTD_DDict* ddictLocal; + const ZSTD_DDict* ddict; /* set by ZSTD_initDStream_usingDDict(), or ZSTD_DCtx_refDDict() */ + U32 dictID; + int ddictIsCold; /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */ + ZSTD_dictUses_e dictUses; + ZSTD_DDictHashSet* ddictSet; /* Hash set for multiple ddicts */ + ZSTD_refMultipleDDicts_e refMultipleDDicts; /* User specified: if == 1, will allow references to multiple DDicts. 
Default == 0 (disabled) */ - /* fill skipped values */ - if (minWeight>1) { - U32 i, skipSize = rankVal[minWeight]; - MEM_writeLE16(&(DElt.sequence), baseSeq); - DElt.nbBits = (BYTE)(consumed); - DElt.length = 1; - for (i = 0; i < skipSize; i++) - DTable[i] = DElt; - } + /* streaming */ + ZSTD_dStreamStage streamStage; + char* inBuff; + size_t inBuffSize; + size_t inPos; + size_t maxWindowSize; + char* outBuff; + size_t outBuffSize; + size_t outStart; + size_t outEnd; + size_t lhSize; + void* legacyContext; + U32 previousLegacyVersion; + U32 legacyVersion; + U32 hostageByte; + int noForwardProgress; + ZSTD_bufferMode_e outBufferMode; + ZSTD_outBuffer expectedOutBuffer; - /* fill DTable */ - { U32 s; for (s=0; s= 1 */ + size_t oversizedDuration; - rankVal[weight] += length; - } } -} +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + void const* dictContentBeginForFuzzing; + void const* dictContentEndForFuzzing; +#endif + /* Tracing */ +#if ZSTD_TRACE + ZSTD_TraceCtx traceCtx; +#endif +}; /* typedef'd to ZSTD_DCtx within "zstd.h" */ -static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog, - const sortedSymbol_t* sortedList, const U32 sortedListSize, - const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight, - const U32 nbBitsBaseline) -{ - U32 rankVal[HUF_TABLELOG_MAX + 1]; - const int scaleLog = nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */ - const U32 minBits = nbBitsBaseline - maxWeight; - U32 s; - memcpy(rankVal, rankValOrigin, sizeof(rankVal)); +/*-******************************************************* + * Shared internal functions + *********************************************************/ - /* fill DTable */ - for (s=0; s= minBits) { /* enough room for a second symbol */ - U32 sortedRank; - int minWeight = nbBits + scaleLog; - if (minWeight < 1) minWeight = 1; - sortedRank = rankStart[minWeight]; - HUF_fillDTableX2Level2(DTable+start, targetLog-nbBits, nbBits, - rankValOrigin[nbBits], minWeight, - sortedList+sortedRank, sortedListSize-sortedRank, - nbBitsBaseline, symbol); - } else { - HUF_DEltX2 DElt; - MEM_writeLE16(&(DElt.sequence), symbol); - DElt.nbBits = (BYTE)(nbBits); - DElt.length = 1; - { U32 const end = start + length; - U32 u; - for (u = start; u < end; u++) DTable[u] = DElt; - } } - rankVal[weight] += length; - } -} +/*! ZSTD_checkContinuity() : + * check if next `dst` follows previous position, where decompression ended. + * If yes, do nothing (continue on current segment). + * If not, classify previous segment as "external dictionary", and start a new segment. + * This function cannot fail. 
*/ +void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize); -size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, - const void* src, size_t srcSize, - void* workSpace, size_t wkspSize) -{ - U32 tableLog, maxW, sizeOfSort, nbSymbols; - DTableDesc dtd = HUF_getDTableDesc(DTable); - U32 const maxTableLog = dtd.maxTableLog; - size_t iSize; - void* dtPtr = DTable+1; /* force compiler to avoid strict-aliasing */ - HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr; - U32 *rankStart; - rankValCol_t* rankVal; - U32* rankStats; - U32* rankStart0; - sortedSymbol_t* sortedSymbol; - BYTE* weightList; - size_t spaceUsed32 = 0; - - rankVal = (rankValCol_t *)((U32 *)workSpace + spaceUsed32); - spaceUsed32 += (sizeof(rankValCol_t) * HUF_TABLELOG_MAX) >> 2; - rankStats = (U32 *)workSpace + spaceUsed32; - spaceUsed32 += HUF_TABLELOG_MAX + 1; - rankStart0 = (U32 *)workSpace + spaceUsed32; - spaceUsed32 += HUF_TABLELOG_MAX + 2; - sortedSymbol = (sortedSymbol_t *)workSpace + (spaceUsed32 * sizeof(U32)) / sizeof(sortedSymbol_t); - spaceUsed32 += HUF_ALIGN(sizeof(sortedSymbol_t) * (HUF_SYMBOLVALUE_MAX + 1), sizeof(U32)) >> 2; - weightList = (BYTE *)((U32 *)workSpace + spaceUsed32); - spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2; - - if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge); - - rankStart = rankStart0 + 1; - memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1)); +#endif /* ZSTD_DECOMPRESS_INTERNAL_H */ +/**** ended inlining zstd_decompress_internal.h ****/ +/**** start inlining zstd_ddict.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + - DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */ - if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); - /* memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... 
*/ +#ifndef ZSTD_DDICT_H +#define ZSTD_DDICT_H - iSize = HUF_readStats(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize); - if (HUF_isError(iSize)) return iSize; +/*-******************************************************* + * Dependencies + *********************************************************/ +/**** skipping file: ../common/zstd_deps.h ****/ +/**** skipping file: ../zstd.h ****/ - /* check result */ - if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */ - /* find maxWeight */ - for (maxW = tableLog; rankStats[maxW]==0; maxW--) {} /* necessarily finds a solution before 0 */ +/*-******************************************************* + * Interface + *********************************************************/ - /* Get start index of each weight */ - { U32 w, nextRankStart = 0; - for (w=1; w> consumed; - } } } } +void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); - HUF_fillDTableX2(dt, maxTableLog, - sortedSymbol, sizeOfSort, - rankStart0, rankVal, maxW, - tableLog+1); - dtd.tableLog = (BYTE)maxTableLog; - dtd.tableType = 1; - memcpy(DTable, &dtd, sizeof(dtd)); - return iSize; -} -size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize) -{ - U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; - return HUF_readDTableX2_wksp(DTable, src, srcSize, - workSpace, sizeof(workSpace)); -} +#endif /* ZSTD_DDICT_H */ +/**** ended inlining zstd_ddict.h ****/ +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) +/**** start inlining ../legacy/zstd_legacy.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ -FORCE_INLINE_TEMPLATE U32 -HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog) -{ - size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */ - memcpy(op, dt+val, 2); - BIT_skipBits(DStream, dt[val].nbBits); - return dt[val].length; -} +#ifndef ZSTD_LEGACY_H +#define ZSTD_LEGACY_H -FORCE_INLINE_TEMPLATE U32 -HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog) -{ - size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */ - memcpy(op, dt+val, 1); - if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits); - else { - if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) { - BIT_skipBits(DStream, dt[val].nbBits); - if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8)) - /* ugly hack; works only because it's the last symbol. 
Note : can't easily extract nbBits from just this symbol */ - DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8); - } } - return 1; -} +#if defined (__cplusplus) +extern "C" { +#endif -#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \ - ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) +/* ************************************* +* Includes +***************************************/ +/**** skipping file: ../common/mem.h ****/ +/**** skipping file: ../common/error_private.h ****/ +/**** skipping file: ../common/zstd_internal.h ****/ -#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \ - if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \ - ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) +#if !defined (ZSTD_LEGACY_SUPPORT) || (ZSTD_LEGACY_SUPPORT == 0) +# undef ZSTD_LEGACY_SUPPORT +# define ZSTD_LEGACY_SUPPORT 8 +#endif -#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \ - if (MEM_64bits()) \ - ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) +#if (ZSTD_LEGACY_SUPPORT <= 1) +/**** start inlining zstd_v01.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ -HINT_INLINE size_t -HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, - const HUF_DEltX2* const dt, const U32 dtLog) -{ - BYTE* const pStart = p; +#ifndef ZSTD_V01_H_28739879432 +#define ZSTD_V01_H_28739879432 - /* up to 8 symbols at a time */ - while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) { - HUF_DECODE_SYMBOLX2_2(p, bitDPtr); - HUF_DECODE_SYMBOLX2_1(p, bitDPtr); - HUF_DECODE_SYMBOLX2_2(p, bitDPtr); - HUF_DECODE_SYMBOLX2_0(p, bitDPtr); - } +#if defined (__cplusplus) +extern "C" { +#endif - /* closer to end : up to 2 symbols at a time */ - while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2)) - HUF_DECODE_SYMBOLX2_0(p, bitDPtr); +/* ************************************* +* Includes +***************************************/ +#include /* size_t */ - while (p <= pEnd-2) - HUF_DECODE_SYMBOLX2_0(p, bitDPtr); /* no need to reload : reached the end of DStream */ - if (p < pEnd) - p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog); +/* ************************************* +* Simple one-step function +***************************************/ +/** +ZSTDv01_decompress() : decompress ZSTD frames compliant with v0.1.x format + compressedSize : is the exact source size + maxOriginalSize : is the size of the 'dst' buffer, which must be already allocated. + It must be equal or larger than originalSize, otherwise decompression will fail. 
+ return : the number of bytes decompressed into destination buffer (originalSize) + or an errorCode if it fails (which can be tested using ZSTDv01_isError()) +*/ +size_t ZSTDv01_decompress( void* dst, size_t maxOriginalSize, + const void* src, size_t compressedSize); + + /** + ZSTDv01_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.1.x format + srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src' + cSize (output parameter) : the number of bytes that would be read to decompress this frame + or an error code if it fails (which can be tested using ZSTDv01_isError()) + dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame + or ZSTD_CONTENTSIZE_ERROR if an error occurs + + note : assumes `cSize` and `dBound` are _not_ NULL. + */ +void ZSTDv01_findFrameSizeInfoLegacy(const void *src, size_t srcSize, + size_t* cSize, unsigned long long* dBound); - return p-pStart; -} +/** +ZSTDv01_isError() : tells if the result of ZSTDv01_decompress() is an error +*/ +unsigned ZSTDv01_isError(size_t code); -FORCE_INLINE_TEMPLATE size_t -HUF_decompress1X2_usingDTable_internal_body( - void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - const HUF_DTable* DTable) -{ - BIT_DStream_t bitD; - /* Init */ - CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) ); +/* ************************************* +* Advanced functions +***************************************/ +typedef struct ZSTDv01_Dctx_s ZSTDv01_Dctx; +ZSTDv01_Dctx* ZSTDv01_createDCtx(void); +size_t ZSTDv01_freeDCtx(ZSTDv01_Dctx* dctx); - /* decode */ - { BYTE* const ostart = (BYTE*) dst; - BYTE* const oend = ostart + dstSize; - const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */ - const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr; - DTableDesc const dtd = HUF_getDTableDesc(DTable); - HUF_decodeStreamX2(ostart, &bitD, oend, dt, dtd.tableLog); - } +size_t ZSTDv01_decompressDCtx(void* ctx, + void* dst, size_t maxOriginalSize, + const void* src, size_t compressedSize); - /* check */ - if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected); +/* ************************************* +* Streaming functions +***************************************/ +size_t ZSTDv01_resetDCtx(ZSTDv01_Dctx* dctx); - /* decoded size */ - return dstSize; -} +size_t ZSTDv01_nextSrcSizeToDecompress(ZSTDv01_Dctx* dctx); +size_t ZSTDv01_decompressContinue(ZSTDv01_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize); +/** + Use above functions alternatively. + ZSTD_nextSrcSizeToDecompress() tells how much bytes to provide as 'srcSize' to ZSTD_decompressContinue(). + ZSTD_decompressContinue() will use previous data blocks to improve compression if they are located prior to current block. + Result is the number of bytes regenerated within 'dst'. + It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header. 
+*/ -FORCE_INLINE_TEMPLATE size_t -HUF_decompress4X2_usingDTable_internal_body( - void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - const HUF_DTable* DTable) -{ - if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ +/* ************************************* +* Prefix - version detection +***************************************/ +#define ZSTDv01_magicNumber 0xFD2FB51E /* Big Endian version */ +#define ZSTDv01_magicNumberLE 0x1EB52FFD /* Little Endian version */ - { const BYTE* const istart = (const BYTE*) cSrc; - BYTE* const ostart = (BYTE*) dst; - BYTE* const oend = ostart + dstSize; - BYTE* const olimit = oend - (sizeof(size_t)-1); - const void* const dtPtr = DTable+1; - const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr; - /* Init */ - BIT_DStream_t bitD1; - BIT_DStream_t bitD2; - BIT_DStream_t bitD3; - BIT_DStream_t bitD4; - size_t const length1 = MEM_readLE16(istart); - size_t const length2 = MEM_readLE16(istart+2); - size_t const length3 = MEM_readLE16(istart+4); - size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6); - const BYTE* const istart1 = istart + 6; /* jumpTable */ - const BYTE* const istart2 = istart1 + length1; - const BYTE* const istart3 = istart2 + length2; - const BYTE* const istart4 = istart3 + length3; - size_t const segmentSize = (dstSize+3) / 4; - BYTE* const opStart2 = ostart + segmentSize; - BYTE* const opStart3 = opStart2 + segmentSize; - BYTE* const opStart4 = opStart3 + segmentSize; - BYTE* op1 = ostart; - BYTE* op2 = opStart2; - BYTE* op3 = opStart3; - BYTE* op4 = opStart4; - U32 endSignal = 1; - DTableDesc const dtd = HUF_getDTableDesc(DTable); - U32 const dtLog = dtd.tableLog; +#if defined (__cplusplus) +} +#endif - if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ - CHECK_F( BIT_initDStream(&bitD1, istart1, length1) ); - CHECK_F( BIT_initDStream(&bitD2, istart2, length2) ); - CHECK_F( BIT_initDStream(&bitD3, istart3, length3) ); - CHECK_F( BIT_initDStream(&bitD4, istart4, length4) ); +#endif /* ZSTD_V01_H_28739879432 */ +/**** ended inlining zstd_v01.h ****/ +#endif +#if (ZSTD_LEGACY_SUPPORT <= 2) +/**** start inlining zstd_v02.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ - /* 16-32 symbols per loop (4-8 symbols per stream) */ - for ( ; (endSignal) & (op4 < olimit); ) { -#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__)) - HUF_DECODE_SYMBOLX2_2(op1, &bitD1); - HUF_DECODE_SYMBOLX2_1(op1, &bitD1); - HUF_DECODE_SYMBOLX2_2(op1, &bitD1); - HUF_DECODE_SYMBOLX2_0(op1, &bitD1); - HUF_DECODE_SYMBOLX2_2(op2, &bitD2); - HUF_DECODE_SYMBOLX2_1(op2, &bitD2); - HUF_DECODE_SYMBOLX2_2(op2, &bitD2); - HUF_DECODE_SYMBOLX2_0(op2, &bitD2); - endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished; - endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished; - HUF_DECODE_SYMBOLX2_2(op3, &bitD3); - HUF_DECODE_SYMBOLX2_1(op3, &bitD3); - HUF_DECODE_SYMBOLX2_2(op3, &bitD3); - HUF_DECODE_SYMBOLX2_0(op3, &bitD3); - HUF_DECODE_SYMBOLX2_2(op4, &bitD4); - HUF_DECODE_SYMBOLX2_1(op4, &bitD4); - HUF_DECODE_SYMBOLX2_2(op4, &bitD4); - HUF_DECODE_SYMBOLX2_0(op4, &bitD4); - endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished; - endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished; -#else - HUF_DECODE_SYMBOLX2_2(op1, &bitD1); - HUF_DECODE_SYMBOLX2_2(op2, &bitD2); - HUF_DECODE_SYMBOLX2_2(op3, &bitD3); - HUF_DECODE_SYMBOLX2_2(op4, &bitD4); - HUF_DECODE_SYMBOLX2_1(op1, &bitD1); - HUF_DECODE_SYMBOLX2_1(op2, &bitD2); - HUF_DECODE_SYMBOLX2_1(op3, &bitD3); - HUF_DECODE_SYMBOLX2_1(op4, &bitD4); - HUF_DECODE_SYMBOLX2_2(op1, &bitD1); - HUF_DECODE_SYMBOLX2_2(op2, &bitD2); - HUF_DECODE_SYMBOLX2_2(op3, &bitD3); - HUF_DECODE_SYMBOLX2_2(op4, &bitD4); - HUF_DECODE_SYMBOLX2_0(op1, &bitD1); - HUF_DECODE_SYMBOLX2_0(op2, &bitD2); - HUF_DECODE_SYMBOLX2_0(op3, &bitD3); - HUF_DECODE_SYMBOLX2_0(op4, &bitD4); - endSignal = LIKELY( - (BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished) - & (BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished) - & (BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished) - & (BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished)); +#ifndef ZSTD_V02_H_4174539423 +#define ZSTD_V02_H_4174539423 + +#if defined (__cplusplus) +extern "C" { #endif - } - /* check corruption */ - if (op1 > opStart2) return ERROR(corruption_detected); - if (op2 > opStart3) return ERROR(corruption_detected); - if (op3 > opStart4) return ERROR(corruption_detected); - /* note : op4 already verified within main loop */ +/* ************************************* +* Includes +***************************************/ +#include /* size_t */ - /* finish bitStreams one by one */ - HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog); - HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog); - HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog); - HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog); - /* check */ - { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); - if (!endCheck) return ERROR(corruption_detected); } +/* ************************************* +* Simple one-step function +***************************************/ +/** +ZSTDv02_decompress() : decompress ZSTD frames compliant with v0.2.x format + compressedSize : is the exact source size + maxOriginalSize : is the size of the 'dst' buffer, which must be already allocated. + It must be equal or larger than originalSize, otherwise decompression will fail. 
+ return : the number of bytes decompressed into destination buffer (originalSize) + or an errorCode if it fails (which can be tested using ZSTDv01_isError()) +*/ +size_t ZSTDv02_decompress( void* dst, size_t maxOriginalSize, + const void* src, size_t compressedSize); + + /** + ZSTDv02_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.2.x format + srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src' + cSize (output parameter) : the number of bytes that would be read to decompress this frame + or an error code if it fails (which can be tested using ZSTDv01_isError()) + dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame + or ZSTD_CONTENTSIZE_ERROR if an error occurs + + note : assumes `cSize` and `dBound` are _not_ NULL. + */ +void ZSTDv02_findFrameSizeInfoLegacy(const void *src, size_t srcSize, + size_t* cSize, unsigned long long* dBound); - /* decoded size */ - return dstSize; - } -} +/** +ZSTDv02_isError() : tells if the result of ZSTDv02_decompress() is an error +*/ +unsigned ZSTDv02_isError(size_t code); -HUF_DGEN(HUF_decompress1X2_usingDTable_internal) -HUF_DGEN(HUF_decompress4X2_usingDTable_internal) -size_t HUF_decompress1X2_usingDTable( - void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - const HUF_DTable* DTable) -{ - DTableDesc dtd = HUF_getDTableDesc(DTable); - if (dtd.tableType != 1) return ERROR(GENERIC); - return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); -} +/* ************************************* +* Advanced functions +***************************************/ +typedef struct ZSTDv02_Dctx_s ZSTDv02_Dctx; +ZSTDv02_Dctx* ZSTDv02_createDCtx(void); +size_t ZSTDv02_freeDCtx(ZSTDv02_Dctx* dctx); -size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - void* workSpace, size_t wkspSize) -{ - const BYTE* ip = (const BYTE*) cSrc; +size_t ZSTDv02_decompressDCtx(void* ctx, + void* dst, size_t maxOriginalSize, + const void* src, size_t compressedSize); - size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize, - workSpace, wkspSize); - if (HUF_isError(hSize)) return hSize; - if (hSize >= cSrcSize) return ERROR(srcSize_wrong); - ip += hSize; cSrcSize -= hSize; +/* ************************************* +* Streaming functions +***************************************/ +size_t ZSTDv02_resetDCtx(ZSTDv02_Dctx* dctx); - return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0); -} +size_t ZSTDv02_nextSrcSizeToDecompress(ZSTDv02_Dctx* dctx); +size_t ZSTDv02_decompressContinue(ZSTDv02_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize); +/** + Use above functions alternatively. + ZSTD_nextSrcSizeToDecompress() tells how much bytes to provide as 'srcSize' to ZSTD_decompressContinue(). + ZSTD_decompressContinue() will use previous data blocks to improve compression if they are located prior to current block. + Result is the number of bytes regenerated within 'dst'. + It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header. 
+*/ +/* ************************************* +* Prefix - version detection +***************************************/ +#define ZSTDv02_magicNumber 0xFD2FB522 /* v0.2 */ -size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize) -{ - U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; - return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize, - workSpace, sizeof(workSpace)); -} -size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) -{ - HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX); - return HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); +#if defined (__cplusplus) } +#endif -size_t HUF_decompress4X2_usingDTable( - void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - const HUF_DTable* DTable) -{ - DTableDesc dtd = HUF_getDTableDesc(DTable); - if (dtd.tableType != 1) return ERROR(GENERIC); - return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); -} +#endif /* ZSTD_V02_H_4174539423 */ +/**** ended inlining zstd_v02.h ****/ +#endif +#if (ZSTD_LEGACY_SUPPORT <= 3) +/**** start inlining zstd_v03.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ -static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - void* workSpace, size_t wkspSize, int bmi2) -{ - const BYTE* ip = (const BYTE*) cSrc; +#ifndef ZSTD_V03_H_298734209782 +#define ZSTD_V03_H_298734209782 - size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize, - workSpace, wkspSize); - if (HUF_isError(hSize)) return hSize; - if (hSize >= cSrcSize) return ERROR(srcSize_wrong); - ip += hSize; cSrcSize -= hSize; +#if defined (__cplusplus) +extern "C" { +#endif - return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); -} +/* ************************************* +* Includes +***************************************/ +#include /* size_t */ -size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - void* workSpace, size_t wkspSize) -{ - return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0); -} +/* ************************************* +* Simple one-step function +***************************************/ +/** +ZSTDv03_decompress() : decompress ZSTD frames compliant with v0.3.x format + compressedSize : is the exact source size + maxOriginalSize : is the size of the 'dst' buffer, which must be already allocated. + It must be equal or larger than originalSize, otherwise decompression will fail. 
+ return : the number of bytes decompressed into destination buffer (originalSize) + or an errorCode if it fails (which can be tested using ZSTDv01_isError()) +*/ +size_t ZSTDv03_decompress( void* dst, size_t maxOriginalSize, + const void* src, size_t compressedSize); + + /** + ZSTDv03_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.3.x format + srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src' + cSize (output parameter) : the number of bytes that would be read to decompress this frame + or an error code if it fails (which can be tested using ZSTDv01_isError()) + dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame + or ZSTD_CONTENTSIZE_ERROR if an error occurs + + note : assumes `cSize` and `dBound` are _not_ NULL. + */ + void ZSTDv03_findFrameSizeInfoLegacy(const void *src, size_t srcSize, + size_t* cSize, unsigned long long* dBound); -size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize) -{ - U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; - return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, - workSpace, sizeof(workSpace)); -} + /** +ZSTDv03_isError() : tells if the result of ZSTDv03_decompress() is an error +*/ +unsigned ZSTDv03_isError(size_t code); -size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) -{ - HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX); - return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); -} -#endif /* HUF_FORCE_DECOMPRESS_X1 */ +/* ************************************* +* Advanced functions +***************************************/ +typedef struct ZSTDv03_Dctx_s ZSTDv03_Dctx; +ZSTDv03_Dctx* ZSTDv03_createDCtx(void); +size_t ZSTDv03_freeDCtx(ZSTDv03_Dctx* dctx); +size_t ZSTDv03_decompressDCtx(void* ctx, + void* dst, size_t maxOriginalSize, + const void* src, size_t compressedSize); -/* ***********************************/ -/* Universal decompression selectors */ -/* ***********************************/ +/* ************************************* +* Streaming functions +***************************************/ +size_t ZSTDv03_resetDCtx(ZSTDv03_Dctx* dctx); -size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, - const void* cSrc, size_t cSrcSize, - const HUF_DTable* DTable) -{ - DTableDesc const dtd = HUF_getDTableDesc(DTable); -#if defined(HUF_FORCE_DECOMPRESS_X1) - (void)dtd; - assert(dtd.tableType == 0); - return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); -#elif defined(HUF_FORCE_DECOMPRESS_X2) - (void)dtd; - assert(dtd.tableType == 1); - return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); -#else - return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) : - HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); -#endif -} +size_t ZSTDv03_nextSrcSizeToDecompress(ZSTDv03_Dctx* dctx); +size_t ZSTDv03_decompressContinue(ZSTDv03_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize); +/** + Use above functions alternatively. + ZSTD_nextSrcSizeToDecompress() tells how much bytes to provide as 'srcSize' to ZSTD_decompressContinue(). + ZSTD_decompressContinue() will use previous data blocks to improve compression if they are located prior to current block. 
+ Result is the number of bytes regenerated within 'dst'. + It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header. +*/ -size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, - const void* cSrc, size_t cSrcSize, - const HUF_DTable* DTable) -{ - DTableDesc const dtd = HUF_getDTableDesc(DTable); -#if defined(HUF_FORCE_DECOMPRESS_X1) - (void)dtd; - assert(dtd.tableType == 0); - return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); -#elif defined(HUF_FORCE_DECOMPRESS_X2) - (void)dtd; - assert(dtd.tableType == 1); - return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); -#else - return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) : - HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); -#endif -} +/* ************************************* +* Prefix - version detection +***************************************/ +#define ZSTDv03_magicNumber 0xFD2FB523 /* v0.3 */ -#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2) -typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t; -static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] = -{ - /* single, double, quad */ - {{0,0}, {1,1}, {2,2}}, /* Q==0 : impossible */ - {{0,0}, {1,1}, {2,2}}, /* Q==1 : impossible */ - {{ 38,130}, {1313, 74}, {2151, 38}}, /* Q == 2 : 12-18% */ - {{ 448,128}, {1353, 74}, {2238, 41}}, /* Q == 3 : 18-25% */ - {{ 556,128}, {1353, 74}, {2238, 47}}, /* Q == 4 : 25-32% */ - {{ 714,128}, {1418, 74}, {2436, 53}}, /* Q == 5 : 32-38% */ - {{ 883,128}, {1437, 74}, {2464, 61}}, /* Q == 6 : 38-44% */ - {{ 897,128}, {1515, 75}, {2622, 68}}, /* Q == 7 : 44-50% */ - {{ 926,128}, {1613, 75}, {2730, 75}}, /* Q == 8 : 50-56% */ - {{ 947,128}, {1729, 77}, {3359, 77}}, /* Q == 9 : 56-62% */ - {{1107,128}, {2083, 81}, {4006, 84}}, /* Q ==10 : 62-69% */ - {{1177,128}, {2379, 87}, {4785, 88}}, /* Q ==11 : 69-75% */ - {{1242,128}, {2415, 93}, {5155, 84}}, /* Q ==12 : 75-81% */ - {{1349,128}, {2644,106}, {5260,106}}, /* Q ==13 : 81-87% */ - {{1455,128}, {2422,124}, {4174,124}}, /* Q ==14 : 87-93% */ - {{ 722,128}, {1891,145}, {1936,146}}, /* Q ==15 : 93-99% */ -}; +#if defined (__cplusplus) +} #endif -/** HUF_selectDecoder() : - * Tells which decoder is likely to decode faster, - * based on a set of pre-computed metrics. - * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 . - * Assumption : 0 < dstSize <= 128 KB */ -U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize) -{ - assert(dstSize > 0); - assert(dstSize <= 128*1024); -#if defined(HUF_FORCE_DECOMPRESS_X1) - (void)dstSize; - (void)cSrcSize; - return 0; -#elif defined(HUF_FORCE_DECOMPRESS_X2) - (void)dstSize; - (void)cSrcSize; - return 1; -#else - /* decoder timing evaluation */ - { U32 const Q = (cSrcSize >= dstSize) ? 
15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */ - U32 const D256 = (U32)(dstSize >> 8); - U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256); - U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256); - DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, to reduce cache eviction */ - return DTime1 < DTime0; - } +#endif /* ZSTD_V03_H_298734209782 */ +/**** ended inlining zstd_v03.h ****/ #endif -} - +#if (ZSTD_LEGACY_SUPPORT <= 4) +/**** start inlining zstd_v04.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ -typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); +#ifndef ZSTD_V04_H_91868324769238 +#define ZSTD_V04_H_91868324769238 -size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) -{ -#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2) - static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 }; +#if defined (__cplusplus) +extern "C" { #endif - /* validation checks */ - if (dstSize == 0) return ERROR(dstSize_tooSmall); - if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ - if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ - if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ +/* ************************************* +* Includes +***************************************/ +#include /* size_t */ - { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); -#if defined(HUF_FORCE_DECOMPRESS_X1) - (void)algoNb; - assert(algoNb == 0); - return HUF_decompress4X1(dst, dstSize, cSrc, cSrcSize); -#elif defined(HUF_FORCE_DECOMPRESS_X2) - (void)algoNb; - assert(algoNb == 1); - return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize); -#else - return decompress[algoNb](dst, dstSize, cSrc, cSrcSize); -#endif - } -} -size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) -{ - /* validation checks */ - if (dstSize == 0) return ERROR(dstSize_tooSmall); - if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ - if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ - if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ +/* ************************************* +* Simple one-step function +***************************************/ +/** +ZSTDv04_decompress() : decompress ZSTD frames compliant with v0.4.x format + compressedSize : is the exact source size + maxOriginalSize : is the size of the 'dst' buffer, which must be already allocated. + It must be equal or larger than originalSize, otherwise decompression will fail. 
+ return : the number of bytes decompressed into destination buffer (originalSize) + or an errorCode if it fails (which can be tested using ZSTDv01_isError()) +*/ +size_t ZSTDv04_decompress( void* dst, size_t maxOriginalSize, + const void* src, size_t compressedSize); + + /** + ZSTDv04_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.4.x format + srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src' + cSize (output parameter) : the number of bytes that would be read to decompress this frame + or an error code if it fails (which can be tested using ZSTDv01_isError()) + dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame + or ZSTD_CONTENTSIZE_ERROR if an error occurs + + note : assumes `cSize` and `dBound` are _not_ NULL. + */ + void ZSTDv04_findFrameSizeInfoLegacy(const void *src, size_t srcSize, + size_t* cSize, unsigned long long* dBound); + +/** +ZSTDv04_isError() : tells if the result of ZSTDv04_decompress() is an error +*/ +unsigned ZSTDv04_isError(size_t code); + - { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); -#if defined(HUF_FORCE_DECOMPRESS_X1) - (void)algoNb; - assert(algoNb == 0); - return HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize); -#elif defined(HUF_FORCE_DECOMPRESS_X2) - (void)algoNb; - assert(algoNb == 1); - return HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize); -#else - return algoNb ? HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) : - HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ; -#endif - } -} +/* ************************************* +* Advanced functions +***************************************/ +typedef struct ZSTDv04_Dctx_s ZSTDv04_Dctx; +ZSTDv04_Dctx* ZSTDv04_createDCtx(void); +size_t ZSTDv04_freeDCtx(ZSTDv04_Dctx* dctx); -size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) -{ - U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; - return HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize, - workSpace, sizeof(workSpace)); -} +size_t ZSTDv04_decompressDCtx(ZSTDv04_Dctx* dctx, + void* dst, size_t maxOriginalSize, + const void* src, size_t compressedSize); -size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, - size_t dstSize, const void* cSrc, - size_t cSrcSize, void* workSpace, - size_t wkspSize) -{ - /* validation checks */ - if (dstSize == 0) return ERROR(dstSize_tooSmall); - if (cSrcSize == 0) return ERROR(corruption_detected); +/* ************************************* +* Direct Streaming +***************************************/ +size_t ZSTDv04_resetDCtx(ZSTDv04_Dctx* dctx); - { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); -#if defined(HUF_FORCE_DECOMPRESS_X1) - (void)algoNb; - assert(algoNb == 0); - return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); -#elif defined(HUF_FORCE_DECOMPRESS_X2) - (void)algoNb; - assert(algoNb == 1); - return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); -#else - return algoNb ? 
HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, - cSrcSize, workSpace, wkspSize): - HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); -#endif - } -} +size_t ZSTDv04_nextSrcSizeToDecompress(ZSTDv04_Dctx* dctx); +size_t ZSTDv04_decompressContinue(ZSTDv04_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize); +/** + Use above functions alternatively. + ZSTD_nextSrcSizeToDecompress() tells how much bytes to provide as 'srcSize' to ZSTD_decompressContinue(). + ZSTD_decompressContinue() will use previous data blocks to improve compression if they are located prior to current block. + Result is the number of bytes regenerated within 'dst'. + It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header. +*/ -size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - void* workSpace, size_t wkspSize) -{ - /* validation checks */ - if (dstSize == 0) return ERROR(dstSize_tooSmall); - if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ - if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ - if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ - { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); -#if defined(HUF_FORCE_DECOMPRESS_X1) - (void)algoNb; - assert(algoNb == 0); - return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc, - cSrcSize, workSpace, wkspSize); -#elif defined(HUF_FORCE_DECOMPRESS_X2) - (void)algoNb; - assert(algoNb == 1); - return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, - cSrcSize, workSpace, wkspSize); -#else - return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, - cSrcSize, workSpace, wkspSize): - HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc, - cSrcSize, workSpace, wkspSize); -#endif - } -} +/* ************************************* +* Buffered Streaming +***************************************/ +typedef struct ZBUFFv04_DCtx_s ZBUFFv04_DCtx; +ZBUFFv04_DCtx* ZBUFFv04_createDCtx(void); +size_t ZBUFFv04_freeDCtx(ZBUFFv04_DCtx* dctx); -size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize) -{ - U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; - return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, - workSpace, sizeof(workSpace)); -} +size_t ZBUFFv04_decompressInit(ZBUFFv04_DCtx* dctx); +size_t ZBUFFv04_decompressWithDictionary(ZBUFFv04_DCtx* dctx, const void* dict, size_t dictSize); +size_t ZBUFFv04_decompressContinue(ZBUFFv04_DCtx* dctx, void* dst, size_t* maxDstSizePtr, const void* src, size_t* srcSizePtr); -size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2) -{ - DTableDesc const dtd = HUF_getDTableDesc(DTable); -#if defined(HUF_FORCE_DECOMPRESS_X1) - (void)dtd; - assert(dtd.tableType == 0); - return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); -#elif defined(HUF_FORCE_DECOMPRESS_X2) - (void)dtd; - assert(dtd.tableType == 1); - return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); -#else - return dtd.tableType ? 
HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) : - HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); -#endif -} +/** ************************************************ +* Streaming decompression +* +* A ZBUFF_DCtx object is required to track streaming operation. +* Use ZBUFF_createDCtx() and ZBUFF_freeDCtx() to create/release resources. +* Use ZBUFF_decompressInit() to start a new decompression operation. +* ZBUFF_DCtx objects can be reused multiple times. +* +* Optionally, a reference to a static dictionary can be set, using ZBUFF_decompressWithDictionary() +* It must be the same content as the one set during compression phase. +* Dictionary content must remain accessible during the decompression process. +* +* Use ZBUFF_decompressContinue() repetitively to consume your input. +* *srcSizePtr and *maxDstSizePtr can be any size. +* The function will report how many bytes were read or written by modifying *srcSizePtr and *maxDstSizePtr. +* Note that it may not consume the entire input, in which case it's up to the caller to present remaining input again. +* The content of dst will be overwritten (up to *maxDstSizePtr) at each function call, so save its content if it matters or change dst. +* @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to improve latency) +* or 0 when a frame is completely decoded +* or an error code, which can be tested using ZBUFF_isError(). +* +* Hint : recommended buffer sizes (not compulsory) : ZBUFF_recommendedDInSize / ZBUFF_recommendedDOutSize +* output : ZBUFF_recommendedDOutSize==128 KB block size is the internal unit, it ensures it's always possible to write a full block when it's decoded. +* input : ZBUFF_recommendedDInSize==128Kb+3; just follow indications from ZBUFF_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 . +* **************************************************/ +unsigned ZBUFFv04_isError(size_t errorCode); +const char* ZBUFFv04_getErrorName(size_t errorCode); -#ifndef HUF_FORCE_DECOMPRESS_X2 -size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2) -{ - const BYTE* ip = (const BYTE*) cSrc; - size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize); - if (HUF_isError(hSize)) return hSize; - if (hSize >= cSrcSize) return ERROR(srcSize_wrong); - ip += hSize; cSrcSize -= hSize; +/** The below functions provide recommended buffer sizes for Compression or Decompression operations. +* These sizes are not compulsory, they just tend to offer better latency */ +size_t ZBUFFv04_recommendedDInSize(void); +size_t ZBUFFv04_recommendedDOutSize(void); - return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); -} -#endif -size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2) -{ - DTableDesc const dtd = HUF_getDTableDesc(DTable); -#if defined(HUF_FORCE_DECOMPRESS_X1) - (void)dtd; - assert(dtd.tableType == 0); - return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); -#elif defined(HUF_FORCE_DECOMPRESS_X2) - (void)dtd; - assert(dtd.tableType == 1); - return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); -#else - return dtd.tableType ? 
HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) : - HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); -#endif -} +/* ************************************* +* Prefix - version detection +***************************************/ +#define ZSTDv04_magicNumber 0xFD2FB524 /* v0.4 */ -size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2) -{ - /* validation checks */ - if (dstSize == 0) return ERROR(dstSize_tooSmall); - if (cSrcSize == 0) return ERROR(corruption_detected); - { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); -#if defined(HUF_FORCE_DECOMPRESS_X1) - (void)algoNb; - assert(algoNb == 0); - return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); -#elif defined(HUF_FORCE_DECOMPRESS_X2) - (void)algoNb; - assert(algoNb == 1); - return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); -#else - return algoNb ? HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) : - HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); -#endif - } +#if defined (__cplusplus) } -/**** ended inlining huf_decompress.c ****/ -/**** start inlining zstd_ddict.c ****/ +#endif + +#endif /* ZSTD_V04_H_91868324769238 */ +/**** ended inlining zstd_v04.h ****/ +#endif +#if (ZSTD_LEGACY_SUPPORT <= 5) +/**** start inlining zstd_v05.h ****/ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -7930,16 +10255,164 @@ size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t ds * You may select, at your option, one of the above-listed licenses. */ -/* zstd_ddict.c : - * concentrates all logic that needs to know the internals of ZSTD_DDict object */ +#ifndef ZSTDv05_H +#define ZSTDv05_H -/*-******************************************************* +#if defined (__cplusplus) +extern "C" { +#endif + +/*-************************************* * Dependencies -*********************************************************/ -#include /* memcpy, memmove, memset */ -/**** start inlining cpu.h ****/ +***************************************/ +#include /* size_t */ +/**** skipping file: ../common/mem.h ****/ + + +/* ************************************* +* Simple functions +***************************************/ +/*! ZSTDv05_decompress() : + `compressedSize` : is the _exact_ size of the compressed blob, otherwise decompression will fail. + `dstCapacity` must be large enough, equal or larger than originalSize. 
+ @return : the number of bytes decompressed into `dst` (<= `dstCapacity`), + or an errorCode if it fails (which can be tested using ZSTDv05_isError()) */ +size_t ZSTDv05_decompress( void* dst, size_t dstCapacity, + const void* src, size_t compressedSize); + + /** + ZSTDv05_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.5.x format + srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src' + cSize (output parameter) : the number of bytes that would be read to decompress this frame + or an error code if it fails (which can be tested using ZSTDv01_isError()) + dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame + or ZSTD_CONTENTSIZE_ERROR if an error occurs + + note : assumes `cSize` and `dBound` are _not_ NULL. + */ +void ZSTDv05_findFrameSizeInfoLegacy(const void *src, size_t srcSize, + size_t* cSize, unsigned long long* dBound); + +/* ************************************* +* Helper functions +***************************************/ +/* Error Management */ +unsigned ZSTDv05_isError(size_t code); /*!< tells if a `size_t` function result is an error code */ +const char* ZSTDv05_getErrorName(size_t code); /*!< provides readable string for an error code */ + + +/* ************************************* +* Explicit memory management +***************************************/ +/** Decompression context */ +typedef struct ZSTDv05_DCtx_s ZSTDv05_DCtx; +ZSTDv05_DCtx* ZSTDv05_createDCtx(void); +size_t ZSTDv05_freeDCtx(ZSTDv05_DCtx* dctx); /*!< @return : errorCode */ + +/** ZSTDv05_decompressDCtx() : +* Same as ZSTDv05_decompress(), but requires an already allocated ZSTDv05_DCtx (see ZSTDv05_createDCtx()) */ +size_t ZSTDv05_decompressDCtx(ZSTDv05_DCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + + +/*-*********************** +* Simple Dictionary API +*************************/ +/*! ZSTDv05_decompress_usingDict() : +* Decompression using a pre-defined Dictionary content (see dictBuilder). +* Dictionary must be identical to the one used during compression, otherwise regenerated data will be corrupted. 
+* Note : dict can be NULL, in which case, it's equivalent to ZSTDv05_decompressDCtx() */ +size_t ZSTDv05_decompress_usingDict(ZSTDv05_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize); + +/*-************************ +* Advanced Streaming API +***************************/ +typedef enum { ZSTDv05_fast, ZSTDv05_greedy, ZSTDv05_lazy, ZSTDv05_lazy2, ZSTDv05_btlazy2, ZSTDv05_opt, ZSTDv05_btopt } ZSTDv05_strategy; +typedef struct { + U64 srcSize; + U32 windowLog; /* the only useful information to retrieve */ + U32 contentLog; U32 hashLog; U32 searchLog; U32 searchLength; U32 targetLength; ZSTDv05_strategy strategy; +} ZSTDv05_parameters; +size_t ZSTDv05_getFrameParams(ZSTDv05_parameters* params, const void* src, size_t srcSize); + +size_t ZSTDv05_decompressBegin_usingDict(ZSTDv05_DCtx* dctx, const void* dict, size_t dictSize); +void ZSTDv05_copyDCtx(ZSTDv05_DCtx* dstDCtx, const ZSTDv05_DCtx* srcDCtx); +size_t ZSTDv05_nextSrcSizeToDecompress(ZSTDv05_DCtx* dctx); +size_t ZSTDv05_decompressContinue(ZSTDv05_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + + +/*-*********************** +* ZBUFF API +*************************/ +typedef struct ZBUFFv05_DCtx_s ZBUFFv05_DCtx; +ZBUFFv05_DCtx* ZBUFFv05_createDCtx(void); +size_t ZBUFFv05_freeDCtx(ZBUFFv05_DCtx* dctx); + +size_t ZBUFFv05_decompressInit(ZBUFFv05_DCtx* dctx); +size_t ZBUFFv05_decompressInitDictionary(ZBUFFv05_DCtx* dctx, const void* dict, size_t dictSize); + +size_t ZBUFFv05_decompressContinue(ZBUFFv05_DCtx* dctx, + void* dst, size_t* dstCapacityPtr, + const void* src, size_t* srcSizePtr); + +/*-*************************************************************************** +* Streaming decompression +* +* A ZBUFFv05_DCtx object is required to track streaming operations. +* Use ZBUFFv05_createDCtx() and ZBUFFv05_freeDCtx() to create/release resources. +* Use ZBUFFv05_decompressInit() to start a new decompression operation, +* or ZBUFFv05_decompressInitDictionary() if decompression requires a dictionary. +* Note that ZBUFFv05_DCtx objects can be reused multiple times. +* +* Use ZBUFFv05_decompressContinue() repetitively to consume your input. +* *srcSizePtr and *dstCapacityPtr can be any size. +* The function will report how many bytes were read or written by modifying *srcSizePtr and *dstCapacityPtr. +* Note that it may not consume the entire input, in which case it's up to the caller to present remaining input again. +* The content of @dst will be overwritten (up to *dstCapacityPtr) at each function call, so save its content if it matters or change @dst. +* @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to help latency) +* or 0 when a frame is completely decoded +* or an error code, which can be tested using ZBUFFv05_isError(). +* +* Hint : recommended buffer sizes (not compulsory) : ZBUFFv05_recommendedDInSize() / ZBUFFv05_recommendedDOutSize() +* output : ZBUFFv05_recommendedDOutSize==128 KB block size is the internal unit, it ensures it's always possible to write a full block when decoded. +* input : ZBUFFv05_recommendedDInSize==128Kb+3; just follow indications from ZBUFFv05_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 . 
+* *******************************************************************************/ + + +/* ************************************* +* Tool functions +***************************************/ +unsigned ZBUFFv05_isError(size_t errorCode); +const char* ZBUFFv05_getErrorName(size_t errorCode); + +/** Functions below provide recommended buffer sizes for Compression or Decompression operations. +* These sizes are just hints, and tend to offer better latency */ +size_t ZBUFFv05_recommendedDInSize(void); +size_t ZBUFFv05_recommendedDOutSize(void); + + + +/*-************************************* +* Constants +***************************************/ +#define ZSTDv05_MAGICNUMBER 0xFD2FB525 /* v0.5 */ + + + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTDv0505_H */ +/**** ended inlining zstd_v05.h ****/ +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) +/**** start inlining zstd_v06.h ****/ /* - * Copyright (c) 2018-2020, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -7948,220 +10421,174 @@ size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t ds * You may select, at your option, one of the above-listed licenses. */ -#ifndef ZSTD_COMMON_CPU_H -#define ZSTD_COMMON_CPU_H +#ifndef ZSTDv06_H +#define ZSTDv06_H + +#if defined (__cplusplus) +extern "C" { +#endif + +/*====== Dependency ======*/ +#include /* size_t */ + + +/*====== Export for Windows ======*/ +/*! +* ZSTDv06_DLL_EXPORT : +* Enable exporting of functions when building a Windows DLL +*/ +#if defined(_WIN32) && defined(ZSTDv06_DLL_EXPORT) && (ZSTDv06_DLL_EXPORT==1) +# define ZSTDLIBv06_API __declspec(dllexport) +#else +# define ZSTDLIBv06_API +#endif + + +/* ************************************* +* Simple functions +***************************************/ +/*! ZSTDv06_decompress() : + `compressedSize` : is the _exact_ size of the compressed blob, otherwise decompression will fail. + `dstCapacity` must be large enough, equal or larger than originalSize. + @return : the number of bytes decompressed into `dst` (<= `dstCapacity`), + or an errorCode if it fails (which can be tested using ZSTDv06_isError()) */ +ZSTDLIBv06_API size_t ZSTDv06_decompress( void* dst, size_t dstCapacity, + const void* src, size_t compressedSize); + +/** +ZSTDv06_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.6.x format + srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src' + cSize (output parameter) : the number of bytes that would be read to decompress this frame + or an error code if it fails (which can be tested using ZSTDv01_isError()) + dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame + or ZSTD_CONTENTSIZE_ERROR if an error occurs + + note : assumes `cSize` and `dBound` are _not_ NULL. 
+*/ +void ZSTDv06_findFrameSizeInfoLegacy(const void *src, size_t srcSize, + size_t* cSize, unsigned long long* dBound); + +/* ************************************* +* Helper functions +***************************************/ +ZSTDLIBv06_API size_t ZSTDv06_compressBound(size_t srcSize); /*!< maximum compressed size (worst case scenario) */ + +/* Error Management */ +ZSTDLIBv06_API unsigned ZSTDv06_isError(size_t code); /*!< tells if a `size_t` function result is an error code */ +ZSTDLIBv06_API const char* ZSTDv06_getErrorName(size_t code); /*!< provides readable string for an error code */ + + +/* ************************************* +* Explicit memory management +***************************************/ +/** Decompression context */ +typedef struct ZSTDv06_DCtx_s ZSTDv06_DCtx; +ZSTDLIBv06_API ZSTDv06_DCtx* ZSTDv06_createDCtx(void); +ZSTDLIBv06_API size_t ZSTDv06_freeDCtx(ZSTDv06_DCtx* dctx); /*!< @return : errorCode */ + +/** ZSTDv06_decompressDCtx() : +* Same as ZSTDv06_decompress(), but requires an already allocated ZSTDv06_DCtx (see ZSTDv06_createDCtx()) */ +ZSTDLIBv06_API size_t ZSTDv06_decompressDCtx(ZSTDv06_DCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + + +/*-*********************** +* Dictionary API +*************************/ +/*! ZSTDv06_decompress_usingDict() : +* Decompression using a pre-defined Dictionary content (see dictBuilder). +* Dictionary must be identical to the one used during compression, otherwise regenerated data will be corrupted. +* Note : dict can be NULL, in which case, it's equivalent to ZSTDv06_decompressDCtx() */ +ZSTDLIBv06_API size_t ZSTDv06_decompress_usingDict(ZSTDv06_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize); + + +/*-************************ +* Advanced Streaming API +***************************/ +struct ZSTDv06_frameParams_s { unsigned long long frameContentSize; unsigned windowLog; }; +typedef struct ZSTDv06_frameParams_s ZSTDv06_frameParams; + +ZSTDLIBv06_API size_t ZSTDv06_getFrameParams(ZSTDv06_frameParams* fparamsPtr, const void* src, size_t srcSize); /**< doesn't consume input */ +ZSTDLIBv06_API size_t ZSTDv06_decompressBegin_usingDict(ZSTDv06_DCtx* dctx, const void* dict, size_t dictSize); +ZSTDLIBv06_API void ZSTDv06_copyDCtx(ZSTDv06_DCtx* dctx, const ZSTDv06_DCtx* preparedDCtx); + +ZSTDLIBv06_API size_t ZSTDv06_nextSrcSizeToDecompress(ZSTDv06_DCtx* dctx); +ZSTDLIBv06_API size_t ZSTDv06_decompressContinue(ZSTDv06_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + + -/** - * Implementation taken from folly/CpuId.h - * https://github.com/facebook/folly/blob/master/folly/CpuId.h - */ +/* ************************************* +* ZBUFF API +***************************************/ -#include +typedef struct ZBUFFv06_DCtx_s ZBUFFv06_DCtx; +ZSTDLIBv06_API ZBUFFv06_DCtx* ZBUFFv06_createDCtx(void); +ZSTDLIBv06_API size_t ZBUFFv06_freeDCtx(ZBUFFv06_DCtx* dctx); -/**** skipping file: mem.h ****/ +ZSTDLIBv06_API size_t ZBUFFv06_decompressInit(ZBUFFv06_DCtx* dctx); +ZSTDLIBv06_API size_t ZBUFFv06_decompressInitDictionary(ZBUFFv06_DCtx* dctx, const void* dict, size_t dictSize); -#ifdef _MSC_VER -#include -#endif +ZSTDLIBv06_API size_t ZBUFFv06_decompressContinue(ZBUFFv06_DCtx* dctx, + void* dst, size_t* dstCapacityPtr, + const void* src, size_t* srcSizePtr); -typedef struct { - U32 f1c; - U32 f1d; - U32 f7b; - U32 f7c; -} ZSTD_cpuid_t; +/*-*************************************************************************** +* 
Streaming decompression howto +* +* A ZBUFFv06_DCtx object is required to track streaming operations. +* Use ZBUFFv06_createDCtx() and ZBUFFv06_freeDCtx() to create/release resources. +* Use ZBUFFv06_decompressInit() to start a new decompression operation, +* or ZBUFFv06_decompressInitDictionary() if decompression requires a dictionary. +* Note that ZBUFFv06_DCtx objects can be re-init multiple times. +* +* Use ZBUFFv06_decompressContinue() repetitively to consume your input. +* *srcSizePtr and *dstCapacityPtr can be any size. +* The function will report how many bytes were read or written by modifying *srcSizePtr and *dstCapacityPtr. +* Note that it may not consume the entire input, in which case it's up to the caller to present remaining input again. +* The content of `dst` will be overwritten (up to *dstCapacityPtr) at each function call, so save its content if it matters, or change `dst`. +* @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to help latency), +* or 0 when a frame is completely decoded, +* or an error code, which can be tested using ZBUFFv06_isError(). +* +* Hint : recommended buffer sizes (not compulsory) : ZBUFFv06_recommendedDInSize() and ZBUFFv06_recommendedDOutSize() +* output : ZBUFFv06_recommendedDOutSize== 128 KB block size is the internal unit, it ensures it's always possible to write a full block when decoded. +* input : ZBUFFv06_recommendedDInSize == 128KB + 3; +* just follow indications from ZBUFFv06_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 . +* *******************************************************************************/ -MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) { - U32 f1c = 0; - U32 f1d = 0; - U32 f7b = 0; - U32 f7c = 0; -#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86)) - int reg[4]; - __cpuid((int*)reg, 0); - { - int const n = reg[0]; - if (n >= 1) { - __cpuid((int*)reg, 1); - f1c = (U32)reg[2]; - f1d = (U32)reg[3]; - } - if (n >= 7) { - __cpuidex((int*)reg, 7, 0); - f7b = (U32)reg[1]; - f7c = (U32)reg[2]; - } - } -#elif defined(__i386__) && defined(__PIC__) && !defined(__clang__) && defined(__GNUC__) - /* The following block like the normal cpuid branch below, but gcc - * reserves ebx for use of its pic register so we must specially - * handle the save and restore to avoid clobbering the register - */ - U32 n; - __asm__( - "pushl %%ebx\n\t" - "cpuid\n\t" - "popl %%ebx\n\t" - : "=a"(n) - : "a"(0) - : "ecx", "edx"); - if (n >= 1) { - U32 f1a; - __asm__( - "pushl %%ebx\n\t" - "cpuid\n\t" - "popl %%ebx\n\t" - : "=a"(f1a), "=c"(f1c), "=d"(f1d) - : "a"(1)); - } - if (n >= 7) { - __asm__( - "pushl %%ebx\n\t" - "cpuid\n\t" - "movl %%ebx, %%eax\n\t" - "popl %%ebx" - : "=a"(f7b), "=c"(f7c) - : "a"(7), "c"(0) - : "edx"); - } -#elif defined(__x86_64__) || defined(_M_X64) || defined(__i386__) - U32 n; - __asm__("cpuid" : "=a"(n) : "a"(0) : "ebx", "ecx", "edx"); - if (n >= 1) { - U32 f1a; - __asm__("cpuid" : "=a"(f1a), "=c"(f1c), "=d"(f1d) : "a"(1) : "ebx"); - } - if (n >= 7) { - U32 f7a; - __asm__("cpuid" - : "=a"(f7a), "=b"(f7b), "=c"(f7c) - : "a"(7), "c"(0) - : "edx"); - } -#endif - { - ZSTD_cpuid_t cpuid; - cpuid.f1c = f1c; - cpuid.f1d = f1d; - cpuid.f7b = f7b; - cpuid.f7c = f7c; - return cpuid; - } -} -#define X(name, r, bit) \ - MEM_STATIC int ZSTD_cpuid_##name(ZSTD_cpuid_t const cpuid) { \ - return ((cpuid.r) & (1U << bit)) != 0; \ - } +/* ************************************* +* Tool functions +***************************************/ +ZSTDLIBv06_API unsigned 
ZBUFFv06_isError(size_t errorCode); +ZSTDLIBv06_API const char* ZBUFFv06_getErrorName(size_t errorCode); -/* cpuid(1): Processor Info and Feature Bits. */ -#define C(name, bit) X(name, f1c, bit) - C(sse3, 0) - C(pclmuldq, 1) - C(dtes64, 2) - C(monitor, 3) - C(dscpl, 4) - C(vmx, 5) - C(smx, 6) - C(eist, 7) - C(tm2, 8) - C(ssse3, 9) - C(cnxtid, 10) - C(fma, 12) - C(cx16, 13) - C(xtpr, 14) - C(pdcm, 15) - C(pcid, 17) - C(dca, 18) - C(sse41, 19) - C(sse42, 20) - C(x2apic, 21) - C(movbe, 22) - C(popcnt, 23) - C(tscdeadline, 24) - C(aes, 25) - C(xsave, 26) - C(osxsave, 27) - C(avx, 28) - C(f16c, 29) - C(rdrand, 30) -#undef C -#define D(name, bit) X(name, f1d, bit) - D(fpu, 0) - D(vme, 1) - D(de, 2) - D(pse, 3) - D(tsc, 4) - D(msr, 5) - D(pae, 6) - D(mce, 7) - D(cx8, 8) - D(apic, 9) - D(sep, 11) - D(mtrr, 12) - D(pge, 13) - D(mca, 14) - D(cmov, 15) - D(pat, 16) - D(pse36, 17) - D(psn, 18) - D(clfsh, 19) - D(ds, 21) - D(acpi, 22) - D(mmx, 23) - D(fxsr, 24) - D(sse, 25) - D(sse2, 26) - D(ss, 27) - D(htt, 28) - D(tm, 29) - D(pbe, 31) -#undef D +/** Functions below provide recommended buffer sizes for Compression or Decompression operations. +* These sizes are just hints, they tend to offer better latency */ +ZSTDLIBv06_API size_t ZBUFFv06_recommendedDInSize(void); +ZSTDLIBv06_API size_t ZBUFFv06_recommendedDOutSize(void); -/* cpuid(7): Extended Features. */ -#define B(name, bit) X(name, f7b, bit) - B(bmi1, 3) - B(hle, 4) - B(avx2, 5) - B(smep, 7) - B(bmi2, 8) - B(erms, 9) - B(invpcid, 10) - B(rtm, 11) - B(mpx, 14) - B(avx512f, 16) - B(avx512dq, 17) - B(rdseed, 18) - B(adx, 19) - B(smap, 20) - B(avx512ifma, 21) - B(pcommit, 22) - B(clflushopt, 23) - B(clwb, 24) - B(avx512pf, 26) - B(avx512er, 27) - B(avx512cd, 28) - B(sha, 29) - B(avx512bw, 30) - B(avx512vl, 31) -#undef B -#define C(name, bit) X(name, f7c, bit) - C(prefetchwt1, 0) - C(avx512vbmi, 1) -#undef C -#undef X +/*-************************************* +* Constants +***************************************/ +#define ZSTDv06_MAGICNUMBER 0xFD2FB526 /* v0.6 */ -#endif /* ZSTD_COMMON_CPU_H */ -/**** ended inlining cpu.h ****/ -/**** skipping file: mem.h ****/ -#define FSE_STATIC_LINKING_ONLY -/**** skipping file: fse.h ****/ -#define HUF_STATIC_LINKING_ONLY -/**** skipping file: huf.h ****/ -/**** start inlining zstd_decompress_internal.h ****/ + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTDv06_BUFFERED_H */ +/**** ended inlining zstd_v06.h ****/ +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) +/**** start inlining zstd_v07.h ****/ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -8170,223 +10597,551 @@ MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) { * You may select, at your option, one of the above-listed licenses. */ +#ifndef ZSTDv07_H_235446 +#define ZSTDv07_H_235446 -/* zstd_decompress_internal: - * objects and definitions shared within lib/decompress modules */ +#if defined (__cplusplus) +extern "C" { +#endif - #ifndef ZSTD_DECOMPRESS_INTERNAL_H - #define ZSTD_DECOMPRESS_INTERNAL_H +/*====== Dependency ======*/ +#include /* size_t */ -/*-******************************************************* - * Dependencies - *********************************************************/ -/**** skipping file: mem.h ****/ -/**** skipping file: zstd_internal.h ****/ +/*====== Export for Windows ======*/ +/*! 
+* ZSTDv07_DLL_EXPORT : +* Enable exporting of functions when building a Windows DLL +*/ +#if defined(_WIN32) && defined(ZSTDv07_DLL_EXPORT) && (ZSTDv07_DLL_EXPORT==1) +# define ZSTDLIBv07_API __declspec(dllexport) +#else +# define ZSTDLIBv07_API +#endif +/* ************************************* +* Simple API +***************************************/ +/*! ZSTDv07_getDecompressedSize() : +* @return : decompressed size if known, 0 otherwise. + note 1 : if `0`, follow up with ZSTDv07_getFrameParams() to know precise failure cause. + note 2 : decompressed size could be wrong or intentionally modified ! + always ensure results fit within application's authorized limits */ +unsigned long long ZSTDv07_getDecompressedSize(const void* src, size_t srcSize); + +/*! ZSTDv07_decompress() : + `compressedSize` : must be _exact_ size of compressed input, otherwise decompression will fail. + `dstCapacity` must be equal or larger than originalSize. + @return : the number of bytes decompressed into `dst` (<= `dstCapacity`), + or an errorCode if it fails (which can be tested using ZSTDv07_isError()) */ +ZSTDLIBv07_API size_t ZSTDv07_decompress( void* dst, size_t dstCapacity, + const void* src, size_t compressedSize); -/*-******************************************************* - * Constants - *********************************************************/ -static const U32 LL_base[MaxLL+1] = { - 0, 1, 2, 3, 4, 5, 6, 7, - 8, 9, 10, 11, 12, 13, 14, 15, - 16, 18, 20, 22, 24, 28, 32, 40, - 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, - 0x2000, 0x4000, 0x8000, 0x10000 }; +/** +ZSTDv07_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.7.x format + srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src' + cSize (output parameter) : the number of bytes that would be read to decompress this frame + or an error code if it fails (which can be tested using ZSTDv01_isError()) + dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame + or ZSTD_CONTENTSIZE_ERROR if an error occurs + + note : assumes `cSize` and `dBound` are _not_ NULL. 
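Putting the two simple v0.7 entry points together, one plausible one-shot path looks like the sketch below: query the recorded content size, cap it against an application limit as note 2 warns, allocate, then decompress. The heap allocation, the size cap, and the helper name are this example's choices, not part of the library.

    #include <stdlib.h>

    /* Sketch only: returns a malloc'd buffer with the decompressed data, or NULL. */
    static void* decompressV07OneShot(const void* src, size_t compressedSize,
                                      size_t maxAllowedSize, size_t* decompressedSize)
    {
        unsigned long long const rSize = ZSTDv07_getDecompressedSize(src, compressedSize);
        void* dst;
        if (rSize == 0 || rSize > maxAllowedSize) return NULL;  /* unknown size, or over limit */
        dst = malloc((size_t)rSize);
        if (dst == NULL) return NULL;
        {   size_t const dSize = ZSTDv07_decompress(dst, (size_t)rSize, src, compressedSize);
            if (ZSTDv07_isError(dSize)) { free(dst); return NULL; }
            *decompressedSize = dSize;
        }
        return dst;
    }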
+*/ +void ZSTDv07_findFrameSizeInfoLegacy(const void *src, size_t srcSize, + size_t* cSize, unsigned long long* dBound); + +/*====== Helper functions ======*/ +ZSTDLIBv07_API unsigned ZSTDv07_isError(size_t code); /*!< tells if a `size_t` function result is an error code */ +ZSTDLIBv07_API const char* ZSTDv07_getErrorName(size_t code); /*!< provides readable string from an error code */ + + +/*-************************************* +* Explicit memory management +***************************************/ +/** Decompression context */ +typedef struct ZSTDv07_DCtx_s ZSTDv07_DCtx; +ZSTDLIBv07_API ZSTDv07_DCtx* ZSTDv07_createDCtx(void); +ZSTDLIBv07_API size_t ZSTDv07_freeDCtx(ZSTDv07_DCtx* dctx); /*!< @return : errorCode */ + +/** ZSTDv07_decompressDCtx() : +* Same as ZSTDv07_decompress(), requires an allocated ZSTDv07_DCtx (see ZSTDv07_createDCtx()) */ +ZSTDLIBv07_API size_t ZSTDv07_decompressDCtx(ZSTDv07_DCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); -static const U32 OF_base[MaxOff+1] = { - 0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D, - 0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD, - 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD, - 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD }; -static const U32 OF_bits[MaxOff+1] = { - 0, 1, 2, 3, 4, 5, 6, 7, - 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, - 24, 25, 26, 27, 28, 29, 30, 31 }; +/*-************************ +* Simple dictionary API +***************************/ +/*! ZSTDv07_decompress_usingDict() : +* Decompression using a pre-defined Dictionary content (see dictBuilder). +* Dictionary must be identical to the one used during compression. +* Note : This function load the dictionary, resulting in a significant startup time */ +ZSTDLIBv07_API size_t ZSTDv07_decompress_usingDict(ZSTDv07_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize); + + +/*-************************** +* Advanced Dictionary API +****************************/ +/*! ZSTDv07_createDDict() : +* Create a digested dictionary, ready to start decompression operation without startup delay. +* `dict` can be released after creation */ +typedef struct ZSTDv07_DDict_s ZSTDv07_DDict; +ZSTDLIBv07_API ZSTDv07_DDict* ZSTDv07_createDDict(const void* dict, size_t dictSize); +ZSTDLIBv07_API size_t ZSTDv07_freeDDict(ZSTDv07_DDict* ddict); + +/*! ZSTDv07_decompress_usingDDict() : +* Decompression using a pre-digested Dictionary +* Faster startup than ZSTDv07_decompress_usingDict(), recommended when same dictionary is used multiple times. 
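A sketch of the "digest the dictionary once, reuse it for many frames" pattern this comment recommends; the frame array, the shared scratch output buffer, and the helper name are illustrative assumptions of this example.

    /* Sketch only: decode frameCount frames, all compressed with the same dictionary,
     * reusing one digested DDict. Each frame is decoded into the same scratch buffer. */
    static size_t decompressManyV07(const void* dict, size_t dictSize,
                                    const void* const* frames, const size_t* frameSizes,
                                    size_t frameCount,
                                    void* dst, size_t dstCapacity)
    {
        ZSTDv07_DCtx*  const dctx  = ZSTDv07_createDCtx();
        ZSTDv07_DDict* const ddict = ZSTDv07_createDDict(dict, dictSize);  /* digested once */
        size_t result = 0;
        size_t n;
        if (dctx != NULL && ddict != NULL) {
            for (n = 0; n < frameCount; n++) {
                result = ZSTDv07_decompress_usingDDict(dctx, dst, dstCapacity,
                                                       frames[n], frameSizes[n], ddict);
                if (ZSTDv07_isError(result)) break;   /* stop on the first bad frame */
            }
        }
        if (ddict != NULL) ZSTDv07_freeDDict(ddict);
        if (dctx  != NULL) ZSTDv07_freeDCtx(dctx);
        return result;   /* size of the last decoded frame, or an error code */
    }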
*/ +ZSTDLIBv07_API size_t ZSTDv07_decompress_usingDDict(ZSTDv07_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTDv07_DDict* ddict); -static const U32 ML_base[MaxML+1] = { - 3, 4, 5, 6, 7, 8, 9, 10, - 11, 12, 13, 14, 15, 16, 17, 18, - 19, 20, 21, 22, 23, 24, 25, 26, - 27, 28, 29, 30, 31, 32, 33, 34, - 35, 37, 39, 41, 43, 47, 51, 59, - 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803, - 0x1003, 0x2003, 0x4003, 0x8003, 0x10003 }; +typedef struct { + unsigned long long frameContentSize; + unsigned windowSize; + unsigned dictID; + unsigned checksumFlag; +} ZSTDv07_frameParams; +ZSTDLIBv07_API size_t ZSTDv07_getFrameParams(ZSTDv07_frameParams* fparamsPtr, const void* src, size_t srcSize); /**< doesn't consume input */ -/*-******************************************************* - * Decompression types - *********************************************************/ - typedef struct { - U32 fastMode; - U32 tableLog; - } ZSTD_seqSymbol_header; - typedef struct { - U16 nextState; - BYTE nbAdditionalBits; - BYTE nbBits; - U32 baseValue; - } ZSTD_seqSymbol; - #define SEQSYMBOL_TABLE_SIZE(log) (1 + (1 << (log))) -typedef struct { - ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */ - ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */ - ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */ - HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */ - U32 rep[ZSTD_REP_NUM]; -} ZSTD_entropyDTables_t; +/* ************************************* +* Streaming functions +***************************************/ +typedef struct ZBUFFv07_DCtx_s ZBUFFv07_DCtx; +ZSTDLIBv07_API ZBUFFv07_DCtx* ZBUFFv07_createDCtx(void); +ZSTDLIBv07_API size_t ZBUFFv07_freeDCtx(ZBUFFv07_DCtx* dctx); -typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader, - ZSTDds_decodeBlockHeader, ZSTDds_decompressBlock, - ZSTDds_decompressLastBlock, ZSTDds_checkChecksum, - ZSTDds_decodeSkippableHeader, ZSTDds_skipFrame } ZSTD_dStage; +ZSTDLIBv07_API size_t ZBUFFv07_decompressInit(ZBUFFv07_DCtx* dctx); +ZSTDLIBv07_API size_t ZBUFFv07_decompressInitDictionary(ZBUFFv07_DCtx* dctx, const void* dict, size_t dictSize); -typedef enum { zdss_init=0, zdss_loadHeader, - zdss_read, zdss_load, zdss_flush } ZSTD_dStreamStage; +ZSTDLIBv07_API size_t ZBUFFv07_decompressContinue(ZBUFFv07_DCtx* dctx, + void* dst, size_t* dstCapacityPtr, + const void* src, size_t* srcSizePtr); -typedef enum { - ZSTD_use_indefinitely = -1, /* Use the dictionary indefinitely */ - ZSTD_dont_use = 0, /* Do not use the dictionary (if one exists free it) */ - ZSTD_use_once = 1 /* Use the dictionary once and set to ZSTD_dont_use */ -} ZSTD_dictUses_e; +/*-*************************************************************************** +* Streaming decompression howto +* +* A ZBUFFv07_DCtx object is required to track streaming operations. +* Use ZBUFFv07_createDCtx() and ZBUFFv07_freeDCtx() to create/release resources. +* Use ZBUFFv07_decompressInit() to start a new decompression operation, +* or ZBUFFv07_decompressInitDictionary() if decompression requires a dictionary. +* Note that ZBUFFv07_DCtx objects can be re-init multiple times. +* +* Use ZBUFFv07_decompressContinue() repetitively to consume your input. +* *srcSizePtr and *dstCapacityPtr can be any size. 
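Since ZSTDv07_getFrameParams() doesn't consume input, it can be used to peek at a frame header, for example to learn which dictID a frame expects before choosing a dictionary. A hedged sketch follows; the helper name is ours, and the return-value convention (0 meaning the struct was filled) is assumed from the upstream API.

    /* Sketch only: report the dictID recorded in a v0.7 frame header, 0 if unavailable. */
    static unsigned v07FrameDictID(const void* src, size_t srcSize)
    {
        ZSTDv07_frameParams fp;
        size_t const ret = ZSTDv07_getFrameParams(&fp, src, srcSize);
        if (ret != 0) return 0;   /* header incomplete, or not a v0.7 frame */
        /* fp.frameContentSize, fp.windowSize and fp.checksumFlag are filled as well */
        return fp.dictID;         /* 0 when no dictionary ID was recorded */
    }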
+* The function will report how many bytes were read or written by modifying *srcSizePtr and *dstCapacityPtr. +* Note that it may not consume the entire input, in which case it's up to the caller to present remaining input again. +* The content of `dst` will be overwritten (up to *dstCapacityPtr) at each function call, so save its content if it matters, or change `dst`. +* @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to help latency), +* or 0 when a frame is completely decoded, +* or an error code, which can be tested using ZBUFFv07_isError(). +* +* Hint : recommended buffer sizes (not compulsory) : ZBUFFv07_recommendedDInSize() and ZBUFFv07_recommendedDOutSize() +* output : ZBUFFv07_recommendedDOutSize== 128 KB block size is the internal unit, it ensures it's always possible to write a full block when decoded. +* input : ZBUFFv07_recommendedDInSize == 128KB + 3; +* just follow indications from ZBUFFv07_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 . +* *******************************************************************************/ -struct ZSTD_DCtx_s -{ - const ZSTD_seqSymbol* LLTptr; - const ZSTD_seqSymbol* MLTptr; - const ZSTD_seqSymbol* OFTptr; - const HUF_DTable* HUFptr; - ZSTD_entropyDTables_t entropy; - U32 workspace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; /* space needed when building huffman tables */ - const void* previousDstEnd; /* detect continuity */ - const void* prefixStart; /* start of current segment */ - const void* virtualStart; /* virtual start of previous segment if it was just before current one */ - const void* dictEnd; /* end of previous segment */ - size_t expected; - ZSTD_frameHeader fParams; - U64 decodedSize; - blockType_e bType; /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */ - ZSTD_dStage stage; - U32 litEntropy; - U32 fseEntropy; - XXH64_state_t xxhState; - size_t headerSize; - ZSTD_format_e format; - const BYTE* litPtr; - ZSTD_customMem customMem; - size_t litSize; - size_t rleSize; - size_t staticSize; - int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */ - /* dictionary */ - ZSTD_DDict* ddictLocal; - const ZSTD_DDict* ddict; /* set by ZSTD_initDStream_usingDDict(), or ZSTD_DCtx_refDDict() */ - U32 dictID; - int ddictIsCold; /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */ - ZSTD_dictUses_e dictUses; +/* ************************************* +* Tool functions +***************************************/ +ZSTDLIBv07_API unsigned ZBUFFv07_isError(size_t errorCode); +ZSTDLIBv07_API const char* ZBUFFv07_getErrorName(size_t errorCode); - /* streaming */ - ZSTD_dStreamStage streamStage; - char* inBuff; - size_t inBuffSize; - size_t inPos; - size_t maxWindowSize; - char* outBuff; - size_t outBuffSize; - size_t outStart; - size_t outEnd; - size_t lhSize; - void* legacyContext; - U32 previousLegacyVersion; - U32 legacyVersion; - U32 hostageByte; - int noForwardProgress; +/** Functions below provide recommended buffer sizes for Compression or Decompression operations. 
+* These sizes are just hints, they tend to offer better latency */ +ZSTDLIBv07_API size_t ZBUFFv07_recommendedDInSize(void); +ZSTDLIBv07_API size_t ZBUFFv07_recommendedDOutSize(void); - /* workspace */ - BYTE litBuffer[ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH]; - BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX]; -}; /* typedef'd to ZSTD_DCtx within "zstd.h" */ +/*-************************************* +* Constants +***************************************/ +#define ZSTDv07_MAGICNUMBER 0xFD2FB527 /* v0.7 */ -/*-******************************************************* - * Shared internal functions - *********************************************************/ -/*! ZSTD_loadDEntropy() : - * dict : must point at beginning of a valid zstd dictionary. - * @return : size of dictionary header (size of magic number + dict ID + entropy tables) */ -size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy, - const void* const dict, size_t const dictSize); +#if defined (__cplusplus) +} +#endif -/*! ZSTD_checkContinuity() : - * check if next `dst` follows previous position, where decompression ended. - * If yes, do nothing (continue on current segment). - * If not, classify previous segment as "external dictionary", and start a new segment. - * This function cannot fail. */ -void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst); +#endif /* ZSTDv07_H_235446 */ +/**** ended inlining zstd_v07.h ****/ +#endif +/** ZSTD_isLegacy() : + @return : > 0 if supported by legacy decoder. 0 otherwise. + return value is the version. +*/ +MEM_STATIC unsigned ZSTD_isLegacy(const void* src, size_t srcSize) +{ + U32 magicNumberLE; + if (srcSize<4) return 0; + magicNumberLE = MEM_readLE32(src); + switch(magicNumberLE) + { +#if (ZSTD_LEGACY_SUPPORT <= 1) + case ZSTDv01_magicNumberLE:return 1; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 2) + case ZSTDv02_magicNumber : return 2; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 3) + case ZSTDv03_magicNumber : return 3; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 4) + case ZSTDv04_magicNumber : return 4; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 5) + case ZSTDv05_MAGICNUMBER : return 5; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) + case ZSTDv06_MAGICNUMBER : return 6; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) + case ZSTDv07_MAGICNUMBER : return 7; +#endif + default : return 0; + } +} -#endif /* ZSTD_DECOMPRESS_INTERNAL_H */ -/**** ended inlining zstd_decompress_internal.h ****/ -/**** start inlining zstd_ddict.h ****/ -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. 
- */ +MEM_STATIC unsigned long long ZSTD_getDecompressedSize_legacy(const void* src, size_t srcSize) +{ + U32 const version = ZSTD_isLegacy(src, srcSize); + if (version < 5) return 0; /* no decompressed size in frame header, or not a legacy format */ +#if (ZSTD_LEGACY_SUPPORT <= 5) + if (version==5) { + ZSTDv05_parameters fParams; + size_t const frResult = ZSTDv05_getFrameParams(&fParams, src, srcSize); + if (frResult != 0) return 0; + return fParams.srcSize; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) + if (version==6) { + ZSTDv06_frameParams fParams; + size_t const frResult = ZSTDv06_getFrameParams(&fParams, src, srcSize); + if (frResult != 0) return 0; + return fParams.frameContentSize; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) + if (version==7) { + ZSTDv07_frameParams fParams; + size_t const frResult = ZSTDv07_getFrameParams(&fParams, src, srcSize); + if (frResult != 0) return 0; + return fParams.frameContentSize; + } +#endif + return 0; /* should not be possible */ +} -#ifndef ZSTD_DDICT_H -#define ZSTD_DDICT_H -/*-******************************************************* - * Dependencies - *********************************************************/ -#include /* size_t */ -/**** skipping file: zstd.h ****/ +MEM_STATIC size_t ZSTD_decompressLegacy( + void* dst, size_t dstCapacity, + const void* src, size_t compressedSize, + const void* dict,size_t dictSize) +{ + U32 const version = ZSTD_isLegacy(src, compressedSize); + (void)dst; (void)dstCapacity; (void)dict; (void)dictSize; /* unused when ZSTD_LEGACY_SUPPORT >= 8 */ + switch(version) + { +#if (ZSTD_LEGACY_SUPPORT <= 1) + case 1 : + return ZSTDv01_decompress(dst, dstCapacity, src, compressedSize); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 2) + case 2 : + return ZSTDv02_decompress(dst, dstCapacity, src, compressedSize); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 3) + case 3 : + return ZSTDv03_decompress(dst, dstCapacity, src, compressedSize); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 4) + case 4 : + return ZSTDv04_decompress(dst, dstCapacity, src, compressedSize); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 5) + case 5 : + { size_t result; + ZSTDv05_DCtx* const zd = ZSTDv05_createDCtx(); + if (zd==NULL) return ERROR(memory_allocation); + result = ZSTDv05_decompress_usingDict(zd, dst, dstCapacity, src, compressedSize, dict, dictSize); + ZSTDv05_freeDCtx(zd); + return result; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) + case 6 : + { size_t result; + ZSTDv06_DCtx* const zd = ZSTDv06_createDCtx(); + if (zd==NULL) return ERROR(memory_allocation); + result = ZSTDv06_decompress_usingDict(zd, dst, dstCapacity, src, compressedSize, dict, dictSize); + ZSTDv06_freeDCtx(zd); + return result; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) + case 7 : + { size_t result; + ZSTDv07_DCtx* const zd = ZSTDv07_createDCtx(); + if (zd==NULL) return ERROR(memory_allocation); + result = ZSTDv07_decompress_usingDict(zd, dst, dstCapacity, src, compressedSize, dict, dictSize); + ZSTDv07_freeDCtx(zd); + return result; + } +#endif + default : + return ERROR(prefix_unknown); + } +} + +MEM_STATIC ZSTD_frameSizeInfo ZSTD_findFrameSizeInfoLegacy(const void *src, size_t srcSize) +{ + ZSTD_frameSizeInfo frameSizeInfo; + U32 const version = ZSTD_isLegacy(src, srcSize); + switch(version) + { +#if (ZSTD_LEGACY_SUPPORT <= 1) + case 1 : + ZSTDv01_findFrameSizeInfoLegacy(src, srcSize, + &frameSizeInfo.compressedSize, + &frameSizeInfo.decompressedBound); + break; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 2) + case 2 : + ZSTDv02_findFrameSizeInfoLegacy(src, srcSize, + &frameSizeInfo.compressedSize, + 
&frameSizeInfo.decompressedBound); + break; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 3) + case 3 : + ZSTDv03_findFrameSizeInfoLegacy(src, srcSize, + &frameSizeInfo.compressedSize, + &frameSizeInfo.decompressedBound); + break; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 4) + case 4 : + ZSTDv04_findFrameSizeInfoLegacy(src, srcSize, + &frameSizeInfo.compressedSize, + &frameSizeInfo.decompressedBound); + break; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 5) + case 5 : + ZSTDv05_findFrameSizeInfoLegacy(src, srcSize, + &frameSizeInfo.compressedSize, + &frameSizeInfo.decompressedBound); + break; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) + case 6 : + ZSTDv06_findFrameSizeInfoLegacy(src, srcSize, + &frameSizeInfo.compressedSize, + &frameSizeInfo.decompressedBound); + break; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) + case 7 : + ZSTDv07_findFrameSizeInfoLegacy(src, srcSize, + &frameSizeInfo.compressedSize, + &frameSizeInfo.decompressedBound); + break; +#endif + default : + frameSizeInfo.compressedSize = ERROR(prefix_unknown); + frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR; + break; + } + if (!ZSTD_isError(frameSizeInfo.compressedSize) && frameSizeInfo.compressedSize > srcSize) { + frameSizeInfo.compressedSize = ERROR(srcSize_wrong); + frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR; + } + return frameSizeInfo; +} +MEM_STATIC size_t ZSTD_findFrameCompressedSizeLegacy(const void *src, size_t srcSize) +{ + ZSTD_frameSizeInfo frameSizeInfo = ZSTD_findFrameSizeInfoLegacy(src, srcSize); + return frameSizeInfo.compressedSize; +} -/*-******************************************************* - * Interface - *********************************************************/ +MEM_STATIC size_t ZSTD_freeLegacyStreamContext(void* legacyContext, U32 version) +{ + switch(version) + { + default : + case 1 : + case 2 : + case 3 : + (void)legacyContext; + return ERROR(version_unsupported); +#if (ZSTD_LEGACY_SUPPORT <= 4) + case 4 : return ZBUFFv04_freeDCtx((ZBUFFv04_DCtx*)legacyContext); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 5) + case 5 : return ZBUFFv05_freeDCtx((ZBUFFv05_DCtx*)legacyContext); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) + case 6 : return ZBUFFv06_freeDCtx((ZBUFFv06_DCtx*)legacyContext); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) + case 7 : return ZBUFFv07_freeDCtx((ZBUFFv07_DCtx*)legacyContext); +#endif + } +} -/* note: several prototypes are already published in `zstd.h` : - * ZSTD_createDDict() - * ZSTD_createDDict_byReference() - * ZSTD_createDDict_advanced() - * ZSTD_freeDDict() - * ZSTD_initStaticDDict() - * ZSTD_sizeof_DDict() - * ZSTD_estimateDDictSize() - * ZSTD_getDictID_fromDict() - */ -const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict); -size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict); +MEM_STATIC size_t ZSTD_initLegacyStream(void** legacyContext, U32 prevVersion, U32 newVersion, + const void* dict, size_t dictSize) +{ + DEBUGLOG(5, "ZSTD_initLegacyStream for v0.%u", newVersion); + if (prevVersion != newVersion) ZSTD_freeLegacyStreamContext(*legacyContext, prevVersion); + switch(newVersion) + { + default : + case 1 : + case 2 : + case 3 : + (void)dict; (void)dictSize; + return 0; +#if (ZSTD_LEGACY_SUPPORT <= 4) + case 4 : + { + ZBUFFv04_DCtx* dctx = (prevVersion != newVersion) ? 
ZBUFFv04_createDCtx() : (ZBUFFv04_DCtx*)*legacyContext; + if (dctx==NULL) return ERROR(memory_allocation); + ZBUFFv04_decompressInit(dctx); + ZBUFFv04_decompressWithDictionary(dctx, dict, dictSize); + *legacyContext = dctx; + return 0; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 5) + case 5 : + { + ZBUFFv05_DCtx* dctx = (prevVersion != newVersion) ? ZBUFFv05_createDCtx() : (ZBUFFv05_DCtx*)*legacyContext; + if (dctx==NULL) return ERROR(memory_allocation); + ZBUFFv05_decompressInitDictionary(dctx, dict, dictSize); + *legacyContext = dctx; + return 0; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) + case 6 : + { + ZBUFFv06_DCtx* dctx = (prevVersion != newVersion) ? ZBUFFv06_createDCtx() : (ZBUFFv06_DCtx*)*legacyContext; + if (dctx==NULL) return ERROR(memory_allocation); + ZBUFFv06_decompressInitDictionary(dctx, dict, dictSize); + *legacyContext = dctx; + return 0; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) + case 7 : + { + ZBUFFv07_DCtx* dctx = (prevVersion != newVersion) ? ZBUFFv07_createDCtx() : (ZBUFFv07_DCtx*)*legacyContext; + if (dctx==NULL) return ERROR(memory_allocation); + ZBUFFv07_decompressInitDictionary(dctx, dict, dictSize); + *legacyContext = dctx; + return 0; + } +#endif + } +} + -void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); +MEM_STATIC size_t ZSTD_decompressLegacyStream(void* legacyContext, U32 version, + ZSTD_outBuffer* output, ZSTD_inBuffer* input) +{ + DEBUGLOG(5, "ZSTD_decompressLegacyStream for v0.%u", version); + switch(version) + { + default : + case 1 : + case 2 : + case 3 : + (void)legacyContext; (void)output; (void)input; + return ERROR(version_unsupported); +#if (ZSTD_LEGACY_SUPPORT <= 4) + case 4 : + { + ZBUFFv04_DCtx* dctx = (ZBUFFv04_DCtx*) legacyContext; + const void* src = (const char*)input->src + input->pos; + size_t readSize = input->size - input->pos; + void* dst = (char*)output->dst + output->pos; + size_t decodedSize = output->size - output->pos; + size_t const hintSize = ZBUFFv04_decompressContinue(dctx, dst, &decodedSize, src, &readSize); + output->pos += decodedSize; + input->pos += readSize; + return hintSize; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 5) + case 5 : + { + ZBUFFv05_DCtx* dctx = (ZBUFFv05_DCtx*) legacyContext; + const void* src = (const char*)input->src + input->pos; + size_t readSize = input->size - input->pos; + void* dst = (char*)output->dst + output->pos; + size_t decodedSize = output->size - output->pos; + size_t const hintSize = ZBUFFv05_decompressContinue(dctx, dst, &decodedSize, src, &readSize); + output->pos += decodedSize; + input->pos += readSize; + return hintSize; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) + case 6 : + { + ZBUFFv06_DCtx* dctx = (ZBUFFv06_DCtx*) legacyContext; + const void* src = (const char*)input->src + input->pos; + size_t readSize = input->size - input->pos; + void* dst = (char*)output->dst + output->pos; + size_t decodedSize = output->size - output->pos; + size_t const hintSize = ZBUFFv06_decompressContinue(dctx, dst, &decodedSize, src, &readSize); + output->pos += decodedSize; + input->pos += readSize; + return hintSize; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) + case 7 : + { + ZBUFFv07_DCtx* dctx = (ZBUFFv07_DCtx*) legacyContext; + const void* src = (const char*)input->src + input->pos; + size_t readSize = input->size - input->pos; + void* dst = (char*)output->dst + output->pos; + size_t decodedSize = output->size - output->pos; + size_t const hintSize = ZBUFFv07_decompressContinue(dctx, dst, &decodedSize, src, &readSize); + output->pos += decodedSize; + input->pos += readSize; 
+ return hintSize; + } +#endif + } +} -#endif /* ZSTD_DDICT_H */ -/**** ended inlining zstd_ddict.h ****/ +#if defined (__cplusplus) +} +#endif -#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) -/**** start inlining zstd_legacy.h ****/ -#error Unable to find "zstd_legacy.h" -/**** ended inlining zstd_legacy.h ****/ +#endif /* ZSTD_LEGACY_H */ +/**** ended inlining ../legacy/zstd_legacy.h ****/ #endif @@ -8426,6 +11181,10 @@ void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) dctx->virtualStart = ddict->dictContent; dctx->dictEnd = (const BYTE*)ddict->dictContent + ddict->dictSize; dctx->previousDstEnd = dctx->dictEnd; +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + dctx->dictContentBeginForFuzzing = dctx->prefixStart; + dctx->dictContentEndForFuzzing = dctx->previousDstEnd; +#endif if (ddict->entropyPresent) { dctx->litEntropy = 1; dctx->fseEntropy = 1; @@ -8468,7 +11227,7 @@ ZSTD_loadEntropy_intoDDict(ZSTD_DDict* ddict, /* load entropy tables */ RETURN_ERROR_IF(ZSTD_isError(ZSTD_loadDEntropy( &ddict->entropy, ddict->dictContent, ddict->dictSize)), - dictionary_corrupted); + dictionary_corrupted, ""); ddict->entropyPresent = 1; return 0; } @@ -8484,17 +11243,17 @@ static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict, ddict->dictContent = dict; if (!dict) dictSize = 0; } else { - void* const internalBuffer = ZSTD_malloc(dictSize, ddict->cMem); + void* const internalBuffer = ZSTD_customMalloc(dictSize, ddict->cMem); ddict->dictBuffer = internalBuffer; ddict->dictContent = internalBuffer; if (!internalBuffer) return ERROR(memory_allocation); - memcpy(internalBuffer, dict, dictSize); + ZSTD_memcpy(internalBuffer, dict, dictSize); } ddict->dictSize = dictSize; ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */ /* parse dictionary content */ - FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) ); + FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) , ""); return 0; } @@ -8504,9 +11263,9 @@ ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, ZSTD_dictContentType_e dictContentType, ZSTD_customMem customMem) { - if (!customMem.customAlloc ^ !customMem.customFree) return NULL; + if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL; - { ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_malloc(sizeof(ZSTD_DDict), customMem); + { ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_customMalloc(sizeof(ZSTD_DDict), customMem); if (ddict == NULL) return NULL; ddict->cMem = customMem; { size_t const initResult = ZSTD_initDDict_internal(ddict, @@ -8555,7 +11314,7 @@ const ZSTD_DDict* ZSTD_initStaticDDict( if ((size_t)sBuffer & 7) return NULL; /* 8-aligned */ if (sBufferSize < neededSpace) return NULL; if (dictLoadMethod == ZSTD_dlm_byCopy) { - memcpy(ddict+1, dict, dictSize); /* local copy */ + ZSTD_memcpy(ddict+1, dict, dictSize); /* local copy */ dict = ddict+1; } if (ZSTD_isError( ZSTD_initDDict_internal(ddict, @@ -8570,8 +11329,8 @@ size_t ZSTD_freeDDict(ZSTD_DDict* ddict) { if (ddict==NULL) return 0; /* support free on NULL */ { ZSTD_customMem const cMem = ddict->cMem; - ZSTD_free(ddict->dictBuffer, cMem); - ZSTD_free(ddict, cMem); + ZSTD_customFree(ddict->dictBuffer, cMem); + ZSTD_customFree(ddict, cMem); return 0; } } @@ -8599,10 +11358,10 @@ unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict) if (ddict==NULL) return 0; return ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize); } -/**** ended inlining zstd_ddict.c ****/ -/**** start 
inlining zstd_decompress.c ****/ +/**** ended inlining decompress/zstd_ddict.c ****/ +/**** start inlining decompress/zstd_decompress.c ****/ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -8658,19 +11417,20 @@ unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict) /*-******************************************************* * Dependencies *********************************************************/ -#include /* memcpy, memmove, memset */ -/**** skipping file: cpu.h ****/ -/**** skipping file: mem.h ****/ +/**** skipping file: ../common/zstd_deps.h ****/ +/**** skipping file: ../common/cpu.h ****/ +/**** skipping file: ../common/mem.h ****/ #define FSE_STATIC_LINKING_ONLY -/**** skipping file: fse.h ****/ +/**** skipping file: ../common/fse.h ****/ #define HUF_STATIC_LINKING_ONLY -/**** skipping file: huf.h ****/ -/**** skipping file: zstd_internal.h ****/ +/**** skipping file: ../common/huf.h ****/ +/**** skipping file: ../common/xxhash.h ****/ +/**** skipping file: ../common/zstd_internal.h ****/ /**** skipping file: zstd_decompress_internal.h ****/ /**** skipping file: zstd_ddict.h ****/ /**** start inlining zstd_decompress_block.h ****/ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -8686,9 +11446,9 @@ unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict) /*-******************************************************* * Dependencies *********************************************************/ -#include /* size_t */ -/**** skipping file: zstd.h ****/ -/**** skipping file: zstd_internal.h ****/ +/**** skipping file: ../common/zstd_deps.h ****/ +/**** skipping file: ../zstd.h ****/ +/**** skipping file: ../common/zstd_internal.h ****/ /**** skipping file: zstd_decompress_internal.h ****/ @@ -8719,22 +11479,163 @@ size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, * this function must be called with valid parameters only * (dt is large enough, normalizedCounter distribution total is a power of 2, max is within range, etc.) * in which case it cannot fail. + * The workspace must be 4-byte aligned and at least ZSTD_BUILD_FSE_TABLE_WKSP_SIZE bytes, which is + * defined in zstd_decompress_internal.h. * Internal use only. */ void ZSTD_buildFSETable(ZSTD_seqSymbol* dt, const short* normalizedCounter, unsigned maxSymbolValue, const U32* baseValue, const U32* nbAdditionalBits, - unsigned tableLog); + unsigned tableLog, void* wksp, size_t wkspSize, + int bmi2); #endif /* ZSTD_DEC_BLOCK_H */ /**** ended inlining zstd_decompress_block.h ****/ #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) -/**** skipping file: zstd_legacy.h ****/ +/**** skipping file: ../legacy/zstd_legacy.h ****/ #endif + +/************************************* + * Multiple DDicts Hashset internals * + *************************************/ + +#define DDICT_HASHSET_MAX_LOAD_FACTOR_COUNT_MULT 4 +#define DDICT_HASHSET_MAX_LOAD_FACTOR_SIZE_MULT 3 /* These two constants represent SIZE_MULT/COUNT_MULT load factor without using a float. + * Currently, that means a 0.75 load factor. + * So, if count * COUNT_MULT / size * SIZE_MULT != 0, then we've exceeded + * the load factor of the ddict hash set. 
+ */ + +#define DDICT_HASHSET_TABLE_BASE_SIZE 64 +#define DDICT_HASHSET_RESIZE_FACTOR 2 + +/* Hash function to determine starting position of dict insertion within the table + * Returns an index between [0, hashSet->ddictPtrTableSize] + */ +static size_t ZSTD_DDictHashSet_getIndex(const ZSTD_DDictHashSet* hashSet, U32 dictID) { + const U64 hash = XXH64(&dictID, sizeof(U32), 0); + /* DDict ptr table size is a multiple of 2, use size - 1 as mask to get index within [0, hashSet->ddictPtrTableSize) */ + return hash & (hashSet->ddictPtrTableSize - 1); +} + +/* Adds DDict to a hashset without resizing it. + * If inserting a DDict with a dictID that already exists in the set, replaces the one in the set. + * Returns 0 if successful, or a zstd error code if something went wrong. + */ +static size_t ZSTD_DDictHashSet_emplaceDDict(ZSTD_DDictHashSet* hashSet, const ZSTD_DDict* ddict) { + const U32 dictID = ZSTD_getDictID_fromDDict(ddict); + size_t idx = ZSTD_DDictHashSet_getIndex(hashSet, dictID); + const size_t idxRangeMask = hashSet->ddictPtrTableSize - 1; + RETURN_ERROR_IF(hashSet->ddictPtrCount == hashSet->ddictPtrTableSize, GENERIC, "Hash set is full!"); + DEBUGLOG(4, "Hashed index: for dictID: %u is %zu", dictID, idx); + while (hashSet->ddictPtrTable[idx] != NULL) { + /* Replace existing ddict if inserting ddict with same dictID */ + if (ZSTD_getDictID_fromDDict(hashSet->ddictPtrTable[idx]) == dictID) { + DEBUGLOG(4, "DictID already exists, replacing rather than adding"); + hashSet->ddictPtrTable[idx] = ddict; + return 0; + } + idx &= idxRangeMask; + idx++; + } + DEBUGLOG(4, "Final idx after probing for dictID %u is: %zu", dictID, idx); + hashSet->ddictPtrTable[idx] = ddict; + hashSet->ddictPtrCount++; + return 0; +} + +/* Expands hash table by factor of DDICT_HASHSET_RESIZE_FACTOR and + * rehashes all values, allocates new table, frees old table. + * Returns 0 on success, otherwise a zstd error code. + */ +static size_t ZSTD_DDictHashSet_expand(ZSTD_DDictHashSet* hashSet, ZSTD_customMem customMem) { + size_t newTableSize = hashSet->ddictPtrTableSize * DDICT_HASHSET_RESIZE_FACTOR; + const ZSTD_DDict** newTable = (const ZSTD_DDict**)ZSTD_customCalloc(sizeof(ZSTD_DDict*) * newTableSize, customMem); + const ZSTD_DDict** oldTable = hashSet->ddictPtrTable; + size_t oldTableSize = hashSet->ddictPtrTableSize; + size_t i; + + DEBUGLOG(4, "Expanding DDict hash table! Old size: %zu new size: %zu", oldTableSize, newTableSize); + RETURN_ERROR_IF(!newTable, memory_allocation, "Expanded hashset allocation failed!"); + hashSet->ddictPtrTable = newTable; + hashSet->ddictPtrTableSize = newTableSize; + hashSet->ddictPtrCount = 0; + for (i = 0; i < oldTableSize; ++i) { + if (oldTable[i] != NULL) { + FORWARD_IF_ERROR(ZSTD_DDictHashSet_emplaceDDict(hashSet, oldTable[i]), ""); + } + } + ZSTD_customFree((void*)oldTable, customMem); + DEBUGLOG(4, "Finished re-hash"); + return 0; +} + +/* Fetches a DDict with the given dictID + * Returns the ZSTD_DDict* with the requested dictID. If it doesn't exist, then returns NULL. 
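The hash-set helpers here rely on a classic open-addressing scheme: a power-of-two table, an XXH64 hash masked into it, and linear probing until the wanted dictID or an empty slot (dictID 0) is found. Below is a generic, self-contained sketch of that probing idea; the types and names are illustrative and this is not the library's exact code.

    #include <stddef.h>

    typedef struct { unsigned key; const void* value; } Slot;   /* key 0 marks an empty slot */

    /* Sketch only: look up `key` in a power-of-two open-addressing table. */
    static const void* probeLookup(const Slot* table, size_t tableSize /* power of 2 */,
                                   size_t hash, unsigned key)
    {
        size_t const mask = tableSize - 1;
        size_t idx = hash & mask;
        for (;;) {
            if (table[idx].key == key) return table[idx].value;  /* found */
            if (table[idx].key == 0)   return NULL;              /* empty slot: absent */
            idx = (idx + 1) & mask;                              /* next slot, wrap at the end */
        }
    }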
+ */ +static const ZSTD_DDict* ZSTD_DDictHashSet_getDDict(ZSTD_DDictHashSet* hashSet, U32 dictID) { + size_t idx = ZSTD_DDictHashSet_getIndex(hashSet, dictID); + const size_t idxRangeMask = hashSet->ddictPtrTableSize - 1; + DEBUGLOG(4, "Hashed index: for dictID: %u is %zu", dictID, idx); + for (;;) { + size_t currDictID = ZSTD_getDictID_fromDDict(hashSet->ddictPtrTable[idx]); + if (currDictID == dictID || currDictID == 0) { + /* currDictID == 0 implies a NULL ddict entry */ + break; + } else { + idx &= idxRangeMask; /* Goes to start of table when we reach the end */ + idx++; + } + } + DEBUGLOG(4, "Final idx after probing for dictID %u is: %zu", dictID, idx); + return hashSet->ddictPtrTable[idx]; +} + +/* Allocates space for and returns a ddict hash set + * The hash set's ZSTD_DDict* table has all values automatically set to NULL to begin with. + * Returns NULL if allocation failed. + */ +static ZSTD_DDictHashSet* ZSTD_createDDictHashSet(ZSTD_customMem customMem) { + ZSTD_DDictHashSet* ret = (ZSTD_DDictHashSet*)ZSTD_customMalloc(sizeof(ZSTD_DDictHashSet), customMem); + DEBUGLOG(4, "Allocating new hash set"); + ret->ddictPtrTable = (const ZSTD_DDict**)ZSTD_customCalloc(DDICT_HASHSET_TABLE_BASE_SIZE * sizeof(ZSTD_DDict*), customMem); + ret->ddictPtrTableSize = DDICT_HASHSET_TABLE_BASE_SIZE; + ret->ddictPtrCount = 0; + if (!ret || !ret->ddictPtrTable) { + return NULL; + } + return ret; +} + +/* Frees the table of ZSTD_DDict* within a hashset, then frees the hashset itself. + * Note: The ZSTD_DDict* within the table are NOT freed. + */ +static void ZSTD_freeDDictHashSet(ZSTD_DDictHashSet* hashSet, ZSTD_customMem customMem) { + DEBUGLOG(4, "Freeing ddict hash set"); + if (hashSet && hashSet->ddictPtrTable) { + ZSTD_customFree((void*)hashSet->ddictPtrTable, customMem); + } + if (hashSet) { + ZSTD_customFree(hashSet, customMem); + } +} + +/* Public function: Adds a DDict into the ZSTD_DDictHashSet, possibly triggering a resize of the hash set. + * Returns 0 on success, or a ZSTD error. 
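The SIZE_MULT/COUNT_MULT constants express a 3/4 load-factor threshold without floating point: count/size >= 3/4 is equivalent to count * 4 >= size * 3 after cross-multiplying. A tiny illustrative check in that spirit; the function name is this sketch's, not the library's.

    /* Sketch only: decide whether an open-addressing table should grow. */
    static int ddictSetNeedsResize(size_t ddictCount, size_t tableSize)
    {
        /* grow once the table is at least three-quarters full */
        return (ddictCount * 4) >= (tableSize * 3);
    }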
+ */ +static size_t ZSTD_DDictHashSet_addDDict(ZSTD_DDictHashSet* hashSet, const ZSTD_DDict* ddict, ZSTD_customMem customMem) { + DEBUGLOG(4, "Adding dict ID: %u to hashset with - Count: %zu Tablesize: %zu", ZSTD_getDictID_fromDDict(ddict), hashSet->ddictPtrCount, hashSet->ddictPtrTableSize); + if (hashSet->ddictPtrCount * DDICT_HASHSET_MAX_LOAD_FACTOR_COUNT_MULT / hashSet->ddictPtrTableSize * DDICT_HASHSET_MAX_LOAD_FACTOR_SIZE_MULT != 0) { + FORWARD_IF_ERROR(ZSTD_DDictHashSet_expand(hashSet, customMem), ""); + } + FORWARD_IF_ERROR(ZSTD_DDictHashSet_emplaceDDict(hashSet, ddict), ""); + return 0; +} + /*-************************************************************* * Context management ***************************************************************/ @@ -8757,11 +11658,19 @@ static size_t ZSTD_startingInputLength(ZSTD_format_e format) return startingInputLength; } +static void ZSTD_DCtx_resetParameters(ZSTD_DCtx* dctx) +{ + assert(dctx->streamStage == zdss_init); + dctx->format = ZSTD_f_zstd1; + dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT; + dctx->outBufferMode = ZSTD_bm_buffered; + dctx->forceIgnoreChecksum = ZSTD_d_validateChecksum; + dctx->refMultipleDDicts = ZSTD_rmd_refSingleDDict; +} + static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx) { - dctx->format = ZSTD_f_zstd1; /* ZSTD_decompressBegin() invokes ZSTD_startingInputLength() with argument dctx->format */ dctx->staticSize = 0; - dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT; dctx->ddict = NULL; dctx->ddictLocal = NULL; dctx->dictEnd = NULL; @@ -8774,7 +11683,13 @@ static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx) dctx->legacyContext = NULL; dctx->previousLegacyVersion = 0; dctx->noForwardProgress = 0; + dctx->oversizedDuration = 0; dctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid()); + dctx->ddictSet = NULL; + ZSTD_DCtx_resetParameters(dctx); +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + dctx->dictContentEndForFuzzing = NULL; +#endif } ZSTD_DCtx* ZSTD_initStaticDCtx(void *workspace, size_t workspaceSize) @@ -8792,9 +11707,9 @@ ZSTD_DCtx* ZSTD_initStaticDCtx(void *workspace, size_t workspaceSize) ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem) { - if (!customMem.customAlloc ^ !customMem.customFree) return NULL; + if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL; - { ZSTD_DCtx* const dctx = (ZSTD_DCtx*)ZSTD_malloc(sizeof(*dctx), customMem); + { ZSTD_DCtx* const dctx = (ZSTD_DCtx*)ZSTD_customMalloc(sizeof(*dctx), customMem); if (!dctx) return NULL; dctx->customMem = customMem; ZSTD_initDCtx_internal(dctx); @@ -8822,13 +11737,17 @@ size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx) RETURN_ERROR_IF(dctx->staticSize, memory_allocation, "not compatible with static DCtx"); { ZSTD_customMem const cMem = dctx->customMem; ZSTD_clearDict(dctx); - ZSTD_free(dctx->inBuff, cMem); + ZSTD_customFree(dctx->inBuff, cMem); dctx->inBuff = NULL; #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) if (dctx->legacyContext) ZSTD_freeLegacyStreamContext(dctx->legacyContext, dctx->previousLegacyVersion); #endif - ZSTD_free(dctx, cMem); + if (dctx->ddictSet) { + ZSTD_freeDDictHashSet(dctx->ddictSet, cMem); + dctx->ddictSet = NULL; + } + ZSTD_customFree(dctx, cMem); return 0; } } @@ -8837,7 +11756,30 @@ size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx) void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx) { size_t const toCopy = (size_t)((char*)(&dstDCtx->inBuff) - (char*)dstDCtx); - memcpy(dstDCtx, srcDCtx, toCopy); /* no need to copy workspace */ + ZSTD_memcpy(dstDCtx, srcDCtx, toCopy); /* no need to copy workspace */ +} + 
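ZSTD_createDCtx_advanced() takes a ZSTD_customMem, and the XOR check above requires customAlloc and customFree to be supplied together (or both left NULL for the defaults). Below is a hedged sketch wiring the standard allocator through that interface; the ZSTD_customMem field order (customAlloc, customFree, opaque) is the one published in zstd.h and is stated here as an assumption of this example.

    #include <stdlib.h>

    /* Sketch only: trivial callbacks that ignore the opaque pointer. */
    static void* exampleAlloc(void* opaque, size_t size) { (void)opaque; return malloc(size); }
    static void  exampleFree (void* opaque, void* addr)  { (void)opaque; free(addr); }

    static ZSTD_DCtx* createDCtxWithCustomMem(void)
    {
        ZSTD_customMem const cmem = { exampleAlloc, exampleFree, NULL /* opaque */ };
        return ZSTD_createDCtx_advanced(cmem);   /* NULL if the allocation itself fails */
    }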
+/* Given a dctx with a digested frame params, re-selects the correct ZSTD_DDict based on + * the requested dict ID from the frame. If there exists a reference to the correct ZSTD_DDict, then + * accordingly sets the ddict to be used to decompress the frame. + * + * If no DDict is found, then no action is taken, and the ZSTD_DCtx::ddict remains as-is. + * + * ZSTD_d_refMultipleDDicts must be enabled for this function to be called. + */ +static void ZSTD_DCtx_selectFrameDDict(ZSTD_DCtx* dctx) { + assert(dctx->refMultipleDDicts && dctx->ddictSet); + DEBUGLOG(4, "Adjusting DDict based on requested dict ID from frame"); + if (dctx->ddict) { + const ZSTD_DDict* frameDDict = ZSTD_DDictHashSet_getDDict(dctx->ddictSet, dctx->fParams.dictID); + if (frameDDict) { + DEBUGLOG(4, "DDict found!"); + ZSTD_clearDict(dctx); + dctx->dictID = dctx->fParams.dictID; + dctx->ddict = frameDDict; + dctx->dictUses = ZSTD_use_indefinitely; + } + } } @@ -8871,7 +11813,7 @@ unsigned ZSTD_isFrame(const void* buffer, size_t size) static size_t ZSTD_frameHeaderSize_internal(const void* src, size_t srcSize, ZSTD_format_e format) { size_t const minInputSize = ZSTD_startingInputLength(format); - RETURN_ERROR_IF(srcSize < minInputSize, srcSize_wrong); + RETURN_ERROR_IF(srcSize < minInputSize, srcSize_wrong, ""); { BYTE const fhd = ((const BYTE*)src)[minInputSize-1]; U32 const dictID= fhd & 3; @@ -8904,7 +11846,7 @@ size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, s const BYTE* ip = (const BYTE*)src; size_t const minInputSize = ZSTD_startingInputLength(format); - memset(zfhPtr, 0, sizeof(*zfhPtr)); /* not strictly necessary, but static analyzer do not understand that zfhPtr is only going to be read only if return value is zero, since they are 2 different signals */ + ZSTD_memset(zfhPtr, 0, sizeof(*zfhPtr)); /* not strictly necessary, but static analyzer do not understand that zfhPtr is only going to be read only if return value is zero, since they are 2 different signals */ if (srcSize < minInputSize) return minInputSize; RETURN_ERROR_IF(src==NULL, GENERIC, "invalid parameter"); @@ -8914,12 +11856,12 @@ size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, s /* skippable frame */ if (srcSize < ZSTD_SKIPPABLEHEADERSIZE) return ZSTD_SKIPPABLEHEADERSIZE; /* magic number + frame length */ - memset(zfhPtr, 0, sizeof(*zfhPtr)); + ZSTD_memset(zfhPtr, 0, sizeof(*zfhPtr)); zfhPtr->frameContentSize = MEM_readLE32((const char *)src + ZSTD_FRAMEIDSIZE); zfhPtr->frameType = ZSTD_skippableFrame; return 0; } - RETURN_ERROR(prefix_unknown); + RETURN_ERROR(prefix_unknown, ""); } /* ensure there is enough `srcSize` to fully read/decode frame header */ @@ -8943,7 +11885,7 @@ size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, s if (!singleSegment) { BYTE const wlByte = ip[pos++]; U32 const windowLog = (wlByte >> 3) + ZSTD_WINDOWLOG_ABSOLUTEMIN; - RETURN_ERROR_IF(windowLog > ZSTD_WINDOWLOG_MAX, frameParameter_windowTooLarge); + RETURN_ERROR_IF(windowLog > ZSTD_WINDOWLOG_MAX, frameParameter_windowTooLarge, ""); windowSize = (1ULL << windowLog); windowSize += (windowSize >> 3) * (wlByte&7); } @@ -9015,14 +11957,14 @@ static size_t readSkippableFrameSize(void const* src, size_t srcSize) size_t const skippableHeaderSize = ZSTD_SKIPPABLEHEADERSIZE; U32 sizeU32; - RETURN_ERROR_IF(srcSize < ZSTD_SKIPPABLEHEADERSIZE, srcSize_wrong); + RETURN_ERROR_IF(srcSize < ZSTD_SKIPPABLEHEADERSIZE, srcSize_wrong, ""); sizeU32 = MEM_readLE32((BYTE const*)src + ZSTD_FRAMEIDSIZE); 
RETURN_ERROR_IF((U32)(sizeU32 + ZSTD_SKIPPABLEHEADERSIZE) < sizeU32, - frameParameter_unsupported); + frameParameter_unsupported, ""); { size_t const skippableSize = skippableHeaderSize + sizeU32; - RETURN_ERROR_IF(skippableSize > srcSize, srcSize_wrong); + RETURN_ERROR_IF(skippableSize > srcSize, srcSize_wrong, ""); return skippableSize; } } @@ -9091,20 +12033,29 @@ unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize) /** ZSTD_decodeFrameHeader() : * `headerSize` must be the size provided by ZSTD_frameHeaderSize(). + * If multiple DDict references are enabled, also will choose the correct DDict to use. * @return : 0 if success, or an error code, which can be tested using ZSTD_isError() */ static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t headerSize) { size_t const result = ZSTD_getFrameHeader_advanced(&(dctx->fParams), src, headerSize, dctx->format); if (ZSTD_isError(result)) return result; /* invalid header */ RETURN_ERROR_IF(result>0, srcSize_wrong, "headerSize too small"); + + /* Reference DDict requested by frame if dctx references multiple ddicts */ + if (dctx->refMultipleDDicts == ZSTD_rmd_refMultipleDDicts && dctx->ddictSet) { + ZSTD_DCtx_selectFrameDDict(dctx); + } + #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION /* Skip the dictID check in fuzzing mode, because it makes the search * harder. */ RETURN_ERROR_IF(dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID), - dictionary_wrong); + dictionary_wrong, ""); #endif - if (dctx->fParams.checksumFlag) XXH64_reset(&dctx->xxhState, 0); + dctx->validateChecksum = (dctx->fParams.checksumFlag && !dctx->forceIgnoreChecksum) ? 1 : 0; + if (dctx->validateChecksum) XXH64_reset(&dctx->xxhState, 0); + dctx->processedCSize += headerSize; return 0; } @@ -9119,7 +12070,7 @@ static ZSTD_frameSizeInfo ZSTD_errorFrameSizeInfo(size_t ret) static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize) { ZSTD_frameSizeInfo frameSizeInfo; - memset(&frameSizeInfo, 0, sizeof(ZSTD_frameSizeInfo)); + ZSTD_memset(&frameSizeInfo, 0, sizeof(ZSTD_frameSizeInfo)); #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) if (ZSTD_isLegacy(src, srcSize)) @@ -9174,7 +12125,7 @@ static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize ip += 4; } - frameSizeInfo.compressedSize = ip - ipstart; + frameSizeInfo.compressedSize = (size_t)(ip - ipstart); frameSizeInfo.decompressedBound = (zfh.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) ? 
zfh.frameContentSize : nbBlocks * zfh.blockSizeMax; @@ -9227,7 +12178,7 @@ unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize) size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize) { DEBUGLOG(5, "ZSTD_insertBlock: %u bytes", (unsigned)blockSize); - ZSTD_checkContinuity(dctx, blockStart); + ZSTD_checkContinuity(dctx, blockStart, blockSize); dctx->previousDstEnd = (const char*)blockStart + blockSize; return blockSize; } @@ -9237,12 +12188,12 @@ static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity, const void* src, size_t srcSize) { DEBUGLOG(5, "ZSTD_copyRawBlock"); + RETURN_ERROR_IF(srcSize > dstCapacity, dstSize_tooSmall, ""); if (dst == NULL) { if (srcSize == 0) return 0; - RETURN_ERROR(dstBuffer_null); + RETURN_ERROR(dstBuffer_null, ""); } - RETURN_ERROR_IF(srcSize > dstCapacity, dstSize_tooSmall); - memcpy(dst, src, srcSize); + ZSTD_memcpy(dst, src, srcSize); return srcSize; } @@ -9250,15 +12201,41 @@ static size_t ZSTD_setRleBlock(void* dst, size_t dstCapacity, BYTE b, size_t regenSize) { + RETURN_ERROR_IF(regenSize > dstCapacity, dstSize_tooSmall, ""); if (dst == NULL) { if (regenSize == 0) return 0; - RETURN_ERROR(dstBuffer_null); + RETURN_ERROR(dstBuffer_null, ""); } - RETURN_ERROR_IF(regenSize > dstCapacity, dstSize_tooSmall); - memset(dst, b, regenSize); + ZSTD_memset(dst, b, regenSize); return regenSize; } +static void ZSTD_DCtx_trace_end(ZSTD_DCtx const* dctx, U64 uncompressedSize, U64 compressedSize, unsigned streaming) +{ +#if ZSTD_TRACE + if (dctx->traceCtx && ZSTD_trace_decompress_end != NULL) { + ZSTD_Trace trace; + ZSTD_memset(&trace, 0, sizeof(trace)); + trace.version = ZSTD_VERSION_NUMBER; + trace.streaming = streaming; + if (dctx->ddict) { + trace.dictionaryID = ZSTD_getDictID_fromDDict(dctx->ddict); + trace.dictionarySize = ZSTD_DDict_dictSize(dctx->ddict); + trace.dictionaryIsCold = dctx->ddictIsCold; + } + trace.uncompressedSize = (size_t)uncompressedSize; + trace.compressedSize = (size_t)compressedSize; + trace.dctx = dctx; + ZSTD_trace_decompress_end(dctx->traceCtx, &trace); + } +#else + (void)dctx; + (void)uncompressedSize; + (void)compressedSize; + (void)streaming; +#endif +} + /*! ZSTD_decompressFrame() : * @dctx must be properly initialized @@ -9268,8 +12245,9 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void** srcPtr, size_t *srcSizePtr) { - const BYTE* ip = (const BYTE*)(*srcPtr); - BYTE* const ostart = (BYTE* const)dst; + const BYTE* const istart = (const BYTE*)(*srcPtr); + const BYTE* ip = istart; + BYTE* const ostart = (BYTE*)dst; BYTE* const oend = dstCapacity != 0 ? 
ostart + dstCapacity : ostart; BYTE* op = ostart; size_t remainingSrcSize = *srcSizePtr; @@ -9279,15 +12257,15 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, /* check */ RETURN_ERROR_IF( remainingSrcSize < ZSTD_FRAMEHEADERSIZE_MIN(dctx->format)+ZSTD_blockHeaderSize, - srcSize_wrong); + srcSize_wrong, ""); /* Frame Header */ { size_t const frameHeaderSize = ZSTD_frameHeaderSize_internal( ip, ZSTD_FRAMEHEADERSIZE_PREFIX(dctx->format), dctx->format); if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize; RETURN_ERROR_IF(remainingSrcSize < frameHeaderSize+ZSTD_blockHeaderSize, - srcSize_wrong); - FORWARD_IF_ERROR( ZSTD_decodeFrameHeader(dctx, ip, frameHeaderSize) ); + srcSize_wrong, ""); + FORWARD_IF_ERROR( ZSTD_decodeFrameHeader(dctx, ip, frameHeaderSize) , ""); ip += frameHeaderSize; remainingSrcSize -= frameHeaderSize; } @@ -9300,26 +12278,26 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, ip += ZSTD_blockHeaderSize; remainingSrcSize -= ZSTD_blockHeaderSize; - RETURN_ERROR_IF(cBlockSize > remainingSrcSize, srcSize_wrong); + RETURN_ERROR_IF(cBlockSize > remainingSrcSize, srcSize_wrong, ""); switch(blockProperties.blockType) { case bt_compressed: - decodedSize = ZSTD_decompressBlock_internal(dctx, op, oend-op, ip, cBlockSize, /* frame */ 1); + decodedSize = ZSTD_decompressBlock_internal(dctx, op, (size_t)(oend-op), ip, cBlockSize, /* frame */ 1); break; case bt_raw : - decodedSize = ZSTD_copyRawBlock(op, oend-op, ip, cBlockSize); + decodedSize = ZSTD_copyRawBlock(op, (size_t)(oend-op), ip, cBlockSize); break; case bt_rle : - decodedSize = ZSTD_setRleBlock(op, oend-op, *ip, blockProperties.origSize); + decodedSize = ZSTD_setRleBlock(op, (size_t)(oend-op), *ip, blockProperties.origSize); break; case bt_reserved : default: - RETURN_ERROR(corruption_detected); + RETURN_ERROR(corruption_detected, "invalid block type"); } if (ZSTD_isError(decodedSize)) return decodedSize; - if (dctx->fParams.checksumFlag) + if (dctx->validateChecksum) XXH64_update(&dctx->xxhState, op, decodedSize); if (decodedSize != 0) op += decodedSize; @@ -9331,22 +12309,24 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, if (dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) { RETURN_ERROR_IF((U64)(op-ostart) != dctx->fParams.frameContentSize, - corruption_detected); + corruption_detected, ""); } if (dctx->fParams.checksumFlag) { /* Frame content checksum verification */ - U32 const checkCalc = (U32)XXH64_digest(&dctx->xxhState); - U32 checkRead; - RETURN_ERROR_IF(remainingSrcSize<4, checksum_wrong); - checkRead = MEM_readLE32(ip); - RETURN_ERROR_IF(checkRead != checkCalc, checksum_wrong); + RETURN_ERROR_IF(remainingSrcSize<4, checksum_wrong, ""); + if (!dctx->forceIgnoreChecksum) { + U32 const checkCalc = (U32)XXH64_digest(&dctx->xxhState); + U32 checkRead; + checkRead = MEM_readLE32(ip); + RETURN_ERROR_IF(checkRead != checkCalc, checksum_wrong, ""); + } ip += 4; remainingSrcSize -= 4; } - + ZSTD_DCtx_trace_end(dctx, (U64)(op-ostart), (U64)(ip-istart), /* streaming */ 0); /* Allow caller to get size read */ *srcPtr = ip; *srcSizePtr = remainingSrcSize; - return op-ostart; + return (size_t)(op-ostart); } static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, @@ -9379,7 +12359,7 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, decodedSize = ZSTD_decompressLegacy(dst, dstCapacity, src, frameSize, dict, dictSize); if (ZSTD_isError(decodedSize)) return decodedSize; - assert(decodedSize <=- dstCapacity); + assert(decodedSize <= dstCapacity); dst = (BYTE*)dst + decodedSize; dstCapacity 
-= decodedSize; @@ -9395,7 +12375,7 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, (unsigned)magicNumber, ZSTD_MAGICNUMBER); if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { size_t const skippableSize = readSkippableFrameSize(src, srcSize); - FORWARD_IF_ERROR(skippableSize); + FORWARD_IF_ERROR(skippableSize, "readSkippableFrameSize failed"); assert(skippableSize <= srcSize); src = (const BYTE *)src + skippableSize; @@ -9405,13 +12385,13 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, if (ddict) { /* we were called from ZSTD_decompress_usingDDict */ - FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(dctx, ddict)); + FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(dctx, ddict), ""); } else { /* this will initialize correctly with no dict if dict == NULL, so * use this in all cases but ddict */ - FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDict(dctx, dict, dictSize)); + FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDict(dctx, dict, dictSize), ""); } - ZSTD_checkContinuity(dctx, dst); + ZSTD_checkContinuity(dctx, dst, dstCapacity); { const size_t res = ZSTD_decompressFrame(dctx, dst, dstCapacity, &src, &srcSize); @@ -9419,15 +12399,13 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, (ZSTD_getErrorCode(res) == ZSTD_error_prefix_unknown) && (moreThan1Frame==1), srcSize_wrong, - "at least one frame successfully completed, but following " - "bytes are garbage: it's more likely to be a srcSize error, " - "specifying more bytes than compressed size of frame(s). This " - "error message replaces ERROR(prefix_unknown), which would be " - "confusing, as the first header is actually correct. Note that " - "one could be unlucky, it might be a corruption error instead, " - "happening right at the place where we expect zstd magic " - "bytes. But this is _much_ less likely than a srcSize field " - "error."); + "At least one frame successfully completed, " + "but following bytes are garbage: " + "it's more likely to be a srcSize error, " + "specifying more input bytes than size of frame(s). " + "Note: one could be unlucky, it might be a corruption error instead, " + "happening right at the place where we expect zstd magic bytes. 
" + "But this is _much_ less likely than a srcSize field error."); if (ZSTD_isError(res)) return res; assert(res <= dstCapacity); if (res != 0) @@ -9439,7 +12417,7 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, RETURN_ERROR_IF(srcSize, srcSize_wrong, "input not entirely consumed"); - return (BYTE*)dst - (BYTE*)dststart; + return (size_t)((BYTE*)dst - (BYTE*)dststart); } size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx, @@ -9479,7 +12457,7 @@ size_t ZSTD_decompress(void* dst, size_t dstCapacity, const void* src, size_t sr #if defined(ZSTD_HEAPMODE) && (ZSTD_HEAPMODE>=1) size_t regenSize; ZSTD_DCtx* const dctx = ZSTD_createDCtx(); - RETURN_ERROR_IF(dctx==NULL, memory_allocation); + RETURN_ERROR_IF(dctx==NULL, memory_allocation, "NULL pointer!"); regenSize = ZSTD_decompressDCtx(dctx, dst, dstCapacity, src, srcSize); ZSTD_freeDCtx(dctx); return regenSize; @@ -9548,7 +12526,9 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c DEBUGLOG(5, "ZSTD_decompressContinue (srcSize:%u)", (unsigned)srcSize); /* Sanity check */ RETURN_ERROR_IF(srcSize != ZSTD_nextSrcSizeToDecompressWithInputSize(dctx, srcSize), srcSize_wrong, "not allowed"); - if (dstCapacity) ZSTD_checkContinuity(dctx, dst); + ZSTD_checkContinuity(dctx, dst, dstCapacity); + + dctx->processedCSize += srcSize; switch (dctx->stage) { @@ -9557,22 +12537,22 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c if (dctx->format == ZSTD_f_zstd1) { /* allows header */ assert(srcSize >= ZSTD_FRAMEIDSIZE); /* to read skippable magic number */ if ((MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */ - memcpy(dctx->headerBuffer, src, srcSize); + ZSTD_memcpy(dctx->headerBuffer, src, srcSize); dctx->expected = ZSTD_SKIPPABLEHEADERSIZE - srcSize; /* remaining to load to get full skippable frame header */ dctx->stage = ZSTDds_decodeSkippableHeader; return 0; } } dctx->headerSize = ZSTD_frameHeaderSize_internal(src, srcSize, dctx->format); if (ZSTD_isError(dctx->headerSize)) return dctx->headerSize; - memcpy(dctx->headerBuffer, src, srcSize); + ZSTD_memcpy(dctx->headerBuffer, src, srcSize); dctx->expected = dctx->headerSize - srcSize; dctx->stage = ZSTDds_decodeFrameHeader; return 0; case ZSTDds_decodeFrameHeader: assert(src != NULL); - memcpy(dctx->headerBuffer + (dctx->headerSize - srcSize), src, srcSize); - FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize)); + ZSTD_memcpy(dctx->headerBuffer + (dctx->headerSize - srcSize), src, srcSize); + FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize), ""); dctx->expected = ZSTD_blockHeaderSize; dctx->stage = ZSTDds_decodeBlockHeader; return 0; @@ -9619,7 +12599,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c case bt_raw : assert(srcSize <= dctx->expected); rSize = ZSTD_copyRawBlock(dst, dstCapacity, src, srcSize); - FORWARD_IF_ERROR(rSize); + FORWARD_IF_ERROR(rSize, "ZSTD_copyRawBlock failed"); assert(rSize == srcSize); dctx->expected -= rSize; break; @@ -9629,13 +12609,13 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c break; case bt_reserved : /* should never happen */ default: - RETURN_ERROR(corruption_detected); + RETURN_ERROR(corruption_detected, "invalid block type"); } - FORWARD_IF_ERROR(rSize); + FORWARD_IF_ERROR(rSize, ""); RETURN_ERROR_IF(rSize > dctx->fParams.blockSizeMax, corruption_detected, "Decompressed Block Size Exceeds Maximum"); 
DEBUGLOG(5, "ZSTD_decompressContinue: decoded size from block : %u", (unsigned)rSize); dctx->decodedSize += rSize; - if (dctx->fParams.checksumFlag) XXH64_update(&dctx->xxhState, dst, rSize); + if (dctx->validateChecksum) XXH64_update(&dctx->xxhState, dst, rSize); dctx->previousDstEnd = (char*)dst + rSize; /* Stay on the same stage until we are finished streaming the block. */ @@ -9648,11 +12628,12 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c RETURN_ERROR_IF( dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN && dctx->decodedSize != dctx->fParams.frameContentSize, - corruption_detected); + corruption_detected, ""); if (dctx->fParams.checksumFlag) { /* another round for frame checksum */ dctx->expected = 4; dctx->stage = ZSTDds_checkChecksum; } else { + ZSTD_DCtx_trace_end(dctx, dctx->decodedSize, dctx->processedCSize, /* streaming */ 1); dctx->expected = 0; /* ends here */ dctx->stage = ZSTDds_getFrameHeaderSize; } @@ -9665,10 +12646,14 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c case ZSTDds_checkChecksum: assert(srcSize == 4); /* guaranteed by dctx->expected */ - { U32 const h32 = (U32)XXH64_digest(&dctx->xxhState); - U32 const check32 = MEM_readLE32(src); - DEBUGLOG(4, "ZSTD_decompressContinue: checksum : calculated %08X :: %08X read", (unsigned)h32, (unsigned)check32); - RETURN_ERROR_IF(check32 != h32, checksum_wrong); + { + if (dctx->validateChecksum) { + U32 const h32 = (U32)XXH64_digest(&dctx->xxhState); + U32 const check32 = MEM_readLE32(src); + DEBUGLOG(4, "ZSTD_decompressContinue: checksum : calculated %08X :: %08X read", (unsigned)h32, (unsigned)check32); + RETURN_ERROR_IF(check32 != h32, checksum_wrong, ""); + } + ZSTD_DCtx_trace_end(dctx, dctx->decodedSize, dctx->processedCSize, /* streaming */ 1); dctx->expected = 0; dctx->stage = ZSTDds_getFrameHeaderSize; return 0; @@ -9677,7 +12662,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c case ZSTDds_decodeSkippableHeader: assert(src != NULL); assert(srcSize <= ZSTD_SKIPPABLEHEADERSIZE); - memcpy(dctx->headerBuffer + (ZSTD_SKIPPABLEHEADERSIZE - srcSize), src, srcSize); /* complete skippable header */ + ZSTD_memcpy(dctx->headerBuffer + (ZSTD_SKIPPABLEHEADERSIZE - srcSize), src, srcSize); /* complete skippable header */ dctx->expected = MEM_readLE32(dctx->headerBuffer + ZSTD_FRAMEIDSIZE); /* note : dctx->expected can grow seriously large, beyond local buffer size */ dctx->stage = ZSTDds_skipFrame; return 0; @@ -9689,7 +12674,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c default: assert(0); /* impossible */ - RETURN_ERROR(GENERIC); /* some compiler require default to do something */ + RETURN_ERROR(GENERIC, "impossible to reach"); /* some compiler require default to do something */ } } @@ -9700,6 +12685,10 @@ static size_t ZSTD_refDictContent(ZSTD_DCtx* dctx, const void* dict, size_t dict dctx->virtualStart = (const char*)dict - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart)); dctx->prefixStart = dict; dctx->previousDstEnd = (const char*)dict + dictSize; +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + dctx->dictContentBeginForFuzzing = dctx->prefixStart; + dctx->dictContentEndForFuzzing = dctx->previousDstEnd; +#endif return 0; } @@ -9713,7 +12702,7 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy, const BYTE* dictPtr = (const BYTE*)dict; const BYTE* const dictEnd = dictPtr + dictSize; - RETURN_ERROR_IF(dictSize <= 8, dictionary_corrupted); + 
RETURN_ERROR_IF(dictSize <= 8, dictionary_corrupted, "dict is too small"); assert(MEM_readLE32(dict) == ZSTD_MAGIC_DICTIONARY); /* dict must be valid */ dictPtr += 8; /* skip header = magic + dictID */ @@ -9729,63 +12718,69 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy, workspace, workspaceSize); #else size_t const hSize = HUF_readDTableX2_wksp(entropy->hufTable, - dictPtr, dictEnd - dictPtr, + dictPtr, (size_t)(dictEnd - dictPtr), workspace, workspaceSize); #endif - RETURN_ERROR_IF(HUF_isError(hSize), dictionary_corrupted); + RETURN_ERROR_IF(HUF_isError(hSize), dictionary_corrupted, ""); dictPtr += hSize; } { short offcodeNCount[MaxOff+1]; unsigned offcodeMaxValue = MaxOff, offcodeLog; - size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr); - RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted); - RETURN_ERROR_IF(offcodeMaxValue > MaxOff, dictionary_corrupted); - RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted); + size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, (size_t)(dictEnd-dictPtr)); + RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(offcodeMaxValue > MaxOff, dictionary_corrupted, ""); + RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted, ""); ZSTD_buildFSETable( entropy->OFTable, offcodeNCount, offcodeMaxValue, OF_base, OF_bits, - offcodeLog); + offcodeLog, + entropy->workspace, sizeof(entropy->workspace), + /* bmi2 */0); dictPtr += offcodeHeaderSize; } { short matchlengthNCount[MaxML+1]; unsigned matchlengthMaxValue = MaxML, matchlengthLog; - size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr); - RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted); - RETURN_ERROR_IF(matchlengthMaxValue > MaxML, dictionary_corrupted); - RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted); + size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, (size_t)(dictEnd-dictPtr)); + RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(matchlengthMaxValue > MaxML, dictionary_corrupted, ""); + RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted, ""); ZSTD_buildFSETable( entropy->MLTable, matchlengthNCount, matchlengthMaxValue, ML_base, ML_bits, - matchlengthLog); + matchlengthLog, + entropy->workspace, sizeof(entropy->workspace), + /* bmi2 */ 0); dictPtr += matchlengthHeaderSize; } { short litlengthNCount[MaxLL+1]; unsigned litlengthMaxValue = MaxLL, litlengthLog; - size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr); - RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted); - RETURN_ERROR_IF(litlengthMaxValue > MaxLL, dictionary_corrupted); - RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted); + size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, (size_t)(dictEnd-dictPtr)); + RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(litlengthMaxValue > MaxLL, dictionary_corrupted, ""); + RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted, ""); ZSTD_buildFSETable( entropy->LLTable, litlengthNCount, litlengthMaxValue, LL_base, LL_bits, - litlengthLog); + 
litlengthLog, + entropy->workspace, sizeof(entropy->workspace), + /* bmi2 */ 0); dictPtr += litlengthHeaderSize; } - RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted); + RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted, ""); { int i; size_t const dictContentSize = (size_t)(dictEnd - (dictPtr+12)); for (i=0; i<3; i++) { U32 const rep = MEM_readLE32(dictPtr); dictPtr += 4; RETURN_ERROR_IF(rep==0 || rep > dictContentSize, - dictionary_corrupted); + dictionary_corrupted, ""); entropy->rep[i] = rep; } } - return dictPtr - (const BYTE*)dict; + return (size_t)(dictPtr - (const BYTE*)dict); } static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) @@ -9799,7 +12794,7 @@ static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict /* load entropy tables */ { size_t const eSize = ZSTD_loadDEntropy(&dctx->entropy, dict, dictSize); - RETURN_ERROR_IF(ZSTD_isError(eSize), dictionary_corrupted); + RETURN_ERROR_IF(ZSTD_isError(eSize), dictionary_corrupted, ""); dict = (const char*)dict + eSize; dictSize -= eSize; } @@ -9812,8 +12807,12 @@ static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx) { assert(dctx != NULL); +#if ZSTD_TRACE + dctx->traceCtx = (ZSTD_trace_decompress_begin != NULL) ? ZSTD_trace_decompress_begin(dctx) : 0; +#endif dctx->expected = ZSTD_startingInputLength(dctx->format); /* dctx->format must be properly set */ dctx->stage = ZSTDds_getFrameHeaderSize; + dctx->processedCSize = 0; dctx->decodedSize = 0; dctx->previousDstEnd = NULL; dctx->prefixStart = NULL; @@ -9824,7 +12823,7 @@ size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx) dctx->dictID = 0; dctx->bType = bt_reserved; ZSTD_STATIC_ASSERT(sizeof(dctx->entropy.rep) == sizeof(repStartValue)); - memcpy(dctx->entropy.rep, repStartValue, sizeof(repStartValue)); /* initial repcodes */ + ZSTD_memcpy(dctx->entropy.rep, repStartValue, sizeof(repStartValue)); /* initial repcodes */ dctx->LLTptr = dctx->entropy.LLTable; dctx->MLTptr = dctx->entropy.MLTable; dctx->OFTptr = dctx->entropy.OFTable; @@ -9834,11 +12833,11 @@ size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx) size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) { - FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) ); + FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) , ""); if (dict && dictSize) RETURN_ERROR_IF( ZSTD_isError(ZSTD_decompress_insertDictionary(dctx, dict, dictSize)), - dictionary_corrupted); + dictionary_corrupted, ""); return 0; } @@ -9857,7 +12856,7 @@ size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) DEBUGLOG(4, "DDict is %s", dctx->ddictIsCold ? 
"~cold~" : "hot!"); } - FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) ); + FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) , ""); if (ddict) { /* NULL ddict is equivalent to no dictionary */ ZSTD_copyDDictParameters(dctx, ddict); } @@ -9948,11 +12947,11 @@ size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType) { - RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong); + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); ZSTD_clearDict(dctx); if (dict && dictSize != 0) { dctx->ddictLocal = ZSTD_createDDict_advanced(dict, dictSize, dictLoadMethod, dictContentType, dctx->customMem); - RETURN_ERROR_IF(dctx->ddictLocal == NULL, memory_allocation); + RETURN_ERROR_IF(dctx->ddictLocal == NULL, memory_allocation, "NULL pointer!"); dctx->ddict = dctx->ddictLocal; dctx->dictUses = ZSTD_use_indefinitely; } @@ -9971,7 +12970,7 @@ size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSi size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType) { - FORWARD_IF_ERROR(ZSTD_DCtx_loadDictionary_advanced(dctx, prefix, prefixSize, ZSTD_dlm_byRef, dictContentType)); + FORWARD_IF_ERROR(ZSTD_DCtx_loadDictionary_advanced(dctx, prefix, prefixSize, ZSTD_dlm_byRef, dictContentType), ""); dctx->dictUses = ZSTD_use_once; return 0; } @@ -9988,8 +12987,8 @@ size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSiz size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize) { DEBUGLOG(4, "ZSTD_initDStream_usingDict"); - FORWARD_IF_ERROR( ZSTD_DCtx_reset(zds, ZSTD_reset_session_only) ); - FORWARD_IF_ERROR( ZSTD_DCtx_loadDictionary(zds, dict, dictSize) ); + FORWARD_IF_ERROR( ZSTD_DCtx_reset(zds, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_DCtx_loadDictionary(zds, dict, dictSize) , ""); return ZSTD_startingInputLength(zds->format); } @@ -10005,8 +13004,8 @@ size_t ZSTD_initDStream(ZSTD_DStream* zds) * this function cannot fail */ size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict) { - FORWARD_IF_ERROR( ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only) ); - FORWARD_IF_ERROR( ZSTD_DCtx_refDDict(dctx, ddict) ); + FORWARD_IF_ERROR( ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_DCtx_refDDict(dctx, ddict) , ""); return ZSTD_startingInputLength(dctx->format); } @@ -10015,18 +13014,28 @@ size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict) * this function cannot fail */ size_t ZSTD_resetDStream(ZSTD_DStream* dctx) { - FORWARD_IF_ERROR(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only)); + FORWARD_IF_ERROR(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only), ""); return ZSTD_startingInputLength(dctx->format); } size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) { - RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong); + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); ZSTD_clearDict(dctx); if (ddict) { dctx->ddict = ddict; dctx->dictUses = ZSTD_use_indefinitely; + if (dctx->refMultipleDDicts == ZSTD_rmd_refMultipleDDicts) { + if (dctx->ddictSet == NULL) { + dctx->ddictSet = ZSTD_createDDictHashSet(dctx->customMem); + if (!dctx->ddictSet) { + RETURN_ERROR(memory_allocation, "Failed to allocate memory for hash set!"); + } + } + assert(!dctx->staticSize); /* Impossible: ddictSet cannot have been allocated if static dctx */ + 
FORWARD_IF_ERROR(ZSTD_DDictHashSet_addDDict(dctx->ddictSet, ddict, dctx->customMem), ""); + } } return 0; } @@ -10039,16 +13048,16 @@ size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize) ZSTD_bounds const bounds = ZSTD_dParam_getBounds(ZSTD_d_windowLogMax); size_t const min = (size_t)1 << bounds.lowerBound; size_t const max = (size_t)1 << bounds.upperBound; - RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong); - RETURN_ERROR_IF(maxWindowSize < min, parameter_outOfBound); - RETURN_ERROR_IF(maxWindowSize > max, parameter_outOfBound); + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); + RETURN_ERROR_IF(maxWindowSize < min, parameter_outOfBound, ""); + RETURN_ERROR_IF(maxWindowSize > max, parameter_outOfBound, ""); dctx->maxWindowSize = maxWindowSize; return 0; } size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format) { - return ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, format); + return ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, (int)format); } ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam) @@ -10064,6 +13073,18 @@ ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam) bounds.upperBound = (int)ZSTD_f_zstd1_magicless; ZSTD_STATIC_ASSERT(ZSTD_f_zstd1 < ZSTD_f_zstd1_magicless); return bounds; + case ZSTD_d_stableOutBuffer: + bounds.lowerBound = (int)ZSTD_bm_buffered; + bounds.upperBound = (int)ZSTD_bm_stable; + return bounds; + case ZSTD_d_forceIgnoreChecksum: + bounds.lowerBound = (int)ZSTD_d_validateChecksum; + bounds.upperBound = (int)ZSTD_d_ignoreChecksum; + return bounds; + case ZSTD_d_refMultipleDDicts: + bounds.lowerBound = (int)ZSTD_rmd_refSingleDDict; + bounds.upperBound = (int)ZSTD_rmd_refMultipleDDicts; + return bounds; default:; } bounds.error = ERROR(parameter_unsupported); @@ -10083,12 +13104,35 @@ static int ZSTD_dParam_withinBounds(ZSTD_dParameter dParam, int value) } #define CHECK_DBOUNDS(p,v) { \ - RETURN_ERROR_IF(!ZSTD_dParam_withinBounds(p, v), parameter_outOfBound); \ + RETURN_ERROR_IF(!ZSTD_dParam_withinBounds(p, v), parameter_outOfBound, ""); \ +} + +size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int* value) +{ + switch (param) { + case ZSTD_d_windowLogMax: + *value = (int)ZSTD_highbit32((U32)dctx->maxWindowSize); + return 0; + case ZSTD_d_format: + *value = (int)dctx->format; + return 0; + case ZSTD_d_stableOutBuffer: + *value = (int)dctx->outBufferMode; + return 0; + case ZSTD_d_forceIgnoreChecksum: + *value = (int)dctx->forceIgnoreChecksum; + return 0; + case ZSTD_d_refMultipleDDicts: + *value = (int)dctx->refMultipleDDicts; + return 0; + default:; + } + RETURN_ERROR(parameter_unsupported, ""); } size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value) { - RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong); + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); switch(dParam) { case ZSTD_d_windowLogMax: if (value == 0) value = ZSTD_WINDOWLOG_LIMIT_DEFAULT; @@ -10099,9 +13143,24 @@ size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value CHECK_DBOUNDS(ZSTD_d_format, value); dctx->format = (ZSTD_format_e)value; return 0; + case ZSTD_d_stableOutBuffer: + CHECK_DBOUNDS(ZSTD_d_stableOutBuffer, value); + dctx->outBufferMode = (ZSTD_bufferMode_e)value; + return 0; + case ZSTD_d_forceIgnoreChecksum: + CHECK_DBOUNDS(ZSTD_d_forceIgnoreChecksum, value); + dctx->forceIgnoreChecksum = (ZSTD_forceIgnoreChecksum_e)value; + return 0; + case ZSTD_d_refMultipleDDicts: + CHECK_DBOUNDS(ZSTD_d_refMultipleDDicts, value); + if 
(dctx->staticSize != 0) { + RETURN_ERROR(parameter_unsupported, "Static dctx does not support multiple DDicts!"); + } + dctx->refMultipleDDicts = (ZSTD_refMultipleDDicts_e)value; + return 0; default:; } - RETURN_ERROR(parameter_unsupported); + RETURN_ERROR(parameter_unsupported, ""); } size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset) @@ -10113,10 +13172,9 @@ size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset) } if ( (reset == ZSTD_reset_parameters) || (reset == ZSTD_reset_session_and_parameters) ) { - RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong); + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); ZSTD_clearDict(dctx); - dctx->format = ZSTD_f_zstd1; - dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT; + ZSTD_DCtx_resetParameters(dctx); } return 0; } @@ -10134,7 +13192,7 @@ size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long unsigned long long const neededSize = MIN(frameContentSize, neededRBSize); size_t const minRBSize = (size_t) neededSize; RETURN_ERROR_IF((unsigned long long)minRBSize != neededSize, - frameParameter_windowTooLarge); + frameParameter_windowTooLarge, ""); return minRBSize; } @@ -10152,24 +13210,84 @@ size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize) ZSTD_frameHeader zfh; size_t const err = ZSTD_getFrameHeader(&zfh, src, srcSize); if (ZSTD_isError(err)) return err; - RETURN_ERROR_IF(err>0, srcSize_wrong); + RETURN_ERROR_IF(err>0, srcSize_wrong, ""); RETURN_ERROR_IF(zfh.windowSize > windowSizeMax, - frameParameter_windowTooLarge); + frameParameter_windowTooLarge, ""); return ZSTD_estimateDStreamSize((size_t)zfh.windowSize); } /* ***** Decompression ***** */ -MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize) +static int ZSTD_DCtx_isOverflow(ZSTD_DStream* zds, size_t const neededInBuffSize, size_t const neededOutBuffSize) { - size_t const length = MIN(dstCapacity, srcSize); - if (length > 0) { - memcpy(dst, src, length); - } - return length; + return (zds->inBuffSize + zds->outBuffSize) >= (neededInBuffSize + neededOutBuffSize) * ZSTD_WORKSPACETOOLARGE_FACTOR; +} + +static void ZSTD_DCtx_updateOversizedDuration(ZSTD_DStream* zds, size_t const neededInBuffSize, size_t const neededOutBuffSize) +{ + if (ZSTD_DCtx_isOverflow(zds, neededInBuffSize, neededOutBuffSize)) + zds->oversizedDuration++; + else + zds->oversizedDuration = 0; +} + +static int ZSTD_DCtx_isOversizedTooLong(ZSTD_DStream* zds) +{ + return zds->oversizedDuration >= ZSTD_WORKSPACETOOLARGE_MAXDURATION; +} + +/* Checks that the output buffer hasn't changed if ZSTD_obm_stable is used. */ +static size_t ZSTD_checkOutBuffer(ZSTD_DStream const* zds, ZSTD_outBuffer const* output) +{ + ZSTD_outBuffer const expect = zds->expectedOutBuffer; + /* No requirement when ZSTD_obm_stable is not enabled. */ + if (zds->outBufferMode != ZSTD_bm_stable) + return 0; + /* Any buffer is allowed in zdss_init, this must be the same for every other call until + * the context is reset. + */ + if (zds->streamStage == zdss_init) + return 0; + /* The buffer must match our expectation exactly. */ + if (expect.dst == output->dst && expect.pos == output->pos && expect.size == output->size) + return 0; + RETURN_ERROR(dstBuffer_wrong, "ZSTD_d_stableOutBuffer enabled but output differs!"); } +/* Calls ZSTD_decompressContinue() with the right parameters for ZSTD_decompressStream() + * and updates the stage and the output buffer state. 
This call is extracted so it can be + * used both when reading directly from the ZSTD_inBuffer, and in buffered input mode. + * NOTE: You must break after calling this function since the streamStage is modified. + */ +static size_t ZSTD_decompressContinueStream( + ZSTD_DStream* zds, char** op, char* oend, + void const* src, size_t srcSize) { + int const isSkipFrame = ZSTD_isSkipFrame(zds); + if (zds->outBufferMode == ZSTD_bm_buffered) { + size_t const dstSize = isSkipFrame ? 0 : zds->outBuffSize - zds->outStart; + size_t const decodedSize = ZSTD_decompressContinue(zds, + zds->outBuff + zds->outStart, dstSize, src, srcSize); + FORWARD_IF_ERROR(decodedSize, ""); + if (!decodedSize && !isSkipFrame) { + zds->streamStage = zdss_read; + } else { + zds->outEnd = zds->outStart + decodedSize; + zds->streamStage = zdss_flush; + } + } else { + /* Write directly into the output buffer */ + size_t const dstSize = isSkipFrame ? 0 : (size_t)(oend - *op); + size_t const decodedSize = ZSTD_decompressContinue(zds, *op, dstSize, src, srcSize); + FORWARD_IF_ERROR(decodedSize, ""); + *op += decodedSize; + /* Flushing is not needed. */ + zds->streamStage = zdss_read; + assert(*op <= oend); + assert(zds->outBufferMode == ZSTD_bm_stable); + } + return 0; +} size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input) { @@ -10195,6 +13313,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB "forbidden. out: pos: %u vs size: %u", (U32)output->pos, (U32)output->size); DEBUGLOG(5, "input size : %u", (U32)(input->size - input->pos)); + FORWARD_IF_ERROR(ZSTD_checkOutBuffer(zds, output), ""); while (someMoreWork) { switch(zds->streamStage) @@ -10205,6 +13324,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0; zds->legacyVersion = 0; zds->hostageByte = 0; + zds->expectedOutBuffer = *output; /* fall-through */ case zdss_loadHeader : @@ -10219,6 +13339,9 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB } } #endif { size_t const hSize = ZSTD_getFrameHeader_advanced(&zds->fParams, zds->headerBuffer, zds->lhSize, zds->format); + if (zds->refMultipleDDicts && zds->ddictSet) { + ZSTD_DCtx_selectFrameDDict(zds); + } DEBUGLOG(5, "header size : %u", (U32)hSize); if (ZSTD_isError(hSize)) { #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) @@ -10232,7 +13355,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB "legacy support is incompatible with static dctx"); FORWARD_IF_ERROR(ZSTD_initLegacyStream(&zds->legacyContext, zds->previousLegacyVersion, legacyVersion, - dict, dictSize)); + dict, dictSize), ""); zds->legacyVersion = zds->previousLegacyVersion = legacyVersion; { size_t const hint = ZSTD_decompressLegacyStream(zds->legacyContext, legacyVersion, output, input); if (hint==0) zds->streamStage = zdss_init; /* or stay in stage zdss_loadHeader */ @@ -10247,24 +13370,25 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB assert(iend >= ip); if (toLoad > remainingInput) { /* not enough input to load full header */ if (remainingInput > 0) { - memcpy(zds->headerBuffer + zds->lhSize, ip, remainingInput); + ZSTD_memcpy(zds->headerBuffer + zds->lhSize, ip, remainingInput); zds->lhSize += remainingInput; } input->pos = input->size; return (MAX((size_t)ZSTD_FRAMEHEADERSIZE_MIN(zds->format), hSize) - zds->lhSize) + ZSTD_blockHeaderSize; /* remaining header bytes + next block 
header */ } assert(ip != NULL); - memcpy(zds->headerBuffer + zds->lhSize, ip, toLoad); zds->lhSize = hSize; ip += toLoad; + ZSTD_memcpy(zds->headerBuffer + zds->lhSize, ip, toLoad); zds->lhSize = hSize; ip += toLoad; break; } } /* check for single-pass mode opportunity */ - if (zds->fParams.frameContentSize && zds->fParams.windowSize /* skippable frame if == 0 */ + if (zds->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN + && zds->fParams.frameType != ZSTD_skippableFrame && (U64)(size_t)(oend-op) >= zds->fParams.frameContentSize) { - size_t const cSize = ZSTD_findFrameCompressedSize(istart, iend-istart); + size_t const cSize = ZSTD_findFrameCompressedSize(istart, (size_t)(iend-istart)); if (cSize <= (size_t)(iend-istart)) { /* shortcut : using single-pass mode */ - size_t const decompressedSize = ZSTD_decompress_usingDDict(zds, op, oend-op, istart, cSize, ZSTD_getDDict(zds)); + size_t const decompressedSize = ZSTD_decompress_usingDDict(zds, op, (size_t)(oend-op), istart, cSize, ZSTD_getDDict(zds)); if (ZSTD_isError(decompressedSize)) return decompressedSize; DEBUGLOG(4, "shortcut to single-pass ZSTD_decompress_usingDDict()") ip = istart + cSize; @@ -10275,15 +13399,23 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB break; } } + /* Check output buffer is large enough for ZSTD_odm_stable. */ + if (zds->outBufferMode == ZSTD_bm_stable + && zds->fParams.frameType != ZSTD_skippableFrame + && zds->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN + && (U64)(size_t)(oend-op) < zds->fParams.frameContentSize) { + RETURN_ERROR(dstSize_tooSmall, "ZSTD_obm_stable passed but ZSTD_outBuffer is too small"); + } + /* Consume header (see ZSTDds_decodeFrameHeader) */ DEBUGLOG(4, "Consume header"); - FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(zds, ZSTD_getDDict(zds))); + FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(zds, ZSTD_getDDict(zds)), ""); if ((MEM_readLE32(zds->headerBuffer) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */ zds->expected = MEM_readLE32(zds->headerBuffer + ZSTD_FRAMEIDSIZE); zds->stage = ZSTDds_skipFrame; } else { - FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(zds, zds->headerBuffer, zds->lhSize)); + FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(zds, zds->headerBuffer, zds->lhSize), ""); zds->expected = ZSTD_blockHeaderSize; zds->stage = ZSTDds_decodeBlockHeader; } @@ -10294,40 +13426,48 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB (U32)(zds->maxWindowSize >> 10) ); zds->fParams.windowSize = MAX(zds->fParams.windowSize, 1U << ZSTD_WINDOWLOG_ABSOLUTEMIN); RETURN_ERROR_IF(zds->fParams.windowSize > zds->maxWindowSize, - frameParameter_windowTooLarge); + frameParameter_windowTooLarge, ""); /* Adapt buffer sizes to frame header instructions */ { size_t const neededInBuffSize = MAX(zds->fParams.blockSizeMax, 4 /* frame checksum */); - size_t const neededOutBuffSize = ZSTD_decodingBufferSize_min(zds->fParams.windowSize, zds->fParams.frameContentSize); - if ((zds->inBuffSize < neededInBuffSize) || (zds->outBuffSize < neededOutBuffSize)) { - size_t const bufferSize = neededInBuffSize + neededOutBuffSize; - DEBUGLOG(4, "inBuff : from %u to %u", - (U32)zds->inBuffSize, (U32)neededInBuffSize); - DEBUGLOG(4, "outBuff : from %u to %u", - (U32)zds->outBuffSize, (U32)neededOutBuffSize); - if (zds->staticSize) { /* static DCtx */ - DEBUGLOG(4, "staticSize : %u", (U32)zds->staticSize); - assert(zds->staticSize >= sizeof(ZSTD_DCtx)); /* controlled at init */ - RETURN_ERROR_IF( - bufferSize 
> zds->staticSize - sizeof(ZSTD_DCtx), - memory_allocation); - } else { - ZSTD_free(zds->inBuff, zds->customMem); - zds->inBuffSize = 0; - zds->outBuffSize = 0; - zds->inBuff = (char*)ZSTD_malloc(bufferSize, zds->customMem); - RETURN_ERROR_IF(zds->inBuff == NULL, memory_allocation); - } - zds->inBuffSize = neededInBuffSize; - zds->outBuff = zds->inBuff + zds->inBuffSize; - zds->outBuffSize = neededOutBuffSize; - } } + size_t const neededOutBuffSize = zds->outBufferMode == ZSTD_bm_buffered + ? ZSTD_decodingBufferSize_min(zds->fParams.windowSize, zds->fParams.frameContentSize) + : 0; + + ZSTD_DCtx_updateOversizedDuration(zds, neededInBuffSize, neededOutBuffSize); + + { int const tooSmall = (zds->inBuffSize < neededInBuffSize) || (zds->outBuffSize < neededOutBuffSize); + int const tooLarge = ZSTD_DCtx_isOversizedTooLong(zds); + + if (tooSmall || tooLarge) { + size_t const bufferSize = neededInBuffSize + neededOutBuffSize; + DEBUGLOG(4, "inBuff : from %u to %u", + (U32)zds->inBuffSize, (U32)neededInBuffSize); + DEBUGLOG(4, "outBuff : from %u to %u", + (U32)zds->outBuffSize, (U32)neededOutBuffSize); + if (zds->staticSize) { /* static DCtx */ + DEBUGLOG(4, "staticSize : %u", (U32)zds->staticSize); + assert(zds->staticSize >= sizeof(ZSTD_DCtx)); /* controlled at init */ + RETURN_ERROR_IF( + bufferSize > zds->staticSize - sizeof(ZSTD_DCtx), + memory_allocation, ""); + } else { + ZSTD_customFree(zds->inBuff, zds->customMem); + zds->inBuffSize = 0; + zds->outBuffSize = 0; + zds->inBuff = (char*)ZSTD_customMalloc(bufferSize, zds->customMem); + RETURN_ERROR_IF(zds->inBuff == NULL, memory_allocation, ""); + } + zds->inBuffSize = neededInBuffSize; + zds->outBuff = zds->inBuff + zds->inBuffSize; + zds->outBuffSize = neededOutBuffSize; + } } } zds->streamStage = zdss_read; /* fall-through */ case zdss_read: DEBUGLOG(5, "stage zdss_read"); - { size_t const neededInSize = ZSTD_nextSrcSizeToDecompressWithInputSize(zds, iend - ip); + { size_t const neededInSize = ZSTD_nextSrcSizeToDecompressWithInputSize(zds, (size_t)(iend - ip)); DEBUGLOG(5, "neededInSize = %u", (U32)neededInSize); if (neededInSize==0) { /* end of frame */ zds->streamStage = zdss_init; @@ -10335,15 +13475,9 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB break; } if ((size_t)(iend-ip) >= neededInSize) { /* decode directly from src */ - int const isSkipFrame = ZSTD_isSkipFrame(zds); - size_t const decodedSize = ZSTD_decompressContinue(zds, - zds->outBuff + zds->outStart, (isSkipFrame ? 
0 : zds->outBuffSize - zds->outStart), - ip, neededInSize); - if (ZSTD_isError(decodedSize)) return decodedSize; + FORWARD_IF_ERROR(ZSTD_decompressContinueStream(zds, &op, oend, ip, neededInSize), ""); ip += neededInSize; - if (!decodedSize && !isSkipFrame) break; /* this was just a header */ - zds->outEnd = zds->outStart + decodedSize; - zds->streamStage = zdss_flush; + /* Function modifies the stage so we must break */ break; } } if (ip==iend) { someMoreWork = 0; break; } /* no more input */ @@ -10363,27 +13497,21 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB RETURN_ERROR_IF(toLoad > zds->inBuffSize - zds->inPos, corruption_detected, "should never happen"); - loadedSize = ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, iend-ip); + loadedSize = ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, (size_t)(iend-ip)); } ip += loadedSize; zds->inPos += loadedSize; if (loadedSize < toLoad) { someMoreWork = 0; break; } /* not enough input, wait for more */ /* decode loaded input */ - { size_t const decodedSize = ZSTD_decompressContinue(zds, - zds->outBuff + zds->outStart, zds->outBuffSize - zds->outStart, - zds->inBuff, neededInSize); - if (ZSTD_isError(decodedSize)) return decodedSize; - zds->inPos = 0; /* input is consumed */ - if (!decodedSize && !isSkipFrame) { zds->streamStage = zdss_read; break; } /* this was just a header */ - zds->outEnd = zds->outStart + decodedSize; - } } - zds->streamStage = zdss_flush; - /* fall-through */ - + zds->inPos = 0; /* input is consumed */ + FORWARD_IF_ERROR(ZSTD_decompressContinueStream(zds, &op, oend, zds->inBuff, neededInSize), ""); + /* Function modifies the stage so we must break */ + break; + } case zdss_flush: { size_t const toFlushSize = zds->outEnd - zds->outStart; - size_t const flushedSize = ZSTD_limitCopy(op, oend-op, zds->outBuff + zds->outStart, toFlushSize); + size_t const flushedSize = ZSTD_limitCopy(op, (size_t)(oend-op), zds->outBuff + zds->outStart, toFlushSize); op += flushedSize; zds->outStart += flushedSize; if (flushedSize == toFlushSize) { /* flush completed */ @@ -10403,17 +13531,21 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB default: assert(0); /* impossible */ - RETURN_ERROR(GENERIC); /* some compiler require default to do something */ + RETURN_ERROR(GENERIC, "impossible to reach"); /* some compiler require default to do something */ } } /* result */ input->pos = (size_t)(ip - (const char*)(input->src)); output->pos = (size_t)(op - (char*)(output->dst)); + + /* Update the expected output buffer for ZSTD_obm_stable. */ + zds->expectedOutBuffer = *output; + if ((ip==istart) && (op==ostart)) { /* no forward progress */ zds->noForwardProgress ++; if (zds->noForwardProgress >= ZSTD_NO_FORWARD_PROGRESS_MAX) { - RETURN_ERROR_IF(op==oend, dstSize_tooSmall); - RETURN_ERROR_IF(ip==iend, srcSize_wrong); + RETURN_ERROR_IF(op==oend, dstSize_tooSmall, ""); + RETURN_ERROR_IF(ip==iend, srcSize_wrong, ""); assert(0); } } else { @@ -10458,10 +13590,10 @@ size_t ZSTD_decompressStream_simpleArgs ( *srcPos = input.pos; return cErr; } -/**** ended inlining zstd_decompress.c ****/ -/**** start inlining zstd_decompress_block.c ****/ +/**** ended inlining decompress/zstd_decompress.c ****/ +/**** start inlining decompress/zstd_decompress_block.c ****/ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the @@ -10476,15 +13608,15 @@ size_t ZSTD_decompressStream_simpleArgs ( /*-******************************************************* * Dependencies *********************************************************/ -#include /* memcpy, memmove, memset */ -/**** skipping file: compiler.h ****/ -/**** skipping file: cpu.h ****/ -/**** skipping file: mem.h ****/ +/**** skipping file: ../common/zstd_deps.h ****/ +/**** skipping file: ../common/compiler.h ****/ +/**** skipping file: ../common/cpu.h ****/ +/**** skipping file: ../common/mem.h ****/ #define FSE_STATIC_LINKING_ONLY -/**** skipping file: fse.h ****/ +/**** skipping file: ../common/fse.h ****/ #define HUF_STATIC_LINKING_ONLY -/**** skipping file: huf.h ****/ -/**** skipping file: zstd_internal.h ****/ +/**** skipping file: ../common/huf.h ****/ +/**** skipping file: ../common/zstd_internal.h ****/ /**** skipping file: zstd_decompress_internal.h ****/ /**** skipping file: zstd_ddict.h ****/ /**** skipping file: zstd_decompress_block.h ****/ @@ -10506,7 +13638,7 @@ size_t ZSTD_decompressStream_simpleArgs ( /*_******************************************************* * Memory operations **********************************************************/ -static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); } +static void ZSTD_copy4(void* dst, const void* src) { ZSTD_memcpy(dst, src, 4); } /*-************************************************************* @@ -10518,7 +13650,7 @@ static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); } size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr) { - RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong); + RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong, ""); { U32 const cBlockHeader = MEM_readLE24(src); U32 const cSize = cBlockHeader >> 3; @@ -10526,7 +13658,7 @@ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3); bpPtr->origSize = cSize; /* only useful for RLE */ if (bpPtr->blockType == bt_rle) return 1; - RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected); + RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected, ""); return cSize; } } @@ -10542,7 +13674,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, const void* src, size_t srcSize) /* note : srcSize < BLOCKSIZE */ { DEBUGLOG(5, "ZSTD_decodeLiteralsBlock"); - RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected); + RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected, ""); { const BYTE* const istart = (const BYTE*) src; symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3); @@ -10551,7 +13683,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, { case set_repeat: DEBUGLOG(5, "set_repeat flag : re-using stats from previous compressed literals block"); - RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted); + RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted, ""); /* fall-through */ case set_compressed: @@ -10583,8 +13715,8 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, litCSize = (lhc >> 22) + ((size_t)istart[4] << 10); break; } - RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected); - RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected); + RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, ""); + RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, ""); /* prefetch huffman 
table if cold */ if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) { @@ -10622,13 +13754,13 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, } } - RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected); + RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, ""); dctx->litPtr = dctx->litBuffer; dctx->litSize = litSize; dctx->litEntropy = 1; if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable; - memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH); + ZSTD_memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH); return litCSize + lhSize; } @@ -10652,11 +13784,11 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, } if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */ - RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected); - memcpy(dctx->litBuffer, istart+lhSize, litSize); + RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected, ""); + ZSTD_memcpy(dctx->litBuffer, istart+lhSize, litSize); dctx->litPtr = dctx->litBuffer; dctx->litSize = litSize; - memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH); + ZSTD_memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH); return lhSize+litSize; } /* direct reference into compressed stream */ @@ -10684,8 +13816,8 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4"); break; } - RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected); - memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH); + RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, ""); + ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH); dctx->litPtr = dctx->litBuffer; dctx->litSize = litSize; return lhSize+1; @@ -10698,7 +13830,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, /* Default FSE distribution tables. 
* These are pre-calculated FSE decoding tables using default distributions as defined in specification : - * https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#default-distributions + * https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#default-distributions * They were generated programmatically with following method : * - start from default distributions, present in /lib/common/zstd_internal.h * - generate tables normally, using ZSTD_buildFSETable() @@ -10826,23 +13958,26 @@ static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U32 nbAddB * generate FSE decoding table for one symbol (ll, ml or off) * cannot fail if input is valid => * all inputs are presumed validated at this stage */ -void -ZSTD_buildFSETable(ZSTD_seqSymbol* dt, +FORCE_INLINE_TEMPLATE +void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt, const short* normalizedCounter, unsigned maxSymbolValue, const U32* baseValue, const U32* nbAdditionalBits, - unsigned tableLog) + unsigned tableLog, void* wksp, size_t wkspSize) { ZSTD_seqSymbol* const tableDecode = dt+1; - U16 symbolNext[MaxSeq+1]; - U32 const maxSV1 = maxSymbolValue + 1; U32 const tableSize = 1 << tableLog; - U32 highThreshold = tableSize-1; + + U16* symbolNext = (U16*)wksp; + BYTE* spread = (BYTE*)(symbolNext + MaxSeq + 1); + U32 highThreshold = tableSize - 1; + /* Sanity Checks */ assert(maxSymbolValue <= MaxSeq); assert(tableLog <= MaxFSELog); - + assert(wkspSize >= ZSTD_BUILD_FSE_TABLE_WKSP_SIZE); + (void)wkspSize; /* Init, lay down lowprob symbols */ { ZSTD_seqSymbol_header DTableH; DTableH.tableLog = tableLog; @@ -10858,16 +13993,69 @@ ZSTD_buildFSETable(ZSTD_seqSymbol* dt, assert(normalizedCounter[s]>=0); symbolNext[s] = (U16)normalizedCounter[s]; } } } - memcpy(dt, &DTableH, sizeof(DTableH)); + ZSTD_memcpy(dt, &DTableH, sizeof(DTableH)); } /* Spread symbols */ - { U32 const tableMask = tableSize-1; + assert(tableSize <= 512); + /* Specialized symbol spreading for the case when there are + * no low probability (-1 count) symbols. When compressing + * small blocks we avoid low probability symbols to hit this + * case, since header decoding speed matters more. + */ + if (highThreshold == tableSize - 1) { + size_t const tableMask = tableSize-1; + size_t const step = FSE_TABLESTEP(tableSize); + /* First lay down the symbols in order. + * We use a uint64_t to lay down 8 bytes at a time. This reduces branch + * misses since small blocks generally have small table logs, so nearly + * all symbols have counts <= 8. We ensure we have 8 bytes at the end of + * our buffer to handle the over-write. 
+ */ + { + U64 const add = 0x0101010101010101ull; + size_t pos = 0; + U64 sv = 0; + U32 s; + for (s=0; s highThreshold) position = (position + step) & tableMask; /* lowprob area */ @@ -10876,7 +14064,8 @@ ZSTD_buildFSETable(ZSTD_seqSymbol* dt, } /* Build Decoding table */ - { U32 u; + { + U32 u; for (u=0; u max, corruption_detected); + RETURN_ERROR_IF(!srcSize, srcSize_wrong, ""); + RETURN_ERROR_IF((*(const BYTE*)src) > max, corruption_detected, ""); { U32 const symbol = *(const BYTE*)src; U32 const baseline = baseValue[symbol]; U32 const nbBits = nbAdditionalBits[symbol]; @@ -10915,7 +14144,7 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb *DTablePtr = defaultTable; return 0; case set_repeat: - RETURN_ERROR_IF(!flagRepeatTable, corruption_detected); + RETURN_ERROR_IF(!flagRepeatTable, corruption_detected, ""); /* prefetch FSE table if used */ if (ddictIsCold && (nbSeq > 24 /* heuristic */)) { const void* const pStart = *DTablePtr; @@ -10927,9 +14156,9 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb { unsigned tableLog; S16 norm[MaxSeq+1]; size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize); - RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected); - RETURN_ERROR_IF(tableLog > maxLog, corruption_detected); - ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog); + RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected, ""); + RETURN_ERROR_IF(tableLog > maxLog, corruption_detected, ""); + ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog, wksp, wkspSize, bmi2); *DTablePtr = DTableSpace; return headerSize; } @@ -10942,35 +14171,36 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, const void* src, size_t srcSize) { - const BYTE* const istart = (const BYTE* const)src; + const BYTE* const istart = (const BYTE*)src; const BYTE* const iend = istart + srcSize; const BYTE* ip = istart; int nbSeq; DEBUGLOG(5, "ZSTD_decodeSeqHeaders"); /* check */ - RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong); + RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong, ""); /* SeqHead */ nbSeq = *ip++; if (!nbSeq) { *nbSeqPtr=0; - RETURN_ERROR_IF(srcSize != 1, srcSize_wrong); + RETURN_ERROR_IF(srcSize != 1, srcSize_wrong, ""); return 1; } if (nbSeq > 0x7F) { if (nbSeq == 0xFF) { - RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong); - nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2; + RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, ""); + nbSeq = MEM_readLE16(ip) + LONGNBSEQ; + ip+=2; } else { - RETURN_ERROR_IF(ip >= iend, srcSize_wrong); + RETURN_ERROR_IF(ip >= iend, srcSize_wrong, ""); nbSeq = ((nbSeq-0x80)<<8) + *ip++; } } *nbSeqPtr = nbSeq; /* FSE table descriptors */ - RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong); /* minimum possible size: 1 byte for symbol encoding types */ + RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong, ""); /* minimum possible size: 1 byte for symbol encoding types */ { symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6); symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3); symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3); @@ -10982,8 +14212,10 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, ip, iend-ip, LL_base, LL_bits, LL_defaultDTable, dctx->fseEntropy, - dctx->ddictIsCold, nbSeq); - RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected); + 
dctx->ddictIsCold, nbSeq, + dctx->workspace, sizeof(dctx->workspace), + dctx->bmi2); + RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed"); ip += llhSize; } @@ -10992,8 +14224,10 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, ip, iend-ip, OF_base, OF_bits, OF_defaultDTable, dctx->fseEntropy, - dctx->ddictIsCold, nbSeq); - RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected); + dctx->ddictIsCold, nbSeq, + dctx->workspace, sizeof(dctx->workspace), + dctx->bmi2); + RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed"); ip += ofhSize; } @@ -11002,8 +14236,10 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, ip, iend-ip, ML_base, ML_bits, ML_defaultDTable, dctx->fseEntropy, - dctx->ddictIsCold, nbSeq); - RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected); + dctx->ddictIsCold, nbSeq, + dctx->workspace, sizeof(dctx->workspace), + dctx->bmi2); + RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed"); ip += mlhSize; } } @@ -11016,7 +14252,6 @@ typedef struct { size_t litLength; size_t matchLength; size_t offset; - const BYTE* match; } seq_t; typedef struct { @@ -11030,9 +14265,6 @@ typedef struct { ZSTD_fseState stateOffb; ZSTD_fseState stateML; size_t prevOffset[ZSTD_REP_NUM]; - const BYTE* prefixStart; - const BYTE* dictEnd; - size_t pos; } seqState_t; /*! ZSTD_overlapCopy8() : @@ -11127,15 +14359,15 @@ size_t ZSTD_execSequenceEnd(BYTE* op, { BYTE* const oLitEnd = op + sequence.litLength; size_t const sequenceLength = sequence.litLength + sequence.matchLength; - BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */ const BYTE* const iLitEnd = *litPtr + sequence.litLength; const BYTE* match = oLitEnd - sequence.offset; BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; - /* bounds checks */ - assert(oLitEnd < oMatchEnd); - RETURN_ERROR_IF(oMatchEnd > oend, dstSize_tooSmall, "last match must fit within dstBuffer"); - RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "try to read beyond literal buffer"); + /* bounds checks : careful of address space overflow in 32-bit mode */ + RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer"); + RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer"); + assert(op < op + sequenceLength); + assert(oLitEnd < op + sequenceLength); /* copy literals */ ZSTD_safecopy(op, oend_w, *litPtr, sequence.litLength, ZSTD_no_overlap); @@ -11145,15 +14377,15 @@ size_t ZSTD_execSequenceEnd(BYTE* op, /* copy Match */ if (sequence.offset > (size_t)(oLitEnd - prefixStart)) { /* offset beyond prefix */ - RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected); + RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, ""); match = dictEnd - (prefixStart-match); if (match + sequence.matchLength <= dictEnd) { - memmove(oLitEnd, match, sequence.matchLength); + ZSTD_memmove(oLitEnd, match, sequence.matchLength); return sequenceLength; } /* span extDict & currentPrefixSegment */ { size_t const length1 = dictEnd - match; - memmove(oLitEnd, match, length1); + ZSTD_memmove(oLitEnd, match, length1); op = oLitEnd + length1; sequence.matchLength -= length1; match = prefixStart; @@ -11171,16 +14403,27 @@ size_t ZSTD_execSequence(BYTE* op, BYTE* const oLitEnd = op + sequence.litLength; size_t const sequenceLength = 
sequence.litLength + sequence.matchLength; BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */ - BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; + BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; /* risk : address space underflow on oend=NULL */ const BYTE* const iLitEnd = *litPtr + sequence.litLength; const BYTE* match = oLitEnd - sequence.offset; - /* Errors and uncommon cases handled here. */ - assert(oLitEnd < oMatchEnd); - if (UNLIKELY(iLitEnd > litLimit || oMatchEnd > oend_w)) + assert(op != NULL /* Precondition */); + assert(oend_w < oend /* No underflow */); + /* Handle edge cases in a slow path: + * - Read beyond end of literals + * - Match end is within WILDCOPY_OVERLIMIT of oend + * - 32-bit mode and the match length overflows + */ + if (UNLIKELY( + iLitEnd > litLimit || + oMatchEnd > oend_w || + (MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH))) return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd); /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */ + assert(op <= oLitEnd /* No overflow */); + assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */); + assert(oMatchEnd <= oend /* No underflow */); assert(iLitEnd <= litLimit /* Literal length is in bounds */); assert(oLitEnd <= oend_w /* Can wildcopy literals */); assert(oMatchEnd <= oend_w /* Can wildcopy matches */); @@ -11200,15 +14443,15 @@ size_t ZSTD_execSequence(BYTE* op, /* Copy Match */ if (sequence.offset > (size_t)(oLitEnd - prefixStart)) { /* offset beyond prefix -> go into extDict */ - RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected); + RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, ""); match = dictEnd + (match - prefixStart); if (match + sequence.matchLength <= dictEnd) { - memmove(oLitEnd, match, sequence.matchLength); + ZSTD_memmove(oLitEnd, match, sequence.matchLength); return sequenceLength; } /* span extDict & currentPrefixSegment */ { size_t const length1 = dictEnd - match; - memmove(oLitEnd, match, length1); + ZSTD_memmove(oLitEnd, match, length1); op = oLitEnd + length1; sequence.matchLength -= length1; match = prefixStart; @@ -11283,10 +14526,9 @@ ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, ZSTD : 0) typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e; -typedef enum { ZSTD_p_noPrefetch=0, ZSTD_p_prefetch=1 } ZSTD_prefetch_e; FORCE_INLINE_TEMPLATE seq_t -ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, const ZSTD_prefetch_e prefetch) +ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets) { seq_t seq; ZSTD_seqSymbol const llDInfo = seqState->stateLL.table[seqState->stateLL.state]; @@ -11361,14 +14603,6 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, c DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u", (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset); - if (prefetch == ZSTD_p_prefetch) { - size_t const pos = seqState->pos + seq.litLength; - const BYTE* const matchBase = (seq.offset > pos) ? seqState->dictEnd : seqState->prefixStart; - seq.match = matchBase + pos - seq.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted. 
- * No consequence though : no memory access will occur, offset is only used for prefetching */ - seqState->pos = pos + seq.matchLength; - } - /* ANS state update * gcc-9.0.0 does 2.5% worse with ZSTD_updateFseStateWithDInfo(). * clang-9.2.0 does 7% worse with ZSTD_updateFseState(). @@ -11398,17 +14632,64 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, c return seq; } +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION +MEM_STATIC int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd) +{ + size_t const windowSize = dctx->fParams.windowSize; + /* No dictionary used. */ + if (dctx->dictContentEndForFuzzing == NULL) return 0; + /* Dictionary is our prefix. */ + if (prefixStart == dctx->dictContentBeginForFuzzing) return 1; + /* Dictionary is not our ext-dict. */ + if (dctx->dictEnd != dctx->dictContentEndForFuzzing) return 0; + /* Dictionary is not within our window size. */ + if ((size_t)(oLitEnd - prefixStart) >= windowSize) return 0; + /* Dictionary is active. */ + return 1; +} + +MEM_STATIC void ZSTD_assertValidSequence( + ZSTD_DCtx const* dctx, + BYTE const* op, BYTE const* oend, + seq_t const seq, + BYTE const* prefixStart, BYTE const* virtualStart) +{ +#if DEBUGLEVEL >= 1 + size_t const windowSize = dctx->fParams.windowSize; + size_t const sequenceSize = seq.litLength + seq.matchLength; + BYTE const* const oLitEnd = op + seq.litLength; + DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u", + (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset); + assert(op <= oend); + assert((size_t)(oend - op) >= sequenceSize); + assert(sequenceSize <= ZSTD_BLOCKSIZE_MAX); + if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) { + size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing); + /* Offset must be within the dictionary. */ + assert(seq.offset <= (size_t)(oLitEnd - virtualStart)); + assert(seq.offset <= windowSize + dictSize); + } else { + /* Offset must be within our window. 
*/ + assert(seq.offset <= windowSize); + } +#else + (void)dctx, (void)op, (void)oend, (void)seq, (void)prefixStart, (void)virtualStart; +#endif +} +#endif + #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG FORCE_INLINE_TEMPLATE size_t DONT_VECTORIZE ZSTD_decompressSequences_body( ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset) + const ZSTD_longOffset_e isLongOffset, + const int frame) { const BYTE* ip = (const BYTE*)seqStart; const BYTE* const iend = ip + seqSize; - BYTE* const ostart = (BYTE* const)dst; + BYTE* const ostart = (BYTE*)dst; BYTE* const oend = ostart + maxDstSize; BYTE* op = ostart; const BYTE* litPtr = dctx->litPtr; @@ -11417,19 +14698,20 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx, const BYTE* const vBase = (const BYTE*) (dctx->virtualStart); const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); DEBUGLOG(5, "ZSTD_decompressSequences_body"); + (void)frame; /* Regen sequences */ if (nbSeq) { seqState_t seqState; - size_t error = 0; dctx->fseEntropy = 1; { U32 i; for (i=0; ientropy.rep[i]; } RETURN_ERROR_IF( ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)), - corruption_detected); + corruption_detected, ""); ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr); ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); + assert(dst != NULL); ZSTD_STATIC_ASSERT( BIT_DStream_unfinished < BIT_DStream_completed && @@ -11454,13 +14736,14 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx, * If you see most cycles served out of the DSB you've hit the good case. * If it is pretty even then you may be in an okay case. * - * I've been able to reproduce this issue on the following CPUs: + * This issue has been reproduced on the following CPUs: * - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9 * Use Instruments->Counters to get DSB/MITE cycles. * I never got performance swings, but I was able to * go from the good case of mostly DSB to half of the * cycles served from MITE. * - Coffeelake: Intel i9-9900k + * - Coffeelake: Intel i7-9700k * * I haven't been able to reproduce the instability or DSB misses on any * of the following CPUS: @@ -11473,40 +14756,48 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx, * * https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4 */ + __asm__(".p2align 6"); + __asm__("nop"); __asm__(".p2align 5"); __asm__("nop"); +# if __GNUC__ >= 9 + /* better for gcc-9 and gcc-10, worse for clang and gcc-8 */ + __asm__(".p2align 3"); +# else __asm__(".p2align 4"); +# endif #endif for ( ; ; ) { - seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_noPrefetch); + seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset); size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd); +#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) + assert(!ZSTD_isError(oneSeqSize)); + if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); +#endif + if (UNLIKELY(ZSTD_isError(oneSeqSize))) + return oneSeqSize; DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize); + op += oneSeqSize; + if (UNLIKELY(!--nbSeq)) + break; BIT_reloadDStream(&(seqState.DStream)); - /* gcc and clang both don't like early returns in this loop. - * gcc doesn't like early breaks either. 
- * Instead save an error and report it at the end. - * When there is an error, don't increment op, so we don't - * overwrite. - */ - if (UNLIKELY(ZSTD_isError(oneSeqSize))) error = oneSeqSize; - else op += oneSeqSize; - if (UNLIKELY(!--nbSeq)) break; } /* check if reached exact end */ DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq); - if (ZSTD_isError(error)) return error; - RETURN_ERROR_IF(nbSeq, corruption_detected); - RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected); + RETURN_ERROR_IF(nbSeq, corruption_detected, ""); + RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, ""); /* save reps for next block */ { U32 i; for (i=0; ientropy.rep[i] = (U32)(seqState.prevOffset[i]); } } /* last literal segment */ { size_t const lastLLSize = litEnd - litPtr; - RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall); - memcpy(op, litPtr, lastLLSize); - op += lastLLSize; + RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, ""); + if (op != NULL) { + ZSTD_memcpy(op, litPtr, lastLLSize); + op += lastLLSize; + } } return op-ostart; @@ -11516,23 +14807,43 @@ static size_t ZSTD_decompressSequences_default(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset) + const ZSTD_longOffset_e isLongOffset, + const int frame) { - return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); + return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); } #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT + +FORCE_INLINE_TEMPLATE size_t +ZSTD_prefetchMatch(size_t prefetchPos, seq_t const sequence, + const BYTE* const prefixStart, const BYTE* const dictEnd) +{ + prefetchPos += sequence.litLength; + { const BYTE* const matchBase = (sequence.offset > prefetchPos) ? dictEnd : prefixStart; + const BYTE* const match = matchBase + prefetchPos - sequence.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted. + * No consequence though : memory address is only used for prefetching, not for dereferencing */ + PREFETCH_L1(match); PREFETCH_L1(match+CACHELINE_SIZE); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */ + } + return prefetchPos + sequence.matchLength; +} + +/* This decoding function employs prefetching + * to reduce latency impact of cache misses. 
+ * It's generally employed when block contains a significant portion of long-distance matches + * or when coupled with a "cold" dictionary */ FORCE_INLINE_TEMPLATE size_t ZSTD_decompressSequencesLong_body( ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset) + const ZSTD_longOffset_e isLongOffset, + const int frame) { const BYTE* ip = (const BYTE*)seqStart; const BYTE* const iend = ip + seqSize; - BYTE* const ostart = (BYTE* const)dst; + BYTE* const ostart = (BYTE*)dst; BYTE* const oend = ostart + maxDstSize; BYTE* op = ostart; const BYTE* litPtr = dctx->litPtr; @@ -11540,51 +14851,62 @@ ZSTD_decompressSequencesLong_body( const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart); const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart); const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); + (void)frame; /* Regen sequences */ if (nbSeq) { -#define STORED_SEQS 4 +#define STORED_SEQS 8 #define STORED_SEQS_MASK (STORED_SEQS-1) -#define ADVANCED_SEQS 4 +#define ADVANCED_SEQS STORED_SEQS seq_t sequences[STORED_SEQS]; int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS); seqState_t seqState; int seqNb; + size_t prefetchPos = (size_t)(op-prefixStart); /* track position relative to prefixStart */ + dctx->fseEntropy = 1; { int i; for (i=0; ientropy.rep[i]; } - seqState.prefixStart = prefixStart; - seqState.pos = (size_t)(op-prefixStart); - seqState.dictEnd = dictEnd; + assert(dst != NULL); assert(iend >= ip); RETURN_ERROR_IF( ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)), - corruption_detected); + corruption_detected, ""); ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr); ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); /* prepare in advance */ for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb (size_t)(oend-op), dstSize_tooSmall); - memcpy(op, litPtr, lastLLSize); - op += lastLLSize; + RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, ""); + if (op != NULL) { + ZSTD_memcpy(op, litPtr, lastLLSize); + op += lastLLSize; + } } return op-ostart; @@ -11607,9 +14931,10 @@ static size_t ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset) + const ZSTD_longOffset_e isLongOffset, + const int frame) { - return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); + return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); } #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ @@ -11623,9 +14948,10 @@ DONT_VECTORIZE ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset) + const ZSTD_longOffset_e isLongOffset, + const int frame) { - return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); + return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); } #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ @@ -11634,9 +14960,10 @@ static TARGET_ATTRIBUTE("bmi2") size_t ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e 
isLongOffset) + const ZSTD_longOffset_e isLongOffset, + const int frame) { - return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); + return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); } #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ @@ -11646,21 +14973,23 @@ typedef size_t (*ZSTD_decompressSequences_t)( ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset); + const ZSTD_longOffset_e isLongOffset, + const int frame); #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG static size_t ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset) + const ZSTD_longOffset_e isLongOffset, + const int frame) { DEBUGLOG(5, "ZSTD_decompressSequences"); #if DYNAMIC_BMI2 if (dctx->bmi2) { - return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); + return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); } #endif - return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); + return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); } #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ @@ -11675,15 +15004,16 @@ static size_t ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset) + const ZSTD_longOffset_e isLongOffset, + const int frame) { DEBUGLOG(5, "ZSTD_decompressSequencesLong"); #if DYNAMIC_BMI2 if (dctx->bmi2) { - return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); + return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); } #endif - return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); + return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); } #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ @@ -11717,7 +15047,6 @@ ZSTD_getLongOffsetsShare(const ZSTD_seqSymbol* offTable) } #endif - size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, @@ -11733,7 +15062,7 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN)))); DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize); - RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong); + RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong, ""); /* Decode literals section */ { size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize); @@ -11759,6 +15088,8 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, ip += seqHSize; srcSize -= seqHSize; + RETURN_ERROR_IF(dst == NULL && nbSeq > 0, dstSize_tooSmall, "NULL not handled"); + #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) if ( !usePrefetchDecoder @@ -11777,20 +15108,20 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, if (usePrefetchDecoder) #endif #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT - return ZSTD_decompressSequencesLong(dctx, dst, 
dstCapacity, ip, srcSize, nbSeq, isLongOffset); + return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame); #endif #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG /* else */ - return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset); + return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame); #endif } } -void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst) +void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize) { - if (dst != dctx->previousDstEnd) { /* not contiguous */ + if (dst != dctx->previousDstEnd && dstSize > 0) { /* not contiguous */ dctx->dictEnd = dctx->previousDstEnd; dctx->virtualStart = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart)); dctx->prefixStart = dst; @@ -11804,9 +15135,9 @@ size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, const void* src, size_t srcSize) { size_t dSize; - ZSTD_checkContinuity(dctx, dst); + ZSTD_checkContinuity(dctx, dst, dstCapacity); dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0); dctx->previousDstEnd = (char*)dst + dSize; return dSize; } -/**** ended inlining zstd_decompress_block.c ****/ +/**** ended inlining decompress/zstd_decompress_block.c ****/ From a56358b9fcc7a9f393c9eee065592cd2ed9c8ae4 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 16 May 2021 16:15:14 -0700 Subject: [PATCH 046/901] kramv - add uv coords to the hud --- kramv/KramViewerMain.mm | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index 9f4fcdb6..f7e4a38c 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -824,12 +824,17 @@ - (void)updateEyedropper { // this will always be a linear color float4 c = _showSettings->textureResult; - int32_t x = _showSettings->textureResultX; int32_t y = _showSettings->textureResultY; + // show uv, so can relate to gpu coordinates stored in geometry and find atlas areas + append_sprintf(text, "uv:%0.3f %0.3f\n", + (float)x / _showSettings->imageBoundsX, + (float)y / _showSettings->imageBoundsY + ); + // pixel at top-level mip - sprintf(text, "px:%d %d\n", x, y); + append_sprintf(text, "px:%d %d\n", x, y); // show block num int mipLOD = _showSettings->mipLOD; From 28fb4ce4ed1c0a55c97255f65c60a67a633231b7 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 16 May 2021 16:16:58 -0700 Subject: [PATCH 047/901] kram - update mipDown logic to take depth Still doesn't mean kram supports 3d with mips. Just part of the journey. 
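A note on the new signature: the KramConfig.h hunk in this patch drops the ROUNDMIPSDOWN/round-up branch and adds the depth argument, but the added line reads "d = h / 2;", which looks like a slip for "d = d / 2;" (depth should halve independently, just like width and height). A minimal depth-aware round-down step, for reference only and assuming that intent:

inline void mipDown(int32_t& w, int32_t& h, int32_t& d)
{
    // GL/D3D/Metal round non-pow2 mips down; each dimension halves on its own
    // and clamps to 1, so a 2D texture (d == 1) stays 2D all the way down.
    w = w / 2;
    h = h / 2;
    d = d / 2;

    if (w < 1) w = 1;
    if (h < 1) h = 1;
    if (d < 1) d = 1;
}

Callers with purely 2D data, like the KramSDFMipper hunk below, can pass a local d = 1 and the clamp keeps it there.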
--- kramv/KramLoader.mm | 6 ++- libkram/kram/Kram.cpp | 86 +++++++++++++++++++++++----------- libkram/kram/KramConfig.h | 15 ++---- libkram/kram/KramImageInfo.cpp | 2 + libkram/kram/KramImageInfo.h | 4 ++ libkram/kram/KramMipper.cpp | 7 +-- libkram/kram/KramMipper.h | 3 +- libkram/kram/KramSDFMipper.cpp | 5 +- 8 files changed, 83 insertions(+), 45 deletions(-) diff --git a/kramv/KramLoader.mm b/kramv/KramLoader.mm index 11421ae5..1fc8a651 100644 --- a/kramv/KramLoader.mm +++ b/kramv/KramLoader.mm @@ -343,6 +343,7 @@ - (void)setMipgenNeeded:(BOOL)enabled { int32_t w = image.width; int32_t h = image.height; + int32_t d = image.depth; int32_t numMips = MAX(1, image.header.numberOfMipmapLevels); int32_t numArrays = MAX(1, image.header.numberOfArrayElements); @@ -469,7 +470,7 @@ - (void)setMipgenNeeded:(BOOL)enabled { } } - mipDown(w, h); + mipDown(w, h, d); } return texture; @@ -578,6 +579,7 @@ - (nonnull instancetype)init { int32_t w = image.width; int32_t h = image.height; + int32_t d = image.depth; int32_t numMips = MAX(1, image.header.numberOfMipmapLevels); int32_t numArrays = MAX(1, image.header.numberOfArrayElements); @@ -687,7 +689,7 @@ - (nonnull instancetype)init { } } - mipDown(w, h); + mipDown(w, h, d); } // this only affect managed textures diff --git a/libkram/kram/Kram.cpp b/libkram/kram/Kram.cpp index a3c6db07..5284bea1 100644 --- a/libkram/kram/Kram.cpp +++ b/libkram/kram/Kram.cpp @@ -511,7 +511,7 @@ string formatInputAndOutput(int32_t testNumber, const char* srcFilename, MyMTLPi size_t extSeparator = dst.rfind('.'); assert(extSeparator != string::npos); dst.erase(extSeparator); - dst.append(".ktx"); + dst.append(".ktx"); // TODO: test ktx2 too cmd += dst; @@ -1386,7 +1386,7 @@ string kramInfoKTXToString(const string& srcFilename, const KTXImage& srcImage, numPixels *= (float)pixelMultiplier; if (srcImage.header.numberOfMipmapLevels > 1) { - numPixels *= 4.0 / 3.0f; // estimate for now + numPixels *= 4.0 / 3.0f; // TODO: estimate for now } numPixels /= (1000.0f * 1000.0f); @@ -1424,14 +1424,12 @@ string kramInfoKTXToString(const string& srcFilename, const KTXImage& srcImage, // print out the array if (srcImage.header.numberOfArrayElements > 1) { - sprintf(tmp, + append_sprintf(info, "arry: %d\n", srcImage.header.numberOfArrayElements); - - info += tmp; } - sprintf(tmp, + append_sprintf(info, "fmtk: %s\n" "fmtm: %s (%d)\n" "fmtv: %s (%d)\n" @@ -1440,13 +1438,10 @@ string kramInfoKTXToString(const string& srcFilename, const KTXImage& srcImage, metalTypeName(metalFormat), metalFormat, vulkanTypeName(metalFormat), vulkanType(metalFormat), glTypeName(metalFormat), glType(metalFormat)); - info += tmp; // report any props - string propText; for (const auto& prop : srcImage.props) { - sprintf(propText, "prop: %s %s\n", prop.first.c_str(), prop.second.c_str()); - info += propText; + append_sprintf(info, "prop: %s %s\n", prop.first.c_str(), prop.second.c_str()); } // TODO: handle zstd compressed KTX2 too, they have a length and compressed length field @@ -1457,19 +1452,51 @@ string kramInfoKTXToString(const string& srcFilename, const KTXImage& srcImage, int32_t mipLevel = 0; int32_t w = srcImage.width; int32_t h = srcImage.height; - + int32_t d = srcImage.depth; + + // num chunks + append_sprintf(info, + "chun: %d\n", + srcImage.totalChunks()); + for (const auto& mip : srcImage.mipLevels) { - sprintf(tmp, - "mipn: %d\n" - "mipd: %dx%d\n" - "mips: %" PRIu64 "\n" - "mipc: %dx\n" - "mipo: %" PRIu64 "\n", - w, h, mipLevel++, mip.length, srcImage.totalChunks(), mip.offset); - info += tmp; + 
switch (textureType) { + case MyMTLTextureType3D: + append_sprintf(info, + "mipl: %d %dx%dx%d ", + mipLevel++, + w, h, d); + break; + default: + append_sprintf(info, + "mipl: %d %dx%d ", + mipLevel++, + w, h); + break; + } + + if (mip.lengthCompressed != 0) { + size_t percent = (100 * mip.lengthCompressed) / mip.length; + + append_sprintf(info, + "%" PRIu64 ",%" PRIu64 ",%" PRIu64 " %d%%\n", + mip.offset, + mip.length, // only size of one mip right now, not mip * numChunks + mip.lengthCompressed, // TODO: preserve so can be displayed + (int)percent + ); + } + else { + append_sprintf(info, + "%" PRIu64 ",%" PRIu64 "\n", + mip.offset, + mip.length // only size of one mip right now, not mip * numChunks + ); + } + // drop a mip level - mipDown(w, h); + mipDown(w, h, d); } } @@ -1593,10 +1620,11 @@ static int32_t kramAppDecode(vector& args) error = true; } - isKTX = endsWith(dstFilename, ".ktx"); + bool isDstKTX = endsWith(dstFilename, ".ktx"); + bool isDstKTX2 = endsWith(dstFilename, ".ktx2"); - if (!isKTX) { - KLOGE("Kram", "decode only supports ktx output"); + if (!(isDstKTX || isDstKTX2)) { + KLOGE("Kram", "decode only supports ktx and ktx2 output"); error = true; } @@ -1620,7 +1648,7 @@ static int32_t kramAppDecode(vector& args) return -1; } - success = success && SetupTmpFile(tmpFileHelper, ".ktx"); + success = success && SetupTmpFile(tmpFileHelper, isDstKTX ? ".ktx" : ".ktx2"); if (success && isVerbose) { KLOGI("Kram", "Decoding %s to %s with %s\n", @@ -1984,10 +2012,12 @@ static int32_t kramAppEncode(vector& args) error = true; } + // allow ktx and ktx2 output bool isDstKTX = endsWith(dstFilename, ".ktx"); + bool isDstKTX2 = endsWith(dstFilename, ".ktx2"); - if (!isDstKTX) { - KLOGE("Kram", "encode only supports ktx output"); + if (!(isDstKTX || isDstKTX2)) { + KLOGE("Kram", "encode only supports ktx and ktx2 output"); error = true; } @@ -1996,6 +2026,8 @@ static int32_t kramAppEncode(vector& args) return -1; } + infoArgs.isKTX2 = isDstKTX2; + // Any new settings just go into this struct which is passed into enoder ImageInfo info; info.initWithArgs(infoArgs); @@ -2013,7 +2045,7 @@ static int32_t kramAppEncode(vector& args) srcFilename, srcImage, isPremulRgb); if (success) { - success = SetupTmpFile(tmpFileHelper, ".ktx"); + success = SetupTmpFile(tmpFileHelper, isDstKTX ? ".ktx" : ".ktx2"); if (!success) { KLOGE("Kram", "encode couldn't generate tmp file for output"); diff --git a/libkram/kram/KramConfig.h b/libkram/kram/KramConfig.h index cdf2c205..97fb39f6 100644 --- a/libkram/kram/KramConfig.h +++ b/libkram/kram/KramConfig.h @@ -378,28 +378,23 @@ inline half4 toHalf4(const float4& vv) //--------------------------------------- -#define ROUNDMIPSDOWN 1 -inline void mipDown(int32_t& w, int32_t& h) +inline void mipDown(int32_t& w, int32_t& h, int32_t& d) { // GL/D3D hobbled non-pow2 mips by only supporting round down, not round up - // And then Metal followd OpenGL since it's the same hw and drivers. + // And then Metal followed OpenGL since it's the same hw and drivers. // Round up adds an extra mip level to the chain, but results in much better filtering. 
// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_non_power_of_two.txt // http://download.nvidia.com/developer/Papers/2005/NP2_Mipmapping/NP2_Mipmap_Creation.pdf -#if ROUNDMIPSDOWN // round-down w = w / 2; h = h / 2; - + d = h / 2; + if (w < 1) w = 1; if (h < 1) h = 1; -#else - // round-up - w = (w + 1) / 2; - h = (h + 1) / 2; -#endif + if (d < 1) d = 1; } // Use this on vectors diff --git a/libkram/kram/KramImageInfo.cpp b/libkram/kram/KramImageInfo.cpp index 2cab1300..7302bbb3 100644 --- a/libkram/kram/KramImageInfo.cpp +++ b/libkram/kram/KramImageInfo.cpp @@ -995,6 +995,8 @@ void ImageInfo::initWithArgs(const ImageInfoArgs& args) textureEncoder = args.textureEncoder; textureType = args.textureType; + isKTX2 = args.isKTX2; + isPrezero = args.isPrezero; isPremultiplied = args.isPremultiplied; if (isPremultiplied) diff --git a/libkram/kram/KramImageInfo.h b/libkram/kram/KramImageInfo.h index d4c9f862..54a343b1 100644 --- a/libkram/kram/KramImageInfo.h +++ b/libkram/kram/KramImageInfo.h @@ -51,6 +51,8 @@ class ImageInfoArgs { int32_t quality = 49; // may want float + bool isKTX2 = false; + //bool skipImageLength = false; bool doMipmaps = true; // default to mips on bool isVerbose = false; @@ -119,6 +121,8 @@ class ImageInfo { string averageChannels; string swizzleText; + bool isKTX2 = false; + // output image state // Note: difference between input srgb and output srgb, but it's mingled // here a bit diff --git a/libkram/kram/KramMipper.cpp b/libkram/kram/KramMipper.cpp index 50b5715c..1bd80432 100644 --- a/libkram/kram/KramMipper.cpp +++ b/libkram/kram/KramMipper.cpp @@ -333,8 +333,9 @@ void Mipper::mipmap(const ImageData& srcImage, ImageData& dstImage) const { dstImage.width = srcImage.width; dstImage.height = srcImage.height; - - mipDown(dstImage.width, dstImage.height); + dstImage.depth = srcImage.depth; + + mipDown(dstImage.width, dstImage.height, dstImage.depth); // this assumes that we can read mip-1 from srcImage mipmapLevel(srcImage, dstImage); @@ -344,7 +345,7 @@ void Mipper::mipmapLevelOdd(const ImageData& srcImage, ImageData& dstImage) cons { int32_t width = srcImage.width; int32_t height = srcImage.height; - + // this can receive premul, srgb data // the mip chain is linear data only Color* cDstColor = dstImage.pixels; diff --git a/libkram/kram/KramMipper.h b/libkram/kram/KramMipper.h index 65013751..19bde640 100644 --- a/libkram/kram/KramMipper.h +++ b/libkram/kram/KramMipper.h @@ -51,7 +51,8 @@ class ImageData { int32_t width = 0; int32_t height = 0; - + int32_t depth = 0; + bool isSRGB = false; bool isHDR = false; // only updates pixelsFloat }; diff --git a/libkram/kram/KramSDFMipper.cpp b/libkram/kram/KramSDFMipper.cpp index 1d51e4d1..7bb6f71d 100644 --- a/libkram/kram/KramSDFMipper.cpp +++ b/libkram/kram/KramSDFMipper.cpp @@ -55,10 +55,11 @@ void SDFMipper::mipmap(ImageData& dstImage, int32_t mipLevel) { int32_t w = srcBitmapImage.width; int32_t h = srcBitmapImage.height; - + int32_t d = 1; + // can use shift with mip down, but this iterates for (int32_t i = 0; i < mipLevel; ++i) { - mipDown(w, h); + mipDown(w, h, d); } dstImage.width = w; From 55ddca6ba0e6166b2217d11c1d54d4414e708d88 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 16 May 2021 18:48:29 -0700 Subject: [PATCH 048/901] KTX2 - ability to save data to KTX2 format with zstd/zlib compression. This is a pretty big revamp to KramImage and KTXImage to allow it to support KTX2 format saves. Full DFD writing even though this is mostly redundant with VKFormat. 
Premul setting not specific to when premul occurs. Split initMipLevels and validateMipLevels. Moved some of KramImage to KTXImage. Broke up some KramImage monolithic functions. Remove "any" support from scripts for now. Will update that support later. Now use ktx2 in python scripts directly. Can bypass ktx2ktx2 and ktx2sc. ktx2 always sets -zstd but -zlib works. Made a single file version of zstd that has an encoder. Previously only had decoder. Use zstd.h header now to stay in sync with calls. zstd is updated to 1.5.0. Scoping operators for the contexts, so they are cleaned up. CLI now supports -zstd and -zlib args. --- libkram/kram/KTXImage.cpp | 345 +++++----- libkram/kram/KTXImage.h | 52 +- libkram/kram/Kram.cpp | 99 +-- libkram/kram/KramImage.cpp | 1098 ++++++++++++++++++++------------ libkram/kram/KramImage.h | 21 +- libkram/kram/KramImageInfo.cpp | 1 + libkram/kram/KramImageInfo.h | 6 + scripts/kramTests.sh | 4 +- scripts/kramTextures.py | 70 +- 9 files changed, 1060 insertions(+), 636 deletions(-) diff --git a/libkram/kram/KTXImage.cpp b/libkram/kram/KTXImage.cpp index c9f37333..12564de9 100644 --- a/libkram/kram/KTXImage.cpp +++ b/libkram/kram/KTXImage.cpp @@ -10,19 +10,10 @@ #include #include -extern "C" { - -// not using zstd.h, so pull this in directly from zstddeclib.c -bool FSE_isError(size_t size); - -typedef struct ZSTD_DCtx_s ZSTD_DCtx; -ZSTD_DCtx* ZSTD_createDCtx(void); -size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx); - -size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize); -} +// for zlib decompress +#include "miniz.h" +// for zstd decompress +#include "zstd.h" namespace kram { @@ -942,7 +933,8 @@ bool KTXImage::open(const uint8_t* imageData, size_t imageDataLength) return false; } - return initMipLevels(true, sizeof(KTXHeader) + header.bytesOfKeyValueData); + initMipLevels(sizeof(KTXHeader) + header.bytesOfKeyValueData); + return validateMipLevels(); } void KTXImage::initProps(const uint8_t* propsData, size_t propDataSize) @@ -1095,7 +1087,85 @@ void KTXImage::toPropsData(vector& propsData) // TODO: this needs to pad to 16-bytes, so may need a prop for that } -bool KTXImage::initMipLevels(bool validateLevelSizeFromRead, size_t offsetToImageData) +void KTXImage::initMipLevels(bool doMipmaps, int32_t mipMinSize, int32_t mipMaxSize) +{ + // dst levels + int32_t w = width; + int32_t h = height; + int32_t d = depth; + + bool needsDownsample = (w > mipMaxSize || h > mipMaxSize); + + int32_t maxMipLevels = 16; // 64K x 64K + + // can still downsample src multiple times even with only 1 level exported + if ((!doMipmaps) && needsDownsample) { + maxMipLevels = 1; + } + + KTXImageLevel level; + level.offset = 0; // compute later, once know ktx vs. 
ktx2 + + mipLevels.clear(); + + if (doMipmaps || needsDownsample) { + bool keepMip = + (w >= mipMinSize && w <= mipMaxSize) && + (h >= mipMinSize && h <= mipMaxSize); + + if (keepMip) { + level.length = mipLevelSize(w, h); + level.lengthCompressed = 0; + + if (mipLevels.empty()) { + // adjust the top dimensions + width = w; + height = h; + depth = d; + } + mipLevels.push_back(level); + } + + do { + mipDown(w, h, d); + + keepMip = + (w >= mipMinSize && w <= mipMaxSize) && + (h >= mipMinSize && h <= mipMaxSize); + + if (keepMip && (mipLevels.size() < (size_t)maxMipLevels)) { + // length needs to be multiplied by chunk size before writing out + level.length = mipLevelSize(w, h); + level.lengthCompressed = 0; + + if (mipLevels.empty()) { + // adjust the top dimensions + width = w; + height = h; + depth = d; + } + + mipLevels.push_back(level); + } + + } while (w > 1 || h > 1 || d > 1); + } + else { + // length needs to be multiplied by chunk size before writing out + level.length = mipLevelSize(w, h); + level.lengthCompressed = 0; + + mipLevels.push_back(level); + } + + + header.numberOfMipmapLevels = mipLevels.size(); + + header.pixelWidth = width; + header.pixelHeight = height; +} + +void KTXImage::initMipLevels(size_t mipOffset) { // largest mips are first in file uint32_t numMips = max(1u, header.numberOfMipmapLevels); @@ -1105,117 +1175,62 @@ bool KTXImage::initMipLevels(bool validateLevelSizeFromRead, size_t offsetToImag mipLevels.reserve(numMips); mipLevels.clear(); - size_t totalDataSize = offsetToImageData; // sizeof(KTXHeader) + header.bytesOfKeyValueData; - //size_t blockSize = this->blockSize(); + size_t offset = mipOffset; int32_t w = width; int32_t h = height; - + int32_t d = depth; + for (uint32_t i = 0; i < numMips; ++i) { size_t dataSize = mipLevelSize(w, h); uint32_t levelSize = dataSize * numChunks; // compute dataSize from header data - if (!skipImageLength) { - // read data size - // 4-byte dataSize throws off alignment of mips to block size on most formats - // would need to pad after this by block size - - // validate that no weird size to image - if (validateLevelSizeFromRead) { - const uint8_t* levelSizeField = (const uint8_t*)fileData + totalDataSize; - - uint32_t levelSizeFromRead = *(const uint32_t*)levelSizeField; - // cube only stores size of one face, ugh - if (textureType == MyMTLTextureTypeCube) { - levelSizeFromRead *= 6; - } - - if (levelSizeFromRead != levelSize) { - KLOGE("kram", "mip %d levelSize mismatch %d %d", i, (int)levelSizeFromRead, (int)levelSize); - return false; - } - } - // advance past the length - totalDataSize += sizeof(uint32_t); + offset += sizeof(uint32_t); } - size_t offset = totalDataSize; - + // TODO: Here is where offset alignment to 4 bytes may be needed + // but that also needs to be accounted for in allocation + // level holds single texture size not level size, but offset reflects level start - KTXImageLevel level = {offset, dataSize}; + KTXImageLevel level = { offset, 0, dataSize }; mipLevels.push_back(level); - totalDataSize += levelSize; - - // TODO: remove code below, since padding really isn't used with 4-byte alignment of rowBytes in KTX1 - //mips += levelSize; - - // for (int array = 0; array < numArrays; ++array) { - // for (int face = 0; face < numFaces; ++face) { - // for (int slice = 0; slice < numSlices; ++slice) { - // const uint8_t* srcImageData = mips; - // mips += dataSize; - // totalDataSize += dataSize; - // - // // assumes all images are in same mmap file, so can just - // // alias the offset these offsets need to 
be at a multiple - // // of the block size - // size_t offset = srcImageData - fileData; - // KTXImageLevel level = {offset, dataSize}; - // mipLevels.push_back(level); - // - // if (skipImageLength) { - // if ((offset & (blockSize - 1)) != 0) { - // return false; - // } - // } - // - // // TODO: pad to 4 on 1/2/3 byte formats - // // but make sure if this is on every mip or not - // } - // - //// // cube padding to 4 byte alignment - //// if (textureType == MyMTLTextureTypeCube) { - //// size_t padding = - //// 3 - ((dataSize + 3) % 4); // 0, 1, 2, 3 -> 0, 3, 2, 1 - //// if (padding > 0) { - //// mips += padding; - //// totalDataSize += padding; - //// } - //// - //// if (skipImageLength) { - //// if (padding != 0) { - //// return false; - //// } - //// } - //// } - // } - // } + offset += levelSize; + + mipDown(w, h, d); + } +} - // // mip padding to 4 byte alignment - // size_t padding = - // 3 - ((totalDataSize + 3) % 4); // 0, 1, 2, 3 -> 0, 3, 2, 1 - // if (padding > 0) { - // mips += padding; - // totalDataSize += padding; - // } - // - // if (skipImageLength) { - // if (padding != 0) { - // return false; - // } - // } +bool KTXImage::validateMipLevels() const { + if (skipImageLength) + return true; + + bool isValid = true; - // https://computergraphics.stackexchange.com/questions/1441/how-does-mip-mapping-work-with-non-power-of-2-textures + // validate that no weird size to image + for (uint32_t i = 0; i < mipLevels.size(); ++i) { + auto& level = mipLevels[i]; + + const uint8_t* levelSizeField = (const uint8_t*)fileData + level.offset - sizeof(uint32_t); + uint32_t levelSizeFromRead = *(const uint32_t*)levelSizeField; + + // cube only stores size of one face, ugh + if (textureType == MyMTLTextureTypeCube) { + levelSizeFromRead *= 6; + } - mipDown(w, h); + if (levelSizeFromRead != level.length) { + KLOGE("kram", "mip %d levelSize mismatch %d %d", i, (int)levelSizeFromRead, (int)level.length); + isValid = false; + break; + } } - - return true; + + return isValid; } const char* textureTypeName(MyMTLTextureType textureType) @@ -1241,7 +1256,7 @@ const char* textureTypeName(MyMTLTextureType textureType) - +// KTX2 layout //// Data Format Descriptor //uint32_t dfdTotalSize = 0; //continue @@ -1272,6 +1287,17 @@ const char* textureTypeName(MyMTLTextureType textureType) // can use ktx2ktx2 and ktx2sc to supercompress, and kramv can use this to open and view data as a KTX1 file. // ignoring Basis and supercompression data, etc. +// wish C++ had a defer +struct ZSTDScope2 +{ + ZSTDScope2(ZSTD_DCtx* ctx_) : ctx(ctx_) {} + ~ZSTDScope2() { ZSTD_freeDCtx(ctx); } + +private: + ZSTD_DCtx* ctx = nullptr; +}; + + bool KTXImage::openKTX2(const uint8_t* imageData, size_t imageDataLength) { if ((size_t)imageDataLength < sizeof(KTX2Header)) { @@ -1290,22 +1316,16 @@ bool KTXImage::openKTX2(const uint8_t* imageData, size_t imageDataLength) // copy out the header, const KTX2Header& header2 = *(const KTX2Header*)imageData; - enum KTX2Supercompression { - KTX2SupercompressionNone = 0, - KTX2SupercompressionBasisLZ = 1, // can transcode, but can't gen from KTX file using ktxsc, uses sgdByteLength - KTX2SupercompressionZstd = 2, // faster deflate, ktxsc support - KTX2SupercompressionZlib = 3, // deflate, no ktxsc support (use miniz) - // TODO: Need LZFSE? 
- }; - - bool isLevelOfMipCompressed = header2.supercompressionScheme != KTX2SupercompressionNone; if (header2.supercompressionScheme != KTX2SupercompressionNone && - header2.supercompressionScheme != KTX2SupercompressionZstd) { + header2.supercompressionScheme != KTX2SupercompressionZstd && + header2.supercompressionScheme != KTX2SupercompressionZlib) { KLOGE("kram", "Unknown supercompression %d", header2.supercompressionScheme); return false; } + bool isCompressed = header2.supercompressionScheme != KTX2SupercompressionNone; + // This typically means UASTC encoding + zstd supercompression, and code doesn't handle that below yet if (header2.vkFormat == 0) { KLOGE("kram", "Basis encode not yet supported"); @@ -1329,12 +1349,13 @@ bool KTXImage::openKTX2(const uint8_t* imageData, size_t imageDataLength) int32_t numChunks = totalChunks(); - // need to copy out lengthCompressed here, since we can't determine that - vector levels; + vector levels; uint32_t levelOffset = sizeof(KTX2Header); for (uint32_t i = 0; i < header.numberOfMipmapLevels; ++i) { // ktx2 stores levels in same order as ktx1, but larger mips occur later in the file - auto level = *(const KTX2ImageLevel*)(imageData + levelOffset + sizeof(KTX2ImageLevel) * i); + // only KTX2 writes this array out due to lengthCompressed field. + + auto level = *(const KTXImageLevel*)(imageData + levelOffset + sizeof(KTXImageLevel) * i); assert(level.length % numChunks == 0); @@ -1356,32 +1377,27 @@ bool KTXImage::openKTX2(const uint8_t* imageData, size_t imageDataLength) return false; } - // Note: KTX2 also doesn't have the length field embedded the mipData - // so need to be able to set skipLength to unify the mipgen if aliasing the mip data - // Only reading this format, never writing it out. - skipImageLength = true; - // transfer key-value data pairs // bytesOfKeyValueData will be updated if props written out - // but probably want to leave this out of level offsets - header.bytesOfKeyValueData = 0; // header2.kvdByteLength; + header.bytesOfKeyValueData = 0; initProps(imageData + header2.kvdByteOffset, header2.kvdByteLength); - if (!isLevelOfMipCompressed) { + if (!isCompressed) { + // Note: this is aliasing the mips from a ktx2 file into a ktx1 KTXImage + // This is highly unsafe. + + // Note: KTX2 also doesn't have the length field embedded the mipData + // so need to be able to set skipLength to unify the mipgen if aliasing the mip data + // Only reading this format, never writing it out. 
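For orientation, the level index that the loop above reads sits immediately after the KTX2 header, one entry per mip, and its three 64-bit fields line up with this patch's KTXImageLevel (offset, lengthCompressed, length). A small sketch of pulling it out of a mapped file; the struct and helper names are local to this sketch and error handling is omitted:

#include <cstdint>
#include <cstring>
#include <vector>

// One entry of the KTX2 level index (field meaning per the KTX2 spec).
struct KTX2LevelEntry {
    uint64_t byteOffset;               // where this level's data starts in the file
    uint64_t byteLength;               // size on disk (compressed when supercompressed)
    uint64_t uncompressedByteLength;   // size once supercompression is undone
};

// Read numberOfMipmapLevels entries starting right after the KTX2 header.
// Entry 0 is mip 0 (the largest), but its data is stored last in the file.
static std::vector<KTX2LevelEntry> readLevelIndex(const uint8_t* fileData,
                                                  size_t headerSize,
                                                  uint32_t numMips)
{
    std::vector<KTX2LevelEntry> levels(numMips);
    std::memcpy(levels.data(), fileData + headerSize,
                numMips * sizeof(KTX2LevelEntry));
    return levels;
}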
+ skipImageLength = true; + fileData = imageData; fileDataLength = imageDataLength; - // might be able to just use header2.sgdByteOffset + header2.sgdByteLength - uint32_t offsetToImageData = //std::max(std::max( - //header2.dfdByteOffset + header2.dfdByteLength, - //header2.kvdByteOffset + header2.kvdByteLength), - header2.sgdByteOffset + header2.sgdByteLength; - - - if (!initMipLevels(false, offsetToImageData)) { - return false; - } + // these are mip offset for KTX2 file + size_t mipOffset = header2.sgdByteOffset + header2.sgdByteLength; + initMipLevels(mipOffset); // TODO: KTX1 packs rows to 4 bytes, but KTX2 packs tightly to 1 // for now just reverse the ktx2 mips back to ktx1, aliasing fileData @@ -1405,9 +1421,9 @@ bool KTXImage::openKTX2(const uint8_t* imageData, size_t imageDataLength) } } else { - if (!initMipLevels(false, sizeof(KTXHeader) + header.bytesOfKeyValueData)) { - return false; - } + // This is decompressing KTX2 into KTX1 + size_t mipOffset = sizeof(KTXHeader) + header.bytesOfKeyValueData; + initMipLevels(mipOffset); // compute the decompressed size // Note: initMipLevels computes but doesn't store this @@ -1421,6 +1437,7 @@ bool KTXImage::openKTX2(const uint8_t* imageData, size_t imageDataLength) bool isZstd = header2.supercompressionScheme == KTX2SupercompressionZstd; ZSTD_DCtx* dctx = nullptr; if (isZstd) dctx = ZSTD_createDCtx(); + ZSTDScope2 scope(dctx); // need to decompress mips here for (uint32_t i = 0; i < header.numberOfMipmapLevels; ++i) { @@ -1434,6 +1451,13 @@ bool KTXImage::openKTX2(const uint8_t* imageData, size_t imageDataLength) size_t dstDataSize = level1.length * numChunks; uint8_t* dstData = (uint8_t*)fileData + level1.offset; // can const_cast, since class owns data + // preserve lengthCompressed so kram info can display the value + // this field will need to be set to 0 + + // This does display in kram info, but it's confusing since image was converted to ktx1 + // and the offsets are largest first. So for now, don't copy this in. + // level1.lengthCompressed = level2.lengthCompressed; + // TODO: use basis transcoder (single file) for Basis UASTC here, then don't need libktx yet // wont work for BasisLZ (which is ETC1S). 
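The hunk just below switches the per-level decompression over to zstd.h and miniz calls with proper error checks; the KTX2 save path this commit adds needs the inverse. A minimal sketch of compressing one level's chunk data with zstd — the helper name and buffer handling are illustrative rather than kram's actual implementation, which per the commit message wraps its contexts in scope guards like the ZSTDScope2 above:

#include "zstd.h"
#include <cstdint>
#include <vector>

// Compress one level's worth of chunk data; on success dst holds the bytes
// whose size would land in KTXImageLevel::lengthCompressed.
static bool compressLevelZstd(const uint8_t* src, size_t srcSize,
                              std::vector<uint8_t>& dst, int level)
{
    dst.resize(ZSTD_compressBound(srcSize));    // worst-case output size
    size_t const written = ZSTD_compress(dst.data(), dst.size(),
                                         src, srcSize, level);
    if (ZSTD_isError(written))
        return false;
    dst.resize(written);
    return true;
}

The -zlib path would presumably lean on miniz's mz_compress in the same spot.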
@@ -1445,26 +1469,39 @@ bool KTXImage::openKTX2(const uint8_t* imageData, size_t imageDataLength) dstData, dstDataSize, srcData, srcDataSize); - if (FSE_isError(result)) { - ZSTD_freeDCtx(dctx); + if (ZSTD_isError(result)) { + KLOGE("kram", "decode mip zstd failed"); + return false; + } + if (level2.length * numChunks != result) { + KLOGE("kram", "decode mip zstd size not expected"); return false; } - assert(level2.length * numChunks == result); break; } - case KTX2SupercompressionBasisLZ: - // TODO: this one really needs KTX-software branch - // also loader has option to transcode to various formats - break; + + case KTX2SupercompressionZlib: { + // can use miniz or libCompression + mz_ulong dstDataSizeMZ = 0; + if (mz_uncompress(dstData, &dstDataSizeMZ, + srcData, srcDataSize) != MZ_OK) { + KLOGE("kram", "decode mip zlib failed"); + return false; + } + if (dstDataSizeMZ != dstDataSize) { + KLOGE("kram", "decode mip zlib size not expected"); + return false; + } - case KTX2SupercompressionZlib: - // TODO: can use miniz on this, or libCompression break; + } + + // already checked at top of function + default: { + return false; + } } } - - if (dctx) ZSTD_freeDCtx(dctx); - } return true; @@ -1476,6 +1513,8 @@ vector& KTXImage::imageData() { void KTXImage::reserveImageData() { int32_t numChunks = totalChunks(); + + // on KTX1 the last mip is the smallest and last in the file const auto& lastMip = mipLevels[header.numberOfMipmapLevels-1]; size_t totalKTXSize = lastMip.offset + lastMip.length * numChunks; diff --git a/libkram/kram/KTXImage.h b/libkram/kram/KTXImage.h index a5787056..2b0b54c3 100644 --- a/libkram/kram/KTXImage.h +++ b/libkram/kram/KTXImage.h @@ -181,11 +181,11 @@ class KTXHeader { // This is one entire level of mipLevels. // In KTX, the image levels are assumed from format and size since no compression applied. -class KTXImageLevel { -public: - uint64_t offset; // numChunks * length - uint64_t length; // size of a single mip -}; +//class KTXImageLevel { +//public: +// uint64_t offset; // numChunks * length +// uint64_t length; // size of a single mip +//}; //--------------------------------------------- @@ -225,18 +225,38 @@ class KTX2Header { uint64_t sgdByteLength = 0; // chunks hold levelCount of all mips of the same size - // KTX2ImageLevel* chunks; // [levelCount] + // KTXImageLevel* chunks; // [levelCount] }; -// Unlike KTX, KTX2 writes an array of level sizes since compression may e involved. -// These correspond to an entire compressed array of chunks. -// So often an entire level mus be decompressed before a chunk can be accessed. +// Unlike KTX, KTX2 writes an array of level sizes since level compression may be used. +// Level compression is an entire compressed array of chunks at a given mip dimension. +// So then the entire level must be decompressed before a chunk can be accessed. // This is one entire level of mipLevels. -class KTX2ImageLevel { +// +// Use this for KTX, but there length == lengthCompressed, and the array is just a temporary. +// and the offsts include a 4 byte length at the start of each level. 
+class KTXImageLevel { public: - uint64_t offset; // numChunks * length - uint64_t lengthCompressed; // can only be read in, can't compute this, but can compute upper bound from zstd - uint64_t length; // size of a single mip + uint64_t offset = 0; // differ in ordering - ktx largest first, ktx2 smallest first + uint64_t lengthCompressed = 0; // set to 0 if not compresseds + uint64_t length = 0; // numChunks * mipSize when written for non cube on KTX1 or all KTX2, internally only stores mipSize +}; + +enum KTX2Supercompression { + KTX2SupercompressionNone = 0, + KTX2SupercompressionBasisLZ = 1, // can transcode, but can't gen from KTX file using ktxsc, uses sgdByteLength + KTX2SupercompressionZstd = 2, // faster deflate, ktxsc support + KTX2SupercompressionZlib = 3, // deflate, no ktxsc support (use miniz) + // TODO: Need LZFSE? + // TODO: need Kraken for PS4 + // TODO: need Xbox format +}; + +struct KTX2Compressor { + KTX2Supercompression compressorType = KTX2SupercompressionNone; + float compressorLevel = 0.0f; // 0.0 default, 100.0 full compression + + bool isCompressed() const { return compressorType != KTX2SupercompressionNone; } }; //--------------------------------------------- @@ -251,8 +271,12 @@ class KTXImage { bool open(const uint8_t* imageData, size_t imageDataLength); void initProps(const uint8_t* propsData, size_t propDataSize); - bool initMipLevels(bool validateLevelSizeFromRead, size_t offsetToImageData); + + void initMipLevels(size_t mipOffset); + void initMipLevels(bool doMipmaps, int32_t mipMinSize, int32_t mipMaxSize); + bool validateMipLevels() const; + // props handling void toPropsData(vector& propsData); diff --git a/libkram/kram/Kram.cpp b/libkram/kram/Kram.cpp index 5284bea1..ec475eb4 100644 --- a/libkram/kram/Kram.cpp +++ b/libkram/kram/Kram.cpp @@ -893,7 +893,7 @@ void kramInfoUsage(bool showVersion = true) KLOGI("Kram", "%s\n" "Usage: kram info\n" - "\t -i/nput <.png | .ktx>\n" + "\t -i/nput <.png | .ktx | .ktx2>\n" "\t [-o/utput info.txt]\n" "\t [-v/erbose]\n" "\n", @@ -943,8 +943,8 @@ void kramEncodeUsage(bool showVersion = true) "Usage: kram encode\n" "\t -f/ormat (bc1 | astc4x4 | etc2rgba | rgba16f)\n" "\t [-srgb] [-signed] [-normal]\n" - "\t -i/nput \n" - "\t -o/utput \n" + "\t -i/nput \n" + "\t -o/utput \n" "\n" "\t [-type 2d|3d|..]\n" "\t [-e/ncoder (squish | ate | etcenc | bcenc | astcenc | explicit | ..)]\n" @@ -1059,6 +1059,12 @@ void kramEncodeUsage(bool showVersion = true) "\t-chunks 4x4" "\tSpecifies how many chunks to split up texture into 2darray\n" + // ktx2 specific settings + "\t-zstd" + "\tktx2 with zstd mip compressor\n" + "\t-zlib" + "\tktx2 with zlib mip compressor\n" + "\t-swizzle [rgba01 x4]" "\tSpecifies pre-encode swizzle pattern\n" "\t-avg [rgba]" @@ -1125,7 +1131,7 @@ static int32_t kramAppInfo(vector& args) } dstFilename = args[i]; - continue; + //continue; } else if (isStringEqual(word, "-input") || isStringEqual(word, "-i")) { @@ -1137,7 +1143,7 @@ static int32_t kramAppInfo(vector& args) } srcFilename = args[i]; - continue; + //continue; } else if (isStringEqual(word, "-v") || isStringEqual(word, "-verbose")) { @@ -1544,7 +1550,7 @@ static int32_t kramAppDecode(vector& args) // TODO: if args ends with /, then output to that dir dstFilename = args[i]; - continue; + //continue; } else if (isStringEqual(word, "-input") || isStringEqual(word, "-i")) { @@ -1556,7 +1562,7 @@ static int32_t kramAppDecode(vector& args) } srcFilename = args[i]; - continue; + //continue; } else if (isStringEqual(word, "-swizzle")) { @@ -1574,7 +1580,7 @@ 
static int32_t kramAppDecode(vector& args) break; } swizzleText = swizzleString; - continue; + //continue; } // this is really decoder, but keep same argument as encoder else if (isStringEqual(word, "-e") || @@ -1587,14 +1593,14 @@ static int32_t kramAppDecode(vector& args) } textureDecoder = parseEncoder(args[i]); - continue; + //continue; } // probably should be per-command and global verbose else if (isStringEqual(word, "-v") || isStringEqual(word, "-verbose")) { isVerbose = true; - continue; + //continue; } else { KLOGE("Kram", "unexpected argument \"%s\"\n", @@ -1698,11 +1704,11 @@ static int32_t kramAppEncode(vector& args) if (isStringEqual(word, "-sdf")) { infoArgs.doSDF = true; - continue; + //continue; } else if (isStringEqual(word, "-optopaque")) { infoArgs.optimizeFormatForOpaque = true; - continue; + //continue; } // mip setting @@ -1715,7 +1721,7 @@ static int32_t kramAppEncode(vector& args) } infoArgs.mipMaxSize = atoi(args[i]); - continue; + //continue; } else if (isStringEqual(word, "-mipmin")) { ++i; @@ -1726,12 +1732,12 @@ static int32_t kramAppEncode(vector& args) } infoArgs.mipMinSize = atoi(args[i]); - continue; + //continue; } else if (isStringEqual(word, "-mipnone")) { // disable mips even if pow2 infoArgs.doMipmaps = false; - continue; + //continue; } // else if (isStringEqual(word, "-mipalign")) { // // pad start of each mip to pixel/block size of format @@ -1755,17 +1761,17 @@ static int32_t kramAppEncode(vector& args) KLOGE("Kram", "heightScale arg cannot be 0"); error = true; } - continue; + //continue; } else if (isStringEqual(word, "-height")) { // converted to a normal map infoArgs.isHeight = true; - continue; + //continue; } else if (isStringEqual(word, "-wrap")) { // whether texture is clamp or wrap infoArgs.isWrap = true; - continue; + //continue; } @@ -1779,7 +1785,7 @@ static int32_t kramAppEncode(vector& args) } infoArgs.textureEncoder = parseEncoder(args[i]); - continue; + //continue; } else if (isStringEqual(word, "-swizzle")) { @@ -1797,7 +1803,7 @@ static int32_t kramAppEncode(vector& args) break; } infoArgs.swizzleText = swizzleString; - continue; + //continue; } else if (isStringEqual(word, "-chunks")) { @@ -1824,7 +1830,7 @@ static int32_t kramAppEncode(vector& args) infoArgs.chunksY = chunksY; infoArgs.chunksCount = chunksX * chunksY; - continue; + //continue; } else if (isStringEqual(word, "-avg")) { @@ -1836,7 +1842,7 @@ static int32_t kramAppEncode(vector& args) break; } infoArgs.averageChannels = channelString; - continue; + //continue; } else if (isStringEqual(word, "-type")) { ++i; @@ -1847,7 +1853,7 @@ static int32_t kramAppEncode(vector& args) } infoArgs.textureType = parseTextureType(args[i]); - continue; + //continue; } else if (isStringEqual(word, "-quality")) { ++i; @@ -1858,7 +1864,7 @@ static int32_t kramAppEncode(vector& args) } infoArgs.quality = atoi(args[i]); - continue; + //continue; } else if (isStringEqual(word, "-output") || @@ -1872,13 +1878,7 @@ static int32_t kramAppEncode(vector& args) // TODO: if args ends with /, then output to that dir dstFilename = args[i]; - -// // see if it's a ktxa file -// if (dstFilename.back() == 'a' || -// dstFilename.back() == 'A') { -// infoArgs.skipImageLength = true; -// } - continue; + //continue; } else if (isStringEqual(word, "-input") || isStringEqual(word, "-i")) { @@ -1890,29 +1890,29 @@ static int32_t kramAppEncode(vector& args) } srcFilename = args[i]; - continue; + //continue; } // these affect the format else if (isStringEqual(word, "-hdr")) { // not validating format for 
whether it's srgb or not infoArgs.isHDR = true; - continue; + //continue; } else if (isStringEqual(word, "-srgb")) { // not validating format for whether it's srgb or not infoArgs.isSRGB = true; - continue; + //continue; } else if (isStringEqual(word, "-signed")) { // not validating format for whether it's signed or not infoArgs.isSigned = true; - continue; + //continue; } else if (isStringEqual(word, "-normal")) { infoArgs.isNormal = true; - continue; + //continue; } else if (isStringEqual(word, "-resize")) { ++i; @@ -1923,7 +1923,7 @@ static int32_t kramAppEncode(vector& args) } resizeString = args[i]; - continue; + //continue; } // This means to post-multiply alpha after loading, not that incoming data in already premul @@ -1931,22 +1931,22 @@ static int32_t kramAppEncode(vector& args) // really would prefer to premul them when building the texture. else if (isStringEqual(word, "-premul")) { infoArgs.isPremultiplied = true; - continue; + //continue; } else if (isStringEqual(word, "-prezero")) { infoArgs.isPrezero = true; - continue; + //continue; } // this means premul the data at read from srgb, this it to match photoshop else if (isStringEqual(word, "-premulrgb")) { isPremulRgb = true; - continue; + //continue; } else if (isStringEqual(word, "-v") || isStringEqual(word, "-verbose")) { infoArgs.isVerbose = true; - continue; + //continue; } else if (isStringEqual(word, "-f") || isStringEqual(word, "-format")) { @@ -1958,7 +1958,18 @@ static int32_t kramAppEncode(vector& args) } infoArgs.formatString = args[i]; - continue; + //continue; + } + + // compressor for ktx2 mips + // TODO: need level control + else if (isStringEqual(word, "-zstd")) { + infoArgs.compressor.compressorType = KTX2SupercompressionZstd; + //continue; + } + else if (isStringEqual(word, "-zlib")) { + infoArgs.compressor.compressorType = KTX2SupercompressionZlib; + //continue; } else { KLOGE("Kram", "unexpected argument \"%s\"\n", @@ -2176,7 +2187,7 @@ int32_t kramAppScript(vector& args) } srcFilename = args[i]; - continue; + //continue; } else if (isStringEqual(word, "-jobs") || isStringEqual(word, "-j")) { @@ -2189,7 +2200,7 @@ int32_t kramAppScript(vector& args) } numJobs = atoi(args[i]); - continue; + //continue; } else if (isStringEqual(word, "-v") || isStringEqual(word, "-verbose")) { diff --git a/libkram/kram/KramImage.cpp b/libkram/kram/KramImage.cpp index c8ce1b0b..b0839191 100644 --- a/libkram/kram/KramImage.cpp +++ b/libkram/kram/KramImage.cpp @@ -48,6 +48,12 @@ #include #endif +// for zlib compress +#include "miniz.h" + +// for zstd compress +#include "zstd.h" + namespace kram { using namespace std; @@ -308,71 +314,7 @@ bool Image::loadImageFromPixels(const vector& pixels, int32_t width, return true; } -void Image::computeMipStorage(const KTXImage& image, int32_t w, int32_t h, - bool doMipmaps, int32_t mipMinSize, int32_t mipMaxSize, - int32_t& storageSize, int32_t& storageSizeTotal, - vector& mipStorageSizes, - int32_t& numDstMipLevels, int32_t& numMipLevels) const -{ - bool canMipmap = true; // isPow2(w) && isPow2(h); // DONE: removed pow2 requirement, mip gen handles non-pow2 - - bool needsDownsample = (w > mipMaxSize || h > mipMaxSize); - - int32_t maxMipLevels = 16; // 64K x 64K - if ((!doMipmaps) && needsDownsample) { - maxMipLevels = 1; - } - - if (canMipmap && (doMipmaps || needsDownsample)) { - numMipLevels++; - - bool keepMip = - (w >= mipMinSize && w <= mipMaxSize) && - (h >= mipMinSize && h <= mipMaxSize); - - if (keepMip) { - mipStorageSizes.push_back(storageSize); - numDstMipLevels++; - } - 
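// Editorial aside: the "miniz.h" and "zstd.h" includes added above back the new KTX2 mip
// supercompression. For reference, the one-shot form of the public zstd API (assumed usage here,
// not kram code) looks like the sketch below; the encoder later in this file instead keeps a
// ZSTD_CCtx alive and calls ZSTD_compress2 once per mip level.
static bool zstdCompressOnceSketch(const uint8_t* src, size_t srcSize,
                                   vector<uint8_t>& dst, int level /* 0 = zstd default */)
{
    dst.resize(ZSTD_compressBound(srcSize)); // worst-case compressed size
    size_t written = ZSTD_compress(dst.data(), dst.size(), src, srcSize, level);
    if (ZSTD_isError(written)) {
        return false;
    }
    dst.resize(written);
    return true;
}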
else { - mipStorageSizes.push_back(0); // 0 means skip storing this mip - } - - do { - mipDown(w, h); - - keepMip = - (w >= mipMinSize && w <= mipMaxSize) && - (h >= mipMinSize && h <= mipMaxSize); - - if (keepMip && (numDstMipLevels < maxMipLevels)) { - int32_t mipStorageSize = image.mipLevelSize(w, h); - mipStorageSizes.push_back(mipStorageSize); - storageSizeTotal += mipStorageSize; - numDstMipLevels++; - } - else { - mipStorageSizes.push_back(0); // - means skip storing this mip - } - // a count of how many mips exist from topmost - numMipLevels++; - } while (w > 1 || h > 1); - - // adjust the pixel storage area to the first/largest exported mip - for (auto mipStorageSize : mipStorageSizes) { - if (mipStorageSize != 0) { - storageSize = mipStorageSize; - break; - } - } - } - else { - mipStorageSizes.push_back(storageSize); - numDstMipLevels++; - numMipLevels++; - } -} // Can average any channels per block, this means they are constant across the // block and use endpoint storage but do not affect the endpoint fitting. @@ -524,10 +466,9 @@ bool Image::decodeImpl(const KTXImage& srcImage, FILE* dstFile, KTXImage& dstIma vector propsData; dstImage.toPropsData(propsData); - dstHeader.bytesOfKeyValueData = uint32_t(propsData.size()); - if (!dstImage.initMipLevels(false, sizeof(KTXHeader) + dstHeader.bytesOfKeyValueData)) { - return false; - } + dstHeader.bytesOfKeyValueData = (uint32_t)vsizeof(propsData); + size_t mipOffset = sizeof(KTXHeader) + dstHeader.bytesOfKeyValueData; + dstImage.initMipLevels(mipOffset); // allocate to hold props and entire image to write out if (!dstFile) { @@ -546,12 +487,12 @@ bool Image::decodeImpl(const KTXImage& srcImage, FILE* dstFile, KTXImage& dstIma // write the header out - if (!writeDataAtOffset((const uint8_t*)&headerCopy, sizeof(headerCopy), 0, dstFile, dstImage)) { + if (!writeDataAtOffset((const uint8_t*)&headerCopy, sizeof(KTXHeader), 0, dstFile, dstImage)) { return false; } // write out the props - if (!writeDataAtOffset(propsData.data(), propsData.size(), sizeof(KTXHeader), dstFile, dstImage)) { + if (!writeDataAtOffset(propsData.data(), vsizeof(propsData), sizeof(KTXHeader), dstFile, dstImage)) { return false; } @@ -580,12 +521,14 @@ bool Image::decodeImpl(const KTXImage& srcImage, FILE* dstFile, KTXImage& dstIma // DONE: walk chunks here and seek to src and dst offsets in conversion // make sure to walk chunks in the exact same order they are written, array then face, or slice - int32_t w = srcImage.width; - int32_t h = srcImage.height; - + int32_t w = 0; + int32_t h = 0; + int32_t d = 0; + for (int32_t chunk = 0; chunk < numChunks; ++chunk) { w = srcImage.width; h = srcImage.height; + d = srcImage.depth; for (int32_t i = 0; i < (int32_t)srcImage.header.numberOfMipmapLevels; ++i) { const KTXImageLevel& dstMipLevel = dstImage.mipLevels[i]; @@ -878,7 +821,7 @@ bool Image::decodeImpl(const KTXImage& srcImage, FILE* dstFile, KTXImage& dstIma } // next mip level - mipDown(w, h); + mipDown(w, h, d); } } @@ -935,39 +878,298 @@ bool Image::encode(ImageInfo& info, KTXImage& dstImage) const bool Image::encode(ImageInfo& info, FILE* dstFile) const { - // this will be throw out + // dstImage will be ignored KTXImage dstImage; + return encodeImpl(info, dstFile, dstImage); } +// Use this for in-place construction of mips +struct MipConstructData { + vector tmpImageData8; // for average channels per block -bool Image::encodeImpl(ImageInfo& info, FILE* dstFile, KTXImage& dstImage) const -{ - //KTXImage image; - KTXHeader& header = dstImage.header; + // use this 
for complex texture types, copy data from vertical/horizotnal + // strip image into here to then gen mips + vector copyImage; + + // So can use simd ops to do conversions, use float4. + // using half4 for mips of ldr data to cut memory in half + // processing large textures nees lots of memory for src image + // 8k x 8k x 8b = 500 mb + // 8k x 8k x 16b = 1 gb + vector halfImage; + vector floatImage; vector chunkOffsets; +}; - int32_t w = _width; - int32_t h = _height; - if (!validateTextureType(info.textureType, w, h, chunkOffsets, header, - info.doMipmaps, - info.chunksX, info.chunksY, info.chunksCount)) - { - return false; - } - // cube and array this is the size of one face/slice - const int32_t modifiedWidth = w; - const int32_t modifiedHeight = h; +// See here: +// https://www.khronos.org/registry/DataFormat/specs/1.3/dataformat.1.3.html - // work out how much memory we need to load - header.initFormatGL(info.pixelFormat); +enum KHR_DF_MODEL { + KHR_DF_MODEL_RGBSDA = 1, + + KHR_DF_MODEL_BC1A = 128, + // KHR_DF_MODEL_BC2 = 129, + KHR_DF_MODEL_BC3 = 130, + KHR_DF_MODEL_BC4 = 131, + KHR_DF_MODEL_BC5 = 132, + KHR_DF_MODEL_BC6H = 133, + KHR_DF_MODEL_BC7 = 134, + + //KHR_DF_MODEL_ETC1 = 160, + KHR_DF_MODEL_ETC2 = 161, + + KHR_DF_MODEL_ASTC = 162, + + //KHR_DF_MODEL_ETC1S = 163, + +}; - dstImage.pixelFormat = info.pixelFormat; - dstImage.textureType = info.textureType; +enum KHR_DF_CHANNEL { + // guessing at these + KHR_DF_CHANNEL_RED = 0, + KHR_DF_CHANNEL_GREEN = 1, + KHR_DF_CHANNEL_BLUE = 2, + KHR_DF_CHANNEL_ALPHA = 15, + + // BC + //KHR_DF_CHANNEL_BC1A_COLOR = 0, + KHR_DF_CHANNEL_BC1A_ALPHA = 15, + + //KHR_DF_CHANNEL_BC2_COLOR = 0, + KHR_DF_CHANNEL_BC2_ALPHA = 15, + + //KHR_DF_CHANNEL_BC3_COLOR = 0, + KHR_DF_CHANNEL_BC3_ALPHA = 15, + + //KHR_DF_CHANNEL_BC4_DATA = 0, + + //KHR_DF_CHANNEL_BC5_RED = 0, + KHR_DF_CHANNEL_BC5_GREEN = 1, + + //KHR_DF_CHANNEL_BC6H_COLOR = 0, + //KHR_DF_CHANNEL_BC7_COLOR = 0, + + // ETC2 + //KHR_DF_CHANNEL_ETC2_RED = 0, + KHR_DF_CHANNEL_ETC2_GREEN = 1, + KHR_DF_CHANNEL_ETC2_COLOR = 2, // RGB + KHR_DF_CHANNEL_ETC2_ALPHA = 16, + + // ASTC + //KHR_DF_CHANNEL_ASTC_DATA = 0, +}; + + +enum KHR_DF_PRIMARIES { + KHR_DF_PRIMARIES_BT709 = 1 +}; +enum KHR_DF_TRANSFER { + KHR_DF_TRANSFER_LINEAR = 1, // ? 
+ KHR_DF_TRANSFER_SRGB = 2, +}; + +enum KHR_DF_ALPHA { + KHR_DF_FLAG_ALPHA_STRAIGHT = 0, + KHR_DF_FLAG_ALPHA_PREMULTIPLIED = 1, +}; + +// 16 bytes total +struct KTX2DescriptorChannelBlock { + + // 32-bits + uint16_t bitOffset = 0; + uint8_t bitLength = 0; + uint8_t channelType : 4; // RED, GREEN, BLUE, RRR, GGG + uint8_t FSEL : 4; // l is low bit + + // 32-bits + uint8_t samplePositions[4] = {0}; + + uint32_t sampleLower = 0; + uint32_t sampleUpper = UINT32_MAX; +}; + +// This can be up to 7 x 4 = 24 + 16 x channels in size +struct KTX2DescriptorFileBlock { + KTX2DescriptorFileBlock(MyMTLPixelFormat format, bool isPremul, bool isCompressed); + + uint32_t totalSize = 0; // descriptorBlockSize + 4 + + uint32_t vendorID : 18; + uint32_t descriptorType : 14; + uint16_t versionNumber = 2; + uint16_t descriptorBlockSize = 0; // 24B + channels (doesn't include totalSize) + + uint8_t colorModel = 0; + uint8_t colorPrimaries = 0; + uint8_t transferFunction = 0; + uint8_t flags = 0; + + uint8_t textureBlockDimensions[4] = {0}; + uint8_t bytesPlane[8] = {0}; + + // now 16 bytes for each channel present + KTX2DescriptorChannelBlock channels[4]; // max channels +}; + +KTX2DescriptorFileBlock::KTX2DescriptorFileBlock(MyMTLPixelFormat format, bool isPremul, bool isCompressed) { + uint32_t numChannels = numChannelsOfFormat(format); + Int2 blockDims = blockDimsOfFormat(format); + bool isSrgb = isSrgbFormat(format); + uint32_t blockSize = blockSizeOfFormat(format); + bool isFloat = isFloatFormat(format); + bool isSigned = isSignedFormat(format); + + totalSize = sizeof(KTX2DescriptorFileBlock) - + (4 - numChannels) * sizeof(KTX2DescriptorChannelBlock); + descriptorBlockSize = totalSize - 4; + + // ugly that these are all -1, can't simply read them in debugger + textureBlockDimensions[0] = blockDims.x - 1; + textureBlockDimensions[1] = blockDims.y - 1; + + vendorID = 0; + descriptorType = 0; + + // these formats are all single-planes + // some indication this should be 0 if zstd applied + if (!isCompressed) { + bytesPlane[0] = blockSize; + } + + for (uint32_t i = 0; i < numChannels; ++i) { + auto& c = channels[i]; + + c.FSEL = 0; + if (isSigned) + c.FSEL |= 0x4; + if (isFloat) + c.FSEL |= 0x8; + + // TODO: what are E & L, nothing in docs about these ? + // no examples of use of these either + + c.channelType = 0; + + if (isFloat) { + // This is for BC6H, TODO: might be half only so test for isHalf? 
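// Editorial size check for the descriptor structs above (assuming they pack with no padding,
// which the declared field layout allows): each KTX2DescriptorChannelBlock is 16 bytes and the
// fixed fields after totalSize add up to 24 bytes, so descriptorBlockSize = 24 + 16 * numChannels
// and totalSize = descriptorBlockSize + 4. For a two-channel format such as BC5 that gives
// totalSize = 92 - (4 - 2) * 16 = 60 and descriptorBlockSize = 56, where 92 is
// sizeof(KTX2DescriptorFileBlock) with its full channels[4] array.
static_assert(sizeof(KTX2DescriptorChannelBlock) == 16, "DFD channel block expected to be 16 bytes");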
+ if (isSigned) { + c.sampleLower = 0xBF800000U; // -1.0f; + c.sampleUpper = 0x7F800000U; //  1.0f; + } + else { + c.sampleLower = 0xBF800000U; //  -1.0f; + c.sampleUpper = 0x7F800000U; //   1.0f; + } + } + else if (isSigned) { + c.sampleLower = INT32_MIN; + c.sampleUpper = INT32_MAX; + } + } + + // set this since it applies to so many block formats + channels[0].bitOffset = 0; + channels[0].bitLength = blockSize * 8 - 1; // needs to be split of channel bits + + + switch(format) { + case MyMTLPixelFormatBC1_RGBA: + case MyMTLPixelFormatBC1_RGBA_sRGB: + // if ever do punchthrough-alpha + //channels[1].channelType = KHR_DF_CHANNEL_BC1A_ALPHA; + break; + + case MyMTLPixelFormatBC3_RGBA: + case MyMTLPixelFormatBC3_RGBA_sRGB: + // alpha is first + channels[0].channelType = KHR_DF_CHANNEL_BC3_ALPHA; + + channels[0].bitOffset = 0; + channels[0].bitLength = 64 - 1; + + channels[1].bitOffset = 64; + channels[1].bitLength = 64 - 1; + + break; + + case MyMTLPixelFormatBC5_RGUnorm: + case MyMTLPixelFormatBC5_RGSnorm: + channels[1].channelType = KHR_DF_CHANNEL_BC3_ALPHA; + + channels[0].bitOffset = 0; + channels[0].bitLength = 64 - 1; + + channels[1].bitOffset = 64; + channels[1].bitLength = 64 - 1; + + break; + + // TODO: fix bc6h sampleLower/Upper + + // TODO: handle etc2 + case MyMTLPixelFormatEAC_RG11Unorm: + case MyMTLPixelFormatEAC_RG11Snorm: + channels[1].channelType = KHR_DF_CHANNEL_ETC2_GREEN; + + channels[0].bitOffset = 0; + channels[0].bitLength = 64 - 1; + + channels[1].bitOffset = 64; + channels[1].bitLength = 64 - 1; + break; + + case MyMTLPixelFormatETC2_RGB8: + case MyMTLPixelFormatETC2_RGB8_sRGB: + channels[0].channelType = KHR_DF_CHANNEL_ETC2_COLOR; + break; + + + case MyMTLPixelFormatEAC_RGBA8: + case MyMTLPixelFormatEAC_RGBA8_sRGB: + channels[0].channelType = KHR_DF_CHANNEL_ETC2_ALPHA; + channels[1].channelType = KHR_DF_CHANNEL_ETC2_COLOR; + + channels[0].bitOffset = 0; + channels[0].bitLength = 64 - 1; + + channels[1].bitOffset = 64; + channels[1].bitLength = 64 - 1; + break; + + + // NOTE: astc is all the same, and can already use defaults + + default: { + uint32_t numChannelBits = (blockSize * 8) / numChannels; + // handle uniform explcit types with offset per channel + uint32_t lastBitOffset = 0; + for (uint32_t i = 0; i < numChannels; ++i) { + auto& c = channels[i]; + c.channelType = KHR_DF_CHANNEL_RED + i; + c.bitOffset = lastBitOffset; + c.bitLength = numChannelBits - 1; + + lastBitOffset += numChannelBits; + } + + colorModel = KHR_DF_MODEL_RGBSDA; + break; + } + } + + colorPrimaries = KHR_DF_PRIMARIES_BT709; + transferFunction = isSrgb ? KHR_DF_TRANSFER_SRGB : KHR_DF_TRANSFER_LINEAR; + flags = isPremul ? KHR_DF_FLAG_ALPHA_PREMULTIPLIED : KHR_DF_FLAG_ALPHA_STRAIGHT; +} + +void Image::addBaseProps(const ImageInfo& info, KTXImage& dstImage) const +{ dstImage.addFormatProps(); // TODO: caller should really set post swizzle @@ -1010,137 +1212,414 @@ bool Image::encodeImpl(ImageInfo& info, FILE* dstFile, KTXImage& dstImage) const else { dstImage.addAddressProps("Rep,Rep,X"); } - + if (info.doMipmaps) { dstImage.addFilterProps("Lin,Lin,Lin"); // min,mag,mip } else { dstImage.addFilterProps("Lin,Lin,X"); // min,mag,mip } - + // This is hash of source png/ktx file (use xxhash32 or crc32) // can quickly check header if multiple copies of same source w/diff names. 
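// (Editorial note, continuing the thought above: with miniz now included in this file, a crc32 of
//  the raw source file bytes, e.g. mz_crc32(0, fileBytes, fileSize), stored in a prop would be
//  enough to spot renamed copies of the same source; wiring that up is still the
//  addSourceHashProps TODO below.)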
// May also need to store command line args in a prop to reject duplicate processing // TODO: ktxImage.addSourceHashProps(0); +} - // convert props into a data blob that can be written out - vector propsData; - dstImage.toPropsData(propsData); - header.bytesOfKeyValueData = (uint32_t)propsData.size(); +// wish C++ had a defer +struct ZSTDScope +{ + ZSTDScope(ZSTD_CCtx* ctx_) : ctx(ctx_) {} + ~ZSTDScope() { ZSTD_freeCCtx(ctx); } + +private: + ZSTD_CCtx* ctx = nullptr; +}; - //ktxImage.bytesPerBlock = header.blockSize(); - //ktxImage.blockDims = header.blockDims(); - int32_t storageSize = dstImage.mipLevelSize(w, h); +bool Image::encodeImpl(ImageInfo& info, FILE* dstFile, KTXImage& dstImage) const +{ + KTXHeader& header = dstImage.header; + MipConstructData mipConstructData; + + vector& chunkOffsets = mipConstructData.chunkOffsets; - // how much to store to store biggest level of ktx (will in-place mip to - // this) - int32_t storageSizeTotal = storageSize; + int32_t w = _width; + int32_t h = _height; - vector mipOffsets; - vector mipStorageSizes; - int32_t numDstMipLevels = 0; - int32_t numMipLevels = 0; + // compute chunks, and adjust w/h based on that + // the code allows a vertical or horizontal strip or grid of chunks + if (!validateTextureType(info.textureType, w, h, chunkOffsets, header, + info.doMipmaps, + info.chunksX, info.chunksY, info.chunksCount)) + { + return false; + } - // header only holds pixelFormat, but can generate block info from that - computeMipStorage(dstImage, w, h, // pixelFormat, - info.doMipmaps, info.mipMinSize, info.mipMaxSize, - storageSize, storageSizeTotal, mipStorageSizes, - numDstMipLevels, numMipLevels); + // work out how much memory we need to load + header.initFormatGL(info.pixelFormat); - // now compute the mip base offsets - int32_t mipOffset = sizeof(KTXHeader) + header.bytesOfKeyValueData; + dstImage.pixelFormat = info.pixelFormat; + dstImage.textureType = info.textureType; - for (int32_t i = 0; i < numMipLevels; ++i) { - int32_t mipStorageSize = mipStorageSizes[i]; - if (mipStorageSize == 0) { - mipOffsets.push_back(0); - continue; + // whd might be changed by initMipLevels based on min/max mip size + dstImage.width = w; + dstImage.height = h; + dstImage.depth = header.pixelDepth; // from validate above + + dstImage.initMipLevels(info.doMipmaps, info.mipMinSize, info.mipMaxSize); + + // ---------------------------------------------------- + + int32_t numChunks = (int32_t)chunkOffsets.size(); + + //--------------- + // props + + addBaseProps(info, dstImage); + + // convert props into a data blob that can be written out + vector propsData; + dstImage.toPropsData(propsData); + header.bytesOfKeyValueData = (uint32_t)vsizeof(propsData); + + // ---------------------------------------------------- + + // can go out to KTX2 here instead + // It has two different blocks, supercompression for BasisLZ + // and a DFD block which details the block content. + // And mips are reversed. + + // dstImage case - in memory version will always be KTX1 format for now + // this even gens a KTX1 dstImage, and then just compresses the mip levels + + if (info.isKTX2 && dstFile) + { + // generate KTX1 file with uncompressed mips first + // a big memory hit here, since all mips stored in memory despite built in-place + // could build and compress and entire level at a time, but can't write any of it + // out until smallest mips are constructed. Only then are offsets resolved. 
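// Editorial summary of the KTX2 layout that the branch below emits (offsets exactly as computed
// there):
//   KTX2Header
//   level index             levelCount * sizeof(KTXImageLevel), offsets patched in afterwards
//   DFD block               dfdByteOffset = sizeof(KTX2Header) + levelByteLength
//   key/value props         kvdByteOffset = dfdByteOffset + dfdData.totalSize
//   supercompression data   sgdByteOffset = kvdByteOffset + kvdByteLength (empty unless BasisLZ)
//   mip levels              smallest mip at the lowest offset, each start aligned to 4 bytes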
+ + if (!writeKTX1FileOrImage(info, mipConstructData, propsData, nullptr, dstImage)) { + return false; + } + + // now convert from ktx1 to ktx2 + + KTX2Header header2; + + header2.vkFormat = vulkanType(info.pixelFormat); + // header2.typeSize = 1; // skip + + header2.pixelWidth = header.pixelWidth; + header2.pixelHeight = header.pixelHeight; + header2.pixelDepth = header.pixelDepth; + + header2.layerCount = header.numberOfArrayElements; + header2.faceCount = header.numberOfFaces; + header2.levelCount = header.numberOfMipmapLevels; + + header2.supercompressionScheme = info.compressor.compressorType; + + // compute the dfd + KTX2DescriptorFileBlock dfdData(info.pixelFormat, info.hasAlpha && info.isPremultiplied, info.compressor.isCompressed()); + + // TODO: sgdData only used for BasisLZ, UASTC + zstd don't use this + vector sgdData; + + size_t levelByteLength = header2.levelCount * sizeof(KTXImageLevel); + size_t levelByteOffset = sizeof(KTX2Header); + + // compute offsets and lengts of data blocks + header2.dfdByteOffset = levelByteOffset + levelByteLength; + header2.kvdByteOffset = header2.dfdByteOffset + dfdData.totalSize; + header2.sgdByteOffset = header2.kvdByteOffset + vsizeof(propsData); + + header2.dfdByteLength = dfdData.totalSize; + header2.kvdByteLength = vsizeof(propsData); + header2.sgdByteLength = vsizeof(sgdData); + + // write the header + if (!writeDataAtOffset((const uint8_t*)&header2, sizeof(KTX2Header), 0, dstFile, dstImage)) { + return false; + } + + // next are levels, but those are written out later + + // write the dfd + if (!writeDataAtOffset((const uint8_t*)&dfdData, dfdData.totalSize, header2.dfdByteOffset, dstFile, dstImage)) { + return false; + } + + // write the props + if (!writeDataAtOffset(propsData.data(), vsizeof(propsData), header2.kvdByteOffset, dstFile, dstImage)) { + return false; + } + + // skip supercompression block + if (!sgdData.empty()) { + // TODO: align(8) sgdPadding + if (!writeDataAtOffset(sgdData.data(), vsizeof(sgdData), header2.sgdByteOffset, dstFile, dstImage)) { + return false; + } } + + // offsets will be largest last unlike KTX + // data is packed without any length or alignment unllike in KTX + // reverse the mip levels offsets (but not the order) for KTX2 + + size_t imageByteOffset = header2.sgdByteOffset + header2.sgdByteLength; + + size_t lastImageByteOffset = imageByteOffset; + + vector ktx2Levels(dstImage.mipLevels); + for (int32_t i = ktx2Levels.size() - 1; i >= 0; --i) { + + // align the offset to leastCommonMultiple(4, texel_block_size); + if (lastImageByteOffset & 0x3) { + lastImageByteOffset += 4 - (lastImageByteOffset & 0x3); + } + + auto& level = ktx2Levels[i]; + level.length *= numChunks; + level.lengthCompressed = level.length; + level.offset = lastImageByteOffset; + + lastImageByteOffset = level.offset + level.length; + } + + if (!info.compressor.isCompressed()) { + if (!writeDataAtOffset((const uint8_t*)ktx2Levels.data(), vsizeof(ktx2Levels), levelByteOffset, dstFile, dstImage)) { + return false; + } + + // write the levels out + for (int32_t i = 0; i < (int32_t)ktx2Levels.size(); ++i) { + auto& level2 = ktx2Levels[i]; + auto& level1 = dstImage.mipLevels[i]; + + if (!writeDataAtOffset(dstImage.fileData + level1.offset, level2.length, level2.offset, dstFile, dstImage)) { + return false; + } + } + } + else { - // 4 byte length of mip level is written out, this totally throws off block alignment - // this is size of one mip not the array of mips of that size - //if (!info.skipImageLength) { - int32_t levelSizeOf = 
sizeof(uint32_t); - mipOffset += levelSizeOf; - //} - - // start of the mips - mipOffsets.push_back(mipOffset); - - // ktx requires 4 byte alignment to rows of pixels (affext r8, rg8, r16f) - // it's not enough to fix alignment below, so this needs fixed in mipStorage calc. -// int32_t numPadding = 3 - ((mipStorageSize + 3) % 4); -// if (numPadding != 0) { -// // TODO: add error, need to pad rows not just stick pad at end -// // this can happen on mips with formats below that don't align to 4 byte boundaries -// // rgb8/16f also have this, but not supporting those formats currently. -// return false; -// } - - // next row of mips are offset - mipOffset += mipStorageSize * header.totalChunks(); + // start compression with the smallest mips first, then can write out data as we go through it all + + // update the offsets and compressed sizes + lastImageByteOffset = imageByteOffset; + + // allocate big enough to hold entire uncompressed level + vector compressedData; + compressedData.resize(mz_compressBound(ktx2Levels.front().length)); // largest mip + size_t compressedDataSize = 0; + + // reuse a context here + ZSTD_CCtx* cctx = nullptr; + int zlibLevel = MZ_DEFAULT_COMPRESSION; + + if (info.compressor.compressorType == KTX2SupercompressionZstd) { + cctx = ZSTD_createCCtx(); + if (!cctx) { + return false; + } + + if (info.compressor.compressorLevel > 0.0) { + int zstdLevel = (int)round(info.compressor.compressorLevel * 100.0); + if (zstdLevel > 100) { + zstdLevel = 100; + } + + ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, zstdLevel); + + // may need to reset the compressor context, but says call starts a new frame + } + } + else if (info.compressor.compressorType == KTX2SupercompressionZlib) { + // set the level up + if (info.compressor.compressorLevel > 0.0) { + zlibLevel = (int)round(info.compressor.compressorLevel * 10.0); + if (zlibLevel > 10) { + zlibLevel = 10; + } + } + } + + ZSTDScope scope(cctx); + + for (int32_t i = (int32_t)ktx2Levels.size() - 1; i >= 0; --i) { + + auto& level2 = ktx2Levels[i]; + auto& level1 = dstImage.mipLevels[i]; + + const uint8_t* levelData = dstImage.fileData + level1.offset; + + // compress each mip + switch(info.compressor.compressorType) { + case KTX2SupercompressionZstd: { + // this resets the frame on each call + compressedDataSize = ZSTD_compress2(cctx, compressedData.data(), compressedData.size(), levelData, level2.length); + + if (ZSTD_isError(compressedDataSize)) { + KLOGE("kram", "encode mip zstd failed"); + return false; + } + break; + } + case KTX2SupercompressionZlib: { + mz_ulong dstSize = compressedData.size(); + if (mz_compress2(compressedData.data(), &dstSize, levelData, level2.length, zlibLevel) != MZ_OK) + { + KLOGE("kram", "encode mip zlib failed"); + return false; + } + compressedDataSize = dstSize; + + break; + } + default: + // should never get here + return false; + } + + // also need for compressed levels? 
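// Editorial aside: the loader side is outside this patch, but reading one of these levels back is
// essentially the inverse of the compression switch above. A sketch, assuming the KTX2 level
// table has already been parsed into KTXImageLevel entries:
static bool decompressLevelSketch(KTX2Supercompression scheme,
                                  const uint8_t* src, size_t srcSize, // level.lengthCompressed
                                  uint8_t* dst, size_t dstSize)       // level.length
{
    switch (scheme) {
        case KTX2SupercompressionZstd: {
            size_t result = ZSTD_decompress(dst, dstSize, src, srcSize);
            return !ZSTD_isError(result) && result == dstSize;
        }
        case KTX2SupercompressionZlib: {
            mz_ulong outSize = (mz_ulong)dstSize;
            return mz_uncompress(dst, &outSize, src, (mz_ulong)srcSize) == MZ_OK && outSize == dstSize;
        }
        case KTX2SupercompressionNone:
            memcpy(dst, src, dstSize);
            return srcSize == dstSize;
        default:
            return false; // BasisLZ transcoding is not covered by this sketch
    }
}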
+ // align the offset to leastCommonMultiple(4, texel_block_size); + if (lastImageByteOffset & 0x3) { + lastImageByteOffset += 4 - (lastImageByteOffset & 0x3); + } + + level2.lengthCompressed = compressedDataSize; + level2.offset = lastImageByteOffset; + + lastImageByteOffset = level2.offset + level2.lengthCompressed; + + // write the mip + if (!writeDataAtOffset(compressedData.data(), compressedDataSize, level2.offset, dstFile, dstImage)) { + return false; + } + } + + // write out mip level size/offsets + if (!writeDataAtOffset((const uint8_t*)ktx2Levels.data(), vsizeof(ktx2Levels), levelByteOffset, dstFile, dstImage)) { + return false; + } + } } + else { + // this is purely ktx1 output path + if (!writeKTX1FileOrImage(info, mipConstructData, propsData, dstFile, dstImage)) { + return false; + } + } + + return true; +} - //---------------------------------------------- - - header.numberOfMipmapLevels = numDstMipLevels; +bool Image::writeKTX1FileOrImage( + ImageInfo& info, + MipConstructData& mipConstructData, + const vector& propsData, + FILE* dstFile, KTXImage& dstImage) const +{ + // recompute, it's had mips added into it above + size_t mipOffset = sizeof(KTXHeader) + dstImage.header.bytesOfKeyValueData; - // store the largest mip size that isn't skipped - for (auto mipStorageSize : mipStorageSizes) { - if (mipStorageSize != 0) { - header.pixelWidth = w; - header.pixelHeight = h; - break; + // allocate to hold props and entire image to write out + if (!dstFile) { + dstImage.initMipLevels(mipOffset); + + dstImage.reserveImageData(); + } + else { + int32_t numChunks = (int32_t)mipConstructData.chunkOffsets.size(); + + // set offsets up for ktx1 + size_t lastMipOffset = mipOffset; + + for (int32_t i = 0; i < (int32_t)dstImage.mipLevels.size(); ++i) { + auto& level = dstImage.mipLevels[i]; + level.offset = lastMipOffset + 4; // offset by length + + lastMipOffset = level.offset + level.lengthCompressed * numChunks; } + } + + // write the header out + KTXHeader headerCopy = dstImage.header; + + // fix header for 1d array + // TODO: move to initMipLevels, and just use the header + if (dstImage.textureType == MyMTLTextureType1DArray) { + headerCopy.pixelHeight = 0; + headerCopy.pixelDepth = 0; + } + + if (!writeDataAtOffset((const uint8_t*)&headerCopy, sizeof(headerCopy), 0, dstFile, dstImage)) { + return false; + } - mipDown(w, h); + // write out the props + if (!writeDataAtOffset(propsData.data(), vsizeof(propsData), sizeof(KTXHeader), dstFile, dstImage)) { + return false; } - // update image to match - dstImage.width = header.pixelWidth; - dstImage.height = header.pixelHeight; - dstImage.depth = header.pixelDepth; + // build and weite out the mip data + if (!createMipsFromChunks(info, mipConstructData, dstFile, dstImage)) { + return false; + } + + return true; +} + - // ---------------------------------------------------- +bool Image::createMipsFromChunks( + ImageInfo& info, + MipConstructData& data, + FILE* dstFile, + KTXImage& dstImage +) const +{ + // ---------------------------------------------------- + // set the structure fields and allocate it, only need enough to hold single // mip (reuses mem) also because mips are written out to file after // generated. 
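// For contrast with the KTX2 layout above, the KTX1 path in writeKTX1FileOrImage lays the file
// out as (editorial summary of the offsets it computes):
//   KTXHeader
//   key/value props (bytesOfKeyValueData bytes, starting at sizeof(KTXHeader))
//   per mip level, largest first: a uint32_t imageSize field, then numChunks * mip bytes,
//   so each level.offset sits 4 bytes past the end of the previous level's data.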
TextureData outputTexture; - outputTexture.width = w; - outputTexture.height = h; - outputTexture.data.resize(storageSize); - - // restore full src size to build the mips - w = modifiedWidth; - h = modifiedHeight; + outputTexture.width = dstImage.width; + outputTexture.height = dstImage.height; + outputTexture.data.resize(dstImage.mipLevels[0].length); // allocate to size of largest mip // This is for 8-bit data (pixelsFloat used for in-place mipgen) ImageData srcImage; - srcImage.width = w; - srcImage.height = h; + srcImage.width = _width; + srcImage.height = _height; + + // KramMipper uses these srcImage.isSRGB = info.isSRGB; srcImage.isHDR = info.isHDR; + int32_t w = srcImage.width; + int32_t h = srcImage.width; + // ---------------------------------------------------- - - vector tmpImageData8; // for average channels per block - + // use this for complex texture types, copy data from vertical/horizotnal // strip image into here to then gen mips - vector copyImage; + vector& copyImage = data.copyImage; // So can use simd ops to do conversions, use float4. // using half4 for mips of ldr data to cut memory in half // processing large textures nees lots of memory for src image // 8k x 8k x 8b = 500 mb // 8k x 8k x 16b = 1 gb - vector halfImage; - vector floatImage; + vector& halfImage = data.halfImage; + vector& floatImage = data.floatImage; + int32_t numChunks = (int32_t)data.chunkOffsets.size(); bool doPremultiply = info.hasAlpha && (info.isPremultiplied || info.isPrezero); - bool isMultichunk = chunkOffsets.size() > 1; + bool isMultichunk = numChunks > 1; if (info.isHDR) { // here the source is float @@ -1207,183 +1686,21 @@ bool Image::encodeImpl(ImageInfo& info, FILE* dstFile, KTXImage& dstImage) const } } - int32_t numChunks = (int32_t)chunkOffsets.size(); - - // allocate to hold props and entire image to write out - if (!dstFile) { - // recompute, it's had mips added into it above - mipOffset = sizeof(KTXHeader) + header.bytesOfKeyValueData; - - dstImage.initMipLevels(false, mipOffset); - - dstImage.reserveImageData(); - } - - // ---------------------------------------------------- - Mipper mipper; SDFMipper sdfMipper; -#if 0 - // TODO: can go out to KTX2 here instead - // It has two different blocks, supercompression for BasisLZ - // and a DFD block which details the block content. - // And mips are reversed. 
- bool doWriteKTX2 = false; - if (doWriteKTX2 && dstFile) // in memory version will always be KTX1 format for nwo - { - KTX2Header header2; - - header2.vkFormat = vulkanType(info.pixelFormat); - // header2.typeSize = 1; // skip - - header2.pixelWidth = header.pixelWidth; - header2.pixelHeight = header.pixelHeight; - header2.pixelDepth = header.pixelDepth; - - if (dstImage.textureType == MyMTLTextureType1DArray) { - header2.pixelHeight = 0; - header2.pixelDepth = 0; - } - - header2.layerCount = header.numberOfArrayElements; - header2.faceCount = header.numberOfFaces; - header2.levelCount = numDstMipLevels; // header.numberOfMipmapLevels; - - // compute size of dfd - vector dfdData; - - // compute offsets and lengts of data blocks - header2.dfdByteOffset = sizeof(header2); - header2.kvdByteOffset = header2.dfdByteOffset + dfdData.size(); - header2.sgdByteOffset = header2.kvdByteOffset + propsData.size(); - - header2.dfdByteLength = dfdData.size(); - header2.kvdByteLength = propsData.size(); - header2.sgdByteLength = 0; - - // TODO: figure out dfd here - - // write the header - if (!writeDataAtOffset((const uint8_t*)&header2, sizeof(header2), 0, dstFile, dstImage)) { - return false; - } - - // write the dfd - if (!writeDataAtOffset(dfdData.data(), dfdData.size(), header2.dfdByteOffset, dstFile, dstImage)) { - return false; - } - - // write the props - if (!writeDataAtOffset(propsData.data(), propsData.size(), header2.kvdByteOffset, dstFile, dstImage)) { - return false; - } - - // skip supercompression block - - // TODO: this either writes to file or to dstImage (in-memory KTX file) - - // TODO: also need to support a few compressions - // zstd and zlib, does dfd contain the offsets of each chunk - // and the compressed sizes of mips. Know format and sizes uncompressed. - // but need to fill out the compressed size field. - - vector levels; - levels.resize(numDstMipLevels); - - size_t levelListStartOffset = header2.sgdByteOffset + header2.sgdByteLength; - size_t levelStartOffset = levelListStartOffset + levels.size() * sizeof(KTX2ImageLevel); - - size_t lastLevelOffset = levelStartOffset; - for (int32_t i = 0; i < numDstMipLevels; ++i) { - levels[i].length = numChunks * numDstMipLevels; - levels[i].lengthCompressed = levels[i].length; - levels[i].offset = lastLevelOffset + levels[i].lengthCompressed; - lastLevelOffset = levels[i].offset; - } - // TODO: compress to a seperate zstd stream for each level - // then can continue to do mips in place, and just append the bytes to that level - // after compression. If not compressed, then code from KTX1 can be used. - bool isCompressed = false; - - if (!isCompressed) { - if (!writeDataAtOffset(levels.data(), levels.size(), levelListStartOffset, dstFile, dstImage)) { - return false; - } - } - - // TODO: here allocate a zstd encoder for each level - vector< vector > compressedLevels; - if (isCompressed) { - compressedLevels.resize(numDstMipLevels); - } - - // write the chunks of mips see code below, seeks are important since - // it's building mips on the fly. 
- for (int32_t chunk = 0; chunk < numChunks; ++chunk) { - // TODO: actually build the mip (reuse code below for KTX) - - if (!isCompressed) - continue; - - // handle zstd compression here, and add to end of existing encoder for level - zstd_compress(level); - - // append the compressed bytes to each strea - levels[mipLevel].append(data); - } - - if (isCompressed) { - - // update the offsets and compressed sizes - lastLevelOffset = levelStartOffset; - for (int32_t i = 0; i < numDstMipLevels; ++i) { - levels[i].lengthCompressed = compressedLevels[i].size(); - levels[i].offset = lastLevelOffset + levels[i].lengthCompressed; - lastLevelOffset = levels[i].offset; - } - - // write out sizes - if (!writeDataAtOffset(levels.data(), levels.size(), levelListStartOffset, dstFile, dstImage)) { - return false; - } - - // and now seek and write out each compressed level - for (int32_t i = 0; i < numDstMipLevels; ++i) { - if (!writeDataAtOffset(compressedLevels[i].data(), compressedLevels[i].size(), levels[i].offset, dstFile, dstImage)) { - return false; - } - } - } - - return true; - } -#endif + vector& dstMipLevels = dstImage.mipLevels; + + int32_t srcTopMipWidth = srcImage.width; + int32_t srcTopMipHeight = srcImage.height; - // ---------------------------------------------------- - - // write the header out - KTXHeader headerCopy = header; - if (dstImage.textureType == MyMTLTextureType1DArray) { - headerCopy.pixelHeight = 0; - headerCopy.pixelDepth = 0; - } - if (!writeDataAtOffset((const uint8_t*)&headerCopy, sizeof(headerCopy), 0, dstFile, dstImage)) { - return false; - } - - // write out the props - if (!writeDataAtOffset(propsData.data(), propsData.size(), sizeof(KTXHeader), dstFile, dstImage)) { - return false; - } - for (int32_t chunk = 0; chunk < numChunks; ++chunk) { // this needs to append before chunkOffset copy below - w = modifiedWidth; - h = modifiedHeight; + w = srcTopMipWidth; + h = srcTopMipHeight; // copy a chunk at a time, mip that if needed, and then move to next chunk - Int2 chunkOffset = chunkOffsets[chunk]; + Int2 chunkOffset = data.chunkOffsets[chunk]; // reset these dimensions, or the mip mapping drops them to 1x1 srcImage.width = w; @@ -1400,7 +1717,7 @@ bool Image::encodeImpl(ImageInfo& info, FILE* dstFile, KTXImage& dstImage) const for (int32_t x = 0; x < w; ++x) { float4 c0 = srcPixels[yOffset + x]; - float4& d0 = floatImage[y0 + x]; + float4& d0 = data.floatImage[y0 + x]; d0 = c0; } } @@ -1417,7 +1734,7 @@ bool Image::encodeImpl(ImageInfo& info, FILE* dstFile, KTXImage& dstImage) const for (int32_t x = 0; x < w; ++x) { Color c0 = srcPixels[yOffset + x]; - Color& d0 = copyImage[y0 + x]; + Color& d0 = data.copyImage[y0 + x]; d0 = c0; } } @@ -1430,8 +1747,10 @@ bool Image::encodeImpl(ImageInfo& info, FILE* dstFile, KTXImage& dstImage) const // copy and convert to half4 or float4 image // srcImage already points to float data, so could modify that // only need doPremultiply at the top mip - mipper.initPixelsHalfIfNeeded(srcImage, doPremultiply && !info.isPrezero, info.isPrezero, - halfImage); + mipper.initPixelsHalfIfNeeded(srcImage, + doPremultiply && info.isPremultiplied, + doPremultiply && info.isPrezero, + data.halfImage); } } @@ -1443,24 +1762,29 @@ bool Image::encodeImpl(ImageInfo& info, FILE* dstFile, KTXImage& dstImage) const // build mips for the chunk, dropping mips as needed, but downsampling // from available image - int32_t numDstMipLevelsWritten = 0; - for (int32_t mipLevel = 0; mipLevel < numMipLevels; ++mipLevel) { - // no need to mip futher - if 
(numDstMipLevelsWritten >= numDstMipLevels) { - break; - } + int32_t numSkippedMips = 0; // TODO: data.numSkippedMips; + + for (int32_t mipLevel = 0; mipLevel < (int32_t)dstMipLevels.size(); ++mipLevel) { - bool skipMip = false; - uint32_t mipStorageSize = mipStorageSizes[mipLevel]; - if (mipStorageSize == 0) { - skipMip = true; - } + if (mipLevel == 0 && !info.doSDF) + { + if (numSkippedMips > 0) { + // this does in-place mipmap to dstImage (also updates floatPixels if used) + for (int32_t i = 0; i < numSkippedMips; ++i) { + // have to build the submips even with skipMip + mipper.mipmap(srcImage, dstImageData); - // this does in-place mipmap to dstImage (also updates floatPixels - // if used) - if (info.doSDF) { - // have to process all images to SDF - if (!skipMip) { + // dst becomes src for next in-place mipmap + srcImage = dstImageData; + + w = dstImageData.width; + h = dstImageData.height; + } + } + } + else { + if (info.doSDF) { + // have to process all images to SDF // sdf mipper has to build from origin sourceImage // but it can in-place write to the same dstImage sdfMipper.mipmap(dstImageData, mipLevel); @@ -1468,10 +1792,7 @@ bool Image::encodeImpl(ImageInfo& info, FILE* dstFile, KTXImage& dstImage) const w = dstImageData.width; h = dstImageData.height; } - } - else { - // can export existing image for mip 0 - if (mipLevel > 0) { + else { // have to build the submips even with skipMip mipper.mipmap(srcImage, dstImageData); @@ -1482,16 +1803,13 @@ bool Image::encodeImpl(ImageInfo& info, FILE* dstFile, KTXImage& dstImage) const h = dstImageData.height; } } - - // only write out mip if non-zero storage - if (skipMip) { - continue; - } - - // mipOffsets are start of first chunk of a given mip size - mipOffset = mipOffsets[mipLevel] + chunk * mipStorageSize; - numDstMipLevelsWritten++; - + + // mipOffset are start of first chunk of a given mip size + size_t mipStorageSize = dstMipLevels[mipLevel].length; // / numChunks; + + // offset only valid for KTX and KTX2 w/o isCompressed + size_t mipOffset = dstMipLevels[mipLevel].offset + chunk * mipStorageSize; + // just to check that each mip has a unique offset //KLOGI("Image", "chunk:%d %d\n", chunk, mipOffset); @@ -1501,12 +1819,13 @@ bool Image::encodeImpl(ImageInfo& info, FILE* dstFile, KTXImage& dstImage) const if (!info.averageChannels.empty()) { // this isn't applied to srgb data (what about premul?) averageChannelsInBlock(info.averageChannels.c_str(), dstImage, - mipImage, tmpImageData8); + mipImage, data.tmpImageData8); - mipImage.pixels = tmpImageData8.data(); + mipImage.pixels = data.tmpImageData8.data(); mipImage.pixelsFloat = nullptr; } + Timer timer; bool success = compressMipLevel(info, dstImage, @@ -1519,33 +1838,30 @@ bool Image::encodeImpl(ImageInfo& info, FILE* dstFile, KTXImage& dstImage) const } } - // Write out the mip size on chunk0, all other mips are this size since not supercompressed. - // This throws off block alignment so have option to skip for ktxa files. I guess 3d textures + // Write out the mip size on chunk 0, all other mips are this size since not supercompressed. + // This throws off block alignment and gpu loading of ktx files from mmap. I guess 3d textures // and arrays can then load entire level in a single call. 
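// (Example of the rule below: for a 2d array of 6 slices whose per-chunk mip is 1024 bytes,
//  imageSize is written as 6 * 1024; for a non-array cube the KTX1 convention is the single
//  face size, 1024, even though 6 faces of data follow.)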
- if (chunk == 0) { + if ((!info.isKTX2) && chunk == 0) { // some clarification on what imageSize means, but best to look at ktx codebase itself // https://github.com/BinomialLLC/basis_universal/issues/40 // this contains all bytes at a mipLOD but not any padding - uint32_t levelSize = (int32_t)chunkOffsets.size() * mipStorageSize; + uint32_t levelSize = (uint32_t)dstMipLevels[mipLevel].length; // this is size of one face for non-array cubes - if (info.textureType == MyMTLTextureTypeCube) { - levelSize = mipStorageSize; + // but for everything else, it's the numChunks * mipStorageSize + if (info.textureType != MyMTLTextureTypeCube) { + levelSize *= numChunks; } int32_t levelSizeOf = sizeof(levelSize); assert(levelSizeOf == 4); - //fseek(dstFile, mipOffset - levelSizeOf, SEEK_SET); // from begin - if (!writeDataAtOffset((const uint8_t*)&levelSize, levelSizeOf, mipOffset - levelSizeOf, dstFile, dstImage)) { return false; } } - - //fseek(dstFile, mipOffset, SEEK_SET); // from begin - + // Note that default ktx alignment is 4, so r8u, r16f mips need to be padded out to 4 bytes // may need to write these out row by row, and let fseek pad the rows to 4. diff --git a/libkram/kram/KramImage.h b/libkram/kram/KramImage.h index 3fc97346..22b57578 100644 --- a/libkram/kram/KramImage.h +++ b/libkram/kram/KramImage.h @@ -29,6 +29,8 @@ enum ImageResizeFilter { //--------------------------- +struct MipConstructData; + // TODO: this can only holds one level of mips, so custom mips aren't possible. // Mipmap generation is all in-place to this storage. class Image { @@ -66,14 +68,13 @@ class Image { private: bool encodeImpl(ImageInfo& info, FILE* dstFile, KTXImage& dstImage) const; + bool decodeImpl(const KTXImage& srcImage, FILE* dstFile, KTXImage& dstImage, TexEncoder decoder, bool isVerbose, const string& swizzleText) const; // compute how big mips will be - void computeMipStorage(const KTXImage& image, int32_t w, int32_t h, + void computeMipStorage(const KTXImage& image, int32_t& w, int32_t& h, int32_t& numSkippedMips, bool doMipmaps, int32_t mipMinSize, int32_t mipMaxSize, - int32_t& storageSize, int32_t& storageSizeTotal, - vector& mipStorageSizes, - int32_t& numDstMipLevels, int32_t& numMipLevels) const; + vector& dstMipLevels) const; // ugh, reduce the params into this bool compressMipLevel(const ImageInfo& info, KTXImage& image, @@ -85,7 +86,17 @@ class Image { const KTXImage& image, ImageData& srcImage, vector& tmpImage) const; - + bool createMipsFromChunks(ImageInfo& info, MipConstructData& data, + FILE* dstFile, KTXImage& dstImage) const; + + bool writeKTX1FileOrImage( + ImageInfo& info, + MipConstructData& mipConstructData, + const vector& propsData, + FILE* dstFile, KTXImage& dstImage) const; + + void addBaseProps(const ImageInfo& info, KTXImage& dstImage) const; + private: // pixel size of image int32_t _width = 0; diff --git a/libkram/kram/KramImageInfo.cpp b/libkram/kram/KramImageInfo.cpp index 7302bbb3..78987728 100644 --- a/libkram/kram/KramImageInfo.cpp +++ b/libkram/kram/KramImageInfo.cpp @@ -996,6 +996,7 @@ void ImageInfo::initWithArgs(const ImageInfoArgs& args) textureType = args.textureType; isKTX2 = args.isKTX2; + compressor = args.compressor; isPrezero = args.isPrezero; isPremultiplied = args.isPremultiplied; diff --git a/libkram/kram/KramImageInfo.h b/libkram/kram/KramImageInfo.h index 54a343b1..86a42488 100644 --- a/libkram/kram/KramImageInfo.h +++ b/libkram/kram/KramImageInfo.h @@ -51,6 +51,8 @@ class ImageInfoArgs { int32_t quality = 49; // may want float + // ktx2 has a 
compression type and level + KTX2Compressor compressor; bool isKTX2 = false; //bool skipImageLength = false; @@ -121,6 +123,8 @@ class ImageInfo { string averageChannels; string swizzleText; + // ktx2 has a compression type and level + KTX2Compressor compressor; bool isKTX2 = false; // output image state @@ -172,6 +176,8 @@ class ImageInfo { int32_t chunksX = 0; int32_t chunksY = 0; int32_t chunksCount = 0; + + }; bool isSwizzleValid(const char* swizzle); diff --git a/scripts/kramTests.sh b/scripts/kramTests.sh index 70119096..d9bbf60b 100755 --- a/scripts/kramTests.sh +++ b/scripts/kramTests.sh @@ -10,7 +10,7 @@ ../scripts/kramTextures.py -p android --ktx2 --bundle ../scripts/kramTextures.py -p android --bundle -# this only has ktx2 form -../scripts/kramTextures.py -p any --ktx2 --bundle +# this only has ktx2 form, tests uastc which kram doesn't open/save yet +#../scripts/kramTextures.py -p any --ktx2 --bundle diff --git a/scripts/kramTextures.py b/scripts/kramTextures.py index 183644c0..78ec37e9 100755 --- a/scripts/kramTextures.py +++ b/scripts/kramTextures.py @@ -52,7 +52,8 @@ class TextureProcessor: appKtx2sc = "" appKtx2check = "" doUastc = False - + doKTX2 = False + # preset formats for a given platform textureFormats = [] @@ -168,8 +169,9 @@ def processTextureKram(self, srcPath, dstDir, srcModstamp): srcFilename = os.path.basename(srcRoot) # just the name no ext - # this only exports to ktx, post process will convert to ktx2 ext = ".ktx" + if self.doKTX2: + ext = ".ktx2" dstName = srcFilename # replace -h with -n, since it will be converted to a normal @@ -216,6 +218,11 @@ def processTextureKram(self, srcPath, dstDir, srcModstamp): } typeText = switcher.get(texType, " -type 2d") + # choice of none, zlib, or zstd + compressorText = "" + if self.doKTX2: + compressorText = " -zstd" + # this could work on 3d and cubearray textures, but for now only use on 2D textures chunksText = "" if texType == TextureType.Tex2DArray: @@ -223,7 +230,7 @@ def processTextureKram(self, srcPath, dstDir, srcModstamp): if chunksX > 0 and chunksY > 0: chunksText = " -chunks {0}x{1}".format(chunksX, chunksY) - cmd = "encode" + fmt + typeText + chunksText + " -i " + srcPath + " -o " + dstFile + cmd = "encode" + fmt + typeText + chunksText + compressorText + " -i " + srcPath + " -o " + dstFile # can print out commands to script and then process that all in C++ if self.doScript: @@ -234,6 +241,7 @@ def processTextureKram(self, srcPath, dstDir, srcModstamp): else: timer = -time.perf_counter() + # kram can't compress to uastc ktx2, but this script can via ktx2sc from original file result = self.spawn(self.appKram + " " + cmd) # report slow textures @@ -242,36 +250,38 @@ def processTextureKram(self, srcPath, dstDir, srcModstamp): if timer > slowTextureTime: print("perf: encode {0} took {1:.3f}s".format(dstName, timer)) + # TODO: split this off into another modstamp testing pass, and only do work if ktx is older than ktx2 # convert ktx -> ktx2, and zstd supercompress the mips, kram can read these and decompress # for now, this is only possible when not scripted # could read these in kram, and then execute them, or write these to another file # and then execute that if script file suceeds - if self.appKtx2: - ktx2Filename = dstFile + "2" + # if self.appKtx2: + # ktx2Filename = dstFile + "2" - # create the ktx2 - result = self.spawn(self.appKtx2 + " -f -o " + ktx2Filename + " " + dstFile) + # # create the ktx2 + # result = self.spawn(self.appKtx2 + " -f -o " + ktx2Filename + " " + dstFile) - # too bad this can't 
check ktx1... - if self.appKtx2check != "" and result == 0: - result = self.spawn(self.appKtx2check + " -q " + ktx2Filename) - - # can only zstd compress block encoded files, but can do BasisLZ on - # explicit files. - - # overwrite it with supercompressed version - # basis uastc supercompress - only if content isn't already block encoded, TODO: kramv and loader cannot read this - # zstd supercompress - works on everything, kramv and loader can read this - if self.appKtx2sc != "" and result == 0: - if self.doUastc: - result = self.spawn(self.appKtx2sc + " --uastc 2 --uastc_rdo_q 1.0 --zcmp 3 --threads 1 " + ktx2Filename) - else: - result = self.spawn(self.appKtx2sc + " --zcmp 3 --threads 1 " + ktx2Filename) - - # double check supercompressed version, may not be necessary - if self.appKtx2check != "" and result == 0: - result = self.spawn(self.appKtx2check + " -q " + ktx2Filename) + # # too bad this can't check ktx1... + # if self.appKtx2check != "" and result == 0: + # result = self.spawn(self.appKtx2check + " -q " + ktx2Filename) + + # # can only zstd compress block encoded files, but can do BasisLZ on + # # explicit files. + + # # overwrite it with supercompressed version + # # basis uastc supercompress - only if content isn't already block encoded, TODO: kramv and loader cannot read this + # # zstd supercompress - works on everything, kramv and loader can read this + # if self.appKtx2sc != "" and result == 0: + # if self.doUastc: + # result = self.spawn(self.appKtx2sc + " --uastc 2 --uastc_rdo_q 1.0 --zcmp 3 --threads 1 " + ktx2Filename) + # else: + # result = self.spawn(self.appKtx2sc + " --zcmp 3 --threads 1 " + ktx2Filename) + + # double check supercompressed version, may not be necessary + if self.appKtx2check != "" and result == 0: + result = self.spawn(self.appKtx2check + " -q " + ktx2Filename) + return result @@ -526,11 +536,17 @@ def processTextures(platform, container, verbose, quality, jobs, force, script, processor = TextureProcessor(platform, appKram, maxCores, force, script, scriptFile, formats) if ktx2: + processor.doKTX2 = ktx2 + + # used to need all of these apps to gen ktx2, but can gen directly from kram now + # leaving these to test aastc case processor.appKtx2 = appKtx2 processor.appKtx2sc = appKtx2sc - processor.appKtx2check = appKtx2check processor.doUastc = doUastc + # check app still useful + processor.appKtx2check = appKtx2check + for srcDir in srcDirs: dstDir = dstDirForPlatform + srcDir os.makedirs(dstDir, exist_ok = True) From e72d0d3036a09164c45feb4c407284ceba95e5d1 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 16 May 2021 19:23:09 -0700 Subject: [PATCH 049/901] Kram - fix numSkippedMips support --- libkram/kram/KTXImage.cpp | 20 +++++++++++++++----- libkram/kram/KTXImage.h | 2 +- libkram/kram/KramImage.cpp | 18 +++++++++++++----- 3 files changed, 29 insertions(+), 11 deletions(-) diff --git a/libkram/kram/KTXImage.cpp b/libkram/kram/KTXImage.cpp index 12564de9..f5311974 100644 --- a/libkram/kram/KTXImage.cpp +++ b/libkram/kram/KTXImage.cpp @@ -1087,13 +1087,15 @@ void KTXImage::toPropsData(vector& propsData) // TODO: this needs to pad to 16-bytes, so may need a prop for that } -void KTXImage::initMipLevels(bool doMipmaps, int32_t mipMinSize, int32_t mipMaxSize) +void KTXImage::initMipLevels(bool doMipmaps, int32_t mipMinSize, int32_t mipMaxSize, uint32_t& numSkippedMips) { // dst levels int32_t w = width; int32_t h = height; int32_t d = depth; + numSkippedMips = 0; + bool needsDownsample = (w > mipMaxSize || h > mipMaxSize); int32_t maxMipLevels = 
16; // 64K x 64K @@ -1104,7 +1106,8 @@ void KTXImage::initMipLevels(bool doMipmaps, int32_t mipMinSize, int32_t mipMaxS } KTXImageLevel level; - level.offset = 0; // compute later, once know ktx vs. ktx2 + //level.offset = 0; // compute later, once know ktx vs. ktx2 + //level.lengthCompressed = 0; mipLevels.clear(); @@ -1115,7 +1118,6 @@ void KTXImage::initMipLevels(bool doMipmaps, int32_t mipMinSize, int32_t mipMaxS if (keepMip) { level.length = mipLevelSize(w, h); - level.lengthCompressed = 0; if (mipLevels.empty()) { // adjust the top dimensions @@ -1125,6 +1127,11 @@ void KTXImage::initMipLevels(bool doMipmaps, int32_t mipMinSize, int32_t mipMaxS } mipLevels.push_back(level); } + else { + if (mipLevels.empty()) { + numSkippedMips++; + } + } do { mipDown(w, h, d); @@ -1136,7 +1143,6 @@ void KTXImage::initMipLevels(bool doMipmaps, int32_t mipMinSize, int32_t mipMaxS if (keepMip && (mipLevels.size() < (size_t)maxMipLevels)) { // length needs to be multiplied by chunk size before writing out level.length = mipLevelSize(w, h); - level.lengthCompressed = 0; if (mipLevels.empty()) { // adjust the top dimensions @@ -1147,13 +1153,17 @@ void KTXImage::initMipLevels(bool doMipmaps, int32_t mipMinSize, int32_t mipMaxS mipLevels.push_back(level); } + else { + if (mipLevels.empty()) { + numSkippedMips++; + } + } } while (w > 1 || h > 1 || d > 1); } else { // length needs to be multiplied by chunk size before writing out level.length = mipLevelSize(w, h); - level.lengthCompressed = 0; mipLevels.push_back(level); } diff --git a/libkram/kram/KTXImage.h b/libkram/kram/KTXImage.h index 2b0b54c3..2dace691 100644 --- a/libkram/kram/KTXImage.h +++ b/libkram/kram/KTXImage.h @@ -273,7 +273,7 @@ class KTXImage { void initProps(const uint8_t* propsData, size_t propDataSize); void initMipLevels(size_t mipOffset); - void initMipLevels(bool doMipmaps, int32_t mipMinSize, int32_t mipMaxSize); + void initMipLevels(bool doMipmaps, int32_t mipMinSize, int32_t mipMaxSize, uint32_t& numSkippedMips); bool validateMipLevels() const; diff --git a/libkram/kram/KramImage.cpp b/libkram/kram/KramImage.cpp index b0839191..9e1ed3c1 100644 --- a/libkram/kram/KramImage.cpp +++ b/libkram/kram/KramImage.cpp @@ -118,11 +118,13 @@ bool Image::loadImageFromKTX(const KTXImage& image) return false; } - // TODO: handle custom mips, this will currently box filter to build + // TODO: handle loading custom mips. Save will currently box filter to build // remaining mips but for SDF or coverage scaled alpha test, need to - // preserve original data. + // preserve original data. Problem is that Image save to KTX/2 always does in-place + // mipgen. + if (image.header.numberOfMipmapLevels > 1) { - KLOGW("Image", "Skipping custom mip levels"); + KLOGW("Image", "Skipping custom mip levels from KTX load"); } // so can call through to blockSize @@ -467,6 +469,7 @@ bool Image::decodeImpl(const KTXImage& srcImage, FILE* dstFile, KTXImage& dstIma vector propsData; dstImage.toPropsData(propsData); dstHeader.bytesOfKeyValueData = (uint32_t)vsizeof(propsData); + size_t mipOffset = sizeof(KTXHeader) + dstHeader.bytesOfKeyValueData; dstImage.initMipLevels(mipOffset); @@ -900,7 +903,12 @@ struct MipConstructData { vector halfImage; vector floatImage; + // Subdividing strips of larger images into cube/atlas/etc. + // These offsets are where to find each chunk in that larger image vector chunkOffsets; + + // Can skip the larger and smaller mips. This is the larger mips skipped. 
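// (Example of the bookkeeping: a 4096x4096 source encoded with -mipmax 1024 drops the 4096 and
//  2048 levels, so numSkippedMips = 2 and the stored top mip is 1024x1024; createMipsFromChunks
//  still runs the in-place mip generation for those skipped levels before the first level is
//  written out.)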
+ uint32_t numSkippedMips = 0; }; @@ -1267,7 +1275,7 @@ bool Image::encodeImpl(ImageInfo& info, FILE* dstFile, KTXImage& dstImage) const dstImage.height = h; dstImage.depth = header.pixelDepth; // from validate above - dstImage.initMipLevels(info.doMipmaps, info.mipMinSize, info.mipMaxSize); + dstImage.initMipLevels(info.doMipmaps, info.mipMinSize, info.mipMaxSize, mipConstructData.numSkippedMips); // ---------------------------------------------------- @@ -1762,7 +1770,7 @@ bool Image::createMipsFromChunks( // build mips for the chunk, dropping mips as needed, but downsampling // from available image - int32_t numSkippedMips = 0; // TODO: data.numSkippedMips; + int32_t numSkippedMips = data.numSkippedMips; for (int32_t mipLevel = 0; mipLevel < (int32_t)dstMipLevels.size(); ++mipLevel) { From 43a50b9f9a51a8cb2e3bc6acab271659ed851c73 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 16 May 2021 22:42:53 -0700 Subject: [PATCH 050/901] kram - add compression level support to -zstd/-zlib, fix width/height issues on chunked images, update test scripts --- libkram/kram/KTXImage.h | 2 +- libkram/kram/Kram.cpp | 27 +++++++++++++++++++++------ libkram/kram/KramImage.cpp | 23 ++++++++++++++++------- scripts/kramTests.sh | 16 +++++++++------- scripts/kramTextures.py | 29 ++++++++++++++++++++--------- 5 files changed, 67 insertions(+), 30 deletions(-) diff --git a/libkram/kram/KTXImage.h b/libkram/kram/KTXImage.h index 2dace691..71c426e6 100644 --- a/libkram/kram/KTXImage.h +++ b/libkram/kram/KTXImage.h @@ -254,7 +254,7 @@ enum KTX2Supercompression { struct KTX2Compressor { KTX2Supercompression compressorType = KTX2SupercompressionNone; - float compressorLevel = 0.0f; // 0.0 default, 100.0 full compression + float compressorLevel = 0.0f; // 0.0 is default bool isCompressed() const { return compressorType != KTX2SupercompressionNone; } }; diff --git a/libkram/kram/Kram.cpp b/libkram/kram/Kram.cpp index ec475eb4..720691ec 100644 --- a/libkram/kram/Kram.cpp +++ b/libkram/kram/Kram.cpp @@ -1060,10 +1060,10 @@ void kramEncodeUsage(bool showVersion = true) "\tSpecifies how many chunks to split up texture into 2darray\n" // ktx2 specific settings - "\t-zstd" - "\tktx2 with zstd mip compressor\n" - "\t-zlib" - "\tktx2 with zlib mip compressor\n" + "\t-zstd level" + "\tktx2 with zstd mip compressor, 0 for default\n" + "\t-zlib level" + "\tktx2 with zlib mip compressor, 0 for defauult\n" "\t-swizzle [rgba01 x4]" "\tSpecifies pre-encode swizzle pattern\n" @@ -1654,7 +1654,7 @@ static int32_t kramAppDecode(vector& args) return -1; } - success = success && SetupTmpFile(tmpFileHelper, isDstKTX ? ".ktx" : ".ktx2"); + success = success && SetupTmpFile(tmpFileHelper, isDstKTX2 ? 
".ktx2" : ".ktx"); if (success && isVerbose) { KLOGI("Kram", "Decoding %s to %s with %s\n", @@ -1965,10 +1965,25 @@ static int32_t kramAppEncode(vector& args) // TODO: need level control else if (isStringEqual(word, "-zstd")) { infoArgs.compressor.compressorType = KTX2SupercompressionZstd; + ++i; + if (i >= argc) { + KLOGE("Kram", "zstd level arg invalid"); + error = true; + break; + } + infoArgs.compressor.compressorLevel = atoi(args[i]); + //continue; } else if (isStringEqual(word, "-zlib")) { infoArgs.compressor.compressorType = KTX2SupercompressionZlib; + ++i; + if (i >= argc) { + KLOGE("Kram", "zlib level arg invalid"); + error = true; + break; + } + infoArgs.compressor.compressorLevel = atoi(args[i]); //continue; } else { @@ -2056,7 +2071,7 @@ static int32_t kramAppEncode(vector& args) srcFilename, srcImage, isPremulRgb); if (success) { - success = SetupTmpFile(tmpFileHelper, isDstKTX ? ".ktx" : ".ktx2"); + success = SetupTmpFile(tmpFileHelper, isDstKTX2 ? ".ktx2" : ".ktx"); if (!success) { KLOGE("Kram", "encode couldn't generate tmp file for output"); diff --git a/libkram/kram/KramImage.cpp b/libkram/kram/KramImage.cpp index 9e1ed3c1..a6831194 100644 --- a/libkram/kram/KramImage.cpp +++ b/libkram/kram/KramImage.cpp @@ -909,6 +909,10 @@ struct MipConstructData { // Can skip the larger and smaller mips. This is the larger mips skipped. uint32_t numSkippedMips = 0; + + // this is size of 2d image src after accounting for chunks for a strip of array/cube data + uint32_t modifiedWidth = 0; + uint32_t modifiedHeight = 0; }; @@ -1264,6 +1268,11 @@ bool Image::encodeImpl(ImageInfo& info, FILE* dstFile, KTXImage& dstImage) const return false; } + // This is wxh of source in case it has chunks + // dstImage will start at this, but may mip down smaller base on mipMaxSize + mipConstructData.modifiedWidth = w; + mipConstructData.modifiedHeight = h; + // work out how much memory we need to load header.initFormatGL(info.pixelFormat); @@ -1433,8 +1442,8 @@ bool Image::encodeImpl(ImageInfo& info, FILE* dstFile, KTXImage& dstImage) const return false; } - if (info.compressor.compressorLevel > 0.0) { - int zstdLevel = (int)round(info.compressor.compressorLevel * 100.0); + if (info.compressor.compressorLevel > 0.0f) { + int zstdLevel = (int)round(info.compressor.compressorLevel); if (zstdLevel > 100) { zstdLevel = 100; } @@ -1446,8 +1455,8 @@ bool Image::encodeImpl(ImageInfo& info, FILE* dstFile, KTXImage& dstImage) const } else if (info.compressor.compressorType == KTX2SupercompressionZlib) { // set the level up - if (info.compressor.compressorLevel > 0.0) { - zlibLevel = (int)round(info.compressor.compressorLevel * 10.0); + if (info.compressor.compressorLevel > 0.0f) { + zlibLevel = (int)round(info.compressor.compressorLevel); if (zlibLevel > 10) { zlibLevel = 10; } @@ -1601,15 +1610,15 @@ bool Image::createMipsFromChunks( // This is for 8-bit data (pixelsFloat used for in-place mipgen) ImageData srcImage; - srcImage.width = _width; - srcImage.height = _height; + srcImage.width = data.modifiedWidth; + srcImage.height = data.modifiedHeight; // KramMipper uses these srcImage.isSRGB = info.isSRGB; srcImage.isHDR = info.isHDR; int32_t w = srcImage.width; - int32_t h = srcImage.width; + int32_t h = srcImage.height; // ---------------------------------------------------- diff --git a/scripts/kramTests.sh b/scripts/kramTests.sh index d9bbf60b..93ca1d91 100755 --- a/scripts/kramTests.sh +++ b/scripts/kramTests.sh @@ -1,16 +1,18 @@ #/bin/zsh -../scripts/kramTextures.py -p mac --ktx2 --bundle 
-../scripts/kramTextures.py -p mac --bundle +args=$1 -../scripts/kramTextures.py -p ios --ktx2 --bundle -../scripts/kramTextures.py -p ios --bundle +../scripts/kramTextures.py -p mac --bundle ${args} +#../scripts/kramTextures.py -p mac -c ktx --bundle ${args} + +../scripts/kramTextures.py -p ios --bundle ${args} +#../scripts/kramTextures.py -p ios -c ktx --bundle ${args} # this takes 15s+ with ETC2comp -../scripts/kramTextures.py -p android --ktx2 --bundle -../scripts/kramTextures.py -p android --bundle +../scripts/kramTextures.py -p android --bundle ${args} +#../scripts/kramTextures.py -p -c ktx android --bundle ${args} # this only has ktx2 form, tests uastc which kram doesn't open/save yet -#../scripts/kramTextures.py -p any --ktx2 --bundle +#../scripts/kramTextures.py -p any --bundle ${args} diff --git a/scripts/kramTextures.py b/scripts/kramTextures.py index 78ec37e9..c8b02769 100755 --- a/scripts/kramTextures.py +++ b/scripts/kramTextures.py @@ -221,7 +221,7 @@ def processTextureKram(self, srcPath, dstDir, srcModstamp): # choice of none, zlib, or zstd compressorText = "" if self.doKTX2: - compressorText = " -zstd" + compressorText = " -zstd 0" # this could work on 3d and cubearray textures, but for now only use on 2D textures chunksText = "" @@ -278,9 +278,12 @@ def processTextureKram(self, srcPath, dstDir, srcModstamp): # else: # result = self.spawn(self.appKtx2sc + " --zcmp 3 --threads 1 " + ktx2Filename) - # double check supercompressed version, may not be necessary - if self.appKtx2check != "" and result == 0: - result = self.spawn(self.appKtx2check + " -q " + ktx2Filename) + if self.doKTX2: + ktx2Filename = dstFile + + # double check supercompressed version, may not be necessary + if self.appKtx2check != "" and result == 0: + result = self.spawn(self.appKtx2check + " -q " + ktx2Filename) return result @@ -370,16 +373,15 @@ def runMapInParallel(args): @click.command() @click.option('-p', '--platform', type=click.Choice(['ios', 'mac', 'win', 'android', 'any']), required=True, help="build platform") -@click.option('-c', '--container', type=click.Choice(['ktx', 'ktxa']), default="ktx", help="container type") +@click.option('-c', '--container', type=click.Choice(['ktx', 'ktx2']), default="ktx2", help="container type") @click.option('-v', '--verbose', is_flag=True, help="verbose output") @click.option('-q', '--quality', default=49, type=click.IntRange(0, 100), help="quality affects encode speed") @click.option('-j', '--jobs', default=64, help="max physical cores to use") @click.option('--force', is_flag=True, help="force rebuild ignoring modstamps") @click.option('--script', is_flag=True, help="generate kram script and execute that") -@click.option('--ktx2', is_flag=True, help="generate ktx2 files from ktx output") -@click.option('--check', is_flag=True, help="check ktx2 files as generated") +@click.option('--check', is_flag=True, help="check ktx2 files when generated") @click.option('--bundle', is_flag=True, help="bundle files by updating a zip file") -def processTextures(platform, container, verbose, quality, jobs, force, script, ktx2, check, bundle): +def processTextures(platform, container, verbose, quality, jobs, force, script, check, bundle): # output to multiple dirs by type # eventually pass these in as strings, so script is generic @@ -392,6 +394,10 @@ def processTextures(platform, container, verbose, quality, jobs, force, script, appKtx2check = "" doUastc = False + ktx2 = True + if container == "ktx": + ktx2 = False + # can convert ktx -> ktx2 files with zstd and Basis 
supercompression # caller must have ktx2ktx2 and ktx2sc in path build from https://github.com/KhronosGroup/KTX-Software if platform == "any": @@ -399,11 +405,16 @@ def processTextures(platform, container, verbose, quality, jobs, force, script, doUastc = True if ktx2: - script = False + # have to run check script after generating, or have to convert ktx to ktx2 + # so that's why these disable scripting + if doUastc or check: + script = False + # these were for converting ktx output from kram to ktx2, and for uastc from original png appKtx2 = "ktx2ktx2" appKtx2sc ="ktxsc" + # this is a validator app if check: appKtx2check = "ktx2check" From 43be2410b77d4bb892114145a484f0facaa62dc7 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Tue, 18 May 2021 22:12:04 -0700 Subject: [PATCH 051/901] Kram/kramv - verbose info, -mipskip n, and kramv buttons, fast info load path kram add faster load path for kram info that doesn't decode mips and can return ktx2 data intact hook up verbose info add -mipskip 1 support to CLI to skip the topmost n mips. Sometimes easier than specifying a pixel size. kramv dynamically add buttons and menu items, menu items not yet hooked to an Edit menu add verbose info, flip buttons, transparent debug, fix non-zero debug mode for snorm, fix debug hdr, improve the display of info data. Has % compression for image and mip levels. --- kramv/KramRenderer.mm | 11 +- kramv/KramShaders.h | 15 +- kramv/KramShaders.metal | 35 ++++- kramv/KramViewerBase.h | 19 ++- kramv/KramViewerMain.mm | 278 +++++++++++++++++++++++++++++++-- libkram/kram/KTXImage.cpp | 50 ++++-- libkram/kram/KTXImage.h | 6 +- libkram/kram/Kram.cpp | 100 +++++++++--- libkram/kram/KramImage.cpp | 23 ++- libkram/kram/KramImageInfo.cpp | 3 +- libkram/kram/KramImageInfo.h | 6 +- 11 files changed, 459 insertions(+), 87 deletions(-) diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index 1f44314e..1b206419 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -421,9 +421,10 @@ - (BOOL)loadTextureFromData:(const string&)fullFilename timestamp:(double)timest if (!sourceImage.open(imageData,imageDataLength)) { return NO; } - bool isVerbose = false; - _showSettings->imageInfo = kramInfoKTXToString(fullFilename, sourceImage, isVerbose); - + + _showSettings->imageInfo = kramInfoKTXToString(fullFilename, sourceImage, false); + _showSettings->imageInfoVerbose = kramInfoKTXToString(fullFilename, sourceImage, true); + _showSettings->originalFormat = (MyMTLPixelFormat)originalFormatMTL; _showSettings->lastFilename = fullFilename; @@ -462,8 +463,8 @@ - (BOOL)loadTexture:(nonnull NSURL *)url return NO; } - bool isVerbose = false; - _showSettings->imageInfo = kramInfoToString(fullFilename, isVerbose); + _showSettings->imageInfo = kramInfoToString(fullFilename, false); + _showSettings->imageInfoVerbose = kramInfoToString(fullFilename, true); _showSettings->originalFormat = (MyMTLPixelFormat)originalFormatMTL; _showSettings->decodedFormat = (MyMTLPixelFormat)texture.pixelFormat; diff --git a/kramv/KramShaders.h b/kramv/KramShaders.h index 2799faa4..5169213c 100644 --- a/kramv/KramShaders.h +++ b/kramv/KramShaders.h @@ -73,14 +73,15 @@ typedef NS_ENUM(int32_t, ShaderTextureChannels) typedef NS_ENUM(int32_t, ShaderDebugMode) { ShDebugModeNone = 0, - ShDebugModeTransparent = 1, - ShDebugModeColor = 2, - ShDebugModeGray = 3, - ShDebugModeHDR = 4, + ShDebugModeTransparent, // alpha < 255 + ShDebugModeNonZero, // any(rgba) > 0 + ShDebugModeColor, + ShDebugModeGray, + ShDebugModeHDR, - ShDebugModePosX = 5, - ShDebugModePosY = 
6, - ShDebugModeCircleXY = 7, + ShDebugModePosX, + ShDebugModePosY, + ShDebugModeCircleXY, ShDebugModeCount }; diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index 54832b83..9363a5b2 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -444,6 +444,8 @@ float4 DrawPixels( float2 textureSize ) { + float4 sc = c; + bool isPreview = uniforms.isPreview; if (isPreview) { @@ -553,6 +555,7 @@ float4 DrawPixels( c.rgb = toNormal(c.rgb); // from signed, to match other editors that don't display signed data + sc = c; c.xyz = toUnorm(c.xyz); // can sample from this // view data as abs magnitude @@ -566,13 +569,21 @@ float4 DrawPixels( // signed 1/2 channel formats return sr,0,0, and sr,sg,0 for rgb? // May want to display those as 0 not 0.5. if (uniforms.isSigned) { + // Note: premul on signed should occur while still signed, since it's a pull to zoer + // to premul, but also need to see without premul + if (uniforms.isPremul) { + c.xyz *= c.a; + } + + sc = c; c.xyz = toUnorm(c.xyz); } - - // to premul, but also need to see without premul - if (uniforms.isPremul) { - c.xyz *= c.a; + else { + if (uniforms.isPremul) { + c.xyz *= c.a; + } } + } } @@ -623,6 +634,20 @@ float4 DrawPixels( isHighlighted = true; } } + else if (uniforms.debugMode == ShDebugModeNonZero) { + // want to compare so snorm 0 on signed data + // TODO: unorm formats don't store exact 0, so may need toleranc + if (uniforms.isSigned) { + if (any(sc != 0.0)) { + isHighlighted = true; + } + } + else { + if (any(c != 0.0)) { + isHighlighted = true; + } + } + } else if (uniforms.debugMode == ShDebugModeColor) { // with 565 formats, all pixels with light up if (c.r != c.g || c.r != c.b) { @@ -636,7 +661,7 @@ float4 DrawPixels( } } else if (uniforms.debugMode == ShDebugModeHDR) { - if (any(c.rgb < float3(0.0)) || any(c.rgb < float3(0.0)) ) { + if (any(c.rgb < float3(0.0)) || any(c.rgb > float3(1.0)) ) { isHighlighted = true; } } diff --git a/kramv/KramViewerBase.h b/kramv/KramViewerBase.h index 3f706a06..7159d256 100644 --- a/kramv/KramViewerBase.h +++ b/kramv/KramViewerBase.h @@ -30,17 +30,19 @@ enum TextureChannels ModeAAA1 = 8, }; +// Must line up with ShDebugMode enum DebugMode { DebugModeNone = 0, - DebugModeTransparent = 1, - DebugModeColor = 2, - DebugModeGray = 3, - DebugModeHDR = 4, + DebugModeTransparent, + DebugModeNonZero, + DebugModeColor, + DebugModeGray, + DebugModeHDR, - DebugModePosX = 5, - DebugModePosY = 6, - DebugModeCircleXY = 7, + DebugModePosX, + DebugModePosY, + DebugModeCircleXY, DebugModeCount }; @@ -145,7 +147,8 @@ class ShowSettings { // cached on load, raw info about the texture from libkram string imageInfo; - + string imageInfoVerbose; + // format before any transcode to supported formats MyMTLPixelFormat originalFormat; MyMTLPixelFormat decodedFormat; diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index f7e4a38c..ca2f977f 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -271,7 +271,12 @@ - (IBAction)showAboutDialog:(id)sender { M = 0x2E, // https://eastmanreference.com/complete-list-of-applescript-key-codes - Zero = 0x1D, + Num1 = 0x12, + Num2 = 0x13, + Num3 = 0x14, + Num4 = 0x15, + // ... 
+ Num0 = 0x1D, LeftBrace = 0x21, RightBrace = 0x1E, @@ -395,8 +400,10 @@ void encodeSrcForEncodeComparisons(bool increment) { @implementation MyMTKView { + NSStackView* _buttonStack; NSTextField* _hudLabel; NSTextField* _hudLabel2; + vector _textSlots; ShowSettings* _showSettings; @@ -404,6 +411,7 @@ @implementation MyMTKView ZipHelper _zip; MmapHelper _zipMmap; int32_t _fileIndex; + BOOL _noImageLoaded; } - (void)awakeFromNib @@ -456,6 +464,12 @@ - (instancetype)initWithCoder:(NSCoder*)coder { _zoomGesture = [[NSMagnificationGestureRecognizer alloc] initWithTarget:self action:@selector(handleGesture:)]; [self addGestureRecognizer:_zoomGesture]; + _buttonStack = [self _addButtons]; + + // hide until image loaded + _buttonStack.hidden = YES; + _noImageLoaded = YES; + _hudLabel2 = [self _addHud:YES]; _hudLabel = [self _addHud:NO]; [self setHudText:""]; @@ -467,10 +481,125 @@ - (nonnull ShowSettings*)showSettings { return _showSettings; } +- (NSStackView*)_addButtons { + const int32_t numButtons = 25; // 13; + const char* names[numButtons*2] = { + + "?", "Help", + "I", "Info", + "H", "Hud", + "S", "Show All", + + "O", "Preview", + "W", "Repeat", + "P", "Premul", + "N", "Signed", + + "-", "", + + "E", "Debug", + "D", "Grid", + "C", "Checker", + "U", "Toggle UI", + + "-", "", + + "M", "Mip", + "F", "Face", + "Y", "Array", + "J", "Next", + "L", "Reload", + "0", "Fit", + + // TODO: need to shift hud over a little + // "UI", - add to show/hide buttons + + "-", "", + + // make these individual toggles and exclusive toggle off shift + "R", "Red", + "G", "Green", + "B", "Blue", + "A", "Alpha", + }; + + NSRect rect = NSMakeRect(0,10,30,30); + + //#define ArrayCount(x) ((x) / sizeof(x[0])) + + NSMutableArray* buttons = [[NSMutableArray alloc] init]; + + for (int32_t i = 0; i < numButtons; ++i) { + const char* icon = names[2*i+0]; + const char* tip = names[2*i+1]; + + NSString* name = [NSString stringWithUTF8String:icon]; + NSString* toolTip = [NSString stringWithUTF8String:tip]; + + NSButton* button = nil; + + button = [NSButton buttonWithTitle:name target:self action:@selector(handleAction:)]; + [button setToolTip:toolTip]; + button.hidden = NO; + + // turn off rounded bezel + button.bordered = NO; + + [button setFrame:rect]; + + // stackView seems to disperse the items evenly across the area, so this doesn't work + if (icon[0] == '-') { + //rect.origin.y += 11; + button.enabled = NO; + } + else { + //sKrect.origin.y += 25; + } + + [buttons addObject:button]; + } + + NSStackView* stackView = [NSStackView stackViewWithViews:buttons]; + stackView.orientation = NSUserInterfaceLayoutOrientationVertical; + [self addSubview: stackView]; + +#if 0 + // Want menus, so user can define their own shortcuts to commands + // Also need to enable/disable this via validateUserInterfaceItem + NSApplication* app = [NSApplication sharedApplication]; + + // TODO: add an edit menu in the storyboard + NSMenu* menu = app.windowsMenu; + [menu addItem:[NSMenuItem separatorItem]]; + + for (int32_t i = 0; i < numButtons; ++i) { + const char* icon = names[2*i+0]; + const char* tip = names[2*i+1]; + + NSString* shortcut = [NSString stringWithUTF8String:icon]; + NSString* name = [NSString stringWithUTF8String:tip]; + shortcut = @""; // for now, or AppKit turns key int cmd+shift+key + + if (icon[0] == '-') { + [menu addItem:[NSMenuItem separatorItem]]; + } + else { + NSMenuItem* menuItem = [[NSMenuItem alloc] initWithTitle:name action:@selector(handleAction) keyEquivalent:shortcut]; + [menu addItem: menuItem]; + } + } +#endif + + 
return stackView; +} + - (NSTextField*)_addHud:(BOOL)isShadow { + // TODO: This text field is clamping to the height, so have it set to 1200. + // really want field to expand to fill the window height for large output + // add a label for the hud - NSTextField *label = [[NSTextField alloc] initWithFrame:NSMakeRect(isShadow ? 11 : 10, isShadow ? 11 : 10, 800, 300)]; + NSTextField *label = [[NSTextField alloc] initWithFrame:NSMakeRect(isShadow ? 21 : 20, isShadow ? 21 : 20, 800, 1200)]; label.drawsBackground = NO; label.textColor = !isShadow ? [NSColor colorWithSRGBRed:0 green:1 blue:0 alpha:1] : @@ -479,7 +608,8 @@ - (NSTextField*)_addHud:(BOOL)isShadow label.editable = NO; label.selectable = NO; label.lineBreakMode = NSLineBreakByClipping; - + label.maximumNumberOfLines = 0; // fill to height + label.cell.scrollable = NO; label.cell.wraps = NO; @@ -489,13 +619,19 @@ - (NSTextField*)_addHud:(BOOL)isShadow // UILabel has shadowColor/shadowOffset but NSTextField doesn't [self addSubview: label]; + + // add vertical constrains to have it fill window, but keep 800 width + label.preferredMaxLayoutWidth = 800; + + NSDictionary* views = @{ @"label" : label }; + [self addConstraints:[NSLayoutConstraint constraintsWithVisualFormat:@"H:|-[label]" options:0 metrics:nil views:views]]; + [self addConstraints:[NSLayoutConstraint constraintsWithVisualFormat:@"V:|-[label]" options:0 metrics:nil views:views]]; + return label; } - (void)doZoomMath:(float)newZoom newPan:(float2&)newPan { -// transform the cursor to texture coordinate, or clamped version if outside - - + // transform the cursor to texture coordinate, or clamped version if outside Renderer* renderer = (Renderer*)self.delegate; float4x4 projectionViewModelMatrix = [renderer computeImageTransform:_showSettings->panX panY:_showSettings->panY zoom:_showSettings->zoom]; @@ -1071,7 +1207,80 @@ - (void)scrollWheel:(NSEvent *)event // TODO: convert to C++ actions, and then call into Base holding all this // move pan/zoom logic too. Then use that as start of Win32 kramv. 
+- (IBAction)handleAction:(id)sender { + // sender is the UI element/NSButton + // if (sender == ) + NSButton* button = (NSButton*)sender; + + NSEvent* theEvent = [NSApp currentEvent]; + bool isShiftKeyDown = (theEvent.modifierFlags & NSEventModifierFlagShift); + + string title = [button.title UTF8String]; + int32_t keyCode = -1; + + if (title == "?") + keyCode = Key::Slash; // help + else if (title == "I") + keyCode = Key::I; + else if (title == "H") + keyCode = Key::H; + + else if (title == "S") + keyCode = Key::S; + else if (title == "O") + keyCode = Key::O; + else if (title == "W") + keyCode = Key::W; + else if (title == "P") + keyCode = Key::P; + else if (title == "N") + keyCode = Key::N; + + else if (title == "E") + keyCode = Key::E; + else if (title == "D") + keyCode = Key::D; + else if (title == "C") + keyCode = Key::C; + else if (title == "U") + keyCode = Key::U; + + else if (title == "M") + keyCode = Key::M; + else if (title == "F") + keyCode = Key::F; + else if (title == "Y") + keyCode = Key::Y; + else if (title == "J") + keyCode = Key::J; + else if (title == "L") + keyCode = Key::L; + else if (title == "0") + keyCode = Key::Num0; + + else if (title == "R") + keyCode = Key::R; + else if (title == "G") + keyCode = Key::G; + else if (title == "B") + keyCode = Key::B; + else if (title == "A") + keyCode = Key::A; + + + if (keyCode >= 0) + [self handleKey:keyCode isShiftKeyDown:isShiftKeyDown]; +} + - (void)keyDown:(NSEvent *)theEvent +{ + bool isShiftKeyDown = theEvent.modifierFlags & NSEventModifierFlagShift; + uint32_t keyCode = theEvent.keyCode; + + [self handleKey:keyCode isShiftKeyDown:isShiftKeyDown]; +} + +- (void)handleKey:(uint32_t)keyCode isShiftKeyDown:(bool)isShiftKeyDown { // Some data depends on the texture data (isSigned, isNormal, ..) TextureChannels& channels = _showSettings->channels; @@ -1079,11 +1288,29 @@ - (void)keyDown:(NSEvent *)theEvent // TODO: fix isChanged to only be set when value changes // f.e. clamped values don't need to re-render - bool isShiftKeyDown = theEvent.modifierFlags & NSEventModifierFlagShift; string text; - switch(theEvent.keyCode) { + switch(keyCode) { + case Key::V: { + bool isVertical = _buttonStack.orientation == NSUserInterfaceLayoutOrientationVertical; + isVertical = !isVertical; + + _buttonStack.orientation = isVertical ? NSUserInterfaceLayoutOrientationVertical : NSUserInterfaceLayoutOrientationHorizontal; + text = isVertical ? "Vert UI" : "Horiz UI"; + break; + } + case Key::U: + // this means no image loaded yet + if (_noImageLoaded) { + return; + } + + _buttonStack.hidden = !_buttonStack.hidden; + text = _buttonStack.hidden ? 
"Hide UI" : "Show UI"; + break; + // rgba channels + case Key::Num1: case Key::R: if (channels == TextureChannels::ModeRRR1 || channels == TextureChannels::ModeR001) { channels = TextureChannels::ModeRGBA; @@ -1096,6 +1323,8 @@ - (void)keyDown:(NSEvent *)theEvent isChanged = true; break; + + case Key::Num2: case Key::G: if (channels == TextureChannels::ModeGGG1 || channels == TextureChannels::Mode0G01) { channels = TextureChannels::ModeRGBA; @@ -1107,6 +1336,8 @@ - (void)keyDown:(NSEvent *)theEvent } isChanged = true; break; + + case Key::Num3: case Key::B: if (channels == TextureChannels::ModeBBB1 || channels == TextureChannels::Mode00B1) { channels = TextureChannels::ModeRGBA; @@ -1118,6 +1349,8 @@ - (void)keyDown:(NSEvent *)theEvent } isChanged = true; break; + + case Key::Num4: case Key::A: if (channels == TextureChannels::ModeAAA1) { channels = TextureChannels::ModeRGBA; @@ -1136,6 +1369,7 @@ - (void)keyDown:(NSEvent *)theEvent switch(_showSettings->debugMode) { case DebugModeNone: text = "Debug Off"; break; case DebugModeTransparent: text = "Debug Transparent"; break; + case DebugModeNonZero: text = "Debug NonZero"; break; case DebugModeColor: text = "Debug Color"; break; case DebugModeGray: text = "Debug Gray"; break; case DebugModeHDR: text = "Debug HDR"; break; @@ -1157,7 +1391,7 @@ - (void)keyDown:(NSEvent *)theEvent "⇧J-next bundle image\n"; break; - case Key::Zero: { // scale and reset pan + case Key::Num0: { // scale and reset pan float zoom; // fit image or mip if (isShiftKeyDown) { @@ -1315,7 +1549,7 @@ - (void)keyDown:(NSEvent *)theEvent // info on the texture, could request info from lib, but would want to cache that info case Key::I: if (_showSettings->isHudShown) { - sprintf(text, "%s", _showSettings->imageInfo.c_str()); + sprintf(text, "%s", isShiftKeyDown ? _showSettings->imageInfoVerbose.c_str() : _showSettings->imageInfo.c_str()); } break; @@ -1472,7 +1706,7 @@ - (BOOL)performDragOperation:(id)sender { if ([self loadTextureFromURL:url]) { [self setHudText:""]; - + return YES; } } @@ -1561,12 +1795,20 @@ - (BOOL)loadTextureFromArchive:(const char*)filename timestamp:(double)timestamp // was using subtitle, but that's macOS 11.0 feature. string title = "kramv - "; + title += formatTypeName(_showSettings->originalFormat); + title += " - "; title += filenameShort; self.window.title = [NSString stringWithUTF8String: title.c_str()]; // doesn't set imageURL or update the recent document menu + // show the controls + if (_noImageLoaded) { + _buttonStack.hidden = NO; // show controls + _noImageLoaded = NO; + } + self.needsDisplay = YES; return YES; } @@ -1626,6 +1868,8 @@ - (BOOL)loadTextureFromURL:(NSURL*)url { // was using subtitle, but that's macOS 11.0 feature. string title = "kramv - "; + title += formatTypeName(_showSettings->originalFormat); + title += " - "; title += filenameShort; self.window.title = [NSString stringWithUTF8String: title.c_str()]; @@ -1639,6 +1883,12 @@ - (BOOL)loadTextureFromURL:(NSURL*)url { self.imageURL = url; + // show the controls + if (_noImageLoaded) { + _buttonStack.hidden = NO; // show controls + _noImageLoaded = NO; + } + self.needsDisplay = YES; return YES; } @@ -1733,9 +1983,13 @@ - (void)viewDidLoad options: (NSTrackingMouseEnteredAndExited | NSTrackingMouseMoved | NSTrackingActiveInKeyWindow ) owner:_view userInfo:nil]; [_view addTrackingArea:_trackingArea]; - + + // programmatically add some buttons + // think limited to 11 viewws before they must be wrapepd in a container. That's how SwiftUI was. 
+ } + @end diff --git a/libkram/kram/KTXImage.cpp b/libkram/kram/KTXImage.cpp index f5311974..1a49b928 100644 --- a/libkram/kram/KTXImage.cpp +++ b/libkram/kram/KTXImage.cpp @@ -877,7 +877,7 @@ MyMTLTextureType KTXHeader::metalTextureType() const //--------------------------------------------------- -bool KTXImage::open(const uint8_t* imageData, size_t imageDataLength) +bool KTXImage::open(const uint8_t* imageData, size_t imageDataLength, bool isInfoOnly) { // Note: never trust the extension, always load based on the identifier if ((size_t)imageDataLength < sizeof(kKTX2Identifier)) { @@ -886,7 +886,7 @@ bool KTXImage::open(const uint8_t* imageData, size_t imageDataLength) // check for ktx2 if (memcmp(imageData, kKTX2Identifier, sizeof(kKTX2Identifier)) == 0) { - return openKTX2(imageData, imageDataLength); + return openKTX2(imageData, imageDataLength, isInfoOnly); } // check for ktx1 @@ -1087,16 +1087,16 @@ void KTXImage::toPropsData(vector& propsData) // TODO: this needs to pad to 16-bytes, so may need a prop for that } -void KTXImage::initMipLevels(bool doMipmaps, int32_t mipMinSize, int32_t mipMaxSize, uint32_t& numSkippedMips) +void KTXImage::initMipLevels(bool doMipmaps, int32_t mipMinSize, int32_t mipMaxSize, int32_t mipSkip, uint32_t& numSkippedMips) { // dst levels int32_t w = width; int32_t h = height; int32_t d = depth; - numSkippedMips = 0; + numSkippedMips = mipSkip; - bool needsDownsample = (w > mipMaxSize || h > mipMaxSize); + bool needsDownsample = (numSkippedMips > 0) || (w > mipMaxSize || h > mipMaxSize); int32_t maxMipLevels = 16; // 64K x 64K @@ -1113,8 +1113,9 @@ void KTXImage::initMipLevels(bool doMipmaps, int32_t mipMinSize, int32_t mipMaxS if (doMipmaps || needsDownsample) { bool keepMip = - (w >= mipMinSize && w <= mipMaxSize) && - (h >= mipMinSize && h <= mipMaxSize); + (numSkippedMips >= (uint32_t)mipSkip) || + ((w >= mipMinSize && w <= mipMaxSize) && + (h >= mipMinSize && h <= mipMaxSize)); if (keepMip) { level.length = mipLevelSize(w, h); @@ -1137,8 +1138,9 @@ void KTXImage::initMipLevels(bool doMipmaps, int32_t mipMinSize, int32_t mipMaxS mipDown(w, h, d); keepMip = - (w >= mipMinSize && w <= mipMaxSize) && - (h >= mipMinSize && h <= mipMaxSize); + (numSkippedMips >= (uint32_t)mipSkip) || + ((w >= mipMinSize && w <= mipMaxSize) && + (h >= mipMinSize && h <= mipMaxSize)); if (keepMip && (mipLevels.size() < (size_t)maxMipLevels)) { // length needs to be multiplied by chunk size before writing out @@ -1308,7 +1310,7 @@ struct ZSTDScope2 }; -bool KTXImage::openKTX2(const uint8_t* imageData, size_t imageDataLength) +bool KTXImage::openKTX2(const uint8_t* imageData, size_t imageDataLength, bool isInfoOnly) { if ((size_t)imageDataLength < sizeof(KTX2Header)) { return false; @@ -1392,10 +1394,34 @@ bool KTXImage::openKTX2(const uint8_t* imageData, size_t imageDataLength) header.bytesOfKeyValueData = 0; initProps(imageData + header2.kvdByteOffset, header2.kvdByteLength); + // skip parsing th elevels + if (isInfoOnly) { + skipImageLength = true; + fileData = imageData; + fileDataLength = imageDataLength; + + // copy these over from ktx2 + mipLevels = levels; + + // copy the original ktx2 levels, this includes mip compression + bool isCompressed = + (mipLevels[0].lengthCompressed > 0) && + (mipLevels[0].length != mipLevels[0].lengthCompressed); + + for (auto& level : mipLevels) { + level.length /= numChunks; + + // this indicates not compressed + if (!isCompressed) { + level.lengthCompressed = 0; + } + } + return true; + } if (!isCompressed) { // Note: this is aliasing 
the mips from a ktx2 file into a ktx1 KTXImage - // This is highly unsafe. + // This is highly unsafe but mostly works for input. // Note: KTX2 also doesn't have the length field embedded the mipData // so need to be able to set skipLength to unify the mipgen if aliasing the mip data @@ -1443,6 +1469,8 @@ bool KTXImage::openKTX2(const uint8_t* imageData, size_t imageDataLength) imageDataFromKTX2.resize(fileDataLength, 0); fileData = imageDataFromKTX2.data(); + // TODO: may need to fill out length field in fileData + // Note: specific to zstd bool isZstd = header2.supercompressionScheme == KTX2SupercompressionZstd; ZSTD_DCtx* dctx = nullptr; diff --git a/libkram/kram/KTXImage.h b/libkram/kram/KTXImage.h index 71c426e6..b82f9299 100644 --- a/libkram/kram/KTXImage.h +++ b/libkram/kram/KTXImage.h @@ -268,12 +268,12 @@ struct KTX2Compressor { class KTXImage { public: // this calls init calls - bool open(const uint8_t* imageData, size_t imageDataLength); + bool open(const uint8_t* imageData, size_t imageDataLength, bool isInfoOnly = false); void initProps(const uint8_t* propsData, size_t propDataSize); void initMipLevels(size_t mipOffset); - void initMipLevels(bool doMipmaps, int32_t mipMinSize, int32_t mipMaxSize, uint32_t& numSkippedMips); + void initMipLevels(bool doMipmaps, int32_t mipMinSize, int32_t mipMaxSize, int32_t mipSkip, uint32_t& numSkippedMips); bool validateMipLevels() const; @@ -305,7 +305,7 @@ class KTXImage { vector& imageData(); private: - bool openKTX2(const uint8_t* imageData, size_t imageDataLength); + bool openKTX2(const uint8_t* imageData, size_t imageDataLength, bool isInfoOnly); // ktx2 mips are uncompressed to convert back to ktx1, but without the image offset vector imageDataFromKTX2; diff --git a/libkram/kram/Kram.cpp b/libkram/kram/Kram.cpp index 720691ec..04f46e73 100644 --- a/libkram/kram/Kram.cpp +++ b/libkram/kram/Kram.cpp @@ -203,7 +203,7 @@ bool SetupSourceImage(MmapHelper& mmapHelper, FileHelper& fileHelper, // decoding reads a ktx file into KTXImage (not Image) bool SetupSourceKTX(MmapHelper& mmapHelper, FileHelper& fileHelper, vector& fileBuffer, - const string& srcFilename, KTXImage& sourceImage) + const string& srcFilename, KTXImage& sourceImage, bool isInfoOnly = false) { // first try mmap, and then use file -> buffer bool useMmap = true; @@ -213,7 +213,7 @@ bool SetupSourceKTX(MmapHelper& mmapHelper, FileHelper& fileHelper, } if (useMmap) { - if (!sourceImage.open(mmapHelper.data(), mmapHelper.dataLength())) { + if (!sourceImage.open(mmapHelper.data(), mmapHelper.dataLength(), isInfoOnly)) { return false; } } @@ -231,7 +231,7 @@ bool SetupSourceKTX(MmapHelper& mmapHelper, FileHelper& fileHelper, return false; } - if (!sourceImage.open(fileBuffer.data(), (int32_t)fileBuffer.size())) { + if (!sourceImage.open(fileBuffer.data(), (int32_t)fileBuffer.size(), isInfoOnly)) { return false; } } @@ -952,7 +952,7 @@ void kramEncodeUsage(bool showVersion = true) "\n" //"\t [-mipalign]\n" "\t [-mipnone]\n" - "\t [-mipmin size] [-mipmax size]\n" + "\t [-mipmin size] [-mipmax size] [-mipskip count]\n" "\n" "\t [-chunks 4x4]\n" "\t [-swizzle rg01]\n" @@ -1254,9 +1254,13 @@ string kramInfoToString(const string& srcFilename, bool isVerbose) else if (isKTX) { KTXImage srcImage; + // This means don't convert to KTX1, keep original data/offsets + // and also skip decompressing the mips + bool isInfoOnly = true; + // Note: could change to not read any mips bool success = SetupSourceKTX(srcMmapHelper, srcFileHelper, srcFileBuffer, - srcFilename, srcImage); + srcFilename, 
srcImage, isInfoOnly); if (!success) { KLOGE("Kram", "info couldn't open ktx file"); return ""; @@ -1373,10 +1377,10 @@ string kramInfoKTXToString(const string& srcFilename, const KTXImage& srcImage, MyMTLPixelFormat metalFormat = srcImage.pixelFormat; int32_t dataSize = srcImage.fileDataLength; - - string tmp; + + //string tmp; bool isMB = (dataSize > (512 * 1024)); - sprintf(tmp, + append_sprintf(info, "file: %s\n" "size: %d\n" "sizm: %0.3f %s\n", @@ -1384,17 +1388,42 @@ string kramInfoKTXToString(const string& srcFilename, const KTXImage& srcImage, dataSize, isMB ? dataSize / (1024.0f * 1024.0f) : dataSize / 1024.0f, isMB ? "MB" : "KB"); - info += tmp; + + int32_t numChunks = srcImage.totalChunks(); + + // add up lengtha and lengthCompressed + if (srcImage.mipLevels[0].lengthCompressed > 0) { + uint64_t length = 0; + uint64_t lengthCompressed = 0; - int32_t pixelMultiplier = srcImage.totalChunks(); + for (const auto& level : srcImage.mipLevels) { + length += level.length; + lengthCompressed += level.lengthCompressed; + } + + length *= numChunks; + uint64_t percent = (100 * lengthCompressed) / length; + + isMB = (lengthCompressed > (512 * 1024)); + double lengthF = isMB ? length / (1024.0f * 1024.0f) : length / 1024.0f; + double lengthCompressedF = isMB ? lengthCompressed / (1024.0f * 1024.0f) : lengthCompressed / 1024.0f; + + append_sprintf(info, + "sizc: %0.3f,%0.3f %s %d%%\n", + lengthF, lengthCompressedF, + isMB ? "MB" : "KB", + (int)percent); + } + float numPixels = srcImage.width * srcImage.height; - numPixels *= (float)pixelMultiplier; + numPixels *= (float)numChunks; if (srcImage.header.numberOfMipmapLevels > 1) { numPixels *= 4.0 / 3.0f; // TODO: estimate for now } + // to megapixels numPixels /= (1000.0f * 1000.0f); auto textureType = srcImage.header.metalTextureType(); @@ -1404,7 +1433,7 @@ string kramInfoKTXToString(const string& srcFilename, const KTXImage& srcImage, case MyMTLTextureTypeCube: case MyMTLTextureTypeCubeArray: case MyMTLTextureType2DArray: - sprintf(tmp, + append_sprintf(info, "type: %s\n" "dims: %dx%d\n" "dimm: %0.3f MP\n" @@ -1415,7 +1444,7 @@ string kramInfoKTXToString(const string& srcFilename, const KTXImage& srcImage, srcImage.header.numberOfMipmapLevels); break; case MyMTLTextureType3D: - sprintf(tmp, + append_sprintf(info, "type: %s\n" "dims: %dx%dx%d\n" "dimm: %0.3f MP\n" @@ -1426,8 +1455,7 @@ string kramInfoKTXToString(const string& srcFilename, const KTXImage& srcImage, srcImage.header.numberOfMipmapLevels); break; } - info += tmp; - + // print out the array if (srcImage.header.numberOfArrayElements > 1) { append_sprintf(info, @@ -1449,9 +1477,6 @@ string kramInfoKTXToString(const string& srcFilename, const KTXImage& srcImage, for (const auto& prop : srcImage.props) { append_sprintf(info, "prop: %s %s\n", prop.first.c_str(), prop.second.c_str()); } - - // TODO: handle zstd compressed KTX2 too, they have a length and compressed length field - // also Basis + zstd if (isVerbose) { // dump mips/dims, but this can be a lot of data on arrays @@ -1461,9 +1486,7 @@ string kramInfoKTXToString(const string& srcFilename, const KTXImage& srcImage, int32_t d = srcImage.depth; // num chunks - append_sprintf(info, - "chun: %d\n", - srcImage.totalChunks()); + append_sprintf(info, "chun: %d\n", numChunks); for (const auto& mip : srcImage.mipLevels) { @@ -1483,13 +1506,14 @@ string kramInfoKTXToString(const string& srcFilename, const KTXImage& srcImage, } if (mip.lengthCompressed != 0) { - size_t percent = (100 * mip.lengthCompressed) / mip.length; + uint64_t 
levelSize = mip.length * numChunks; + uint64_t percent = (100 * mip.lengthCompressed) / levelSize; append_sprintf(info, "%" PRIu64 ",%" PRIu64 ",%" PRIu64 " %d%%\n", mip.offset, - mip.length, // only size of one mip right now, not mip * numChunks - mip.lengthCompressed, // TODO: preserve so can be displayed + levelSize, + mip.lengthCompressed, (int)percent ); } @@ -1721,6 +1745,12 @@ static int32_t kramAppEncode(vector& args) } infoArgs.mipMaxSize = atoi(args[i]); + if (infoArgs.mipMaxSize < 1 || infoArgs.mipMaxSize > 65536) { + KLOGE("Kram", "mipmax arg invalid"); + error = true; + break; + } + //continue; } else if (isStringEqual(word, "-mipmin")) { @@ -1732,6 +1762,28 @@ static int32_t kramAppEncode(vector& args) } infoArgs.mipMinSize = atoi(args[i]); + if (infoArgs.mipMinSize < 1 || infoArgs.mipMinSize > 65536) { + KLOGE("Kram", "mipmin arg invalid"); + error = true; + break; + } + //continue; + } + else if (isStringEqual(word, "-mipskip")) { + ++i; + if (i >= argc) { + KLOGE("Kram", "mipskip arg invalid"); + error = true; + break; + } + + infoArgs.mipSkip = atoi(args[i]); + if (infoArgs.mipSkip < 0 || infoArgs.mipSkip > 16) { + KLOGE("Kram", "mipskip arg invalid"); + error = true; + break; + } + //continue; } else if (isStringEqual(word, "-mipnone")) { diff --git a/libkram/kram/KramImage.cpp b/libkram/kram/KramImage.cpp index a6831194..e7b4017a 100644 --- a/libkram/kram/KramImage.cpp +++ b/libkram/kram/KramImage.cpp @@ -910,9 +910,9 @@ struct MipConstructData { // Can skip the larger and smaller mips. This is the larger mips skipped. uint32_t numSkippedMips = 0; - // this is size of 2d image src after accounting for chunks for a strip of array/cube data - uint32_t modifiedWidth = 0; - uint32_t modifiedHeight = 0; + // 2d image src after accounting for chunks for a strip of array/cube data + uint32_t chunkWidth = 0; + uint32_t chunkHeight = 0; }; @@ -1270,8 +1270,8 @@ bool Image::encodeImpl(ImageInfo& info, FILE* dstFile, KTXImage& dstImage) const // This is wxh of source in case it has chunks // dstImage will start at this, but may mip down smaller base on mipMaxSize - mipConstructData.modifiedWidth = w; - mipConstructData.modifiedHeight = h; + mipConstructData.chunkWidth = w; + mipConstructData.chunkHeight = h; // work out how much memory we need to load header.initFormatGL(info.pixelFormat); @@ -1284,8 +1284,12 @@ bool Image::encodeImpl(ImageInfo& info, FILE* dstFile, KTXImage& dstImage) const dstImage.height = h; dstImage.depth = header.pixelDepth; // from validate above - dstImage.initMipLevels(info.doMipmaps, info.mipMinSize, info.mipMaxSize, mipConstructData.numSkippedMips); + dstImage.initMipLevels(info.doMipmaps, info.mipMinSize, info.mipMaxSize, info.mipSkip, mipConstructData.numSkippedMips); + if (dstImage.mipLevels.empty()) { + KLOGE("kram", "skipped all mips"); + return false; + } // ---------------------------------------------------- int32_t numChunks = (int32_t)chunkOffsets.size(); @@ -1317,6 +1321,9 @@ bool Image::encodeImpl(ImageInfo& info, FILE* dstFile, KTXImage& dstImage) const // could build and compress and entire level at a time, but can't write any of it // out until smallest mips are constructed. Only then are offsets resolved. + // A better way would be to do mips in-place, but in-order, and compressing the large + // to small mips into an array of open compressor streams. Then only need one mip instead of + // all levels in memory. 
if (!writeKTX1FileOrImage(info, mipConstructData, propsData, nullptr, dstImage)) { return false; } @@ -1610,8 +1617,8 @@ bool Image::createMipsFromChunks( // This is for 8-bit data (pixelsFloat used for in-place mipgen) ImageData srcImage; - srcImage.width = data.modifiedWidth; - srcImage.height = data.modifiedHeight; + srcImage.width = data.chunkWidth; + srcImage.height = data.chunkHeight; // KramMipper uses these srcImage.isSRGB = info.isSRGB; diff --git a/libkram/kram/KramImageInfo.cpp b/libkram/kram/KramImageInfo.cpp index 78987728..3a324a3d 100644 --- a/libkram/kram/KramImageInfo.cpp +++ b/libkram/kram/KramImageInfo.cpp @@ -1012,7 +1012,8 @@ void ImageInfo::initWithArgs(const ImageInfoArgs& args) doMipmaps = args.doMipmaps; mipMinSize = args.mipMinSize; mipMaxSize = args.mipMaxSize; - + mipSkip = args.mipSkip; + swizzleText = args.swizzleText; averageChannels = args.averageChannels; diff --git a/libkram/kram/KramImageInfo.h b/libkram/kram/KramImageInfo.h index 86a42488..1e34ae68 100644 --- a/libkram/kram/KramImageInfo.h +++ b/libkram/kram/KramImageInfo.h @@ -48,7 +48,8 @@ class ImageInfoArgs { int32_t mipMinSize = 1; int32_t mipMaxSize = 32 * 1024; - + int32_t mipSkip = 0; + int32_t quality = 49; // may want float // ktx2 has a compression type and level @@ -172,12 +173,11 @@ class ImageInfo { int32_t mipMinSize = 1; int32_t mipMaxSize = 32 * 1024; + int32_t mipSkip = 0; // count of large mips to skip int32_t chunksX = 0; int32_t chunksY = 0; int32_t chunksCount = 0; - - }; bool isSwizzleValid(const char* swizzle); From 3af15a2af3d25f3e67ddb6dab9268bc6ab44acbf Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Tue, 18 May 2021 22:52:34 -0700 Subject: [PATCH 052/901] kramv - add mode to capture any loge messages This is wrap around loadTex and loadArchive calls, so that error messages are reported to the kramv hud. 
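For reference, a minimal sketch of the intended call pattern, assuming only the capture functions added here (setErrorLogCapture/getErrorLogCaptureText from KramLog.h); loadTextureFile is a hypothetical stand-in for the viewer's loadTexture/loadArchive paths:

    #include <string>
    #include "KramLog.h"  // setErrorLogCapture, getErrorLogCaptureText (added in this patch)

    // hypothetical stand-in for the viewer's loadTexture/loadArchive call
    bool loadTextureFile(const char* filename);

    bool loadWithHudErrors(const char* filename, std::string& hudText)
    {
        kram::setErrorLogCapture(true);

        bool success = loadTextureFile(filename);
        if (!success) {
            // grab any KLOGE output emitted during the load, before capture is turned off
            kram::getErrorLogCaptureText(hudText);
        }

        kram::setErrorLogCapture(false);
        return success;
    }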
--- kramv/KramViewerMain.mm | 24 ++++++++++++++++++++++-- libkram/kram/KTXImage.cpp | 7 ++++++- libkram/kram/KramLog.cpp | 34 +++++++++++++++++++++++++++++++++- libkram/kram/KramLog.h | 7 +++++++ 4 files changed, 68 insertions(+), 4 deletions(-) diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index ca2f977f..02177e97 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -1842,7 +1842,18 @@ - (BOOL)loadTextureFromURL:(NSURL*)url { const char* filename = entry.filename; double timestamp = entry.modificationDate; - return [self loadTextureFromArchive:filename timestamp:timestamp]; + setErrorLogCapture(true); + + BOOL success = [self loadTextureFromArchive:filename timestamp:timestamp]; + + if (!success) { + string errorText; + getErrorLogCaptureText(errorText); + [self setHudText: errorText.c_str()]; + } + + setErrorLogCapture(false); + return success; } if (!(endsWithExtension(filename, ".png") || @@ -1853,9 +1864,18 @@ - (BOOL)loadTextureFromURL:(NSURL*)url { } Renderer* renderer = (Renderer*)self.delegate; - if (![renderer loadTexture:url]) { + setErrorLogCapture(true); + + BOOL success = [renderer loadTexture:url]; + + if (!success) { + string errorText; + getErrorLogCaptureText(errorText); + [self setHudText: errorText.c_str()]; + setErrorLogCapture(false); return NO; } + setErrorLogCapture(false); // set title to filename, chop this to just file+ext, not directory const char* filenameShort = strrchr(filename, '/'); diff --git a/libkram/kram/KTXImage.cpp b/libkram/kram/KTXImage.cpp index 1a49b928..6fd52b24 100644 --- a/libkram/kram/KTXImage.cpp +++ b/libkram/kram/KTXImage.cpp @@ -1328,6 +1328,11 @@ bool KTXImage::openKTX2(const uint8_t* imageData, size_t imageDataLength, bool i // copy out the header, const KTX2Header& header2 = *(const KTX2Header*)imageData; + + if (header2.supercompressionScheme != KTX2SupercompressionBasisLZ) { + KLOGE("kram", "BasisLZ supercompression not yet supported"); + return false; + } if (header2.supercompressionScheme != KTX2SupercompressionNone && header2.supercompressionScheme != KTX2SupercompressionZstd && @@ -1340,7 +1345,7 @@ bool KTXImage::openKTX2(const uint8_t* imageData, size_t imageDataLength, bool i // This typically means UASTC encoding + zstd supercompression, and code doesn't handle that below yet if (header2.vkFormat == 0) { - KLOGE("kram", "Basis encode not yet supported"); + KLOGE("kram", "Basis decode not yet supported"); return false; } diff --git a/libkram/kram/KramLog.cpp b/libkram/kram/KramLog.cpp index 89090557..4045ad15 100644 --- a/libkram/kram/KramLog.cpp +++ b/libkram/kram/KramLog.cpp @@ -14,6 +14,31 @@ namespace kram { using namespace std; +static mutex gLogLock; +static string gErrorLogCaptureText; +static bool gIsErrorLogCapture = false; +void setErrorLogCapture(bool enable) { + gIsErrorLogCapture = enable; + if (enable) { + unique_lock lock(gLogLock); + gErrorLogCaptureText.clear(); + } +} +bool isErrorLogCapture() { return gIsErrorLogCapture; } + + +// return the text +void getErrorLogCaptureText(string& text) { + if (gIsErrorLogCapture) { + unique_lock lock(gLogLock); + text = gErrorLogCaptureText; + } + else { + text.clear(); + } +} + + // TODO: install assert handler to intercept, and also add a verify (assert that leaves source in) //void __assert(const char *expression, const char *file, int32_t line) { // @@ -200,9 +225,16 @@ extern int32_t logMessage(const char* group, int32_t logLevel, } // stdout isn't thread safe, so to prevent mixed output put this under mutex - static mutex gLogLock; 
unique_lock lock(gLogLock); + // this means caller needs to know all errors to display in the hud + if (gIsErrorLogCapture && logLevel == LogLevelError) { + gErrorLogCaptureText += msg; + if (needsNewline) { + gErrorLogCaptureText += "\n"; + } + } + fprintf(fp, "%s%s%s%s%s%s", tag, groupString, space, msg, needsNewline ? "\n" : "", fileLineFunc.c_str()); return 0; // reserved for later diff --git a/libkram/kram/KramLog.h b/libkram/kram/KramLog.h index 0b24d871..38f48e2c 100644 --- a/libkram/kram/KramLog.h +++ b/libkram/kram/KramLog.h @@ -48,6 +48,12 @@ extern int32_t logMessage(const char* group, int32_t logLevel, // TODO: move to Strings.h using namespace std; +// when set true, the internal string is cleared +void setErrorLogCapture(bool enable); +bool isErrorLogCapture(); +// return the text +void getErrorLogCaptureText(string& text); + // returns length of string, -1 if failure int32_t sprintf(string& str, const char* format, ...) __printflike(2, 3); @@ -60,4 +66,5 @@ bool endsWithExtension(const char* str, const string& substring); // https://stackoverflow.com/questions/874134/find-out-if-string-ends-with-another-string-in-c bool endsWith(const string& value, const string& ending); + } // namespace kram From 62d7a0de19812c53dd62ac7727c55ce940e5caa0 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Tue, 18 May 2021 22:55:14 -0700 Subject: [PATCH 053/901] kram - fix BasisLZ test --- libkram/kram/KTXImage.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libkram/kram/KTXImage.cpp b/libkram/kram/KTXImage.cpp index 6fd52b24..ce43b75c 100644 --- a/libkram/kram/KTXImage.cpp +++ b/libkram/kram/KTXImage.cpp @@ -1329,7 +1329,7 @@ bool KTXImage::openKTX2(const uint8_t* imageData, size_t imageDataLength, bool i const KTX2Header& header2 = *(const KTX2Header*)imageData; - if (header2.supercompressionScheme != KTX2SupercompressionBasisLZ) { + if (header2.supercompressionScheme == KTX2SupercompressionBasisLZ) { KLOGE("kram", "BasisLZ supercompression not yet supported"); return false; } From 88203135cd0766e7c8dc1bd5ff27977bb740f00a Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Tue, 18 May 2021 23:01:20 -0700 Subject: [PATCH 054/901] kram - fix ValidateMipLevels for KTX files length is internally stored not multiplied by numChunks, so that needed to be done in this call. 
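A small illustrative sketch of the size relationship this check now enforces, using simplified stand-in types rather than the real KTXImage/KTXImageLevel:

    #include <cstdint>

    // simplified stand-in: like KTXImageLevel, length is stored per chunk
    struct LevelSketch {
        uint64_t length;  // bytes of one chunk (face/slice) at this mip
    };

    // a whole level is numChunks copies of the per-chunk length,
    // e.g. 6 for a cube map, numberOfArrayElements for a 2D array
    inline uint64_t expectedLevelSize(const LevelSketch& level, uint32_t numChunks)
    {
        return level.length * numChunks;
    }

    // the size computed from width/height/format must match the per-chunk
    // length scaled by the chunk count, not the per-chunk length alone
    inline bool levelSizeMatches(uint64_t levelSizeFromRead, const LevelSketch& level, uint32_t numChunks)
    {
        return levelSizeFromRead == expectedLevelSize(level, numChunks);
    }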
--- libkram/kram/KTXImage.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/libkram/kram/KTXImage.cpp b/libkram/kram/KTXImage.cpp index ce43b75c..46aff718 100644 --- a/libkram/kram/KTXImage.cpp +++ b/libkram/kram/KTXImage.cpp @@ -1223,6 +1223,8 @@ bool KTXImage::validateMipLevels() const { bool isValid = true; + int numChunks = header.totalChunks(); + // validate that no weird size to image for (uint32_t i = 0; i < mipLevels.size(); ++i) { auto& level = mipLevels[i]; @@ -1235,7 +1237,7 @@ bool KTXImage::validateMipLevels() const { levelSizeFromRead *= 6; } - if (levelSizeFromRead != level.length) { + if (levelSizeFromRead != level.length * numChunks) { KLOGE("kram", "mip %d levelSize mismatch %d %d", i, (int)levelSizeFromRead, (int)level.length); isValid = false; break; From e90c71f0d9450a46e6acffaad2e5e71a206cde3d Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Tue, 18 May 2021 23:18:40 -0700 Subject: [PATCH 055/901] kramv - prepend the filename when failure occurs --- kramv/KramViewerMain.mm | 27 +++++++++++++++++++++++++-- libkram/kram/KTXImage.cpp | 4 ++-- 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index 02177e97..47f04c8c 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -1849,7 +1849,15 @@ - (BOOL)loadTextureFromURL:(NSURL*)url { if (!success) { string errorText; getErrorLogCaptureText(errorText); - [self setHudText: errorText.c_str()]; + setErrorLogCapture(false); + + // prepend filename + string finalErrorText; + append_sprintf(finalErrorText, + "Could not load from archive:\n %s\n", filename); + finalErrorText += errorText; + + [self setHudText: finalErrorText.c_str()]; } setErrorLogCapture(false); @@ -1860,6 +1868,14 @@ - (BOOL)loadTextureFromURL:(NSURL*)url { endsWithExtension(filename, ".ktx") || endsWithExtension(filename, ".ktx2")) ) { + string errorText = "Unsupported file extension, must be .zip, .png, .ktx, ktx2\n"; + + string finalErrorText; + append_sprintf(finalErrorText, + "Could not load from archive:\n %s\n", filename); + finalErrorText += errorText; + + [self setHudText: finalErrorText.c_str()]; return NO; } @@ -1871,8 +1887,15 @@ - (BOOL)loadTextureFromURL:(NSURL*)url { if (!success) { string errorText; getErrorLogCaptureText(errorText); - [self setHudText: errorText.c_str()]; setErrorLogCapture(false); + + // prepend filename + string finalErrorText; + append_sprintf(finalErrorText, + "Could not load from file\n %s\n", filename); + finalErrorText += errorText; + + [self setHudText: finalErrorText.c_str()]; return NO; } setErrorLogCapture(false); diff --git a/libkram/kram/KTXImage.cpp b/libkram/kram/KTXImage.cpp index 46aff718..d00c9c2f 100644 --- a/libkram/kram/KTXImage.cpp +++ b/libkram/kram/KTXImage.cpp @@ -1332,7 +1332,7 @@ bool KTXImage::openKTX2(const uint8_t* imageData, size_t imageDataLength, bool i if (header2.supercompressionScheme == KTX2SupercompressionBasisLZ) { - KLOGE("kram", "BasisLZ supercompression not yet supported"); + KLOGE("kram", "Basis decode not yet supported"); return false; } @@ -1347,7 +1347,7 @@ bool KTXImage::openKTX2(const uint8_t* imageData, size_t imageDataLength, bool i // This typically means UASTC encoding + zstd supercompression, and code doesn't handle that below yet if (header2.vkFormat == 0) { - KLOGE("kram", "Basis decode not yet supported"); + KLOGE("kram", "UASTC and vkFormat of 0 decode not yet supported"); return false; } From 04aefc8e29b7710a0c7165060da1372e06ccf71c Mon Sep 17 00:00:00 2001 From: Alec Miller 
Date: Tue, 18 May 2021 23:39:16 -0700 Subject: [PATCH 056/901] Update README.md --- README.md | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 91e4ab4e..22dfb9cf 100644 --- a/README.md +++ b/README.md @@ -100,9 +100,9 @@ KTX - breaks loads of mips with 4 byte length offset at the start of each level metadata/props aren't standardized and only ascii prop support so easy to dump out KTX2 - works in kram and viewer, has aligned compressed levels of mips, - libkram only supports None/Zstd supercompression, only read no write support, - write by converting ktx -> ktx2 with ktx2ktx2 + ktxsc (see kramTexture.py --ktx2 option) - + libkram supports None/Zlib/Zstd supercompression for read/write + doesn't support UASTC or BasisLZ yet + ``` ### An example pipeline @@ -223,10 +223,15 @@ cd build ./Release/kram -testall ./Release/kram -test 1002 +# for ktx ./Release/kram encode -f astc4x4 -srgb -premul -quality 49 -mipmax 1024 -type 2d -i ../tests/src/ColorMap-a.png -o ../tests/out/ios/ColorMap-a.ktx ./Release/kram encode -f etc2rg -signed -normal -quality 49 -mipmax 1024 -type 2d -i ../tests/src/collectorbarrel-n.png -o ../tests/out/ios/collectorbarrel-n.ktx ./Release/kram encode -f etc2r -signed -sdf -quality 49 -mipmax 1024 -type 2d -i ../kram/tests/src/flipper-sdf.png -o ../tests/out/ios/flipper-sdf.ktx +# for ktx (without and with zstd compression) +./Release/kram encode -f astc4x4 -srgb -premul -quality 49 -mipmax 1024 -type 2d -i ../tests/src/ColorMap-a.png -o ../tests/out/ios/ColorMap-a.ktx2 +./Release/kram encode -f astc4x4 -srgb -premul -quality 49 -mipmax 1024 -type 2d -zstd 0 -i ../tests/src/ColorMap-a.png -o ../tests/out/ios/ColorMap-a.ktx2 + ``` ### Open Source Encoder Usage @@ -257,7 +262,7 @@ Squish Simplified to single folder. Replaced sse vector with float4/a for ARM/Neon support. -Astcenc v2.1 +Astcenc v2.5 (current is v3.0) Provide rgba8u source pixels. Converted to 32f at tile level. Improved 1 and 2 channel format encoding (not transfered to v2.1). Avoid reading off end of arrays with padding. @@ -585,7 +590,7 @@ Visually validating and previewing the results is complicated. KTX/2 have few v kram adds props to KTX/2 file to store data. Currently props store Metal and Vulkan formats. This is important since GL's ASTC LDR and HDR formats are the same constant. Also props are saved for channel content and post-swizzle. Loaders, viewers, and shaders can utilize this metadata. -KTX can be converted to KTX2 and each mip supercompressed via ktx2ktx2 and ktxsc. KTX2 reverses mip ordering smallest to largest, so that streamed textures can display smaller mips before they finish fully streaming. KTX2 can also supercompress each mip with zstd and Basis for transcode. I suppose this could then be unpacked to tiles for sparse texturing. KTX2 does not store a length field inside the mip data which keeps consistent alignment. +Kram now supports KTX2 export. But KTX can also be converted to KTX2 and each mip supercompressed via ktx2ktx2 and ktxsc. KTX2 reverses mip ordering smallest to largest, so that streamed textures can display smaller mips before they finish fully streaming. KTX2 can also supercompress each mip with zstd and Basis for transcode. I suppose this could then be unpacked to tiles for sparse texturing. KTX2 does not store a length field inside the mip data which keeps consistent alignment. Metal cannot load mmap mip data that isn't aligned to a multiple of the block size (8 or 16 bytes for BC/ASTC/ETC). 
KTX adds a 4 byte length into the mip data that breaks alignment, but KTX2 fortunately skips that. But KTX2 typically compresses the levels and needs decode/transcode to send to the GPU.

From 600577d272ad46f0eb44d099e81d5ae7c7aa425c Mon Sep 17 00:00:00 2001
From: Alec Miller
Date: Fri, 21 May 2021 15:32:21 -0700
Subject: [PATCH 057/901] kramv - fix up debug modes, add srgb savvy isGray to png loader

NonZero is rgb != 0 now, ignoring alpha since many images are a=1
Gray also ignores when c.r is 0, since on premul images 000 is common.
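A standalone sketch of the sRGB-aware grayscale conversion described above; the real loader goes through the Mipper helpers, so the transfer functions here are simplified stand-ins, with only the Rec.709 weights taken from the patch:

    #include <cmath>
    #include <cstdint>

    // simplified sRGB transfer functions (stand-ins for the Mipper helpers)
    static float srgbToLinear(float s)
    {
        return (s <= 0.04045f) ? s / 12.92f : std::pow((s + 0.055f) / 1.055f, 2.4f);
    }

    static float linearToSrgb(float lin)
    {
        return (lin <= 0.0031308f) ? lin * 12.92f : 1.055f * std::pow(lin, 1.0f / 2.4f) - 0.055f;
    }

    struct Color8 { uint8_t r, g, b, a; };

    // Rec.709 luma computed in linear space, then converted back to the sRGB primary
    static Color8 toGraySketch(Color8 c)
    {
        float rl = srgbToLinear(c.r / 255.0f);
        float gl = srgbToLinear(c.g / 255.0f);
        float bl = srgbToLinear(c.b / 255.0f);

        float luminance = 0.2126f * rl + 0.7152f * gl + 0.0722f * bl;

        uint8_t gray = (uint8_t)(linearToSrgb(luminance) * 255.0f + 0.5f);
        c.r = c.g = c.b = gray;  // alpha is left untouched
        return c;
    }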
(Color*)pixels.data(); + for (int32_t i = 0, iEnd = width*height; i < iEnd; ++i) { + colors[i] = toGrayscaleRec709(colors[i], mipper); + } + + hasColor = false; + } + // apply premul srgb right away, don't use with -premul or alpha is applied twice // this may throw off the props. Note this ignores srgb conversion. // This is hack to look like Photoshop and Apple Preview, where they process srgb wrong @@ -136,7 +170,8 @@ bool SetupTmpFile(FileHelper& tmpFileHelper, const char* suffix) bool SetupSourceImage(MmapHelper& mmapHelper, FileHelper& fileHelper, vector& fileBuffer, - const string& srcFilename, Image& sourceImage, bool isPremulSrgb = false) + const string& srcFilename, Image& sourceImage, + bool isPremulSrgb = false, bool isGray = false) { bool isKTX = endsWith(srcFilename, ".ktx") || endsWith(srcFilename, ".ktx2"); bool isPNG = endsWith(srcFilename, ".png"); @@ -162,7 +197,7 @@ bool SetupSourceImage(MmapHelper& mmapHelper, FileHelper& fileHelper, } } else if (isPNG) { - if (!LoadPng(mmapHelper.data(), mmapHelper.dataLength(), isPremulSrgb, + if (!LoadPng(mmapHelper.data(), mmapHelper.dataLength(), isPremulSrgb, isGray, sourceImage)) { return false; // error } @@ -190,7 +225,7 @@ bool SetupSourceImage(MmapHelper& mmapHelper, FileHelper& fileHelper, } } else if (isPNG) { - if (!LoadPng(fileBuffer.data(), fileHelper.size(), isPremulSrgb, + if (!LoadPng(fileBuffer.data(), fileHelper.size(), isPremulSrgb, isGray, sourceImage)) { return false; // error } @@ -941,7 +976,8 @@ void kramEncodeUsage(bool showVersion = true) KLOGI("Kram", "%s\n" "Usage: kram encode\n" - "\t -f/ormat (bc1 | astc4x4 | etc2rgba | rgba16f)\n" + "\t -f/ormat (bc1 | astc4x4 | etc2rgba | rgba16f) [-quality 0-100]\n" + "\t [-zstd 0] or [-zlib 0] (for .ktx2 output)\n" "\t [-srgb] [-signed] [-normal]\n" "\t -i/nput \n" "\t -o/utput \n" @@ -950,7 +986,6 @@ void kramEncodeUsage(bool showVersion = true) "\t [-e/ncoder (squish | ate | etcenc | bcenc | astcenc | explicit | ..)]\n" "\t [-resize (16x32 | pow2)]\n" "\n" - //"\t [-mipalign]\n" "\t [-mipnone]\n" "\t [-mipmin size] [-mipmax size] [-mipskip count]\n" "\n" @@ -958,9 +993,8 @@ void kramEncodeUsage(bool showVersion = true) "\t [-swizzle rg01]\n" "\t [-avg rxbx]\n" "\t [-sdf]\n" - "\t [-premul]\n" - "\t [-prezero]\n" - "\t [-quality 0-100]\n" + "\t [-premul] [-prezero] [-premulrgb]\n" + "\t [-gray]\n" "\t [-optopaque]\n" "\t [-v]\n" "\n" @@ -1007,17 +1041,20 @@ void kramEncodeUsage(bool showVersion = true) "\tr|rg|rgba[8|16f|32f]\n" "\n" - "\t-mipalign" - "\tAlign mip levels with .ktxa output \n" + // Mips "\t-mipnone" "\tDon't build mips even if pow2 dimensions\n" "\t-mipmin size" "\tOnly output mips >= size px\n" + "\t-mipmax size" "\tOnly output mips <= size px\n" + + "\t-mipskip count" + "\tOnly output largest mips >= count, similar to mipmax but with count instead of size px\n" "\n" - + // tex to normal "\t-height" "\tConvert height.x to normal.xy\n" @@ -1035,21 +1072,23 @@ void kramEncodeUsage(bool showVersion = true) "\tNormal map rg storage signed for etc/bc (rg01), only unsigned astc L+A (gggr).\n" "\t-sdf" "\tGenerate single-channel SDF from a bitmap, can mip and drop large mips. Encode to r8, bc4, etc2r, astc4x4 (Unorm LLL1) to encode\n" - + + "\t-gray" + "\tConvert to grayscale before premul\n" + // premul is not on by default, but really should be or textures aren't sampled correctly // but this really only applies to color channel textures, so off by default. 
"\t-premul" "\tPremultiplied alpha to src pixels before output\n" - "\n" - + // This is meant to work with shaders that (incorrectly) premul after sampling. // limits the rgb bleed in regions that should not display colors. Can stil have black color halos. "\t-prezero" "\tPremultiplied alpha to src pixels before output but only where a=0\n" - "\n" + // This emulates Photoshop premul only on png files. Multiplies srgbColor.rgb * a. "\t-premulrgb" - "\tPremultiplied alpha to src pixels at load to emulate Photoshop, don't use with -premul\n" + "\tPremultiplied alpha to src pixels at load to emulate Photoshop srgbColor.rgb * a, don't use with -premul\n" "\n" "\t-optopaque" @@ -1060,10 +1099,11 @@ void kramEncodeUsage(bool showVersion = true) "\tSpecifies how many chunks to split up texture into 2darray\n" // ktx2 specific settings - "\t-zstd level" - "\tktx2 with zstd mip compressor, 0 for default\n" - "\t-zlib level" - "\tktx2 with zlib mip compressor, 0 for defauult\n" + "\tktx2 mip compression, if not present then no compresion used\n" + "\t-zstd 0" + "\tktx2 with zstd mip compressor, 0 for default, 0 to 100\n" + "\t-zlib 0" + "\tktx2 with zlib mip compressor, 0 for default, 0 to 11\n" "\t-swizzle [rgba01 x4]" "\tSpecifies pre-encode swizzle pattern\n" @@ -1714,6 +1754,7 @@ static int32_t kramAppEncode(vector& args) ImageInfoArgs infoArgs; bool isPremulRgb = false; + bool isGray = false; bool error = false; for (int32_t i = 0; i < argc; ++i) { @@ -1734,7 +1775,11 @@ static int32_t kramAppEncode(vector& args) infoArgs.optimizeFormatForOpaque = true; //continue; } - + else if (isStringEqual(word, "-gray")) { + isGray = true; + //continue; + } + // mip setting else if (isStringEqual(word, "-mipmax")) { ++i; @@ -1791,11 +1836,6 @@ static int32_t kramAppEncode(vector& args) infoArgs.doMipmaps = false; //continue; } -// else if (isStringEqual(word, "-mipalign")) { -// // pad start of each mip to pixel/block size of format -// infoArgs.skipImageLength = true; -// continue; -// } else if (isStringEqual(word, "-heightScale")) { ++i; @@ -2120,7 +2160,7 @@ static int32_t kramAppEncode(vector& args) vector srcFileBuffer; bool success = SetupSourceImage(srcMmapHelper, srcFileHelper, srcFileBuffer, - srcFilename, srcImage, isPremulRgb); + srcFilename, srcImage, isPremulRgb, isGray); if (success) { success = SetupTmpFile(tmpFileHelper, isDstKTX2 ? ".ktx2" : ".ktx"); diff --git a/libkram/kram/Kram.h b/libkram/kram/Kram.h index 9a52a695..5d715e97 100644 --- a/libkram/kram/Kram.h +++ b/libkram/kram/Kram.h @@ -14,7 +14,7 @@ class KTXImage; // helpers to source from a png or single level of a ktx bool LoadKtx(const uint8_t* data, size_t dataSize, Image& sourceImage); -bool LoadPng(const uint8_t* data, size_t dataSize, bool isPremulSrgb, Image& sourceImage); +bool LoadPng(const uint8_t* data, size_t dataSize, bool isPremulSrgb, bool isGray, Image& sourceImage); // can call these with data instead of needing a file string kramInfoPNGToString(const string& srcFilename, const uint8_t* data, uint64_t dataSize, bool isVerbose); diff --git a/libkram/kram/KramMipper.cpp b/libkram/kram/KramMipper.cpp index 1bd80432..dbdd80f7 100644 --- a/libkram/kram/KramMipper.cpp +++ b/libkram/kram/KramMipper.cpp @@ -59,7 +59,7 @@ inline Color Snormfloat4ToColor(float4 value) return c; } -inline float linearToSRGBFunc(float lin) +float linearToSRGBFunc(float lin) { assert(lin >= 0.0f && lin <= 1.0f); return (lin < 0.00313066844250063f) ? 
(lin * 12.92f) diff --git a/libkram/kram/KramMipper.h b/libkram/kram/KramMipper.h index 19bde640..36329eb6 100644 --- a/libkram/kram/KramMipper.h +++ b/libkram/kram/KramMipper.h @@ -41,6 +41,9 @@ void remapToSignedBCEndpoint88(uint16_t &endpoint); float4 linearToSRGB(float4 lin); +// return srgb from a linear intesnity +float linearToSRGBFunc(float lin); + class ImageData { public: // data can be mipped as 8u, 16f, or 32f. Prefer smallest size. @@ -71,9 +74,12 @@ class Mipper { void initPixelsHalfIfNeeded(ImageData &srcImage, bool doPremultiply, bool doPrezero, vector &halfImage) const; + // these use table lookups, so need to be class members float toLinear(uint8_t srgb) const { return srgbToLinear[srgb]; } float toAlphaFloat(uint8_t alpha) const { return alphaToFloat[alpha]; } + float4 toLinear(const Color& c) const { return float4m(toLinear(c.r), toLinear(c.g), toLinear(c.b), toAlphaFloat(c.a)); } + uint8_t toPremul(uint8_t channelIntensity, uint8_t alpha) const { return ((uint32_t)channelIntensity * (uint32_t)alpha) / 255; } private: From 0b387f1204997250cf8640beb3de69de0cff387f Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Fri, 21 May 2021 15:58:16 -0700 Subject: [PATCH 058/901] kram - use rounding in conversions to 8-bit --- libkram/kram/Kram.cpp | 6 +++--- libkram/kram/KramImageInfo.cpp | 3 ++- libkram/kram/KramMipper.cpp | 4 ++-- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/libkram/kram/Kram.cpp b/libkram/kram/Kram.cpp index 815d64f0..e6f79b93 100644 --- a/libkram/kram/Kram.cpp +++ b/libkram/kram/Kram.cpp @@ -66,7 +66,7 @@ inline Color toGrayscaleRec709(Color c, const Mipper& mipper) { float4 clin = mipper.toLinear(c); float luminance = dot(clin, kRec709Conversion); - c.r = (uint8_t)(linearToSRGBFunc(luminance) * 255.1f); + c.r = (uint8_t)(roundf(linearToSRGBFunc(luminance) * 255.0f)); // can just copy into the other 3 terms c.g = c.b = c.r; @@ -2530,7 +2530,7 @@ void PSTest() { for (int32_t i = 0; i < 256; ++i) { float value = mipper.toLinear(values1[i]); - values2[i] = uint8_t(value * 255.1); + values2[i] = uint8_t(roundf(value * 255.0f)); //KLOGI("srgb", "[%d] = %g\n", i, value); } @@ -2540,7 +2540,7 @@ void PSTest() { float value = mipper.toLinear(i); value *= alphaF; - values3[i] = uint8_t(value * 255.1); + values3[i] = uint8_t(roundf(value * 255.0)); } // log them side-by-side for comparison diff --git a/libkram/kram/KramImageInfo.cpp b/libkram/kram/KramImageInfo.cpp index 3a324a3d..1a0ce2de 100644 --- a/libkram/kram/KramImageInfo.cpp +++ b/libkram/kram/KramImageInfo.cpp @@ -1309,6 +1309,7 @@ void ImageInfo::heightToNormals(int32_t w, int32_t h, normal = normalize(normal); // convert to unorm + // TODO: may need to do around unorm8 offset of unorm 255/127 and + 128/127 normal = normal * 0.5 + 0.5f; // write out the result @@ -1343,7 +1344,7 @@ void ImageInfo::heightToNormals(int32_t w, int32_t h, normal = normalize(normal); // convert to unorm - normal = normal * 127.0f + 128.0f; + normal = round(normal * 127.0f) + 128.0f; Color& dstPixel8 = dstPixels8[y0 + x]; diff --git a/libkram/kram/KramMipper.cpp b/libkram/kram/KramMipper.cpp index dbdd80f7..84a5f4b3 100644 --- a/libkram/kram/KramMipper.cpp +++ b/libkram/kram/KramMipper.cpp @@ -34,13 +34,13 @@ int32_t nextPow2(int32_t num) inline uint8_t floatToUint8(float value) { - return (uint8_t)roundf(value * 255.1f); + return (uint8_t)roundf(value * 255.0f); } inline Color Unormfloat4ToColor(float4 value) { Color c; - value = round(value * 255.1f); + value = round(value * 255.0f); c.r = (uint8_t)value.x; c.g = 
(uint8_t)value.y; c.b = (uint8_t)value.z; From 9c2182ddd9725accf2ffe04005fac98fb4b2d51b Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Fri, 21 May 2021 19:32:56 -0700 Subject: [PATCH 059/901] Kram - fix ktx1 mip offset. Was setting lengthCompressed to length, but switch to setting to 0 on ktx1 files to avoid confusion. So don't use that in the offset calcs. --- libkram/kram/KramImage.cpp | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/libkram/kram/KramImage.cpp b/libkram/kram/KramImage.cpp index e7b4017a..cb1a6e44 100644 --- a/libkram/kram/KramImage.cpp +++ b/libkram/kram/KramImage.cpp @@ -804,9 +804,9 @@ bool Image::decodeImpl(const KTXImage& srcImage, FILE* dstFile, KTXImage& dstIma // write the mips out to the file, and code above can then decode into the same buffer // This isn't correct for cubes, arrays, and other types. The mip length is only written out once for all mips. - int32_t dstMipOffset = dstMipLevel.offset + chunk * dstMipLevel.length; if (chunk == 0 && !dstImage.skipImageLength) { + // sie of one mip uint32_t levelSize = dstMipLevel.length; // cubes write the face size, not the levels size, ugh @@ -814,11 +814,15 @@ bool Image::decodeImpl(const KTXImage& srcImage, FILE* dstFile, KTXImage& dstIma levelSize *= numChunks; } - if (!writeDataAtOffset((const uint8_t*)&levelSize, sizeof(levelSize), dstMipOffset - sizeof(levelSize), dstFile, dstImage)) { + if (!writeDataAtOffset((const uint8_t*)&levelSize, sizeof(levelSize), dstMipLevel.offset - sizeof(levelSize), dstFile, dstImage)) { return false; } } + // only writing one mip at a time in the level here + // so written bytes are only length and not numChunks * length + int32_t dstMipOffset = dstMipLevel.offset + chunk * dstMipLevel.length; + if (!writeDataAtOffset(outputTexture.data(), dstMipLevel.length, dstMipOffset, dstFile, dstImage)) { return false; } @@ -1565,7 +1569,7 @@ bool Image::writeKTX1FileOrImage( auto& level = dstImage.mipLevels[i]; level.offset = lastMipOffset + 4; // offset by length - lastMipOffset = level.offset + level.lengthCompressed * numChunks; + lastMipOffset = level.offset + level.length * numChunks; } } @@ -1789,7 +1793,8 @@ bool Image::createMipsFromChunks( int32_t numSkippedMips = data.numSkippedMips; for (int32_t mipLevel = 0; mipLevel < (int32_t)dstMipLevels.size(); ++mipLevel) { - + const auto& dstMipLevel = dstMipLevels[mipLevel]; + if (mipLevel == 0 && !info.doSDF) { if (numSkippedMips > 0) { @@ -1827,12 +1832,12 @@ bool Image::createMipsFromChunks( h = dstImageData.height; } } - - // mipOffset are start of first chunk of a given mip size - size_t mipStorageSize = dstMipLevels[mipLevel].length; // / numChunks; + + // size of one mip, not levelSize = numChunks * mipStorageSize + size_t mipStorageSize = dstMipLevel.length; // offset only valid for KTX and KTX2 w/o isCompressed - size_t mipOffset = dstMipLevels[mipLevel].offset + chunk * mipStorageSize; + size_t mipChunkOffset = dstMipLevel.offset + chunk * mipStorageSize; // just to check that each mip has a unique offset //KLOGI("Image", "chunk:%d %d\n", chunk, mipOffset); @@ -1870,7 +1875,7 @@ bool Image::createMipsFromChunks( // https://github.com/BinomialLLC/basis_universal/issues/40 // this contains all bytes at a mipLOD but not any padding - uint32_t levelSize = (uint32_t)dstMipLevels[mipLevel].length; + uint32_t levelSize = (uint32_t)mipStorageSize; // this is size of one face for non-array cubes // but for everything else, it's the numChunks * mipStorageSize @@ -1881,7 +1886,7 @@ bool 
Image::createMipsFromChunks( int32_t levelSizeOf = sizeof(levelSize); assert(levelSizeOf == 4); - if (!writeDataAtOffset((const uint8_t*)&levelSize, levelSizeOf, mipOffset - levelSizeOf, dstFile, dstImage)) { + if (!writeDataAtOffset((const uint8_t*)&levelSize, levelSizeOf, dstMipLevel.offset - levelSizeOf, dstFile, dstImage)) { return false; } } @@ -1889,7 +1894,7 @@ bool Image::createMipsFromChunks( // Note that default ktx alignment is 4, so r8u, r16f mips need to be padded out to 4 bytes // may need to write these out row by row, and let fseek pad the rows to 4. - if (!writeDataAtOffset(outputTexture.data.data(), mipStorageSize, mipOffset, dstFile, dstImage)) { + if (!writeDataAtOffset(outputTexture.data.data(), mipStorageSize, mipChunkOffset, dstFile, dstImage)) { return false; } } From b6d5b0f739ef32a513bc4d0b069c6242105a13bc Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sat, 22 May 2021 13:20:30 -0700 Subject: [PATCH 060/901] Kram - normalize weights and add some round/snapping to 255 on non-pow2 mipgen. Otherwise, mipgen was pulling down the alpha from 255 to 254. This only affects non-pow2 mips, since they weight in x and y. --- libkram/kram/KTXImage.cpp | 12 ++++ libkram/kram/KTXImage.h | 3 +- libkram/kram/KramImage.cpp | 111 +++++++++++++++++++++++++++++++++++- libkram/kram/KramMipper.cpp | 20 ++++++- 4 files changed, 143 insertions(+), 3 deletions(-) diff --git a/libkram/kram/KTXImage.cpp b/libkram/kram/KTXImage.cpp index d00c9c2f..8d328a2e 100644 --- a/libkram/kram/KTXImage.cpp +++ b/libkram/kram/KTXImage.cpp @@ -759,6 +759,18 @@ uint32_t KTXImage::mipLevelSize(uint32_t width_, uint32_t height_) const return count * size; } +uint32_t KTXImage::blockCountRows(uint32_t width_) const +{ + assert(width_ >= 1); + + Int2 dims = blockDims(); + + width_ = (width_ + dims.x - 1) / dims.x; + + return width_; +} + + uint32_t KTXImage::blockCount(uint32_t width_, uint32_t height_) const { assert(width_ >= 1 && height_ >= 1); diff --git a/libkram/kram/KTXImage.h b/libkram/kram/KTXImage.h index b82f9299..b7971682 100644 --- a/libkram/kram/KTXImage.h +++ b/libkram/kram/KTXImage.h @@ -294,7 +294,8 @@ class KTXImage { uint32_t blockSize() const; Int2 blockDims() const; uint32_t blockCount(uint32_t width_, uint32_t height_) const; - + uint32_t blockCountRows(uint32_t width_) const; + // mip data depends on format uint32_t mipLevelSize(uint32_t width_, uint32_t height_) const; //int totalMipLevels() const; diff --git a/libkram/kram/KramImage.cpp b/libkram/kram/KramImage.cpp index cb1a6e44..514dc646 100644 --- a/libkram/kram/KramImage.cpp +++ b/libkram/kram/KramImage.cpp @@ -87,6 +87,25 @@ class TextureData { vector data; }; +// return the block mode of a bc7 block, or -1 if finvalid +int32_t decodeBC7BlockMode(const void *pBlock) +{ + const uint32_t first_byte = static_cast(pBlock)[0]; + + for (uint32_t mode = 0; mode <= 7; mode++) + { + // bit followed by zeros, mask out upper + uint8_t bits = (1U << mode); + + if ((first_byte & bits) == bits) + { + return mode; + } + } + + return -1; +} + Image::Image() : _width(0), _height(0), _hasColor(false), _hasAlpha(false) { } @@ -1600,7 +1619,73 @@ bool Image::writeKTX1FileOrImage( return true; } - +void printBCBlock(const uint8_t* bcBlock, MyMTLPixelFormat format) { + // https://docs.microsoft.com/en-us/windows/win32/direct3d11/bc7-format-mode-reference#mode-6 + if (!(format == MyMTLPixelFormatBC7_RGBAUnorm || format == MyMTLPixelFormatBC7_RGBAUnorm_sRGB)) { + return; + } + + uint32_t mode = decodeBC7BlockMode(bcBlock); + + switch(mode) { + case 6: { 
+ const uint64_t* block = (const uint64_t*)bcBlock; + // 6 bits of signature - LSB 000001 + // 7 bits R0, 7 bits R1 + // 7 bits G0, 7 bits G1 + // 7 bits B0, 7 bits B1 + // 7 bits A0, 7 bits A1 + + // 1 bit P0, 1 bit P1 + // 63 bits of index data, how dos that work? + + uint32_t R0 = (uint32_t)((block[0] >> uint64_t(7*1)) & uint64_t(0b1111111)); + uint32_t R1 = (uint32_t)((block[0] >> uint64_t(7*2)) & uint64_t(0b1111111)); + + uint32_t G0 = (uint32_t)((block[0] >> uint64_t(7*3)) & uint64_t(0b1111111)); + uint32_t G1 = (uint32_t)((block[0] >> uint64_t(7*4)) & uint64_t(0b1111111)); + + uint32_t B0 = (uint32_t)((block[0] >> uint64_t(7*5)) & uint64_t(0b1111111)); + uint32_t B1 = (uint32_t)((block[0] >> uint64_t(7*6)) & uint64_t(0b1111111)); + + uint32_t A0 = (uint32_t)((block[0] >> uint64_t(7*7)) & uint64_t(0b1111111)); + uint32_t A1 = (uint32_t)((block[0] >> uint64_t(7*8)) & uint64_t(0b1111111)); + + uint32_t P0 = (uint32_t)((block[0] >> uint64_t(7*9)) & uint64_t(0b1)); + uint32_t P1 = (uint32_t)((block[1] >> uint64_t(0)) & uint64_t(0b1)); + + // r,g,b,a to be or-ed with the pbit to get tha actual value of the endpoints + + KLOGI("kram", + "R0=%d, R1=%d\n" + "G0=%d, G1=%d\n" + "B0=%d, B1=%d\n" + "A0=%d, A1=%d\n" + "P0=%d, P1=%d\n", + R0, R1, + G0, G1, + B0, B1, + A0, A1, + P0, P1); + + break; + } + } + + // Have a block debug mode that hud's the mode pixel values + // over the hovered block. + uint32_t pixels[4*4]; + if (!unpack_bc7(bcBlock, (bc7decomp::color_rgba*)pixels)) { + return; + } + + for (uint32_t y = 0; y < 4; ++y) { + KLOGI("kram", + "[%u] = %08X %08X %08X %08X\n", + y, pixels[4*y + 0], pixels[4*y + 1], pixels[4*y + 2], pixels[4*y + 3] + ); + } +} bool Image::createMipsFromChunks( ImageInfo& info, @@ -2184,6 +2269,9 @@ bool Image::compressMipLevel(const ImageInfo& info, KTXImage& image, int32_t blockSize = image.blockSize(); for (int32_t y = 0; y < h; y += blockDim) { for (int32_t x = 0; x < w; x += blockDim) { + + + // Have to copy to temp block, since encode doesn't test w/h edges // copy src to 4x4 clamping the edge pixels // TODO: do clamped edge pixels get weighted more then on non-multiple of 4 images ? @@ -2210,6 +2298,17 @@ bool Image::compressMipLevel(const ImageInfo& info, KTXImage& image, int32_t b0 = by * blocks_x + bx; uint8_t* dstBlock = &dstData[b0 * blockSize]; + // bc7enc is not setting pbit on bc7 mode6 and doesn's support opaque mode3 yet + // , so opaque textures repro as 254 alpha on Toof-a.png. + // ate sets pbits on mode 6 for same block. Also fixed mip weights in non-pow2 mipper. 
+ +// bool doPrintBlock = false; +// if (bx == 8 && by == 1) { +// int32_t bp = 0; +// bp = bp; +// doPrintBlock = true; +// } + switch (info.pixelFormat) { case MyMTLPixelFormatBC1_RGBA: case MyMTLPixelFormatBC1_RGBA_sRGB: { @@ -2239,6 +2338,10 @@ bool Image::compressMipLevel(const ImageInfo& info, KTXImage& image, case MyMTLPixelFormatBC7_RGBAUnorm: case MyMTLPixelFormatBC7_RGBAUnorm_sRGB: { bc7enc_compress_block(dstBlock, srcPixelCopy, &bc7params); + + if (doPrintBlock) { + printBCBlock(dstBlock, info.pixelFormat); + } break; } default: { @@ -2281,6 +2384,12 @@ bool Image::compressMipLevel(const ImageInfo& info, KTXImage& image, if (info.isSigned) { doRemapSnormEndpoints = true; } + + + // find the 8,1 block and print it +// uint32_t numRowBlocks = image.blockCountRows(w); +// const uint8_t* block = outputTexture.data.data() + (numRowBlocks * 1 + 8) * image.blockSize(); +// printBCBlock(block, pixelFormatRemap); } #endif #if COMPILE_SQUISH diff --git a/libkram/kram/KramMipper.cpp b/libkram/kram/KramMipper.cpp index 84a5f4b3..e3068624 100644 --- a/libkram/kram/KramMipper.cpp +++ b/libkram/kram/KramMipper.cpp @@ -34,7 +34,7 @@ int32_t nextPow2(int32_t num) inline uint8_t floatToUint8(float value) { - return (uint8_t)roundf(value * 255.0f); + return (uint8_t)roundf(value * 255.0f); // or use 255.1f ? } inline Color Unormfloat4ToColor(float4 value) @@ -390,6 +390,12 @@ void Mipper::mipmapLevelOdd(const ImageData& srcImage, ImageData& dstImage) cons y1w = 0.5f; } + // normalize weights + float totalY = ymw + y0w + y1w; + ymw /= totalY; + y0w /= totalY; + y1w /= totalY; +// ym *= width; y0 *= width; y1 *= width; @@ -414,6 +420,12 @@ void Mipper::mipmapLevelOdd(const ImageData& srcImage, ImageData& dstImage) cons x1w = 0.5f; } + // this mipgen is pulling down alpha of 255 to 241 and smaller over the course of the whole mip chain + float totalX = xmw + x0w + x1w; + xmw /= totalX; + x0w /= totalX; + x1w /= totalX; + // we have 3x2, 2x3 or 3x3 pattern to weight // now lookup the 9 values from the buffer @@ -487,6 +499,9 @@ void Mipper::mipmapLevelOdd(const ImageData& srcImage, ImageData& dstImage) cons if (!srcImage.isHDR) { // convert back to srgb for encode if (srcImage.isSRGB) { + // round to 8-bits before conversion, and then back + cFloat = round(cFloat * 255.0f) / 255.0f; + cFloat.x = linearToSRGBFunc(cFloat.x); cFloat.y = linearToSRGBFunc(cFloat.y); cFloat.z = linearToSRGBFunc(cFloat.z); @@ -508,6 +523,9 @@ void Mipper::mipmapLevelOdd(const ImageData& srcImage, ImageData& dstImage) cons if (!srcImage.isHDR) { // convert back to srgb for encode if (srcImage.isSRGB) { + // round to 8-bits before conversion, and then back + cFloat = round(cFloat * 255.0f) / 255.0f; + cFloat.x = linearToSRGBFunc(cFloat.x); cFloat.y = linearToSRGBFunc(cFloat.y); cFloat.z = linearToSRGBFunc(cFloat.z); From 6526b1c985fe6b375fbf7f45e37f6698ada5fcb6 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sat, 22 May 2021 13:23:49 -0700 Subject: [PATCH 061/901] kram - add back bool doPrintBlock for now. --- libkram/kram/KramImage.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libkram/kram/KramImage.cpp b/libkram/kram/KramImage.cpp index 514dc646..9ccee08d 100644 --- a/libkram/kram/KramImage.cpp +++ b/libkram/kram/KramImage.cpp @@ -2302,7 +2302,7 @@ bool Image::compressMipLevel(const ImageInfo& info, KTXImage& image, // , so opaque textures repro as 254 alpha on Toof-a.png. // ate sets pbits on mode 6 for same block. Also fixed mip weights in non-pow2 mipper. 
-// bool doPrintBlock = false; + bool doPrintBlock = false; // if (bx == 8 && by == 1) { // int32_t bp = 0; // bp = bp; From 6f85d7c46f9e9fe514ab221157d642f7e8260346 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sat, 22 May 2021 14:04:25 -0700 Subject: [PATCH 062/901] kram - sat don't snap/round before linearToSrgb conversion with normalized weights got some values 1.0002 and that asserted in linearToSrgb. But shouldn't snap to 255 before that. --- libkram/kram/KramMipper.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/libkram/kram/KramMipper.cpp b/libkram/kram/KramMipper.cpp index e3068624..f875127c 100644 --- a/libkram/kram/KramMipper.cpp +++ b/libkram/kram/KramMipper.cpp @@ -499,8 +499,8 @@ void Mipper::mipmapLevelOdd(const ImageData& srcImage, ImageData& dstImage) cons if (!srcImage.isHDR) { // convert back to srgb for encode if (srcImage.isSRGB) { - // round to 8-bits before conversion, and then back - cFloat = round(cFloat * 255.0f) / 255.0f; + // getting some values > 1 + cFloat = saturate(cFloat); cFloat.x = linearToSRGBFunc(cFloat.x); cFloat.y = linearToSRGBFunc(cFloat.y); @@ -523,8 +523,8 @@ void Mipper::mipmapLevelOdd(const ImageData& srcImage, ImageData& dstImage) cons if (!srcImage.isHDR) { // convert back to srgb for encode if (srcImage.isSRGB) { - // round to 8-bits before conversion, and then back - cFloat = round(cFloat * 255.0f) / 255.0f; + // getting some values > 1 + cFloat = saturate(cFloat); cFloat.x = linearToSRGBFunc(cFloat.x); cFloat.y = linearToSRGBFunc(cFloat.y); From 4b79054cc96c77dd049a3101c8b8abca7a5525ee Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sat, 22 May 2021 14:10:44 -0700 Subject: [PATCH 063/901] kram - more conversion bullteproofing. go through sat calls that handle srgb conversion. 
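Side note on patches 062/063: with normalized filter weights the blended values can land slightly above 1.0 (the 1.0002 mentioned above), which trips the assert in the strict [0,1] linear-to-sRGB helper, so the float4 paths now saturate before converting rather than snapping to 8 bits first. A minimal standalone sketch of that saturate-then-convert idea; the function name is hypothetical, while the piecewise transfer function and cutoff constant match the ones already in KramMipper.cpp:

    #include <algorithm>
    #include <cmath>

    // Saturate first, then apply the standard piecewise sRGB transfer function.
    // The clamp is what keeps weighted sums like 1.0002 from asserting in a
    // converter that requires inputs in [0,1].
    inline float linearToSRGBSaturating(float lin)
    {
        lin = std::min(std::max(lin, 0.0f), 1.0f);
        return (lin < 0.00313066844250063f)
            ? (lin * 12.92f)
            : (1.055f * std::pow(lin, 1.0f / 2.4f) - 0.055f);
    }

Patch 063 then routes the float4 mip paths through the single linearToSRGB(cFloat) call, which the diff notes saturates internally.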
--- libkram/kram/Kram.cpp | 1 + libkram/kram/KramMipper.cpp | 31 ++++++++----------------------- 2 files changed, 9 insertions(+), 23 deletions(-) diff --git a/libkram/kram/Kram.cpp b/libkram/kram/Kram.cpp index e6f79b93..4d5374a6 100644 --- a/libkram/kram/Kram.cpp +++ b/libkram/kram/Kram.cpp @@ -65,6 +65,7 @@ inline Color toGrayscaleRec709(Color c, const Mipper& mipper) { float4 clin = mipper.toLinear(c); float luminance = dot(clin, kRec709Conversion); + luminance = std::min(luminance, 1.0f); // to avoid assert if math goes above 1.0 c.r = (uint8_t)(roundf(linearToSRGBFunc(luminance) * 255.0f)); diff --git a/libkram/kram/KramMipper.cpp b/libkram/kram/KramMipper.cpp index f875127c..b20f9f77 100644 --- a/libkram/kram/KramMipper.cpp +++ b/libkram/kram/KramMipper.cpp @@ -225,10 +225,7 @@ void Mipper::initPixelsHalfIfNeeded(ImageData& srcImage, bool doPremultiply, boo if (doPremultiply && c0.a != 255) { // need to overwrite the color 8-bit color too // but this writes back to srgb for encoding - cFloat.x = linearToSRGBFunc(cFloat.x); - cFloat.y = linearToSRGBFunc(cFloat.y); - cFloat.z = linearToSRGBFunc(cFloat.z); - + cFloat = linearToSRGB(cFloat); c0 = Unormfloat4ToColor(cFloat); } } @@ -499,15 +496,11 @@ void Mipper::mipmapLevelOdd(const ImageData& srcImage, ImageData& dstImage) cons if (!srcImage.isHDR) { // convert back to srgb for encode if (srcImage.isSRGB) { - // getting some values > 1 - cFloat = saturate(cFloat); - - cFloat.x = linearToSRGBFunc(cFloat.x); - cFloat.y = linearToSRGBFunc(cFloat.y); - cFloat.z = linearToSRGBFunc(cFloat.z); + // getting some values > 1m, but this saturates + cFloat = linearToSRGB(cFloat); } - // override rgba8u version, since this is what is encoded + // overwrite rgba8u version, since this is what is encoded Color c = Unormfloat4ToColor(cFloat); // can only skip this if cSrc = cDst @@ -523,12 +516,8 @@ void Mipper::mipmapLevelOdd(const ImageData& srcImage, ImageData& dstImage) cons if (!srcImage.isHDR) { // convert back to srgb for encode if (srcImage.isSRGB) { - // getting some values > 1 - cFloat = saturate(cFloat); - - cFloat.x = linearToSRGBFunc(cFloat.x); - cFloat.y = linearToSRGBFunc(cFloat.y); - cFloat.z = linearToSRGBFunc(cFloat.z); + // getting some values > 1, but this saturates + cFloat = linearToSRGB(cFloat); } // Overwrite the RGBA8u image too (this will go out to @@ -609,9 +598,7 @@ void Mipper::mipmapLevel(const ImageData& srcImage, ImageData& dstImage) const if (!srcImage.isHDR) { // convert back to srgb for encode if (srcImage.isSRGB) { - cFloat.x = linearToSRGBFunc(cFloat.x); - cFloat.y = linearToSRGBFunc(cFloat.y); - cFloat.z = linearToSRGBFunc(cFloat.z); + cFloat = linearToSRGB(cFloat); } // override rgba8u version, since this is what is encoded @@ -639,9 +626,7 @@ void Mipper::mipmapLevel(const ImageData& srcImage, ImageData& dstImage) const if (!srcImage.isHDR) { // convert back to srgb for encode if (srcImage.isSRGB) { - cFloat.x = linearToSRGBFunc(cFloat.x); - cFloat.y = linearToSRGBFunc(cFloat.y); - cFloat.z = linearToSRGBFunc(cFloat.z); + cFloat = linearToSRGB(cFloat); } // Overwrite the RGBA8u image too (this will go out to From ff0e4bff2775de9a633bb05d0ca9bd090fd64599 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sat, 22 May 2021 15:20:15 -0700 Subject: [PATCH 064/901] kramv - hide buttons that aren't relevant to the currently loaded texture --- kramv/KramRenderer.mm | 4 +-- kramv/KramViewerBase.cpp | 2 +- kramv/KramViewerBase.h | 5 ++- kramv/KramViewerMain.mm | 76 ++++++++++++++++++++++++++++++++++------ 4 files changed, 73 
insertions(+), 14 deletions(-) diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index 1b206419..dc81331e 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -935,8 +935,8 @@ - (void)drawMain:(id)commandBuffer view:(nonnull MTKView *)vie uniformsLevel.drawOffset.y -= h + gap; } - // this its ktxImage.totalLevels() - int32_t numLevels = _showSettings->totalLevels(); + // this its ktxImage.totalChunks() + int32_t numLevels = _showSettings->totalChunks(); for (int32_t level = 0; level < numLevels; ++level) { diff --git a/kramv/KramViewerBase.cpp b/kramv/KramViewerBase.cpp index 5fb3d12c..e241bd5d 100644 --- a/kramv/KramViewerBase.cpp +++ b/kramv/KramViewerBase.cpp @@ -5,7 +5,7 @@ namespace kram using namespace simd; using namespace std; -int32_t ShowSettings::totalLevels() const { +int32_t ShowSettings::totalChunks() const { int32_t one = 1; return std::max(one, faceCount) * std::max(one, arrayCount) * std::max(one, sliceCount); } diff --git a/kramv/KramViewerBase.h b/kramv/KramViewerBase.h index 7159d256..d5cf2211 100644 --- a/kramv/KramViewerBase.h +++ b/kramv/KramViewerBase.h @@ -68,7 +68,7 @@ class ShowSettings { int32_t sliceNumber = 0; int32_t sliceCount = 0; - int32_t totalLevels() const; + int32_t totalChunks() const; // DONE: hook all these up to shader and view bool isHudShown = true; @@ -103,6 +103,9 @@ class ShowSettings { // draw with reverseZ to better match perspective bool isReverseZ = true; + // whether files are pulled from disk or zip archive. + bool isArchive = false; + // can have up to 5 channels (xyz as xy, 2 other channels) int32_t numChannels = 0; diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index 47f04c8c..0c251f48 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -401,6 +401,7 @@ void encodeSrcForEncodeComparisons(bool increment) { @implementation MyMTKView { NSStackView* _buttonStack; + NSMutableArray* _buttonArray; NSTextField* _hudLabel; NSTextField* _hudLabel2; @@ -464,6 +465,7 @@ - (instancetype)initWithCoder:(NSCoder*)coder { _zoomGesture = [[NSMagnificationGestureRecognizer alloc] initWithTarget:self action:@selector(handleGesture:)]; [self addGestureRecognizer:_zoomGesture]; + _buttonArray = [[NSMutableArray alloc] init]; _buttonStack = [self _addButtons]; // hide until image loaded @@ -554,13 +556,19 @@ - (NSStackView*)_addButtons { } else { //sKrect.origin.y += 25; + + // keep all buttons, since stackView will remove and pack the stack + [_buttonArray addObject:button]; } [buttons addObject:button]; + + } NSStackView* stackView = [NSStackView stackViewWithViews:buttons]; stackView.orientation = NSUserInterfaceLayoutOrientationVertical; + stackView.detachesHiddenViews = YES; // default, but why have to have _buttonArrary [self addSubview: stackView]; #if 0 @@ -623,9 +631,9 @@ - (NSTextField*)_addHud:(BOOL)isShadow // add vertical constrains to have it fill window, but keep 800 width label.preferredMaxLayoutWidth = 800; - NSDictionary* views = @{ @"label" : label }; - [self addConstraints:[NSLayoutConstraint constraintsWithVisualFormat:@"H:|-[label]" options:0 metrics:nil views:views]]; - [self addConstraints:[NSLayoutConstraint constraintsWithVisualFormat:@"V:|-[label]" options:0 metrics:nil views:views]]; + //NSDictionary* views = @{ @"label" : label }; + //[self addConstraints:[NSLayoutConstraint constraintsWithVisualFormat:@"H:|-[label]" options:0 metrics:nil views:views]]; + //[self addConstraints:[NSLayoutConstraint constraintsWithVisualFormat:@"V:|-[label]" options:0 metrics:nil 
views:views]]; return label; } @@ -661,7 +669,7 @@ - (void)doZoomMath:(float)newZoom newPan:(float2&)newPan { float maxX = 0.5f; float minY = -0.5f; if (_showSettings->isShowingAllLevelsAndMips) { - maxX += 1.0f * (_showSettings->totalLevels() - 1); + maxX += 1.0f * (_showSettings->totalChunks() - 1); minY -= 1.0f * (_showSettings->maxLOD - 1); } @@ -739,7 +747,7 @@ - (void)handleGesture:(NSGestureRecognizer *)gestureRecognizer CGRect imageRect = CGRectMake(pt0.x, pt0.y, pt1.x - pt0.x, pt1.y - pt0.y); CGRect viewRect = CGRectMake(-1.0f, -1.0f, 2.0f, 2.0f); - int32_t numTexturesX = _showSettings->totalLevels(); + int32_t numTexturesX = _showSettings->totalChunks(); int32_t numTexturesY = _showSettings->maxLOD; if (_showSettings->isShowingAllLevelsAndMips) { @@ -909,7 +917,7 @@ - (void)updateEyedropper { // TODO: finish this logic, need to account for gaps too, and then isolate to a given level and mip to sample // if (_showSettings->isShowingAllLevelsAndMips) { -// pixel.x *= _showSettings->totalLevels(); +// pixel.x *= _showSettings->totalChunks(); // pixel.y *= _showSettings->maxLOD; // } @@ -1064,8 +1072,8 @@ - (void)updateEyedropper { text += tmp; } - // display the premul values too - if (c.a < 1.0f) + // display the premul values too, but not fully transparent pixels + if (c.a > 0.0 && c.a < 1.0f) { printChannels(tmp, "lnp: ", toPremul(c), numChannels, isFloat, isSigned); text += tmp; @@ -1168,7 +1176,7 @@ - (void)scrollWheel:(NSEvent *)event CGRect imageRect = CGRectMake(pt0.x, pt0.y, pt1.x - pt0.x, pt1.y - pt0.y); CGRect viewRect = CGRectMake(-1.0f, -1.0f, 2.0f, 2.0f); - int32_t numTexturesX = _showSettings->totalLevels(); + int32_t numTexturesX = _showSettings->totalChunks(); int32_t numTexturesY = _showSettings->maxLOD; if (_showSettings->isShowingAllLevelsAndMips) { @@ -1203,6 +1211,44 @@ - (void)scrollWheel:(NSEvent *)event } } +// use this to enable/disable menus, buttons, etc. Called on every event +// when not implemented, then user items are always enabled +- (BOOL)validateUserInterfaceItem:(id)item +{ + // TODO: tie to menus and buttons + return YES; +} + +- (NSButton*)findButton:(const char*)name { + NSString* title = [NSString stringWithUTF8String:name]; + for (NSButton* button in _buttonArray) { + if (button.title == title) + return button; + } + return nil; +} + +- (void)updateUIAfterLoad { + + // base on showSettings, hide some fo the buttons + bool isShowAllHidden = _showSettings->totalChunks() <= 1 && _showSettings->mipLOD <= 1; + + bool isArrayHidden = _showSettings->arrayCount <= 1; + bool isFaceSliceHidden = _showSettings->faceCount <= 1 && _showSettings->sliceCount <= 1; + bool isMipHidden = _showSettings->mipLOD <= 1; + + bool isJumpToNextHidden = !_showSettings->isArchive; + + // could hide rgba buttons on some formas + // or have XYZBA on nromals, but have Y mapped to array + + [self findButton:"Y"].hidden = isArrayHidden; + [self findButton:"F"].hidden = isFaceSliceHidden; + [self findButton:"M"].hidden = isMipHidden; + [self findButton:"S"].hidden = isShowAllHidden; + [self findButton:"J"].hidden = isJumpToNextHidden; +} + // TODO: convert to C++ actions, and then call into Base holding all this // move pan/zoom logic too. Then use that as start of Win32 kramv. 
@@ -1599,7 +1645,7 @@ - (void)handleKey:(uint32_t)keyCode isShiftKeyDown:(bool)isShiftKeyDown case Key::F: // cube or cube array, but hit s to pick cubearray - if (_showSettings->faceCount) { + if (_showSettings->faceCount > 1) { if (isShiftKeyDown) { _showSettings->faceNumber = MAX(_showSettings->faceNumber - 1, 0); } @@ -1809,6 +1855,11 @@ - (BOOL)loadTextureFromArchive:(const char*)filename timestamp:(double)timestamp _noImageLoaded = NO; } + _showSettings->isArchive = false; + + // show/hide button + [self updateUIAfterLoad]; + self.needsDisplay = YES; return YES; } @@ -1932,6 +1983,11 @@ - (BOOL)loadTextureFromURL:(NSURL*)url { _noImageLoaded = NO; } + _showSettings->isArchive = false; + + // show/hide button + [self updateUIAfterLoad]; + self.needsDisplay = YES; return YES; } From 9fd87737d78c456ae0fc0e970dc936edb09fcacd Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sat, 22 May 2021 16:42:24 -0700 Subject: [PATCH 065/901] Kramv - fix hiding of mip button, only have show gray show pixels not full black/white --- kramv/KramShaders.metal | 2 +- kramv/KramViewerMain.mm | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index 544bab07..287c0570 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -656,7 +656,7 @@ float4 DrawPixels( } else if (uniforms.debugMode == ShDebugModeGray) { // with 565 formats, all pixels with light up - if (c.r != 0 && (c.r == c.g && c.r == c.b)) { + if ((c.r > 0.0 && c.r < 1.0) && (c.r == c.g && c.r == c.b)) { isHighlighted = true; } } diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index 0c251f48..426e7fea 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -1231,11 +1231,11 @@ - (NSButton*)findButton:(const char*)name { - (void)updateUIAfterLoad { // base on showSettings, hide some fo the buttons - bool isShowAllHidden = _showSettings->totalChunks() <= 1 && _showSettings->mipLOD <= 1; + bool isShowAllHidden = _showSettings->totalChunks() <= 1 && _showSettings->maxLOD <= 1; bool isArrayHidden = _showSettings->arrayCount <= 1; bool isFaceSliceHidden = _showSettings->faceCount <= 1 && _showSettings->sliceCount <= 1; - bool isMipHidden = _showSettings->mipLOD <= 1; + bool isMipHidden = _showSettings->maxLOD <= 1; bool isJumpToNextHidden = !_showSettings->isArchive; From c9ebc74911ac3aaeac1c4238eeaeeb09bacee5b1 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sat, 22 May 2021 19:51:02 -0700 Subject: [PATCH 066/901] Kram - add MortonOrder twiddle class, remove flips on height to normals, block actions Now actions are blocked if the button is hidden. Using this as poor man's action test. Some of these states may not be getting reset when loading new texture, so test to make sure don't get stuck without action to disable. collectorbarrelh-n from height now matches the collectorbarrel-n. Still need to make sure using OpenGL normal +Y, not -Y of DX normals. Have info test length for info on sizc field. 
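A standalone sketch of what the new MortonOrder twiddle is for: remapping a mip's blocks from linear row order into Morton order, e.g. for hardware that expects twiddled block layout at upload time. Only the bit-interleave mirrors the mortonOffset() logic added to Kram.cpp in this patch; the copy helper and its names are illustrative, and the same constraints apply (the smaller block count must be a power of two and the larger a multiple of it):

    #include <cstdint>
    #include <cstring>

    // Interleave x/y bits up to the smaller dimension, then append the leftover
    // high bits of the larger dimension (same scheme as MortonOrder::mortonOffset).
    static uint32_t mortonOffset(uint32_t x, uint32_t y, uint32_t minDim)
    {
        uint32_t offset = 0, shift = 0;
        for (uint32_t mask = 1; mask < minDim; mask <<= 1) {
            offset |= (((y & mask) << 1) | (x & mask)) << shift;
            shift++;
        }
        // at least one of x or y has no significant bits left above 'shift'
        offset |= ((x | y) >> shift) << (shift * 2);
        return offset;
    }

    // Copy one mip's blocks (blocksX x blocksY, each blockSize bytes) from
    // linear order in src into twiddled order in dst.
    static void copyBlocksTwiddled(const uint8_t* src, uint8_t* dst,
                                   uint32_t blocksX, uint32_t blocksY,
                                   uint32_t blockSize)
    {
        uint32_t minDim = (blocksX <= blocksY) ? blocksX : blocksY;
        for (uint32_t y = 0; y < blocksY; ++y) {
            for (uint32_t x = 0; x < blocksX; ++x) {
                memcpy(dst + mortonOffset(x, y, minDim) * blockSize,
                       src + (y * blocksX + x) * blockSize,
                       blockSize);
            }
        }
    }
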
--- kramv/KramRenderer.mm | 2 +- kramv/KramViewerMain.mm | 168 +++++++++++++++++++++------------ libkram/kram/Kram.cpp | 55 +++++++++-- libkram/kram/KramImage.cpp | 8 +- libkram/kram/KramImageInfo.cpp | 7 +- 5 files changed, 162 insertions(+), 78 deletions(-) diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index dc81331e..b2061068 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -612,7 +612,7 @@ - (float4x4)computeImageTransform:(float)panX panY:(float)panY zoom:(float)zoom - (void)_updateGameState { - /// Update any game state before encoding renderint commands to our drawable + /// Update any game state before encoding rendering commands to our drawable Uniforms& uniforms = *(Uniforms*)_dynamicUniformBuffer[_uniformBufferIndex].contents; diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index 426e7fea..d23149c5 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -1230,6 +1230,9 @@ - (NSButton*)findButton:(const char*)name { - (void)updateUIAfterLoad { + // TODO: move these to actions, and test their state instead of looking up buttons + // here and in HandleKey. + // base on showSettings, hide some fo the buttons bool isShowAllHidden = _showSettings->totalChunks() <= 1 && _showSettings->maxLOD <= 1; @@ -1239,14 +1242,34 @@ - (void)updateUIAfterLoad { bool isJumpToNextHidden = !_showSettings->isArchive; - // could hide rgba buttons on some formas - // or have XYZBA on nromals, but have Y mapped to array + bool isGreenHidden = _showSettings->numChannels <= 1; + bool isBlueHidden = _showSettings->numChannels <= 2 && !_showSettings->isNormal; // reconstruct z = b on normals + + // TODO: also need a hasAlpha for pixels, since many compressed formats like ASTC always have 4 channels + // but internally store R,RG01,... etc. Can get more data from swizzle in the props. + // Often alpha doesn't store anything useful to view. + + bool hasAlpha = _showSettings->numChannels >= 3; + + bool isAlphaHidden = !hasAlpha; + bool isPremulHidden = !hasAlpha; + bool isCheckerboardHidden = !hasAlpha; + + bool isSignedHidden = !isSignedFormat(_showSettings->originalFormat); [self findButton:"Y"].hidden = isArrayHidden; [self findButton:"F"].hidden = isFaceSliceHidden; [self findButton:"M"].hidden = isMipHidden; [self findButton:"S"].hidden = isShowAllHidden; [self findButton:"J"].hidden = isJumpToNextHidden; + + [self findButton:"G"].hidden = isGreenHidden; + [self findButton:"B"].hidden = isBlueHidden; + [self findButton:"A"].hidden = isAlphaHidden; + + [self findButton:"P"].hidden = isPremulHidden; + [self findButton:"N"].hidden = isSignedHidden; + [self findButton:"C"].hidden = isCheckerboardHidden; } @@ -1358,55 +1381,63 @@ - (void)handleKey:(uint32_t)keyCode isShiftKeyDown:(bool)isShiftKeyDown // rgba channels case Key::Num1: case Key::R: - if (channels == TextureChannels::ModeRRR1 || channels == TextureChannels::ModeR001) { - channels = TextureChannels::ModeRGBA; - text = "Mask RGBA"; - } - else { - channels = isShiftKeyDown ? TextureChannels::ModeRRR1 : TextureChannels::ModeR001; - text = isShiftKeyDown ? "Mask RRR1" : "Mask R001"; + if (![self findButton:"R"].isHidden) { + if (channels == TextureChannels::ModeRRR1 || channels == TextureChannels::ModeR001) { + channels = TextureChannels::ModeRGBA; + text = "Mask RGBA"; + } + else { + channels = isShiftKeyDown ? TextureChannels::ModeRRR1 : TextureChannels::ModeR001; + text = isShiftKeyDown ? 
"Mask RRR1" : "Mask R001"; + } + isChanged = true; } - isChanged = true; - + break; case Key::Num2: case Key::G: - if (channels == TextureChannels::ModeGGG1 || channels == TextureChannels::Mode0G01) { - channels = TextureChannels::ModeRGBA; - text = "Mask RGBA"; - } - else { - channels = isShiftKeyDown ? TextureChannels::ModeGGG1 : TextureChannels::Mode0G01; - text = isShiftKeyDown ? "Mask GGG1" : "Mask 0G01"; + if (![self findButton:"G"].isHidden) { + if (channels == TextureChannels::ModeGGG1 || channels == TextureChannels::Mode0G01) { + channels = TextureChannels::ModeRGBA; + text = "Mask RGBA"; + } + else { + channels = isShiftKeyDown ? TextureChannels::ModeGGG1 : TextureChannels::Mode0G01; + text = isShiftKeyDown ? "Mask GGG1" : "Mask 0G01"; + } + isChanged = true; } - isChanged = true; break; case Key::Num3: case Key::B: - if (channels == TextureChannels::ModeBBB1 || channels == TextureChannels::Mode00B1) { - channels = TextureChannels::ModeRGBA; - text = "Mask RGBA"; - } - else { - channels = isShiftKeyDown ? TextureChannels::ModeBBB1 : TextureChannels::Mode00B1; - text = isShiftKeyDown ? "Mask BBB1" : "Mask 00B1"; + if (![self findButton:"B"].isHidden) { + if (channels == TextureChannels::ModeBBB1 || channels == TextureChannels::Mode00B1) { + channels = TextureChannels::ModeRGBA; + text = "Mask RGBA"; + } + else { + channels = isShiftKeyDown ? TextureChannels::ModeBBB1 : TextureChannels::Mode00B1; + text = isShiftKeyDown ? "Mask BBB1" : "Mask 00B1"; + } + isChanged = true; } - isChanged = true; break; case Key::Num4: case Key::A: - if (channels == TextureChannels::ModeAAA1) { - channels = TextureChannels::ModeRGBA; - text = "Mask RGBA"; - } - else { - channels = TextureChannels::ModeAAA1; - text = "Mask AAA1"; + if (![self findButton:"A"].isHidden) { + if (channels == TextureChannels::ModeAAA1) { + channels = TextureChannels::ModeRGBA; + text = "Mask RGBA"; + } + else { + channels = TextureChannels::ModeAAA1; + text = "Mask AAA1"; + } + isChanged = true; } - isChanged = true; break; case Key::E: { @@ -1510,10 +1541,12 @@ - (void)handleKey:(uint32_t)keyCode isShiftKeyDown:(bool)isShiftKeyDown // toggle checkerboard for transparency case Key::C: - _showSettings->isCheckerboardShown = !_showSettings->isCheckerboardShown; - isChanged = true; - text = "Checker "; - text += _showSettings->isCheckerboardShown ? "On" : "Off"; + if (![self findButton:"C"].isHidden) { + _showSettings->isCheckerboardShown = !_showSettings->isCheckerboardShown; + isChanged = true; + text = "Checker "; + text += _showSettings->isCheckerboardShown ? "On" : "Off"; + } break; // toggle pixel grid when magnified above 1 pixel, can happen from mipmap changes too @@ -1574,11 +1607,14 @@ - (void)handleKey:(uint32_t)keyCode isShiftKeyDown:(bool)isShiftKeyDown break; } case Key::S: - // TODO: have drawAllMips, drawAllLevels, drawAllLevelsAndMips - _showSettings->isShowingAllLevelsAndMips = !_showSettings->isShowingAllLevelsAndMips; - isChanged = true; - text = "Show All "; - text += _showSettings->isShowingAllLevelsAndMips ? "On" : "Off"; + if (![self findButton:"S"].isHidden) { + + // TODO: have drawAllMips, drawAllLevels, drawAllLevelsAndMips + _showSettings->isShowingAllLevelsAndMips = !_showSettings->isShowingAllLevelsAndMips; + isChanged = true; + text = "Show All "; + text += _showSettings->isShowingAllLevelsAndMips ? 
"On" : "Off"; + } break; // toggle hud that shows name and pixel value under the cursor @@ -1610,37 +1646,45 @@ - (void)handleKey:(uint32_t)keyCode isShiftKeyDown:(bool)isShiftKeyDown // toggle signed vs. unsigned case Key::N: - _showSettings->isSigned = !_showSettings->isSigned; - isChanged = true; - text = "Signed "; - text += _showSettings->isSigned ? "On" : "Off"; + if (![self findButton:"N"].isHidden) { + _showSettings->isSigned = !_showSettings->isSigned; + isChanged = true; + text = "Signed "; + text += _showSettings->isSigned ? "On" : "Off"; + } break; // toggle premul alpha vs. unmul case Key::P: - _showSettings->isPremul = !_showSettings->isPremul; - isChanged = true; - text = "Premul "; - text += _showSettings->isPremul ? "On" : "Off"; + if (![self findButton:"P"].isHidden) { + _showSettings->isPremul = !_showSettings->isPremul; + isChanged = true; + text = "Premul "; + text += _showSettings->isPremul ? "On" : "Off"; + } break; case Key::J: - if ([self advanceTextureFromAchive:!isShiftKeyDown]) { - isChanged = true; - text = "Loaded " + _showSettings->lastFilename; + if (![self findButton:"J"].isHidden) { + if ([self advanceTextureFromAchive:!isShiftKeyDown]) { + isChanged = true; + text = "Loaded " + _showSettings->lastFilename; + } } break; // mip up/down case Key::M: - if (isShiftKeyDown) { - _showSettings->mipLOD = MAX(_showSettings->mipLOD - 1, 0); - } - else { - _showSettings->mipLOD = MIN(_showSettings->mipLOD + 1, _showSettings->maxLOD - 1); + if (_showSettings->maxLOD > 1) { + if (isShiftKeyDown) { + _showSettings->mipLOD = MAX(_showSettings->mipLOD - 1, 0); + } + else { + _showSettings->mipLOD = MIN(_showSettings->mipLOD + 1, _showSettings->maxLOD - 1); + } + sprintf(text, "Mip %d/%d", _showSettings->mipLOD, _showSettings->maxLOD); + isChanged = true; } - sprintf(text, "Mip %d/%d", _showSettings->mipLOD, _showSettings->maxLOD); - isChanged = true; break; case Key::F: diff --git a/libkram/kram/Kram.cpp b/libkram/kram/Kram.cpp index 4d5374a6..463bef9e 100644 --- a/libkram/kram/Kram.cpp +++ b/libkram/kram/Kram.cpp @@ -34,18 +34,43 @@ namespace kram { using namespace std; -bool LoadKtx(const uint8_t* data, size_t dataSize, Image& sourceImage) +// Twiddle pixels or blocks into Morton order. Usually this is done during the upload of +// linear-order block textures. But on some platforms may be able to directly use the block +// and pixel data if organized in the exact twiddle order the hw uses. +// Code adapted from KTX doc example. +class MortonOrder { - KTXImage image; - if (!image.open(data, dataSize)) { - return false; +public: +MortonOrder(uint32_t width, uint32_t height) { + minDim = (width <= height) ? 
width : height; + + // Smaller size must be a power of 2 + assert((minDim & (minDim - 1)) == 0); + + // Larger size must be a multiple of the smaller + assert(width % minDim == 0 && height % minDim == 0); +} + +// For a given xy block in a mip level, find the block offset in morton order +uint32_t mortonOffset(uint32_t x, uint32_t y) +{ + uint32_t offset = 0, shift = 0; + + for (uint32_t mask = 1; mask < minDim; mask <<= 1) { + offset |= (((y & mask) << 1) | (x & mask)) << shift; + shift++; } - // many different types of KTX files, for now only import from 2D type - // and only pull the first mip, but want to be able to pull custom mips from - // many types - return sourceImage.loadImageFromKTX(image); + // At least one of width and height will have run out of most-significant bits + offset |= ((x | y) >> shift) << (shift * 2); + return offset; } + +private: + uint32_t minDim = 0; +}; + + inline Color toPremul(Color c) { // these are really all fractional, but try this @@ -74,6 +99,18 @@ inline Color toGrayscaleRec709(Color c, const Mipper& mipper) { return c; } +bool LoadKtx(const uint8_t* data, size_t dataSize, Image& sourceImage) +{ + KTXImage image; + if (!image.open(data, dataSize)) { + return false; + } + + // many different types of KTX files, for now only import from 2D type + // and only pull the first mip, but want to be able to pull custom mips from + // many types + return sourceImage.loadImageFromKTX(image); +} bool LoadPng(const uint8_t* data, size_t dataSize, bool isPremulRgb, bool isGray, Image& sourceImage) { @@ -1445,7 +1482,7 @@ string kramInfoKTXToString(const string& srcFilename, const KTXImage& srcImage, length *= numChunks; uint64_t percent = (100 * lengthCompressed) / length; - isMB = (lengthCompressed > (512 * 1024)); + isMB = (length > (512 * 1024)); double lengthF = isMB ? length / (1024.0f * 1024.0f) : length / 1024.0f; double lengthCompressedF = isMB ? lengthCompressed / (1024.0f * 1024.0f) : lengthCompressed / 1024.0f; diff --git a/libkram/kram/KramImage.cpp b/libkram/kram/KramImage.cpp index 9ccee08d..6993649e 100644 --- a/libkram/kram/KramImage.cpp +++ b/libkram/kram/KramImage.cpp @@ -2302,7 +2302,7 @@ bool Image::compressMipLevel(const ImageInfo& info, KTXImage& image, // , so opaque textures repro as 254 alpha on Toof-a.png. // ate sets pbits on mode 6 for same block. Also fixed mip weights in non-pow2 mipper. 
- bool doPrintBlock = false; +// bool doPrintBlock = false; // if (bx == 8 && by == 1) { // int32_t bp = 0; // bp = bp; @@ -2339,9 +2339,9 @@ bool Image::compressMipLevel(const ImageInfo& info, KTXImage& image, case MyMTLPixelFormatBC7_RGBAUnorm_sRGB: { bc7enc_compress_block(dstBlock, srcPixelCopy, &bc7params); - if (doPrintBlock) { - printBCBlock(dstBlock, info.pixelFormat); - } +// if (doPrintBlock) { +// printBCBlock(dstBlock, info.pixelFormat); +// } break; } default: { diff --git a/libkram/kram/KramImageInfo.cpp b/libkram/kram/KramImageInfo.cpp index 1a0ce2de..3fee97fa 100644 --- a/libkram/kram/KramImageInfo.cpp +++ b/libkram/kram/KramImageInfo.cpp @@ -1305,6 +1305,9 @@ void ImageInfo::heightToNormals(int32_t w, int32_t h, float dx = (cE - cW) * scaleX; float dy = (cN - cS) * scaleY; + //dx = -dx; + //dy = -dy; + float4 normal = float4m(dx, dy, 1.0f, 0.0f); normal = normalize(normal); @@ -1337,8 +1340,8 @@ void ImageInfo::heightToNormals(int32_t w, int32_t h, float dx = (cE - cW) * scaleX; float dy = (cN - cS) * scaleY; - dx = -dx; - dy = -dy; + //dx = -dx; + //dy = -dy; float4 normal = float4m(dx, dy, 1.0f, 0.0f); normal = normalize(normal); From d5fa28757835bbfcdc03e79edfb377eb97236c47 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 23 May 2021 00:51:54 -0700 Subject: [PATCH 067/901] kramv - fix small mip sample lookup, add cmake gpu capture for shaders with source, fix mipX/Y calc --- kramv/CMakeLists.txt | 6 ++++++ kramv/KramRenderer.mm | 5 ++++- kramv/KramShaders.metal | 10 +++++----- kramv/KramViewerBase.h | 4 ++++ kramv/KramViewerMain.mm | 9 ++++++--- 5 files changed, 25 insertions(+), 9 deletions(-) diff --git a/kramv/CMakeLists.txt b/kramv/CMakeLists.txt index cd290094..11baacc3 100644 --- a/kramv/CMakeLists.txt +++ b/kramv/CMakeLists.txt @@ -77,6 +77,12 @@ set_target_properties(${myTargetApp} PROPERTIES # TODO: not sure how to set this, nothing online either ? # MACOSX_BUNDLE_APP_CATEGORY "Developer Tools" + + #------------------------- + # turn on shader capture support and indexing + # why can't this just be a yes or no, there's "Yes, exclude source code" + XCODE_ATTRIBUTE_MTL_ENABLE_DEBUG_INFO "Yes, include source code" + XCODE_ATTRIBUTE_MTL_ENABLE_INDEX_STORE YES ) target_compile_options(${myTargetApp} PRIVATE -W -Wall) diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index b2061068..461141af 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -1050,7 +1050,10 @@ - (void)drawSample int32_t textureLookupX = _showSettings->textureLookupX; int32_t textureLookupY = _showSettings->textureLookupY; - [self drawSamples:commandBuffer lookupX:textureLookupX lookupY:textureLookupY]; + int32_t textureLookupMipX = _showSettings->textureLookupMipX; + int32_t textureLookupMipY = _showSettings->textureLookupMipY; + + [self drawSamples:commandBuffer lookupX:textureLookupMipX lookupY:textureLookupMipY]; // Synchronize the managed texture. id blitCommandEncoder = [commandBuffer blitCommandEncoder]; diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index 287c0570..f3289232 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -982,7 +982,7 @@ kernel void SampleImageCS( // the for-loop is replaced with a collection of threads, each of which // calls this function. 
uint2 uv = uniforms.uv; // tie into texture lookup - uv = max(uint2(1), uv >> uniforms.mipLOD); + // uv >>= uniforms.mipLOD; // the color returned is linear float4 color = colorMap.read(uv, uniforms.mipLOD); @@ -999,7 +999,7 @@ kernel void SampleImageArrayCS( // the for-loop is replaced with a collection of threads, each of which // calls this function. uint2 uv = uniforms.uv; // tie into texture lookup - uv = max(uint2(1), uv >> uniforms.mipLOD); + //uv >>= uniforms.mipLOD; uint arrayOrSlice = uniforms.arrayOrSlice; @@ -1018,7 +1018,7 @@ kernel void SampleCubeCS( // the for-loop is replaced with a collection of threads, each of which // calls this function. uint2 uv = uint2(uniforms.uv); // tie into texture lookup - uv = max(uint2(1), uv >> uniforms.mipLOD); + //uv >>= uniforms.mipLOD; uint face = uniforms.face; @@ -1040,7 +1040,7 @@ kernel void SampleCubeArrayCS( // the for-loop is replaced with a collection of threads, each of which // calls this function. uint2 uv = uint2(uniforms.uv); // tie into texture lookup - uv = max(uint2(1), uv >> uniforms.mipLOD); + //uv >>= uniforms.mipLOD; uint face = uniforms.face; uint arrayOrSlice = uniforms.arrayOrSlice; @@ -1060,7 +1060,7 @@ kernel void SampleVolumeCS( // the for-loop is replaced with a collection of threads, each of which // calls this function. uint3 uv = uint3(uniforms.uv, uniforms.arrayOrSlice); // tie into texture lookup - uv = max(uint3(1), uv >> uniforms.mipLOD); + //uv >>= uniforms.mipLOD); // the color returned is linear float4 color = colorMap.read(uv, uniforms.mipLOD); diff --git a/kramv/KramViewerBase.h b/kramv/KramViewerBase.h index d5cf2211..d907c296 100644 --- a/kramv/KramViewerBase.h +++ b/kramv/KramViewerBase.h @@ -125,6 +125,10 @@ class ShowSettings { int32_t textureLookupX = 0; int32_t textureLookupY = 0; + // exact pixel in the mip level + int32_t textureLookupMipX = 0; + int32_t textureLookupMipY = 0; + int32_t textureResultX = 0; int32_t textureResultY = 0; float4 textureResult; diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index d23149c5..cf71e8ed 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -983,15 +983,15 @@ - (void)updateEyedropper { // show block num int mipLOD = _showSettings->mipLOD; - // TODO:: these block numbers are not accurate on Toof at 4x4 + // TODO: these block numbers are not accurate on Toof at 4x4 // there is resizing going on to the dimensions int mipX = _showSettings->imageBoundsX; int mipY = _showSettings->imageBoundsY; for (int i = 0; i < mipLOD; ++i) { - mipX = (mipX+1) >> 1; - mipY = (mipY+1) >> 1; + mipX = mipX >> 1; + mipY = mipY >> 1; } mipX = std::max(1, mipX); mipY = std::max(1, mipY); @@ -999,6 +999,9 @@ - (void)updateEyedropper { mipX = (int32_t)(uvX * mipX); mipY = (int32_t)(uvY * mipY); + _showSettings->textureLookupMipX = mipX; + _showSettings->textureLookupMipY = mipY; + // TODO: may want to return mip in pixel readback // don't have it right now, so don't display if preview is enabled if (_showSettings->isPreview) From f214d50822da0f5898d5cb9422c2ed5d65c75c5d Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 23 May 2021 13:19:15 -0700 Subject: [PATCH 068/901] kramv - early menu support view menu with items show/hidden the same way buttons are. Not handling state on/off yet. 
--- kramv/Base.lproj/Main.storyboard | 4 ++ kramv/KramViewerMain.mm | 83 ++++++++++++++++++++++++++------ libkram/kram/KTXImage.cpp | 9 ++-- libkram/kram/KramImage.cpp | 2 +- 4 files changed, 78 insertions(+), 20 deletions(-) diff --git a/kramv/Base.lproj/Main.storyboard b/kramv/Base.lproj/Main.storyboard index 50fcd024..86041b06 100644 --- a/kramv/Base.lproj/Main.storyboard +++ b/kramv/Base.lproj/Main.storyboard @@ -92,6 +92,10 @@ + + + + diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index cf71e8ed..de695e5e 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -400,6 +400,7 @@ void encodeSrcForEncodeComparisons(bool increment) { @implementation MyMTKView { + NSMenu* _viewMenu; // really the items NSStackView* _buttonStack; NSMutableArray* _buttonArray; NSTextField* _hudLabel; @@ -571,31 +572,42 @@ - (NSStackView*)_addButtons { stackView.detachesHiddenViews = YES; // default, but why have to have _buttonArrary [self addSubview: stackView]; -#if 0 +#if 1 // Want menus, so user can define their own shortcuts to commands // Also need to enable/disable this via validateUserInterfaceItem NSApplication* app = [NSApplication sharedApplication]; - // TODO: add an edit menu in the storyboard - NSMenu* menu = app.windowsMenu; - [menu addItem:[NSMenuItem separatorItem]]; + NSMenu* mainMenu = app.mainMenu; + NSMenuItem* viewMenuItem = mainMenu.itemArray[2]; + _viewMenu = viewMenuItem.submenu; + + // TODO: add a view menu in the storyboard + //NSMenu* menu = app.windowsMenu; + //[menu addItem:[NSMenuItem separatorItem]]; for (int32_t i = 0; i < numButtons; ++i) { - const char* icon = names[2*i+0]; - const char* tip = names[2*i+1]; + const char* icon = names[2*i+0]; // single char + const char* title = names[2*i+1]; - NSString* shortcut = [NSString stringWithUTF8String:icon]; - NSString* name = [NSString stringWithUTF8String:tip]; - shortcut = @""; // for now, or AppKit turns key int cmd+shift+key + NSString* toolTip = [NSString stringWithUTF8String:icon]; + NSString* name = [NSString stringWithUTF8String:title]; + NSString* shortcut = @""; // for now, or AppKit turns key int cmd+shift+key if (icon[0] == '-') { - [menu addItem:[NSMenuItem separatorItem]]; + [_viewMenu addItem:[NSMenuItem separatorItem]]; } else { - NSMenuItem* menuItem = [[NSMenuItem alloc] initWithTitle:name action:@selector(handleAction) keyEquivalent:shortcut]; - [menu addItem: menuItem]; + NSMenuItem* menuItem = [[NSMenuItem alloc] initWithTitle:name action:@selector(handleAction:) keyEquivalent:shortcut]; + menuItem.toolTip = toolTip; // use in findMenuItem + + // TODO: menus and buttons should reflect any toggle state + // menuItem.state = Mixed/Off/On; + + [_viewMenu addItem: menuItem]; } } + + [_viewMenu addItem:[NSMenuItem separatorItem]]; #endif return stackView; @@ -1231,6 +1243,17 @@ - (NSButton*)findButton:(const char*)name { return nil; } +- (NSMenuItem*)findMenuItem:(const char*)name { + NSString* title = [NSString stringWithUTF8String:name]; + + for (NSMenuItem* menuItem in _viewMenu.itemArray) { + if (menuItem.toolTip == title) + return menuItem; + } + return nil; +} + + - (void)updateUIAfterLoad { // TODO: move these to actions, and test their state instead of looking up buttons @@ -1260,6 +1283,7 @@ - (void)updateUIAfterLoad { bool isSignedHidden = !isSignedFormat(_showSettings->originalFormat); + // buttons [self findButton:"Y"].hidden = isArrayHidden; [self findButton:"F"].hidden = isFaceSliceHidden; [self findButton:"M"].hidden = isMipHidden; @@ -1273,6 +1297,21 @@ - 
(void)updateUIAfterLoad { [self findButton:"P"].hidden = isPremulHidden; [self findButton:"N"].hidden = isSignedHidden; [self findButton:"C"].hidden = isCheckerboardHidden; + + // menus (may want to disable, not hide) + [self findMenuItem:"Y"].hidden = isArrayHidden; + [self findMenuItem:"F"].hidden = isFaceSliceHidden; + [self findMenuItem:"M"].hidden = isMipHidden; + [self findMenuItem:"S"].hidden = isShowAllHidden; + [self findMenuItem:"J"].hidden = isJumpToNextHidden; + + [self findMenuItem:"G"].hidden = isGreenHidden; + [self findMenuItem:"B"].hidden = isBlueHidden; + [self findMenuItem:"A"].hidden = isAlphaHidden; + + [self findMenuItem:"P"].hidden = isPremulHidden; + [self findMenuItem:"N"].hidden = isSignedHidden; + [self findMenuItem:"C"].hidden = isCheckerboardHidden; } @@ -1280,14 +1319,26 @@ - (void)updateUIAfterLoad { // move pan/zoom logic too. Then use that as start of Win32 kramv. - (IBAction)handleAction:(id)sender { - // sender is the UI element/NSButton - // if (sender == ) - NSButton* button = (NSButton*)sender; NSEvent* theEvent = [NSApp currentEvent]; bool isShiftKeyDown = (theEvent.modifierFlags & NSEventModifierFlagShift); - string title = [button.title UTF8String]; + string title; + + // sender is the UI element/NSButton + if ([sender isKindOfClass:[NSButton class]]) { + NSButton* button = (NSButton*)sender; + title = [button.title UTF8String]; + } + else if ([sender isKindOfClass:[NSMenuItem class]]) { + NSMenuItem* menuItem = (NSMenuItem*)sender; + title = [menuItem.toolTip UTF8String]; + } + else { + KLOGE("kram", "unknown UI element"); + return; + } + int32_t keyCode = -1; if (title == "?") diff --git a/libkram/kram/KTXImage.cpp b/libkram/kram/KTXImage.cpp index 8d328a2e..55d7f671 100644 --- a/libkram/kram/KTXImage.cpp +++ b/libkram/kram/KTXImage.cpp @@ -1210,14 +1210,17 @@ void KTXImage::initMipLevels(size_t mipOffset) uint32_t levelSize = dataSize * numChunks; + // TODO: align mip offset to multiple of 4 bytes for KTX1, may need for kTX2 + // make sure when adding up offsets with length to include this padding +// if (!skipImageLength) { +// offset += 3 - (offset & 3); // align level to 4 bytes +// } + // compute dataSize from header data if (!skipImageLength) { // advance past the length offset += sizeof(uint32_t); } - - // TODO: Here is where offset alignment to 4 bytes may be needed - // but that also needs to be accounted for in allocation // level holds single texture size not level size, but offset reflects level start KTXImageLevel level = { offset, 0, dataSize }; diff --git a/libkram/kram/KramImage.cpp b/libkram/kram/KramImage.cpp index 6993649e..b9704371 100644 --- a/libkram/kram/KramImage.cpp +++ b/libkram/kram/KramImage.cpp @@ -1020,7 +1020,7 @@ struct KTX2DescriptorChannelBlock { uint16_t bitOffset = 0; uint8_t bitLength = 0; uint8_t channelType : 4; // RED, GREEN, BLUE, RRR, GGG - uint8_t FSEL : 4; // l is low bit + uint8_t FSEL : 4; // L is low bit - Float, Signed, Exponent, Linear (used on Alpha) // 32-bits uint8_t samplePositions[4] = {0}; From 885d36d10245045b0ef56cd8c40ffc69b1a65469 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 23 May 2021 16:38:59 -0700 Subject: [PATCH 069/901] kram - isolate decompress and copy logic into unpackLevel(), update KramLoader on blit path The blit path can directly upload to staging from zstd mips. Update that path, and call unpackLevel() if compressed. Bump staging memory to 64MB for larger texture support. This code path isn't take yet, but pass isInfoOnly to skip decompress. 
Also put supercompressionType in KTXImage and return that, and have info display the name. Don't reuse zstd context across mips, so I can isolate that logic into unpackLevel. Can add other decompression here too, and zstd+uastc need both a decompress and and a transcode. --- kramv/KramLoader.mm | 102 ++++++++++++++----------- kramv/KramRenderer.mm | 2 + kramv/KramViewerMain.mm | 22 ++++-- libkram/kram/KTXImage.cpp | 156 ++++++++++++++++++++++---------------- libkram/kram/KTXImage.h | 16 +++- libkram/kram/Kram.cpp | 13 ++-- 6 files changed, 186 insertions(+), 125 deletions(-) diff --git a/kramv/KramLoader.mm b/kramv/KramLoader.mm index 67e83771..b4f75e62 100644 --- a/kramv/KramLoader.mm +++ b/kramv/KramLoader.mm @@ -120,7 +120,14 @@ inline MyMTLPixelFormat remapInternalRGBFormat(MyMTLPixelFormat format) { { KTXImage image; - if (!image.open(imageData, imageDataLength)) { + // true keeps compressed mips on KTX2 and aliases original mip data + // but have decode etc2/asct path below that uncompressed mips + // and the rgb conversion path below as well in the viewer. + // games would want to decompress directly from aliased mmap ktx2 data into staging + // or have blocks pre-twiddled in hw morton order. + + bool isInfoOnly = false; + if (!image.open(imageData, imageDataLength, isInfoOnly)) { return nil; } @@ -495,7 +502,9 @@ - (nonnull instancetype)init { self = [super init]; // must be aligned to pagesize() or can't use with newBufferWithBytesNoCopy - dataSize = 16*1024*1024; + // enough to upload 4k x 4k @ 4 bytes no mips, careful with array and cube that get too big + dataSize = 64*1024*1024; + posix_memalign((void**)&data, getpagesize(), dataSize); // allocate memory for circular staging buffer, only need to memcpy to this @@ -554,16 +563,6 @@ - (nonnull instancetype)init { return texture; } -//for (int mipLevelNumber = 0; mipLevelNumber < numMips; ++mipLevelNumber) { -// -// // zstd decompress entire mip level to the staging buffer -// zstd -//} -// -//// so first memcpy and entire level(s) into the buffer -////memcpy(...); - - // Has a synchronous upload via replaceRegion that only works for shared/managed (f.e. ktx), // and another path for private that uses a blitEncoder and must have block aligned data (f.e. ktxa, ktx2). // Could repack ktx data into ktxa before writing to temporary file, or when copying NSData into MTLBuffer. @@ -571,6 +570,13 @@ - (nonnull instancetype)init { { id texture = [self createTexture:image]; + // Note: always starting at 0 here, since kramv is only uploading 1 texture + // but a real uploader would upload until buffer full, and then reset this back to 0 + // A circular buffer if large enough to support multiple uploads over time. + // This can be a lot of temporary memory and must complete upload before changing. + + uint64_t bufferOffset = 0; + //-------------------------------- // upload mip levels @@ -588,7 +594,31 @@ - (nonnull instancetype)init { Int2 blockDims = image.blockDims(); - for (int mipLevelNumber = 0; mipLevelNumber < numMips; ++mipLevelNumber) { + // Note: copy entire decompressed level from KTX, but then upload + // each chunk of that with separate blit calls below. 
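Worth noting for the staging copies below: each level is decompressed (or memcpy'd) into the shared MTLBuffer at an offset padded to the format's block size, and a later patch in this series generalizes that padding into an alignOffset() helper and advances by the full level (length * numChunks). A condensed sketch of that final layout logic, paraphrasing the later diff rather than adding new API:

    #include <cstdint>

    inline uint64_t alignOffset(uint64_t offset, uint64_t alignment) {
        return offset + (alignment - offset % alignment) % alignment;
    }
    // per mip: bufferOffset = alignOffset(bufferOffset, blockSize);
    //          image.unpackLevel(mip, fileData + level.offset, stagingData + bufferOffset);
    //          bufferOffsets[mip] = bufferOffset;
    //          bufferOffset += level.length * numChunks;   // whole level, all chunks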
+ size_t blockSize = image.blockSize(); + + vector bufferOffsets; + uint8_t* bufferData = (uint8_t*)_buffer.contents; + const uint8_t* mipData = (const uint8_t*)image.fileData; + bufferOffsets.resize(image.mipLevels.size()); + + for (int32_t i = 0; i < numMips; ++i) { + const KTXImageLevel& mipLevel = image.mipLevels[i]; + + // pad buffer offset to a multiple of the blockSize + bufferOffset += (blockSize - 1) - (bufferOffset & blockSize); + bufferOffsets[i] = bufferOffset; + bufferOffset += mipLevel.length; + + // this may have to decompress the level data + image.unpackLevel(i, mipData + mipLevel.offset, bufferData + bufferOffset); + } + + // blit encode calls must all be submitted to an encoder + // but may not have to be on the render thrad? + + for (int32_t mipLevelNumber = 0; mipLevelNumber < numMips; ++mipLevelNumber) { // there's a 4 byte levelSize for each mipLevel // the mipLevel.offset is immediately after this @@ -616,45 +646,32 @@ - (nonnull instancetype)init { bytesPerRow = (int32_t)mipLevel.length / yBlocks; } - int32_t sliceOrArrayOrFace; + int32_t chunkNum; - if (image.header.numberOfArrayElements > 0) { + if (image.header.numberOfArrayElements > 1) { // can be 1d, 2d, or cube array - sliceOrArrayOrFace = array; + chunkNum = array; if (numFaces > 1) { - sliceOrArrayOrFace = 6 * sliceOrArrayOrFace + face; + chunkNum = 6 * chunkNum + face; } } else { // can be 1d, 2d, or 3d - sliceOrArrayOrFace = slice; + chunkNum = slice; if (numFaces > 1) { - sliceOrArrayOrFace = face; + chunkNum = face; } } - // this is size of one face/slice/texture, not the levels size - int32_t mipStorageSize = (int32_t)mipLevel.length; + // This is size of one chunk + uint64_t mipStorageSize = mipLevel.length; - int32_t mipOffset = (int32_t)mipLevel.offset + sliceOrArrayOrFace * mipStorageSize; - - int32_t bufferBaseOffset = 0; // TODO: pos offset into the staging buffer - mipOffset += bufferBaseOffset; - - // using buffer to store - // offset into the level - //const uint8_t *srcBytes = image.fileData + mipOffset; - - // had blitEncoder support here + // Have uploaded to buffer in same order visiting chunks. + // Note: no call on MTLBlitEncoder to copy entire level of mips like glTexImage3D + uint64_t mipOffset = bufferOffsets[mipLevelNumber] + chunkNum * mipStorageSize; { - // Note: this only works for managed/shared textures. - // For private upload to buffer and then use blitEncoder to copy to texture. - //bool isCubemap = image.textureType == MyMTLTextureTypeCube || - // image.textureType == MyMTLTextureTypeCubeArray; bool is3D = image.textureType == MyMTLTextureType3D; - //bool is2DArray = image.textureType == MyMTLTextureType2DArray; - //bool is1DArray = image.textureType == MyMTLTextureType1DArray; // cpu copy the bytes from the data object into the texture MTLRegion region = { @@ -662,16 +679,11 @@ - (nonnull instancetype)init { { (NSUInteger)w, (NSUInteger)h, 1 } // MTLSize }; - // TODO: revist how loading is done to load entire levels - // otherwise too many replaceRegion calls. Data is already packed by mip. 
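The chunkNum computed above collapses array element, cube face, and 3D slice into a single index; the same mapping shows up again in the synchronous loadTextureFromImage path in a later patch. A hypothetical helper that summarizes it (illustrative only, not actual kram API):

    #include <cstdint>

    // a "chunk" is one face/slice/array element of a mip level
    static uint32_t chunkIndex(bool isArrayTexture, uint32_t numFaces,
                               uint32_t array, uint32_t face, uint32_t slice) {
        if (isArrayTexture)                        // 1D/2D/cube array
            return (numFaces > 1) ? (6 * array + face) : array;
        return (numFaces > 1) ? face : slice;      // cube uses the face, 3D uses the slice
    }
    // for 3D textures the blit then moves the chunk into region.origin.z and
    // uploads to destinationSlice 0, as in the code that follows.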
- if (is3D) { - region.origin.z = sliceOrArrayOrFace; - sliceOrArrayOrFace = 0; + region.origin.z = chunkNum; + chunkNum = 0; } - // TODO: no call on MTLBlitEncoder to copy entire level of mips like glTexImage3D - [_blitEncoder copyFromBuffer:_buffer sourceOffset:mipOffset sourceBytesPerRow:bytesPerRow @@ -679,7 +691,7 @@ - (nonnull instancetype)init { sourceSize:region.size toTexture:texture - destinationSlice:sliceOrArrayOrFace + destinationSlice:chunkNum destinationLevel:mipLevelNumber destinationOrigin:region.origin options:MTLBlitOptionNone diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index 461141af..14cce950 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -417,6 +417,8 @@ - (BOOL)loadTextureFromData:(const string&)fullFilename timestamp:(double)timest // archive shouldn't contain png, so only support ktx/ktx2 here // TODO: have loader return KTXImage instead of parsing it again + // then can decode blocks in kramv + KTXImage sourceImage; if (!sourceImage.open(imageData,imageDataLength)) { return NO; diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index de695e5e..4cbf359b 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -1226,14 +1226,6 @@ - (void)scrollWheel:(NSEvent *)event } } -// use this to enable/disable menus, buttons, etc. Called on every event -// when not implemented, then user items are always enabled -- (BOOL)validateUserInterfaceItem:(id)item -{ - // TODO: tie to menus and buttons - return YES; -} - - (NSButton*)findButton:(const char*)name { NSString* title = [NSString stringWithUTF8String:name]; for (NSButton* button in _buttonArray) { @@ -1253,6 +1245,18 @@ - (NSMenuItem*)findMenuItem:(const char*)name { return nil; } +// use this to enable/disable menus, buttons, etc. 
Called on every event +// when not implemented, then user items are always enabled +- (BOOL)validateUserInterfaceItem:(id)item +{ + // TODO: tie to menus and buttons states for enable/disable toggles + // https://developer.apple.com/library/archive/documentation/Cocoa/Conceptual/MenuList/Articles/EnablingMenuItems.html + + // MTKView is not doc based, so can't all super + //return [super validateUserInterfaceItem:anItem]; + + return YES; +} - (void)updateUIAfterLoad { @@ -1299,6 +1303,8 @@ - (void)updateUIAfterLoad { [self findButton:"C"].hidden = isCheckerboardHidden; // menus (may want to disable, not hide) + // problem is crashes since menu seems to strip hidden items + // enabled state has to be handled in validateUserInterfaceItem [self findMenuItem:"Y"].hidden = isArrayHidden; [self findMenuItem:"F"].hidden = isFaceSliceHidden; [self findMenuItem:"M"].hidden = isMipHidden; diff --git a/libkram/kram/KTXImage.cpp b/libkram/kram/KTXImage.cpp index 55d7f671..948a32c3 100644 --- a/libkram/kram/KTXImage.cpp +++ b/libkram/kram/KTXImage.cpp @@ -745,6 +745,18 @@ MyMTLPixelFormat toggleSrgbFormat(MyMTLPixelFormat format) return MyMTLPixelFormatInvalid; } +const char* supercompressionName(KTX2Supercompression type) +{ + const char* name = "Unknown"; + switch(type) { + case KTX2SupercompressionNone: name = "None"; break; + case KTX2SupercompressionBasisLZ: name = "BasisLZ"; break; + case KTX2SupercompressionZstd: name = "Zstd"; break; + case KTX2SupercompressionZlib: name = "Zlib"; break; + } + return name; +} + // https://docs.unity3d.com/ScriptReference/Experimental.Rendering.GraphicsFormat.html // Unity only handles 4,5,6,8,10,12 square block dimensions @@ -1316,17 +1328,6 @@ const char* textureTypeName(MyMTLTextureType textureType) // can use ktx2ktx2 and ktx2sc to supercompress, and kramv can use this to open and view data as a KTX1 file. // ignoring Basis and supercompression data, etc. 
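With supercompressionName() above, kram info can label how a KTX2 file's mips are packed; the Kram.cpp hunk later in this patch prints it alongside the compressed size, producing output roughly like this (values are illustrative):

    sizc: 1.337,0.512 MB 38%
    comp: Zstd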
-// wish C++ had a defer -struct ZSTDScope2 -{ - ZSTDScope2(ZSTD_DCtx* ctx_) : ctx(ctx_) {} - ~ZSTDScope2() { ZSTD_freeDCtx(ctx); } - -private: - ZSTD_DCtx* ctx = nullptr; -}; - - bool KTXImage::openKTX2(const uint8_t* imageData, size_t imageDataLength, bool isInfoOnly) { if ((size_t)imageDataLength < sizeof(KTX2Header)) { @@ -1416,12 +1417,16 @@ bool KTXImage::openKTX2(const uint8_t* imageData, size_t imageDataLength, bool i header.bytesOfKeyValueData = 0; initProps(imageData + header2.kvdByteOffset, header2.kvdByteLength); + // skip parsing th elevels if (isInfoOnly) { skipImageLength = true; fileData = imageData; fileDataLength = imageDataLength; + // copy this in to return as info + supercompressionType = (KTX2Supercompression)header2.supercompressionScheme; + // copy these over from ktx2 mipLevels = levels; @@ -1493,73 +1498,94 @@ bool KTXImage::openKTX2(const uint8_t* imageData, size_t imageDataLength, bool i // TODO: may need to fill out length field in fileData - // Note: specific to zstd - bool isZstd = header2.supercompressionScheme == KTX2SupercompressionZstd; - ZSTD_DCtx* dctx = nullptr; - if (isZstd) dctx = ZSTD_createDCtx(); - ZSTDScope2 scope(dctx); + supercompressionType = (KTX2Supercompression)header2.supercompressionScheme; // need to decompress mips here for (uint32_t i = 0; i < header.numberOfMipmapLevels; ++i) { // compresssed level const auto& level2 = levels[i]; - size_t srcDataSize = level2.lengthCompressed; const uint8_t* srcData = imageData + level2.offset; - + // uncompressed level - const auto& level1 = mipLevels[i]; - size_t dstDataSize = level1.length * numChunks; + auto& level1 = mipLevels[i]; + level1.lengthCompressed = level2.lengthCompressed; // need this for copyLevel to have enough data uint8_t* dstData = (uint8_t*)fileData + level1.offset; // can const_cast, since class owns data + + if (!unpackLevel(i, srcData, dstData)) { + return false; + } - // preserve lengthCompressed so kram info can display the value - // this field will need to be set to 0 - - // This does display in kram info, but it's confusing since image was converted to ktx1 - // and the offsets are largest first. So for now, don't copy this in. - // level1.lengthCompressed = level2.lengthCompressed; - - // TODO: use basis transcoder (single file) for Basis UASTC here, then don't need libktx yet - // wont work for BasisLZ (which is ETC1S). - - switch(header2.supercompressionScheme) { - case KTX2SupercompressionZstd: { - // decompress from zstd directly into ktx1 ordered chunk - // Note: decode fails with FSE_decompress. 
- auto result = ZSTD_decompressDCtx(dctx, - dstData, dstDataSize, - srcData, srcDataSize); - - if (ZSTD_isError(result)) { - KLOGE("kram", "decode mip zstd failed"); - return false; - } - if (level2.length * numChunks != result) { - KLOGE("kram", "decode mip zstd size not expected"); - return false; - } - break; - } + // have decompressed here, so set to 0 + level1.lengthCompressed = 0; + } + + // have decompressed ktx1, so change back to None + supercompressionType = KTX2SupercompressionNone; + } + + return true; +} + +bool KTXImage::unpackLevel(uint32_t mipNumber, const uint8_t* srcData, uint8_t* dstData) { + + // uncompressed level + uint32_t numChunks = totalChunks(); + const auto& level = mipLevels[mipNumber]; + size_t dstDataSize = level.length * numChunks; + + if (level.lengthCompressed == 0) { + memcpy(dstData, srcData, dstDataSize); + } + else { + size_t srcDataSize = level.lengthCompressed; + + // TODO: use basis transcoder (single file) for Basis UASTC here, then don't need libktx yet + // wont work for BasisLZ (which is ETC1S). + // copy this in to return as info + + switch(supercompressionType) { + case KTX2SupercompressionZstd: { + // decompress from zstd directly into ktx1 ordered chunk + // Note: decode fails with FSE_decompress. + ZSTD_DCtx* dctx = ZSTD_createDCtx(); + if (!dctx) + return false; - case KTX2SupercompressionZlib: { - // can use miniz or libCompression - mz_ulong dstDataSizeMZ = 0; - if (mz_uncompress(dstData, &dstDataSizeMZ, - srcData, srcDataSize) != MZ_OK) { - KLOGE("kram", "decode mip zlib failed"); - return false; - } - if (dstDataSizeMZ != dstDataSize) { - KLOGE("kram", "decode mip zlib size not expected"); - return false; - } - - break; + auto dstDataSizeZstd = ZSTD_decompressDCtx(dctx, + dstData, dstDataSize, + srcData, srcDataSize); + ZSTD_freeDCtx(dctx); + + if (ZSTD_isError(dstDataSizeZstd)) { + KLOGE("kram", "decode mip zstd failed"); + return false; + } + if (dstDataSizeZstd != dstDataSize) { + KLOGE("kram", "decode mip zstd size not expected"); + return false; + } + break; + } + + case KTX2SupercompressionZlib: { + // can use miniz or libCompression + mz_ulong dstDataSizeMiniz = 0; + if (mz_uncompress(dstData, &dstDataSizeMiniz, + srcData, srcDataSize) != MZ_OK) { + KLOGE("kram", "decode mip zlib failed"); + return false; } - - // already checked at top of function - default: { + if (dstDataSizeMiniz != dstDataSize) { + KLOGE("kram", "decode mip zlib size not expected"); return false; } + + break; + } + + // already checked at top of function + default: { + return false; } } } diff --git a/libkram/kram/KTXImage.h b/libkram/kram/KTXImage.h index b7971682..0174786d 100644 --- a/libkram/kram/KTXImage.h +++ b/libkram/kram/KTXImage.h @@ -305,6 +305,15 @@ class KTXImage { void reserveImageData(); vector& imageData(); + // for KTX2 files, the mips can be compressed using various encoders + bool isSupercompressed() const { return isKTX2() && !mipLevels.empty() && mipLevels[0].lengthCompressed != 0; } + + bool isKTX1() const { return !skipImageLength; } + bool isKTX2() const { return skipImageLength; } + + // can use on ktx1/2 files, does a decompress if needed + bool unpackLevel(uint32_t mipNumber, const uint8_t* srcData, uint8_t* dstData); + private: bool openKTX2(const uint8_t* imageData, size_t imageDataLength, bool isInfoOnly); @@ -321,9 +330,10 @@ class KTXImage { uint32_t height; uint32_t depth; - // for ktxa and ktx2 + // for ktx2 bool skipImageLength = false; - + KTX2Supercompression supercompressionType = KTX2SupercompressionNone; + KTXHeader 
header; // copy of KTXHeader, so can be modified and then written back // write out only string/string props, for easy of viewing @@ -336,6 +346,8 @@ class KTXImage { const uint8_t* fileData; // mmap data }; +const char* supercompressionName(KTX2Supercompression type); + // Generic format helpers. All based on the ubiquitous type. bool isFloatFormat(MyMTLPixelFormat format); bool isHalfFormat(MyMTLPixelFormat format); diff --git a/libkram/kram/Kram.cpp b/libkram/kram/Kram.cpp index 463bef9e..aaacdbc0 100644 --- a/libkram/kram/Kram.cpp +++ b/libkram/kram/Kram.cpp @@ -1469,8 +1469,8 @@ string kramInfoKTXToString(const string& srcFilename, const KTXImage& srcImage, int32_t numChunks = srcImage.totalChunks(); - // add up lengtha and lengthCompressed - if (srcImage.mipLevels[0].lengthCompressed > 0) { + // add up lengths and lengthCompressed + if (srcImage.isSupercompressed()) { uint64_t length = 0; uint64_t lengthCompressed = 0; @@ -1485,12 +1485,15 @@ string kramInfoKTXToString(const string& srcFilename, const KTXImage& srcImage, isMB = (length > (512 * 1024)); double lengthF = isMB ? length / (1024.0f * 1024.0f) : length / 1024.0f; double lengthCompressedF = isMB ? lengthCompressed / (1024.0f * 1024.0f) : lengthCompressed / 1024.0f; - + append_sprintf(info, - "sizc: %0.3f,%0.3f %s %d%%\n", + "sizc: %0.3f,%0.3f %s %d%%\n" + "comp: %s\n", lengthF, lengthCompressedF, isMB ? "MB" : "KB", - (int)percent); + (int)percent, + supercompressionName(srcImage.supercompressionType) + ); } From 2b743546084edad6f36802c63b8e9740bc6e4a7c Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Mon, 24 May 2021 07:53:39 -0700 Subject: [PATCH 070/901] kramv - update fast load path from staging MTLBuffer to MTLTexture, fix archive increment This decompresses, aliases, or copies directly from blocks and compressed blocks. The blit encoder is used to go direct to private textures via a 128MB staging MTLBuffer. This allows the entire level to be copied at once, and the the blits reference offsets into the buffer. The buffer is just a linear allocator right now, no circular usage. Use completion handler. Loader really needs a queue of pending textures, and also avoid level allocation on the copy path, and use a part of staging texture. Fix isInfoOnly handling since levelSize was divided by length twice. --- kramv/KramLoader.h | 19 +- kramv/KramLoader.mm | 417 +++++++++++++++++++++++--------------- kramv/KramRenderer.mm | 17 +- kramv/KramViewerMain.mm | 2 +- libkram/kram/KTXImage.cpp | 18 +- libkram/kram/KTXImage.h | 8 +- libkram/kram/KramConfig.h | 26 ++- 7 files changed, 301 insertions(+), 206 deletions(-) diff --git a/kramv/KramLoader.h b/kramv/KramLoader.h index 2b14b16f..d9ecedda 100644 --- a/kramv/KramLoader.h +++ b/kramv/KramLoader.h @@ -10,10 +10,11 @@ #import // protocol requires imports -#import -#import #import +#import #import +#import +#import #endif @@ -31,18 +32,8 @@ // from url (mmap) - (nullable id)loadTextureFromURL:(nonnull NSURL *)url originalFormat:(nullable MTLPixelFormat*)originalFormat; -@property (retain, nonatomic, readwrite, nonnull) id device; - -// test this after load, and use a MTLBlitEncoder to autogen mips -@property (nonatomic, readwrite, getter=isMipgenNeeded) BOOL mipgenNeeded; - -@end - -//------------------------------------- - -// This loads KTX and PNG data synchronously. Will likely move to only loading KTX files, with a png -> ktx conversion. -// The underlying KTXImage is not yet returned to the caller, but would be useful for prop queries. 
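To summarize the flow this patch wires up, paraphrasing the KramLoader diffs below and using the patch's own names: loading fills the staging MTLBuffer and records blits, and the renderer replays them at the start of the next frame.

    // load time (blitTextureFromImage):
    //   bufferOffset = alignOffset(bufferOffset, blockSize);
    //   image.unpackLevel(mip, fileData + level.offset, stagingData + bufferOffset);
    //   _blits.push_back({w, h, chunkNum, mip, mipStorageSize, mipOffset, textureIndex, bytesPerRow, is3D});
    //
    // frame start (uploadTexturesIfNeeded, called from the renderer's blit encoder):
    //   [blitEncoder generateMipmapsForTexture:texture];   // for any queued mipgen textures
    //   [blitEncoder copyFromBuffer:_buffer sourceOffset:blit.mipOffset ... toTexture:texture
    //                destinationSlice:chunkNum destinationLevel:blit.mipLevelNumber ...];
    //   [commandBuffer addCompletedHandler:^(...){ /* reset _bufferOffset once the GPU finishes */ }];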
-@interface KramBlitLoader : NSObject +// handle auto-mipgen and upload mips from staging MTLBuffer to mips of various private MTLTexture +- (void)uploadTexturesIfNeeded:(nonnull id)blitEncoder commandBuffer:(nonnull id)commandBuffer; @property (retain, nonatomic, readwrite, nonnull) id device; diff --git a/kramv/KramLoader.mm b/kramv/KramLoader.mm index b4f75e62..b4cf12c9 100644 --- a/kramv/KramLoader.mm +++ b/kramv/KramLoader.mm @@ -31,13 +31,42 @@ return string([[[NSString stringWithUTF8String:text.c_str()] lowercaseString] UTF8String]); } -//----------------------------------------------- +// defer data need to blit staging MTLBuffer to MTLTexture at the start of rendering +struct KramBlit +{ + uint32_t w; + uint32_t h; + uint32_t chunkNum; + uint32_t mipLevelNumber; + + uint64_t mipStorageSize; + uint64_t mipOffset; -// blit path for ktxa is commented out to simplify loader, will move that to an async load -// and simplify the loader API by making this a loader class. + uint32_t textureIndex; + uint32_t bytesPerRow; + bool is3D; +}; +//----------------------------------------------- + @implementation KramLoader { - BOOL _isMipgenNeeded; + // only one of these for now + id _buffer; + uint8_t* _data; + uint8_t _bufferOffset; + + vector _blits; + NSMutableArray>* _blitTextures; + NSMutableArray>* _mipgenTextures; +} + +- (instancetype)init { + self = [super init]; + + _blitTextures = [[NSMutableArray alloc] init]; + _mipgenTextures = [[NSMutableArray alloc] init]; + + return self; } - (nullable id)loadTextureFromData:(nonnull NSData*)imageData originalFormat:(nullable MTLPixelFormat*)originalFormat { @@ -78,6 +107,7 @@ - (BOOL)decodeImageIfNeeded:(KTXImage&)image imageDecoded:(KTXImage&)imageDecode } #if SUPPORT_RGB + inline bool isInternalRGBFormat(MyMTLPixelFormat format) { bool isInternal = false; switch(format) { @@ -120,17 +150,39 @@ inline MyMTLPixelFormat remapInternalRGBFormat(MyMTLPixelFormat format) { { KTXImage image; - // true keeps compressed mips on KTX2 and aliases original mip data - // but have decode etc2/asct path below that uncompressed mips + // isInfoOnly = true keeps compressed mips on KTX2 and aliases original mip data + // but have decode etc2/astc path below that uncompressed mips // and the rgb conversion path below as well in the viewer. // games would want to decompress directly from aliased mmap ktx2 data into staging // or have blocks pre-twiddled in hw morton order. 
- bool isInfoOnly = false; + bool isInfoOnly = true; if (!image.open(imageData, imageDataLength, isInfoOnly)) { return nil; } + // see if it needs decode first + bool needsDecode = false; + if (isInternalRGBFormat(image.pixelFormat)) { + needsDecode = true; + } +#if DO_DECODE + else if (isETCFormat(image.pixelFormat)) { + needsDecode = true; + } + else if (isASTCFormat(image.pixelFormat)) { + needsDecode = true; + } +#endif + + if (needsDecode) { + isInfoOnly = false; + + if (!image.open(imageData, imageDataLength, isInfoOnly)) { + return nil; + } + } + #if SUPPORT_RGB if (isInternalRGBFormat(image.pixelFormat)) { // loads and converts image to RGBA version @@ -162,18 +214,23 @@ inline MyMTLPixelFormat remapInternalRGBFormat(MyMTLPixelFormat format) { } #endif - if (originalFormat != nullptr) { *originalFormat = (MTLPixelFormat)image.pixelFormat; } - KTXImage imageDecoded; - bool useImageDecoded = false; - if (![self decodeImageIfNeeded:image imageDecoded:imageDecoded useImageDecoded:useImageDecoded]) { - return nil; + if (needsDecode) { + KTXImage imageDecoded; + bool useImageDecoded = false; + if (![self decodeImageIfNeeded:image imageDecoded:imageDecoded useImageDecoded:useImageDecoded]) { + return nil; + } + + return [self loadTextureFromImage:useImageDecoded ? imageDecoded : image]; + } + else { + // fast load path directly from mmap'ed data, decompress direct to staging + return [self blitTextureFromImage:image]; } - - return [self loadTextureFromImage:useImageDecoded ? imageDecoded : image]; } static int32_t numberOfMipmapLevels(const Image& image) { @@ -212,7 +269,7 @@ static int32_t numberOfMipmapLevels(const Image& image) { // TODO: replace this with code that gens a KTXImage from png (and cpu mips) // instead of needing to use autogenmip that has it's own filters (probably a box) - id texture = [self createTexture:image]; + id texture = [self createTexture:image isPrivate:false]; if (!texture) { return nil; } @@ -238,20 +295,12 @@ static int32_t numberOfMipmapLevels(const Image& image) { // have to schedule autogen inside render using MTLBlitEncoder if (image.header.numberOfMipmapLevels > 1) { - _isMipgenNeeded = YES; + [_mipgenTextures addObject: texture]; } return texture; } -- (BOOL)isMipgenNeeded { - return _isMipgenNeeded; -} - -- (void)setMipgenNeeded:(BOOL)enabled { - _isMipgenNeeded = enabled; -} - - (nullable id)loadTextureFromURL:(nonnull NSURL *)url originalFormat:(nullable MTLPixelFormat*)originalFormat { const char *path = [url.absoluteURL.path UTF8String]; @@ -295,7 +344,7 @@ - (void)setMipgenNeeded:(BOOL)enabled { return [self loadTextureFromData:mmapHelper.data() imageDataLength:(int32_t)mmapHelper.dataLength() originalFormat:originalFormat]; } -- (nullable id)createTexture:(KTXImage&)image { +- (nullable id)createTexture:(KTXImage&)image isPrivate:(bool)isPrivate { MTLTextureDescriptor *textureDescriptor = [[MTLTextureDescriptor alloc] init]; // Indicate that each pixel has a blue, green, red, and alpha channel, where each channel is @@ -315,6 +364,10 @@ - (void)setMipgenNeeded:(BOOL)enabled { // and only get box filtering in API-level filters. But would cut storage. textureDescriptor.mipmapLevelCount = MAX(1, image.header.numberOfMipmapLevels); + // this is needed for blit + if (isPrivate) + textureDescriptor.storageMode = MTLStorageModePrivate; + // only do this for viewer // but allows encoded textures to enable/disable their sRGB state. // Since the view isn't accurate, will probably pull this out. 
@@ -340,11 +393,6 @@ - (void)setMipgenNeeded:(BOOL)enabled { // Could repack ktx data into ktxa before writing to temporary file, or when copying NSData into MTLBuffer. - (nullable id)loadTextureFromImage:(KTXImage &)image { - id texture = [self createTexture:image]; - - //-------------------------------- - // upload mip levels - // TODO: about aligning to 4k for base + length // http://metalkit.org/2017/05/26/working-with-memory-in-metal-part-2.html @@ -359,19 +407,42 @@ - (void)setMipgenNeeded:(BOOL)enabled { Int2 blockDims = image.blockDims(); + uint32_t numChunks = image.totalChunks(); + + // TODO: reuse staging _buffer and _bufferOffset here, these large allocations take time + vector mipStorage; + mipStorage.resize(image.mipLevels[0].length * numChunks); // enough to hold biggest mip + + //----------------- + + id texture = [self createTexture:image isPrivate:false]; + + const uint8_t* srcLevelData = image.fileData; + for (int mipLevelNumber = 0; mipLevelNumber < numMips; ++mipLevelNumber) { // there's a 4 byte levelSize for each mipLevel // the mipLevel.offset is immediately after this - // this is offset to a given level const KTXImageLevel& mipLevel = image.mipLevels[mipLevelNumber]; + // this is offset to a given level + uint64_t mipBaseOffset = mipLevel.offset; + + // unpack the whole level in-place + if (image.isSupercompressed()) { + image.unpackLevel(mipLevelNumber, image.fileData + mipLevel.offset, mipStorage.data()); + srcLevelData = mipStorage.data(); + + // going to upload from mipStorage temp array + mipBaseOffset = 0; + } + // only have face, face+array, or slice but this handles all cases for (int array = 0; array < numArrays; ++array) { for (int face = 0; face < numFaces; ++face) { for (int slice = 0; slice < numSlices; ++slice) { - int32_t bytesPerRow = 0; + uint32_t bytesPerRow = 0; // 1D/1DArray textures set bytesPerRow to 0 if ((MTLTextureType)image.textureType != MTLTextureType1D && @@ -380,61 +451,62 @@ - (void)setMipgenNeeded:(BOOL)enabled { // for compressed, bytesPerRow needs to be multiple of block size // so divide by the number of blocks making up the height //int xBlocks = ((w + blockDims.x - 1) / blockDims.x); - int32_t yBlocks = ((h + blockDims.y - 1) / blockDims.y); + uint32_t yBlocks = ((h + blockDims.y - 1) / blockDims.y); // Calculate the number of bytes per row in the image. // for compressed images this is xBlocks * blockSize - bytesPerRow = (int32_t)mipLevel.length / yBlocks; + bytesPerRow = (uint32_t)mipLevel.length / yBlocks; } - int32_t sliceOrArrayOrFace; + int32_t chunkNum = 0; if (image.header.numberOfArrayElements > 0) { // can be 1d, 2d, or cube array - sliceOrArrayOrFace = array; + chunkNum = array; if (numFaces > 1) { - sliceOrArrayOrFace = 6 * sliceOrArrayOrFace + face; + chunkNum = 6 * chunkNum + face; } } else { // can be 1d, 2d, or 3d - sliceOrArrayOrFace = slice; + chunkNum = slice; if (numFaces > 1) { - sliceOrArrayOrFace = face; + chunkNum = face; } } // this is size of one face/slice/texture, not the levels size - int32_t mipStorageSize = (int32_t)mipLevel.length; + uint64_t mipStorageSize = mipLevel.length; + + uint64_t mipOffset = mipBaseOffset + chunkNum * mipStorageSize; - int32_t mipOffset = (int32_t)mipLevel.offset + sliceOrArrayOrFace * mipStorageSize; // offset into the level - const uint8_t *srcBytes = image.fileData + mipOffset; - - // had blitEncoder support here + const uint8_t *srcBytes = srcLevelData + mipOffset; { // Note: this only works for managed/shared textures. 
// For private upload to buffer and then use blitEncoder to copy to texture. + // See KramBlitLoader for that. This is all synchronous upload too. + // + // Note: due to API limit we can only copy one chunk at a time. With KramBlitLoader + // can copy the whole level to buffer, and then reference chunks within. + bool isCubemap = image.textureType == MyMTLTextureTypeCube || image.textureType == MyMTLTextureTypeCubeArray; - bool is3D = image.textureType == MyMTLTextureType3D; + bool is3D = image.textureType == MyMTLTextureType3D; bool is2DArray = image.textureType == MyMTLTextureType2DArray; bool is1DArray = image.textureType == MyMTLTextureType1DArray; - // cpu copy the bytes from the data object into the texture + // sync cpu copy the bytes from the data object into the texture MTLRegion region = { { 0, 0, 0 }, // MTLOrigin { (NSUInteger)w, (NSUInteger)h, 1 } // MTLSize }; - // TODO: revist how loading is done to load entire levels - // otherwise too many replaceRegion calls. Data is already packed by mip. - if (is1DArray) { [texture replaceRegion:region mipmapLevel:mipLevelNumber - slice:sliceOrArrayOrFace + slice:chunkNum withBytes:srcBytes bytesPerRow:bytesPerRow bytesPerImage:0]; @@ -442,17 +514,18 @@ - (void)setMipgenNeeded:(BOOL)enabled { else if (isCubemap) { [texture replaceRegion:region mipmapLevel:mipLevelNumber - slice:sliceOrArrayOrFace + slice:chunkNum withBytes:srcBytes bytesPerRow:bytesPerRow bytesPerImage:0]; } else if (is3D) { - region.origin.z = sliceOrArrayOrFace; + region.origin.z = chunkNum; + chunkNum = 0; [texture replaceRegion:region mipmapLevel:mipLevelNumber - slice:0 + slice:chunkNum withBytes:srcBytes bytesPerRow:bytesPerRow bytesPerImage:mipStorageSize]; // only for 3d @@ -460,7 +533,7 @@ - (void)setMipgenNeeded:(BOOL)enabled { else if (is2DArray) { [texture replaceRegion:region mipmapLevel:mipLevelNumber - slice:array + slice:chunkNum withBytes:srcBytes bytesPerRow:bytesPerRow bytesPerImage:0]; @@ -483,84 +556,90 @@ - (void)setMipgenNeeded:(BOOL)enabled { return texture; } -@end - //-------------------------- - - - -@implementation KramBlitLoader { - // this must be created in render, and then do blits into this - id _blitEncoder; - id _buffer; - uint8_t* data; - size_t dataSize; -} - -- (nonnull instancetype)init { - self = [super init]; +- (void)createStagingBufffer:(uint64_t)dataSize { // must be aligned to pagesize() or can't use with newBufferWithBytesNoCopy // enough to upload 4k x 4k @ 4 bytes no mips, careful with array and cube that get too big - dataSize = 64*1024*1024; - posix_memalign((void**)&data, getpagesize(), dataSize); + // allocate system memory for bufffer, can memcopy to this + posix_memalign((void**)&_data, getpagesize(), dataSize); // allocate memory for circular staging buffer, only need to memcpy to this // but need a rolling buffer atop to track current begin/end. - _buffer = [_device newBufferWithBytesNoCopy:data + _buffer = [_device newBufferWithBytesNoCopy:_data length:dataSize options:MTLResourceStorageModeShared deallocator: ^(void *macroUnusedArg(pointer), NSUInteger macroUnusedArg(length)) { - delete data; + delete _data; } ]; - return self; } -- (nullable id)createTexture:(KTXImage&)image { - MTLTextureDescriptor *textureDescriptor = [[MTLTextureDescriptor alloc] init]; - - // Indicate that each pixel has a blue, green, red, and alpha channel, where each channel is - // an 8-bit unsigned normalized value (i.e. 
0 maps to 0.0 and 255 maps to 1.0) - textureDescriptor.textureType = (MTLTextureType)image.textureType; - textureDescriptor.pixelFormat = (MTLPixelFormat)image.pixelFormat; - - // Set the pixel dimensions of the texture - textureDescriptor.width = image.width; - textureDescriptor.height = MAX(1, image.height); - textureDescriptor.depth = MAX(1, image.depth); - textureDescriptor.arrayLength = MAX(1, image.header.numberOfArrayElements); - // ignoring 0 (auto mip), but might need to support for explicit formats - // must have hw filtering support for format, and 32f filtering only first appeared on A14/M1 - // and only get box filtering in API-level filters. But would cut storage. - textureDescriptor.mipmapLevelCount = MAX(1, image.header.numberOfMipmapLevels); +- (void)uploadTexturesIfNeeded:(id)blitEncoder commandBuffer:(id)commandBuffer { + if (_mipgenTextures.count > 0) { + for (id texture in _mipgenTextures) { + // autogen mips will include srgb conversions, so toggling srgb on/off isn't quite correct + [blitEncoder generateMipmapsForTexture:texture]; + } + + // reset the arra + [_mipgenTextures removeAllObjects]; + } - // needed for blit, - textureDescriptor.storageMode = MTLStorageModePrivate; - - // only do this for viewer - // but allows encoded textures to enable/disable their sRGB state. - // Since the view isn't accurate, will probably pull this out. - // Keep usageRead set by default. - //textureDescriptor.usage = MTLTextureUsageShaderRead; - - // this was so that could toggle srgb on/off, but mips are built linear and encoded as lin or srgb - // in the encoded formats so this wouldn't accurately reflect with/without srgb. - //textureDescriptor.usage |= MTLTextureUsagePixelFormatView; - - // Create the texture from the device by using the descriptor - id texture = [self.device newTextureWithDescriptor:textureDescriptor]; - if (!texture) { - KLOGE("kramv", "could not allocate texture"); - return nil; + if (!_blits.empty()) { + // now upload from staging MTLBuffer to private MTLTexture + for (const auto& blit: _blits) { + MTLRegion region = { + { 0, 0, 0 }, // MTLOrigin + { (NSUInteger)blit.w, (NSUInteger)blit.h, 1 } // MTLSize + }; + + uint32_t chunkNum = blit.chunkNum; + if (blit.is3D) { + region.origin.z = chunkNum; + chunkNum = 0; + } + + //assert(blit.textureIndex < _blitTextures.count); + id texture = _blitTextures[blit.textureIndex]; + + [blitEncoder copyFromBuffer:_buffer + sourceOffset:blit.mipOffset + sourceBytesPerRow:blit.bytesPerRow + sourceBytesPerImage:blit.mipStorageSize + sourceSize:region.size + + toTexture:texture + destinationSlice:chunkNum + destinationLevel:blit.mipLevelNumber + destinationOrigin:region.origin + options:MTLBlitOptionNone + ]; + } + + // reset the array and buffer offset, so can upload more textures + _blits.clear(); + [_blitTextures removeAllObjects]; + + // TODO: use atomic on this + uint32_t bufferOffsetCopy = _bufferOffset; + [commandBuffer addCompletedHandler:^(id /* buffer */) + { + // can only reset this once gpu completes the blits above + // also guard against addding to this in blitTextureFromImage when completion handler will reset to 0 + if (_bufferOffset == bufferOffsetCopy) + _bufferOffset = 0; + }]; } - - return texture; +} + +inline uint64_t alignOffset(uint64_t offset, uint64_t alignment) { + return offset + (alignment - offset % alignment) % alignment; } // Has a synchronous upload via replaceRegion that only works for shared/managed (f.e. 
ktx), @@ -568,29 +647,39 @@ - (nonnull instancetype)init { // Could repack ktx data into ktxa before writing to temporary file, or when copying NSData into MTLBuffer. - (nullable id)blitTextureFromImage:(KTXImage &)image { - id texture = [self createTexture:image]; + if (_buffer == nil) { + // this is only 4k x 4x @ RGBA8u with mips, 8k x 8k compressed with mips + [self createStagingBufffer: 128*1024*1024]; + } + + // TODO: first make sure have enough buffer to upload, otherwise need to queue this image + // try not to load much until that's established + // queue would need KTXImage and mmap to stay alive long enough for queue to be completed + if (_bufferOffset != 0) { + return nil; + } + + id texture = [self createTexture:image isPrivate:true]; // Note: always starting at 0 here, since kramv is only uploading 1 texture // but a real uploader would upload until buffer full, and then reset this back to 0 // A circular buffer if large enough to support multiple uploads over time. // This can be a lot of temporary memory and must complete upload before changing. - uint64_t bufferOffset = 0; - //-------------------------------- // upload mip levels // TODO: about aligning to 4k for base + length // http://metalkit.org/2017/05/26/working-with-memory-in-metal-part-2.html - int32_t w = image.width; - int32_t h = image.height; - int32_t d = image.depth; + uint32_t w = image.width; + uint32_t h = image.height; + uint32_t d = image.depth; - int32_t numMips = MAX(1, image.header.numberOfMipmapLevels); - int32_t numArrays = MAX(1, image.header.numberOfArrayElements); - int32_t numFaces = MAX(1, image.header.numberOfFaces); - int32_t numSlices = MAX(1, image.depth); + uint32_t numMips = MAX(1, image.header.numberOfMipmapLevels); + uint32_t numArrays = MAX(1, image.header.numberOfArrayElements); + uint32_t numFaces = MAX(1, image.header.numberOfFaces); + uint32_t numSlices = MAX(1, image.depth); Int2 blockDims = image.blockDims(); @@ -603,22 +692,35 @@ - (nonnull instancetype)init { const uint8_t* mipData = (const uint8_t*)image.fileData; bufferOffsets.resize(image.mipLevels.size()); - for (int32_t i = 0; i < numMips; ++i) { + uint32_t bufferOffset = _bufferOffset; + uint32_t numChunks = image.totalChunks(); + + for (uint32_t i = 0; i < numMips; ++i) { const KTXImageLevel& mipLevel = image.mipLevels[i]; // pad buffer offset to a multiple of the blockSize - bufferOffset += (blockSize - 1) - (bufferOffset & blockSize); - bufferOffsets[i] = bufferOffset; - bufferOffset += mipLevel.length; + bufferOffset = alignOffset(bufferOffset, blockSize); // this may have to decompress the level data image.unpackLevel(i, mipData + mipLevel.offset, bufferData + bufferOffset); + + bufferOffsets[i] = bufferOffset; + bufferOffset += mipLevel.length * numChunks; } - // blit encode calls must all be submitted to an encoder - // but may not have to be on the render thrad? - for (int32_t mipLevelNumber = 0; mipLevelNumber < numMips; ++mipLevelNumber) { + // Should this be split off after cpu upload, could code store enough + // in a vector to jettison the KTXImage. Also need a queue of textures + // that are not fully loaded or haven't started if sharing the staging buffer. + // Note that it is just system ram, and can have allocations stored into it + // and can be viewed in the debugger and can do memcpy to it above. + + //-------------------- + + // blit encoder calls must all be submitted to an open MTLBlitCommandEncoder, + // but may not have to be on the render thread? 
+ + for (uint32_t mipLevelNumber = 0; mipLevelNumber < numMips; ++mipLevelNumber) { // there's a 4 byte levelSize for each mipLevel // the mipLevel.offset is immediately after this @@ -626,11 +728,11 @@ - (nonnull instancetype)init { const KTXImageLevel& mipLevel = image.mipLevels[mipLevelNumber]; // only have face, face+array, or slice but this handles all cases - for (int array = 0; array < numArrays; ++array) { - for (int face = 0; face < numFaces; ++face) { - for (int slice = 0; slice < numSlices; ++slice) { + for (uint32_t array = 0; array < numArrays; ++array) { + for (uint32_t face = 0; face < numFaces; ++face) { + for (uint32_t slice = 0; slice < numSlices; ++slice) { - int32_t bytesPerRow = 0; + uint32_t bytesPerRow = 0; // 1D/1DArray textures set bytesPerRow to 0 if ((MTLTextureType)image.textureType != MTLTextureType1D && @@ -639,14 +741,14 @@ - (nonnull instancetype)init { // for compressed, bytesPerRow needs to be multiple of block size // so divide by the number of blocks making up the height //int xBlocks = ((w + blockDims.x - 1) / blockDims.x); - int32_t yBlocks = ((h + blockDims.y - 1) / blockDims.y); + uint32_t yBlocks = ((h + blockDims.y - 1) / blockDims.y); // Calculate the number of bytes per row in the image. // for compressed images this is xBlocks * blockSize - bytesPerRow = (int32_t)mipLevel.length / yBlocks; + bytesPerRow = mipLevel.length / yBlocks; } - int32_t chunkNum; + uint32_t chunkNum = 0; if (image.header.numberOfArrayElements > 1) { // can be 1d, 2d, or cube array @@ -668,34 +770,24 @@ - (nonnull instancetype)init { // Have uploaded to buffer in same order visiting chunks. // Note: no call on MTLBlitEncoder to copy entire level of mips like glTexImage3D - uint64_t mipOffset = bufferOffsets[mipLevelNumber] + chunkNum * mipStorageSize; + uint64_t mipOffset = bufferOffsets[mipLevelNumber] + chunkNum * mipStorageSize; { bool is3D = image.textureType == MyMTLTextureType3D; - // cpu copy the bytes from the data object into the texture - MTLRegion region = { - { 0, 0, 0 }, // MTLOrigin - { (NSUInteger)w, (NSUInteger)h, 1 } // MTLSize - }; - - if (is3D) { - region.origin.z = chunkNum; - chunkNum = 0; - } - - [_blitEncoder copyFromBuffer:_buffer - sourceOffset:mipOffset - sourceBytesPerRow:bytesPerRow - sourceBytesPerImage:mipStorageSize - sourceSize:region.size - - toTexture:texture - destinationSlice:chunkNum - destinationLevel:mipLevelNumber - destinationOrigin:region.origin - options:MTLBlitOptionNone - ]; + _blits.push_back({ + // use named inits here + w, h, + chunkNum, + + mipLevelNumber, + mipStorageSize, + mipOffset, + + (uint32_t)_blitTextures.count, + bytesPerRow, + is3D // could derive from textureIndex lookup + }); } } } @@ -704,9 +796,12 @@ - (nonnull instancetype)init { mipDown(w, h, d); } - // this only affect managed textures - [_blitEncoder optimizeContentsForGPUAccess:texture]; + // everything succeded, so advance the offset + _bufferOffset = bufferOffset; + [_blitTextures addObject: texture]; + // this texture cannot be used until buffer uploads complete + // but those happen at beginning of frame, so can attach to shaders, etc return texture; } diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index 14cce950..eb42a481 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -742,7 +742,7 @@ - (void)drawInMTKView:(nonnull MTKView *)view _uniformBufferIndex = (_uniformBufferIndex + 1) % MaxBuffersInFlight; - id commandBuffer = [_commandQueue commandBuffer]; + id commandBuffer = [_commandQueue commandBuffer]; commandBuffer.label 
= @"MyCommand"; __block dispatch_semaphore_t block_sema = _inFlightSemaphore; @@ -758,18 +758,11 @@ - (void)drawInMTKView:(nonnull MTKView *)view // also use to readback pixels // also use for async texture upload - bool needsBlit = _loader.isMipgenNeeded && _colorMap.mipmapLevelCount > 1; - if (needsBlit) { - id blitEncoder = [commandBuffer blitCommandEncoder]; + id blitEncoder = [commandBuffer blitCommandEncoder]; + if (blitEncoder) + { blitEncoder.label = @"MyBlitEncoder"; - - // autogen mips will include srgb conversions, so toggling srgb on/off isn't quite correct - if (_loader.mipgenNeeded) { - [blitEncoder generateMipmapsForTexture:_colorMap]; - - _loader.mipgenNeeded = NO; - } - + [_loader uploadTexturesIfNeeded:blitEncoder commandBuffer:commandBuffer]; [blitEncoder endEncoding]; } diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index 4cbf359b..e1ffb90d 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -1959,7 +1959,7 @@ - (BOOL)loadTextureFromArchive:(const char*)filename timestamp:(double)timestamp _noImageLoaded = NO; } - _showSettings->isArchive = false; + _showSettings->isArchive = true; // show/hide button [self updateUIAfterLoad]; diff --git a/libkram/kram/KTXImage.cpp b/libkram/kram/KTXImage.cpp index 948a32c3..ff45debc 100644 --- a/libkram/kram/KTXImage.cpp +++ b/libkram/kram/KTXImage.cpp @@ -1417,7 +1417,6 @@ bool KTXImage::openKTX2(const uint8_t* imageData, size_t imageDataLength, bool i header.bytesOfKeyValueData = 0; initProps(imageData + header2.kvdByteOffset, header2.kvdByteLength); - // skip parsing th elevels if (isInfoOnly) { skipImageLength = true; @@ -1433,13 +1432,10 @@ bool KTXImage::openKTX2(const uint8_t* imageData, size_t imageDataLength, bool i // copy the original ktx2 levels, this includes mip compression bool isCompressed = (mipLevels[0].lengthCompressed > 0) && - (mipLevels[0].length != mipLevels[0].lengthCompressed); + ((mipLevels[0].length * numChunks) != mipLevels[0].lengthCompressed); - for (auto& level : mipLevels) { - level.length /= numChunks; - - // this indicates not compressed - if (!isCompressed) { + if (!isCompressed) { + for (auto& level : mipLevels) { level.lengthCompressed = 0; } } @@ -1472,6 +1468,7 @@ bool KTXImage::openKTX2(const uint8_t* imageData, size_t imageDataLength, bool i // the offsets are reversed in ktx2 file level1.offset = level2.offset; + assert(level1.lengthCompressed == 0); if (level1.length != level2.length) { @@ -1547,14 +1544,9 @@ bool KTXImage::unpackLevel(uint32_t mipNumber, const uint8_t* srcData, uint8_t* case KTX2SupercompressionZstd: { // decompress from zstd directly into ktx1 ordered chunk // Note: decode fails with FSE_decompress. 
- ZSTD_DCtx* dctx = ZSTD_createDCtx(); - if (!dctx) - return false; - - auto dstDataSizeZstd = ZSTD_decompressDCtx(dctx, + size_t dstDataSizeZstd = ZSTD_decompress( dstData, dstDataSize, srcData, srcDataSize); - ZSTD_freeDCtx(dctx); if (ZSTD_isError(dstDataSizeZstd)) { KLOGE("kram", "decode mip zstd failed"); diff --git a/libkram/kram/KTXImage.h b/libkram/kram/KTXImage.h index 0174786d..19c30f05 100644 --- a/libkram/kram/KTXImage.h +++ b/libkram/kram/KTXImage.h @@ -306,7 +306,7 @@ class KTXImage { vector& imageData(); // for KTX2 files, the mips can be compressed using various encoders - bool isSupercompressed() const { return isKTX2() && !mipLevels.empty() && mipLevels[0].lengthCompressed != 0; } + bool isSupercompressed() const { return isKTX2() && mipLevels[0].lengthCompressed != 0; } bool isKTX1() const { return !skipImageLength; } bool isKTX2() const { return skipImageLength; } @@ -314,6 +314,12 @@ class KTXImage { // can use on ktx1/2 files, does a decompress if needed bool unpackLevel(uint32_t mipNumber, const uint8_t* srcData, uint8_t* dstData); + // helpers to work with the mipLevels array, mipLength and levelLength are important to get right + size_t mipLength(uint32_t mipNumber) const { return mipLevels[mipNumber].length; } + size_t levelLength(uint32_t mipNumber) const { return mipLevels[mipNumber].length * totalChunks(); } + size_t levelLengthCompressed(uint32_t mipNumber) const { return mipLevels[mipNumber].lengthCompressed; } + size_t chunkOffset(uint32_t mipNumber, uint32_t chunkNumber) const { return mipLevels[mipNumber].offset + mipLevels[mipNumber].length * chunkNumber; } + private: bool openKTX2(const uint8_t* imageData, size_t imageDataLength, bool isInfoOnly); diff --git a/libkram/kram/KramConfig.h b/libkram/kram/KramConfig.h index 97fb39f6..e3b84c0a 100644 --- a/libkram/kram/KramConfig.h +++ b/libkram/kram/KramConfig.h @@ -379,7 +379,7 @@ inline half4 toHalf4(const float4& vv) //--------------------------------------- -inline void mipDown(int32_t& w, int32_t& h, int32_t& d) +inline void mipDown(int32_t& w, int32_t& h, int32_t& d, uint32_t lod = 1) { // GL/D3D hobbled non-pow2 mips by only supporting round down, not round up // And then Metal followed OpenGL since it's the same hw and drivers. @@ -388,9 +388,27 @@ inline void mipDown(int32_t& w, int32_t& h, int32_t& d) // http://download.nvidia.com/developer/Papers/2005/NP2_Mipmapping/NP2_Mipmap_Creation.pdf // round-down - w = w / 2; - h = h / 2; - d = h / 2; + w >>= (int32_t)lod; + h >>= (int32_t)lod; + d >>= (int32_t)lod; + + if (w < 1) w = 1; + if (h < 1) h = 1; + if (d < 1) d = 1; +} + +inline void mipDown(uint32_t& w, uint32_t& h, uint32_t& d, uint32_t lod = 1) +{ + // GL/D3D hobbled non-pow2 mips by only supporting round down, not round up + // And then Metal followed OpenGL since it's the same hw and drivers. + // Round up adds an extra mip level to the chain, but results in much better filtering. 
+ // https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_non_power_of_two.txt + // http://download.nvidia.com/developer/Papers/2005/NP2_Mipmapping/NP2_Mipmap_Creation.pdf + + // round-down + w >>= lod; + h >>= lod; + d >>= lod; if (w < 1) w = 1; if (h < 1) h = 1; From 1067d0c8749a033a1a983b2c5c205f461dac1e1a Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Mon, 24 May 2021 09:35:45 -0700 Subject: [PATCH 071/901] kramv - a little more work on state on buttons/menus buttons still don't highlight, so may need to do that explicitly in the update Also need to change how rgba buttons work --- kramv/KramViewerBase.h | 2 + kramv/KramViewerMain.mm | 89 +++++++++++++++++++++++++++++++++++++++-- 2 files changed, 87 insertions(+), 4 deletions(-) diff --git a/kramv/KramViewerBase.h b/kramv/KramViewerBase.h index d907c296..40f883da 100644 --- a/kramv/KramViewerBase.h +++ b/kramv/KramViewerBase.h @@ -81,6 +81,8 @@ class ShowSettings { bool isBlockGridShown = false; bool isAtlasGridShown = false; + bool isAnyGridShown() const { return isPixelGridShown || isBlockGridShown || isAtlasGridShown; } + // show all mips, faces, arrays all at once bool isShowingAllLevelsAndMips = false; diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index e1ffb90d..5f88d149 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -545,9 +545,15 @@ - (NSStackView*)_addButtons { [button setToolTip:toolTip]; button.hidden = NO; - // turn off rounded bezel +#if 0 + // can use this with border + // TODO: for some reason this breaks clicking on buttons + // TODO: eliminate the rounded border + button.showsBorderOnlyWhileMouseInside = YES; + button.bordered = YES; +#else button.bordered = NO; - +#endif [button setFrame:rect]; // stackView seems to disperse the items evenly across the area, so this doesn't work @@ -1272,6 +1278,7 @@ - (void)updateUIAfterLoad { bool isJumpToNextHidden = !_showSettings->isArchive; + bool isRedHidden = false; bool isGreenHidden = _showSettings->numChannels <= 1; bool isBlueHidden = _showSettings->numChannels <= 2 && !_showSettings->isNormal; // reconstruct z = b on normals @@ -1279,6 +1286,9 @@ - (void)updateUIAfterLoad { // but internally store R,RG01,... etc. Can get more data from swizzle in the props. // Often alpha doesn't store anything useful to view. 
+ // TODO: may want to disable isPremul on block textures that already have premul in data + // or else premul is applied a second time to the visual + bool hasAlpha = _showSettings->numChannels >= 3; bool isAlphaHidden = !hasAlpha; @@ -1294,6 +1304,7 @@ - (void)updateUIAfterLoad { [self findButton:"S"].hidden = isShowAllHidden; [self findButton:"J"].hidden = isJumpToNextHidden; + [self findButton:"R"].hidden = isRedHidden; [self findButton:"G"].hidden = isGreenHidden; [self findButton:"B"].hidden = isBlueHidden; [self findButton:"A"].hidden = isAlphaHidden; @@ -1311,6 +1322,7 @@ - (void)updateUIAfterLoad { [self findMenuItem:"S"].hidden = isShowAllHidden; [self findMenuItem:"J"].hidden = isJumpToNextHidden; + [self findMenuItem:"R"].hidden = isRedHidden; [self findMenuItem:"G"].hidden = isGreenHidden; [self findMenuItem:"B"].hidden = isBlueHidden; [self findMenuItem:"A"].hidden = isAlphaHidden; @@ -1318,6 +1330,69 @@ - (void)updateUIAfterLoad { [self findMenuItem:"P"].hidden = isPremulHidden; [self findMenuItem:"N"].hidden = isSignedHidden; [self findMenuItem:"C"].hidden = isCheckerboardHidden; + + // also need to call after each toggle + [self updateUIControlState]; +} + +- (void)updateUIControlState +{ + // there is also mixed + auto On = NSControlStateValueOn; + auto Off = NSControlStateValueOff; + + auto showAllState = _showSettings->isShowingAllLevelsAndMips ? On : Off; + auto premulState = _showSettings->isPremul ? On : Off; + auto signedState = _showSettings->isSigned ? On : Off; + auto checkerboardState = _showSettings->isCheckerboardShown ? On : Off; + auto previewState = _showSettings->isPreview ? On : Off; + auto gridState = _showSettings->isAnyGridShown() ? On : Off; + auto wrapState = _showSettings->isWrap ? On : Off; + auto debugState = (_showSettings->debugMode != DebugModeNone) ? 
On : Off; + + // buttons +// [self findButton:"Y"].state = +// [self findButton:"F"].state = +// [self findButton:"M"].state = +// [self findButton:"J"].state = +// +// [self findButton:"R"].state = +// [self findButton:"G"].state = +// [self findButton:"B"].state = +// [self findButton:"A"].state = + + [self findButton:"S"].state = showAllState; + [self findButton:"O"].state = previewState; + [self findButton:"W"].state = wrapState; + [self findButton:"D"].state = gridState; + [self findButton:"E"].state = debugState; + + [self findButton:"P"].state = premulState; + [self findButton:"N"].state = signedState; + [self findButton:"C"].state = checkerboardState; + + // menus (may want to disable, not hide) + // problem is crashes since menu seems to strip hidden items + // enabled state has to be handled in validateUserInterfaceItem +// [self findMenuItem:"Y"].state = +// [self findMenuItem:"F"].state = +// [self findMenuItem:"M"].state = +// [self findMenuItem:"J"].state = +// +// [self findMenuItem:"R"].state = +// [self findMenuItem:"G"].state = +// [self findMenuItem:"B"].state = +// [self findMenuItem:"A"].state = + + [self findMenuItem:"S"].state = showAllState; + [self findMenuItem:"O"].state = previewState; + [self findMenuItem:"W"].state = wrapState; + [self findMenuItem:"D"].state = gridState; + [self findMenuItem:"E"].state = debugState; + + [self findMenuItem:"P"].state = premulState; + [self findMenuItem:"N"].state = signedState; + [self findMenuItem:"C"].state = checkerboardState; } @@ -1793,6 +1868,8 @@ - (void)handleKey:(uint32_t)keyCode isShiftKeyDown:(bool)isShiftKeyDown } if (isChanged) { + [self updateUIControlState]; + self.needsDisplay = YES; } } @@ -1880,7 +1957,7 @@ -(BOOL)loadArchive:(const char*)zipFilename if (!_zip.openForRead(_zipMmap.data(), _zipMmap.dataLength())) { return NO; } - + // load the first entry in the archive _fileIndex = 0; @@ -1988,8 +2065,12 @@ - (BOOL)loadTextureFromURL:(NSURL*)url { return NO; } - // store the + // store the archive url self.imageURL = url; + + // add it to recent docs + NSDocumentController* dc = [NSDocumentController sharedDocumentController]; + [dc noteNewRecentDocumentURL:url]; } // now reload the filename if needed From a3810732c41dbf2f23654f81f68028f7a2f23f1c Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Mon, 24 May 2021 23:39:17 -0700 Subject: [PATCH 072/901] kram - fix bugs in rowBytes on LoadImageFromKTX, simplify loader, move ZipHelper to libkram Starting to simplify the loader, so can extract C++ portion from ObjC++. Could use loader for other APIs then. More error handling on loader. ZipHelper is useful in library for bundle handling. It didn't need to live in kramv. 
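As context for the rowBytes/indexing part of this change, here is a minimal standalone sketch of the row-major addressing that loadImageFromKTX settles on (the sizes and names here are illustrative, not from the loader itself): the row start must advance by width, not height, which only coincides for square images.

    // Minimal sketch: row-major pixel addressing with a row start of width * y.
    #include <cstdint>
    #include <vector>

    int main() {
        const int32_t width = 8, height = 4, numChannels = 4; // illustrative sizes
        std::vector<uint8_t> pixels(width * height * numChannels, 0);

        for (int32_t y = 0; y < height; ++y) {
            int32_t y0 = width * y;                    // row start in pixels (was height * y)
            for (int32_t x = 0; x < width; ++x) {
                int32_t srcX = (y0 + x) * numChannels; // element offset of pixel (x, y)
                pixels[srcX + 0] = 255;                // touch the red channel
            }
        }
        return 0;
    }
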
--- kramv/KramLoader.mm | 191 +++++++++------------- libkram/kram/KTXImage.h | 2 + libkram/kram/KramImage.cpp | 8 +- {kramv => libkram/kram}/KramZipHelper.cpp | 0 {kramv => libkram/kram}/KramZipHelper.h | 0 5 files changed, 87 insertions(+), 114 deletions(-) rename {kramv => libkram/kram}/KramZipHelper.cpp (100%) rename {kramv => libkram/kram}/KramZipHelper.h (100%) diff --git a/kramv/KramLoader.mm b/kramv/KramLoader.mm index b4cf12c9..7d75a90d 100644 --- a/kramv/KramLoader.mm +++ b/kramv/KramLoader.mm @@ -78,34 +78,47 @@ - (instancetype)init { // on macOS/arm, the M1 supports all 3 encode formats #define DO_DECODE TARGET_CPU_X86_64 -- (BOOL)decodeImageIfNeeded:(KTXImage&)image imageDecoded:(KTXImage&)imageDecoded useImageDecoded:(bool&)useImageDecoded -{ #if DO_DECODE - useImageDecoded = false; + +// this means format isnt supported on platform, but can be decoded to rgba to display +bool isDecodeImageNeeded(MyMTLPixelFormat pixelFormat) { + bool needsDecode = false; + + if (isETCFormat(pixelFormat)) { + needsDecode = true; + } + else if (isASTCFormat(pixelFormat)) { + needsDecode = true; + } + return needsDecode; +} + +bool decodeImage(KTXImage& image, KTXImage& imageDecoded) +{ Image imageUnused; // TODO: move to only using KTXImage, decode needs to move there if (isETCFormat(image.pixelFormat)) { if (!imageUnused.decode(image, imageDecoded, kTexEncoderEtcenc, false, "")) { return NO; } - useImageDecoded = true; } else if (isASTCFormat(image.pixelFormat)) { if (!imageUnused.decode(image, imageDecoded, kTexEncoderAstcenc, false, "")) { return NO; } - - useImageDecoded = true; + } + else { + assert(false); // don't call this routine if decode not needed } // TODO: decode BC format on iOS when not supported, but viewer only on macOS for now -#endif - return YES; } +#endif + #if SUPPORT_RGB inline bool isInternalRGBFormat(MyMTLPixelFormat format) { @@ -163,18 +176,17 @@ inline MyMTLPixelFormat remapInternalRGBFormat(MyMTLPixelFormat format) { // see if it needs decode first bool needsDecode = false; + if (isInternalRGBFormat(image.pixelFormat)) { needsDecode = true; } #if DO_DECODE - else if (isETCFormat(image.pixelFormat)) { - needsDecode = true; - } - else if (isASTCFormat(image.pixelFormat)) { + else if (isDecodeImageNeeded(image.pixelFormat)) { needsDecode = true; } #endif + // open it again, but unpack the levels if supercompressed if (needsDecode) { isInfoOnly = false; @@ -185,7 +197,7 @@ inline MyMTLPixelFormat remapInternalRGBFormat(MyMTLPixelFormat format) { #if SUPPORT_RGB if (isInternalRGBFormat(image.pixelFormat)) { - // loads and converts image to RGBA version + // loads and converts image from RGB to RGBA Image rbgaImage; if (!rbgaImage.loadImageFromKTX(image)) return nil; @@ -207,7 +219,7 @@ inline MyMTLPixelFormat remapInternalRGBFormat(MyMTLPixelFormat format) { } if (originalFormat != nullptr) { - *originalFormat = (MTLPixelFormat)rbgaImage2.pixelFormat; + *originalFormat = (MTLPixelFormat)rbgaImage2.pixelFormat; // TODO: should this return rgbaImage.pixelFormat ? 
} return [self loadTextureFromImage:rbgaImage2]; @@ -217,28 +229,29 @@ inline MyMTLPixelFormat remapInternalRGBFormat(MyMTLPixelFormat format) { if (originalFormat != nullptr) { *originalFormat = (MTLPixelFormat)image.pixelFormat; } - +#if DO_DECODE if (needsDecode) { KTXImage imageDecoded; - bool useImageDecoded = false; - if (![self decodeImageIfNeeded:image imageDecoded:imageDecoded useImageDecoded:useImageDecoded]) { + if (!decodeImage(image, imageDecoded)) { return nil; } - return [self loadTextureFromImage:useImageDecoded ? imageDecoded : image]; + return [self loadTextureFromImage:imageDecoded]; } - else { + else +#endif + { // fast load path directly from mmap'ed data, decompress direct to staging return [self blitTextureFromImage:image]; } } -static int32_t numberOfMipmapLevels(const Image& image) { - int32_t w = image.width(); - int32_t h = image.height(); - int32_t maxDim = MAX(w,h); +static uint32_t numberOfMipmapLevels(const Image& image) { + uint32_t w = image.width(); + uint32_t h = image.height(); + uint32_t maxDim = MAX(w,h); - int32_t numberOfMips = 1; + uint32_t numberOfMips = 1; while (maxDim > 1) { numberOfMips++; maxDim = maxDim >> 1; @@ -249,6 +262,8 @@ static int32_t numberOfMipmapLevels(const Image& image) { - (nullable id)loadTextureFromPNGData:(const uint8_t*)data dataSize:(int32_t)dataSize isSRGB:(BOOL)isSRGB originalFormat:(nullable MTLPixelFormat*)originalFormat { // can only load 8u and 16u from png, no hdr formats, no premul either, no props + // this also doesn't handle strips like done in libkram. + Image sourceImage; bool isLoaded = LoadPng(data, dataSize, false, false, sourceImage); if (!isLoaded) { @@ -279,14 +294,12 @@ static int32_t numberOfMipmapLevels(const Image& image) { } // cpu copy the bytes from the data object into the texture - int32_t sliceOrArrayOrFace = 0; - const MTLRegion region = { - { 0, 0, (NSUInteger)sliceOrArrayOrFace }, // MTLOrigin + { 0, 0, 0 }, // MTLOrigin { static_cast(image.width), static_cast(image.height), 1 } // MTLSize }; - int32_t bytesPerRow = 4 * sourceImage.width(); + size_t bytesPerRow = 4 * sourceImage.width(); [texture replaceRegion:region mipmapLevel:0 @@ -368,16 +381,6 @@ static int32_t numberOfMipmapLevels(const Image& image) { if (isPrivate) textureDescriptor.storageMode = MTLStorageModePrivate; - // only do this for viewer - // but allows encoded textures to enable/disable their sRGB state. - // Since the view isn't accurate, will probably pull this out. - // Keep usageRead set by default. - //textureDescriptor.usage = MTLTextureUsageShaderRead; - - // this was so that could toggle srgb on/off, but mips are built linear and encoded as lin or srgb - // in the encoded formats so this wouldn't accurately reflect with/without srgb. 
- //textureDescriptor.usage |= MTLTextureUsagePixelFormatView; - // Create the texture from the device by using the descriptor id texture = [self.device newTextureWithDescriptor:textureDescriptor]; if (!texture) { @@ -411,11 +414,14 @@ static int32_t numberOfMipmapLevels(const Image& image) { // TODO: reuse staging _buffer and _bufferOffset here, these large allocations take time vector mipStorage; - mipStorage.resize(image.mipLevels[0].length * numChunks); // enough to hold biggest mip + mipStorage.resize(image.mipLengthLargest() * numChunks); // enough to hold biggest mip //----------------- id texture = [self createTexture:image isPrivate:false]; + if (!texture) { + return nil; + } const uint8_t* srcLevelData = image.fileData; @@ -430,7 +436,9 @@ static int32_t numberOfMipmapLevels(const Image& image) { // unpack the whole level in-place if (image.isSupercompressed()) { - image.unpackLevel(mipLevelNumber, image.fileData + mipLevel.offset, mipStorage.data()); + if (!image.unpackLevel(mipLevelNumber, image.fileData + mipLevel.offset, mipStorage.data())) { + return nil; + } srcLevelData = mipStorage.data(); // going to upload from mipStorage temp array @@ -445,8 +453,8 @@ static int32_t numberOfMipmapLevels(const Image& image) { uint32_t bytesPerRow = 0; // 1D/1DArray textures set bytesPerRow to 0 - if ((MTLTextureType)image.textureType != MTLTextureType1D && - (MTLTextureType)image.textureType != MTLTextureType1DArray) + if (//image.textureType != MyMTLTextureType1D && + image.textureType != MyMTLTextureType1DArray) { // for compressed, bytesPerRow needs to be multiple of block size // so divide by the number of blocks making up the height @@ -491,60 +499,28 @@ static int32_t numberOfMipmapLevels(const Image& image) { // Note: due to API limit we can only copy one chunk at a time. With KramBlitLoader // can copy the whole level to buffer, and then reference chunks within. 
- bool isCubemap = image.textureType == MyMTLTextureTypeCube || - image.textureType == MyMTLTextureTypeCubeArray; - bool is3D = image.textureType == MyMTLTextureType3D; - bool is2DArray = image.textureType == MyMTLTextureType2DArray; - bool is1DArray = image.textureType == MyMTLTextureType1DArray; - + bool is3D = image.textureType == MyMTLTextureType3D; + // sync cpu copy the bytes from the data object into the texture MTLRegion region = { { 0, 0, 0 }, // MTLOrigin { (NSUInteger)w, (NSUInteger)h, 1 } // MTLSize }; - if (is1DArray) { - [texture replaceRegion:region - mipmapLevel:mipLevelNumber - slice:chunkNum - withBytes:srcBytes - bytesPerRow:bytesPerRow - bytesPerImage:0]; - } - else if (isCubemap) { - [texture replaceRegion:region - mipmapLevel:mipLevelNumber - slice:chunkNum - withBytes:srcBytes - bytesPerRow:bytesPerRow - bytesPerImage:0]; - } - else if (is3D) { + size_t bytesPerImage = 0; + if (is3D) { region.origin.z = chunkNum; chunkNum = 0; - - [texture replaceRegion:region - mipmapLevel:mipLevelNumber - slice:chunkNum - withBytes:srcBytes - bytesPerRow:bytesPerRow - bytesPerImage:mipStorageSize]; // only for 3d - } - else if (is2DArray) { - [texture replaceRegion:region - mipmapLevel:mipLevelNumber - slice:chunkNum - withBytes:srcBytes - bytesPerRow:bytesPerRow - bytesPerImage:0]; - } - else { - - [texture replaceRegion:region - mipmapLevel:mipLevelNumber - withBytes:srcBytes - bytesPerRow:bytesPerRow]; + bytesPerImage = mipStorageSize; } + + [texture replaceRegion:region + mipmapLevel:mipLevelNumber + slice:chunkNum + withBytes:srcBytes + bytesPerRow:bytesPerRow + bytesPerImage:bytesPerImage]; + } } } @@ -648,7 +624,7 @@ inline uint64_t alignOffset(uint64_t offset, uint64_t alignment) { - (nullable id)blitTextureFromImage:(KTXImage &)image { if (_buffer == nil) { - // this is only 4k x 4x @ RGBA8u with mips, 8k x 8k compressed with mips + // this is enough to upload 4k x 4x @ RGBA8u with mips, 8k x 8k compressed with mips @96MB [self createStagingBufffer: 128*1024*1024]; } @@ -660,11 +636,11 @@ inline uint64_t alignOffset(uint64_t offset, uint64_t alignment) { } id texture = [self createTexture:image isPrivate:true]; + if (!texture) + return nil; - // Note: always starting at 0 here, since kramv is only uploading 1 texture - // but a real uploader would upload until buffer full, and then reset this back to 0 - // A circular buffer if large enough to support multiple uploads over time. - // This can be a lot of temporary memory and must complete upload before changing. 
+ // this is index where texture will be added + uint32_t textureIndex = (uint32_t)_blitTextures.count; //-------------------------------- // upload mip levels @@ -692,9 +668,10 @@ inline uint64_t alignOffset(uint64_t offset, uint64_t alignment) { const uint8_t* mipData = (const uint8_t*)image.fileData; bufferOffsets.resize(image.mipLevels.size()); - uint32_t bufferOffset = _bufferOffset; uint32_t numChunks = image.totalChunks(); + uint32_t bufferOffset = _bufferOffset; + for (uint32_t i = 0; i < numMips; ++i) { const KTXImageLevel& mipLevel = image.mipLevels[i]; @@ -702,23 +679,20 @@ inline uint64_t alignOffset(uint64_t offset, uint64_t alignment) { bufferOffset = alignOffset(bufferOffset, blockSize); // this may have to decompress the level data - image.unpackLevel(i, mipData + mipLevel.offset, bufferData + bufferOffset); + if (!image.unpackLevel(i, mipData + mipLevel.offset, bufferData + bufferOffset)) { + return nil; + } bufferOffsets[i] = bufferOffset; bufferOffset += mipLevel.length * numChunks; } + // everything succeded, so advance the offset + _bufferOffset = bufferOffset; + [_blitTextures addObject: texture]; - // Should this be split off after cpu upload, could code store enough - // in a vector to jettison the KTXImage. Also need a queue of textures - // that are not fully loaded or haven't started if sharing the staging buffer. - // Note that it is just system ram, and can have allocations stored into it - // and can be viewed in the debugger and can do memcpy to it above. - - //-------------------- - // blit encoder calls must all be submitted to an open MTLBlitCommandEncoder, - // but may not have to be on the render thread? + // defer the blits from buffer until start of render thread when BlitEncoder is available for (uint32_t mipLevelNumber = 0; mipLevelNumber < numMips; ++mipLevelNumber) { // there's a 4 byte levelSize for each mipLevel @@ -735,8 +709,8 @@ inline uint64_t alignOffset(uint64_t offset, uint64_t alignment) { uint32_t bytesPerRow = 0; // 1D/1DArray textures set bytesPerRow to 0 - if ((MTLTextureType)image.textureType != MTLTextureType1D && - (MTLTextureType)image.textureType != MTLTextureType1DArray) + if (//image.textureType != MyMTLTextureType1D && + image.textureType != MyMTLTextureType1DArray) { // for compressed, bytesPerRow needs to be multiple of block size // so divide by the number of blocks making up the height @@ -784,7 +758,7 @@ inline uint64_t alignOffset(uint64_t offset, uint64_t alignment) { mipStorageSize, mipOffset, - (uint32_t)_blitTextures.count, + textureIndex, bytesPerRow, is3D // could derive from textureIndex lookup }); @@ -796,10 +770,7 @@ inline uint64_t alignOffset(uint64_t offset, uint64_t alignment) { mipDown(w, h, d); } - // everything succeded, so advance the offset - _bufferOffset = bufferOffset; - [_blitTextures addObject: texture]; - + // this texture cannot be used until buffer uploads complete // but those happen at beginning of frame, so can attach to shaders, etc return texture; diff --git a/libkram/kram/KTXImage.h b/libkram/kram/KTXImage.h index 19c30f05..bcfb49b3 100644 --- a/libkram/kram/KTXImage.h +++ b/libkram/kram/KTXImage.h @@ -315,11 +315,13 @@ class KTXImage { bool unpackLevel(uint32_t mipNumber, const uint8_t* srcData, uint8_t* dstData); // helpers to work with the mipLevels array, mipLength and levelLength are important to get right + size_t mipLengthLargest() const { return mipLevels[0].length; } size_t mipLength(uint32_t mipNumber) const { return mipLevels[mipNumber].length; } size_t levelLength(uint32_t 
mipNumber) const { return mipLevels[mipNumber].length * totalChunks(); } size_t levelLengthCompressed(uint32_t mipNumber) const { return mipLevels[mipNumber].lengthCompressed; } size_t chunkOffset(uint32_t mipNumber, uint32_t chunkNumber) const { return mipLevels[mipNumber].offset + mipLevels[mipNumber].length * chunkNumber; } + private: bool openKTX2(const uint8_t* imageData, size_t imageDataLength, bool isInfoOnly); diff --git a/libkram/kram/KramImage.cpp b/libkram/kram/KramImage.cpp index b9704371..8916531f 100644 --- a/libkram/kram/KramImage.cpp +++ b/libkram/kram/KramImage.cpp @@ -178,7 +178,7 @@ bool Image::loadImageFromKTX(const KTXImage& image) } for (int32_t y = 0; y < _height; ++y) { - int32_t y0 = _height * y; + int32_t y0 = _width * y; for (int32_t x = 0, xEnd = _width; x < xEnd; ++x) { int32_t srcX = (y0 + x) * numSrcChannels; @@ -232,7 +232,7 @@ bool Image::loadImageFromKTX(const KTXImage& image) } for (int32_t y = 0; y < _height; ++y) { - int32_t y0 = _height * y; + int32_t y0 = _width * y; for (int32_t x = 0, xEnd = _width; x < xEnd; ++x) { int32_t srcX = (y0 + x) * numSrcChannels; @@ -280,7 +280,7 @@ bool Image::loadImageFromKTX(const KTXImage& image) float* dstPixels = (float*)(_pixelsFloat.data()); for (int32_t y = 0; y < _height; ++y) { - int32_t y0 = _height * y; + int32_t y0 = _width * y; for (int32_t x = 0, xEnd = _width; x < xEnd; ++x) { int32_t srcX = (y0 + x) * numSrcChannels; @@ -1702,7 +1702,7 @@ bool Image::createMipsFromChunks( TextureData outputTexture; outputTexture.width = dstImage.width; outputTexture.height = dstImage.height; - outputTexture.data.resize(dstImage.mipLevels[0].length); // allocate to size of largest mip + outputTexture.data.resize(dstImage.mipLengthLargest()); // This is for 8-bit data (pixelsFloat used for in-place mipgen) ImageData srcImage; diff --git a/kramv/KramZipHelper.cpp b/libkram/kram/KramZipHelper.cpp similarity index 100% rename from kramv/KramZipHelper.cpp rename to libkram/kram/KramZipHelper.cpp diff --git a/kramv/KramZipHelper.h b/libkram/kram/KramZipHelper.h similarity index 100% rename from kramv/KramZipHelper.h rename to libkram/kram/KramZipHelper.h From 110a4ff08df760187efd74b8945dcf074503703c Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Tue, 25 May 2021 00:36:34 -0700 Subject: [PATCH 073/901] kram - simplify 1,2,3,4 channel conversion from KTX to Image, fix Win build --- libkram/kram/KramImage.cpp | 98 ++++++++++++++---------------------- libkram/kram/KramZipHelper.h | 1 + 2 files changed, 39 insertions(+), 60 deletions(-) diff --git a/libkram/kram/KramImage.cpp b/libkram/kram/KramImage.cpp index 8916531f..98b1450c 100644 --- a/libkram/kram/KramImage.cpp +++ b/libkram/kram/KramImage.cpp @@ -149,11 +149,13 @@ bool Image::loadImageFromKTX(const KTXImage& image) // so can call through to blockSize KTXHeader header; header.initFormatGL(image.pixelFormat); - int32_t blockSize = image.blockSize(); + //int32_t blockSize = image.blockSize(); _hasColor = isColorFormat(image.pixelFormat); _hasAlpha = isAlphaFormat(image.pixelFormat); + // TODO: this assumes 1,2,3 channel srcData has no rowPadding to say 4 bytes + switch (image.pixelFormat) { case MyMTLPixelFormatR8Unorm: case MyMTLPixelFormatRG8Unorm: @@ -167,34 +169,28 @@ bool Image::loadImageFromKTX(const KTXImage& image) const uint8_t* srcPixels = image.fileData + image.mipLevels[0].offset; - int32_t numSrcChannels = blockSize / sizeof(uint8_t); - int32_t numDstChannels = 4; - + int32_t numSrcChannels = numChannelsOfFormat(image.pixelFormat); + // Note: clearing unspecified 
channels to 0000, not 0001 // can set swizzleText when encoding _pixels.resize(4 * _width * _height); - if (numSrcChannels != 4) { - memset(_pixels.data(), 0, _pixels.size()); - } + + Color* dstPixels = (Color*)_pixels.data(); + Color dstTemp = {0,0,0,0}; + for (int32_t y = 0; y < _height; ++y) { - int32_t y0 = _width * y; + int32_t y0 = y * _width; - for (int32_t x = 0, xEnd = _width; x < xEnd; ++x) { + for (int32_t x = 0; x < _width; ++x) { int32_t srcX = (y0 + x) * numSrcChannels; - int32_t dstX = (y0 + x) * numDstChannels; + int32_t dstX = (y0 + x); // * numDstChannels; - switch (numSrcChannels) { - // all fallthrough - case 4: - _pixels[dstX + 3] = srcPixels[srcX + 3]; - case 3: - _pixels[dstX + 2] = srcPixels[srcX + 2]; - case 2: - _pixels[dstX + 1] = srcPixels[srcX + 1]; - case 1: - _pixels[dstX + 0] = srcPixels[srcX + 0]; + for (int32_t i = 0; i < numSrcChannels; ++i) { + *(&dstTemp.r + i) = srcPixels[srcX + i]; } + + dstPixels[dstX] = dstTemp; } } @@ -209,16 +205,11 @@ bool Image::loadImageFromKTX(const KTXImage& image) case MyMTLPixelFormatRGB16Float_internal: #endif case MyMTLPixelFormatRGBA16Float: { - int32_t numSrcChannels = blockSize / 2; // 2 = sizeof(_float16) - int32_t numDstChannels = 4; - + int32_t numSrcChannels = numChannelsOfFormat(image.pixelFormat); + // Note: clearing unspecified channels to 0000, not 0001 // can set swizzleText when encoding _pixelsFloat.resize(_width * _height); - if (numSrcChannels != 4) { - memset(_pixelsFloat.data(), 0, - _pixelsFloat.size() * sizeof(float4)); - } // treat as float for per channel copies float4* dstPixels = _pixelsFloat.data(); @@ -226,25 +217,22 @@ bool Image::loadImageFromKTX(const KTXImage& image) const half* srcPixels = (const half*)(image.fileData + image.mipLevels[0].offset); - half4 srcPixel; - for (int32_t i = 0; i < 4; ++i) { - srcPixel.v[i] = 0; - } - + half4 dstTemp = half4((half)0); + for (int32_t y = 0; y < _height; ++y) { - int32_t y0 = _width * y; + int32_t y0 = y * _width; - for (int32_t x = 0, xEnd = _width; x < xEnd; ++x) { + for (int32_t x = 0; x < _width; ++x) { int32_t srcX = (y0 + x) * numSrcChannels; - int32_t dstX = (y0 + x) * numDstChannels; + int32_t dstX = (y0 + x); // copy in available values for (int32_t i = 0; i < numSrcChannels; ++i) { - srcPixel.v[i] = srcPixels[srcX + i]; + dstTemp.v[i] = srcPixels[srcX + i]; } // use AVX to convert - dstPixels[dstX] = toFloat4(srcPixel); + dstPixels[dstX] = toFloat4(dstTemp); } } @@ -265,38 +253,28 @@ bool Image::loadImageFromKTX(const KTXImage& image) const float* srcPixels = (const float*)(image.fileData + image.mipLevels[0].offset); - int32_t numSrcChannels = blockSize / sizeof(float); - int32_t numDstChannels = 4; - + int32_t numSrcChannels = numChannelsOfFormat(image.pixelFormat); + // Note: clearing unspecified channels to 0000, not 0001 // can set swizzleText when encoding _pixelsFloat.resize(_width * _height); - if (numSrcChannels != 4) { - memset(_pixelsFloat.data(), 0, - _pixelsFloat.size() * sizeof(float4)); - } - + // treat as float for per channel copies - float* dstPixels = (float*)(_pixelsFloat.data()); - + float4* dstPixels = _pixelsFloat.data(); + float4 dstTemp = float4m(0.0f); + for (int32_t y = 0; y < _height; ++y) { - int32_t y0 = _width * y; + int32_t y0 = y * _width; - for (int32_t x = 0, xEnd = _width; x < xEnd; ++x) { + for (int32_t x = 0; x < _width; ++x) { int32_t srcX = (y0 + x) * numSrcChannels; - int32_t dstX = (y0 + x) * numDstChannels; + int32_t dstX = (y0 + x); - switch (numSrcChannels) { - // all fallthrough - case 4: - 
dstPixels[dstX + 3] = srcPixels[srcX + 3]; - case 3: - dstPixels[dstX + 2] = srcPixels[srcX + 2]; - case 2: - dstPixels[dstX + 1] = srcPixels[srcX + 1]; - case 1: - dstPixels[dstX + 0] = srcPixels[srcX + 0]; + for (int32_t i = 0; i < numSrcChannels; ++i) { + dstTemp[i] = srcPixels[srcX + i]; } + + dstPixels[dstX] = dstTemp; } } diff --git a/libkram/kram/KramZipHelper.h b/libkram/kram/KramZipHelper.h index ea3d566c..e224c7f3 100644 --- a/libkram/kram/KramZipHelper.h +++ b/libkram/kram/KramZipHelper.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include From f3fd4c14846aec8540833653bb2fa9f31d77cd3d Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Tue, 25 May 2021 09:35:22 -0700 Subject: [PATCH 074/901] kramv - change NSTrackingArea to use less CPU kramv was using 10% cpu in some User Interactive QoS worker threads that I didn't create. Set the tracking area to only when app is active. This dropped CPU use to 3% I think. --- kramv/KramViewerMain.mm | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index 5f88d149..6f7e7af3 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -2264,7 +2264,10 @@ - (void)viewDidLoad // https://developer.apple.com/library/archive/documentation/Cocoa/Conceptual/EventOverview/TrackingAreaObjects/TrackingAreaObjects.html // this is better than requesting mousemoved events, they're only sent when cursor is inside _trackingArea = [[NSTrackingArea alloc] initWithRect:_view.bounds - options: (NSTrackingMouseEnteredAndExited | NSTrackingMouseMoved | NSTrackingActiveInKeyWindow ) + options: (NSTrackingMouseEnteredAndExited | NSTrackingMouseMoved | + NSTrackingActiveInActiveApp + //NSTrackingActiveInKeyWindow + ) owner:_view userInfo:nil]; [_view addTrackingArea:_trackingArea]; From 66ec9acc3fca2b6b9bf760e20804086099bff2cb Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Tue, 25 May 2021 09:56:16 -0700 Subject: [PATCH 075/901] kramv - handle highlight state on buttons Now these show that mode is active using a toggle button. State already set on/off based on criteria. --- kramv/KramViewerMain.mm | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index 6f7e7af3..1edef786 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -545,6 +545,9 @@ - (NSStackView*)_addButtons { [button setToolTip:toolTip]; button.hidden = NO; + button.buttonType = NSButtonTypeToggle; + //NSButtonTypeOnOff + #if 0 // can use this with border // TODO: for some reason this breaks clicking on buttons @@ -1350,16 +1353,20 @@ - (void)updateUIControlState auto wrapState = _showSettings->isWrap ? On : Off; auto debugState = (_showSettings->debugMode != DebugModeNone) ? On : Off; + // buttons -// [self findButton:"Y"].state = -// [self findButton:"F"].state = -// [self findButton:"M"].state = -// [self findButton:"J"].state = -// -// [self findButton:"R"].state = -// [self findButton:"G"].state = -// [self findButton:"B"].state = -// [self findButton:"A"].state = + [self findButton:"Y"].state = _showSettings->arrayNumber > 1 ? On : Off; + [self findButton:"F"].state = _showSettings->faceNumber > 1 ? On : Off; + [self findButton:"M"].state = _showSettings->mipLOD > 1 ? 
On : Off; + + [self findButton:"J"].state = Off; + [self findButton:"U"].state = Off; + + // TODO: want these to show highlight + [self findButton:"R"].state = Off; + [self findButton:"G"].state = Off; + [self findButton:"B"].state = Off; + [self findButton:"A"].state = Off; [self findButton:"S"].state = showAllState; [self findButton:"O"].state = previewState; @@ -1511,6 +1518,10 @@ - (void)handleKey:(uint32_t)keyCode isShiftKeyDown:(bool)isShiftKeyDown _buttonStack.hidden = !_buttonStack.hidden; text = _buttonStack.hidden ? "Hide UI" : "Show UI"; + + // for button control state update only + if (!_buttonStack.hidden) + isChanged = true; break; // rgba channels From 2c1304ae13e12f8675bff2fd51c0ab4721e13e41 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Wed, 26 May 2021 08:46:58 -0700 Subject: [PATCH 076/901] kramv - fix on/off state on 3 buttons --- kramv/KramViewerMain.mm | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index 1edef786..17a6613a 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -1355,9 +1355,9 @@ - (void)updateUIControlState // buttons - [self findButton:"Y"].state = _showSettings->arrayNumber > 1 ? On : Off; - [self findButton:"F"].state = _showSettings->faceNumber > 1 ? On : Off; - [self findButton:"M"].state = _showSettings->mipLOD > 1 ? On : Off; + [self findButton:"Y"].state = _showSettings->arrayNumber > 0 ? On : Off; + [self findButton:"F"].state = _showSettings->faceNumber > 0 ? On : Off; + [self findButton:"M"].state = _showSettings->mipLOD > 0 ? On : Off; [self findButton:"J"].state = Off; [self findButton:"U"].state = Off; From cad986a6546dbd767c8fc8ec91fc6cab162cc0ed Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Wed, 26 May 2021 09:21:17 -0700 Subject: [PATCH 077/901] kramv - fix rgba toggles Removed some swizzles. Also need to add a grayscale mode too to look at luminance. 
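A minimal sketch of the toggle behavior the R/G/B/A key handlers now follow: pressing a key selects that single-channel mask, pressing it again returns to RGBA. The enum values mirror TextureChannels in the viewer; the small helper function is only for illustration, the viewer does this inline per key.

    #include <cstdio>

    enum TextureChannels { ModeRGBA = 0, ModeR001 = 1, Mode0G01 = 2, Mode00B1 = 3, ModeAAA1 = 8 };

    // toggle between a single-channel mask and full RGBA
    TextureChannels toggleChannel(TextureChannels current, TextureChannels requested) {
        return (current == requested) ? ModeRGBA : requested;
    }

    int main() {
        TextureChannels channels = ModeRGBA;
        channels = toggleChannel(channels, ModeR001); // R pressed -> red-only mask
        channels = toggleChannel(channels, ModeR001); // R pressed again -> back to RGBA
        printf("channels = %d\n", (int)channels);     // prints 0 (ModeRGBA)
        return 0;
    }
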
--- kramv/KramShaders.h | 6 +- kramv/KramShaders.metal | 10 +-- kramv/KramViewerBase.h | 7 ++- kramv/KramViewerMain.mm | 131 +++++++++++++++++++++++++++------------- 4 files changed, 101 insertions(+), 53 deletions(-) diff --git a/kramv/KramShaders.h b/kramv/KramShaders.h index 5169213c..3a192e0b 100644 --- a/kramv/KramShaders.h +++ b/kramv/KramShaders.h @@ -63,9 +63,9 @@ typedef NS_ENUM(int32_t, ShaderTextureChannels) ShMode00B1 = 3, // see grayscale channels - ShModeRRR1 = 5, - ShModeGGG1 = 6, - ShModeBBB1 = 7, +// ShModeRRR1 = 5, +// ShModeGGG1 = 6, +// ShModeBBB1 = 7, ShModeAAA1 = 8, }; diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index f3289232..bd430fed 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -592,14 +592,16 @@ float4 DrawPixels( switch(uniforms.channels) { case ShModeRGBA: break; + + // with premul formats, already have ra,ga,ba case ShModeR001: c = float4(c.r,0,0,1); break; case ShMode0G01: c = float4(0,c.g,0,1); break; case ShMode00B1: c = float4(0,0,c.b,1); break; - case ShModeRRR1: c = float4(c.rrr,1); break; - case ShModeGGG1: c = float4(c.ggg,1); break; - case ShModeBBB1: c = float4(c.bbb,1); break; - +// case ShModeRRR1: c = float4(c.rrr,1); break; +// case ShModeGGG1: c = float4(c.ggg,1); break; +// case ShModeBBB1: c = float4(c.bbb,1); break; +// case ShModeAAA1: c = float4(c.aaa,1); break; } diff --git a/kramv/KramViewerBase.h b/kramv/KramViewerBase.h index 40f883da..9b3f2e2c 100644 --- a/kramv/KramViewerBase.h +++ b/kramv/KramViewerBase.h @@ -24,9 +24,10 @@ enum TextureChannels Mode00B1 = 3, // see grayscale channels - ModeRRR1 = 5, - ModeGGG1 = 6, - ModeBBB1 = 7, +// ModeRRR1 = 5, +// ModeGGG1 = 6, +// ModeBBB1 = 7, + ModeAAA1 = 8, }; diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index 17a6613a..a0c370bf 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -1343,30 +1343,50 @@ - (void)updateUIControlState // there is also mixed auto On = NSControlStateValueOn; auto Off = NSControlStateValueOff; - - auto showAllState = _showSettings->isShowingAllLevelsAndMips ? On : Off; - auto premulState = _showSettings->isPremul ? On : Off; - auto signedState = _showSettings->isSigned ? On : Off; - auto checkerboardState = _showSettings->isCheckerboardShown ? On : Off; - auto previewState = _showSettings->isPreview ? On : Off; - auto gridState = _showSettings->isAnyGridShown() ? On : Off; - auto wrapState = _showSettings->isWrap ? On : Off; - auto debugState = (_showSettings->debugMode != DebugModeNone) ? On : Off; + #define toState(x) (x) ? 
On : Off + + auto showAllState = toState(_showSettings->isShowingAllLevelsAndMips); + auto premulState = toState(_showSettings->isPremul); + auto signedState = toState(_showSettings->isSigned); + auto checkerboardState = toState(_showSettings->isCheckerboardShown); + auto previewState = toState(_showSettings->isPreview); + auto gridState = toState(_showSettings->isAnyGridShown()); + auto wrapState = toState(_showSettings->isWrap); + auto debugState = toState(_showSettings->debugMode != DebugModeNone); + + TextureChannels& channels = _showSettings->channels; + + auto redState = toState(channels == TextureChannels::ModeR001); + auto greenState = toState(channels == TextureChannels::Mode0G01); + auto blueState = toState(channels == TextureChannels::Mode00B1); + auto alphaState = toState(channels == TextureChannels::ModeAAA1); + + auto arrayState = toState(_showSettings->arrayNumber > 0); + auto faceState = toState(_showSettings->faceNumber > 0); + auto mipState = toState(_showSettings->mipLOD > 0); + + // TODO: UI state, and vertical state + auto uiState = toState(_buttonStack.hidden); + auto helpState = Off; + auto infoState = Off; + auto jumpState = Off; // buttons - [self findButton:"Y"].state = _showSettings->arrayNumber > 0 ? On : Off; - [self findButton:"F"].state = _showSettings->faceNumber > 0 ? On : Off; - [self findButton:"M"].state = _showSettings->mipLOD > 0 ? On : Off; + [self findButton:"?"].state = helpState; + [self findButton:"I"].state = infoState; + + [self findButton:"Y"].state = arrayState; + [self findButton:"F"].state = faceState; + [self findButton:"M"].state = mipState; - [self findButton:"J"].state = Off; - [self findButton:"U"].state = Off; + [self findButton:"J"].state = jumpState; + [self findButton:"U"].state = Off; // always off - // TODO: want these to show highlight - [self findButton:"R"].state = Off; - [self findButton:"G"].state = Off; - [self findButton:"B"].state = Off; - [self findButton:"A"].state = Off; + [self findButton:"R"].state = redState; + [self findButton:"G"].state = greenState; + [self findButton:"B"].state = blueState; + [self findButton:"A"].state = alphaState; [self findButton:"S"].state = showAllState; [self findButton:"O"].state = previewState; @@ -1381,15 +1401,21 @@ - (void)updateUIControlState // menus (may want to disable, not hide) // problem is crashes since menu seems to strip hidden items // enabled state has to be handled in validateUserInterfaceItem -// [self findMenuItem:"Y"].state = -// [self findMenuItem:"F"].state = -// [self findMenuItem:"M"].state = -// [self findMenuItem:"J"].state = -// -// [self findMenuItem:"R"].state = -// [self findMenuItem:"G"].state = -// [self findMenuItem:"B"].state = -// [self findMenuItem:"A"].state = + + // when menu state is selected, it may not uncheck when advancing through state + [self findMenuItem:"?"].state = helpState; + [self findMenuItem:"I"].state = infoState; + + [self findMenuItem:"Y"].state = arrayState; + [self findMenuItem:"F"].state = faceState; + [self findMenuItem:"M"].state = mipState; + [self findMenuItem:"J"].state = jumpState; + [self findMenuItem:"U"].state = uiState; + + [self findMenuItem:"R"].state = redState; + [self findMenuItem:"G"].state = greenState; + [self findMenuItem:"B"].state = blueState; + [self findMenuItem:"A"].state = alphaState; [self findMenuItem:"S"].state = showAllState; [self findMenuItem:"O"].state = previewState; @@ -1494,8 +1520,8 @@ - (void)keyDown:(NSEvent *)theEvent - (void)handleKey:(uint32_t)keyCode isShiftKeyDown:(bool)isShiftKeyDown { 
// Some data depends on the texture data (isSigned, isNormal, ..) - TextureChannels& channels = _showSettings->channels; bool isChanged = false; + bool isStateChanged = false; // TODO: fix isChanged to only be set when value changes // f.e. clamped values don't need to re-render @@ -1508,6 +1534,9 @@ - (void)handleKey:(uint32_t)keyCode isShiftKeyDown:(bool)isShiftKeyDown _buttonStack.orientation = isVertical ? NSUserInterfaceLayoutOrientationVertical : NSUserInterfaceLayoutOrientationHorizontal; text = isVertical ? "Vert UI" : "Horiz UI"; + + // just to update toggle state to Off + isStateChanged = true; break; } case Key::U: @@ -1519,22 +1548,23 @@ - (void)handleKey:(uint32_t)keyCode isShiftKeyDown:(bool)isShiftKeyDown _buttonStack.hidden = !_buttonStack.hidden; text = _buttonStack.hidden ? "Hide UI" : "Show UI"; - // for button control state update only - if (!_buttonStack.hidden) - isChanged = true; + // just to update toggle state to Off + isStateChanged = true; break; // rgba channels case Key::Num1: case Key::R: if (![self findButton:"R"].isHidden) { - if (channels == TextureChannels::ModeRRR1 || channels == TextureChannels::ModeR001) { + TextureChannels& channels = _showSettings->channels; + + if (channels == TextureChannels::ModeR001) { channels = TextureChannels::ModeRGBA; text = "Mask RGBA"; } else { - channels = isShiftKeyDown ? TextureChannels::ModeRRR1 : TextureChannels::ModeR001; - text = isShiftKeyDown ? "Mask RRR1" : "Mask R001"; + channels = TextureChannels::ModeR001; + text = "Mask R001"; } isChanged = true; } @@ -1544,13 +1574,15 @@ - (void)handleKey:(uint32_t)keyCode isShiftKeyDown:(bool)isShiftKeyDown case Key::Num2: case Key::G: if (![self findButton:"G"].isHidden) { - if (channels == TextureChannels::ModeGGG1 || channels == TextureChannels::Mode0G01) { + TextureChannels& channels = _showSettings->channels; + + if (channels == TextureChannels::Mode0G01) { channels = TextureChannels::ModeRGBA; text = "Mask RGBA"; } else { - channels = isShiftKeyDown ? TextureChannels::ModeGGG1 : TextureChannels::Mode0G01; - text = isShiftKeyDown ? "Mask GGG1" : "Mask 0G01"; + channels = TextureChannels::Mode0G01; + text = "Mask 0G01"; } isChanged = true; } @@ -1559,14 +1591,17 @@ - (void)handleKey:(uint32_t)keyCode isShiftKeyDown:(bool)isShiftKeyDown case Key::Num3: case Key::B: if (![self findButton:"B"].isHidden) { - if (channels == TextureChannels::ModeBBB1 || channels == TextureChannels::Mode00B1) { + TextureChannels& channels = _showSettings->channels; + + if (channels == TextureChannels::Mode00B1) { channels = TextureChannels::ModeRGBA; text = "Mask RGBA"; } else { - channels = isShiftKeyDown ? TextureChannels::ModeBBB1 : TextureChannels::Mode00B1; - text = isShiftKeyDown ? 
"Mask BBB1" : "Mask 00B1"; + channels = TextureChannels::Mode00B1; + text = "Mask 00B1"; } + isChanged = true; } break; @@ -1574,6 +1609,8 @@ - (void)handleKey:(uint32_t)keyCode isShiftKeyDown:(bool)isShiftKeyDown case Key::Num4: case Key::A: if (![self findButton:"A"].isHidden) { + TextureChannels& channels = _showSettings->channels; + if (channels == TextureChannels::ModeAAA1) { channels = TextureChannels::ModeRGBA; text = "Mask RGBA"; @@ -1582,6 +1619,7 @@ - (void)handleKey:(uint32_t)keyCode isShiftKeyDown:(bool)isShiftKeyDown channels = TextureChannels::ModeAAA1; text = "Mask AAA1"; } + isChanged = true; } break; @@ -1612,6 +1650,9 @@ - (void)handleKey:(uint32_t)keyCode isShiftKeyDown:(bool)isShiftKeyDown "W-wrap, Premul, N-signed\n" "⇧Mip, ⇧Face, ⇧Y-array/slice\n" "⇧J-next bundle image\n"; + + // just to update toggle state to Off + isStateChanged = true; break; case Key::Num0: { // scale and reset pan @@ -1779,6 +1820,8 @@ - (void)handleKey:(uint32_t)keyCode isShiftKeyDown:(bool)isShiftKeyDown if (_showSettings->isHudShown) { sprintf(text, "%s", isShiftKeyDown ? _showSettings->imageInfoVerbose.c_str() : _showSettings->imageInfo.c_str()); } + // just to update toggle state to Off + isStateChanged = true; break; // toggle wrap/clamp @@ -1878,9 +1921,11 @@ - (void)handleKey:(uint32_t)keyCode isShiftKeyDown:(bool)isShiftKeyDown [self setHudText:text.c_str()]; } - if (isChanged) { + if (isChanged || isStateChanged) { [self updateUIControlState]; - + } + + if (isChanged) { self.needsDisplay = YES; } } From 7c8d05d83980703986c1bcbc5d84cd4af5f6c280 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Thu, 27 May 2021 13:01:03 -0700 Subject: [PATCH 078/901] kram - break off decoder/encoder from Image, add block decode, add macOS thumbnailer This is the code to generate a Quicklook thumbnailer. The project was setup by Xcode, so I don't have it tied to CMake yet. Had to move all projects to 10.15, where this frameworks is available. This is an app extension for thumbnailing tied into kramv. Still need to detail the ktx/ktx2 formats in the plist. Broke out the Encoder/Decoder since they shouldn't be so tied to the single-level Image class. The thumbnailer wanted to extract a single mip, and render it to CG. So that's done now. Simplfied getting mip dimensions, since with mipDown it's a shift and max(1, w). 
--- CMakeLists.txt | 2 +- kram-thumb/Info.plist | 40 ++ kram-thumb/KramThumbnailProvider.h | 16 + kram-thumb/KramThumbnailProvider.mm | 153 +++++++ kram-thumb/kram_thumb.entitlements | 10 + kramv/KramLoader.mm | 16 +- libkram/kram/KTXImage.cpp | 15 + libkram/kram/KTXImage.h | 57 ++- libkram/kram/Kram.cpp | 24 +- libkram/kram/KramConfig.h | 37 -- libkram/kram/KramImage.cpp | 670 +++++++++++++++------------- libkram/kram/KramImage.h | 94 ++-- libkram/kram/KramImageInfo.cpp | 4 +- libkram/kram/KramMipper.cpp | 2 + libkram/kram/KramSDFMipper.cpp | 8 +- 15 files changed, 729 insertions(+), 419 deletions(-) create mode 100644 kram-thumb/Info.plist create mode 100644 kram-thumb/KramThumbnailProvider.h create mode 100644 kram-thumb/KramThumbnailProvider.mm create mode 100644 kram-thumb/kram_thumb.entitlements diff --git a/CMakeLists.txt b/CMakeLists.txt index 5018cb5b..20cf0e14 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -57,7 +57,7 @@ if (APPLE) set(CMAKE_OSX_DEPLOYMENT_TARGET "11.0" CACHE STRING "Minimum iOS") set(CMAKE_OSX_ARCHITECTURES "$(ARCHS_STANDARD)" CACHE STRING "Architecture iOS") else() - set(CMAKE_OSX_DEPLOYMENT_TARGET "10.14" CACHE STRING "Minimum macOS") + set(CMAKE_OSX_DEPLOYMENT_TARGET "10.15" CACHE STRING "Minimum macOS") set(CMAKE_OSX_ARCHITECTURES "$(ARCHS_STANDARD)" CACHE STRING "Architecture macOS") endif() endif() diff --git a/kram-thumb/Info.plist b/kram-thumb/Info.plist new file mode 100644 index 00000000..e6b0324d --- /dev/null +++ b/kram-thumb/Info.plist @@ -0,0 +1,40 @@ + + + + + CFBundleDevelopmentRegion + $(DEVELOPMENT_LANGUAGE) + CFBundleDisplayName + kram-thumb + CFBundleExecutable + $(EXECUTABLE_NAME) + CFBundleIdentifier + $(PRODUCT_BUNDLE_IDENTIFIER) + CFBundleInfoDictionaryVersion + 6.0 + CFBundleName + $(PRODUCT_NAME) + CFBundlePackageType + $(PRODUCT_BUNDLE_PACKAGE_TYPE) + CFBundleShortVersionString + 1.0 + CFBundleVersion + 1 + LSMinimumSystemVersion + $(MACOSX_DEPLOYMENT_TARGET) + NSExtension + + NSExtensionAttributes + + QLSupportedContentTypes + + QLThumbnailMinimumDimension + 0 + + NSExtensionPointIdentifier + com.apple.quicklook.thumbnail + NSExtensionPrincipalClass + ThumbnailProvider + + + diff --git a/kram-thumb/KramThumbnailProvider.h b/kram-thumb/KramThumbnailProvider.h new file mode 100644 index 00000000..7ee38563 --- /dev/null +++ b/kram-thumb/KramThumbnailProvider.h @@ -0,0 +1,16 @@ +// +// KramThumbnailProvider.h +// kram-thumb +// +// Created by Alec on 5/26/21. +// + +#import + +NS_ASSUME_NONNULL_BEGIN + +@interface KramThumbnailProvider : QLThumbnailProvider + +@end + +NS_ASSUME_NONNULL_END diff --git a/kram-thumb/KramThumbnailProvider.mm b/kram-thumb/KramThumbnailProvider.mm new file mode 100644 index 00000000..90c9a866 --- /dev/null +++ b/kram-thumb/KramThumbnailProvider.mm @@ -0,0 +1,153 @@ +// +// KramThumbnailProvider.mm +// kram-thumb +// +// Created by Alec on 5/26/21. +// + +#import "KramThumbnailProvider.h" + +#include "Kram.h" +#include "KramMmapHelper.h" +#include "KramLog.h" +#include "KTXImage.h" +#include "KramImage.h" // for KramDecoder + +#include + +//@import Accelerate // for vimage +#import + +using namespace kram; + +@implementation KramThumbnailProvider + +- (void)provideThumbnailForFileRequest:(QLFileThumbnailRequest *)request completionHandler:(void (^)(QLThumbnailReply * _Nullable, NSError * _Nullable))handler { + + // This + // Second way: Draw the thumbnail into a context passed to your block, set up with Core Graphics's coordinate system. 
+ handler([QLThumbnailReply replyWithContextSize:request.maximumSize drawingBlock:^BOOL(CGContextRef _Nonnull context) + { + const char* file = [request.fileURL fileSystemRepresentation]; + + if (!(endsWith(file, ".ktx") || endsWith(file, ".ktx2"))) { + return NO; + } + + // load the mmap file, and interpret it as a KTXImage + MmapHelper mmapHelper; + if (!mmapHelper.open(file)) { + return NO; + } + + // open but leave the image compressed if KTX2 + zstd + bool isInfoOnly = true; + + KTXImage image; + if (!image.open(mmapHelper.data(), mmapHelper.dataLength(), isInfoOnly)) { + return NO; + } + + // no BC6 or ASTC HDR yet for thumbs, just do LDR first + if (isHdrFormat(image.pixelFormat)) { + return NO; + } + + // TODO: hookup to whether content is already premul with alpha + // will have to come from props. ASTC always 4 channels but may hold other daa. + bool isPremul = numChannelsOfFormat(image.pixelFormat) >= 4; + + // unpack a level to get the blocks + uint32_t mipNumber = 0; + + uint32_t w, h, d; + for (uint32_t i = 0; i < image.header.numberOfMipmapLevels; ++i) { + image.mipDimensions(i, w, h, d); + if (w > request.maximumSize.width || h > request.maximumSize.height) { + mipNumber++; + } + } + + // clamp to smallest + mipNumber = std::min(mipNumber, image.header.numberOfMipmapLevels); + image.mipDimensions(mipNumber, w, h, d); + + uint32_t chunkNum = 0; // TODO: could embed chunk(s) to gen thumbnail from, cube/array? + uint32_t numChunks = image.totalChunks(); + + vector mipData; + + // then decode any blocks to rgba8u, not dealing with HDR formats yet + if (image.isSupercompressed()) { + const uint8_t* srcData = image.fileData + image.mipLevels[mipNumber].offset; + + mipData.resize(image.mipLevels[mipNumber].length * numChunks); + uint8_t* dstData = mipData.data(); + if (!image.unpackLevel(mipNumber, srcData, dstData)) { + return NO; + } + } + + // now extract the chunk for the thumbnail out of that level + if (numChunks > 1) { + macroUnusedVar(chunkNum); + assert(chunkNum == 0); + + // this just truncate to chunk 0 instead of copying chunkNum first + mipData.resize(image.mipLevels[mipNumber].length); + } + + // new decode the blocks in that chunk to + KTXImage imageDecoded; + if (isBlockFormat(image.pixelFormat)) { + + KramDecoder decoder; + KramDecoderParams params; + + vector dstMipData; + + // want to just decode one chunk of the level that was unpacked abovve + if (!decoder.decodeBlocks(w, h, mipData.data(), mipData.size(), image.pixelFormat, dstMipData, params)) { + return NO; + } + + mipData = dstMipData; + } + + // https://developer.apple.com/library/archive/documentation/GraphicsImaging/Conceptual/drawingwithquartz2d/dq_images/dq_images.html#//apple_ref/doc/uid/TP30001066-CH212-TPXREF101 + + uint32_t rowBytes = w * sizeof(uint32_t); + + // use vimage in the Accelerate.framework + // https://developer.apple.com/library/archive/releasenotes/Performance/RN-vecLib/index.html#//apple_ref/doc/uid/TP40001049 + + vImage_Buffer buf = { mipData.data(), h, w, rowBytes }; + + // Declare the pixel format for the vImage_Buffer + vImage_CGImageFormat format = { + .bitsPerComponent = 8, + .bitsPerPixel = 32, + }; + + format.bitmapInfo = kCGBitmapByteOrderDefault | (isPremul ? kCGImageAlphaPremultipliedLast: kCGImageAlphaLast); + + // don't need to allocate, can requse memory from mip + + // TODO: might want to convert to PNG, but maybe thumbnail system does that automatically? 
+ // see how big thumbs.db is after running this + + //CGColorSpaceRef colorSpace = CGColorSpaceCreateDeviceRGB(); + vImage_Error err = 0; + CGImageRef cgImage = vImageCreateCGImageFromBuffer( &buf, &format, NULL, NULL, kvImageNoAllocate, &err); + + CGRect rect = CGRectMake(0, 0, w, h); + + // The image is scaled—disproportionately, if necessary—to fit the bounds + // specified by the rect parameter. + CGContextDrawImage(context, rect, cgImage); + + return YES; + }], nil); +} + +@end diff --git a/kram-thumb/kram_thumb.entitlements b/kram-thumb/kram_thumb.entitlements new file mode 100644 index 00000000..f2ef3ae0 --- /dev/null +++ b/kram-thumb/kram_thumb.entitlements @@ -0,0 +1,10 @@ + + + + + com.apple.security.app-sandbox + + com.apple.security.files.user-selected.read-only + + + diff --git a/kramv/KramLoader.mm b/kramv/KramLoader.mm index 7d75a90d..e2612552 100644 --- a/kramv/KramLoader.mm +++ b/kramv/KramLoader.mm @@ -96,15 +96,16 @@ bool isDecodeImageNeeded(MyMTLPixelFormat pixelFormat) { bool decodeImage(KTXImage& image, KTXImage& imageDecoded) { - Image imageUnused; // TODO: move to only using KTXImage, decode needs to move there + KramDecoderParams decoderParams; + KramDecoder decoder; if (isETCFormat(image.pixelFormat)) { - if (!imageUnused.decode(image, imageDecoded, kTexEncoderEtcenc, false, "")) { + if (!decoder.decode(image, imageDecoded, decoderParams)) { return NO; } } else if (isASTCFormat(image.pixelFormat)) { - if (!imageUnused.decode(image, imageDecoded, kTexEncoderAstcenc, false, "")) { + if (!decoder.decode(image, imageDecoded, decoderParams)) { return NO; } } @@ -198,8 +199,8 @@ inline MyMTLPixelFormat remapInternalRGBFormat(MyMTLPixelFormat format) { #if SUPPORT_RGB if (isInternalRGBFormat(image.pixelFormat)) { // loads and converts image from RGB to RGBA - Image rbgaImage; - if (!rbgaImage.loadImageFromKTX(image)) + Image rgbaImage; + if (!rgbaImage.loadImageFromKTX(image)) return nil; // re-encode it as a KTXImage, even though this is just a copy @@ -214,7 +215,8 @@ inline MyMTLPixelFormat remapInternalRGBFormat(MyMTLPixelFormat format) { ImageInfo dstImageInfo; dstImageInfo.initWithArgs(dstImageInfoArgs); - if (!rbgaImage.encode(dstImageInfo, rbgaImage2)) { + KramEncoder encoder; + if (!encoder.encode(dstImageInfo, rgbaImage, rbgaImage2)) { return nil; } @@ -303,7 +305,7 @@ static uint32_t numberOfMipmapLevels(const Image& image) { [texture replaceRegion:region mipmapLevel:0 - withBytes:sourceImage.pixels() + withBytes:sourceImage.pixels().data() bytesPerRow:bytesPerRow]; // have to schedule autogen inside render using MTLBlitEncoder diff --git a/libkram/kram/KTXImage.cpp b/libkram/kram/KTXImage.cpp index ff45debc..fa38aeec 100644 --- a/libkram/kram/KTXImage.cpp +++ b/libkram/kram/KTXImage.cpp @@ -554,6 +554,11 @@ bool isASTCFormat(MyMTLPixelFormat format) return it.isASTC(); } +bool isBlockFormat(MyMTLPixelFormat format) +{ + return isBCFormat(format) || isETCFormat(format) || isASTCFormat(format); +} + bool isExplicitFormat(MyMTLPixelFormat format) { const auto& it = formatInfo(format); @@ -771,6 +776,16 @@ uint32_t KTXImage::mipLevelSize(uint32_t width_, uint32_t height_) const return count * size; } +uint32_t KTXImage::mipLevelSize(uint32_t mipNumber) const +{ + uint32_t w = width; + uint32_t h = height; + uint32_t d = depth; + + mipDown(w, h, d, mipNumber); + return mipLevelSize(w, h); +} + uint32_t KTXImage::blockCountRows(uint32_t width_) const { assert(width_ >= 1); diff --git a/libkram/kram/KTXImage.h b/libkram/kram/KTXImage.h index bcfb49b3..bc2bfae8 
100644 --- a/libkram/kram/KTXImage.h +++ b/libkram/kram/KTXImage.h @@ -296,11 +296,7 @@ class KTXImage { uint32_t blockCount(uint32_t width_, uint32_t height_) const; uint32_t blockCountRows(uint32_t width_) const; - // mip data depends on format - uint32_t mipLevelSize(uint32_t width_, uint32_t height_) const; - //int totalMipLevels() const; - uint32_t totalChunks() const; - + // this is where KTXImage holds all mip data internally void reserveImageData(); vector& imageData(); @@ -315,10 +311,21 @@ class KTXImage { bool unpackLevel(uint32_t mipNumber, const uint8_t* srcData, uint8_t* dstData); // helpers to work with the mipLevels array, mipLength and levelLength are important to get right + // mip data depends on format + + // mip + void mipDimensions(uint32_t mipNumber, uint32_t& width_, uint32_t& height_, uint32_t& depth_) const; + uint32_t mipLevelSize(uint32_t width_, uint32_t height_) const; + uint32_t mipLevelSize(uint32_t mipNumber) const; size_t mipLengthLargest() const { return mipLevels[0].length; } size_t mipLength(uint32_t mipNumber) const { return mipLevels[mipNumber].length; } + + // level size_t levelLength(uint32_t mipNumber) const { return mipLevels[mipNumber].length * totalChunks(); } size_t levelLengthCompressed(uint32_t mipNumber) const { return mipLevels[mipNumber].lengthCompressed; } + + // chunk + uint32_t totalChunks() const; size_t chunkOffset(uint32_t mipNumber, uint32_t chunkNumber) const { return mipLevels[mipNumber].offset + mipLevels[mipNumber].length * chunkNumber; } @@ -354,6 +361,45 @@ class KTXImage { const uint8_t* fileData; // mmap data }; +// GL/D3D hobbled non-pow2 mips by only supporting round down, not round up +// And then Metal followed OpenGL since it's the same hw and drivers. +// Round up adds an extra mip level to the chain, but results in much better filtering. +// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_non_power_of_two.txt +// http://download.nvidia.com/developer/Papers/2005/NP2_Mipmapping/NP2_Mipmap_Creation.pdf +inline void mipDown(int32_t& w, int32_t& h, int32_t& d, uint32_t lod = 1) +{ + // round-down + w >>= (int32_t)lod; + h >>= (int32_t)lod; + d >>= (int32_t)lod; + + if (w < 1) w = 1; + if (h < 1) h = 1; + if (d < 1) d = 1; +} + +inline void mipDown(uint32_t& w, uint32_t& h, uint32_t& d, uint32_t lod = 1) +{ + // round-down + w >>= lod; + h >>= lod; + d >>= lod; + + if (w < 1) w = 1; + if (h < 1) h = 1; + if (d < 1) d = 1; +} + +inline void KTXImage::mipDimensions(uint32_t mipNumber, uint32_t& width_, uint32_t& height_, uint32_t& depth_) const { + assert(mipNumber < mipLevels.size()); + + width_ = width; + height_ = height; + depth_ = depth; + + mipDown(width_, height_, depth_, mipNumber); +} + const char* supercompressionName(KTX2Supercompression type); // Generic format helpers. All based on the ubiquitous type. 
@@ -368,6 +414,7 @@ bool isSignedFormat(MyMTLPixelFormat format); bool isBCFormat(MyMTLPixelFormat format); bool isETCFormat(MyMTLPixelFormat format); bool isASTCFormat(MyMTLPixelFormat format); +bool isBlockFormat(MyMTLPixelFormat format); bool isExplicitFormat(MyMTLPixelFormat format); Int2 blockDimsOfFormat(MyMTLPixelFormat format); diff --git a/libkram/kram/Kram.cpp b/libkram/kram/Kram.cpp index aaacdbc0..84dc571a 100644 --- a/libkram/kram/Kram.cpp +++ b/libkram/kram/Kram.cpp @@ -1562,15 +1562,14 @@ string kramInfoKTXToString(const string& srcFilename, const KTXImage& srcImage, if (isVerbose) { // dump mips/dims, but this can be a lot of data on arrays int32_t mipLevel = 0; - int32_t w = srcImage.width; - int32_t h = srcImage.height; - int32_t d = srcImage.depth; // num chunks append_sprintf(info, "chun: %d\n", numChunks); for (const auto& mip : srcImage.mipLevels) { - + uint32_t w, h, d; + srcImage.mipDimensions(mipLevel, w, h, d); + switch (textureType) { case MyMTLTextureType3D: append_sprintf(info, @@ -1605,9 +1604,6 @@ string kramInfoKTXToString(const string& srcFilename, const KTXImage& srcImage, mip.length // only size of one mip right now, not mip * numChunks ); } - - // drop a mip level - mipDown(w, h, d); } } @@ -1768,8 +1764,13 @@ static int32_t kramAppDecode(vector& args) encoderName(textureDecoder)); } - Image tmpImage; // just to call decode - success = success && tmpImage.decode(srcImage, tmpFileHelper.pointer(), textureDecoder, isVerbose, swizzleText); + KramDecoderParams params; + params.isVerbose = isVerbose; + params.decoder = textureDecoder; + params.swizzleText = swizzleText; + + KramDecoder decoder; // just to call decode + success = success && decoder.decode(srcImage, tmpFileHelper.pointer(), params); // rename to dest filepath, note this only occurs if above succeeded // so any existing files are left alone on failure. @@ -2214,7 +2215,7 @@ static int32_t kramAppEncode(vector& args) // so now can complete validation knowing hdr vs. ldr input // this checks the dst format if (success) { - bool isHDR = srcImage.pixelsFloat() != nullptr; + bool isHDR = !srcImage.pixelsFloat().empty(); if (isHDR) { MyMTLPixelFormat format = info.pixelFormat; @@ -2272,7 +2273,8 @@ static int32_t kramAppEncode(vector& args) } if (success) { - success = srcImage.encode(info, tmpFileHelper.pointer()); + KramEncoder encoder; + success = encoder.encode(info, srcImage, tmpFileHelper.pointer()); if (!success) { KLOGE("Kram", "encode failed"); diff --git a/libkram/kram/KramConfig.h b/libkram/kram/KramConfig.h index e3b84c0a..1a485c73 100644 --- a/libkram/kram/KramConfig.h +++ b/libkram/kram/KramConfig.h @@ -378,43 +378,6 @@ inline half4 toHalf4(const float4& vv) //--------------------------------------- - -inline void mipDown(int32_t& w, int32_t& h, int32_t& d, uint32_t lod = 1) -{ - // GL/D3D hobbled non-pow2 mips by only supporting round down, not round up - // And then Metal followed OpenGL since it's the same hw and drivers. - // Round up adds an extra mip level to the chain, but results in much better filtering. 
- // https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_non_power_of_two.txt - // http://download.nvidia.com/developer/Papers/2005/NP2_Mipmapping/NP2_Mipmap_Creation.pdf - - // round-down - w >>= (int32_t)lod; - h >>= (int32_t)lod; - d >>= (int32_t)lod; - - if (w < 1) w = 1; - if (h < 1) h = 1; - if (d < 1) d = 1; -} - -inline void mipDown(uint32_t& w, uint32_t& h, uint32_t& d, uint32_t lod = 1) -{ - // GL/D3D hobbled non-pow2 mips by only supporting round down, not round up - // And then Metal followed OpenGL since it's the same hw and drivers. - // Round up adds an extra mip level to the chain, but results in much better filtering. - // https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_non_power_of_two.txt - // http://download.nvidia.com/developer/Papers/2005/NP2_Mipmapping/NP2_Mipmap_Creation.pdf - - // round-down - w >>= lod; - h >>= lod; - d >>= lod; - - if (w < 1) w = 1; - if (h < 1) h = 1; - if (d < 1) d = 1; -} - // Use this on vectors #include diff --git a/libkram/kram/KramImage.cpp b/libkram/kram/KramImage.cpp index 98b1450c..f218fe8c 100644 --- a/libkram/kram/KramImage.cpp +++ b/libkram/kram/KramImage.cpp @@ -322,7 +322,7 @@ bool Image::loadImageFromPixels(const vector& pixels, int32_t width, // BC1nm + b average. That way color endpoints are of some use rather than just // being set ot 0. This runs counter to ASTC L+A mode though which eliminates // the endpoint storage. -void Image::averageChannelsInBlock( +void KramEncoder::averageChannelsInBlock( const char* averageChannels, const KTXImage& image, ImageData& srcImage, vector& tmpImageData8) const // otherwise, it's BlueAlpha averaging { @@ -413,19 +413,319 @@ static bool writeDataAtOffset(const uint8_t* data, size_t dataSize, size_t dataO return true; } -bool Image::decode(const KTXImage& srcImage, FILE* dstFile, TexEncoder decoder, bool isVerbose, const string& swizzleText) const +bool KramDecoder::decode(const KTXImage& srcImage, FILE* dstFile, const KramDecoderParams& params) const { KTXImage dstImage; // thrown out, data written to file - return decodeImpl(srcImage, dstFile, dstImage, decoder, isVerbose, swizzleText); + return decodeImpl(srcImage, dstFile, dstImage, params); } -bool Image::decode(const KTXImage& srcImage, KTXImage& dstImage, TexEncoder decoder, bool isVerbose, const string& swizzleText) const +bool KramDecoder::decode(const KTXImage& srcImage, KTXImage& dstImage, const KramDecoderParams& params) const { - return decodeImpl(srcImage, nullptr, dstImage, decoder, isVerbose, swizzleText); + return decodeImpl(srcImage, nullptr, dstImage, params); } -bool Image::decodeImpl(const KTXImage& srcImage, FILE* dstFile, KTXImage& dstImage, TexEncoder decoder, bool isVerbose, const string& swizzleText) const +bool KramDecoder::decodeBlocks( + int32_t w, int32_t h, + const uint8_t* blockData, uint32_t blockDataSize, MyMTLPixelFormat blockFormat, + vector& outputTexture, // currently Color + const KramDecoderParams& params) const { + + bool success = false; + + // could tie use flags to format filter, or encoder settings + // or may want to disable if decoders don't gen correct output + TexEncoder decoder = params.decoder; +#if COMPILE_ATE + // Encode/decode formats differ depending on library version + // but it's likely the fastest decoder. Only on macOS/iOS. 
+ bool useATE = decoder == kTexEncoderATE; +#endif +#if COMPILE_SQUISH + bool useSquish = decoder == kTexEncoderSquish; +#endif +#if COMPILE_BCENC + bool useBcenc = decoder == kTexEncoderBcenc; +#endif +#if COMPILE_ASTCENC + bool useAstcenc = decoder == kTexEncoderAstcenc; +#endif + + // TODO: hook to block decode logic below + // copy srcData if using ATE, it says it needs 16-byte aligned data for encode + // and assume for decode too. Output texture is already 16-byte aligned. + const uint8_t* srcData = blockData; + vector srcTexture; + if (useATE && (((uintptr_t)srcData & 15) != 0)) { + srcTexture.resize(blockDataSize); + memcpy(srcTexture.data(), srcData, blockDataSize); + srcData = srcTexture.data(); + } + + Int2 blockDims = blockDimsOfFormat(blockFormat); + bool isVerbose = params.isVerbose; + const string& swizzleText = params.swizzleText; + bool isHDR = isHdrFormat(blockFormat); + + // start decoding after format pulled from KTX file + if (isBCFormat(blockFormat)) { + // bc via ate, or squish for bc1-5 if on other platforms + // bcenc also likely has decode for bc7 + if (false) { + // just to chain if/else + } +#if COMPILE_BCENC + else if (useBcenc) { + Color* dstPixels = (Color*)outputTexture.data(); + + const int32_t blockDim = 4; + int32_t blocks_x = (w + blockDim - 1) / blockDim; + //int32_t blocks_y = (h + blockDim - 1) / blockDim; + int32_t blockSize = blockSizeOfFormat(blockFormat); + + for (int32_t y = 0; y < h; y += blockDim) { + for (int32_t x = 0; x < w; x += blockDim) { + int32_t bbx = x / blockDim; + int32_t bby = y / blockDim; + int32_t bb0 = bby * blocks_x + bbx; + const uint8_t* srcBlock = &srcData[bb0 * blockSize]; + + // decode into temp 4x4 pixels + Color pixels[blockDim * blockDim]; + + success = true; + + switch (blockFormat) { + case MyMTLPixelFormatBC1_RGBA: + case MyMTLPixelFormatBC1_RGBA_sRGB: + // Returns true if the block uses 3 color punchthrough alpha mode. + rgbcx::unpack_bc1(srcBlock, pixels); + break; + case MyMTLPixelFormatBC3_RGBA_sRGB: + case MyMTLPixelFormatBC3_RGBA: + // Returns true if the block uses 3 color punchthrough alpha mode. 
+ rgbcx::unpack_bc3(srcBlock, pixels); + break; + case MyMTLPixelFormatBC4_RSnorm: + case MyMTLPixelFormatBC4_RUnorm: + rgbcx::unpack_bc4(srcBlock, (uint8_t*)pixels); + break; + case MyMTLPixelFormatBC5_RGSnorm: + case MyMTLPixelFormatBC5_RGUnorm: + rgbcx::unpack_bc5(srcBlock, pixels); + break; + + case MyMTLPixelFormatBC7_RGBAUnorm: + case MyMTLPixelFormatBC7_RGBAUnorm_sRGB: + bc7decomp::unpack_bc7(srcBlock, (bc7decomp::color_rgba*)pixels); + break; + + default: + KLOGE("Image", "decode unsupported format"); + success = false; + break; + } + + if (!success) { + return false; + } + + // copy temp pixels to outputTexture + for (int32_t by = 0; by < blockDim; ++by) { + int32_t yy = y + by; + if (yy >= h) { + break; + } + + for (int32_t bx = 0; bx < blockDim; ++bx) { + int32_t xx = x + bx; + if (xx >= w) { + break; // go to next y above + } + + dstPixels[yy * w + xx] = pixels[by * blockDim + bx]; + } + } + } + } + } +#endif +#if COMPILE_SQUISH + else if (useSquish) { + squish::TexFormat format = squish::kBC1; + + success = true; + + switch (blockFormat) { + case MyMTLPixelFormatBC1_RGBA: + case MyMTLPixelFormatBC1_RGBA_sRGB: + format = squish::kBC1; + break; + case MyMTLPixelFormatBC3_RGBA_sRGB: + case MyMTLPixelFormatBC3_RGBA: + format = squish::kBC3; + break; + case MyMTLPixelFormatBC4_RSnorm: + case MyMTLPixelFormatBC4_RUnorm: + format = squish::kBC4; + break; + case MyMTLPixelFormatBC5_RGSnorm: + case MyMTLPixelFormatBC5_RGUnorm: + format = squish::kBC5; + break; + default: + KLOGE("Image", "decode unsupported format"); + success = false; + break; + } + + if (success) { + // only handles bc1,3,4,5 + squish::DecompressImage(outputTexture.data(), w, h, srcData, format); + success = true; + } + } +#endif +#if COMPILE_ATE + else if (useATE) { + ATEEncoder encoder; + success = encoder.Decode(blockFormat, blockDataSize, blockDims.y, + isVerbose, + w, h, srcData, outputTexture.data()); + } +#endif + } + else if (isETCFormat(blockFormat)) { + // etc via etc2comp +#if COMPILE_ETCENC + Etc::Image::Format format = Etc::Image::Format::R11; + + success = true; + + switch (blockFormat) { + case MyMTLPixelFormatEAC_R11Unorm: + format = Etc::Image::Format::R11; + break; + case MyMTLPixelFormatEAC_R11Snorm: + format = Etc::Image::Format::SIGNED_R11; + break; + case MyMTLPixelFormatEAC_RG11Unorm: + format = Etc::Image::Format::RG11; + break; + case MyMTLPixelFormatEAC_RG11Snorm: + format = Etc::Image::Format::SIGNED_RG11; + break; + + case MyMTLPixelFormatETC2_RGB8: + format = Etc::Image::Format::RGB8; + break; + case MyMTLPixelFormatETC2_RGB8_sRGB: + format = Etc::Image::Format::SRGB8; + break; + case MyMTLPixelFormatEAC_RGBA8: + format = Etc::Image::Format::RGBA8; + break; + case MyMTLPixelFormatEAC_RGBA8_sRGB: + format = Etc::Image::Format::SRGBA8; + break; + + default: + KLOGE("Image", "decode unsupported format"); + success = false; + break; + } + + if (success) { + Etc::Image etcImage(format, nullptr, + w, h, Etc::ErrorMetric::NUMERIC); + + success = etcImage.Decode(srcData, outputTexture.data()) == Etc::Image::SUCCESS; + } +#endif + } + else if (isASTCFormat(blockFormat)) { + // ate can decode more than it encodes + if (false) { + // just to chain if/else + } +#if COMPILE_ASTCENC + else if (useAstcenc) { + // decode the mip + astcenc_image dstImageASTC; + dstImageASTC.dim_x = w; + dstImageASTC.dim_y = h; + dstImageASTC.dim_z = 1; // Not using 3D blocks, not supported on iOS + //dstImageASTC.dim_pad = 0; + dstImageASTC.data_type = ASTCENC_TYPE_U8; + + + // encode/encode still setup on array of 2d 
slices, so need address of data + uint8_t* outData = outputTexture.data(); + dstImageASTC.data = (void**)&outData; + + uint32_t srcDataLength = blockDataSize; + + astcenc_profile profile; + profile = ASTCENC_PRF_LDR; // isSrgb ? ASTCENC_PRF_LDR_SRGB : ASTCENC_PRF_LDR; + if (isHDR) { + profile = ASTCENC_PRF_HDR; // TODO: also ASTCENC_PRF_HDR_RGB_LDR_A + } + + astcenc_config config; + astcenc_error error = astcenc_config_init( + profile, blockDims.x, blockDims.y, 1, ASTCENC_PRE_FAST, ASTCENC_FLG_DECOMPRESS_ONLY, &config); + if (error != ASTCENC_SUCCESS) { + return false; + } + + astcenc_context* codec_context = nullptr; + error = astcenc_context_alloc(&config, 1, &codec_context); + if (error != ASTCENC_SUCCESS) { + return false; + } + // no swizzle + astcenc_swizzle swizzleDecode = {ASTCENC_SWZ_R, ASTCENC_SWZ_G, ASTCENC_SWZ_B, ASTCENC_SWZ_A}; + + error = astcenc_decompress_image(codec_context, srcData, srcDataLength, &dstImageASTC, swizzleDecode, 0); + + astcenc_context_free(codec_context); + + success = (error == ASTCENC_SUCCESS); + } +#endif +#if COMPILE_ATE + else if (useATE) { + // this decods all except hdr/bc6 + ATEEncoder encoder; + success = encoder.Decode(blockFormat, blockDataSize, blockDims.y, + isVerbose, + w, h, srcData, outputTexture.data()); + } +#endif + } + else { + KLOGE("Image", "unsupported pixel format for decode"); + success = false; + } + + // stop processing mips, since failed above + if (!success) { + return false; + } + + // swizzle the data back to a more viewable layout (f.e. gggr -> rg01) + // This swizzleText is currently explicit, but could be reversed from prop of content channels and preswizzle. + // It's hard to specify this swizzle for arbitrary content otherwise. + if (!swizzleText.empty()) { + ImageInfo::swizzleTextureLDR(w, h, (Color*)outputTexture.data(), swizzleText.c_str()); + } + + return true; +} + +bool KramDecoder::decodeImpl(const KTXImage& srcImage, FILE* dstFile, KTXImage& dstImage, const KramDecoderParams& params) const +{ + // read existing KTX file into mip offset, then start decoding the blocks // and write these to 8u,16f,32f ktx with mips // write out KTXHeader for the explicit image, this should be similar to other code @@ -436,8 +736,7 @@ bool Image::decodeImpl(const KTXImage& srcImage, FILE* dstFile, KTXImage& dstIma MyMTLPixelFormat pixelFormat = srcImage.pixelFormat; bool isSrgb = isSrgbFormat(pixelFormat); - bool isHDR = isHdrFormat(pixelFormat); - + // setup dstImage //KTXImage dstImage; dstImage = srcImage; // copy src (name-value pairs copied too) @@ -485,7 +784,6 @@ bool Image::decodeImpl(const KTXImage& srcImage, FILE* dstFile, KTXImage& dstIma headerCopy.pixelDepth = 0; } - // write the header out if (!writeDataAtOffset((const uint8_t*)&headerCopy, sizeof(KTXHeader), 0, dstFile, dstImage)) { return false; @@ -500,305 +798,31 @@ bool Image::decodeImpl(const KTXImage& srcImage, FILE* dstFile, KTXImage& dstIma vector outputTexture; vector srcTexture; - - // could tie use flags to format filter, or encoder settings - // or may want to disable if decoders don't gen correct output -#if COMPILE_ATE - // Encode/decode formats differ depending on library version - // but it's likely the fastest decoder. Only on macOS/iOS. 
- bool useATE = decoder == kTexEncoderATE; -#endif -#if COMPILE_SQUISH - bool useSquish = decoder == kTexEncoderSquish; -#endif -#if COMPILE_BCENC - bool useBcenc = decoder == kTexEncoderBcenc; -#endif -#if COMPILE_ASTCENC - bool useAstcenc = decoder == kTexEncoderAstcenc; -#endif - + // DONE: walk chunks here and seek to src and dst offsets in conversion // make sure to walk chunks in the exact same order they are written, array then face, or slice - int32_t w = 0; - int32_t h = 0; - int32_t d = 0; - - for (int32_t chunk = 0; chunk < numChunks; ++chunk) { - w = srcImage.width; - h = srcImage.height; - d = srcImage.depth; + for (uint32_t i = 0; i < srcImage.header.numberOfMipmapLevels; ++i) { + // TODO: to decode compressed KTX2 want to walk all chunks of a single level + // after decompressing the level. This isn't doing unpackLevel and needs to here. + assert(!srcImage.isSupercompressed()); + + uint32_t w, h, d; + srcImage.mipDimensions(i, w, h, d); - for (int32_t i = 0; i < (int32_t)srcImage.header.numberOfMipmapLevels; ++i) { + for (int32_t chunk = 0; chunk < numChunks; ++chunk) { + const KTXImageLevel& dstMipLevel = dstImage.mipLevels[i]; outputTexture.resize(dstMipLevel.length); const KTXImageLevel& srcMipLevel = srcImage.mipLevels[i]; const uint8_t* srcData = srcImage.fileData + srcMipLevel.offset + chunk * srcMipLevel.length; - // copy srcData if using ATE, it says it needs 16-byte aligned data for encode - // and assume for decode too. Output texture is already 16-byte aligned. - if (((uintptr_t)srcData & 15) != 0) { - srcTexture.resize(srcMipLevel.length); - memcpy(srcTexture.data(), srcData, srcMipLevel.length); - srcData = srcTexture.data(); - } - - // start decoding after format pulled from KTX file - if (isBCFormat(pixelFormat)) { - // bc via ate, or squish for bc1-5 if on other platforms - // bcenc also likely has decode for bc7 - if (false) { - // just to chain if/else - } - #if COMPILE_BCENC - else if (useBcenc) { - Color* dstPixels = (Color*)outputTexture.data(); - - const int32_t blockDim = 4; - int32_t blocks_x = (w + blockDim - 1) / blockDim; - //int32_t blocks_y = (h + blockDim - 1) / blockDim; - int32_t blockSize = blockSizeOfFormat(pixelFormat); - - for (int32_t y = 0; y < h; y += blockDim) { - for (int32_t x = 0; x < w; x += blockDim) { - int32_t bbx = x / blockDim; - int32_t bby = y / blockDim; - int32_t bb0 = bby * blocks_x + bbx; - const uint8_t* srcBlock = &srcData[bb0 * blockSize]; - - // decode into temp 4x4 pixels - Color pixels[blockDim * blockDim]; - - success = true; - - switch (pixelFormat) { - case MyMTLPixelFormatBC1_RGBA: - case MyMTLPixelFormatBC1_RGBA_sRGB: - // Returns true if the block uses 3 color punchthrough alpha mode. - rgbcx::unpack_bc1(srcBlock, pixels); - break; - case MyMTLPixelFormatBC3_RGBA_sRGB: - case MyMTLPixelFormatBC3_RGBA: - // Returns true if the block uses 3 color punchthrough alpha mode. 
- rgbcx::unpack_bc3(srcBlock, pixels); - break; - case MyMTLPixelFormatBC4_RSnorm: - case MyMTLPixelFormatBC4_RUnorm: - rgbcx::unpack_bc4(srcBlock, (uint8_t*)pixels); - break; - case MyMTLPixelFormatBC5_RGSnorm: - case MyMTLPixelFormatBC5_RGUnorm: - rgbcx::unpack_bc5(srcBlock, pixels); - break; - - case MyMTLPixelFormatBC7_RGBAUnorm: - case MyMTLPixelFormatBC7_RGBAUnorm_sRGB: - bc7decomp::unpack_bc7(srcBlock, (bc7decomp::color_rgba*)pixels); - break; - - default: - KLOGE("Image", "decode unsupported format"); - success = false; - break; - } - - if (!success) { - return false; - } - - // copy temp pixels to outputTexture - for (int32_t by = 0; by < blockDim; ++by) { - int32_t yy = y + by; - if (yy >= h) { - break; - } - - for (int32_t bx = 0; bx < blockDim; ++bx) { - int32_t xx = x + bx; - if (xx >= w) { - break; // go to next y above - } - - dstPixels[yy * w + xx] = pixels[by * blockDim + bx]; - } - } - } - } - } - #endif - #if COMPILE_SQUISH - else if (useSquish) { - squish::TexFormat format = squish::kBC1; - - success = true; - - switch (pixelFormat) { - case MyMTLPixelFormatBC1_RGBA: - case MyMTLPixelFormatBC1_RGBA_sRGB: - format = squish::kBC1; - break; - case MyMTLPixelFormatBC3_RGBA_sRGB: - case MyMTLPixelFormatBC3_RGBA: - format = squish::kBC3; - break; - case MyMTLPixelFormatBC4_RSnorm: - case MyMTLPixelFormatBC4_RUnorm: - format = squish::kBC4; - break; - case MyMTLPixelFormatBC5_RGSnorm: - case MyMTLPixelFormatBC5_RGUnorm: - format = squish::kBC5; - break; - default: - KLOGE("Image", "decode unsupported format"); - success = false; - break; - } - - if (success) { - // only handles bc1,3,4,5 - squish::DecompressImage(outputTexture.data(), w, h, srcData, format); - success = true; - } - } - #endif - #if COMPILE_ATE - else if (useATE) { - ATEEncoder encoder; - success = encoder.Decode(pixelFormat, (int32_t)srcMipLevel.length, srcImage.blockDims().y, - isVerbose, - w, h, srcData, outputTexture.data()); - } - #endif - } - else if (isETCFormat(pixelFormat)) { - // etc via etc2comp - #if COMPILE_ETCENC - Etc::Image::Format format = Etc::Image::Format::R11; - - success = true; - - switch (pixelFormat) { - case MyMTLPixelFormatEAC_R11Unorm: - format = Etc::Image::Format::R11; - break; - case MyMTLPixelFormatEAC_R11Snorm: - format = Etc::Image::Format::SIGNED_R11; - break; - case MyMTLPixelFormatEAC_RG11Unorm: - format = Etc::Image::Format::RG11; - break; - case MyMTLPixelFormatEAC_RG11Snorm: - format = Etc::Image::Format::SIGNED_RG11; - break; - - case MyMTLPixelFormatETC2_RGB8: - format = Etc::Image::Format::RGB8; - break; - case MyMTLPixelFormatETC2_RGB8_sRGB: - format = Etc::Image::Format::SRGB8; - break; - case MyMTLPixelFormatEAC_RGBA8: - format = Etc::Image::Format::RGBA8; - break; - case MyMTLPixelFormatEAC_RGBA8_sRGB: - format = Etc::Image::Format::SRGBA8; - break; - - default: - KLOGE("Image", "decode unsupported format"); - success = false; - break; - } - - if (success) { - Etc::Image etcImage(format, nullptr, - w, h, Etc::ErrorMetric::NUMERIC); - - success = etcImage.Decode(srcData, outputTexture.data()) == Etc::Image::SUCCESS; - } - #endif - } - else if (isASTCFormat(pixelFormat)) { - // ate can decode more than it encodes - if (false) { - // just to chain if/else - } - #if COMPILE_ASTCENC - else if (useAstcenc) { - // decode the mip - astcenc_image dstImageASTC; - dstImageASTC.dim_x = w; - dstImageASTC.dim_y = h; - dstImageASTC.dim_z = 1; // Not using 3D blocks, not supported on iOS - //dstImageASTC.dim_pad = 0; - dstImageASTC.data_type = ASTCENC_TYPE_U8; - - - // 
encode/encode still setup on array of 2d slices, so need address of data - uint8_t* outData = outputTexture.data(); - dstImageASTC.data = (void**)&outData; - - int32_t srcDataLength = (int32_t)srcMipLevel.length; - Int2 blockDims = srcImage.blockDims(); - - astcenc_profile profile; - profile = ASTCENC_PRF_LDR; // isSrgb ? ASTCENC_PRF_LDR_SRGB : ASTCENC_PRF_LDR; - if (isHDR) { - profile = ASTCENC_PRF_HDR; // TODO: also ASTCENC_PRF_HDR_RGB_LDR_A - } - - astcenc_config config; - astcenc_error error = astcenc_config_init( - profile, blockDims.x, blockDims.y, 1, ASTCENC_PRE_FAST, ASTCENC_FLG_DECOMPRESS_ONLY, &config); - if (error != ASTCENC_SUCCESS) { - return false; - } - - astcenc_context* codec_context = nullptr; - error = astcenc_context_alloc(&config, 1, &codec_context); - if (error != ASTCENC_SUCCESS) { - return false; - } - // no swizzle - astcenc_swizzle swizzleDecode = {ASTCENC_SWZ_R, ASTCENC_SWZ_G, ASTCENC_SWZ_B, ASTCENC_SWZ_A}; - - error = astcenc_decompress_image(codec_context, srcData, srcDataLength, &dstImageASTC, swizzleDecode, 0); - - astcenc_context_free(codec_context); - - success = (error == ASTCENC_SUCCESS); - } - #endif - #if COMPILE_ATE - else if (useATE) { - // this decods all except hdr/bc6 - ATEEncoder encoder; - success = encoder.Decode(pixelFormat, (int32_t)srcMipLevel.length, srcImage.blockDims().y, - isVerbose, - w, h, srcData, outputTexture.data()); - } - #endif - } - else { - KLOGE("Image", "unsupported pixel format for decode"); - success = false; - } - - // stop processing mips, since failed above - if (!success) { - break; - } - - // swizzle the data back to a more viewable layout (f.e. gggr -> rg01) - // This swizzleText is currently explicit, but could be reversed from prop of content channels and preswizzle. - // It's hard to specify this swizzle for arbitrary content otherwise. - if (!swizzleText.empty()) { - ImageInfo::swizzleTextureLDR(w, h, (Color*)outputTexture.data(), swizzleText.c_str()); + // decode the blocks to LDR RGBA8 + if (!decodeBlocks(w, h, srcData, srcMipLevel.length, srcImage.pixelFormat, outputTexture, params)) { + return false; } - + // write the mips out to the file, and code above can then decode into the same buffer // This isn't correct for cubes, arrays, and other types. The mip length is only written out once for all mips. @@ -823,9 +847,6 @@ bool Image::decodeImpl(const KTXImage& srcImage, FILE* dstFile, KTXImage& dstIma if (!writeDataAtOffset(outputTexture.data(), dstMipLevel.length, dstMipOffset, dstFile, dstImage)) { return false; } - - // next mip level - mipDown(w, h, d); } } @@ -875,17 +896,17 @@ bool Image::resizeImage(int32_t wResize, int32_t hResize, bool resizePow2, Image } -bool Image::encode(ImageInfo& info, KTXImage& dstImage) const +bool KramEncoder::encode(ImageInfo& info, Image& singleImage,KTXImage& dstImage) const { - return encodeImpl(info, nullptr, dstImage); + return encodeImpl(info, singleImage, nullptr, dstImage); } -bool Image::encode(ImageInfo& info, FILE* dstFile) const +bool KramEncoder::encode(ImageInfo& info, Image& singleImage, FILE* dstFile) const { // dstImage will be ignored KTXImage dstImage; - return encodeImpl(info, dstFile, dstImage); + return encodeImpl(info, singleImage, dstFile, dstImage); } // Use this for in-place construction of mips @@ -1181,7 +1202,7 @@ KTX2DescriptorFileBlock::KTX2DescriptorFileBlock(MyMTLPixelFormat format, bool i flags = isPremul ? 
KHR_DF_FLAG_ALPHA_PREMULTIPLIED : KHR_DF_FLAG_ALPHA_STRAIGHT; } -void Image::addBaseProps(const ImageInfo& info, KTXImage& dstImage) const +void KramEncoder::addBaseProps(const ImageInfo& info, KTXImage& dstImage) const { dstImage.addFormatProps(); @@ -1250,15 +1271,15 @@ struct ZSTDScope }; -bool Image::encodeImpl(ImageInfo& info, FILE* dstFile, KTXImage& dstImage) const +bool KramEncoder::encodeImpl(ImageInfo& info, Image& singleImage, FILE* dstFile, KTXImage& dstImage) const { KTXHeader& header = dstImage.header; MipConstructData mipConstructData; vector& chunkOffsets = mipConstructData.chunkOffsets; - int32_t w = _width; - int32_t h = _height; + int32_t w = singleImage.width(); + int32_t h = singleImage.height(); // compute chunks, and adjust w/h based on that // the code allows a vertical or horizontal strip or grid of chunks @@ -1325,7 +1346,7 @@ bool Image::encodeImpl(ImageInfo& info, FILE* dstFile, KTXImage& dstImage) const // A better way would be to do mips in-place, but in-order, and compressing the large // to small mips into an array of open compressor streams. Then only need one mip instead of // all levels in memory. - if (!writeKTX1FileOrImage(info, mipConstructData, propsData, nullptr, dstImage)) { + if (!writeKTX1FileOrImage(info, singleImage, mipConstructData, propsData, nullptr, dstImage)) { return false; } @@ -1533,7 +1554,7 @@ bool Image::encodeImpl(ImageInfo& info, FILE* dstFile, KTXImage& dstImage) const } else { // this is purely ktx1 output path - if (!writeKTX1FileOrImage(info, mipConstructData, propsData, dstFile, dstImage)) { + if (!writeKTX1FileOrImage(info, singleImage, mipConstructData, propsData, dstFile, dstImage)) { return false; } } @@ -1541,8 +1562,9 @@ bool Image::encodeImpl(ImageInfo& info, FILE* dstFile, KTXImage& dstImage) const return true; } -bool Image::writeKTX1FileOrImage( +bool KramEncoder::writeKTX1FileOrImage( ImageInfo& info, + Image& singleImage, MipConstructData& mipConstructData, const vector& propsData, FILE* dstFile, KTXImage& dstImage) const @@ -1590,7 +1612,7 @@ bool Image::writeKTX1FileOrImage( } // build and weite out the mip data - if (!createMipsFromChunks(info, mipConstructData, dstFile, dstImage)) { + if (!createMipsFromChunks(info, singleImage, mipConstructData, dstFile, dstImage)) { return false; } @@ -1665,8 +1687,9 @@ void printBCBlock(const uint8_t* bcBlock, MyMTLPixelFormat format) { } } -bool Image::createMipsFromChunks( +bool KramEncoder::createMipsFromChunks( ImageInfo& info, + Image& singleImage, MipConstructData& data, FILE* dstFile, KTXImage& dstImage @@ -1721,14 +1744,14 @@ bool Image::createMipsFromChunks( srcImage.pixelsFloat = floatImage.data(); } else { - srcImage.pixelsFloat = (float4*)_pixelsFloat.data(); + srcImage.pixelsFloat = (float4*)singleImage.pixelsFloat().data(); } // run this across all the source data // do this in-place before mips are generated if (doPremultiply) { if (info.isPrezero) { - for (const auto& pixel : _pixelsFloat) { + for (const auto& pixel : singleImage.pixelsFloat()) { float alpha = pixel.w; float4& pixelChange = const_cast(pixel); @@ -1740,7 +1763,7 @@ bool Image::createMipsFromChunks( } } else { - for (const auto& pixel : _pixelsFloat) { + for (const auto& pixel : singleImage.pixelsFloat()) { float alpha = pixel.w; float4& pixelChange = const_cast(pixel); pixelChange *= alpha; @@ -1756,7 +1779,7 @@ bool Image::createMipsFromChunks( srcImage.pixels = copyImage.data(); } else { - srcImage.pixels = (Color*)_pixels.data(); + srcImage.pixels = (Color*)singleImage.pixels().data(); } // 
used to store premul and linear color @@ -1799,12 +1822,12 @@ bool Image::createMipsFromChunks( if (info.isHDR) { if (isMultichunk) { - const float4* srcPixels = (const float4*)_pixelsFloat.data(); + const float4* srcPixels = (const float4*)singleImage.pixelsFloat().data(); for (int32_t y = 0; y < h; ++y) { int32_t y0 = y * w; // offset into original strip/atlas - int32_t yOffset = (y + chunkOffset.y) * _width + chunkOffset.x; + int32_t yOffset = (y + chunkOffset.y) * singleImage.width() + chunkOffset.x; for (int32_t x = 0; x < w; ++x) { float4 c0 = srcPixels[yOffset + x]; @@ -1816,12 +1839,12 @@ bool Image::createMipsFromChunks( } else { if (isMultichunk) { - const Color* srcPixels = (const Color*)_pixels.data(); + const Color* srcPixels = (const Color*)singleImage.pixels().data(); for (int32_t y = 0; y < h; ++y) { int32_t y0 = y * w; // offset into original strip/atlas - int32_t yOffset = (y + chunkOffset.y) * _width + chunkOffset.x; + int32_t yOffset = (y + chunkOffset.y) * singleImage.width() + chunkOffset.x; for (int32_t x = 0; x < w; ++x) { Color c0 = srcPixels[yOffset + x]; @@ -1966,7 +1989,7 @@ bool Image::createMipsFromChunks( } // TODO: try to elim KTXImage passed into this -bool Image::compressMipLevel(const ImageInfo& info, KTXImage& image, +bool KramEncoder::compressMipLevel(const ImageInfo& info, KTXImage& image, ImageData& mipImage, TextureData& outputTexture, int32_t mipStorageSize) const { @@ -1982,6 +2005,7 @@ bool Image::compressMipLevel(const ImageInfo& info, KTXImage& image, switch (info.pixelFormat) { case MyMTLPixelFormatR8Unorm: case MyMTLPixelFormatRG8Unorm: + // no RGB8 writes case MyMTLPixelFormatRGBA8Unorm: case MyMTLPixelFormatRGBA8Unorm_sRGB: { int32_t count = image.blockSize() / 1; @@ -2010,6 +2034,7 @@ bool Image::compressMipLevel(const ImageInfo& info, KTXImage& image, case MyMTLPixelFormatR16Float: case MyMTLPixelFormatRG16Float: + // no RGB16Float writes case MyMTLPixelFormatRGBA16Float: { int32_t count = image.blockSize() / 2; @@ -2036,6 +2061,7 @@ bool Image::compressMipLevel(const ImageInfo& info, KTXImage& image, } case MyMTLPixelFormatR32Float: case MyMTLPixelFormatRG32Float: + // no RGB32Float writes case MyMTLPixelFormatRGBA32Float: { int32_t count = image.blockSize() / 4; diff --git a/libkram/kram/KramImage.h b/libkram/kram/KramImage.h index 22b57578..7378bf95 100644 --- a/libkram/kram/KramImage.h +++ b/libkram/kram/KramImage.h @@ -43,16 +43,7 @@ class Image { bool loadImageFromKTX(const KTXImage& image); - // encode/ecode to a file - bool encode(ImageInfo& info, FILE* dstFile) const; - - bool decode(const KTXImage& image, FILE* dstFile, TexEncoder decoder, bool isVerbose, const string& swizzleText) const; - // encode/decode to a memory block - bool encode(ImageInfo& info, KTXImage& dstImage) const; - - bool decode(const KTXImage& image, KTXImage& dstImage, TexEncoder decoder, bool isVerbose, const string& swizzleText) const; - // this is only for 2d images bool resizeImage(int32_t wResize, int32_t hResize, bool resizePow2, ImageResizeFilter filter = kImageResizeFilterPoint); @@ -60,16 +51,72 @@ class Image { int32_t width() const { return _width; } int32_t height() const { return _height; } - const uint8_t* pixels() const { return _pixels.data(); } - const float4* pixelsFloat() const { return _pixelsFloat.data(); } + const vector& pixels() const { return _pixels; } + const vector& pixelsFloat() const { return _pixelsFloat; } bool hasColor() const { return _hasColor; } bool hasAlpha() const { return _hasAlpha; } private: - bool 
encodeImpl(ImageInfo& info, FILE* dstFile, KTXImage& dstImage) const; + // pixel size of image + int32_t _width = 0; + int32_t _height = 0; + + // this is whether png/ktx source image format was L or LA or A or RGB + // if unknown then set to true, and the pixel walk will set to false + bool _hasColor = true; + bool _hasAlpha = true; + + // this is the entire strip data, float version can be passed for HDR + // sources always 4 channels RGBA for 8 and 32f data. 16f promoted to 32f. + vector _pixels; // TODO: change to Color? + //vector _pixelsHalf; // TODO: add support to import fp16 + vector _pixelsFloat; +}; + +class KramDecoderParams { +public: + TexEncoder decoder = kTexEncoderUnknown; // will pick best available from format + bool isVerbose = false; + string swizzleText; +}; + +// The decoder can decode an entire KTX/KTX2 into RGBA8u/16F/32F data. +// This is useful on platforms to display formats unsupported by the gpu, but the expanded pixels +// can take up much more memory. +class KramDecoder { +public: + bool decode(const KTXImage& image, FILE* dstFile, const KramDecoderParams& params) const; + + bool decode(const KTXImage& image, KTXImage& dstImage, const KramDecoderParams& params) const; + + bool decodeBlocks( + int32_t w, int32_t h, + const uint8_t* blockData, uint32_t blockDataSize, MyMTLPixelFormat blockFormat, + vector& dstPixels, // currently Color + const KramDecoderParams& params) const; - bool decodeImpl(const KTXImage& srcImage, FILE* dstFile, KTXImage& dstImage, TexEncoder decoder, bool isVerbose, const string& swizzleText) const; +private: + bool decodeImpl(const KTXImage& srcImage, FILE* dstFile, KTXImage& dstImage, const KramDecoderParams& params) const; +}; + +// The encoder takes a single-mip image, and in-place encodes mips and applies other +// requested operations from ImageInfo as it writes those mips. Note that KTX2 must +// accumulate all mips if compressed so that offsets of where to write data are known. +class KramEncoder { +public: + // encode to a file + bool encode(ImageInfo& info, Image& singleImage, FILE* dstFile) const; + + // encode to a memory block + bool encode(ImageInfo& info, Image& singleImage, KTXImage& dstImage) const; + + // TODO: supply encode() that takes a KTXImage src with mips already generated + // and then can encode them to a block format. In-place mips from Image don't + // allow for custom mips, and also require conversion of KTXImage to Image.
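Editorial aside, not part of the patch: a minimal sketch of how the split KramDecoder API above is meant to be driven, mirroring the kramAppDecode changes in Kram.cpp earlier in this patch. The function name and the "rg01" swizzle are illustrative only, and the sketch assumes KramImage.h is included along with the kram namespace.

// Sketch only: decode a block-compressed KTXImage into an explicit-format KTXImage in memory.
bool decodeToMemoryExample(const KTXImage& srcImage, KTXImage& dstImage)
{
    KramDecoderParams params;
    params.isVerbose = false;
    params.swizzleText = "rg01"; // hypothetical, e.g. to view a gggr normal source
    // params.decoder stays kTexEncoderUnknown, so a decoder is picked from the format

    KramDecoder decoder;
    return decoder.decode(srcImage, dstImage, params);
}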
+ +private: + bool encodeImpl(ImageInfo& info, Image& singleImage, FILE* dstFile, KTXImage& dstImage) const; // compute how big mips will be void computeMipStorage(const KTXImage& image, int32_t& w, int32_t& h, int32_t& numSkippedMips, @@ -86,32 +133,21 @@ class Image { const KTXImage& image, ImageData& srcImage, vector& tmpImage) const; - bool createMipsFromChunks(ImageInfo& info, MipConstructData& data, + bool createMipsFromChunks(ImageInfo& info, + Image& singleImage, + MipConstructData& data, FILE* dstFile, KTXImage& dstImage) const; bool writeKTX1FileOrImage( ImageInfo& info, + Image& singleImage, MipConstructData& mipConstructData, const vector& propsData, FILE* dstFile, KTXImage& dstImage) const; void addBaseProps(const ImageInfo& info, KTXImage& dstImage) const; -private: - // pixel size of image - int32_t _width = 0; - int32_t _height = 0; - - // this is whether png/ktx source image format was L or LA or A or RGB - // if unknown then set to true, and the pixel walk will set to false - bool _hasColor = true; - bool _hasAlpha = true; - - // this is the entire strip data, float version can be passed for HDR - // sources always 4 channels RGBA for 8 and 32f data. 16f promoted to 32f. - vector _pixels; // TODO: change to Color? - //vector _pixelsHalf; // TODO: add support to import fp16 - vector _pixelsFloat; }; + } // namespace kram diff --git a/libkram/kram/KramImageInfo.cpp b/libkram/kram/KramImageInfo.cpp index 3fee97fa..391fd17a 100644 --- a/libkram/kram/KramImageInfo.cpp +++ b/libkram/kram/KramImageInfo.cpp @@ -1097,8 +1097,8 @@ void ImageInfo::initWithSourceImage(Image& sourceImage) // can only determine this after reading in the source texture int32_t w = sourceImage.width(); int32_t h = sourceImage.height(); - Color* srcPixels = (Color*)sourceImage.pixels(); - float4* srcPixelsFloat = (float4*)sourceImage.pixelsFloat(); + Color* srcPixels = (Color*)sourceImage.pixels().data(); + float4* srcPixelsFloat = (float4*)sourceImage.pixelsFloat().data(); isHDR = srcPixelsFloat != nullptr; diff --git a/libkram/kram/KramMipper.cpp b/libkram/kram/KramMipper.cpp index b20f9f77..79e85413 100644 --- a/libkram/kram/KramMipper.cpp +++ b/libkram/kram/KramMipper.cpp @@ -7,6 +7,8 @@ #include #include +#include "KTXImage.h" // for mipDown + namespace kram { using namespace std; diff --git a/libkram/kram/KramSDFMipper.cpp b/libkram/kram/KramSDFMipper.cpp index 7bb6f71d..aec353bd 100644 --- a/libkram/kram/KramSDFMipper.cpp +++ b/libkram/kram/KramSDFMipper.cpp @@ -7,6 +7,7 @@ #include #include "KramMipper.h" +#include "KTXImage.h" // for mipDown namespace kram { using namespace heman; @@ -57,11 +58,8 @@ void SDFMipper::mipmap(ImageData& dstImage, int32_t mipLevel) int32_t h = srcBitmapImage.height; int32_t d = 1; - // can use shift with mip down, but this iterates - for (int32_t i = 0; i < mipLevel; ++i) { - mipDown(w, h, d); - } - + mipDown(w, h, d, mipLevel); + dstImage.width = w; dstImage.height = h; From 0e4f88e1075f78f6f4943ca05e75a0f1ec077238 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Thu, 27 May 2021 23:31:16 -0700 Subject: [PATCH 079/901] kram - small decode cleanup, thumbnailer error handling improve decode to validate and set decoder if unknown support unpackLevel, so compressed ktx2 can be sent to decode don't need to unpack entire ktx2 if just decoding add error handling to NSLog from thumbnailer. 
add the ktx and ktx2 file types and a minimum dimension to appex plist --- kram-thumb/Info.plist | 7 +++-- kram-thumb/KramThumbnailProvider.mm | 38 +++++++++++++++++++----- kramv/KramLoader.mm | 11 ++++--- libkram/kram/KTXImage.cpp | 2 +- libkram/kram/KTXImage.h | 3 +- libkram/kram/Kram.cpp | 2 +- libkram/kram/KramImage.cpp | 45 ++++++++++++++++++++++------- libkram/kram/KramLog.cpp | 2 +- libkram/kram/KramLog.h | 6 ++++ 9 files changed, 88 insertions(+), 28 deletions(-) diff --git a/kram-thumb/Info.plist b/kram-thumb/Info.plist index e6b0324d..34f68a99 100644 --- a/kram-thumb/Info.plist +++ b/kram-thumb/Info.plist @@ -27,9 +27,12 @@ NSExtensionAttributes QLSupportedContentTypes - + + org.khronos.ktx + public.ktx2 + QLThumbnailMinimumDimension - 0 + 64 NSExtensionPointIdentifier com.apple.quicklook.thumbnail diff --git a/kram-thumb/KramThumbnailProvider.mm b/kram-thumb/KramThumbnailProvider.mm index 90c9a866..4efa2c91 100644 --- a/kram-thumb/KramThumbnailProvider.mm +++ b/kram-thumb/KramThumbnailProvider.mm @@ -22,34 +22,53 @@ @implementation KramThumbnailProvider +void KLOGF(const char* format, ...) { + string str; + + va_list args; + va_start(args, format); + /* int32_t len = */ append_vsprintf(str, format, args); + va_end(args); + + // log here, so it can see it in Console + NSLog(@"%@", [NSString stringWithUTF8String: str.c_str()]); +} + - (void)provideThumbnailForFileRequest:(QLFileThumbnailRequest *)request completionHandler:(void (^)(QLThumbnailReply * _Nullable, NSError * _Nullable))handler { // This // Second way: Draw the thumbnail into a context passed to your block, set up with Core Graphics's coordinate system. handler([QLThumbnailReply replyWithContextSize:request.maximumSize drawingBlock:^BOOL(CGContextRef _Nonnull context) { - const char* file = [request.fileURL fileSystemRepresentation]; + const char* filename = [request.fileURL fileSystemRepresentation]; - if (!(endsWith(file, ".ktx") || endsWith(file, ".ktx2"))) { + if (!(endsWith(filename, ".ktx") || endsWith(filename, ".ktx2"))) { + KLOGF("kramv %s only supports ktx/ktx2 files\n", filename); return NO; } // load the mmap file, and interpret it as a KTXImage MmapHelper mmapHelper; - if (!mmapHelper.open(file)) { + if (!mmapHelper.open(filename)) { + KLOGF("kramv %s failed to mmap\n", filename); return NO; } + + // TODO: might need to try FileHelper for non-local thumbnails + // open but leave the image compressed if KTX2 + zstd bool isInfoOnly = true; KTXImage image; if (!image.open(mmapHelper.data(), mmapHelper.dataLength(), isInfoOnly)) { + KLOGF("kramv %s failed to open\n", filename); return NO; } // no BC6 or ASTC HDR yet for thumbs, just do LDR first if (isHdrFormat(image.pixelFormat)) { + KLOGF("kramv %s doesn't support hdr thumbnails yet\n", filename); return NO; } @@ -84,6 +103,7 @@ - (void)provideThumbnailForFileRequest:(QLFileThumbnailRequest *)request complet mipData.resize(image.mipLevels[mipNumber].length * numChunks); uint8_t* dstData = mipData.data(); if (!image.unpackLevel(mipNumber, srcData, dstData)) { + KLOGF("kramv %s failed to unpack mip\n", filename); return NO; } } @@ -108,6 +128,7 @@ - (void)provideThumbnailForFileRequest:(QLFileThumbnailRequest *)request complet // want to just decode one chunk of the level that was unpacked abovve if (!decoder.decodeBlocks(w, h, mipData.data(), mipData.size(), image.pixelFormat, dstMipData, params)) { + KLOGF("kramv %s failed to decode blocks\n", filename); return NO; } @@ -125,9 +146,9 @@ - (void)provideThumbnailForFileRequest:(QLFileThumbnailRequest *)request 
complet // Declare the pixel format for the vImage_Buffer vImage_CGImageFormat format = { - .bitsPerComponent = 8, - .bitsPerPixel = 32, - }; + .bitsPerComponent = 8, + .bitsPerPixel = 32, + }; format.bitmapInfo = kCGBitmapByteOrderDefault | (isPremul ? kCGImageAlphaPremultipliedLast: kCGImageAlphaLast); @@ -139,7 +160,10 @@ - (void)provideThumbnailForFileRequest:(QLFileThumbnailRequest *)request complet //CGColorSpaceRef colorSpace = CGColorSpaceCreateDeviceRGB(); vImage_Error err = 0; CGImageRef cgImage = vImageCreateCGImageFromBuffer( &buf, &format, NULL, NULL, kvImageNoAllocate, &err); - + if (err) { + KLOGF("kramv %s failed create cgimage\n", filename); + return NO; + } CGRect rect = CGRectMake(0, 0, w, h); // The image is scaled—disproportionately, if necessary—to fit the bounds diff --git a/kramv/KramLoader.mm b/kramv/KramLoader.mm index e2612552..062263a1 100644 --- a/kramv/KramLoader.mm +++ b/kramv/KramLoader.mm @@ -177,18 +177,21 @@ inline MyMTLPixelFormat remapInternalRGBFormat(MyMTLPixelFormat format) { // see if it needs decode first bool needsDecode = false; + bool needsConvert = false; +#if SUPPORT_RGB if (isInternalRGBFormat(image.pixelFormat)) { - needsDecode = true; + needsConvert = true; } +#endif #if DO_DECODE - else if (isDecodeImageNeeded(image.pixelFormat)) { + if (isDecodeImageNeeded(image.pixelFormat)) { needsDecode = true; } #endif // open it again, but unpack the levels if supercompressed - if (needsDecode) { + if (needsConvert) { isInfoOnly = false; if (!image.open(imageData, imageDataLength, isInfoOnly)) { @@ -197,7 +200,7 @@ inline MyMTLPixelFormat remapInternalRGBFormat(MyMTLPixelFormat format) { } #if SUPPORT_RGB - if (isInternalRGBFormat(image.pixelFormat)) { + if (needsConvert) { // loads and converts image from RGB to RGBA Image rgbaImage; if (!rgbaImage.loadImageFromKTX(image)) diff --git a/libkram/kram/KTXImage.cpp b/libkram/kram/KTXImage.cpp index fa38aeec..1e39f397 100644 --- a/libkram/kram/KTXImage.cpp +++ b/libkram/kram/KTXImage.cpp @@ -1538,7 +1538,7 @@ bool KTXImage::openKTX2(const uint8_t* imageData, size_t imageDataLength, bool i return true; } -bool KTXImage::unpackLevel(uint32_t mipNumber, const uint8_t* srcData, uint8_t* dstData) { +bool KTXImage::unpackLevel(uint32_t mipNumber, const uint8_t* srcData, uint8_t* dstData) const { // uncompressed level uint32_t numChunks = totalChunks(); diff --git a/libkram/kram/KTXImage.h b/libkram/kram/KTXImage.h index bc2bfae8..43af56e0 100644 --- a/libkram/kram/KTXImage.h +++ b/libkram/kram/KTXImage.h @@ -308,7 +308,7 @@ class KTXImage { bool isKTX2() const { return skipImageLength; } // can use on ktx1/2 files, does a decompress if needed - bool unpackLevel(uint32_t mipNumber, const uint8_t* srcData, uint8_t* dstData); + bool unpackLevel(uint32_t mipNumber, const uint8_t* srcData, uint8_t* dstData) const; // helpers to work with the mipLevels array, mipLength and levelLength are important to get right // mip data depends on format @@ -328,7 +328,6 @@ class KTXImage { uint32_t totalChunks() const; size_t chunkOffset(uint32_t mipNumber, uint32_t chunkNumber) const { return mipLevels[mipNumber].offset + mipLevels[mipNumber].length * chunkNumber; } - private: bool openKTX2(const uint8_t* imageData, size_t imageDataLength, bool isInfoOnly); diff --git a/libkram/kram/Kram.cpp b/libkram/kram/Kram.cpp index 84dc571a..898b0d50 100644 --- a/libkram/kram/Kram.cpp +++ b/libkram/kram/Kram.cpp @@ -1751,7 +1751,7 @@ static int32_t kramAppDecode(vector& args) // TODO: for hdr decode, may need to walk blocks or ask caller 
to pass -hdr flag if (!validateFormatAndDecoder(srcImage.textureType, srcImage.pixelFormat, textureDecoder)) { - KLOGE("Kram", "format decode only supports ktx output"); + KLOGE("Kram", "format decode only supports ktx and ktx2 output"); return -1; } diff --git a/libkram/kram/KramImage.cpp b/libkram/kram/KramImage.cpp index f218fe8c..a814a6ce 100644 --- a/libkram/kram/KramImage.cpp +++ b/libkram/kram/KramImage.cpp @@ -436,6 +436,12 @@ bool KramDecoder::decodeBlocks( // could tie use flags to format filter, or encoder settings // or may want to disable if decoders don't gen correct output TexEncoder decoder = params.decoder; + + if (!validateFormatAndDecoder(MyMTLTextureType2D, blockFormat, decoder)) { + KLOGE("Kram", "block decode only supports specific block types"); + return false; + } + #if COMPILE_ATE // Encode/decode formats differ depending on library version // but it's likely the fastest decoder. Only on macOS/iOS. @@ -774,8 +780,7 @@ bool KramDecoder::decodeImpl(const KTXImage& srcImage, FILE* dstFile, KTXImage& dstImage.reserveImageData(); } - bool success = false; - + // 1d textures need to write out 0 width KTXHeader headerCopy = dstHeader; @@ -802,21 +807,41 @@ bool KramDecoder::decodeImpl(const KTXImage& srcImage, FILE* dstFile, KTXImage& // DONE: walk chunks here and seek to src and dst offsets in conversion // make sure to walk chunks in the exact same order they are written, array then face, or slice + bool success = true; + + vector mipStorage; + mipStorage.resize(srcImage.mipLengthLargest() * numChunks); // enough to hold biggest mip + for (uint32_t i = 0; i < srcImage.header.numberOfMipmapLevels; ++i) { - // TODO: to decode compressed KTX2 want to walk all chunks of a single level + // DONE: to decode compressed KTX2 want to walk all chunks of a single level // after decompressing the level. This isn't doing unpackLevel and needs to here. - assert(!srcImage.isSupercompressed()); + + const KTXImageLevel& srcMipLevel = srcImage.mipLevels[i]; + + // this is offset to a given level + uint64_t mipBaseOffset = srcMipLevel.offset; + const uint8_t* srcLevelData = srcImage.fileData; + + if (srcImage.isSupercompressed()) { + + if (!srcImage.unpackLevel(i, srcLevelData + srcMipLevel.offset, mipStorage.data())) { + return false; + } + srcLevelData = mipStorage.data(); + + // going to upload from mipStorage temp array + mipBaseOffset = 0; + } uint32_t w, h, d; srcImage.mipDimensions(i, w, h, d); - for (int32_t chunk = 0; chunk < numChunks; ++chunk) { - - const KTXImageLevel& dstMipLevel = dstImage.mipLevels[i]; - outputTexture.resize(dstMipLevel.length); + const KTXImageLevel& dstMipLevel = dstImage.mipLevels[i]; + outputTexture.resize(dstMipLevel.length); - const KTXImageLevel& srcMipLevel = srcImage.mipLevels[i]; - const uint8_t* srcData = srcImage.fileData + srcMipLevel.offset + chunk * srcMipLevel.length; + + for (int32_t chunk = 0; chunk < numChunks; ++chunk) { + const uint8_t* srcData = srcLevelData + mipBaseOffset + chunk * srcMipLevel.length; // decode the blocks to LDR RGBA8 if (!decodeBlocks(w, h, srcData, srcMipLevel.length, srcImage.pixelFormat, outputTexture, params)) { diff --git a/libkram/kram/KramLog.cpp b/libkram/kram/KramLog.cpp index 4045ad15..dd58e523 100644 --- a/libkram/kram/KramLog.cpp +++ b/libkram/kram/KramLog.cpp @@ -48,7 +48,7 @@ void getErrorLogCaptureText(string& text) { // being parsed (f.e. mmapped Json) this can significantly slow a parser down. 
-static int32_t append_vsprintf(string& str, const char* format, va_list args) +int32_t append_vsprintf(string& str, const char* format, va_list args) { // for KLOGE("group", "%s", "text") if (strcmp(format, "%s") == 0) { diff --git a/libkram/kram/KramLog.h b/libkram/kram/KramLog.h index 38f48e2c..0bb5fa6c 100644 --- a/libkram/kram/KramLog.h +++ b/libkram/kram/KramLog.h @@ -50,7 +50,9 @@ using namespace std; // when set true, the internal string is cleared void setErrorLogCapture(bool enable); + bool isErrorLogCapture(); + // return the text void getErrorLogCaptureText(string& text); @@ -60,7 +62,11 @@ int32_t sprintf(string& str, const char* format, ...) __printflike(2, 3); // returns length of chars appended, -1 if failure int32_t append_sprintf(string& str, const char* format, ...) __printflike(2, 3); +// returns length of chars appended, -1 if failure +int32_t append_vsprintf(string& str, const char* format, va_list args); + bool startsWith(const char* str, const string& substring); + bool endsWithExtension(const char* str, const string& substring); // https://stackoverflow.com/questions/874134/find-out-if-string-ends-with-another-string-in-c From c9bbb57ae382ce43912e1b889b8dc1e83e914641 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Fri, 28 May 2021 09:32:08 -0700 Subject: [PATCH 080/901] kram-thumb - support srgb --- kram-thumb/KramThumbnailProvider.mm | 7 ++++--- libkram/kram/KTXImage.h | 10 +++++----- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/kram-thumb/KramThumbnailProvider.mm b/kram-thumb/KramThumbnailProvider.mm index 4efa2c91..2f31350d 100644 --- a/kram-thumb/KramThumbnailProvider.mm +++ b/kram-thumb/KramThumbnailProvider.mm @@ -75,6 +75,7 @@ - (void)provideThumbnailForFileRequest:(QLFileThumbnailRequest *)request complet // TODO: hookup to whether content is already premul with alpha // will have to come from props. ASTC always 4 channels but may hold other daa. bool isPremul = numChannelsOfFormat(image.pixelFormat) >= 4; + bool isSrgb = isSrgbFormat(image.pixelFormat); // unpack a level to get the blocks uint32_t mipNumber = 0; @@ -150,16 +151,16 @@ - (void)provideThumbnailForFileRequest:(QLFileThumbnailRequest *)request complet .bitsPerPixel = 32, }; - format.bitmapInfo = kCGBitmapByteOrderDefault | (isPremul ? kCGImageAlphaPremultipliedLast: kCGImageAlphaLast); + format.bitmapInfo = kCGBitmapByteOrderDefault | (isPremul ? kCGImageAlphaPremultipliedLast : kCGImageAlphaLast); + format.colorSpace = isSrgb ? CGColorSpaceCreateWithName(kCGColorSpaceSRGB) : CGColorSpaceCreateDeviceRGB(); // don't need to allocate, can requse memory from mip // TODO: might want to convert to PNG, but maybe thumbnail system does that automatically? 
// see how big thumbs.db is after running this - //CGColorSpaceRef colorSpace = CGColorSpaceCreateDeviceRGB(); vImage_Error err = 0; - CGImageRef cgImage = vImageCreateCGImageFromBuffer( &buf, &format, NULL, NULL, kvImageNoAllocate, &err); + CGImageRef cgImage = vImageCreateCGImageFromBuffer(&buf, &format, NULL, NULL, kvImageNoAllocate, &err); if (err) { KLOGF("kramv %s failed create cgimage\n", filename); return NO; diff --git a/libkram/kram/KTXImage.h b/libkram/kram/KTXImage.h index 43af56e0..d2880971 100644 --- a/libkram/kram/KTXImage.h +++ b/libkram/kram/KTXImage.h @@ -340,9 +340,9 @@ class KTXImage { // copied out of header, but also may be 1 instead of 0 // also these can be modified, and often are non-zero even if header is - uint32_t width; - uint32_t height; - uint32_t depth; + uint32_t width = 0; + uint32_t height = 0; + uint32_t depth = 0; // for ktx2 bool skipImageLength = false; @@ -356,8 +356,8 @@ class KTXImage { vector mipLevels; // offsets into fileData // this only holds data for mipLevels - size_t fileDataLength; - const uint8_t* fileData; // mmap data + size_t fileDataLength = 0; + const uint8_t* fileData = nullptr; // mmap data }; // GL/D3D hobbled non-pow2 mips by only supporting round down, not round up From 63b71310855050b9109c2a591887db3e93139513 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sat, 29 May 2021 14:24:38 -0700 Subject: [PATCH 081/901] Kram - hook up shapes with basis Could gen tan from normal, uv. But its cheaper to handle basis in vertex shader. --- kramv/KramLoader.mm | 24 +----- kramv/KramRenderer.mm | 159 ++++++++++++++++++++++++++++------------ kramv/KramShaders.h | 12 ++- kramv/KramShaders.metal | 56 +++++++++++--- kramv/KramViewerBase.h | 3 + kramv/KramViewerMain.mm | 24 +++++- 6 files changed, 198 insertions(+), 80 deletions(-) diff --git a/kramv/KramLoader.mm b/kramv/KramLoader.mm index 062263a1..d60e3bb9 100644 --- a/kramv/KramLoader.mm +++ b/kramv/KramLoader.mm @@ -175,32 +175,15 @@ inline MyMTLPixelFormat remapInternalRGBFormat(MyMTLPixelFormat format) { return nil; } - // see if it needs decode first - bool needsDecode = false; - bool needsConvert = false; - #if SUPPORT_RGB if (isInternalRGBFormat(image.pixelFormat)) { - needsConvert = true; - } -#endif -#if DO_DECODE - if (isDecodeImageNeeded(image.pixelFormat)) { - needsDecode = true; - } -#endif - - // open it again, but unpack the levels if supercompressed - if (needsConvert) { isInfoOnly = false; + // reopen and unzip it all if (!image.open(imageData, imageDataLength, isInfoOnly)) { return nil; } - } - -#if SUPPORT_RGB - if (needsConvert) { + // loads and converts image from RGB to RGBA Image rgbaImage; if (!rgbaImage.loadImageFromKTX(image)) @@ -234,8 +217,9 @@ inline MyMTLPixelFormat remapInternalRGBFormat(MyMTLPixelFormat format) { if (originalFormat != nullptr) { *originalFormat = (MTLPixelFormat)image.pixelFormat; } + #if DO_DECODE - if (needsDecode) { + if (isDecodeImageNeeded(image.pixelFormat)) { KTXImage imageDecoded; if (!decodeImage(image, imageDecoded)) { return nil; diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index eb42a481..8c97e852 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -77,6 +77,13 @@ @implementation Renderer KramLoader *_loader; MTKMesh *_mesh; + MDLVertexDescriptor *_mdlVertexDescriptor; + + MTKMesh *_meshPlane; // really a thin gox + MTKMesh *_meshBox; + MTKMesh *_meshSphere; + MTKMesh *_meshCylinder; + MTKMeshBufferAllocator *_metalAllocator; ShowSettings* _showSettings; } @@ -93,6 +100,8 @@ -(nonnull 
instancetype)initWithMetalKitView:(nonnull MTKView *)view settings:(no _loader = [KramLoader new]; _loader.device = _device; + _metalAllocator = [[MTKMeshBufferAllocator alloc] initWithDevice: _device]; + _inFlightSemaphore = dispatch_semaphore_create(MaxBuffersInFlight); [self _loadMetalWithView:view]; [self _loadAssets]; @@ -139,6 +148,46 @@ - (void)_createSamplers _colorMapSamplerBilinearWrap = [_device newSamplerStateWithDescriptor:samplerDescriptor]; } +- (void)_createVertexDescriptor +{ + _mtlVertexDescriptor = [[MTLVertexDescriptor alloc] init]; + + _mtlVertexDescriptor.attributes[VertexAttributePosition].format = MTLVertexFormatFloat3; + _mtlVertexDescriptor.attributes[VertexAttributePosition].offset = 0; + _mtlVertexDescriptor.attributes[VertexAttributePosition].bufferIndex = BufferIndexMeshPosition; + + _mtlVertexDescriptor.attributes[VertexAttributeTexcoord].format = MTLVertexFormatFloat2; // TODO: compress + _mtlVertexDescriptor.attributes[VertexAttributeTexcoord].offset = 0; + _mtlVertexDescriptor.attributes[VertexAttributeTexcoord].bufferIndex = BufferIndexMeshUV0; + + _mtlVertexDescriptor.attributes[VertexAttributeNormal].format = MTLVertexFormatFloat3; // TODO: compress + _mtlVertexDescriptor.attributes[VertexAttributeNormal].offset = 0; + _mtlVertexDescriptor.attributes[VertexAttributeNormal].bufferIndex = BufferIndexMeshNormal; + + _mtlVertexDescriptor.attributes[VertexAttributeTangent].format = MTLVertexFormatFloat4; // TODO: compress + _mtlVertexDescriptor.attributes[VertexAttributeTangent].offset = 0; + _mtlVertexDescriptor.attributes[VertexAttributeTangent].bufferIndex = BufferIndexMeshTangent; + + //_mtlVertexDescriptor.layouts[BufferIndexMeshPosition].stepRate = 1; + //_mtlVertexDescriptor.layouts[BufferIndexMeshPosition].stepFunction = MTLVertexStepFunctionPerVertex; + + _mtlVertexDescriptor.layouts[BufferIndexMeshPosition].stride = 3*4; + _mtlVertexDescriptor.layouts[BufferIndexMeshUV0].stride = 2*4; + _mtlVertexDescriptor.layouts[BufferIndexMeshNormal].stride = 3*4; + _mtlVertexDescriptor.layouts[BufferIndexMeshTangent].stride = 4*4; + + //----------------------- + // for ModelIO + _mdlVertexDescriptor = + MTKModelIOVertexDescriptorFromMetal(_mtlVertexDescriptor); + + _mdlVertexDescriptor.attributes[VertexAttributePosition].name = MDLVertexAttributePosition; + _mdlVertexDescriptor.attributes[VertexAttributeTexcoord].name = MDLVertexAttributeTextureCoordinate; + _mdlVertexDescriptor.attributes[VertexAttributeNormal].name = MDLVertexAttributeNormal; + _mdlVertexDescriptor.attributes[VertexAttributeTangent].name = MDLVertexAttributeTangent; + +} + - (void)_loadMetalWithView:(nonnull MTKView *)view { /// Load Metal state objects and initialize renderer dependent view properties @@ -151,24 +200,8 @@ - (void)_loadMetalWithView:(nonnull MTKView *)view view.sampleCount = 1; - _mtlVertexDescriptor = [[MTLVertexDescriptor alloc] init]; - - _mtlVertexDescriptor.attributes[VertexAttributePosition].format = MTLVertexFormatFloat3; - _mtlVertexDescriptor.attributes[VertexAttributePosition].offset = 0; - _mtlVertexDescriptor.attributes[VertexAttributePosition].bufferIndex = BufferIndexMeshPositions; - - _mtlVertexDescriptor.attributes[VertexAttributeTexcoord].format = MTLVertexFormatFloat2; - _mtlVertexDescriptor.attributes[VertexAttributeTexcoord].offset = 0; - _mtlVertexDescriptor.attributes[VertexAttributeTexcoord].bufferIndex = BufferIndexMeshUV0; - - _mtlVertexDescriptor.layouts[BufferIndexMeshPositions].stride = 12; - 
//_mtlVertexDescriptor.layouts[BufferIndexMeshPositions].stepRate = 1; - //_mtlVertexDescriptor.layouts[BufferIndexMeshPositions].stepFunction = MTLVertexStepFunctionPerVertex; - - _mtlVertexDescriptor.layouts[BufferIndexMeshUV0].stride = 8; - //_mtlVertexDescriptor.layouts[BufferIndexMeshUV0].stepRate = 1; - //_mtlVertexDescriptor.layouts[BufferIndexMeshUV0].stepFunction = MTLVertexStepFunctionPerVertex; - + [self _createVertexDescriptor]; + [self _createRenderPipelines:view]; //----------------------- @@ -362,47 +395,78 @@ - (void)_createSampleRender _sampleTex = [_device newTextureWithDescriptor:textureDesc]; } -- (void)_loadAssets +- (MTKMesh*)_createMeshAsset:(const char*)name mdlMesh:(MDLMesh*)mdlMesh { - /// Load assets into metal objects + NSError* error = nil; - NSError *error = nil; + //mdlMesh.vertexDescriptor = _mdlVertexDescriptor; + + [mdlMesh addOrthTanBasisForTextureCoordinateAttributeNamed: MDLVertexAttributeTextureCoordinate + normalAttributeNamed: MDLVertexAttributeNormal + tangentAttributeNamed: MDLVertexAttributeTangent]; + + mdlMesh.vertexDescriptor = _mdlVertexDescriptor; + + // TODO: name the vertex attributes, can that be done in _mdlVertexDescriptor + // may have to set name on MTLBuffer range on IB and VB + + // now set it into mtk mesh + MTKMesh* mesh = [[MTKMesh alloc] initWithMesh:mdlMesh + device:_device + error:&error]; + mesh.name = [NSString stringWithUTF8String:name]; - MTKMeshBufferAllocator *metalAllocator = [[MTKMeshBufferAllocator alloc] - initWithDevice: _device]; + if(!mesh || error) + { + NSLog(@"Error creating MetalKit mesh %@", error.localizedDescription); + return nil; + } -#if 1 // TODO: replace box with fsq or fst, or use thin box for perspective/rotation - MDLMesh *mdlMesh = [MDLMesh newBoxWithDimensions:(vector_float3){1, 1, 1} + return mesh; +} + +- (void)_loadAssets +{ + /// Load assets into metal objects + + MDLMesh *mdlMesh; + + mdlMesh = [MDLMesh newBoxWithDimensions:(vector_float3){1, 1, 1} segments:(vector_uint3){1, 1, 1} geometryType:MDLGeometryTypeTriangles inwardNormals:NO - allocator:metalAllocator]; + allocator:_metalAllocator]; -#endif + _meshBox = [self _createMeshAsset:"MeshBox" mdlMesh:mdlMesh]; - MDLVertexDescriptor *mdlVertexDescriptor = - MTKModelIOVertexDescriptorFromMetal(_mtlVertexDescriptor); - - mdlVertexDescriptor.attributes[VertexAttributePosition].name = MDLVertexAttributePosition; - mdlVertexDescriptor.attributes[VertexAttributeTexcoord].name = MDLVertexAttributeTextureCoordinate; - - mdlMesh.vertexDescriptor = mdlVertexDescriptor; - - _mesh = [[MTKMesh alloc] initWithMesh:mdlMesh - device:_device - error:&error]; - _mesh.name = @"BoxMesh"; + // TOOO: have more shape types - this is box, need thin box (plane), and sphere, and cylinder + // eventually load usdz and gltf2 custom model. Need 3d manipulation of shape like arcball + // and eyedropper is more complex. 
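Editorial aside, not part of the patch: following the TODO above, a thin-box "plane" could be slotted in through the same MDLMesh to _createMeshAsset path already used for the box. This sketch would sit inside _loadAssets next to the other shapes; the 0.05 thickness is an arbitrary choice, and _meshPlane is the ivar declared earlier in this commit.

// Sketch only: a thin box standing in for the plane shape.
mdlMesh = [MDLMesh newBoxWithDimensions:(vector_float3){1, 1, 0.05}
                               segments:(vector_uint3){1, 1, 1}
                           geometryType:MDLGeometryTypeTriangles
                          inwardNormals:NO
                              allocator:_metalAllocator];

_meshPlane = [self _createMeshAsset:"MeshPlane" mdlMesh:mdlMesh];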
+ + mdlMesh = [MDLMesh newEllipsoidWithRadii:(vector_float3){0.5, 0.5, 0.5} radialSegments:16 verticalSegments:16 geometryType:MDLGeometryTypeTriangles inwardNormals:NO hemisphere:NO allocator:_metalAllocator]; + + _meshSphere = [self _createMeshAsset:"MeshSphere" mdlMesh:mdlMesh]; + + mdlMesh = [MDLMesh newCylinderWithHeight:1.0 + radii:(vector_float2){0.5, 0.5} + radialSegments:16 + verticalSegments:1 + geometryType:MDLGeometryTypeTriangles + inwardNormals:NO + allocator:_metalAllocator]; + + _meshCylinder = [self _createMeshAsset:"MeshCylinder" mdlMesh:mdlMesh]; + + _mesh = _meshBox; - if(!_mesh || error) - { - NSLog(@"Error creating MetalKit mesh %@", error.localizedDescription); - } } - (BOOL)loadTextureFromData:(const string&)fullFilename timestamp:(double)timestamp imageData:(nonnull const uint8_t*)imageData imageDataLength:(uint64_t)imageDataLength { // image can be decoded to rgba8u if platform can't display format natively // but still want to identify blockSize from original format + + // Note that modstamp can change, but content data hash may be the same bool isTextureChanged = (fullFilename != _showSettings->lastFilename) || (timestamp != _showSettings->lastTimestamp); @@ -420,7 +484,7 @@ - (BOOL)loadTextureFromData:(const string&)fullFilename timestamp:(double)timest // then can decode blocks in kramv KTXImage sourceImage; - if (!sourceImage.open(imageData,imageDataLength)) { + if (!sourceImage.open(imageData, imageDataLength)) { return NO; } @@ -685,7 +749,12 @@ - (void)_updateGameState // this was stored so view could use it, but now that code calcs the transform via computeImageTransform _showSettings->projectionViewModelMatrix = projectionViewMatrix * _modelMatrix; - + // crude shape experiment + switch(_showSettings->meshNumber) { + case 0: _mesh = _meshBox; break; + case 1: _mesh = _meshSphere; break; + case 2: _mesh = _meshCylinder; break; + } //_rotation += .01; } diff --git a/kramv/KramShaders.h b/kramv/KramShaders.h index 3a192e0b..0b7e6b36 100644 --- a/kramv/KramShaders.h +++ b/kramv/KramShaders.h @@ -26,20 +26,24 @@ typedef NS_ENUM(int32_t, BufferIndex) { // mesh - BufferIndexMeshPositions = 0, // pos + BufferIndexMeshPosition = 0, // pos BufferIndexMeshUV0 = 1, // uv + BufferIndexMeshNormal = 2, // normals + BufferIndexMeshTangent = 3, // normals - BufferIndexUniforms = 2, - BufferIndexUniformsLevel = 3, + BufferIndexUniforms = 16, + BufferIndexUniformsLevel = 17, // for compute - BufferIndexUniformsCS = 0, + BufferIndexUniformsCS = 16, }; typedef NS_ENUM(int32_t, VertexAttribute) { VertexAttributePosition = 0, VertexAttributeTexcoord = 1, + VertexAttributeNormal = 2, + VertexAttributeTangent = 3, }; typedef NS_ENUM(int32_t, TextureIndex) diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index bd430fed..c94e9537 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -185,27 +185,35 @@ half3 toNormal(half3 n) // use mikktspace, gen bitan in frag shader with sign, don't normalize vb/vt // see http://www.mikktspace.com/ -half3 transformNormal(half4 tangent, half3 vertexNormal, - texture2d texture, sampler s, float2 uv, bool isSigned = true) +half3 transformNormal(half4 tangent, half3 vertexNormal, half3 bumpNormal) { // Normalize tangent/vertexNormal in vertex shader // but don't renormalize interpolated tangent, vertexNormal in fragment shader // Reconstruct bitan in frag shader // https://bgolus.medium.com/generating-perfect-normal-maps-for-unity-f929e673fc57 - half4 nmap = texture.sample(s, uv); - if (!isSigned) { - nmap.xy = 
toSnorm8(nmap.xy); - } - half3 normal = toNormal(nmap.xyz); // now transform by basis and normalize from any shearing, and since interpolated basis vectors // are not normalized half3x3 tbn = half3x3(tangent.xyz, tangent.w * cross(vertexNormal, tangent.xyz), vertexNormal); - normal = tbn * normal; - return normalize(normal); + bumpNormal = tbn * bumpNormal; + return normalize(bumpNormal); } +half3 transformNormal(half4 tangent, half3 vertexNormal, + texture2d texture, sampler s, float2 uv, bool isSigned = true) +{ + half4 nmap = texture.sample(s, uv); + if (!isSigned) { + nmap.xy = toSnorm8(nmap.xy); + } + half3 bumpNormal = toNormal(nmap.xyz); + + return transformNormal(tangent, vertexNormal, bumpNormal); +} + + + // TODO: have more bones, or read from texture instead of uniforms // can then do instanced skining, but vfetch lookup slower #define maxBones 128 @@ -259,7 +267,7 @@ float3x3 toFloat3x3(float4x4 m) return float3x3(m[0].xyz, m[1].xyz, m[2].xyz); } -// this is for vertex shader +// this is for vertex shader if tangent supplied void transformBasis(thread float3& tangent, thread float3& normal, float4x4 modelToWorldTfm, bool isScaled = false) { @@ -309,6 +317,10 @@ struct Vertex { float4 position [[attribute(VertexAttributePosition)]]; float2 texCoord [[attribute(VertexAttributeTexcoord)]]; + + // basis + float3 normal [[attribute(VertexAttributeNormal)]];; // consider hallf + float4 tangent [[attribute(VertexAttributeTangent)]];; // tan + bitanSign }; struct ColorInOut @@ -317,6 +329,10 @@ struct ColorInOut float3 texCoordXYZ; float2 texCoord; float3 worldPos; + + // basis + half3 normal; + half4 tangent; }; ColorInOut DrawImageFunc( @@ -332,6 +348,21 @@ ColorInOut DrawImageFunc( float4 worldPos = uniforms.modelMatrix * position; + // deal with full basis + + if (uniforms.isNormal && uniforms.isPreview) { + float3 tangent = in.tangent.xyz; + float3 normal = in.normal; + transformBasis(tangent, normal, uniforms.modelMatrix, false); + + out.normal = toHalf(normal); + out.tangent.xyz = toHalf(tangent); + out.tangent.w = toHalf(in.tangent.w); + } + else { + out.normal = toHalf(in.normal); + out.tangent = toHalf(in.tangent); + } // try adding pixel offset to pixel values worldPos.xy += uniformsLevel.drawOffset; @@ -480,6 +511,8 @@ float4 DrawPixels( else if (uniforms.isNormal) { // light the normal map + + // add swizzle for ASTC/BC5nm, other 2 channels format can only store 01 in ba if (uniforms.isSwizzleAGToRG) { c = float4(c.ag, 0, 1); @@ -498,6 +531,9 @@ float4 DrawPixels( float3 n = c.xyz; + // handle the basis here + n = toFloat(transformNormal(in.tangent, in.normal, toHalf(n))); + // diffuse float dotNL = saturate(dot(n, lightDir)); float3 diffuse = lightColor.xyz * dotNL; diff --git a/kramv/KramViewerBase.h b/kramv/KramViewerBase.h index 9b3f2e2c..d9015f69 100644 --- a/kramv/KramViewerBase.h +++ b/kramv/KramViewerBase.h @@ -167,6 +167,9 @@ class ShowSettings { string lastFilename; double lastTimestamp = 0.0; + + int32_t meshNumber = 0; + int32_t meshCount = 3; }; float4x4 matrix4x4_translation(float tx, float ty, float tz); diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index a0c370bf..afc42d54 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -275,7 +275,11 @@ - (IBAction)showAboutDialog:(id)sender { Num2 = 0x13, Num3 = 0x14, Num4 = 0x15, - // ... 
+ Num5 = 0x17, + Num6 = 0x16, + Num7 = 0x1A, + Num8 = 0x1C, + Num9 = 0x19, Num0 = 0x1D, LeftBrace = 0x21, @@ -1862,6 +1866,24 @@ - (void)handleKey:(uint32_t)keyCode isShiftKeyDown:(bool)isShiftKeyDown } break; + // test out different shapes, not offiical support yet + case Key::Num8: + if (_showSettings->meshCount > 1) { + if (isShiftKeyDown) { + _showSettings->meshNumber = _showSettings->meshNumber + _showSettings->meshCount - 1; + } + else { + _showSettings->meshNumber++; + } + _showSettings->meshNumber = _showSettings->meshNumber % _showSettings->meshCount; + + sprintf(text, "Mesh %d/%d", _showSettings->meshNumber, _showSettings->meshCount); + isChanged = true; + } + break; + + // TODO: should probably have these wrap and not clamp to count limits + // mip up/down case Key::M: if (_showSettings->maxLOD > 1) { From 0e6d2b84d4e9f1424192f7215164d8c5e9688b4a Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sat, 29 May 2021 17:12:16 -0700 Subject: [PATCH 082/901] kram - flip dx, dy again in heightToNormals, add basis transform Needed to flip tangent.w ModelIO doesn't caclulate this correctly. Add facing support so if inside model, the faces look okay. --- kramv/KramShaders.metal | 67 +++++++++++++++++++++------------- libkram/kram/KramImageInfo.cpp | 8 ++-- 2 files changed, 45 insertions(+), 30 deletions(-) diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index c94e9537..6f51ca1d 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -185,17 +185,21 @@ half3 toNormal(half3 n) // use mikktspace, gen bitan in frag shader with sign, don't normalize vb/vt // see http://www.mikktspace.com/ -half3 transformNormal(half4 tangent, half3 vertexNormal, half3 bumpNormal) +half3 transformNormal(half3 bumpNormal, half4 tangent, half3 vertexNormal) { // Normalize tangent/vertexNormal in vertex shader // but don't renormalize interpolated tangent, vertexNormal in fragment shader // Reconstruct bitan in frag shader // https://bgolus.medium.com/generating-perfect-normal-maps-for-unity-f929e673fc57 + // ModelIO not generating correct bitan sign + // TODO: flip this on srcData, and not here + half bitangentSign = -tangent.w; // now transform by basis and normalize from any shearing, and since interpolated basis vectors // are not normalized - half3x3 tbn = half3x3(tangent.xyz, tangent.w * cross(vertexNormal, tangent.xyz), vertexNormal); + half3 bitangent = bitangentSign * cross(vertexNormal, tangent.xyz); + half3x3 tbn = half3x3(tangent.xyz, bitangent, vertexNormal); bumpNormal = tbn * bumpNormal; return normalize(bumpNormal); } @@ -209,7 +213,8 @@ half3 transformNormal(half4 tangent, half3 vertexNormal, } half3 bumpNormal = toNormal(nmap.xyz); - return transformNormal(tangent, vertexNormal, bumpNormal); + return transformNormal(bumpNormal, + tangent, vertexNormal); } @@ -258,8 +263,8 @@ void skinPosAndBasis(thread float4& position, thread float3& tangent, thread flo // not dealing with non-uniform scale correction // see scale2 handling in transformBasis, a little different with transpose of 3x4 - tangent = (float4(tangent, 0.0) * bindPoseToBoneTransform); normal = (float4(normal, 0.0) * bindPoseToBoneTransform); + tangent = (float4(tangent, 0.0) * bindPoseToBoneTransform); } float3x3 toFloat3x3(float4x4 m) @@ -268,23 +273,21 @@ float3x3 toFloat3x3(float4x4 m) } // this is for vertex shader if tangent supplied -void transformBasis(thread float3& tangent, thread float3& normal, +void transformBasis(thread float3& normal, thread float3& tangent, float4x4 modelToWorldTfm, bool 
isScaled = false) { float3x3 m = toFloat3x3(modelToWorldTfm); + // note this is RinvT * n = (Rt)t = R, this is for simple inverse, inv scale handled below + // but uniform scale already handled by normalize + normal = m * normal; + // question here of whether tangent is transformed by m or mInvT // most apps assume m, but after averaging it can be just as off the surface as the normal - bool useInverseOnTangent = true; - if (useInverseOnTangent) - tangent = tangent * m; - else - tangent = m * tangent; + tangent = m * tangent; + - // note this is n * R = Rt * n, for simple affine transforms Rinv = Rt, invScale then handled below - normal = normal * m; - // have to apply invSquare of scale here to approximate invT // also make sure to identify inversion off determinant before instancing so that backfacing is correct // this is only needed if non-uniform scale present in modelToWorldTfm, could precompute scale2 @@ -300,13 +303,13 @@ void transformBasis(thread float3& tangent, thread float3& normal, scale2 = recip(max(0.0001 * 0.0001, scale2)); // apply inverse - tangent *= scale2; normal *= scale2; + tangent *= scale2; } // vertex shader normalize, but the fragment shader should not - tangent = normalize(tangent); normal = normalize(normal); + tangent = normalize(tangent); // make sure to preserve bitan sign in tangent.w } @@ -351,9 +354,9 @@ ColorInOut DrawImageFunc( // deal with full basis if (uniforms.isNormal && uniforms.isPreview) { - float3 tangent = in.tangent.xyz; float3 normal = in.normal; - transformBasis(tangent, normal, uniforms.modelMatrix, false); + float3 tangent = in.tangent.xyz; + transformBasis(normal, tangent, uniforms.modelMatrix, false); out.normal = toHalf(normal); out.tangent.xyz = toHalf(tangent); @@ -470,6 +473,7 @@ vertex ColorInOut DrawVolumeVS( float4 DrawPixels( ColorInOut in [[stage_in]], + bool facing [[front_facing]], constant Uniforms& uniforms, float4 c, float2 textureSize @@ -511,8 +515,6 @@ float4 DrawPixels( else if (uniforms.isNormal) { // light the normal map - - // add swizzle for ASTC/BC5nm, other 2 channels format can only store 01 in ba if (uniforms.isSwizzleAGToRG) { c = float4(c.ag, 0, 1); @@ -526,13 +528,20 @@ float4 DrawPixels( c.rgb = toNormal(c.rgb); + // flip the normal if facing is flipped + // TODO: needed for tangent too? 
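    // This gives the preview crude two-sided lighting: on back faces the sampled
    // tangent-space normal and the bitangent sign are negated so the inside of the
    // shape still shades plausibly ("if inside model, the faces look okay" per the
    // commit message); whether more of the basis needs flipping is the open TODO above.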
+ if (!facing) { + c.xyz = -c.xyz; + in.tangent.w = -in.tangent.w; + } + float3 lightDir = normalize(float3(1,1,1)); float3 lightColor = float3(1,1,1); float3 n = c.xyz; // handle the basis here - n = toFloat(transformNormal(in.tangent, in.normal, toHalf(n))); + n = toFloat(transformNormal(toHalf(n), in.tangent, in.normal)); // diffuse float dotNL = saturate(dot(n, lightDir)); @@ -776,6 +785,7 @@ float4 DrawPixels( fragment float4 Draw1DArrayPS( ColorInOut in [[stage_in]], + bool facing [[front_facing]], constant Uniforms& uniforms [[ buffer(BufferIndexUniforms) ]], constant UniformsLevel& uniformsLevel [[ buffer(BufferIndexUniformsLevel) ]], sampler colorSampler [[ sampler(SamplerIndexColor) ]], @@ -790,11 +800,12 @@ fragment float4 Draw1DArrayPS( float2 textureSize = float2(colorMap.get_width(0), 1); // colorMap.get_num_mip_levels(); - return DrawPixels(in, uniforms, c, textureSize); + return DrawPixels(in, facing, uniforms, c, textureSize); } fragment float4 DrawImagePS( ColorInOut in [[stage_in]], + bool facing [[front_facing]], constant Uniforms& uniforms [[ buffer(BufferIndexUniforms) ]], constant UniformsLevel& uniformsLevel [[ buffer(BufferIndexUniformsLevel) ]], sampler colorSampler [[ sampler(SamplerIndexColor) ]], @@ -808,11 +819,12 @@ fragment float4 DrawImagePS( float2 textureSize = float2(colorMap.get_width(lod), colorMap.get_height(lod)); // colorMap.get_num_mip_levels(); - return DrawPixels(in, uniforms, c, textureSize); + return DrawPixels(in, facing, uniforms, c, textureSize); } fragment float4 DrawImageArrayPS( ColorInOut in [[stage_in]], + bool facing [[front_facing]], constant Uniforms& uniforms [[ buffer(BufferIndexUniforms) ]], constant UniformsLevel& uniformsLevel [[ buffer(BufferIndexUniformsLevel) ]], sampler colorSampler [[ sampler(SamplerIndexColor) ]], @@ -826,12 +838,13 @@ fragment float4 DrawImageArrayPS( float2 textureSize = float2(colorMap.get_width(lod), colorMap.get_height(lod)); // colorMap.get_num_mip_levels(); - return DrawPixels(in, uniforms, c, textureSize); + return DrawPixels(in, facing, uniforms, c, textureSize); } fragment float4 DrawCubePS( ColorInOut in [[stage_in]], + bool facing [[front_facing]], constant Uniforms& uniforms [[ buffer(BufferIndexUniforms) ]], constant UniformsLevel& uniformsLevel [[ buffer(BufferIndexUniformsLevel) ]], sampler colorSampler [[ sampler(SamplerIndexColor) ]], @@ -846,11 +859,12 @@ fragment float4 DrawCubePS( float2 textureSize = float2(w, w); // colorMap.get_num_mip_levels(); - return DrawPixels(in, uniforms, c, textureSize); + return DrawPixels(in, facing, uniforms, c, textureSize); } fragment float4 DrawCubeArrayPS( ColorInOut in [[stage_in]], + bool facing [[front_facing]], constant Uniforms& uniforms [[ buffer(BufferIndexUniforms) ]], constant UniformsLevel& uniformsLevel [[ buffer(BufferIndexUniformsLevel) ]], sampler colorSampler [[ sampler(SamplerIndexColor) ]], @@ -865,12 +879,13 @@ fragment float4 DrawCubeArrayPS( float2 textureSize = float2(w, w); // colorMap.get_num_mip_levels(); - return DrawPixels(in, uniforms, c, textureSize); + return DrawPixels(in, facing, uniforms, c, textureSize); } fragment float4 DrawVolumePS( ColorInOut in [[stage_in]], + bool facing [[front_facing]], constant Uniforms& uniforms [[ buffer(BufferIndexUniforms) ]], constant UniformsLevel& uniformsLevel [[ buffer(BufferIndexUniformsLevel) ]], sampler colorSampler [[ sampler(SamplerIndexColor) ]], @@ -895,7 +910,7 @@ fragment float4 DrawVolumePS( float2 textureSize = float2(colorMap.get_width(lod), colorMap.get_height(lod)); // 
colorMap.get_num_mip_levels(); - return DrawPixels(in, uniforms, c, textureSize); + return DrawPixels(in, facing, uniforms, c, textureSize); } //-------------------------------------------------- diff --git a/libkram/kram/KramImageInfo.cpp b/libkram/kram/KramImageInfo.cpp index 391fd17a..aba4df27 100644 --- a/libkram/kram/KramImageInfo.cpp +++ b/libkram/kram/KramImageInfo.cpp @@ -1305,8 +1305,8 @@ void ImageInfo::heightToNormals(int32_t w, int32_t h, float dx = (cE - cW) * scaleX; float dy = (cN - cS) * scaleY; - //dx = -dx; - //dy = -dy; + dx = -dx; + dy = -dy; float4 normal = float4m(dx, dy, 1.0f, 0.0f); normal = normalize(normal); @@ -1340,8 +1340,8 @@ void ImageInfo::heightToNormals(int32_t w, int32_t h, float dx = (cE - cW) * scaleX; float dy = (cN - cS) * scaleY; - //dx = -dx; - //dy = -dy; + dx = -dx; + dy = -dy; float4 normal = float4m(dx, dy, 1.0f, 0.0f); normal = normalize(normal); From 29e76533ceb97024f8ea519b78d9f27517e0a9eb Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 30 May 2021 01:44:19 -0700 Subject: [PATCH 083/901] CMake - fix including Metal source in builds for gpu capture --- kramv/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kramv/CMakeLists.txt b/kramv/CMakeLists.txt index 11baacc3..97d844bf 100644 --- a/kramv/CMakeLists.txt +++ b/kramv/CMakeLists.txt @@ -81,7 +81,7 @@ set_target_properties(${myTargetApp} PROPERTIES #------------------------- # turn on shader capture support and indexing # why can't this just be a yes or no, there's "Yes, exclude source code" - XCODE_ATTRIBUTE_MTL_ENABLE_DEBUG_INFO "Yes, include source code" + XCODE_ATTRIBUTE_MTL_ENABLE_DEBUG_INFO INCLUDE_SOURCE XCODE_ATTRIBUTE_MTL_ENABLE_INDEX_STORE YES ) From a9473d301b66412f1552e570f7b2008039597a6a Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 30 May 2021 01:49:07 -0700 Subject: [PATCH 084/901] kramv - fixup the 3d shapes as much as possible, fix eyedropper. These are still all smushed, since xy-only scale is applied to the box to approximate a wxh image. Flip the u direction of the sphere/cylinder primitives, since these are inverted from the box. Invert the bitangent sign, since even with flipping u direction, the tangents are inverted from what they should be. Split out 2d from 3d view matrices. Fix eyedropper on archive by setting decodedFormat on that path, and fixing toSnorm8 call use. Try to activate specular, but it looks bad in ortho and with the scaling. 
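The eyedropper fix in this change replaces a per-level halving loop with a single shift per
axis, clamped so deep mips of non-square images never collapse to zero. A minimal sketch of
that mip-dimension math (the helper name is illustrative, not kram's API):

    #include <algorithm>
    #include <cstdint>

    // width/height of mip level lod, clamped to at least one texel per axis
    inline void mipDimensions(int32_t& w, int32_t& h, int32_t lod)
    {
        w = std::max(1, w >> lod);
        h = std::max(1, h >> lod);
    }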
--- kramv/KramRenderer.mm | 134 +++++++++++++++++++++++++++++----------- kramv/KramShaders.h | 3 +- kramv/KramShaders.metal | 29 ++++++--- kramv/KramViewerBase.h | 4 +- kramv/KramViewerMain.mm | 9 ++- 5 files changed, 128 insertions(+), 51 deletions(-) diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index 8c97e852..90542ca5 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -70,8 +70,14 @@ @implementation Renderer uint8_t _uniformBufferIndex; float4x4 _projectionMatrix; + + // 2d versions float4x4 _viewMatrix; float4x4 _modelMatrix; + + // 3d versions + float4x4 _viewMatrix3D; + float4x4 _modelMatrix3D; //float _rotation; KramLoader *_loader; @@ -79,11 +85,12 @@ @implementation Renderer MDLVertexDescriptor *_mdlVertexDescriptor; - MTKMesh *_meshPlane; // really a thin gox + //MTKMesh *_meshPlane; // really a thin gox MTKMesh *_meshBox; MTKMesh *_meshSphere; MTKMesh *_meshCylinder; MTKMeshBufferAllocator *_metalAllocator; + bool _is3DView; // whether view is 3d for now ShowSettings* _showSettings; } @@ -395,18 +402,30 @@ - (void)_createSampleRender _sampleTex = [_device newTextureWithDescriptor:textureDesc]; } -- (MTKMesh*)_createMeshAsset:(const char*)name mdlMesh:(MDLMesh*)mdlMesh +- (MTKMesh*)_createMeshAsset:(const char*)name mdlMesh:(MDLMesh*)mdlMesh doFlipUV:(bool)doFlipUV { NSError* error = nil; //mdlMesh.vertexDescriptor = _mdlVertexDescriptor; + + mdlMesh.vertexDescriptor = _mdlVertexDescriptor; + + // flip the u coordinate + if (doFlipUV) + { + id uvs = mdlMesh.vertexBuffers[1]; + float2* uvData = (float2*)uvs.map.bytes; + + for (uint32_t i = 0; i < mdlMesh.vertexCount; ++i) { + uvData[i].x = 1.0f - uvData[i].x; + } + } + [mdlMesh addOrthTanBasisForTextureCoordinateAttributeNamed: MDLVertexAttributeTextureCoordinate normalAttributeNamed: MDLVertexAttributeNormal tangentAttributeNamed: MDLVertexAttributeTangent]; - mdlMesh.vertexDescriptor = _mdlVertexDescriptor; - // TODO: name the vertex attributes, can that be done in _mdlVertexDescriptor // may have to set name on MTLBuffer range on IB and VB @@ -437,15 +456,19 @@ - (void)_loadAssets inwardNormals:NO allocator:_metalAllocator]; - _meshBox = [self _createMeshAsset:"MeshBox" mdlMesh:mdlMesh]; + _meshBox = [self _createMeshAsset:"MeshBox" mdlMesh:mdlMesh doFlipUV:false]; // TOOO: have more shape types - this is box, need thin box (plane), and sphere, and cylinder // eventually load usdz and gltf2 custom model. Need 3d manipulation of shape like arcball // and eyedropper is more complex. + // The sphere/cylinder shapes are v increasing in -Y, and u increasing conterclockwise, + // u is the opposite direction to the cube/plane, so need to flip those coords + // I think this has also flipped the tangents the wrong way. 
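    // Since _createMeshAsset rewrites uv.x to 1 - uv.x before
    // addOrthTanBasisForTextureCoordinateAttributeNamed runs, the generated tangents
    // follow the flipped u direction, which is presumably why the shader still negates
    // the bitangent sign at this point in the series.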
+ mdlMesh = [MDLMesh newEllipsoidWithRadii:(vector_float3){0.5, 0.5, 0.5} radialSegments:16 verticalSegments:16 geometryType:MDLGeometryTypeTriangles inwardNormals:NO hemisphere:NO allocator:_metalAllocator]; - - _meshSphere = [self _createMeshAsset:"MeshSphere" mdlMesh:mdlMesh]; + + _meshSphere = [self _createMeshAsset:"MeshSphere" mdlMesh:mdlMesh doFlipUV:true]; mdlMesh = [MDLMesh newCylinderWithHeight:1.0 radii:(vector_float2){0.5, 0.5} @@ -455,7 +478,7 @@ - (void)_loadAssets inwardNormals:NO allocator:_metalAllocator]; - _meshCylinder = [self _createMeshAsset:"MeshCylinder" mdlMesh:mdlMesh]; + _meshCylinder = [self _createMeshAsset:"MeshCylinder" mdlMesh:mdlMesh doFlipUV:true]; _mesh = _meshBox; @@ -492,6 +515,7 @@ - (BOOL)loadTextureFromData:(const string&)fullFilename timestamp:(double)timest _showSettings->imageInfoVerbose = kramInfoKTXToString(fullFilename, sourceImage, true); _showSettings->originalFormat = (MyMTLPixelFormat)originalFormatMTL; + _showSettings->decodedFormat = (MyMTLPixelFormat)texture.pixelFormat; _showSettings->lastFilename = fullFilename; _showSettings->lastTimestamp = timestamp; @@ -659,8 +683,13 @@ - (BOOL)loadTextureImpl:(const string&)fullFilename isTextureChanged:(BOOL)isTex // have one of these for each texture added to the viewer float scaleX = MAX(1, texture.width); float scaleY = MAX(1, texture.height); - _modelMatrix = float4x4(float4m(scaleX, scaleY, 1.0f, 1.0f)); - _modelMatrix = _modelMatrix * matrix4x4_translation(0.0f, 0.0f, -1.0); + _modelMatrix = float4x4(float4m(scaleX, scaleY, 1.0f, 1.0f)); // non uniform scale + _modelMatrix = _modelMatrix * matrix4x4_translation(0.0f, 0.0f, -1.0); // set z=-1 unit back + + // squashed 3d primitive in z, throws off normals + float scale = MAX(scaleX, scaleY); + _modelMatrix3D = float4x4(float4m(scale, scale, 1.0f, 1.0f)); // non uniform scale + _modelMatrix3D = _modelMatrix3D * matrix4x4_translation(0.0f, 0.0f, -1.0); // set z=-1 unit back return YES; } @@ -670,10 +699,18 @@ - (float4x4)computeImageTransform:(float)panX panY:(float)panY zoom:(float)zoom float4x4 panTransform = matrix4x4_translation(-panX, panY, 0.0); // scale - float4x4 viewMatrix = float4x4(float4m(zoom, zoom, 1.0f, 1.0f)); - viewMatrix = panTransform * viewMatrix; - - return _projectionMatrix * viewMatrix * _modelMatrix; + if (_is3DView) { + float4x4 viewMatrix = float4x4(float4m(zoom, zoom, 1.0f, 1.0f)); // non-uniform scale + viewMatrix = panTransform * viewMatrix; + + return _projectionMatrix * viewMatrix * _modelMatrix3D; + } + else { + float4x4 viewMatrix = float4x4(float4m(zoom, zoom, 1.0f, 1.0f)); // non-uniform scale + viewMatrix = panTransform * viewMatrix; + + return _projectionMatrix * viewMatrix * _modelMatrix; + } } - (void)_updateGameState @@ -729,33 +766,60 @@ - (void)_updateGameState uniforms.debugMode = _showSettings->isPreview ? 
ShaderDebugMode::ShDebugModeNone : (ShaderDebugMode)_showSettings->debugMode; uniforms.channels = (ShaderTextureChannels)_showSettings->channels; + // crude shape experiment + _is3DView = true; + switch(_showSettings->meshNumber) { + case 0: _mesh = _meshBox; _is3DView = false; break; + case 1: _mesh = _meshBox; break; + case 2: _mesh = _meshSphere; break; + case 3: _mesh = _meshCylinder; break; + } + // translate float4x4 panTransform = matrix4x4_translation(-_showSettings->panX, _showSettings->panY, 0.0); // scale - _viewMatrix = float4x4(float4m(_showSettings->zoom, _showSettings->zoom, 1.0f, 1.0f)); - _viewMatrix = panTransform * _viewMatrix; - - // viewMatrix should typically be the inverse - //_viewMatrix = simd_inverse(_viewMatrix); - - float4x4 projectionViewMatrix = _projectionMatrix * _viewMatrix; - - uniforms.projectionViewMatrix = projectionViewMatrix; - - // works when only one texture, but switch to projectViewMatrix - uniforms.modelMatrix = _modelMatrix; + float zoom = _showSettings->zoom; - // this was stored so view could use it, but now that code calcs the transform via computeImageTransform - _showSettings->projectionViewModelMatrix = projectionViewMatrix * _modelMatrix; - - // crude shape experiment - switch(_showSettings->meshNumber) { - case 0: _mesh = _meshBox; break; - case 1: _mesh = _meshSphere; break; - case 2: _mesh = _meshCylinder; break; + if (_is3DView) { + _viewMatrix3D = float4x4(float4m(zoom, zoom, 1.0f, 1.0f)); // non-uniform + _viewMatrix3D = panTransform * _viewMatrix3D; + + // viewMatrix should typically be the inverse + //_viewMatrix = simd_inverse(_viewMatrix3D); + + float4x4 projectionViewMatrix = _projectionMatrix * _viewMatrix3D; + uniforms.projectionViewMatrix = projectionViewMatrix; + + // works when only one texture, but switch to projectViewMatrix + uniforms.modelMatrix = _modelMatrix3D; + + // this was stored so view could use it, but now that code calcs the transform via computeImageTransform + _showSettings->projectionViewModelMatrix = uniforms.projectionViewMatrix * uniforms.modelMatrix; + + // cache the camera position + uniforms.cameraPosition = inverse(_viewMatrix3D).columns[3].xyz; // this is all ortho } - + else { + _viewMatrix = float4x4(float4m(zoom, zoom, 1.0f, 1.0f)); + _viewMatrix = panTransform * _viewMatrix; + + // viewMatrix should typically be the inverse + //_viewMatrix = simd_inverse(_viewMatrix3D); + + float4x4 projectionViewMatrix = _projectionMatrix * _viewMatrix; + uniforms.projectionViewMatrix = projectionViewMatrix; + + // works when only one texture, but switch to projectViewMatrix + uniforms.modelMatrix = _modelMatrix; + + // this was stored so view could use it, but now that code calcs the transform via computeImageTransform + _showSettings->projectionViewModelMatrix = uniforms.projectionViewMatrix * uniforms.modelMatrix ; + + // cache the camera position + uniforms.cameraPosition = inverse(_viewMatrix).columns[3].xyz; // this is all ortho + } + //_rotation += .01; } diff --git a/kramv/KramShaders.h b/kramv/KramShaders.h index 0b7e6b36..6e363c69 100644 --- a/kramv/KramShaders.h +++ b/kramv/KramShaders.h @@ -96,7 +96,8 @@ struct Uniforms { simd::float4x4 projectionViewMatrix; simd::float4x4 modelMatrix; - + simd::float3 cameraPosition; // world-space + bool isSigned; bool isNormal; bool isSwizzleAGToRG; diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index 6f51ca1d..b77875ba 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -192,9 +192,11 @@ half3 transformNormal(half3 bumpNormal, 
half4 tangent, half3 vertexNormal) // Reconstruct bitan in frag shader // https://bgolus.medium.com/generating-perfect-normal-maps-for-unity-f929e673fc57 + half bitangentSign = tangent.w; + // ModelIO not generating correct bitan sign // TODO: flip this on srcData, and not here - half bitangentSign = -tangent.w; + bitangentSign = -bitangentSign; // now transform by basis and normalize from any shearing, and since interpolated basis vectors // are not normalized @@ -549,14 +551,20 @@ float4 DrawPixels( float3 specular = float3(0.0); - // this renders bright in one quadrant of wrap preview + // this renders bright in one quadrant of wrap preview, hard in ortho view // specular - //float3 v = normalize(in.worldPos); // - worldCameraPos); // or worldCameraDir - //float3 r = normalize(reflect(lightDir, n)); - //float dotRV = saturate(dot(r, v)); - //dotRV = pow(dotRV, 4.0); // * saturate(dotNL * 8.0); // no spec without diffuse - //specular = saturate(dotRV * lightColor.rgb); - + bool doSpecular = false; + if (doSpecular) { + float3 view = normalize(in.worldPos - uniforms.cameraPosition); + float3 ref = normalize(reflect(view, n)); + + // above can be interpolated + float dotRL = saturate(dot(ref, lightDir)); + dotRL = pow(dotRL, 4.0); // * saturate(dotNL * 8.0); // no spec without diffuse + specular = saturate(dotRL * lightColor.rgb); + } + + // Note: don't have any albedo yet, need second texture input float3 ambient = float3(0.1); c.xyz = ambient + diffuse + specular; @@ -576,6 +584,11 @@ float4 DrawPixels( c.xyz *= c.a; } } + + bool doShowUV = false; + if (doShowUV) { + c = float4(in.texCoord, 0.0, 1.0); + } } else { // handle single channel and SDF content diff --git a/kramv/KramViewerBase.h b/kramv/KramViewerBase.h index d9015f69..5976260c 100644 --- a/kramv/KramViewerBase.h +++ b/kramv/KramViewerBase.h @@ -147,7 +147,7 @@ class ShowSettings { // these control the view transform, zoomFit fits the image vertically to he view bound float zoomFit = 1.0f; - float zoom = 0.0f; + float zoom = 1.0f; float panX = 0.0f; float panY = 0.0f; @@ -169,7 +169,7 @@ class ShowSettings { double lastTimestamp = 0.0; int32_t meshNumber = 0; - int32_t meshCount = 3; + int32_t meshCount = 4; }; float4x4 matrix4x4_translation(float tx, float ty, float tz); diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index afc42d54..027b57ad 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -1014,10 +1014,9 @@ - (void)updateEyedropper { int mipX = _showSettings->imageBoundsX; int mipY = _showSettings->imageBoundsY; - for (int i = 0; i < mipLOD; ++i) { - mipX = mipX >> 1; - mipY = mipY >> 1; - } + mipX = mipX >> mipLOD; + mipY = mipY >> mipLOD; + mipX = std::max(1, mipX); mipY = std::max(1, mipY); @@ -1050,7 +1049,7 @@ - (void)updateEyedropper { bool isDecodeSigned = isSignedFormat(_showSettings->decodedFormat); if (isSigned && !isDecodeSigned) { - c = toSnorm8(c.x); + c = toSnorm8(c); } if (isNormal) { From 90f554d192d3e27e2c26fdffc7b5c772f1739d79 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 30 May 2021 10:47:36 -0700 Subject: [PATCH 085/901] kramv - first pass at 3d meshes These are still ortho. Fix bitangent sign on ModelIO prims. Pass in invScale2 from transform, so don't have to recompute in VS. This is 1.0 if uniform scale used. Don't set non-uniform scale on modelMatrix in 3D views. Using uniform scale now. Increase ortho z range, so scale doesn't cause prims to clip. 
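The invScale2 passed down here only matters for non-uniform scale. kram's model matrices are a
per-axis scale times a translation, so the inverse-transpose needed for normals reduces to:
transform by the upper 3x3, multiply per axis by 1/scale squared, renormalize. With uniform
scale the factor cancels in the normalize, which is why (1,1,1) can be passed and the shader
work skipped. A rough CPU-side sketch with hand-rolled types (not kram's own vector wrappers),
mirroring inverseScaleSquared and the vertex-shader transformBasis:

    #include <cmath>
    #include <algorithm>

    struct V3 { float x, y, z; };

    static float lengthSquared(V3 v) { return v.x * v.x + v.y * v.y + v.z * v.z; }

    static V3 normalized(V3 v)
    {
        float len = std::sqrt(lengthSquared(v));
        return { v.x / len, v.y / len, v.z / len };
    }

    // columns of the upper-left 3x3 of a column-major model matrix
    struct M3 { V3 c0, c1, c2; };

    static V3 mul(const M3& m, V3 v)
    {
        return { m.c0.x * v.x + m.c1.x * v.y + m.c2.x * v.z,
                 m.c0.y * v.x + m.c1.y * v.y + m.c2.y * v.z,
                 m.c0.z * v.x + m.c1.z * v.y + m.c2.z * v.z };
    }

    // squared column lengths, inverted; returns (1,1,1) when the scale is uniform
    V3 inverseScaleSquared(const M3& m)
    {
        V3 s2 = { lengthSquared(m.c0), lengthSquared(m.c1), lengthSquared(m.c2) };
        float tol = 1e-5f;
        if (std::fabs(s2.x - s2.y) < tol && std::fabs(s2.x - s2.z) < tol)
            return { 1.0f, 1.0f, 1.0f };
        float minS2 = 0.0001f * 0.0001f;
        return { 1.0f / std::max(minS2, s2.x),
                 1.0f / std::max(minS2, s2.y),
                 1.0f / std::max(minS2, s2.z) };
    }

    // same order as the vertex shader: rotate/scale, undo scale squared, renormalize
    V3 transformNormalCPU(const M3& m, V3 invScale2, V3 n)
    {
        V3 t = mul(m, n);
        return normalized({ t.x * invScale2.x, t.y * invScale2.y, t.z * invScale2.z });
    }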
--- kramv/KramRenderer.mm | 104 +++++++++++++++++++++++++++++++--------- kramv/KramShaders.h | 1 + kramv/KramShaders.metal | 44 +++++++++-------- 3 files changed, 107 insertions(+), 42 deletions(-) diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index 90542ca5..8cbc57a6 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -88,7 +88,8 @@ @implementation Renderer //MTKMesh *_meshPlane; // really a thin gox MTKMesh *_meshBox; MTKMesh *_meshSphere; - MTKMesh *_meshCylinder; + //MTKMesh *_meshCylinder; + MTKMesh *_meshCapsule; MTKMeshBufferAllocator *_metalAllocator; bool _is3DView; // whether view is 3d for now @@ -406,15 +407,15 @@ - (MTKMesh*)_createMeshAsset:(const char*)name mdlMesh:(MDLMesh*)mdlMesh doFlipU { NSError* error = nil; - //mdlMesh.vertexDescriptor = _mdlVertexDescriptor; - - mdlMesh.vertexDescriptor = _mdlVertexDescriptor; + // ModelIO has the uv going counterclockwise on sphere/cylinder, but not on the box. + // And it also has a flipped bitangent.w. + // flip the u coordinate if (doFlipUV) { - id uvs = mdlMesh.vertexBuffers[1]; + id uvs = mdlMesh.vertexBuffers[BufferIndexMeshUV0]; float2* uvData = (float2*)uvs.map.bytes; for (uint32_t i = 0; i < mdlMesh.vertexCount; ++i) { @@ -426,6 +427,18 @@ - (MTKMesh*)_createMeshAsset:(const char*)name mdlMesh:(MDLMesh*)mdlMesh doFlipU normalAttributeNamed: MDLVertexAttributeNormal tangentAttributeNamed: MDLVertexAttributeTangent]; + // DONE: flip the bitangent.w sign here, and remove the flip in the shader + bool doFlipBitangent = true; + if (doFlipBitangent) + { + id uvs = mdlMesh.vertexBuffers[BufferIndexMeshTangent]; + float4* uvData = (float4*)uvs.map.bytes; + + for (uint32_t i = 0; i < mdlMesh.vertexCount; ++i) { + uvData[i].w = -uvData[i].w; + } + } + // TODO: name the vertex attributes, can that be done in _mdlVertexDescriptor // may have to set name on MTLBuffer range on IB and VB @@ -466,19 +479,34 @@ - (void)_loadAssets // u is the opposite direction to the cube/plane, so need to flip those coords // I think this has also flipped the tangents the wrong way. 
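    // The doFlipBitangent loop above bakes that correction into the vertex data:
    // negating tangent.w once at load time lets the shader drop its
    // "bitangentSign = -bitangentSign" workaround (see the DONE note in the
    // KramShaders.metal hunk later in this patch).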
+ // All prims are viewed with +Y, not +Z up + mdlMesh = [MDLMesh newEllipsoidWithRadii:(vector_float3){0.5, 0.5, 0.5} radialSegments:16 verticalSegments:16 geometryType:MDLGeometryTypeTriangles inwardNormals:NO hemisphere:NO allocator:_metalAllocator]; _meshSphere = [self _createMeshAsset:"MeshSphere" mdlMesh:mdlMesh doFlipUV:true]; - mdlMesh = [MDLMesh newCylinderWithHeight:1.0 - radii:(vector_float2){0.5, 0.5} - radialSegments:16 - verticalSegments:1 - geometryType:MDLGeometryTypeTriangles - inwardNormals:NO - allocator:_metalAllocator]; - - _meshCylinder = [self _createMeshAsset:"MeshCylinder" mdlMesh:mdlMesh doFlipUV:true]; +// this maps 1/3rd of texture to the caps, and just isn't a very good uv mapping, using capsule nistead +// mdlMesh = [MDLMesh newCylinderWithHeight:1.0 +// radii:(vector_float2){0.5, 0.5} +// radialSegments:16 +// verticalSegments:1 +// geometryType:MDLGeometryTypeTriangles +// inwardNormals:NO +// allocator:_metalAllocator]; +// +// _meshCylinder = [self _createMeshAsset:"MeshCylinder" mdlMesh:mdlMesh doFlipUV:true]; + + mdlMesh = [MDLMesh newCapsuleWithHeight:1.0 + radii:(vector_float2){0.5, 0.25} // vertical cap subtracted from height + radialSegments:16 + verticalSegments:1 + hemisphereSegments:16 + geometryType:MDLGeometryTypeTriangles + inwardNormals:NO + allocator:_metalAllocator]; + + + _meshCapsule = [self _createMeshAsset:"MeshCapsule" mdlMesh:mdlMesh doFlipUV:true]; _mesh = _meshBox; @@ -686,10 +714,10 @@ - (BOOL)loadTextureImpl:(const string&)fullFilename isTextureChanged:(BOOL)isTex _modelMatrix = float4x4(float4m(scaleX, scaleY, 1.0f, 1.0f)); // non uniform scale _modelMatrix = _modelMatrix * matrix4x4_translation(0.0f, 0.0f, -1.0); // set z=-1 unit back - // squashed 3d primitive in z, throws off normals + // uniform scaled 3d primitiv float scale = MAX(scaleX, scaleY); - _modelMatrix3D = float4x4(float4m(scale, scale, 1.0f, 1.0f)); // non uniform scale - _modelMatrix3D = _modelMatrix3D * matrix4x4_translation(0.0f, 0.0f, -1.0); // set z=-1 unit back + _modelMatrix3D = float4x4(float4m(scale, scale, scale, 1.0f)); // uniform scale + _modelMatrix3D = _modelMatrix3D * matrix4x4_translation(0.0f, 0.0f, -1.0f); // set z=-1 unit back return YES; } @@ -700,7 +728,7 @@ - (float4x4)computeImageTransform:(float)panX panY:(float)panY zoom:(float)zoom // scale if (_is3DView) { - float4x4 viewMatrix = float4x4(float4m(zoom, zoom, 1.0f, 1.0f)); // non-uniform scale + float4x4 viewMatrix = float4x4(float4m(zoom, zoom, 1.0f, 1.0f)); // non-uniform scale is okay, affects ortho volume viewMatrix = panTransform * viewMatrix; return _projectionMatrix * viewMatrix * _modelMatrix3D; @@ -713,6 +741,33 @@ - (float4x4)computeImageTransform:(float)panX panY:(float)panY zoom:(float)zoom } } +bool almost_equal_elements(float3 v, float tol) { + return (fabs(v.x - v.y) < tol) && (fabs(v.x - v.z) < tol); +} + +float3 inverseScaleSquared(float4x4 m) { + float3 scaleSquared = float3m( + length_squared(m.columns[0].xyz), + length_squared(m.columns[1].xyz), + length_squared(m.columns[2].xyz)); + + // if uniform, then set scaleSquared all to 1 + if (almost_equal_elements(scaleSquared, 1e-5)) { + scaleSquared = float3m(1.0); + } + + // don't divide by 0 + float3 invScaleSquared = recip(simd::max(float3m(0.0001 * 0.0001), scaleSquared)); + + // TODO: could also identify determinant here for flipping orient + + // Note: in 2D, scales is x,x,1, so always apply invScale2, + // and that messes up preview normals on sphere/cylinder. + // May be from trying to do all that math in half. 
+ + return invScaleSquared; +} + - (void)_updateGameState { /// Update any game state before encoding rendering commands to our drawable @@ -772,7 +827,8 @@ - (void)_updateGameState case 0: _mesh = _meshBox; _is3DView = false; break; case 1: _mesh = _meshBox; break; case 2: _mesh = _meshSphere; break; - case 3: _mesh = _meshCylinder; break; + //case 3: _mesh = _meshCylinder; break; + case 3: _mesh = _meshCapsule; break; } // translate @@ -794,6 +850,8 @@ - (void)_updateGameState // works when only one texture, but switch to projectViewMatrix uniforms.modelMatrix = _modelMatrix3D; + uniforms.modelMatrixInvScale2 = inverseScaleSquared(_modelMatrix3D); + // this was stored so view could use it, but now that code calcs the transform via computeImageTransform _showSettings->projectionViewModelMatrix = uniforms.projectionViewMatrix * uniforms.modelMatrix; @@ -813,6 +871,8 @@ - (void)_updateGameState // works when only one texture, but switch to projectViewMatrix uniforms.modelMatrix = _modelMatrix; + uniforms.modelMatrixInvScale2 = inverseScaleSquared(_modelMatrix); + // this was stored so view could use it, but now that code calcs the transform via computeImageTransform _showSettings->projectionViewModelMatrix = uniforms.projectionViewMatrix * uniforms.modelMatrix ; @@ -935,7 +995,7 @@ - (void)drawMain:(id)commandBuffer view:(nonnull MTKView *)vie [renderEncoder setCullMode:MTLCullModeBack]; [renderEncoder setDepthStencilState:_depthStateFull]; - [renderEncoder pushDebugGroup:@"DrawBox"]; + [renderEncoder pushDebugGroup:@"DrawShape"]; // set the mesh shape for (NSUInteger bufferIndex = 0; bufferIndex < _mesh.vertexBuffers.count; bufferIndex++) @@ -1221,7 +1281,7 @@ - (void)drawSamples:(id)commandBuffer lookupX:(int32_t)lookupX id renderEncoder = [commandBuffer computeCommandEncoder]; renderEncoder.label = @"SampleCompute"; - [renderEncoder pushDebugGroup:@"DrawBox"]; + [renderEncoder pushDebugGroup:@"DrawShape"]; UniformsCS uniforms; uniforms.uv.x = lookupX; @@ -1297,7 +1357,7 @@ - (void)updateViewTransforms { //float aspect = size.width / (float)size.height; //_projectionMatrix = perspective_rhs(45.0f * (M_PI / 180.0f), aspect, 0.1f, 100.0f); - _projectionMatrix = orthographic_rhs(_showSettings->viewSizeX, _showSettings->viewSizeY, 0.1f, 100.0f, _showSettings->isReverseZ); + _projectionMatrix = orthographic_rhs(_showSettings->viewSizeX, _showSettings->viewSizeY, 0.1f, 100000.0f, _showSettings->isReverseZ); // DONE: adjust zoom to fit the entire image to the window _showSettings->zoomFit = MIN((float)_showSettings->viewSizeX, (float)_showSettings->viewSizeY) / diff --git a/kramv/KramShaders.h b/kramv/KramShaders.h index 6e363c69..cb0d33e1 100644 --- a/kramv/KramShaders.h +++ b/kramv/KramShaders.h @@ -96,6 +96,7 @@ struct Uniforms { simd::float4x4 projectionViewMatrix; simd::float4x4 modelMatrix; + simd::float3 modelMatrixInvScale2; // to supply inverse simd::float3 cameraPosition; // world-space bool isSigned; diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index b77875ba..245b4ce8 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -195,8 +195,8 @@ half3 transformNormal(half3 bumpNormal, half4 tangent, half3 vertexNormal) half bitangentSign = tangent.w; // ModelIO not generating correct bitan sign - // TODO: flip this on srcData, and not here - bitangentSign = -bitangentSign; + // DONE: flip this on srcData, and not here + //bitangentSign = -bitangentSign; // now transform by basis and normalize from any shearing, and since interpolated basis vectors // are not 
normalized @@ -210,9 +210,13 @@ half3 transformNormal(half4 tangent, half3 vertexNormal, texture2d texture, sampler s, float2 uv, bool isSigned = true) { half4 nmap = texture.sample(s, uv); + + // unorm-only formats like ASTC need to convert if (!isSigned) { nmap.xy = toSnorm8(nmap.xy); } + + // rebuild the z term half3 bumpNormal = toNormal(nmap.xyz); return transformNormal(bumpNormal, @@ -276,7 +280,7 @@ float3x3 toFloat3x3(float4x4 m) // this is for vertex shader if tangent supplied void transformBasis(thread float3& normal, thread float3& tangent, - float4x4 modelToWorldTfm, bool isScaled = false) + float4x4 modelToWorldTfm, float3 invScale2) { float3x3 m = toFloat3x3(modelToWorldTfm); @@ -289,25 +293,24 @@ void transformBasis(thread float3& normal, thread float3& tangent, // most apps assume m, but after averaging it can be just as off the surface as the normal tangent = m * tangent; - // have to apply invSquare of scale here to approximate invT // also make sure to identify inversion off determinant before instancing so that backfacing is correct // this is only needed if non-uniform scale present in modelToWorldTfm, could precompute scale2 - if (isScaled) - { - // compute scale squared from rows - float3 scale2 = float3( - length_squared(m[0].xyz), - length_squared(m[1].xyz), - length_squared(m[2].xyz)); - - // do a max(1e4), but really don't have scale be super small - scale2 = recip(max(0.0001 * 0.0001, scale2)); +// if (isScaled) +// { +// // compute scale squared from rows +// float3 scale2 = float3( +// length_squared(m[0].xyz), +// length_squared(m[1].xyz), +// length_squared(m[2].xyz)); +// +// // do a max(1e4), but really don't have scale be super small +// scale2 = recip(max(0.0001 * 0.0001, scale2)); // apply inverse - normal *= scale2; - tangent *= scale2; - } + normal *= invScale2; + tangent *= invScale2; +// } // vertex shader normalize, but the fragment shader should not normal = normalize(normal); @@ -358,7 +361,7 @@ ColorInOut DrawImageFunc( if (uniforms.isNormal && uniforms.isPreview) { float3 normal = in.normal; float3 tangent = in.tangent.xyz; - transformBasis(normal, tangent, uniforms.modelMatrix, false); + transformBasis(normal, tangent, uniforms.modelMatrix, uniforms.modelMatrixInvScale2); out.normal = toHalf(normal); out.tangent.xyz = toHalf(tangent); @@ -546,7 +549,8 @@ float4 DrawPixels( n = toFloat(transformNormal(toHalf(n), in.tangent, in.normal)); // diffuse - float dotNL = saturate(dot(n, lightDir)); + float dotNLUnsat = dot(n, lightDir); + float dotNL = saturate(dotNLUnsat); float3 diffuse = lightColor.xyz * dotNL; float3 specular = float3(0.0); @@ -565,7 +569,7 @@ float4 DrawPixels( } // Note: don't have any albedo yet, need second texture input - float3 ambient = float3(0.1); + float3 ambient = mix(0.1, 0.3, saturate(dotNLUnsat * 0.5 + 0.5)); c.xyz = ambient + diffuse + specular; c.a = 1; From 44875cd7d2f31ccc60c244381120b8f190b58b7a Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 30 May 2021 11:00:54 -0700 Subject: [PATCH 086/901] kram - fix Win build that doesn't have ATE --- libkram/kram/KramImage.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/libkram/kram/KramImage.cpp b/libkram/kram/KramImage.cpp index a814a6ce..cbbbe803 100644 --- a/libkram/kram/KramImage.cpp +++ b/libkram/kram/KramImage.cpp @@ -461,13 +461,16 @@ bool KramDecoder::decodeBlocks( // copy srcData if using ATE, it says it needs 16-byte aligned data for encode // and assume for decode too. Output texture is already 16-byte aligned. 
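// Note on the #if COMPILE_ATE guard added below: ATE is Apple-only, and per the commit
// message the unguarded alignment copy broke the Windows build, so it is now compiled out
// where ATE isn't available. The alignment test itself is the usual pointer check,
// ((uintptr_t)srcData & 15) != 0 for 16-byte alignment.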
const uint8_t* srcData = blockData; + +#if COMPILE_ATE vector srcTexture; if (useATE && (((uintptr_t)srcData & 15) != 0)) { srcTexture.resize(blockDataSize); memcpy(srcTexture.data(), srcData, blockDataSize); srcData = srcTexture.data(); } - +#endif + Int2 blockDims = blockDimsOfFormat(blockFormat); bool isVerbose = params.isVerbose; const string& swizzleText = params.swizzleText; From 539187b777cc328ebcc8d8bd7bfe2ca59c749b9f Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 30 May 2021 12:08:29 -0700 Subject: [PATCH 087/901] kram - allow loading all texture types in LoadImageFromKTX The call also now decodes ktx2 supercompressed levels. Loads an entire level into a vertical strip, and then it can be decoded. --- kramv/KramLoader.mm | 23 +++++----- libkram/kram/KramImage.cpp | 94 +++++++++++++++----------------------- libkram/kram/KramImage.h | 9 +++- 3 files changed, 55 insertions(+), 71 deletions(-) diff --git a/kramv/KramLoader.mm b/kramv/KramLoader.mm index d60e3bb9..41c24951 100644 --- a/kramv/KramLoader.mm +++ b/kramv/KramLoader.mm @@ -177,14 +177,8 @@ inline MyMTLPixelFormat remapInternalRGBFormat(MyMTLPixelFormat format) { #if SUPPORT_RGB if (isInternalRGBFormat(image.pixelFormat)) { - isInfoOnly = false; - - // reopen and unzip it all - if (!image.open(imageData, imageDataLength, isInfoOnly)) { - return nil; - } - - // loads and converts image from RGB to RGBA + // loads and converts top level mip from RGB to RGBA (RGB0) + // handles all texture types Image rgbaImage; if (!rgbaImage.loadImageFromKTX(image)) return nil; @@ -193,14 +187,21 @@ inline MyMTLPixelFormat remapInternalRGBFormat(MyMTLPixelFormat format) { KTXImage rbgaImage2; ImageInfoArgs dstImageInfoArgs; + dstImageInfoArgs.textureType = image.textureType; dstImageInfoArgs.pixelFormat = remapInternalRGBFormat(image.pixelFormat); - dstImageInfoArgs.doMipmaps = false; + dstImageInfoArgs.doMipmaps = image.header.numberOfMipmapLevels > 1; // ignore 0 dstImageInfoArgs.textureEncoder = kTexEncoderExplicit; - dstImageInfoArgs.swizzleText = "rgb1"; - + + // set chunk count, so it's explicit + // the chunks are loaded into a vertical strip + dstImageInfoArgs.chunksX = 1; + dstImageInfoArgs.chunksY = + dstImageInfoArgs.chunksCount = image.totalChunks(); + ImageInfo dstImageInfo; dstImageInfo.initWithArgs(dstImageInfoArgs); + // this will build mips if needed KramEncoder encoder; if (!encoder.encode(dstImageInfo, rgbaImage, rbgaImage2)) { return nil; diff --git a/libkram/kram/KramImage.cpp b/libkram/kram/KramImage.cpp index cbbbe803..6c50937b 100644 --- a/libkram/kram/KramImage.cpp +++ b/libkram/kram/KramImage.cpp @@ -114,47 +114,48 @@ Image::Image() : _width(0), _height(0), _hasColor(false), _hasAlpha(false) // this routine converts KTX to float4, but don't need if already matching 4 channels // could do other formata conversions here on more supported formats (101010A2, etc). -// TODO: handle loading KTXImage with custom mips -// TODO: handle loading KTXImage with other texture types (cube, array, etc) - // TODO: image here is very specifically a single level of chunks of float4 or Color (RGBA8Unorm) // the encoder is only written to deal with those types. -// TODO: for png need to turn grid/horizontal strip into a vertical strip if not already -// that way can move through the chunks and overwrite them in-place. -// That would avoid copying each chunk out in the encode, but have to do in reodering. -// That way data is stored as KTX would instead of how PNG does. 
- bool Image::loadImageFromKTX(const KTXImage& image) { // copy the data into a contiguous array + // a verticaly chunke image, will be converted to chunks in encode _width = image.width; - _height = image.height; - - // TODO: handle more texture types with custom mips - if (image.textureType != MyMTLTextureType2D) { - KLOGE("Image", "Only support 2D texture type import for KTX"); - return false; - } - - // TODO: handle loading custom mips. Save will currently box filter to build - // remaining mips but for SDF or coverage scaled alpha test, need to - // preserve original data. Problem is that Image save to KTX/2 always does in-place - // mipgen. + _height = image.height * image.totalChunks(); if (image.header.numberOfMipmapLevels > 1) { - KLOGW("Image", "Skipping custom mip levels from KTX load"); + KLOGW("Image", "Skipping custom mip levels from KTX load, but will build them from top level"); } - // so can call through to blockSize - KTXHeader header; - header.initFormatGL(image.pixelFormat); - //int32_t blockSize = image.blockSize(); - _hasColor = isColorFormat(image.pixelFormat); _hasAlpha = isAlphaFormat(image.pixelFormat); // TODO: this assumes 1,2,3 channel srcData has no rowPadding to say 4 bytes + return convertToFourChannel(image); +} + +bool Image::convertToFourChannel(const KTXImage& image) { + + const uint32_t mipNumber = 0; + const auto& srcMipLevel = image.mipLevels[mipNumber]; + + // this is offset to a given level + uint64_t mipBaseOffset = srcMipLevel.offset; + const uint8_t* srcLevelData = image.fileData; + + vector mipStorage; + if (image.isSupercompressed()) { + + mipStorage.resize(image.mipLevelSize(mipNumber)); + if (!image.unpackLevel(mipNumber, srcLevelData + srcMipLevel.offset, mipStorage.data())) { + return false; + } + srcLevelData = mipStorage.data(); + + // going to upload from mipStorage temp array + mipBaseOffset = 0; + } switch (image.pixelFormat) { case MyMTLPixelFormatR8Unorm: @@ -166,18 +167,15 @@ bool Image::loadImageFromKTX(const KTXImage& image) case MyMTLPixelFormatRGBA8Unorm_sRGB: case MyMTLPixelFormatRGBA8Unorm: { - const uint8_t* srcPixels = - image.fileData + image.mipLevels[0].offset; - + const uint8_t* srcPixels = srcLevelData; + int32_t numSrcChannels = numChannelsOfFormat(image.pixelFormat); - // Note: clearing unspecified channels to 0000, not 0001 - // can set swizzleText when encoding _pixels.resize(4 * _width * _height); Color* dstPixels = (Color*)_pixels.data(); - Color dstTemp = {0,0,0,0}; + Color dstTemp = {0,0,0,255}; for (int32_t y = 0; y < _height; ++y) { int32_t y0 = y * _width; @@ -193,9 +191,6 @@ bool Image::loadImageFromKTX(const KTXImage& image) dstPixels[dstX] = dstTemp; } } - - // caller can use swizzle after loading data here, and even compress - // content break; } @@ -207,17 +202,14 @@ bool Image::loadImageFromKTX(const KTXImage& image) case MyMTLPixelFormatRGBA16Float: { int32_t numSrcChannels = numChannelsOfFormat(image.pixelFormat); - // Note: clearing unspecified channels to 0000, not 0001 - // can set swizzleText when encoding _pixelsFloat.resize(_width * _height); // treat as float for per channel copies float4* dstPixels = _pixelsFloat.data(); - const half* srcPixels = - (const half*)(image.fileData + image.mipLevels[0].offset); - - half4 dstTemp = half4((half)0); + const half* srcPixels = (const half*)srcLevelData; + + half4 dstTemp = toHalf4(float4m(0.0f, 0.0f, 0.0f, 1.0f)); for (int32_t y = 0; y < _height; ++y) { int32_t y0 = y * _width; @@ -235,12 +227,6 @@ bool Image::loadImageFromKTX(const KTXImage& image) 
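            // Layout note: _width/_height here describe the whole vertical strip
            // (_height = image.height * image.totalChunks() above), so chunk c of the
            // source occupies rows [c * image.height, (c + 1) * image.height) of the
            // converted pixels; the encoder later slices the strip back into chunks
            // using the chunksY/chunksCount it is handed.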
dstPixels[dstX] = toFloat4(dstTemp); } } - - // caller can swizzle - // caller can compress to BC6H or ASTC-HDR if encoders available - // some textures could even go to LDR, but would need to tonemap or - // clamp the values - break; } @@ -250,18 +236,15 @@ bool Image::loadImageFromKTX(const KTXImage& image) case MyMTLPixelFormatRGB32Float_internal: #endif case MyMTLPixelFormatRGBA32Float: { - const float* srcPixels = - (const float*)(image.fileData + image.mipLevels[0].offset); + const float* srcPixels = (const float*)srcLevelData; int32_t numSrcChannels = numChannelsOfFormat(image.pixelFormat); - // Note: clearing unspecified channels to 0000, not 0001 - // can set swizzleText when encoding _pixelsFloat.resize(_width * _height); // treat as float for per channel copies float4* dstPixels = _pixelsFloat.data(); - float4 dstTemp = float4m(0.0f); + float4 dstTemp = float4m(0.0f, 0.0f, 0.0f, 1.0f); for (int32_t y = 0; y < _height; ++y) { int32_t y0 = y * _width; @@ -277,12 +260,7 @@ bool Image::loadImageFromKTX(const KTXImage& image) dstPixels[dstX] = dstTemp; } } - - // caller can swizzle - // caller can compress to BC6H or ASTC-HDR if encoders available - // some textures could even go to LDR, but would need to tonemap or - // clamp the values - + break; } default: diff --git a/libkram/kram/KramImage.h b/libkram/kram/KramImage.h index 7378bf95..2619568c 100644 --- a/libkram/kram/KramImage.h +++ b/libkram/kram/KramImage.h @@ -31,8 +31,9 @@ enum ImageResizeFilter { struct MipConstructData; -// TODO: this can only holds one level of mips, so custom mips aren't possible. +// TODO: this can only hold one level of mips, so custom mips aren't possible. // Mipmap generation is all in-place to this storage. +// Multiple chunks are possible in strip or grid form. class Image { public: Image(); @@ -41,13 +42,14 @@ class Image { bool loadImageFromPixels(const vector& pixels, int32_t width, int32_t height, bool hasColor, bool hasAlpha); + // convert top level to single-image bool loadImageFromKTX(const KTXImage& image); // this is only for 2d images bool resizeImage(int32_t wResize, int32_t hResize, bool resizePow2, ImageResizeFilter filter = kImageResizeFilterPoint); - // return state + // this is width and height of the strip/grid, chunks may be copied out of this int32_t width() const { return _width; } int32_t height() const { return _height; } @@ -57,6 +59,9 @@ class Image { bool hasColor() const { return _hasColor; } bool hasAlpha() const { return _hasAlpha; } +private: + bool convertToFourChannel(const KTXImage& image); + private: // pixel size of image int32_t _width = 0; From 54354d127b644d46becf14ad83eb5c4ec35ba0f0 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 30 May 2021 14:53:51 -0700 Subject: [PATCH 088/901] kram - more accurate and faster KTX/2 loading, add lighting to albedo preview Track the chunk count in the Image when converting from KTX/2. Also use levelLength, not mipLevelSize. Also add some lighting when previewing albedo. This is to match the normal preview which is also lit. Clean up mipLevelSize -> mipLengthCalc to avoid confusion with levellLength. These should move to size_t. 
--- kramv/KramShaders.metal | 69 ++++++++++++++++++++-------------- libkram/kram/KTXImage.cpp | 14 +++---- libkram/kram/KTXImage.h | 4 +- libkram/kram/Kram.cpp | 9 ++--- libkram/kram/KramImage.cpp | 40 ++++++++++---------- libkram/kram/KramImage.h | 8 +++- libkram/kram/KramImageInfo.cpp | 8 ++++ 7 files changed, 89 insertions(+), 63 deletions(-) diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index 245b4ce8..2c2f9a6d 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -469,6 +469,37 @@ vertex ColorInOut DrawVolumeVS( return out; } +float4 doLighting(float4 albedo, float3 viewDir, float3 n) { + + float3 lightDir = normalize(float3(1,1,1)); + float3 lightColor = float3(1,1,1); + + // diffuse + float dotNLUnsat = dot(n, lightDir); + float dotNL = saturate(dotNLUnsat); + float3 diffuse = lightColor.xyz * dotNL; + + float3 specular = float3(0.0); + + // TODO: this renders bright in one quadrant of wrap preview, hard in ortho view + // specular + bool doSpecular = false; + if (doSpecular) { + float3 ref = normalize(reflect(viewDir, n)); + + // above can be interpolated + float dotRL = saturate(dot(ref, lightDir)); + dotRL = pow(dotRL, 4.0); // * saturate(dotNL * 8.0); // no spec without diffuse + specular = saturate(dotRL * lightColor.rgb); + } + + // Note: don't have any albedo yet, need second texture input + float3 ambient = mix(0.1, 0.3, saturate(dotNLUnsat * 0.5 + 0.5)); + albedo.xyz *= (ambient + diffuse + specular); + + return albedo; +} + // TODO: do more test shapes, but that affects eyedropper // generate and pass down tangents + bitanSign in the geometry @@ -540,42 +571,16 @@ float4 DrawPixels( in.tangent.w = -in.tangent.w; } - float3 lightDir = normalize(float3(1,1,1)); - float3 lightColor = float3(1,1,1); float3 n = c.xyz; // handle the basis here n = toFloat(transformNormal(toHalf(n), in.tangent, in.normal)); - // diffuse - float dotNLUnsat = dot(n, lightDir); - float dotNL = saturate(dotNLUnsat); - float3 diffuse = lightColor.xyz * dotNL; - - float3 specular = float3(0.0); - - // this renders bright in one quadrant of wrap preview, hard in ortho view - // specular - bool doSpecular = false; - if (doSpecular) { - float3 view = normalize(in.worldPos - uniforms.cameraPosition); - float3 ref = normalize(reflect(view, n)); - - // above can be interpolated - float dotRL = saturate(dot(ref, lightDir)); - dotRL = pow(dotRL, 4.0); // * saturate(dotNL * 8.0); // no spec without diffuse - specular = saturate(dotRL * lightColor.rgb); - } - - // Note: don't have any albedo yet, need second texture input - float3 ambient = mix(0.1, 0.3, saturate(dotNLUnsat * 0.5 + 0.5)); - c.xyz = ambient + diffuse + specular; - + float3 viewDir = normalize(in.worldPos - uniforms.cameraPosition); + c = doLighting(float4(1.0), viewDir, n); + c.a = 1; - - // TODO: add some specular, can this be combined with albedo texture in same folder? 
- // may want to change perspective for that, and give light controls } else { // to unorm @@ -583,6 +588,12 @@ float4 DrawPixels( c.xyz = toUnorm(c.xyz); } + // need an isAlbedo test + if (!uniforms.isSigned) { + float3 viewDir = normalize(in.worldPos - uniforms.cameraPosition); + c = doLighting(c, viewDir, toFloat(in.normal)); + } + // to premul, but also need to see without premul if (uniforms.isPremul) { c.xyz *= c.a; diff --git a/libkram/kram/KTXImage.cpp b/libkram/kram/KTXImage.cpp index 1e39f397..9ee2ee67 100644 --- a/libkram/kram/KTXImage.cpp +++ b/libkram/kram/KTXImage.cpp @@ -765,7 +765,7 @@ const char* supercompressionName(KTX2Supercompression type) // https://docs.unity3d.com/ScriptReference/Experimental.Rendering.GraphicsFormat.html // Unity only handles 4,5,6,8,10,12 square block dimensions -uint32_t KTXImage::mipLevelSize(uint32_t width_, uint32_t height_) const +uint32_t KTXImage::mipLengthCalc(uint32_t width_, uint32_t height_) const { // TODO: ktx has 4 byte row alignment, fix that in calcs and code // data isn't fully packed on explicit formats like r8, rg8, r16f. @@ -776,14 +776,14 @@ uint32_t KTXImage::mipLevelSize(uint32_t width_, uint32_t height_) const return count * size; } -uint32_t KTXImage::mipLevelSize(uint32_t mipNumber) const +uint32_t KTXImage::mipLengthCalc(uint32_t mipNumber) const { uint32_t w = width; uint32_t h = height; uint32_t d = depth; mipDown(w, h, d, mipNumber); - return mipLevelSize(w, h); + return mipLengthCalc(w, h); } uint32_t KTXImage::blockCountRows(uint32_t width_) const @@ -1157,7 +1157,7 @@ void KTXImage::initMipLevels(bool doMipmaps, int32_t mipMinSize, int32_t mipMaxS (h >= mipMinSize && h <= mipMaxSize)); if (keepMip) { - level.length = mipLevelSize(w, h); + level.length = mipLengthCalc(w, h); if (mipLevels.empty()) { // adjust the top dimensions @@ -1183,7 +1183,7 @@ void KTXImage::initMipLevels(bool doMipmaps, int32_t mipMinSize, int32_t mipMaxS if (keepMip && (mipLevels.size() < (size_t)maxMipLevels)) { // length needs to be multiplied by chunk size before writing out - level.length = mipLevelSize(w, h); + level.length = mipLengthCalc(w, h); if (mipLevels.empty()) { // adjust the top dimensions @@ -1204,7 +1204,7 @@ void KTXImage::initMipLevels(bool doMipmaps, int32_t mipMinSize, int32_t mipMaxS } else { // length needs to be multiplied by chunk size before writing out - level.length = mipLevelSize(w, h); + level.length = mipLengthCalc(w, h); mipLevels.push_back(level); } @@ -1233,7 +1233,7 @@ void KTXImage::initMipLevels(size_t mipOffset) int32_t d = depth; for (uint32_t i = 0; i < numMips; ++i) { - size_t dataSize = mipLevelSize(w, h); + size_t dataSize = mipLengthCalc(w, h); uint32_t levelSize = dataSize * numChunks; diff --git a/libkram/kram/KTXImage.h b/libkram/kram/KTXImage.h index d2880971..771b3d9c 100644 --- a/libkram/kram/KTXImage.h +++ b/libkram/kram/KTXImage.h @@ -315,8 +315,8 @@ class KTXImage { // mip void mipDimensions(uint32_t mipNumber, uint32_t& width_, uint32_t& height_, uint32_t& depth_) const; - uint32_t mipLevelSize(uint32_t width_, uint32_t height_) const; - uint32_t mipLevelSize(uint32_t mipNumber) const; + uint32_t mipLengthCalc(uint32_t width_, uint32_t height_) const; + uint32_t mipLengthCalc(uint32_t mipNumber) const; size_t mipLengthLargest() const { return mipLevels[0].length; } size_t mipLength(uint32_t mipNumber) const { return mipLevels[mipNumber].length; } diff --git a/libkram/kram/Kram.cpp b/libkram/kram/Kram.cpp index 898b0d50..b4d4c75d 100644 --- a/libkram/kram/Kram.cpp +++ 
b/libkram/kram/Kram.cpp @@ -102,13 +102,12 @@ inline Color toGrayscaleRec709(Color c, const Mipper& mipper) { bool LoadKtx(const uint8_t* data, size_t dataSize, Image& sourceImage) { KTXImage image; - if (!image.open(data, dataSize)) { + bool isInfoOnly = true; // don't decompress entire image, only going to unpack top level mip + if (!image.open(data, dataSize, isInfoOnly)) { return false; } - // many different types of KTX files, for now only import from 2D type - // and only pull the first mip, but want to be able to pull custom mips from - // many types + // this loads the top level into the sourceImage, caller must set chunkY to totalChunks return sourceImage.loadImageFromKTX(image); } @@ -1291,7 +1290,7 @@ string kramInfoToString(const string& srcFilename, bool isVerbose) // handle png and ktx if (isPNG) { // This was taken out of SetupSourceImage, dont want to decode PNG yet - // just peek tha the header. + // just peek at the header. const uint8_t* data = nullptr; int32_t dataSize = 0; diff --git a/libkram/kram/KramImage.cpp b/libkram/kram/KramImage.cpp index 6c50937b..aaf57685 100644 --- a/libkram/kram/KramImage.cpp +++ b/libkram/kram/KramImage.cpp @@ -131,6 +131,9 @@ bool Image::loadImageFromKTX(const KTXImage& image) _hasColor = isColorFormat(image.pixelFormat); _hasAlpha = isAlphaFormat(image.pixelFormat); + // preserve chunk count from the conversion + setChunksY(image.totalChunks()); + // TODO: this assumes 1,2,3 channel srcData has no rowPadding to say 4 bytes return convertToFourChannel(image); } @@ -144,10 +147,11 @@ bool Image::convertToFourChannel(const KTXImage& image) { uint64_t mipBaseOffset = srcMipLevel.offset; const uint8_t* srcLevelData = image.fileData; + vector mipStorage; if (image.isSupercompressed()) { - mipStorage.resize(image.mipLevelSize(mipNumber)); + mipStorage.resize(image.levelLength(mipNumber)); if (!image.unpackLevel(mipNumber, srcLevelData + srcMipLevel.offset, mipStorage.data())) { return false; } @@ -1994,19 +1998,23 @@ bool KramEncoder::createMipsFromChunks( return true; } -// TODO: try to elim KTXImage passed into this bool KramEncoder::compressMipLevel(const ImageInfo& info, KTXImage& image, ImageData& mipImage, TextureData& outputTexture, int32_t mipStorageSize) const { int32_t w = mipImage.width; - + int32_t h = mipImage.height; + const Color* srcPixelData = mipImage.pixels; const float4* srcPixelDataFloat4 = mipImage.pixelsFloat; - int32_t h = mipImage.height; - ; - + // TODO: try to elim KTXImage passed into this + // only use of image (can determine this from format) + int32_t numBlocks = image.blockCount(w, h); + int32_t blockSize = image.blockSize(); + int32_t mipLength = image.mipLengthCalc(w, h); + Int2 blockDims = image.blockDims(); + if (info.isExplicit) { switch (info.pixelFormat) { case MyMTLPixelFormatR8Unorm: @@ -2014,7 +2022,7 @@ bool KramEncoder::compressMipLevel(const ImageInfo& info, KTXImage& image, // no RGB8 writes case MyMTLPixelFormatRGBA8Unorm: case MyMTLPixelFormatRGBA8Unorm_sRGB: { - int32_t count = image.blockSize() / 1; + int32_t count = blockSize / 1; uint8_t* dst = (uint8_t*)outputTexture.data.data(); @@ -2042,7 +2050,7 @@ bool KramEncoder::compressMipLevel(const ImageInfo& info, KTXImage& image, case MyMTLPixelFormatRG16Float: // no RGB16Float writes case MyMTLPixelFormatRGBA16Float: { - int32_t count = image.blockSize() / 2; + int32_t count = blockSize / 2; half* dst = (half*)outputTexture.data.data(); @@ -2069,7 +2077,7 @@ bool KramEncoder::compressMipLevel(const ImageInfo& info, KTXImage& image, case 
MyMTLPixelFormatRG32Float: // no RGB32Float writes case MyMTLPixelFormatRGBA32Float: { - int32_t count = image.blockSize() / 4; + int32_t count = blockSize / 4; float* dst = (float*)outputTexture.data.data(); @@ -2276,12 +2284,9 @@ bool KramEncoder::compressMipLevel(const ImageInfo& info, KTXImage& image, const int32_t blockDim = 4; int32_t blocks_x = (w + blockDim - 1) / blockDim; //int32_t blocks_y = (h + blockDim - 1) / blockDim; - int32_t blockSize = image.blockSize(); for (int32_t y = 0; y < h; y += blockDim) { for (int32_t x = 0; x < w; x += blockDim) { - - // Have to copy to temp block, since encode doesn't test w/h edges // copy src to 4x4 clamping the edge pixels // TODO: do clamped edge pixels get weighted more then on non-multiple of 4 images ? @@ -2386,7 +2391,7 @@ bool KramEncoder::compressMipLevel(const ImageInfo& info, KTXImage& image, ATEEncoder encoder; success = encoder.Encode( - (int32_t)metalType(pixelFormatRemap), image.mipLevelSize(w, h), image.blockDims().y, + (int32_t)metalType(pixelFormatRemap), mipLength, blockDims.y, info.hasAlpha, info.isColorWeighted, info.isVerbose, info.quality, w, h, (const uint8_t*)srcPixelData, outputTexture.data.data()); @@ -2398,7 +2403,7 @@ bool KramEncoder::compressMipLevel(const ImageInfo& info, KTXImage& image, // find the 8,1 block and print it // uint32_t numRowBlocks = image.blockCountRows(w); -// const uint8_t* block = outputTexture.data.data() + (numRowBlocks * 1 + 8) * image.blockSize(); +// const uint8_t* block = outputTexture.data.data() + (numRowBlocks * 1 + 8) * blockSize; // printBCBlock(block, pixelFormatRemap); } #endif @@ -2466,9 +2471,6 @@ bool KramEncoder::compressMipLevel(const ImageInfo& info, KTXImage& image, // have to remap endpoints to signed values (-1,1) to (0,127) for // (0,1) and (-128,-127,0) for (-1,0)/ else if (success && info.isSigned && doRemapSnormEndpoints) { - int32_t numBlocks = image.blockCount(w, h); - int32_t blockSize = image.blockSize(); - int32_t blockSize16 = blockSize / sizeof(uint16_t); uint16_t* blockPtr = (uint16_t*)outputTexture.data.data(); @@ -2506,7 +2508,7 @@ bool KramEncoder::compressMipLevel(const ImageInfo& info, KTXImage& image, if (info.useATE) { ATEEncoder encoder; bool success = encoder.Encode( - (int32_t)metalType(info.pixelFormat), image.mipLevelSize(w, h), image.blockDims().y, + (int32_t)metalType(info.pixelFormat), mipLength, blockDims.y, info.hasAlpha, info.isColorWeighted, info.isVerbose, info.quality, w, h, (const uint8_t*)srcPixelData, outputTexture.data.data()); @@ -2555,7 +2557,7 @@ bool KramEncoder::compressMipLevel(const ImageInfo& info, KTXImage& image, } // not generating 3d ASTC ever, even for 3D textures - Int2 blockDims = image.blockDims(); + //Int2 blockDims = image.blockDims(); // setup flags uint32_t flags = 0; diff --git a/libkram/kram/KramImage.h b/libkram/kram/KramImage.h index 2619568c..58bfaa4e 100644 --- a/libkram/kram/KramImage.h +++ b/libkram/kram/KramImage.h @@ -58,7 +58,11 @@ class Image { bool hasColor() const { return _hasColor; } bool hasAlpha() const { return _hasAlpha; } - + + // if converted a KTX/2 image to Image, then this field will be non-zero + uint32_t chunksY() const { return _chunksY; } + void setChunksY(uint32_t chunksY) { _chunksY = chunksY; } + private: bool convertToFourChannel(const KTXImage& image); @@ -77,6 +81,8 @@ class Image { vector _pixels; // TODO: change to Color? 
//vector _pixelsHalf; // TODO: add support to import fp16 vector _pixelsFloat; + + uint32_t _chunksY = 0; }; class KramDecoderParams { diff --git a/libkram/kram/KramImageInfo.cpp b/libkram/kram/KramImageInfo.cpp index aba4df27..ac5c5801 100644 --- a/libkram/kram/KramImageInfo.cpp +++ b/libkram/kram/KramImageInfo.cpp @@ -1102,6 +1102,14 @@ void ImageInfo::initWithSourceImage(Image& sourceImage) isHDR = srcPixelsFloat != nullptr; + // transfer the chunk count, this was a ktx/2 import + if (sourceImage.chunksY() > 0) { + chunksX = 1; + + chunksY = + chunksCount = sourceImage.chunksY();; + } + // these come from png header, but hasn't walked pixels yet if (!sourceImage.hasAlpha()) { hasAlpha = false; From 7741e848a357d14289706fa652316ce55ef75266 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 30 May 2021 15:31:52 -0700 Subject: [PATCH 089/901] kramv - fix specular for 3d views Still causing a problem on mesh0 in 2dview likely from high non-uniform scaling. --- kramv/KramShaders.metal | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index 2c2f9a6d..d9982b94 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -481,9 +481,7 @@ float4 doLighting(float4 albedo, float3 viewDir, float3 n) { float3 specular = float3(0.0); - // TODO: this renders bright in one quadrant of wrap preview, hard in ortho view - // specular - bool doSpecular = false; + bool doSpecular = true; if (doSpecular) { float3 ref = normalize(reflect(viewDir, n)); @@ -493,9 +491,14 @@ float4 doLighting(float4 albedo, float3 viewDir, float3 n) { specular = saturate(dotRL * lightColor.rgb); } - // Note: don't have any albedo yet, need second texture input float3 ambient = mix(0.1, 0.3, saturate(dotNLUnsat * 0.5 + 0.5)); - albedo.xyz *= (ambient + diffuse + specular); + + // attenuate, and not saturate below, so no HDR yet + specular *= 0.3; + diffuse *= 0.7; + //ambient *= 0.2; + + albedo.xyz *= saturate(ambient + diffuse + specular); return albedo; } @@ -600,9 +603,10 @@ float4 DrawPixels( } } + // this allows viewing wrap bool doShowUV = false; if (doShowUV) { - c = float4(in.texCoord, 0.0, 1.0); + c = float4(fract(in.texCoord), 0.0, 1.0); } } else { From 3f95d126bbce6f365cbd818b2408a26fb399640c Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 30 May 2021 16:54:40 -0700 Subject: [PATCH 090/901] kramv - fix crack in 3d rendering when clamp is used by insetting the uv coords. Wrap doesn't hit this, but if border/transparent color is hit, then a visible gap is displayed on the primitives where 0/1 meet up. 
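The gist of the fix, as a plain C++ mirror of the shader change (the type and function names here are only for illustration): clamp the interpolated uv inward by half a texel of the sampled mip, so clamp-mode bilinear filtering near the seam where 0 and 1 meet never blends in the border/transparent color.

#include <algorithm>

// illustrative sketch of the shader's clamp, not viewer code
struct UV { float u, v; };

// uv is in [0,1]; texW/texH are the pixel dimensions of the sampled mip level
UV insetUV(UV uv, float texW, float texH) {
    float hu = 0.5f / texW;   // half a texel in u
    float hv = 0.5f / texH;   // half a texel in v
    return { std::clamp(uv.u, hu, 1.0f - hu),
             std::clamp(uv.v, hv, 1.0f - hv) };
}

Wrap mode is left untouched, since repeat sampling has no clamp boundary to bleed across.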
--- kramv/KramRenderer.mm | 8 ++++++-- kramv/KramShaders.h | 1 + kramv/KramShaders.metal | 9 ++++++++- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index 8cbc57a6..4bc1bdc5 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -417,9 +417,11 @@ - (MTKMesh*)_createMeshAsset:(const char*)name mdlMesh:(MDLMesh*)mdlMesh doFlipU { id uvs = mdlMesh.vertexBuffers[BufferIndexMeshUV0]; float2* uvData = (float2*)uvs.map.bytes; - + for (uint32_t i = 0; i < mdlMesh.vertexCount; ++i) { - uvData[i].x = 1.0f - uvData[i].x; + float2& uv = uvData[i]; + + uv.x = 1.0f - uv.x; } } @@ -890,9 +892,11 @@ - (void)_setUniformsLevel:(UniformsLevel&)uniforms mipLOD:(int32_t)mipLOD uniforms.arrayOrSlice = 0; uniforms.face = 0; + uniforms.textureSize = float4m(0.0f); MyMTLTextureType textureType = MyMTLTextureType2D; if (_colorMap) { textureType = (MyMTLTextureType)_colorMap.textureType; + uniforms.textureSize = float4m(_colorMap.width, _colorMap.height, 1.0f/_colorMap.width, 1.0f/_colorMap.height); } // TODO: set texture specific uniforms, but using single _colorMap for now diff --git a/kramv/KramShaders.h b/kramv/KramShaders.h index cb0d33e1..1cc36b4a 100644 --- a/kramv/KramShaders.h +++ b/kramv/KramShaders.h @@ -128,6 +128,7 @@ struct UniformsLevel { uint32_t face; uint32_t arrayOrSlice; simd::float2 drawOffset; // pixel offset to apply + simd::float4 textureSize; // width, height, 1/width, 1/height }; // This is all tied to a single level sample diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index d9982b94..6a0a6a31 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -377,12 +377,19 @@ ColorInOut DrawImageFunc( out.position = uniforms.projectionViewMatrix * worldPos; // this is a 2d coord always which is 0 to 1, or 0 to 2 - out.texCoord.xy = in.texCoord; if (uniforms.isWrap) { // can make this a repeat value uniform float wrapAmount = 2.0; + out.texCoord.xy = in.texCoord; out.texCoord.xy *= wrapAmount; } + else { + // inset from edge by 1 texel, to avoid clamp boundary error + // does this have to adjust for mipLOD too? + float2 halfPixel = 0.5 * uniformsLevel.textureSize.zw; + + out.texCoord.xy = clamp(in.texCoord, halfPixel, float2(1.0) - halfPixel); + } // potentially 3d coord, and may be -1 to 1 out.texCoordXYZ.xy = out.texCoord; From 524f772a87074800115542eac0e1f8a3a94be49f Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 30 May 2021 18:57:21 -0700 Subject: [PATCH 091/901] kramv - sort archive so similar files are grouped more logically miniz sorts the entries in the zip by filename, but doesn't expose the remap indices since it is optional internal state. So expose that, and then use the remap table to order the ZipEntry list built up. --- kramv/KramRenderer.mm | 8 +++----- kramv/KramViewerMain.mm | 4 ++-- libkram/kram/KramZipHelper.cpp | 22 ++++++++++++++-------- libkram/miniz/miniz.cpp | 7 +++++++ libkram/miniz/miniz.h | 4 ++++ 5 files changed, 30 insertions(+), 15 deletions(-) diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index 4bc1bdc5..6f971ac9 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -473,13 +473,11 @@ - (void)_loadAssets _meshBox = [self _createMeshAsset:"MeshBox" mdlMesh:mdlMesh doFlipUV:false]; - // TOOO: have more shape types - this is box, need thin box (plane), and sphere, and cylinder - // eventually load usdz and gltf2 custom model. Need 3d manipulation of shape like arcball - // and eyedropper is more complex. 
- // The sphere/cylinder shapes are v increasing in -Y, and u increasing conterclockwise, // u is the opposite direction to the cube/plane, so need to flip those coords - // I think this has also flipped the tangents the wrong way. + // I think this has also flipped the tangents the wrong way, but building tangents after + // flipping u direction doesn't flip the bitangent. So bitangent.w is flipped. + // For sanity, Tangent is increasing u, and Bitangent is increasing v. // All prims are viewed with +Y, not +Z up diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index 027b57ad..0620cd28 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -1876,7 +1876,7 @@ - (void)handleKey:(uint32_t)keyCode isShiftKeyDown:(bool)isShiftKeyDown } _showSettings->meshNumber = _showSettings->meshNumber % _showSettings->meshCount; - sprintf(text, "Mesh %d/%d", _showSettings->meshNumber, _showSettings->meshCount); + sprintf(text, "Mesh %d %s", _showSettings->meshNumber, "Shape"); // TODO: put meshName in _showSettings isChanged = true; } break; @@ -2057,7 +2057,7 @@ -(BOOL)advanceTextureFromAchive:(BOOL)increment if (increment) _fileIndex = (_fileIndex + 1) % numEntries; else - _fileIndex = (_fileIndex - 1 + numEntries) % numEntries; + _fileIndex = (_fileIndex + numEntries - 1) % numEntries; // now lookup the filename and data at that entry const auto& entry = _zip.zipEntrys()[_fileIndex]; diff --git a/libkram/kram/KramZipHelper.cpp b/libkram/kram/KramZipHelper.cpp index 4123653d..571201fd 100644 --- a/libkram/kram/KramZipHelper.cpp +++ b/libkram/kram/KramZipHelper.cpp @@ -66,6 +66,8 @@ void ZipHelper::initZipEntryTables() { totalFilenameSizes += mz_zip_reader_get_filename(zip.get(), i, nullptr, 0); } + const uint32_t* remappedIndices = mz_zip_reader_sorted_file_indices(zip.get()); + allFilenames.resize(totalFilenameSizes); // allocate an array with the data from the archive that we care about @@ -75,16 +77,18 @@ void ZipHelper::initZipEntryTables() { uint64_t length = 0; for (int32_t i = 0; i < numFiles; ++i) { + uint32_t sortedFileIndex = remappedIndices[i]; + // file_stat does quite a bit of work, but only want a few fields out of it mz_zip_archive_file_stat stat; - mz_zip_reader_file_stat(zip.get(), i, &stat); + mz_zip_reader_file_stat(zip.get(), sortedFileIndex, &stat); if (stat.m_is_directory || !stat.m_is_supported) { continue; } // we may skip over directories above // so zipEntry array entry doesn't tie with fileIndex - assert((uint32_t)i == stat.m_file_index); + //assert((uint32_t)i == stat.m_file_index); // skipping directories and unsupported items @@ -107,6 +111,7 @@ void ZipHelper::initZipEntryTables() { index++; } + // resize, since entries and filenames were skipped // this should change the addresses used above allFilenames.resize(length); @@ -124,15 +129,16 @@ const ZipEntry* ZipHelper::zipEntry(const char* name) const { return nullptr; } - // have to search back until file index is found + // have to find the zipEntry, have skipped and sorted entries by filename // the array build skips directories, so those can throw off the fileIndex + int32_t numEntries = (int32_t)_zipEntrys.size(); - int32_t search = index; - if (search >= numEntries) { - search = numEntries - 1; - } +// int32_t search = index; +// if (search >= numEntries) { +// search = numEntries - 1; +// } - for (int32_t i = search; i >= 0; --i) { + for (int32_t i = 0; i < numEntries; ++i) { if (_zipEntrys[i].fileIndex == index) { return &_zipEntrys[i]; } diff --git a/libkram/miniz/miniz.cpp 
b/libkram/miniz/miniz.cpp index 62ea05c4..431c442f 100644 --- a/libkram/miniz/miniz.cpp +++ b/libkram/miniz/miniz.cpp @@ -3417,6 +3417,13 @@ static mz_bool mz_zip_reader_init_internal(mz_zip_archive *pZip, mz_uint flags) return MZ_TRUE; } +const mz_uint32* mz_zip_reader_sorted_file_indices(mz_zip_archive *pZip) +{ + // these aren't offsets, it's a sorted array of the file index elements + return (const mz_uint32*)(pZip->m_pState->m_sorted_central_dir_offsets.m_p); +} + + static MZ_FORCEINLINE mz_bool mz_zip_reader_filename_less(const mz_zip_array *pCentral_dir_array, const mz_zip_array *pCentral_dir_offsets, mz_uint l_index, mz_uint r_index) { const mz_uint8 *pL = &MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_array, mz_uint8, MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_offsets, mz_uint32, l_index)), *pE; diff --git a/libkram/miniz/miniz.h b/libkram/miniz/miniz.h index e36d66e8..8867c3c6 100644 --- a/libkram/miniz/miniz.h +++ b/libkram/miniz/miniz.h @@ -1209,6 +1209,10 @@ mz_bool mz_zip_is_zip64(mz_zip_archive *pZip); /* The current max supported size is <= MZ_UINT32_MAX. */ size_t mz_zip_get_central_dir_size(mz_zip_archive *pZip); +/* Alec change - if files are sorted by filename, then this returns the remap table for each fileIndex */ +/* This was previously internal state, so use with caution. It's an array of mz_uint32 */ +const mz_uint32* mz_zip_reader_sorted_file_indices(mz_zip_archive *pZip); + /* Extracts a archive file to a memory buffer using no memory allocation. */ /* There must be at least enough room on the stack to store the inflator's state (~34KB or so). */ mz_bool mz_zip_reader_extract_to_mem_no_alloc(mz_zip_archive *pZip, mz_uint file_index, void *pBuf, size_t buf_size, mz_uint flags, void *pUser_read_buf, size_t user_read_buf_size); From 84af8cc3d71820437ce21a0e6d97b9e45917145c Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 30 May 2021 19:31:57 -0700 Subject: [PATCH 092/901] kramv - fixup scale of the initial cube When scale differs a lot, the specular starts to really go overbright. So in 2D case, keep scaleZ set to max(scaleX, scaleY). 
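A small sketch of the scale choice in plain C++ (the helper name is made up): a W x H texture stretched over the unit cube with z left at 1 produces a wildly non-uniform model scale, and the inverse-scale-squared normal correction then pushes specular far too hot; taking z from the larger of the two axes keeps the scale roughly uniform.

#include <algorithm>

// illustrative only; the viewer builds this directly into _modelMatrix
struct ScaleSketch { float x, y, z; };

ScaleSketch modelScaleForTexture(float texWidth, float texHeight) {
    float sx = std::max(1.0f, texWidth);
    float sy = std::max(1.0f, texHeight);
    float sz = std::max(sx, sy);  // not 1.0f, so the x/z and y/z ratios stay tame
    return { sx, sy, sz };
}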
--- kramv/KramRenderer.mm | 21 +++++++++------------ libkram/kram/KramZipHelper.cpp | 17 +++++------------ 2 files changed, 14 insertions(+), 24 deletions(-) diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index 6f971ac9..a2045e42 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -711,7 +711,8 @@ - (BOOL)loadTextureImpl:(const string&)fullFilename isTextureChanged:(BOOL)isTex // have one of these for each texture added to the viewer float scaleX = MAX(1, texture.width); float scaleY = MAX(1, texture.height); - _modelMatrix = float4x4(float4m(scaleX, scaleY, 1.0f, 1.0f)); // non uniform scale + float scaleZ = MAX(scaleX, scaleY); // don't want 1.0f, or specular is all off due to extreme scale differences + _modelMatrix = float4x4(float4m(scaleX, scaleY, scaleZ, 1.0f)); // non uniform scale _modelMatrix = _modelMatrix * matrix4x4_translation(0.0f, 0.0f, -1.0); // set z=-1 unit back // uniform scaled 3d primitiv @@ -726,17 +727,15 @@ - (float4x4)computeImageTransform:(float)panX panY:(float)panY zoom:(float)zoom // translate float4x4 panTransform = matrix4x4_translation(-panX, panY, 0.0); + // non-uniform scale is okay here, only affects ortho volume + float4x4 viewMatrix = float4x4(float4m(zoom, zoom, 1.0f, 1.0f)); + viewMatrix = panTransform * viewMatrix; + // scale if (_is3DView) { - float4x4 viewMatrix = float4x4(float4m(zoom, zoom, 1.0f, 1.0f)); // non-uniform scale is okay, affects ortho volume - viewMatrix = panTransform * viewMatrix; - return _projectionMatrix * viewMatrix * _modelMatrix3D; } else { - float4x4 viewMatrix = float4x4(float4m(zoom, zoom, 1.0f, 1.0f)); // non-uniform scale - viewMatrix = panTransform * viewMatrix; - return _projectionMatrix * viewMatrix * _modelMatrix; } } @@ -759,11 +758,9 @@ float3 inverseScaleSquared(float4x4 m) { // don't divide by 0 float3 invScaleSquared = recip(simd::max(float3m(0.0001 * 0.0001), scaleSquared)); - // TODO: could also identify determinant here for flipping orient - - // Note: in 2D, scales is x,x,1, so always apply invScale2, - // and that messes up preview normals on sphere/cylinder. - // May be from trying to do all that math in half. + // TODO: could also identify determinant here for flipping orientation + // all shapes with negative determinant need orientation flipped for backfacing + // and need to be rendered together return invScaleSquared; } diff --git a/libkram/kram/KramZipHelper.cpp b/libkram/kram/KramZipHelper.cpp index 571201fd..a41a1141 100644 --- a/libkram/kram/KramZipHelper.cpp +++ b/libkram/kram/KramZipHelper.cpp @@ -86,11 +86,8 @@ void ZipHelper::initZipEntryTables() { continue; } - // we may skip over directories above - // so zipEntry array entry doesn't tie with fileIndex - //assert((uint32_t)i == stat.m_file_index); - // skipping directories and unsupported items + // also the ordering here is in filename not fileIndex order // copy all filenames into fixed storage that's all // contguous, so that can alis the strings for lookup @@ -131,13 +128,9 @@ const ZipEntry* ZipHelper::zipEntry(const char* name) const { // have to find the zipEntry, have skipped and sorted entries by filename // the array build skips directories, so those can throw off the fileIndex + // TODO: do a binary search here, and don't use mz_zip call? 
int32_t numEntries = (int32_t)_zipEntrys.size(); -// int32_t search = index; -// if (search >= numEntries) { -// search = numEntries - 1; -// } - for (int32_t i = 0; i < numEntries; ++i) { if (_zipEntrys[i].fileIndex == index) { return &_zipEntrys[i]; @@ -195,7 +188,8 @@ bool ZipHelper::extract(int32_t fileIndex, void* buffer, uint64_t bufferSize) co mz_bool success = mz_zip_reader_extract_to_mem( zip.get(), fileIndex, buffer, bufferSize, 0); - /* TODO: alternative using optimized Apple library + /* TODO: alternative using optimized Apple library libCompression + this can do partial compression, so don't check uncompressedSize always f.e. can look at first 64-byte header on KTX files which is much faster. @@ -225,13 +219,12 @@ bool ZipHelper::extractRaw(const char *filename, const uint8_t** bufferData, uin return false; } - // this should really be in stat data + // this should really be cached with zipEntry data const uint8_t* data = mz_zip_reader_get_raw_data(zip.get(), entry->fileIndex); if (!data) { return false; } - // not sure if this is start of *bufferData = data; bufferDataSize = stat.m_uncomp_size; From d57d06faab6be4d498613a4bafa48d212e3ad8d0 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 30 May 2021 20:00:15 -0700 Subject: [PATCH 093/901] kramv - turn off inset on 2d view, or on small 4x4 textures can see the inset by a half pixel The 4x4 blocks are 3x3 when insetting by half pixel. So only turn this on for the 3d views. --- kramv/KramRenderer.mm | 12 ++++++------ kramv/KramShaders.h | 1 + kramv/KramShaders.metal | 5 ++++- kramv/KramViewerBase.h | 4 ++++ 4 files changed, 15 insertions(+), 7 deletions(-) diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index a2045e42..e80ad4a7 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -91,7 +91,6 @@ @implementation Renderer //MTKMesh *_meshCylinder; MTKMesh *_meshCapsule; MTKMeshBufferAllocator *_metalAllocator; - bool _is3DView; // whether view is 3d for now ShowSettings* _showSettings; } @@ -732,7 +731,7 @@ - (float4x4)computeImageTransform:(float)panX panY:(float)panY zoom:(float)zoom viewMatrix = panTransform * viewMatrix; // scale - if (_is3DView) { + if (_showSettings->is3DView) { return _projectionMatrix * viewMatrix * _modelMatrix3D; } else { @@ -819,22 +818,23 @@ - (void)_updateGameState uniforms.channels = (ShaderTextureChannels)_showSettings->channels; // crude shape experiment - _is3DView = true; + _showSettings->is3DView = true; switch(_showSettings->meshNumber) { - case 0: _mesh = _meshBox; _is3DView = false; break; + case 0: _mesh = _meshBox; _showSettings->is3DView = false; break; case 1: _mesh = _meshBox; break; case 2: _mesh = _meshSphere; break; //case 3: _mesh = _meshCylinder; break; case 3: _mesh = _meshCapsule; break; } - + uniforms.is3DView = _showSettings->is3DView; + // translate float4x4 panTransform = matrix4x4_translation(-_showSettings->panX, _showSettings->panY, 0.0); // scale float zoom = _showSettings->zoom; - if (_is3DView) { + if (_showSettings->is3DView) { _viewMatrix3D = float4x4(float4m(zoom, zoom, 1.0f, 1.0f)); // non-uniform _viewMatrix3D = panTransform * _viewMatrix3D; diff --git a/kramv/KramShaders.h b/kramv/KramShaders.h index 1cc36b4a..d81b866e 100644 --- a/kramv/KramShaders.h +++ b/kramv/KramShaders.h @@ -108,6 +108,7 @@ struct Uniforms bool isWrap; bool isSDF; bool isPreview; + bool is3DView; uint32_t numChannels; diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index 6a0a6a31..11786bdb 100644 --- a/kramv/KramShaders.metal +++ 
b/kramv/KramShaders.metal @@ -383,13 +383,16 @@ ColorInOut DrawImageFunc( out.texCoord.xy = in.texCoord; out.texCoord.xy *= wrapAmount; } - else { + else if (uniforms.is3DView) { // inset from edge by 1 texel, to avoid clamp boundary error // does this have to adjust for mipLOD too? float2 halfPixel = 0.5 * uniformsLevel.textureSize.zw; out.texCoord.xy = clamp(in.texCoord, halfPixel, float2(1.0) - halfPixel); } + else { + out.texCoord.xy = in.texCoord; + } // potentially 3d coord, and may be -1 to 1 out.texCoordXYZ.xy = out.texCoord; diff --git a/kramv/KramViewerBase.h b/kramv/KramViewerBase.h index 5976260c..b985f75d 100644 --- a/kramv/KramViewerBase.h +++ b/kramv/KramViewerBase.h @@ -99,6 +99,10 @@ class ShowSettings { // this mode shows the content with lighting or with bilinear/mips active bool isPreview = false; + // the 2d view doesn't want to inset pixels for clamp, or point sampling is thrown off + // especially on small 4x4 textures + bool is3DView = false; + // TODO: Might eliminate this, since mips are either built with or without srgb // and disabling with a MTLView caused many flags to have to be set on MTLTexture //bool isSRGBShown = true; From 79e3a97d3de55c82d65737707377442935a8d6e3 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 30 May 2021 20:30:40 -0700 Subject: [PATCH 094/901] kramv - reduce the inset so only small amount of pixel lost, fix keyDown handling for unhandled keys Still not seeing keyDown events while mouseMove is passed by NSTrackingArea. Seems like an AppKit bug, or some flag isn't set. --- kramv/KramShaders.metal | 5 +++-- kramv/KramViewerMain.mm | 17 ++++++++++++++--- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index 11786bdb..0021f1ee 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -384,9 +384,10 @@ ColorInOut DrawImageFunc( out.texCoord.xy *= wrapAmount; } else if (uniforms.is3DView) { - // inset from edge by 1 texel, to avoid clamp boundary error + // inset from edge by a fraction of a pixel, to avoid clamp boundary error // does this have to adjust for mipLOD too? - float2 halfPixel = 0.5 * uniformsLevel.textureSize.zw; + float2 onePixel = uniformsLevel.textureSize.zw; + float2 halfPixel = (1.0/16.0) * onePixel; out.texCoord.xy = clamp(in.texCoord, halfPixel, float2(1.0) - halfPixel); } diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index 0620cd28..3091d6ef 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -1517,10 +1517,15 @@ - (void)keyDown:(NSEvent *)theEvent bool isShiftKeyDown = theEvent.modifierFlags & NSEventModifierFlagShift; uint32_t keyCode = theEvent.keyCode; - [self handleKey:keyCode isShiftKeyDown:isShiftKeyDown]; + bool isHandled = [self handleKey:keyCode isShiftKeyDown:isShiftKeyDown]; + if (!isHandled) + { + // this will bonk + [super keyDown:theEvent]; + } } -- (void)handleKey:(uint32_t)keyCode isShiftKeyDown:(bool)isShiftKeyDown +- (bool)handleKey:(uint32_t)keyCode isShiftKeyDown:(bool)isShiftKeyDown { // Some data depends on the texture data (isSigned, isNormal, ..) 
bool isChanged = false; @@ -1545,7 +1550,7 @@ - (void)handleKey:(uint32_t)keyCode isShiftKeyDown:(bool)isShiftKeyDown case Key::U: // this means no image loaded yet if (_noImageLoaded) { - return; + return true; } _buttonStack.hidden = !_buttonStack.hidden; @@ -1936,6 +1941,9 @@ - (void)handleKey:(uint32_t)keyCode isShiftKeyDown:(bool)isShiftKeyDown isChanged = true; } break; + default: + // non-handled key + return false; } if (!text.empty()) { @@ -1949,6 +1957,7 @@ - (void)handleKey:(uint32_t)keyCode isShiftKeyDown:(bool)isShiftKeyDown if (isChanged) { self.needsDisplay = YES; } + return true; } @@ -2342,6 +2351,8 @@ - (void)viewDidLoad // this is better than requesting mousemoved events, they're only sent when cursor is inside _trackingArea = [[NSTrackingArea alloc] initWithRect:_view.bounds options: (NSTrackingMouseEnteredAndExited | NSTrackingMouseMoved | + + //NSTrackingActiveWhenFirstResponder NSTrackingActiveInActiveApp //NSTrackingActiveInKeyWindow ) From dd20b705aafd46e7ccc4c858fb8c54441fbda1c6 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 30 May 2021 23:12:18 -0700 Subject: [PATCH 095/901] kramv - allow archive reloads Added timestamp to track archive mods. Also re-look up the previous filename, since it may not be in archive or at the same fileIndex. --- kramv/KramViewerMain.mm | 43 +++++++++++++++++++++++++-------- libkram/kram/KramFileHelper.cpp | 19 +++++++++++++++ libkram/kram/KramFileHelper.h | 3 +++ 3 files changed, 55 insertions(+), 10 deletions(-) diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index 3091d6ef..0d4f1b8b 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -20,11 +20,14 @@ #import "KramShaders.h" #include "KramLog.h" #include "KramMipper.h" + +#include "KramFileHelper.h" #include "KramMmapHelper.h" +#include "KramZipHelper.h" + #include "KramImage.h" #include "KramViewerBase.h" #include "KramVersion.h" // keep kramv version in sync with libkram -#include "KramZipHelper.h" #ifdef NDEBUG static bool doPrintPanZoom = false; @@ -44,6 +47,8 @@ @interface MyMTKView : MTKView //@property (nonatomic, readwrite, nullable) NSPanGestureRecognizer* panGesture; @property (retain, nonatomic, readwrite, nullable) NSMagnificationGestureRecognizer* zoomGesture; +@property (nonatomic, readwrite) double lastArchiveTimestamp; + - (BOOL)loadTextureFromURL:(NSURL*)url; - (void)setHudText:(const char*)text; @@ -489,7 +494,7 @@ - (nonnull ShowSettings*)showSettings { } - (NSStackView*)_addButtons { - const int32_t numButtons = 25; // 13; + const int32_t numButtons = 26; // 13; const char* names[numButtons*2] = { "?", "Help", @@ -517,6 +522,7 @@ - (NSStackView*)_addButtons { "J", "Next", "L", "Reload", "0", "Fit", + "8", "Shape", // TODO: need to shift hud over a little // "UI", - add to show/hide buttons @@ -1497,6 +1503,8 @@ - (IBAction)handleAction:(id)sender { keyCode = Key::L; else if (title == "0") keyCode = Key::Num0; + else if (title == "8") + keyCode = Key::Num8; else if (title == "R") keyCode = Key::R; @@ -2030,8 +2038,6 @@ - (BOOL)performDragOperation:(id)sender { -(BOOL)loadArchive:(const char*)zipFilename { - // TODO: avoid loading the zip again if name and/or timestamp hasn't changed on it - _zipMmap.close(); if (!_zipMmap.open(zipFilename)) { return NO; @@ -2144,23 +2150,40 @@ - (BOOL)loadTextureFromURL:(NSURL*)url { } if (endsWithExtension(filename, ".zip")) { - if (!self.imageURL || ![self.imageURL isEqualTo:url]) { - BOOL isArchiveLoaded = [self loadArchive:filename]; + auto archiveTimestamp = 
FileHelper::modificationTimestamp(filename); + + if (!self.imageURL || (!([self.imageURL isEqualTo:url])) || (self.lastArchiveTimestamp != archiveTimestamp)) { + + // copy this out before it's replaced + string existingFilename; + if (self.lastArchiveTimestamp) + existingFilename = _zip.zipEntrys()[_fileIndex].filename; + BOOL isArchiveLoaded = [self loadArchive:filename]; if (!isArchiveLoaded) { return NO; } // store the archive url self.imageURL = url; - + self.lastArchiveTimestamp = archiveTimestamp; + // add it to recent docs NSDocumentController* dc = [NSDocumentController sharedDocumentController]; [dc noteNewRecentDocumentURL:url]; + + // now reload the filename if needed + const ZipEntry* formerEntry = _zip.zipEntry(existingFilename.c_str()); + if (formerEntry) { + // lookup the index in the remapIndices table + _fileIndex = (uintptr_t)(formerEntry - &_zip.zipEntrys().front()); + } + else { + _fileIndex = 0; + } } - - // now reload the filename if needed - const auto& entry = _zip.zipEntrys()[_fileIndex]; + + const auto& entry =_zip.zipEntrys()[_fileIndex]; const char* filename = entry.filename; double timestamp = entry.modificationDate; diff --git a/libkram/kram/KramFileHelper.cpp b/libkram/kram/KramFileHelper.cpp index aad88744..a93d23e5 100644 --- a/libkram/kram/KramFileHelper.cpp +++ b/libkram/kram/KramFileHelper.cpp @@ -277,4 +277,23 @@ int64_t FileHelper::size() const return (int64_t)stats.st_size; } +uint64_t FileHelper::modificationTimestamp(const char* filename) { + struct stat stats; + if (stat(filename, &stats) < 0) { + return 0; + } + + // https://www.quora.com/What-is-the-difference-between-mtime-atime-and-ctime + // atime is last access time + // ctime when attributes change + // mtime when contents change + // folders mtime changes when files added/deleted + + // 32.32, only return seconds for now + // https://stackoverflow.com/questions/11373505/getting-the-last-modified-date-of-a-file-in-c + timespec timestamp = stats.st_mtimespec; + return timestamp.tv_sec; +} + + } // namespace kram diff --git a/libkram/kram/KramFileHelper.h b/libkram/kram/KramFileHelper.h index a50167e4..64ff56d5 100644 --- a/libkram/kram/KramFileHelper.h +++ b/libkram/kram/KramFileHelper.h @@ -45,6 +45,9 @@ class FileHelper { static bool readBytes(FILE* fp, uint8_t* data, int dataSize); static bool writeBytes(FILE* fp, const uint8_t* data, int dataSize); + // return mod stamp on filename + static uint64_t modificationTimestamp(const char* filename); + static size_t pagesize(); private: From d3f1c549b62db6ed4949261b986692b490b63813 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 30 May 2021 23:30:31 -0700 Subject: [PATCH 096/901] kram - fix stat call for Win, speed up info lookup with isInfoOnly --- kramv/KramRenderer.mm | 3 ++- libkram/kram/KramFileHelper.cpp | 19 ++++++++++++++----- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index e80ad4a7..e22e5e52 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -534,7 +534,8 @@ - (BOOL)loadTextureFromData:(const string&)fullFilename timestamp:(double)timest // then can decode blocks in kramv KTXImage sourceImage; - if (!sourceImage.open(imageData, imageDataLength)) { + bool isInfoOnly = true; + if (!sourceImage.open(imageData, imageDataLength, isInfoOnly)) { return NO; } diff --git a/libkram/kram/KramFileHelper.cpp b/libkram/kram/KramFileHelper.cpp index a93d23e5..67129de4 100644 --- a/libkram/kram/KramFileHelper.cpp +++ b/libkram/kram/KramFileHelper.cpp @@ -278,11 
+278,21 @@ int64_t FileHelper::size() const } uint64_t FileHelper::modificationTimestamp(const char* filename) { - struct stat stats; - if (stat(filename, &stats) < 0) { + + // Win has to rename all this, so make it happy using wrappers from miniz + #if defined(_MSC_VER) || defined(__MINGW64__) + #define MZ_FILE_STAT_STRUCT _stat64 + #define MZ_FILE_STAT _stat64 + #else + #define MZ_FILE_STAT_STRUCT stat + #define MZ_FILE_STAT stat + #endif + + struct MZ_FILE_STAT_STRUCT stats; + if (MZ_FILE_STAT(filename, &stats) < 0) { return 0; } - + // https://www.quora.com/What-is-the-difference-between-mtime-atime-and-ctime // atime is last access time // ctime when attributes change @@ -291,8 +301,7 @@ uint64_t FileHelper::modificationTimestamp(const char* filename) { // 32.32, only return seconds for now // https://stackoverflow.com/questions/11373505/getting-the-last-modified-date-of-a-file-in-c - timespec timestamp = stats.st_mtimespec; - return timestamp.tv_sec; + return stats.st_mtime; } From c1568641af0c852fbada06954c06747ecd13b326 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 30 May 2021 23:41:37 -0700 Subject: [PATCH 097/901] kramv - set shape state --- kramv/KramViewerMain.mm | 4 ++++ plugin/kps/KPS.cpp | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index 0d4f1b8b..14a85c1c 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -1373,6 +1373,7 @@ - (void)updateUIControlState auto arrayState = toState(_showSettings->arrayNumber > 0); auto faceState = toState(_showSettings->faceNumber > 0); auto mipState = toState(_showSettings->mipLOD > 0); + auto meshState = toState(_showSettings->meshNumber > 0); // TODO: UI state, and vertical state auto uiState = toState(_buttonStack.hidden); @@ -1381,6 +1382,7 @@ - (void)updateUIControlState auto infoState = Off; auto jumpState = Off; + // buttons [self findButton:"?"].state = helpState; [self findButton:"I"].state = infoState; @@ -1399,6 +1401,7 @@ - (void)updateUIControlState [self findButton:"S"].state = showAllState; [self findButton:"O"].state = previewState; + [self findButton:"8"].state = meshState; [self findButton:"W"].state = wrapState; [self findButton:"D"].state = gridState; [self findButton:"E"].state = debugState; @@ -1428,6 +1431,7 @@ - (void)updateUIControlState [self findMenuItem:"S"].state = showAllState; [self findMenuItem:"O"].state = previewState; + [self findMenuItem:"8"].state = meshState; [self findMenuItem:"W"].state = wrapState; [self findMenuItem:"D"].state = gridState; [self findMenuItem:"E"].state = debugState; diff --git a/plugin/kps/KPS.cpp b/plugin/kps/KPS.cpp index e3524f85..bf6c5e55 100755 --- a/plugin/kps/KPS.cpp +++ b/plugin/kps/KPS.cpp @@ -543,7 +543,7 @@ static void DoReadContinue(GlobalsPtr globals) } KTXImage srcImage; - if (!srcImage.open(data.data(), data.size())) { + if (!srcImage.open(data.data(), data.size())) { // TODO: consider using isInfoOnly HandleError(globals, "Read - Couldn't parse file"); return; } From 1f9b29695cb5db6746755f34b5b6e37b5af6d01d Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Mon, 31 May 2021 11:59:54 -0700 Subject: [PATCH 098/901] kramv - support combined albedo + normal views in preview When texture-a/-d.ktx/2 is followed by texture-n.ktx/2 in an archive and they are 2d textures, then display them both together. Fix texture loader. Remove safety check on bufferOffset being 0 on loading. 
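The pairing convention, as a rough standalone sketch in plain C++ (the helper name is made up; the viewer does this inline when pulling files out of the archive): an albedo/diffuse name containing "-a.ktx" or "-d.ktx" maps to the same name with "-n.ktx", and since only those six characters are replaced the same match also covers .ktx2 files.

#include <cstring>
#include <string>

// illustrative sketch of the -a/-d -> -n naming convention
// returns an empty string when the name doesn't look like an albedo/diffuse texture
std::string normalNameForAlbedo(const std::string& filename) {
    for (const char* suffix : { "-a.ktx", "-d.ktx" }) {
        size_t pos = filename.find(suffix);
        if (pos != std::string::npos) {
            std::string name = filename;
            return name.replace(pos, strlen(suffix), "-n.ktx"); // a trailing '2' survives for ktx2
        }
    }
    return {};
}

If no matching normal texture exists in the archive, the lookup failure is simply ignored and only the color texture is shown.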
--- kramv/KramLoader.mm | 8 +-- kramv/KramRenderer.h | 8 ++- kramv/KramRenderer.mm | 42 ++++++++++++++-- kramv/KramShaders.h | 9 +++- kramv/KramShaders.metal | 106 ++++++++++++++++++++++++---------------- kramv/KramViewerMain.mm | 30 +++++++++++- 6 files changed, 150 insertions(+), 53 deletions(-) diff --git a/kramv/KramLoader.mm b/kramv/KramLoader.mm index 41c24951..12f09b79 100644 --- a/kramv/KramLoader.mm +++ b/kramv/KramLoader.mm @@ -53,7 +53,7 @@ @implementation KramLoader { // only one of these for now id _buffer; uint8_t* _data; - uint8_t _bufferOffset; + uint32_t _bufferOffset; vector _blits; NSMutableArray>* _blitTextures; @@ -621,9 +621,9 @@ inline uint64_t alignOffset(uint64_t offset, uint64_t alignment) { // TODO: first make sure have enough buffer to upload, otherwise need to queue this image // try not to load much until that's established // queue would need KTXImage and mmap to stay alive long enough for queue to be completed - if (_bufferOffset != 0) { - return nil; - } +// if (_bufferOffset != 0) { +// return nil; +// } id texture = [self createTexture:image isPrivate:true]; if (!texture) diff --git a/kramv/KramRenderer.h b/kramv/KramRenderer.h index 082990ed..801ace1e 100644 --- a/kramv/KramRenderer.h +++ b/kramv/KramRenderer.h @@ -26,7 +26,13 @@ namespace kram { - (nonnull instancetype)initWithMetalKitView:(nonnull MTKView *)view settings:(nonnull kram::ShowSettings*)settings; -- (BOOL)loadTextureFromData:(const std::string&)fullFilename timestamp:(double)timestamp imageData:(nonnull const uint8_t*)imageData imageDataLength:(uint64_t)imageDataLength; +- (BOOL)loadTextureFromData:(const std::string&)fullFilename + timestamp:(double)timestamp + imageData:(nonnull const uint8_t*)imageData + imageDataLength:(uint64_t)imageDataLength + imageNormalData:(nullable const uint8_t*)imageNormalData + imageNormalDataLength:(uint64_t)imageNormalDataLength; + - (BOOL)loadTexture:(nonnull NSURL *)url; diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index e22e5e52..8f08b1e8 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -56,7 +56,7 @@ @implementation Renderer // TODO: Array< id > _textures; id _colorMap; - //id _colorMapView; + id _normalMap; id _colorMapSamplerWrap; id _colorMapSamplerClamp; @@ -511,7 +511,13 @@ - (void)_loadAssets } -- (BOOL)loadTextureFromData:(const string&)fullFilename timestamp:(double)timestamp imageData:(nonnull const uint8_t*)imageData imageDataLength:(uint64_t)imageDataLength +- (BOOL)loadTextureFromData:(const string&)fullFilename + timestamp:(double)timestamp + imageData:(nonnull const uint8_t*)imageData + imageDataLength:(uint64_t)imageDataLength + imageNormalData:(nullable const uint8_t*)imageNormalData + imageNormalDataLength:(uint64_t)imageNormalDataLength + { // image can be decoded to rgba8u if platform can't display format natively // but still want to identify blockSize from original format @@ -522,13 +528,22 @@ - (BOOL)loadTextureFromData:(const string&)fullFilename timestamp:(double)timest (timestamp != _showSettings->lastTimestamp); if (isTextureChanged) { - // synchronously cpu upload from ktx file to texture + // synchronously cpu upload from ktx file to buffer, with eventual gpu blit from buffer to returned texture MTLPixelFormat originalFormatMTL = MTLPixelFormatInvalid; id texture = [_loader loadTextureFromData:imageData imageDataLength:imageDataLength originalFormat:&originalFormatMTL]; if (!texture) { return NO; } + // hacking in the normal texture here, so can display them together during preview + id 
normalTexture; + if (imageNormalData) { + normalTexture = [_loader loadTextureFromData:imageNormalData imageDataLength:imageNormalDataLength originalFormat:nil]; + if (!normalTexture) { + return NO; + } + } + // archive shouldn't contain png, so only support ktx/ktx2 here // TODO: have loader return KTXImage instead of parsing it again // then can decode blocks in kramv @@ -550,6 +565,7 @@ - (BOOL)loadTextureFromData:(const string&)fullFilename timestamp:(double)timest @autoreleasepool { _colorMap = texture; + _normalMap = normalTexture; } } @@ -592,6 +608,7 @@ - (BOOL)loadTexture:(nonnull NSURL *)url @autoreleasepool { _colorMap = texture; + _normalMap = nil; } } @@ -796,6 +813,16 @@ - (void)_updateGameState uniforms.isPreview = _showSettings->isPreview; + uniforms.isNormalMapPreview = false; + if (uniforms.isPreview) { + uniforms.isNormalMapPreview = uniforms.isNormal || (_normalMap != nil); + + if (_normalMap != nil) { + uniforms.isNormalMapSigned = isSignedFormat((MyMTLPixelFormat)_normalMap.pixelFormat); + uniforms.isNormalMapSwizzleAGToRG = false; // TODO: need a prop for this + } + } + uniforms.gridX = 0; uniforms.gridY = 0; @@ -1056,9 +1083,14 @@ - (void)drawMain:(id)commandBuffer view:(nonnull MTKView *)vie // set the texture up - id texture = _colorMap; - [renderEncoder setFragmentTexture:texture + [renderEncoder setFragmentTexture:_colorMap atIndex:TextureIndexColor]; + + // setup normal map + if (_normalMap && _showSettings->isPreview && _colorMap.textureType == MTLTextureType2D) { + [renderEncoder setFragmentTexture:_normalMap + atIndex:TextureIndexNormal]; + } diff --git a/kramv/KramShaders.h b/kramv/KramShaders.h index d81b866e..fd8bb48e 100644 --- a/kramv/KramShaders.h +++ b/kramv/KramShaders.h @@ -49,7 +49,9 @@ typedef NS_ENUM(int32_t, VertexAttribute) typedef NS_ENUM(int32_t, TextureIndex) { TextureIndexColor = 0, - TextureIndexSamples = 1, // used for compute + TextureIndexNormal = 1, + + TextureIndexSamples = 2, // used for compute }; typedef NS_ENUM(int32_t, SamplerIndex) @@ -108,7 +110,12 @@ struct Uniforms bool isWrap; bool isSDF; bool isPreview; + bool is3DView; + bool isNormalMapPreview; // for isNormal or combined + + bool isNormalMapSigned; + bool isNormalMapSwizzleAGToRG; uint32_t numChannels; diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index 0021f1ee..4ae2f668 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -183,6 +183,8 @@ half3 toNormal(half3 n) return n; } + + // use mikktspace, gen bitan in frag shader with sign, don't normalize vb/vt // see http://www.mikktspace.com/ half3 transformNormal(half3 bumpNormal, half4 tangent, half3 vertexNormal) @@ -219,11 +221,40 @@ half3 transformNormal(half4 tangent, half3 vertexNormal, // rebuild the z term half3 bumpNormal = toNormal(nmap.xyz); - return transformNormal(bumpNormal, - tangent, vertexNormal); + return transformNormal(bumpNormal, tangent, vertexNormal); } +float3 transformNormal(float4 nmap, half3 vertexNormal, half4 tangent, + bool isSwizzleAGToRG, bool isSigned, bool isFrontFacing) +{ + // add swizzle for ASTC/BC5nm, other 2 channels format can only store 01 in ba + // could use hw swizzle for this + if (isSwizzleAGToRG) { + nmap = float4(nmap.ag, 0, 1); + } + + // to signed, also for ASTC/BC5nm + if (!isSigned) { + // convert to signed normal to compute z + nmap.rg = toSnorm8(nmap.rg); + } + + float3 bumpNormal = nmap.xyz; + + bumpNormal = toNormal(bumpNormal); + + // flip the normal if facing is flipped + // TODO: needed for tangent too? 
+ if (!isFrontFacing) { + bumpNormal = -bumpNormal; + tangent.w = -tangent.w; + } + + // handle the basis here + bumpNormal = toFloat(transformNormal(toHalf(bumpNormal), tangent, vertexNormal)); + return bumpNormal; +} // TODO: have more bones, or read from texture instead of uniforms // can then do instanced skining, but vfetch lookup slower @@ -358,7 +389,7 @@ ColorInOut DrawImageFunc( // deal with full basis - if (uniforms.isNormal && uniforms.isPreview) { + if (uniforms.isNormalMapPreview) { float3 normal = in.normal; float3 tangent = in.tangent.xyz; transformBasis(normal, tangent, uniforms.modelMatrix, uniforms.modelMatrixInvScale2); @@ -383,11 +414,11 @@ ColorInOut DrawImageFunc( out.texCoord.xy = in.texCoord; out.texCoord.xy *= wrapAmount; } - else if (uniforms.is3DView) { + else if (uniforms.is3DView && !uniforms.isWrap) { // inset from edge by a fraction of a pixel, to avoid clamp boundary error // does this have to adjust for mipLOD too? float2 onePixel = uniformsLevel.textureSize.zw; - float2 halfPixel = (1.0/16.0) * onePixel; + float2 halfPixel = (1.0/4.0) * onePixel; out.texCoord.xy = clamp(in.texCoord, halfPixel, float2(1.0) - halfPixel); } @@ -526,6 +557,7 @@ float4 DrawPixels( bool facing [[front_facing]], constant Uniforms& uniforms, float4 c, + float4 nmap, float2 textureSize ) { @@ -565,32 +597,10 @@ float4 DrawPixels( else if (uniforms.isNormal) { // light the normal map - // add swizzle for ASTC/BC5nm, other 2 channels format can only store 01 in ba - if (uniforms.isSwizzleAGToRG) { - c = float4(c.ag, 0, 1); - } - - // to signed - if (!uniforms.isSigned) { - // convert to signed normal to compute z - c.rg = toSnorm8(c.rg); - } - - c.rgb = toNormal(c.rgb); - - // flip the normal if facing is flipped - // TODO: needed for tangent too? 
- if (!facing) { - c.xyz = -c.xyz; - in.tangent.w = -in.tangent.w; - } + float3 n = transformNormal(c, in.normal, in.tangent, + uniforms.isSwizzleAGToRG, uniforms.isSigned, facing); - float3 n = c.xyz; - - // handle the basis here - n = toFloat(transformNormal(toHalf(n), in.tangent, in.normal)); - float3 viewDir = normalize(in.worldPos - uniforms.cameraPosition); c = doLighting(float4(1.0), viewDir, n); @@ -601,11 +611,18 @@ float4 DrawPixels( if (uniforms.isSigned) { c.xyz = toUnorm(c.xyz); } - - // need an isAlbedo test - if (!uniforms.isSigned) { + else { // TODO: need an isAlbedo test float3 viewDir = normalize(in.worldPos - uniforms.cameraPosition); - c = doLighting(c, viewDir, toFloat(in.normal)); + + if (uniforms.isNormalMapPreview) { + float3 n = transformNormal(nmap, in.normal, in.tangent, + uniforms.isNormalMapSwizzleAGToRG, uniforms.isNormalMapSigned, facing); + + c = doLighting(c, viewDir, n); + } + else { + c = doLighting(c, viewDir, toFloat(in.normal)); + } } // to premul, but also need to see without premul @@ -843,7 +860,8 @@ fragment float4 Draw1DArrayPS( float2 textureSize = float2(colorMap.get_width(0), 1); // colorMap.get_num_mip_levels(); - return DrawPixels(in, facing, uniforms, c, textureSize); + float4 n = float4(0,0,1,1); + return DrawPixels(in, facing, uniforms, c, n, textureSize); } fragment float4 DrawImagePS( @@ -852,17 +870,19 @@ fragment float4 DrawImagePS( constant Uniforms& uniforms [[ buffer(BufferIndexUniforms) ]], constant UniformsLevel& uniformsLevel [[ buffer(BufferIndexUniformsLevel) ]], sampler colorSampler [[ sampler(SamplerIndexColor) ]], - texture2d colorMap [[ texture(TextureIndexColor) ]] + texture2d colorMap [[ texture(TextureIndexColor) ]], + texture2d normalMap [[ texture(TextureIndexNormal) ]] ) { float4 c = colorMap.sample(colorSampler, in.texCoordXYZ.xy); - + float4 n = normalMap.sample(colorSampler, in.texCoordXYZ.xy); + // here are the pixel dimensions of the lod uint lod = uniformsLevel.mipLOD; float2 textureSize = float2(colorMap.get_width(lod), colorMap.get_height(lod)); // colorMap.get_num_mip_levels(); - return DrawPixels(in, facing, uniforms, c, textureSize); + return DrawPixels(in, facing, uniforms, c, n, textureSize); } fragment float4 DrawImageArrayPS( @@ -881,7 +901,8 @@ fragment float4 DrawImageArrayPS( float2 textureSize = float2(colorMap.get_width(lod), colorMap.get_height(lod)); // colorMap.get_num_mip_levels(); - return DrawPixels(in, facing, uniforms, c, textureSize); + float4 n = float4(0,0,1,1); + return DrawPixels(in, facing, uniforms, c, n, textureSize); } @@ -902,7 +923,8 @@ fragment float4 DrawCubePS( float2 textureSize = float2(w, w); // colorMap.get_num_mip_levels(); - return DrawPixels(in, facing, uniforms, c, textureSize); + float4 n = float4(0,0,1,1); + return DrawPixels(in, facing, uniforms, c, n, textureSize); } fragment float4 DrawCubeArrayPS( @@ -922,7 +944,8 @@ fragment float4 DrawCubeArrayPS( float2 textureSize = float2(w, w); // colorMap.get_num_mip_levels(); - return DrawPixels(in, facing, uniforms, c, textureSize); + float4 n = float4(0,0,1,1); + return DrawPixels(in, facing, uniforms, c, n, textureSize); } @@ -953,7 +976,8 @@ fragment float4 DrawVolumePS( float2 textureSize = float2(colorMap.get_width(lod), colorMap.get_height(lod)); // colorMap.get_num_mip_levels(); - return DrawPixels(in, facing, uniforms, c, textureSize); + float4 n = float4(0,0,1,1); + return DrawPixels(in, facing, uniforms, c, n, textureSize); } //-------------------------------------------------- diff --git 
a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index 14a85c1c..422d5a2b 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -2101,9 +2101,37 @@ - (BOOL)loadTextureFromArchive:(const char*)filename timestamp:(double)timestamp return NO; } + // see if this is albedo, and then search for normal map in the same archive + const uint8_t* imageNormalData = nullptr; + uint64_t imageNormalDataLength = 0; + + string normalFilename = filename; + + // first only do this on albedo/diffuse textures + string search = "-a.ktx"; + auto searchPos = normalFilename.find(search); + bool isFound = searchPos != string::npos; + + if (!isFound) { + search = "-d.ktx"; + searchPos = normalFilename.find(search); + isFound = searchPos != string::npos; + } + + if (isFound) { + normalFilename = normalFilename.replace(searchPos, search.length(), "-n.ktx"); // works for ktx or ktx2 file + + if (!_zip.extractRaw(normalFilename.c_str(), &imageNormalData, imageNormalDataLength)) { + // ignore failure case here, this is just guessing there's a -n file + } + } + string fullFilename = filename; Renderer* renderer = (Renderer*)self.delegate; - if (![renderer loadTextureFromData:fullFilename timestamp:(double)timestamp imageData:imageData imageDataLength:imageDataLength]) { + if (![renderer loadTextureFromData:fullFilename timestamp:(double)timestamp + imageData:imageData imageDataLength:imageDataLength + imageNormalData:imageNormalData imageNormalDataLength:imageNormalDataLength]) + { return NO; } From 8d743e5c64ae185216d0484a1e76832695852bb7 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Mon, 31 May 2021 13:35:08 -0700 Subject: [PATCH 099/901] kramv - fix eyedropper, support array2d for combined color+normals --- kramv/KramRenderer.mm | 46 +++++++++++++++++++++++++++++++---------- kramv/KramShaders.metal | 17 ++++++++------- 2 files changed, 44 insertions(+), 19 deletions(-) diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index 8f08b1e8..0b53f5ba 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -538,12 +538,19 @@ - (BOOL)loadTextureFromData:(const string&)fullFilename // hacking in the normal texture here, so can display them together during preview id normalTexture; if (imageNormalData) { - normalTexture = [_loader loadTextureFromData:imageNormalData imageDataLength:imageNormalDataLength originalFormat:nil]; - if (!normalTexture) { - return NO; + KTXImage imageNormal; + if (imageNormal.open(imageNormalData, imageNormalDataLength, true)) { + // only have shaders that expects diffuse/normal to be same texture type + if (imageNormal.textureType == (MyMTLTextureType)texture.textureType && + (imageNormal.textureType == MyMTLTextureType2D || imageNormal.textureType == MyMTLTextureType2DArray)) + { + normalTexture = [_loader loadTextureFromData:imageNormalData imageDataLength:imageNormalDataLength originalFormat:nil]; + if (!normalTexture) { + return NO; + } + } } } - // archive shouldn't contain png, so only support ktx/ktx2 here // TODO: have loader return KTXImage instead of parsing it again // then can decode blocks in kramv @@ -1002,12 +1009,16 @@ - (void)drawMain:(id)commandBuffer view:(nonnull MTKView *)vie if (renderPassDescriptor == nil) { return; } + if (_colorMap == nil) { // this will clear target id renderEncoder = - [commandBuffer renderCommandEncoderWithDescriptor:renderPassDescriptor]; - renderEncoder.label = @"MainRender"; - [renderEncoder endEncoding]; + [commandBuffer renderCommandEncoderWithDescriptor:renderPassDescriptor]; + + if (renderEncoder) { + 
renderEncoder.label = @"MainRender"; + [renderEncoder endEncoding]; + } return; } @@ -1015,6 +1026,10 @@ - (void)drawMain:(id)commandBuffer view:(nonnull MTKView *)vie /// Final pass rendering code here id renderEncoder = [commandBuffer renderCommandEncoderWithDescriptor:renderPassDescriptor]; + if (!renderEncoder) { + return; + } + renderEncoder.label = @"MainRender"; // set raster state @@ -1087,7 +1102,8 @@ - (void)drawMain:(id)commandBuffer view:(nonnull MTKView *)vie atIndex:TextureIndexColor]; // setup normal map - if (_normalMap && _showSettings->isPreview && _colorMap.textureType == MTLTextureType2D) { + if (_normalMap && _showSettings->isPreview) + { [renderEncoder setFragmentTexture:_normalMap atIndex:TextureIndexNormal]; } @@ -1265,6 +1281,9 @@ - (void)drawSample } id commandBuffer = [_commandQueue commandBuffer]; + if (!commandBuffer) + return; + commandBuffer.label = @"MyCommand"; int32_t textureLookupX = _showSettings->textureLookupX; @@ -1277,9 +1296,11 @@ - (void)drawSample // Synchronize the managed texture. id blitCommandEncoder = [commandBuffer blitCommandEncoder]; - [blitCommandEncoder synchronizeResource:_sampleTex]; - [blitCommandEncoder endEncoding]; - + if (blitCommandEncoder) { + [blitCommandEncoder synchronizeResource:_sampleTex]; + [blitCommandEncoder endEncoding]; + } + // After synchonization, copy value back to the cpu id texture = _sampleTex; [commandBuffer addCompletedHandler:^(id /* buffer */) @@ -1311,6 +1332,9 @@ - (void)drawSamples:(id)commandBuffer lookupX:(int32_t)lookupX // Final pass rendering code here id renderEncoder = [commandBuffer computeCommandEncoder]; + if (!renderEncoder) + return; + renderEncoder.label = @"SampleCompute"; [renderEncoder pushDebugGroup:@"DrawShape"]; diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index 4ae2f668..020de693 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -891,17 +891,18 @@ fragment float4 DrawImageArrayPS( constant Uniforms& uniforms [[ buffer(BufferIndexUniforms) ]], constant UniformsLevel& uniformsLevel [[ buffer(BufferIndexUniformsLevel) ]], sampler colorSampler [[ sampler(SamplerIndexColor) ]], - texture2d_array colorMap [[ texture(TextureIndexColor) ]] + texture2d_array colorMap [[ texture(TextureIndexColor) ]], + texture2d_array normalMap [[ texture(TextureIndexNormal) ]] ) { float4 c = colorMap.sample(colorSampler, in.texCoordXYZ.xy, uniformsLevel.arrayOrSlice); + float4 n = normalMap.sample(colorSampler, in.texCoordXYZ.xy, uniformsLevel.arrayOrSlice); // here are the pixel dimensions of the lod uint lod = uniformsLevel.mipLOD; float2 textureSize = float2(colorMap.get_width(lod), colorMap.get_height(lod)); // colorMap.get_num_mip_levels(); - float4 n = float4(0,0,1,1); return DrawPixels(in, facing, uniforms, c, n, textureSize); } @@ -1077,7 +1078,7 @@ kernel void SampleImage1DArrayCS( texture1d_array colorMap [[ texture(TextureIndexColor) ]], constant UniformsCS& uniforms [[ buffer(BufferIndexUniformsCS) ]], uint2 index [[thread_position_in_grid]], - texture2d result + texture2d result [[ texture(TextureIndexSamples) ]] ) { // the for-loop is replaced with a collection of threads, each of which @@ -1096,7 +1097,7 @@ kernel void SampleImageCS( texture2d colorMap [[ texture(TextureIndexColor) ]], constant UniformsCS& uniforms [[ buffer(BufferIndexUniformsCS) ]], uint2 index [[thread_position_in_grid]], - texture2d result + texture2d result [[ texture(TextureIndexSamples) ]] ) { // the for-loop is replaced with a collection of threads, each of which @@ -1113,7 +1114,7 
@@ kernel void SampleImageArrayCS( texture2d_array colorMap [[ texture(TextureIndexColor) ]], constant UniformsCS& uniforms [[ buffer(BufferIndexUniformsCS) ]], uint2 index [[thread_position_in_grid]], - texture2d result + texture2d result [[ texture(TextureIndexSamples) ]] ) { // the for-loop is replaced with a collection of threads, each of which @@ -1132,7 +1133,7 @@ kernel void SampleCubeCS( texturecube colorMap [[ texture(TextureIndexColor) ]], constant UniformsCS& uniforms [[ buffer(BufferIndexUniformsCS) ]], uint2 index [[thread_position_in_grid]], - texture2d result + texture2d result [[ texture(TextureIndexSamples) ]] ) { // the for-loop is replaced with a collection of threads, each of which @@ -1154,7 +1155,7 @@ kernel void SampleCubeArrayCS( texturecube_array colorMap [[ texture(TextureIndexColor) ]], constant UniformsCS& uniforms [[ buffer(BufferIndexUniformsCS) ]], uint2 index [[thread_position_in_grid]], - texture2d result + texture2d result [[ texture(TextureIndexSamples) ]] ) { // the for-loop is replaced with a collection of threads, each of which @@ -1174,7 +1175,7 @@ kernel void SampleVolumeCS( texture3d colorMap [[ texture(TextureIndexColor) ]], constant UniformsCS& uniforms [[ buffer(BufferIndexUniformsCS) ]], uint2 index [[thread_position_in_grid]], - texture2d result + texture2d result [[ texture(TextureIndexSamples) ]] ) { // the for-loop is replaced with a collection of threads, each of which From 7c02247e5ce53a4de9a72701627f8fc681bf0839 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Mon, 31 May 2021 13:40:46 -0700 Subject: [PATCH 100/901] kramv - more test cases, move to heights that gen normals to keep things in sync run scripts/kramTests.sh to build the normal maps from the heights, and build the bundle the bundle can be dropped onto kramv to see combined albedo + normal --- tests/src/GradientGray4x4-a.png | 3 +++ tests/src/White4x4-a.png | 3 +++ tests/src/brick01-d.png | 3 +++ tests/src/brick01-h.png | 3 +++ tests/src/{collectorbarrelh-h.png => collectorbarrel-h.png} | 0 tests/src/collectorbarrel-n.png | 3 --- tests/src/laying_rock7-d.png | 3 +++ tests/src/laying_rock7-h.png | 3 +++ tests/src/rockwall-d.png | 3 +++ tests/src/rockwall-h.png | 3 +++ tests/src/roots-d.png | 3 +++ tests/src/roots-h.png | 3 +++ 12 files changed, 30 insertions(+), 3 deletions(-) create mode 100644 tests/src/GradientGray4x4-a.png create mode 100644 tests/src/White4x4-a.png create mode 100755 tests/src/brick01-d.png create mode 100755 tests/src/brick01-h.png rename tests/src/{collectorbarrelh-h.png => collectorbarrel-h.png} (100%) delete mode 100644 tests/src/collectorbarrel-n.png create mode 100755 tests/src/laying_rock7-d.png create mode 100755 tests/src/laying_rock7-h.png create mode 100755 tests/src/rockwall-d.png create mode 100755 tests/src/rockwall-h.png create mode 100755 tests/src/roots-d.png create mode 100755 tests/src/roots-h.png diff --git a/tests/src/GradientGray4x4-a.png b/tests/src/GradientGray4x4-a.png new file mode 100644 index 00000000..782ec70e --- /dev/null +++ b/tests/src/GradientGray4x4-a.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70a0898336eb863668cf1ffc9edcaada4ab702f93b2b244276e07cc501825d8f +size 1871 diff --git a/tests/src/White4x4-a.png b/tests/src/White4x4-a.png new file mode 100644 index 00000000..486e1d69 --- /dev/null +++ b/tests/src/White4x4-a.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9767dc8547c1a7c1a3db989ce5b84fa28f6b7b026af5c850b9d01e942c65a59 +size 1412 diff --git 
a/tests/src/brick01-d.png b/tests/src/brick01-d.png new file mode 100755 index 00000000..6614baa0 --- /dev/null +++ b/tests/src/brick01-d.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d2d2645121417c9559c9b5689546b44e7dcb139bb225e556634406345194ce2 +size 355401 diff --git a/tests/src/brick01-h.png b/tests/src/brick01-h.png new file mode 100755 index 00000000..c45e07d1 --- /dev/null +++ b/tests/src/brick01-h.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e8ea48fdb25da97f55d3f4c939b8c0a8572e2ad6bbb6f1105d6fd340889c823 +size 74397 diff --git a/tests/src/collectorbarrelh-h.png b/tests/src/collectorbarrel-h.png similarity index 100% rename from tests/src/collectorbarrelh-h.png rename to tests/src/collectorbarrel-h.png diff --git a/tests/src/collectorbarrel-n.png b/tests/src/collectorbarrel-n.png deleted file mode 100644 index 2cc4c0c1..00000000 --- a/tests/src/collectorbarrel-n.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:151e91179b05f8a127f635b30857525b9c3b124fd129971db370cbf9b9d6f6e1 -size 33987 diff --git a/tests/src/laying_rock7-d.png b/tests/src/laying_rock7-d.png new file mode 100755 index 00000000..ae3bdf2b --- /dev/null +++ b/tests/src/laying_rock7-d.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb7a51282ee5c9ca1314b53443ce59a9da93ba4cd1daea59280400e6d088938c +size 540419 diff --git a/tests/src/laying_rock7-h.png b/tests/src/laying_rock7-h.png new file mode 100755 index 00000000..6a6621c2 --- /dev/null +++ b/tests/src/laying_rock7-h.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecaa27e2311541d0882bd5c1b53a5e4818dc808a29a6081a53a540b318aeebf5 +size 87309 diff --git a/tests/src/rockwall-d.png b/tests/src/rockwall-d.png new file mode 100755 index 00000000..b191fad5 --- /dev/null +++ b/tests/src/rockwall-d.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:694da069137942b85428aa1990831cecdcd7d1f168c0b3e47d8773a9a4592dab +size 705228 diff --git a/tests/src/rockwall-h.png b/tests/src/rockwall-h.png new file mode 100755 index 00000000..5fe1e8c1 --- /dev/null +++ b/tests/src/rockwall-h.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b64ce3b80d21a3bde9d223e8db37e1c365c24421c73717a54fe94632c5f61656 +size 68538 diff --git a/tests/src/roots-d.png b/tests/src/roots-d.png new file mode 100755 index 00000000..067b5762 --- /dev/null +++ b/tests/src/roots-d.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50ef4f60c5ad4536d022cc9263d9444afe92181b00373a7b98699fc807c6df28 +size 604307 diff --git a/tests/src/roots-h.png b/tests/src/roots-h.png new file mode 100755 index 00000000..29182ac8 --- /dev/null +++ b/tests/src/roots-h.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5348fba75215a3aeea537dd50fa4943b1c1674473752346c4ebe1f05e22a83a0 +size 63375 From f01f71453512da831e983973d98a544094053211 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Mon, 31 May 2021 13:51:16 -0700 Subject: [PATCH 101/901] kramv - fix gap on clamp with full half pixel inset. Might be able to reduce, but using highest mip textureSize right now. 
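For reference, a minimal numeric sketch of the inset (assuming a hypothetical 4x4 top mip, so textureSize.zw = 1/4):

    // illustrative values only
    float2 onePixel  = float2(1.0 / 4.0); // 0.25 for a 4x4 mip
    float2 halfPixel = 0.5 * onePixel;    // 0.125, the distance from the edge to the first texel center
    // clamp(uv, 0.125, 0.875) keeps every sample at or inside the outer texel centers,
    // while the old quarter-pixel inset (0.0625) still let samples fall between the edge
    // and the first texel center.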
--- kramv/KramShaders.metal | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index 020de693..32639e57 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -418,7 +418,7 @@ ColorInOut DrawImageFunc( // inset from edge by a fraction of a pixel, to avoid clamp boundary error // does this have to adjust for mipLOD too? float2 onePixel = uniformsLevel.textureSize.zw; - float2 halfPixel = (1.0/4.0) * onePixel; + float2 halfPixel = 0.5 * onePixel; out.texCoord.xy = clamp(in.texCoord, halfPixel, float2(1.0) - halfPixel); } From d0f45a1c5fbb979905a84e071107997cfd30c00d Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Mon, 31 May 2021 18:59:12 -0700 Subject: [PATCH 102/901] kramv - add folder drop, and filter unsupported folder/archive extensions Now can use a folder(s) or an archive to view textures. The same combined albedo+normal works on folder drop. The folder drop doesn't yet support PNG. The load data path only works on ktx/ktx2 files for now. .zip bundles in a folder drop are ignored, so those can reside along with loose files. --- kramv/KramShaders.metal | 28 +-- kramv/KramViewerBase.h | 5 +- kramv/KramViewerMain.mm | 355 ++++++++++++++++++++++++++++----- libkram/kram/KramLog.cpp | 2 + libkram/kram/KramZipHelper.cpp | 16 ++ libkram/kram/KramZipHelper.h | 3 + 6 files changed, 351 insertions(+), 58 deletions(-) diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index 32639e57..b064f983 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -515,15 +515,15 @@ float4 doLighting(float4 albedo, float3 viewDir, float3 n) { float3 lightDir = normalize(float3(1,1,1)); float3 lightColor = float3(1,1,1); - - // diffuse - float dotNLUnsat = dot(n, lightDir); - float dotNL = saturate(dotNLUnsat); - float3 diffuse = lightColor.xyz * dotNL; float3 specular = float3(0.0); - + float3 diffuse = float3(0.0); + float3 ambient = float3(0.0); + bool doSpecular = true; + bool doDiffuse = true; + bool doAmbient = true; + if (doSpecular) { float3 ref = normalize(reflect(viewDir, n)); @@ -533,10 +533,18 @@ float4 doLighting(float4 albedo, float3 viewDir, float3 n) { specular = saturate(dotRL * lightColor.rgb); } - float3 ambient = mix(0.1, 0.3, saturate(dotNLUnsat * 0.5 + 0.5)); + if (doDiffuse) { + float dotNL = saturate(dot(n, lightDir)); + diffuse = dotNL * lightColor.rgb; + } + + if (doAmbient) { + float dotNLUnsat = dot(n, lightDir); + ambient = mix(0.1, 0.3, saturate(dotNLUnsat * 0.5 + 0.5)); + } // attenuate, and not saturate below, so no HDR yet - specular *= 0.3; + specular *= 0.8; diffuse *= 0.7; //ambient *= 0.2; @@ -545,10 +553,6 @@ float4 doLighting(float4 albedo, float3 viewDir, float3 n) { return albedo; } - -// TODO: do more test shapes, but that affects eyedropper -// generate and pass down tangents + bitanSign in the geometry - // TODO: eliminate the toUnorm() calls below, rendering to rgba16f // but also need to remove conversion code on cpu side expecting unorm in eyedropper diff --git a/kramv/KramViewerBase.h b/kramv/KramViewerBase.h index b985f75d..d0d5a2ac 100644 --- a/kramv/KramViewerBase.h +++ b/kramv/KramViewerBase.h @@ -110,9 +110,12 @@ class ShowSettings { // draw with reverseZ to better match perspective bool isReverseZ = true; - // whether files are pulled from disk or zip archive. + // whether files are pulled from zip archive. 
bool isArchive = false; + // whether files are pulled from folder(s) + bool isFolder = false; + // can have up to 5 channels (xyz as xy, 2 other channels) int32_t numChannels = 0; diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index 422d5a2b..d0166c32 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -421,8 +421,11 @@ @implementation MyMTKView // allow zip files to be dropped and opened, and can advance through bundle content ZipHelper _zip; MmapHelper _zipMmap; - int32_t _fileIndex; + int32_t _fileArchiveIndex; BOOL _noImageLoaded; + + vector _folderFiles; + int32_t _fileFolderIndex; } - (void)awakeFromNib @@ -1288,7 +1291,7 @@ - (void)updateUIAfterLoad { bool isFaceSliceHidden = _showSettings->faceCount <= 1 && _showSettings->sliceCount <= 1; bool isMipHidden = _showSettings->maxLOD <= 1; - bool isJumpToNextHidden = !_showSettings->isArchive; + bool isJumpToNextHidden = !(_showSettings->isArchive || _showSettings->isFolder); bool isRedHidden = false; bool isGreenHidden = _showSettings->numChannels <= 1; @@ -1875,9 +1878,17 @@ - (bool)handleKey:(uint32_t)keyCode isShiftKeyDown:(bool)isShiftKeyDown case Key::J: if (![self findButton:"J"].isHidden) { - if ([self advanceTextureFromAchive:!isShiftKeyDown]) { - isChanged = true; - text = "Loaded " + _showSettings->lastFilename; + if (_showSettings->isArchive) { + if ([self advanceTextureFromAchive:!isShiftKeyDown]) { + isChanged = true; + text = "Loaded " + _showSettings->lastFilename; + } + } + else if (_showSettings->isFolder) { + if ([self advanceTextureFromFolder:!isShiftKeyDown]) { + isChanged = true; + text = "Loaded " + _showSettings->lastFilename; + } } } break; @@ -2053,9 +2064,18 @@ -(BOOL)loadArchive:(const char*)zipFilename if (!_zip.openForRead(_zipMmap.data(), _zipMmap.dataLength())) { return NO; } + + // filter out unsupported extensions + + _zip.filterExtensions({".ktx", ".ktx2"}); + // don't switch to empty archive + if (_zip.zipEntrys().empty()) { + return NO; + } + // load the first entry in the archive - _fileIndex = 0; + _fileArchiveIndex = 0; return YES; } @@ -2067,27 +2087,156 @@ -(BOOL)advanceTextureFromAchive:(BOOL)increment return NO; } - // this advances through the fileIndex of a dropped - size_t numEntries = _zip.zipEntrys().size(); - if (numEntries == 0) { + if (_zip.zipEntrys().empty()) { return NO; } + + size_t numEntries = _zip.zipEntrys().size(); + + if (increment) + _fileArchiveIndex++; + else + _fileArchiveIndex += numEntries - 1; // back 1 + + _fileArchiveIndex = _fileArchiveIndex % numEntries; + return [self loadTextureFromArchive]; +} + +-(BOOL)advanceTextureFromFolder:(BOOL)increment +{ + if (_folderFiles.empty()) { + // no archive loaded + return NO; + } + + size_t numEntries = _folderFiles.size(); if (increment) - _fileIndex = (_fileIndex + 1) % numEntries; + _fileFolderIndex++; else - _fileIndex = (_fileIndex + numEntries - 1) % numEntries; + _fileFolderIndex += numEntries - 1; // back 1 + + _fileFolderIndex = _fileFolderIndex % numEntries; + + return [self loadTextureFromFolder]; +} +- (BOOL)loadTextureFromFolder +{ // now lookup the filename and data at that entry - const auto& entry = _zip.zipEntrys()[_fileIndex]; - const char* filename = entry.filename; - double timestamp = (double)entry.modificationDate; + const char* filename = _folderFiles[_fileFolderIndex].c_str(); + auto timestamp = FileHelper::modificationTimestamp(filename); + + // have already filtered filenames out, so this should never get hit + if (!(//endsWithExtension(filename, ".png") || + 
endsWithExtension(filename, ".ktx") || + endsWithExtension(filename, ".ktx2")) ) + { + return NO; + } + + const uint8_t* imageData = nullptr; + uint64_t imageDataLength = 0; + + // TODO: assuming can mmap here, but may need FileHelper fallback + MmapHelper imageMmap; + if (!imageMmap.open(filename)) { + return NO; + } + + imageData = imageMmap.data(); + imageDataLength = imageMmap.dataLength(); + + // see if this is albedo, and then search for normal map in the same archive + const uint8_t* imageNormalData = nullptr; + uint64_t imageNormalDataLength = 0; + MmapHelper imageNormalMmap; + + string normalFilename = filename; + + // first only do this on albedo/diffuse textures + string search = "-a.ktx"; + auto searchPos = normalFilename.find(search); + bool isFound = searchPos != string::npos; + + if (!isFound) { + search = "-d.ktx"; + searchPos = normalFilename.find(search); + isFound = searchPos != string::npos; + } + + if (isFound) { + normalFilename = normalFilename.replace(searchPos, search.length(), "-n.ktx"); // works for ktx or ktx2 file + + // binary search for the filename in the array, will have to be in same directory + isFound = false; + for (const auto& search : _folderFiles) { + if (search == normalFilename) { + isFound = true; + break; + } + } + + if (isFound) { + if (imageNormalMmap.open(normalFilename.c_str())) { + imageNormalData = imageNormalMmap.data(); + imageNormalDataLength = imageNormalMmap.dataLength(); + } + } + } + + string fullFilename = filename; + Renderer* renderer = (Renderer*)self.delegate; + if (![renderer loadTextureFromData:fullFilename timestamp:(double)timestamp + imageData:imageData imageDataLength:imageDataLength + imageNormalData:imageNormalData imageNormalDataLength:imageNormalDataLength]) + { + return NO; + } - return [self loadTextureFromArchive:filename timestamp:timestamp]; + // set title to filename, chop this to just file+ext, not directory + const char* filenameShort = strrchr(filename, '/'); + if (filenameShort == nullptr) { + filenameShort = filename; + } + else { + filenameShort += 1; + } + + // was using subtitle, but that's macOS 11.0 feature. 
+ string title = "kramv - "; + title += formatTypeName(_showSettings->originalFormat); + title += " - "; + title += filenameShort; + + self.window.title = [NSString stringWithUTF8String: title.c_str()]; + + // doesn't set imageURL or update the recent document menu + + // show the controls + if (_noImageLoaded) { + _buttonStack.hidden = NO; // show controls + _noImageLoaded = NO; + } + + _showSettings->isArchive = false; + _showSettings->isFolder = true; + + // show/hide button + [self updateUIAfterLoad]; + + self.needsDisplay = YES; + return YES; } -- (BOOL)loadTextureFromArchive:(const char*)filename timestamp:(double)timestamp +- (BOOL)loadTextureFromArchive { + // now lookup the filename and data at that entry + const auto& entry = _zip.zipEntrys()[_fileArchiveIndex]; + const char* filename = entry.filename; + double timestamp = (double)entry.modificationDate; + + // have already filtered filenames out, so this should never get hit if (!(//endsWithExtension(filename, ".png") || endsWithExtension(filename, ".ktx") || endsWithExtension(filename, ".ktx2")) ) @@ -2161,6 +2310,7 @@ - (BOOL)loadTextureFromArchive:(const char*)filename timestamp:(double)timestamp } _showSettings->isArchive = true; + _showSettings->isFolder = false; // show/hide button [self updateUIAfterLoad]; @@ -2181,57 +2331,98 @@ - (BOOL)loadTextureFromURL:(NSURL*)url { return NO; } - if (endsWithExtension(filename, ".zip")) { - auto archiveTimestamp = FileHelper::modificationTimestamp(filename); + // this likely means it's a local file directory + if (strchr(filename, '.') == nullptr) { + // make list of all file in the directory - if (!self.imageURL || (!([self.imageURL isEqualTo:url])) || (self.lastArchiveTimestamp != archiveTimestamp)) { + if (!self.imageURL || (!([self.imageURL isEqualTo:url]))) { - // copy this out before it's replaced - string existingFilename; - if (self.lastArchiveTimestamp) - existingFilename = _zip.zipEntrys()[_fileIndex].filename; - BOOL isArchiveLoaded = [self loadArchive:filename]; - if (!isArchiveLoaded) { + NSDirectoryEnumerator *directoryEnumerator = [[NSFileManager defaultManager] enumeratorAtURL:url includingPropertiesForKeys:[NSArray array] options:0 errorHandler://nil + ^BOOL(NSURL *url, NSError *error) { + macroUnusedVar(url); + macroUnusedVar(error); + + // handle error return NO; + } + ]; + + vector files; + while (NSURL *fileOrDirectoryURL = [directoryEnumerator nextObject]) { + const char* name = fileOrDirectoryURL.fileSystemRepresentation; + + // filter only types that are supported + if (endsWithExtension(name, ".ktx") || + endsWithExtension(name, ".ktx2") + // || endsWithExtension(name, ".png") // TODO: can't support with KTXImage load path, needs PNG loader + + ) + { + files.push_back(name); + } } - // store the archive url - self.imageURL = url; - self.lastArchiveTimestamp = archiveTimestamp; + // don't change to this folder if it's devoid of content + if (files.empty()) { + return NO; + } // add it to recent docs NSDocumentController* dc = [NSDocumentController sharedDocumentController]; [dc noteNewRecentDocumentURL:url]; - // now reload the filename if needed - const ZipEntry* formerEntry = _zip.zipEntry(existingFilename.c_str()); - if (formerEntry) { - // lookup the index in the remapIndices table - _fileIndex = (uintptr_t)(formerEntry - &_zip.zipEntrys().front()); - } - else { - _fileIndex = 0; + // sort them + sort(files.begin(), files.end()); + + // replicate archive logic below + + self.imageURL = url; + + // preserve old folder + string existingFilename; + if 
(_fileFolderIndex < (int32_t)_folderFiles.size()) + existingFilename = _folderFiles[_fileFolderIndex]; + else + _fileFolderIndex = 0; + + _folderFiles = files; + + // TODO: preserve filename before load, and restore that index, by finding that name in refreshed folder list + + if (!existingFilename.empty()) { + uint32_t index = 0; + for (const auto& fileIt : _folderFiles) { + if (fileIt == existingFilename) { + break; + } + } + + _fileFolderIndex = index; } } - const auto& entry =_zip.zipEntrys()[_fileIndex]; - const char* filename = entry.filename; - double timestamp = entry.modificationDate; - + // now load image from directory + _showSettings->isArchive = false; + _showSettings->isFolder = true; + + // now load the file at the index setErrorLogCapture(true); - BOOL success = [self loadTextureFromArchive:filename timestamp:timestamp]; + BOOL success = [self loadTextureFromFolder]; if (!success) { + // get back error text from the failed load string errorText; getErrorLogCaptureText(errorText); setErrorLogCapture(false); + const string& filename = _folderFiles[_fileFolderIndex]; + // prepend filename string finalErrorText; append_sprintf(finalErrorText, - "Could not load from archive:\n %s\n", filename); + "Could not load from folder:\n %s\n", filename.c_str()); finalErrorText += errorText; [self setHudText: finalErrorText.c_str()]; @@ -2240,8 +2431,12 @@ - (BOOL)loadTextureFromURL:(NSURL*)url { setErrorLogCapture(false); return success; } - - if (!(endsWithExtension(filename, ".png") || + + //------------------- + + // file is not a supported extension + if (!(endsWithExtension(filename, ".zip") || + endsWithExtension(filename, ".png") || endsWithExtension(filename, ".ktx") || endsWithExtension(filename, ".ktx2")) ) { @@ -2249,19 +2444,88 @@ - (BOOL)loadTextureFromURL:(NSURL*)url { string finalErrorText; append_sprintf(finalErrorText, - "Could not load from archive:\n %s\n", filename); + "Could not load from file:\n %s\n", filename); finalErrorText += errorText; [self setHudText: finalErrorText.c_str()]; return NO; } + + //------------------- + + if (endsWithExtension(filename, ".zip")) { + auto archiveTimestamp = FileHelper::modificationTimestamp(filename); + + if (!self.imageURL || (!([self.imageURL isEqualTo:url])) || (self.lastArchiveTimestamp != archiveTimestamp)) { + + // copy this out before it's replaced + string existingFilename; + if (_fileArchiveIndex < (int32_t)_zip.zipEntrys().size()) + existingFilename = _zip.zipEntrys()[_fileArchiveIndex].filename; + else + _fileArchiveIndex = 0; + + BOOL isArchiveLoaded = [self loadArchive:filename]; + if (!isArchiveLoaded) { + return NO; + } + + // store the archive url + self.imageURL = url; + self.lastArchiveTimestamp = archiveTimestamp; + + // add it to recent docs + NSDocumentController* dc = [NSDocumentController sharedDocumentController]; + [dc noteNewRecentDocumentURL:url]; + + // now reload the filename if needed + if (!existingFilename.empty()) { + const ZipEntry* formerEntry = _zip.zipEntry(existingFilename.c_str()); + if (formerEntry) { + // lookup the index in the remapIndices table + _fileArchiveIndex = (uintptr_t)(formerEntry - &_zip.zipEntrys().front()); + } + else { + _fileArchiveIndex = 0; + } + } + } + + setErrorLogCapture(true); + BOOL success = [self loadTextureFromArchive]; + + if (!success) { + // get back error text from the failed load + string errorText; + getErrorLogCaptureText(errorText); + setErrorLogCapture(false); + + const auto& entry =_zip.zipEntrys()[_fileArchiveIndex]; + const char* filename = 
entry.filename; + + // prepend filename + string finalErrorText; + append_sprintf(finalErrorText, + "Could not load from archive:\n %s\n", filename); + finalErrorText += errorText; + + [self setHudText: finalErrorText.c_str()]; + } + + setErrorLogCapture(false); + return success; + } + + //------------------- + Renderer* renderer = (Renderer*)self.delegate; setErrorLogCapture(true); BOOL success = [renderer loadTexture:url]; if (!success) { + // get back error text from the failed load string errorText; getErrorLogCaptureText(errorText); setErrorLogCapture(false); @@ -2310,6 +2574,7 @@ - (BOOL)loadTextureFromURL:(NSURL*)url { } _showSettings->isArchive = false; + _showSettings->isFolder = false; // show/hide button [self updateUIAfterLoad]; diff --git a/libkram/kram/KramLog.cpp b/libkram/kram/KramLog.cpp index dd58e523..1d0c2d4b 100644 --- a/libkram/kram/KramLog.cpp +++ b/libkram/kram/KramLog.cpp @@ -122,6 +122,8 @@ bool endsWith(const string& value, const string& ending) if (ending.size() > value.size()) { return false; } + + // reverse comparison at end of value return equal(ending.rbegin(), ending.rend(), value.rbegin()); } diff --git a/libkram/kram/KramZipHelper.cpp b/libkram/kram/KramZipHelper.cpp index a41a1141..49aa5b27 100644 --- a/libkram/kram/KramZipHelper.cpp +++ b/libkram/kram/KramZipHelper.cpp @@ -44,6 +44,22 @@ bool ZipHelper::openForRead(const uint8_t* zipData_, uint64_t zipDataSize) { // return true; } +void ZipHelper::filterExtensions(const vector& extensions) { + + vector zipEntrysFiltered; + + std::copy_if(_zipEntrys.begin(), _zipEntrys.end(), std::back_inserter(zipEntrysFiltered), [&extensions](const auto& zipEntry) { + for (const auto& ext : extensions) { + if (endsWithExtension(zipEntry.filename, ext)) { + return true; + } + } + return false; + }); + + _zipEntrys = zipEntrysFiltered; +} + void ZipHelper::close() { if (zip != nullptr) { mz_zip_end(zip.get()); diff --git a/libkram/kram/KramZipHelper.h b/libkram/kram/KramZipHelper.h index e224c7f3..cf5208aa 100644 --- a/libkram/kram/KramZipHelper.h +++ b/libkram/kram/KramZipHelper.h @@ -35,6 +35,9 @@ struct ZipHelper { bool openForRead(const uint8_t* zipData, uint64_t zipDataSize); void close(); + // Only keep entries that match the extensions provided + void filterExtensions(const vector& extensions); + // buffer is resized if smaller, can use to lookat headers (f.e. 
ktx or mod) // the zip decodes only the length of the buffer passed in, and this should be small // since an iterator is called once to extract data From 1bacf05d78b921d8df17aa11bc74296cb7724a7e Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Mon, 31 May 2021 23:11:54 -0700 Subject: [PATCH 103/901] kramv - turn on sandbox --- kram-thumb/KramThumbnailProvider.mm | 2 +- kramv/KramShaders.metal | 16 +++++++--------- kramv/kramv.entitlements | 2 +- 3 files changed, 9 insertions(+), 11 deletions(-) diff --git a/kram-thumb/KramThumbnailProvider.mm b/kram-thumb/KramThumbnailProvider.mm index 2f31350d..e65e6514 100644 --- a/kram-thumb/KramThumbnailProvider.mm +++ b/kram-thumb/KramThumbnailProvider.mm @@ -128,7 +128,7 @@ - (void)provideThumbnailForFileRequest:(QLFileThumbnailRequest *)request complet vector dstMipData; // want to just decode one chunk of the level that was unpacked abovve - if (!decoder.decodeBlocks(w, h, mipData.data(), mipData.size(), image.pixelFormat, dstMipData, params)) { + if (!decoder.decodeBlocks(w, h, mipData.data(), (int32_t)mipData.size(), image.pixelFormat, dstMipData, params)) { KLOGF("kramv %s failed to decode blocks\n", filename); return NO; } diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index b064f983..faad3b9e 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -553,8 +553,8 @@ float4 doLighting(float4 albedo, float3 viewDir, float3 n) { return albedo; } -// TODO: eliminate the toUnorm() calls below, rendering to rgba16f -// but also need to remove conversion code on cpu side expecting unorm in eyedropper +// TODO: eliminate the toUnorm() calls below, rendering to rgba16f but then present +// doesn't have enough info to remap 16F to the display. float4 DrawPixels( ColorInOut in [[stage_in]], @@ -1109,7 +1109,7 @@ kernel void SampleImageCS( uint2 uv = uniforms.uv; // tie into texture lookup // uv >>= uniforms.mipLOD; - // the color returned is linear + // the color is returned to linear rgba32f float4 color = colorMap.read(uv, uniforms.mipLOD); result.write(color, index); } @@ -1128,7 +1128,7 @@ kernel void SampleImageArrayCS( uint arrayOrSlice = uniforms.arrayOrSlice; - // the color returned is linear + // the color is returned to linear rgba32f float4 color = colorMap.read(uv, arrayOrSlice, uniforms.mipLOD); result.write(color, index); } @@ -1147,9 +1147,7 @@ kernel void SampleCubeCS( uint face = uniforms.face; - // This writes out linear float32, can do srgb conversion on cpu side - - // the color returned is linear + // the color is returned to linear rgba32f float4 color = colorMap.read(uv, face, uniforms.mipLOD); result.write(color, index); } @@ -1170,7 +1168,7 @@ kernel void SampleCubeArrayCS( uint face = uniforms.face; uint arrayOrSlice = uniforms.arrayOrSlice; - // the color returned is linear + // the color is returned to linear rgba32f float4 color = colorMap.read(uv, face, arrayOrSlice, uniforms.mipLOD); result.write(color, index); } @@ -1187,7 +1185,7 @@ kernel void SampleVolumeCS( uint3 uv = uint3(uniforms.uv, uniforms.arrayOrSlice); // tie into texture lookup //uv >>= uniforms.mipLOD); - // the color returned is linear + // the color is returned to linear rgba32f float4 color = colorMap.read(uv, uniforms.mipLOD); result.write(color, index); } diff --git a/kramv/kramv.entitlements b/kramv/kramv.entitlements index 311b32bd..18aff0ce 100644 --- a/kramv/kramv.entitlements +++ b/kramv/kramv.entitlements @@ -3,7 +3,7 @@ com.apple.security.app-sandbox - + com.apple.security.files.user-selected.read-only From 
5e112685d38b32ec0bde0fd69dabfd5e5647b484 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Tue, 1 Jun 2021 08:25:03 -0700 Subject: [PATCH 104/901] kram - fix win build on ZipHelper copy_if needs include --- libkram/kram/KramZipHelper.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/libkram/kram/KramZipHelper.cpp b/libkram/kram/KramZipHelper.cpp index 49aa5b27..6e79ee5b 100644 --- a/libkram/kram/KramZipHelper.cpp +++ b/libkram/kram/KramZipHelper.cpp @@ -9,6 +9,9 @@ //#include //#include +#include // for copy_if +#include + #include "miniz.h" namespace kram { From 3cfbca7410784e4521cb1f9ff9d15b3d47cea1a9 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Tue, 1 Jun 2021 08:45:19 -0700 Subject: [PATCH 105/901] kram - one more time Win stl fix --- libkram/kram/KramZipHelper.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libkram/kram/KramZipHelper.cpp b/libkram/kram/KramZipHelper.cpp index 6e79ee5b..7f100aea 100644 --- a/libkram/kram/KramZipHelper.cpp +++ b/libkram/kram/KramZipHelper.cpp @@ -9,7 +9,8 @@ //#include //#include -#include // for copy_if +#include // for copy_if on Win +#include // for copy_if on Win #include #include "miniz.h" From 92ad9811edc9b8323bbb9b0712064ffddafb70c3 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Fri, 4 Jun 2021 22:20:51 -0700 Subject: [PATCH 106/901] kram - added KTXImageData to isolate the memory backing a KTXImage There were too many steps to wrap mmap, file helper, and alias data from a bundle. Switch FileHelper to size_t. Used to be int, but that's limited to 32-bit. Simplify kram setup code for dealing with files. PNG is still not tied into KTXImage so has to replicate some things. Bubbling KTXImage up and out of loading. It's useful to decode blocks and reference in the viewer. --- kramv/KramRenderer.mm | 52 +++----- kramv/KramViewerMain.mm | 116 ++++++++++------ libkram/kram/KTXImage.cpp | 29 ++-- libkram/kram/KTXImage.h | 3 +- libkram/kram/Kram.cpp | 229 ++++++++++++++++++-------------- libkram/kram/Kram.h | 17 +++ libkram/kram/KramFileHelper.cpp | 102 ++------------ libkram/kram/KramFileHelper.h | 12 +- 8 files changed, 276 insertions(+), 284 deletions(-) diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index 0b53f5ba..92defb1d 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -511,13 +511,10 @@ - (void)_loadAssets } -- (BOOL)loadTextureFromData:(const string&)fullFilename - timestamp:(double)timestamp - imageData:(nonnull const uint8_t*)imageData - imageDataLength:(uint64_t)imageDataLength - imageNormalData:(nullable const uint8_t*)imageNormalData - imageNormalDataLength:(uint64_t)imageNormalDataLength - +- (BOOL)loadTextureFromImage:(const string&)fullFilename + timestamp:(double)timestamp + image:(kram::KTXImage&)image + imageNormal:(kram::KTXImage*)imageNormal { // image can be decoded to rgba8u if platform can't display format natively // but still want to identify blockSize from original format @@ -527,42 +524,28 @@ - (BOOL)loadTextureFromData:(const string&)fullFilename (fullFilename != _showSettings->lastFilename) || (timestamp != _showSettings->lastTimestamp); - if (isTextureChanged) { - // synchronously cpu upload from ktx file to buffer, with eventual gpu blit from buffer to returned texture + if (isTextureChanged) { + // synchronously cpu upload from ktx file to buffer, with eventual gpu blit from buffer to returned texture. TODO: If buffer is full, then something needs to keep KTXImage and data alive. This load may also decode the texture to RGBA8. 
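        // As a rough sketch (names taken from the KTXImageData and loadTextureFromImage additions
        // in this patch; the actual call site is in KramViewerMain.mm), the intended usage is:
        //
        //   KTXImage image;
        //   KTXImageData imageData;                 // keeps the mmap or file buffer alive
        //   if (imageData.open(fullFilename.c_str(), image)) {
        //       [renderer loadTextureFromImage:fullFilename timestamp:timestamp
        //                                image:image imageNormal:nullptr];
        //   }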
+ MTLPixelFormat originalFormatMTL = MTLPixelFormatInvalid; - id texture = [_loader loadTextureFromData:imageData imageDataLength:imageDataLength originalFormat:&originalFormatMTL]; + id texture = [_loader loadTextureFromImage:image originalFormat:&originalFormatMTL]; if (!texture) { return NO; } // hacking in the normal texture here, so can display them together during preview id normalTexture; - if (imageNormalData) { - KTXImage imageNormal; - if (imageNormal.open(imageNormalData, imageNormalDataLength, true)) { - // only have shaders that expects diffuse/normal to be same texture type - if (imageNormal.textureType == (MyMTLTextureType)texture.textureType && - (imageNormal.textureType == MyMTLTextureType2D || imageNormal.textureType == MyMTLTextureType2DArray)) - { - normalTexture = [_loader loadTextureFromData:imageNormalData imageDataLength:imageNormalDataLength originalFormat:nil]; - if (!normalTexture) { - return NO; - } - } + if (imageNormal) { + normalTexture = [_loader loadTextureFromImage:*imageNormal originalFormat:nil]; + if (!normalTexture) { + return NO; } } + // archive shouldn't contain png, so only support ktx/ktx2 here - // TODO: have loader return KTXImage instead of parsing it again - // then can decode blocks in kramv - KTXImage sourceImage; - bool isInfoOnly = true; - if (!sourceImage.open(imageData, imageDataLength, isInfoOnly)) { - return NO; - } - - _showSettings->imageInfo = kramInfoKTXToString(fullFilename, sourceImage, false); - _showSettings->imageInfoVerbose = kramInfoKTXToString(fullFilename, sourceImage, true); + _showSettings->imageInfo = kramInfoKTXToString(fullFilename, image, false); + _showSettings->imageInfoVerbose = kramInfoKTXToString(fullFilename, image, true); _showSettings->originalFormat = (MyMTLPixelFormat)originalFormatMTL; _showSettings->decodedFormat = (MyMTLPixelFormat)texture.pixelFormat; @@ -597,13 +580,16 @@ - (BOOL)loadTexture:(nonnull NSURL *)url // image can be decoded to rgba8u if platform can't display format natively // but still want to identify blockSize from original format if (isTextureChanged) { - // synchronously cpu upload from ktx file to texture + MTLPixelFormat originalFormatMTL = MTLPixelFormatInvalid; id texture = [_loader loadTextureFromURL:url originalFormat:&originalFormatMTL]; if (!texture) { return NO; } + // This doesn't look for or load corresponding normal map, but should + + // TODO:: this reloads KTXImage twice over _showSettings->imageInfo = kramInfoToString(fullFilename, false); _showSettings->imageInfoVerbose = kramInfoToString(fullFilename, true); diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index d0166c32..f1617780 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -21,6 +21,7 @@ #include "KramLog.h" #include "KramMipper.h" +#include "Kram.h" #include "KramFileHelper.h" #include "KramMmapHelper.h" #include "KramZipHelper.h" @@ -2134,27 +2135,10 @@ - (BOOL)loadTextureFromFolder { return NO; } - - const uint8_t* imageData = nullptr; - uint64_t imageDataLength = 0; - - // TODO: assuming can mmap here, but may need FileHelper fallback - MmapHelper imageMmap; - if (!imageMmap.open(filename)) { - return NO; - } - - imageData = imageMmap.data(); - imageDataLength = imageMmap.dataLength(); - - // see if this is albedo, and then search for normal map in the same archive - const uint8_t* imageNormalData = nullptr; - uint64_t imageNormalDataLength = 0; - MmapHelper imageNormalMmap; + // first only do this on albedo/diffuse textures string normalFilename = filename; - // first only 
do this on albedo/diffuse textures string search = "-a.ktx"; auto searchPos = normalFilename.find(search); bool isFound = searchPos != string::npos; @@ -2177,22 +2161,42 @@ - (BOOL)loadTextureFromFolder } } - if (isFound) { - if (imageNormalMmap.open(normalFilename.c_str())) { - imageNormalData = imageNormalMmap.data(); - imageNormalDataLength = imageNormalMmap.dataLength(); - } + if (!isFound) { + normalFilename.clear(); } } + //------------------------------- + + KTXImage image; + KTXImageData imageDataKTX; + + KTXImage imageNormal; + KTXImageData imageNormalDataKTX; + bool hasNormal = false; + string fullFilename = filename; + if (!imageDataKTX.open(fullFilename.c_str(), image)) { + return NO; + } + + if (isFound && imageNormalDataKTX.open(normalFilename.c_str(), imageNormal)) { + + // shaders only pull from albedo + normal on these texture types + if (imageNormal.textureType == image.textureType && + (imageNormal.textureType == MyMTLTextureType2D || + imageNormal.textureType == MyMTLTextureType2DArray)) + { + hasNormal = true; + } + } + Renderer* renderer = (Renderer*)self.delegate; - if (![renderer loadTextureFromData:fullFilename timestamp:(double)timestamp - imageData:imageData imageDataLength:imageDataLength - imageNormalData:imageNormalData imageNormalDataLength:imageNormalDataLength]) - { + if (![renderer loadTextureFromImage:fullFilename timestamp:timestamp image:image imageNormal:hasNormal ? &imageNormal : nullptr]) { return NO; } + + //------------------------------- // set title to filename, chop this to just file+ext, not directory const char* filenameShort = strrchr(filename, '/'); @@ -2244,16 +2248,6 @@ - (BOOL)loadTextureFromArchive return NO; } - const uint8_t* imageData = nullptr; - uint64_t imageDataLength = 0; - if (!_zip.extractRaw(filename, &imageData, imageDataLength)) { - return NO; - } - - // see if this is albedo, and then search for normal map in the same archive - const uint8_t* imageNormalData = nullptr; - uint64_t imageNormalDataLength = 0; - string normalFilename = filename; // first only do this on albedo/diffuse textures @@ -2268,22 +2262,62 @@ - (BOOL)loadTextureFromArchive } if (isFound) { - normalFilename = normalFilename.replace(searchPos, search.length(), "-n.ktx"); // works for ktx or ktx2 file + normalFilename = normalFilename.replace(searchPos, search.length(), "-n.ktx"); // works for + } + //--------------------------- + + const uint8_t* imageData = nullptr; + uint64_t imageDataLength = 0; + + if (!_zip.extractRaw(filename, &imageData, imageDataLength)) { + return NO; + } + + const uint8_t* imageNormalData = nullptr; + uint64_t imageNormalDataLength = 0; + + // see if this is albedo, and then search for normal map in the same archive + if (isFound) { if (!_zip.extractRaw(normalFilename.c_str(), &imageNormalData, imageNormalDataLength)) { // ignore failure case here, this is just guessing there's a -n file } } + //--------------------------- + + // files in archive are just offsets into the mmap + // That's why we can't just pass filenames to the renderer + KTXImage image; + KTXImageData imageDataKTX; + if (!imageDataKTX.open(imageData, imageDataLength, image)) { + return NO; + } + + KTXImage imageNormal; + KTXImageData imageNormalDataKTX; + bool hasNormal = false; + if (isFound && imageNormalDataKTX.open(imageNormalData, imageNormalDataLength, imageNormal)) { + + // shaders only pull from albedo + normal on these texture types + if (imageNormal.textureType == image.textureType && + (imageNormal.textureType == MyMTLTextureType2D || + 
imageNormal.textureType == MyMTLTextureType2DArray)) + { + hasNormal = true; + } + } + string fullFilename = filename; Renderer* renderer = (Renderer*)self.delegate; - if (![renderer loadTextureFromData:fullFilename timestamp:(double)timestamp - imageData:imageData imageDataLength:imageDataLength - imageNormalData:imageNormalData imageNormalDataLength:imageNormalDataLength]) + if (![renderer loadTextureFromImage:fullFilename timestamp:(double)timestamp + image:image imageNormal:hasNormal ? &imageNormal : nullptr]) { return NO; } + //--------------------------------- + // set title to filename, chop this to just file+ext, not directory const char* filenameShort = strrchr(filename, '/'); if (filenameShort == nullptr) { diff --git a/libkram/kram/KTXImage.cpp b/libkram/kram/KTXImage.cpp index 9ee2ee67..1dd717f9 100644 --- a/libkram/kram/KTXImage.cpp +++ b/libkram/kram/KTXImage.cpp @@ -1502,11 +1502,7 @@ bool KTXImage::openKTX2(const uint8_t* imageData, size_t imageDataLength, bool i // compute the decompressed size // Note: initMipLevels computes but doesn't store this - fileDataLength = mipLevels.back().offset + mipLevels.back().length * numChunks; - - // DONE: this memory is held in the class to keep it alive, mmap is no longer used - imageDataFromKTX2.resize(fileDataLength, 0); - fileData = imageDataFromKTX2.data(); + reserveImageData(); // TODO: may need to fill out length field in fileData @@ -1601,22 +1597,33 @@ bool KTXImage::unpackLevel(uint32_t mipNumber, const uint8_t* srcData, uint8_t* } vector& KTXImage::imageData() { - return imageDataFromKTX2; + return _imageData; } void KTXImage::reserveImageData() { int32_t numChunks = totalChunks(); // on KTX1 the last mip is the smallest and last in the file + // on KTX2 the first mip is the largest and last in the file + const auto& firstMip = mipLevels[0]; const auto& lastMip = mipLevels[header.numberOfMipmapLevels-1]; - size_t totalKTXSize = + + size_t firstMipOffset = + firstMip.offset + firstMip.length * numChunks; + size_t lastMipOffset = lastMip.offset + lastMip.length * numChunks; - imageDataFromKTX2.resize(totalKTXSize); - memset(imageDataFromKTX2.data(), 0, totalKTXSize); + size_t totalSize = max(firstMipOffset, lastMipOffset); - fileDataLength = totalKTXSize; - fileData = imageDataFromKTX2.data(); + reserveImageData(totalSize); } +void KTXImage::reserveImageData(size_t totalSize) { + + _imageData.resize(totalSize); + memset(_imageData.data(), 0, totalSize); + + fileDataLength = totalSize; + fileData = _imageData.data(); +} } // namespace kram diff --git a/libkram/kram/KTXImage.h b/libkram/kram/KTXImage.h index 771b3d9c..e4a15441 100644 --- a/libkram/kram/KTXImage.h +++ b/libkram/kram/KTXImage.h @@ -299,6 +299,7 @@ class KTXImage { // this is where KTXImage holds all mip data internally void reserveImageData(); + void reserveImageData(size_t totalSize); vector& imageData(); // for KTX2 files, the mips can be compressed using various encoders @@ -332,7 +333,7 @@ class KTXImage { bool openKTX2(const uint8_t* imageData, size_t imageDataLength, bool isInfoOnly); // ktx2 mips are uncompressed to convert back to ktx1, but without the image offset - vector imageDataFromKTX2; + vector _imageData; public: // TODO: bury this MyMTLTextureType textureType = MyMTLTextureType2D; diff --git a/libkram/kram/Kram.cpp b/libkram/kram/Kram.cpp index b4d4c75d..85d68b09 100644 --- a/libkram/kram/Kram.cpp +++ b/libkram/kram/Kram.cpp @@ -34,6 +34,67 @@ namespace kram { using namespace std; + + +bool KTXImageData::open(const char* filename, KTXImage& image) 
{ + bool useMmap = true; + if (!mmapHelper.open(filename)) { + useMmap = false; + + // open file, copy it to memory, then close it + FileHelper fileHelper; + if (!fileHelper.open(filename, "rb")) { + return false; + } + + // read the file into memory + size_t size = fileHelper.size(); + if (size == (size_t)-1) { + return false; + } + + fileData.resize(size); + if (!fileHelper.read(fileData.data(), size)) { + return false; + } + } + + // read the KTXImage in from the data, it will alias mmap or fileData + if (useMmap) { + if (!image.open(mmapHelper.data(), mmapHelper.dataLength(), isInfoOnly)) { + return false; + } + } + else { + if (!image.open(fileData.data(), fileData.size(), isInfoOnly)) { + return false; + } + } + + return true; +} + +bool KTXImageData::open(const uint8_t* data, size_t dataSize, KTXImage& image) +{ + if (!image.open(data, dataSize, isInfoOnly)) { + return false; + } + return true; +} + +// decoding reads a ktx file into KTXImage (not Image) +bool SetupSourceKTX(KTXImageData& srcImageData, + const string& srcFilename, + KTXImage& sourceImage) +{ + if (!srcImageData.open(srcFilename.c_str(), sourceImage)) { + KLOGE("Kram", "File input \"%s\" could not be opened for read.\n", + srcFilename.c_str()); + return false; + } + return true; +} + // Twiddle pixels or blocks into Morton order. Usually this is done during the upload of // linear-order block textures. But on some platforms may be able to directly use the block // and pixel data if organized in the exact twiddle order the hw uses. @@ -205,8 +266,8 @@ bool SetupTmpFile(FileHelper& tmpFileHelper, const char* suffix) return tmpFileHelper.openTemporaryFile(suffix, "w+b"); } -bool SetupSourceImage(MmapHelper& mmapHelper, FileHelper& fileHelper, - vector& fileBuffer, +bool SetupSourceImage(//MmapHelper& mmapHelper, FileHelper& fileHelper, + //vector& fileBuffer, const string& srcFilename, Image& sourceImage, bool isPremulSrgb = false, bool isGray = false) { @@ -219,28 +280,17 @@ bool SetupSourceImage(MmapHelper& mmapHelper, FileHelper& fileHelper, return false; } + // TODO: really KTXImageData + MmapHelper mmapHelper; + FileHelper fileHelper; + vector fileData; + // first try mmap, and then use file -> buffer bool useMmap = true; if (!mmapHelper.open(srcFilename.c_str())) { - // fallback to opening file if no mmap support or it didn't work useMmap = false; - } - - if (useMmap) { - if (isKTX) { // really want endsWidth - if (!LoadKtx(mmapHelper.data(), mmapHelper.dataLength(), - sourceImage)) { - return false; // error - } - } - else if (isPNG) { - if (!LoadPng(mmapHelper.data(), mmapHelper.dataLength(), isPremulSrgb, isGray, - sourceImage)) { - return false; // error - } - } - } - else { + + // fallback to opening file if no mmap support or it didn't work if (!fileHelper.open(srcFilename.c_str(), "rb")) { KLOGE("Kram", "File input \"%s\" could not be opened for read.\n", srcFilename.c_str()); @@ -249,68 +299,50 @@ bool SetupSourceImage(MmapHelper& mmapHelper, FileHelper& fileHelper, // read entire png into memory size_t size = fileHelper.size(); - fileBuffer.resize(size); - - if (!fileHelper.read(fileBuffer.data(), size)) { + if (size == (size_t)-1) { return false; } + + fileData.resize(size); - if (isKTX) { - if (!LoadKtx(fileBuffer.data(), fileBuffer.size(), - sourceImage)) { + if (!fileHelper.read(fileData.data(), size)) { + return false; + } + } + + if (isPNG) { + if (useMmap) { + if (!LoadPng(mmapHelper.data(), mmapHelper.dataLength(), isPremulSrgb, isGray, + sourceImage)) { return false; // error } } - else if 
(isPNG) { - if (!LoadPng(fileBuffer.data(), fileHelper.size(), isPremulSrgb, isGray, + else { + if (!LoadPng(fileData.data(), fileData.size(), isPremulSrgb, isGray, sourceImage)) { return false; // error } } } - - return true; -} - -// decoding reads a ktx file into KTXImage (not Image) -bool SetupSourceKTX(MmapHelper& mmapHelper, FileHelper& fileHelper, - vector& fileBuffer, - const string& srcFilename, KTXImage& sourceImage, bool isInfoOnly = false) -{ - // first try mmap, and then use file -> buffer - bool useMmap = true; - if (!mmapHelper.open(srcFilename.c_str())) { - // fallback to file system if no mmap or failed - useMmap = false; - } - - if (useMmap) { - if (!sourceImage.open(mmapHelper.data(), mmapHelper.dataLength(), isInfoOnly)) { - return false; - } - } else { - if (!fileHelper.open(srcFilename.c_str(), "rb")) { - KLOGE("Kram", "File input \"%s\" could not be opened for read.\n", - srcFilename.c_str()); - return false; - } - - // read entire ktx into memory - size_t size = fileHelper.size(); - fileBuffer.resize(size); - if (!fileHelper.read(fileBuffer.data(), size)) { - return false; + if (useMmap) { + if (!LoadKtx(mmapHelper.data(), mmapHelper.dataLength(), sourceImage)) { + return false; // error + } } - - if (!sourceImage.open(fileBuffer.data(), (int32_t)fileBuffer.size(), isInfoOnly)) { - return false; + else { + if (!LoadKtx(fileData.data(), fileData.size(), sourceImage)) { + return false; // error + } } } - + + return true; } + + // better countof in C++11, https://www.g-truc.net/post-0708.html template constexpr size_t countof(T const (&)[N]) noexcept @@ -1281,65 +1313,65 @@ string kramInfoToString(const string& srcFilename, bool isVerbose) bool isPNG = endsWith(srcFilename, ".png"); bool isKTX = endsWith(srcFilename, ".ktx") || endsWith(srcFilename, ".ktx2"); - MmapHelper srcMmapHelper; - FileHelper srcFileHelper; - vector srcFileBuffer; - string info; // handle png and ktx if (isPNG) { + MmapHelper srcMmapHelper; + vector srcFileBuffer; + // This was taken out of SetupSourceImage, dont want to decode PNG yet // just peek at the header. 
- const uint8_t* data = nullptr; - int32_t dataSize = 0; - + // first try mmap, and then use file -> buffer bool useMmap = true; if (!srcMmapHelper.open(srcFilename.c_str())) { // fallback to file system if no mmap or it failed useMmap = false; - } - - if (useMmap) { - data = srcMmapHelper.data(); - dataSize = (int32_t)srcMmapHelper.dataLength(); - } - else { + + FileHelper srcFileHelper; if (!srcFileHelper.open(srcFilename.c_str(), "rb")) { - KLOGE("Kram", "File input \"%s\" could not be opened for read.\n", + KLOGE("Kram", "File input \"%s\" could not be opened for info read.\n", srcFilename.c_str()); return ""; } // read entire png into memory // even though really just want to peek at header - int64_t size = srcFileHelper.size(); - if (size < 0) return ""; + uint64_t size = srcFileHelper.size(); + if (size == (size_t)-1) { + return ""; + } srcFileBuffer.resize(size); if (!srcFileHelper.read(srcFileBuffer.data(), size)) { return ""; } + } + const uint8_t* data = nullptr; + size_t dataSize = 0; + + if (useMmap) { + data = srcMmapHelper.data(); + dataSize = srcMmapHelper.dataLength(); + } + else { data = srcFileBuffer.data(); - dataSize = (int32_t)srcFileBuffer.size(); + dataSize = srcFileBuffer.size(); } + info = kramInfoPNGToString(srcFilename, data, dataSize, isVerbose); } else if (isKTX) { KTXImage srcImage; - - // This means don't convert to KTX1, keep original data/offsets - // and also skip decompressing the mips - bool isInfoOnly = true; + KTXImageData srcImageData; - // Note: could change to not read any mips - bool success = SetupSourceKTX(srcMmapHelper, srcFileHelper, srcFileBuffer, - srcFilename, srcImage, isInfoOnly); + bool success = SetupSourceKTX(srcImageData, srcFilename, srcImage); if (!success) { - KLOGE("Kram", "info couldn't open ktx file"); + KLOGE("Kram", "File input \"%s\" could not be opened for info read.\n", + srcFilename.c_str()); return ""; } @@ -1740,13 +1772,10 @@ static int32_t kramAppDecode(vector& args) } KTXImage srcImage; - MmapHelper srcMmapHelper; - FileHelper srcFileHelper; + KTXImageData srcImageData; FileHelper tmpFileHelper; - vector srcFileBuffer; - - bool success = SetupSourceKTX(srcMmapHelper, srcFileHelper, srcFileBuffer, - srcFilename, srcImage); + + bool success = SetupSourceKTX(srcImageData, srcFilename, srcImage); // TODO: for hdr decode, may need to walk blocks or ask caller to pass -hdr flag if (!validateFormatAndDecoder(srcImage.textureType, srcImage.pixelFormat, textureDecoder)) { @@ -2195,13 +2224,9 @@ static int32_t kramAppEncode(vector& args) // The helper keeps ktx mips in mmap alive in case want to read them // incrementally. Fallback to read into fileBuffer if mmap fails. Image srcImage; - MmapHelper srcMmapHelper; - FileHelper srcFileHelper; FileHelper tmpFileHelper; - vector srcFileBuffer; - - bool success = SetupSourceImage(srcMmapHelper, srcFileHelper, srcFileBuffer, - srcFilename, srcImage, isPremulRgb, isGray); + + bool success = SetupSourceImage(srcFilename, srcImage, isPremulRgb, isGray); if (success) { success = SetupTmpFile(tmpFileHelper, isDstKTX2 ? 
".ktx2" : ".ktx"); diff --git a/libkram/kram/Kram.h b/libkram/kram/Kram.h index 5d715e97..4ad5eceb 100644 --- a/libkram/kram/Kram.h +++ b/libkram/kram/Kram.h @@ -5,6 +5,7 @@ #pragma once #include +#include "KramMmapHelper.h" namespace kram { using namespace std; @@ -12,6 +13,22 @@ using namespace std; class Image; class KTXImage; +// This helper needs to stay alive since KTXImage aliases it +// May be able to fold these into KTXImage since it has an internal vector already +class KTXImageData { +public: + // class keeps the data alive in mmapHelper or fileData + bool open(const char* filename, KTXImage& image); + + // class aliases data, so caller must keep alive. Useful with bundle. + bool open(const uint8_t* data, size_t dataSize, KTXImage& image); + +private: + MmapHelper mmapHelper; + vector fileData; + bool isInfoOnly = true; +}; + // helpers to source from a png or single level of a ktx bool LoadKtx(const uint8_t* data, size_t dataSize, Image& sourceImage); bool LoadPng(const uint8_t* data, size_t dataSize, bool isPremulSrgb, bool isGray, Image& sourceImage); diff --git a/libkram/kram/KramFileHelper.cpp b/libkram/kram/KramFileHelper.cpp index 67129de4..32c7f712 100644 --- a/libkram/kram/KramFileHelper.cpp +++ b/libkram/kram/KramFileHelper.cpp @@ -56,16 +56,16 @@ bool FileHelper::openTemporaryFile(const char* suffix, const char* access) return true; } -bool FileHelper::read(uint8_t* data, int dataSize) +bool FileHelper::read(uint8_t* data, size_t dataSize) { return FileHelper::readBytes(_fp, data, dataSize); } -bool FileHelper::write(const uint8_t* data, int dataSize) +bool FileHelper::write(const uint8_t* data, size_t dataSize) { return FileHelper::writeBytes(_fp, data, dataSize); } -bool FileHelper::readBytes(FILE* fp, uint8_t* data, int dataSize) +bool FileHelper::readBytes(FILE* fp, uint8_t* data, size_t dataSize) { size_t elementsRead = fread(data, 1, dataSize, fp); if (elementsRead != (size_t)dataSize) { @@ -73,7 +73,7 @@ bool FileHelper::readBytes(FILE* fp, uint8_t* data, int dataSize) } return true; } -bool FileHelper::writeBytes(FILE* fp, const uint8_t* data, int dataSize) +bool FileHelper::writeBytes(FILE* fp, const uint8_t* data, size_t dataSize) { size_t elementsWritten = fwrite(data, 1, dataSize, fp); if (elementsWritten != (size_t)dataSize) { @@ -107,14 +107,14 @@ bool FileHelper::copyTemporaryFileTo(const char* dstFilename) // since we're not closing, need to flush output fflush(_fp); - int size_ = size(); - if (size_ < 0) { + size_t size_ = size(); + if (size_ == (size_t)-1) { return false; } // DONE: copy in smaller buffered chunks - int maxBufferSize = 256*1024; - int bufferSize = min(size_, maxBufferSize); + size_t maxBufferSize = 256*1024; + size_t bufferSize = min(size_, maxBufferSize); vector tmpBuf; tmpBuf.resize(bufferSize); @@ -136,7 +136,7 @@ bool FileHelper::copyTemporaryFileTo(const char* dstFilename) return false; } - int bytesRemaining = size_; + size_t bytesRemaining = size_; while(bytesRemaining > 0) { int bytesToRead = min(bufferSize, bytesRemaining); bytesRemaining -= bytesToRead; @@ -154,84 +154,6 @@ bool FileHelper::copyTemporaryFileTo(const char* dstFilename) return true; } -/* This code was original attempt to move file, but it interfered with unlink of the file - since a closed file was needed for rename() and many many other issues. 
- -bool FileHelper::moveTemporaryFileTo(const char* dstFilename) -{ - if (!_fp) return false; - if (_filename.empty()) return false; - -#if USE_TMPFILEPLUS - fclose(_fp); - - // windows doesn't remove any existing file, so have to do it explicitly - //remove(dstFilename); - // - // now do the rename - // rename on Windows does a copy if different volumes, but no way to identify if it did or moved the file - // so tmp file would need to be auto deleted then. Could call MoveFileEx twice, with and without COPY - // if the move failed. - //bool success = (rename(_filename.c_str(), dstFilename) == 0); - - // this is probably better than remove/rename, and maybe works across volumes/partitions - // this can't replace directories and will fail, only for files - bool success = MoveFileEx(_filename.c_str(), dstFilename, MOVEFILE_REPLACE_EXISTING) == 0; - if (!success) { - MoveFileEx(_filename.c_str(), dstFilename, MOVEFILE_COPY_ALLOWED); - - // since move was expected, delete the source, it's had fclose already called - remove(_filename.c_str()); - } - - // so that close doesn't do another fclose() - _fp = nullptr; - _isTmpFile = false; - _filename.clear(); - -#else - // since we're not closing, need to flush output - fflush(_fp); - - // somehow even though the file isn't closed, can rename it - // if an open temp file is closed, then the following fails since the fd/fp are released. - - // rename() only works if tmp and filename are on same volume - // and must have an actual filename to call this, which tmpfile() doesn't supply - // this removes any old file present - bool success = (rename(_filename.c_str(), dstFilename) == 0); - - // Some impls of rename don't work with directories, but that's all the docs say. - // This is to fix rename also failing on mac/linux if going cross volume, win does copy behind the scenes - // but using USE_TMPFILEPLUS code above instead on Win. 
- if (!success) { - - size_t size_ = size(); - vector tmpBuf; - tmpBuf.resize(size_); - - rewind(_fp); - if (!read(tmpBuf.data(), size_)) { - return false; - } - // need to fopen file on other volume, then buffer copy the contents over to the over drive - FileHelper moveHelper; - if (!moveHelper.open(dstFilename, "w+b")) { - return false; - } - if (!moveHelper.write(tmpBuf.data(), size_)) { - return false; - } - - // close() should delete the original file - } -#endif - // Note: this doesn't change _filename to dstFilename - - return success; -} -*/ - bool FileHelper::open(const char* filename, const char* access) { close(); @@ -258,11 +180,11 @@ void FileHelper::close() _fp = nullptr; } -int64_t FileHelper::size() const +size_t FileHelper::size() const { // returns -1, so can't return size_t if (!_fp) { - return -1; + return (size_t)-1; } // otherwise fstat won't extract the size @@ -272,7 +194,7 @@ int64_t FileHelper::size() const struct stat stats; if (fstat(fd, &stats) < 0) { - return -1; + return (size_t)-1; } return (int64_t)stats.st_size; } diff --git a/libkram/kram/KramFileHelper.h b/libkram/kram/KramFileHelper.h index 64ff56d5..304f3239 100644 --- a/libkram/kram/KramFileHelper.h +++ b/libkram/kram/KramFileHelper.h @@ -32,18 +32,18 @@ class FileHelper { void close(); - // returns -1 if stat call fails - int64_t size() const; + // returns (size_t)-1 if stat call fails + size_t size() const; FILE* pointer() { return _fp; } // safe calls that test bytes read/written - bool read(uint8_t* data, int dataSize); - bool write(const uint8_t* data, int dataSize); + bool read(uint8_t* data, size_t dataSize); + bool write(const uint8_t* data, size_t dataSize); // if caller only has FILE* then can use these - static bool readBytes(FILE* fp, uint8_t* data, int dataSize); - static bool writeBytes(FILE* fp, const uint8_t* data, int dataSize); + static bool readBytes(FILE* fp, uint8_t* data, size_t dataSize); + static bool writeBytes(FILE* fp, const uint8_t* data, size_t dataSize); // return mod stamp on filename static uint64_t modificationTimestamp(const char* filename); From 86bd26a038714cc1a54269a7e67278d62842912e Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Fri, 4 Jun 2021 23:30:10 -0700 Subject: [PATCH 107/901] kramv - add missing files related to simplifying loading --- kram-thumb/KramThumbnailProvider.mm | 39 +++++++++------------------ kramv/KramLoader.h | 10 +++++-- kramv/KramLoader.mm | 41 ++++++++++++++++++++--------- kramv/KramRenderer.h | 11 ++++---- 4 files changed, 54 insertions(+), 47 deletions(-) diff --git a/kram-thumb/KramThumbnailProvider.mm b/kram-thumb/KramThumbnailProvider.mm index e65e6514..cf264062 100644 --- a/kram-thumb/KramThumbnailProvider.mm +++ b/kram-thumb/KramThumbnailProvider.mm @@ -8,7 +8,6 @@ #import "KramThumbnailProvider.h" #include "Kram.h" -#include "KramMmapHelper.h" #include "KramLog.h" #include "KTXImage.h" #include "KramImage.h" // for KramDecoder @@ -40,32 +39,20 @@ - (void)provideThumbnailForFileRequest:(QLFileThumbnailRequest *)request complet // Second way: Draw the thumbnail into a context passed to your block, set up with Core Graphics's coordinate system. 
handler([QLThumbnailReply replyWithContextSize:request.maximumSize drawingBlock:^BOOL(CGContextRef _Nonnull context) { - const char* filename = [request.fileURL fileSystemRepresentation]; - - if (!(endsWith(filename, ".ktx") || endsWith(filename, ".ktx2"))) { - KLOGF("kramv %s only supports ktx/ktx2 files\n", filename); - return NO; - } - - // load the mmap file, and interpret it as a KTXImage - MmapHelper mmapHelper; - if (!mmapHelper.open(filename)) { - KLOGF("kramv %s failed to mmap\n", filename); - return NO; - } - - // TODO: might need to try FileHelper for non-local thumbnails + const char* filename = [request.fileURL fileSystemRepresentation]; + + if (!(endsWith(filename, ".ktx") || endsWith(filename, ".ktx2"))) { + KLOGF("kramv %s only supports ktx/ktx2 files\n", filename); + return NO; + } + + KTXImage image; + KTXImageData imageData; - - // open but leave the image compressed if KTX2 + zstd - bool isInfoOnly = true; - - KTXImage image; - if (!image.open(mmapHelper.data(), mmapHelper.dataLength(), isInfoOnly)) { - KLOGF("kramv %s failed to open\n", filename); - return NO; - } - + if (!imageData.open(filename, image)) { + KLOGF("kramv %s coould not open file\n", filename); + + } // no BC6 or ASTC HDR yet for thumbs, just do LDR first if (isHdrFormat(image.pixelFormat)) { KLOGF("kramv %s doesn't support hdr thumbnails yet\n", filename); diff --git a/kramv/KramLoader.h b/kramv/KramLoader.h index d9ecedda..0c117051 100644 --- a/kramv/KramLoader.h +++ b/kramv/KramLoader.h @@ -18,17 +18,23 @@ #endif +namespace kram { +class KTXImage; +} // This loads KTX and PNG data synchronously. Will likely move to only loading KTX files, with a png -> ktx conversion. // The underlying KTXImage is not yet returned to the caller, but would be useful for prop queries. 
@interface KramLoader : NSObject -// from mem, caller must keep data alive +// from mem, copied to MTLBuffer if available, if not caller must keep mem alive - (nullable id)loadTextureFromData:(nonnull const uint8_t *)imageData imageDataLength:(int32_t)imageDataLength originalFormat:(nullable MTLPixelFormat*)originalFormat; -// from mem, if NSData then caller must keep data alive until blit +// from mem, copied to MTLBuffer if available, if not caller must keep mem alive - (nullable id)loadTextureFromData:(nonnull NSData*)imageData originalFormat:(nullable MTLPixelFormat*)originalFormat; +// load from a KTXImage +- (nullable id)loadTextureFromImage:(const kram::KTXImage&)image originalFormat:(nullable MTLPixelFormat*)originalFormat; + // from url (mmap) - (nullable id)loadTextureFromURL:(nonnull NSURL *)url originalFormat:(nullable MTLPixelFormat*)originalFormat; diff --git a/kramv/KramLoader.mm b/kramv/KramLoader.mm index 12f09b79..9367944b 100644 --- a/kramv/KramLoader.mm +++ b/kramv/KramLoader.mm @@ -94,7 +94,7 @@ bool isDecodeImageNeeded(MyMTLPixelFormat pixelFormat) { return needsDecode; } -bool decodeImage(KTXImage& image, KTXImage& imageDecoded) +bool decodeImage(const KTXImage& image, KTXImage& imageDecoded) { KramDecoderParams decoderParams; KramDecoder decoder; @@ -174,7 +174,12 @@ inline MyMTLPixelFormat remapInternalRGBFormat(MyMTLPixelFormat format) { if (!image.open(imageData, imageDataLength, isInfoOnly)) { return nil; } - + + return [self loadTextureFromImage:image originalFormat:originalFormat]; +} + +- (nullable id)loadTextureFromImage:(const KTXImage&)image originalFormat:(nullable MTLPixelFormat*)originalFormat +{ #if SUPPORT_RGB if (isInternalRGBFormat(image.pixelFormat)) { // loads and converts top level mip from RGB to RGBA (RGB0) @@ -311,12 +316,7 @@ static uint32_t numberOfMipmapLevels(const Image& image) { // TODO: could also ignore extension, and look at header/signature instead // files can be renamed to the incorrect extensions string filename = toLower(path); - - MmapHelper mmapHelper; - if (!mmapHelper.open(path)) { - return nil; - } - + if (endsWithExtension(filename.c_str(), ".png")) { // set title to filename, chop this to just file+ext, not directory string filenameShort = filename; @@ -340,14 +340,29 @@ static uint32_t numberOfMipmapLevels(const Image& image) { bool isSRGB = (!isNormal && !isSDF); + MmapHelper mmapHelper; + if (!mmapHelper.open(path)) { + return nil; + } + + // TODO: need FileHelper fallback here + return [self loadTextureFromPNGData:mmapHelper.data() dataSize:(int32_t)mmapHelper.dataLength() isSRGB:isSRGB originalFormat:originalFormat]; } - - // route all data through the version that copies or does sync upload - return [self loadTextureFromData:mmapHelper.data() imageDataLength:(int32_t)mmapHelper.dataLength() originalFormat:originalFormat]; + else { + KTXImage image; + KTXImageData imageData; + + if (!imageData.open(path, image)) { + return nil; + } + + // route all data through the version that copies or does sync upload + return [self loadTextureFromImage:image originalFormat:originalFormat]; + } } -- (nullable id)createTexture:(KTXImage&)image isPrivate:(bool)isPrivate { +- (nullable id)createTexture:(const KTXImage&)image isPrivate:(bool)isPrivate { MTLTextureDescriptor *textureDescriptor = [[MTLTextureDescriptor alloc] init]; // Indicate that each pixel has a blue, green, red, and alpha channel, where each channel is @@ -611,7 +626,7 @@ inline uint64_t alignOffset(uint64_t offset, uint64_t alignment) { // Has a synchronous upload 
via replaceRegion that only works for shared/managed (f.e. ktx), // and another path for private that uses a blitEncoder and must have block aligned data (f.e. ktxa, ktx2). // Could repack ktx data into ktxa before writing to temporary file, or when copying NSData into MTLBuffer. -- (nullable id)blitTextureFromImage:(KTXImage &)image +- (nullable id)blitTextureFromImage:(const KTXImage &)image { if (_buffer == nil) { // this is enough to upload 4k x 4x @ RGBA8u with mips, 8k x 8k compressed with mips @96MB diff --git a/kramv/KramRenderer.h b/kramv/KramRenderer.h index 801ace1e..4f68d4ca 100644 --- a/kramv/KramRenderer.h +++ b/kramv/KramRenderer.h @@ -18,6 +18,7 @@ namespace kram { class ShowSettings; + class KTXImage; } // Our platform independent renderer class. Implements the MTKViewDelegate protocol which @@ -26,12 +27,10 @@ namespace kram { - (nonnull instancetype)initWithMetalKitView:(nonnull MTKView *)view settings:(nonnull kram::ShowSettings*)settings; -- (BOOL)loadTextureFromData:(const std::string&)fullFilename - timestamp:(double)timestamp - imageData:(nonnull const uint8_t*)imageData - imageDataLength:(uint64_t)imageDataLength - imageNormalData:(nullable const uint8_t*)imageNormalData - imageNormalDataLength:(uint64_t)imageNormalDataLength; +- (BOOL)loadTextureFromImage:(const std::string&)fullFilename + timestamp:(double)timestamp + image:(kram::KTXImage&)image + imageNormal:(nullable kram::KTXImage*)imageNormal; - (BOOL)loadTexture:(nonnull NSURL *)url; From 8f40cfc3e8aced4e3325384c5fc9d5908fa9501a Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sat, 5 Jun 2021 00:48:53 -0700 Subject: [PATCH 108/901] Kramv - simplify loader to only use blit path, also kramv can load png into one level of KTXImage This lost the mip support of BlitEncoder, but can add that back later or use Mipper. This opens up pulling PNG from folder, not just KTX/2 files. Need to call openPNG vs. open call, or detect off 4-5 bytes code at beginning of file. Remove the cpu upload path. This is slow and untwiddled, but useful for reference or when synchronous upload required. Nothing is keeping the KTXImage data alive if the staging buffer is flooded. --- kramv/KramLoader.mm | 52 ++++++++++++++-------------- libkram/kram/Kram.cpp | 79 ++++++++++++++++++++++++++++++++++++++++--- libkram/kram/Kram.h | 2 ++ 3 files changed, 101 insertions(+), 32 deletions(-) diff --git a/kramv/KramLoader.mm b/kramv/KramLoader.mm index 9367944b..7a55c863 100644 --- a/kramv/KramLoader.mm +++ b/kramv/KramLoader.mm @@ -216,7 +216,7 @@ inline MyMTLPixelFormat remapInternalRGBFormat(MyMTLPixelFormat format) { *originalFormat = (MTLPixelFormat)rbgaImage2.pixelFormat; // TODO: should this return rgbaImage.pixelFormat ? 
} - return [self loadTextureFromImage:rbgaImage2]; + return [self blitTextureFromImage:rbgaImage2]; } #endif @@ -231,7 +231,7 @@ inline MyMTLPixelFormat remapInternalRGBFormat(MyMTLPixelFormat format) { return nil; } - return [self loadTextureFromImage:imageDecoded]; + return [self blitTextureFromImage:imageDecoded]; } else #endif @@ -241,6 +241,8 @@ inline MyMTLPixelFormat remapInternalRGBFormat(MyMTLPixelFormat format) { } } +/* + static uint32_t numberOfMipmapLevels(const Image& image) { uint32_t w = image.width(); uint32_t h = image.height(); @@ -254,32 +256,22 @@ static uint32_t numberOfMipmapLevels(const Image& image) { return numberOfMips; } -- (nullable id)loadTextureFromPNGData:(const uint8_t*)data dataSize:(int32_t)dataSize isSRGB:(BOOL)isSRGB originalFormat:(nullable MTLPixelFormat*)originalFormat +- (nullable id)_loadTextureFromPNGData:(const uint8_t*)data dataSize:(int32_t)dataSize isSRGB:(BOOL)isSRGB originalFormat:(nullable MTLPixelFormat*)originalFormat { // can only load 8u and 16u from png, no hdr formats, no premul either, no props // this also doesn't handle strips like done in libkram. - Image sourceImage; - bool isLoaded = LoadPng(data, dataSize, false, false, sourceImage); + // Image sourceImage; + bool isLoaded = LoadPng(data, dataSize, false, false, image); if (!isLoaded) { return nil; } - KTXImage image; - image.width = sourceImage.width(); - image.height = sourceImage.height(); - image.depth = 0; - - image.header.numberOfArrayElements = 0; - image.header.numberOfMipmapLevels = numberOfMipmapLevels(sourceImage); - - image.textureType = MyMTLTextureType2D; - image.pixelFormat = isSRGB ? MyMTLPixelFormatRGBA8Unorm_sRGB : MyMTLPixelFormatRGBA8Unorm; - + // TODO: replace this with code that gens a KTXImage from png (and cpu mips) // instead of needing to use autogenmip that has it's own filters (probably a box) - id texture = [self createTexture:image isPrivate:false]; + id texture = [self createTexture:image isPrivate:true]; if (!texture) { return nil; } @@ -288,6 +280,10 @@ static uint32_t numberOfMipmapLevels(const Image& image) { *originalFormat = (MTLPixelFormat)image.pixelFormat; } + // this means KTXImage must hold data + [self blitTextureFromImage:image]; + + // cpu copy the bytes from the data object into the texture const MTLRegion region = { { 0, 0, 0 }, // MTLOrigin @@ -300,6 +296,7 @@ static uint32_t numberOfMipmapLevels(const Image& image) { mipmapLevel:0 withBytes:sourceImage.pixels().data() bytesPerRow:bytesPerRow]; + // have to schedule autogen inside render using MTLBlitEncoder if (image.header.numberOfMipmapLevels > 1) { @@ -308,6 +305,7 @@ static uint32_t numberOfMipmapLevels(const Image& image) { return texture; } +*/ - (nullable id)loadTextureFromURL:(nonnull NSURL *)url originalFormat:(nullable MTLPixelFormat*)originalFormat { @@ -317,6 +315,9 @@ static uint32_t numberOfMipmapLevels(const Image& image) { // files can be renamed to the incorrect extensions string filename = toLower(path); + KTXImage image; + KTXImageData imageDataKTX; + if (endsWithExtension(filename.c_str(), ".png")) { // set title to filename, chop this to just file+ext, not directory string filenameShort = filename; @@ -339,21 +340,15 @@ static uint32_t numberOfMipmapLevels(const Image& image) { } bool isSRGB = (!isNormal && !isSDF); - - MmapHelper mmapHelper; - if (!mmapHelper.open(path)) { + + if (!imageDataKTX.openPNG(path, isSRGB, image)) { return nil; } - // TODO: need FileHelper fallback here - - return [self loadTextureFromPNGData:mmapHelper.data() 
dataSize:(int32_t)mmapHelper.dataLength() isSRGB:isSRGB originalFormat:originalFormat]; + return [self loadTextureFromImage:image originalFormat:originalFormat]; } else { - KTXImage image; - KTXImageData imageData; - - if (!imageData.open(path, image)) { + if (!imageDataKTX.open(path, image)) { return nil; } @@ -396,6 +391,8 @@ static uint32_t numberOfMipmapLevels(const Image& image) { return texture; } +/* just for reference now + // Has a synchronous upload via replaceRegion that only works for shared/managed (f.e. ktx), // and another path for private that uses a blitEncoder and must have block aligned data (f.e. ktxa, ktx2). // Could repack ktx data into ktxa before writing to temporary file, or when copying NSData into MTLBuffer. @@ -536,6 +533,7 @@ static uint32_t numberOfMipmapLevels(const Image& image) { return texture; } +*/ //-------------------------- diff --git a/libkram/kram/Kram.cpp b/libkram/kram/Kram.cpp index 85d68b09..5ad78a4c 100644 --- a/libkram/kram/Kram.cpp +++ b/libkram/kram/Kram.cpp @@ -59,21 +59,90 @@ bool KTXImageData::open(const char* filename, KTXImage& image) { } } - // read the KTXImage in from the data, it will alias mmap or fileData + const uint8_t* data; + size_t dataSize; if (useMmap) { - if (!image.open(mmapHelper.data(), mmapHelper.dataLength(), isInfoOnly)) { - return false; - } + data = mmapHelper.data(); + dataSize = mmapHelper.dataLength(); } else { - if (!image.open(fileData.data(), fileData.size(), isInfoOnly)) { + data = fileData.data(); + dataSize = fileData.size(); + } + + // read the KTXImage in from the data, it will alias mmap or fileData + if (!image.open(data, dataSize, isInfoOnly)) { + return false; + } + + return true; +} + +bool KTXImageData::openPNG(const char* filename, bool isSrgb, KTXImage& image) { + bool useMmap = true; + if (!mmapHelper.open(filename)) { + useMmap = false; + + // open file, copy it to memory, then close it + FileHelper fileHelper; + if (!fileHelper.open(filename, "rb")) { + return false; + } + + // read the file into memory + size_t size = fileHelper.size(); + if (size == (size_t)-1) { + return false; + } + + fileData.resize(size); + if (!fileHelper.read(fileData.data(), size)) { return false; } } + const uint8_t* data; + size_t dataSize; + if (useMmap) { + data = mmapHelper.data(); + dataSize = mmapHelper.dataLength(); + } + else { + data = fileData.data(); + dataSize = fileData.size(); + } + + // the mmap/filehelper point to the png data + // use Image to + + Image singleImage; + bool isLoaded = LoadPng(data, dataSize, false, false, singleImage); + if (!isLoaded) { + return false; + } + + // now move the png pixels into the KTXImage + + image.width = singleImage.width(); + image.height = singleImage.height(); + image.depth = 0; + + image.header.numberOfArrayElements = 0; + image.header.numberOfMipmapLevels = 1; + image.textureType = MyMTLTextureType2D; + image.pixelFormat = isSrgb ? 
MyMTLPixelFormatRGBA8Unorm_sRGB : MyMTLPixelFormatRGBA8Unorm; + + // TODO: support mips with blitEncoder or Mipper + // TODO: support chunks, but may need to copy horizontal to vertical + // TODO: png has 16u format useful for heights + + image.reserveImageData(); + memcpy((uint8_t*)image.fileData + image.mipLevels[0].offset, singleImage.pixels().data(), image.levelLength(0)); + return true; } + bool KTXImageData::open(const uint8_t* data, size_t dataSize, KTXImage& image) { if (!image.open(data, dataSize, isInfoOnly)) { diff --git a/libkram/kram/Kram.h b/libkram/kram/Kram.h index 4ad5eceb..2b2051fa 100644 --- a/libkram/kram/Kram.h +++ b/libkram/kram/Kram.h @@ -23,6 +23,8 @@ class KTXImageData { // class aliases data, so caller must keep alive. Useful with bundle. bool open(const uint8_t* data, size_t dataSize, KTXImage& image); + bool openPNG(const char* filename, bool isSrgb, KTXImage& image); + private: MmapHelper mmapHelper; vector fileData; From 197e7d3fbff055578ba4c36632deb4a3eeef0283 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sat, 5 Jun 2021 10:51:38 -0700 Subject: [PATCH 109/901] kramv - more loader improvements Slowly bubbling the KTXImage up to caller. Simplfy loader code in kram too. Expose a close() call in KTXImageData to release memory, and use this in open calls so they don't fail on reuse. --- kramv/KramLoader.h | 9 ++-- kramv/KramLoader.mm | 34 +++++++------ kramv/KramRenderer.mm | 19 +++++-- kramv/KramViewerMain.mm | 2 +- libkram/kram/Kram.cpp | 106 ++++++++++++++++++++++++++-------------- libkram/kram/Kram.h | 10 +++- 6 files changed, 120 insertions(+), 60 deletions(-) diff --git a/kramv/KramLoader.h b/kramv/KramLoader.h index 0c117051..65e4decb 100644 --- a/kramv/KramLoader.h +++ b/kramv/KramLoader.h @@ -20,10 +20,10 @@ namespace kram { class KTXImage; +class KTXImageData; } -// This loads KTX and PNG data synchronously. Will likely move to only loading KTX files, with a png -> ktx conversion. -// The underlying KTXImage is not yet returned to the caller, but would be useful for prop queries. +// This loads KTX/2 and PNG data. Moving towards KTX/2 files only, with a PNG to KTX/2 conversion. 
@interface KramLoader : NSObject // from mem, copied to MTLBuffer if available, if not caller must keep mem alive @@ -33,7 +33,10 @@ class KTXImage; - (nullable id)loadTextureFromData:(nonnull NSData*)imageData originalFormat:(nullable MTLPixelFormat*)originalFormat; // load from a KTXImage -- (nullable id)loadTextureFromImage:(const kram::KTXImage&)image originalFormat:(nullable MTLPixelFormat*)originalFormat; +- (nullable id)loadTextureFromImage:(const kram::KTXImage&)image originalFormat:(nullable MTLPixelFormat*)originalFormat; + +// load into KTXImage and KTXImageData, can use with loadTextureFromImage +- (BOOL)loadImageFromURL:(nonnull NSURL *)url image:(kram::KTXImage&)image imageData:(kram::KTXImageData&)imageData; // from url (mmap) - (nullable id)loadTextureFromURL:(nonnull NSURL *)url originalFormat:(nullable MTLPixelFormat*)originalFormat; diff --git a/kramv/KramLoader.mm b/kramv/KramLoader.mm index 7a55c863..a882ed14 100644 --- a/kramv/KramLoader.mm +++ b/kramv/KramLoader.mm @@ -307,17 +307,14 @@ static uint32_t numberOfMipmapLevels(const Image& image) { } */ -- (nullable id)loadTextureFromURL:(nonnull NSURL *)url originalFormat:(nullable MTLPixelFormat*)originalFormat { - +- (BOOL)loadImageFromURL:(nonnull NSURL *)url image:(KTXImage&)image imageData:(KTXImageData&)imageData +{ const char *path = [url.absoluteURL.path UTF8String]; // TODO: could also ignore extension, and look at header/signature instead // files can be renamed to the incorrect extensions string filename = toLower(path); - KTXImage image; - KTXImageData imageDataKTX; - if (endsWithExtension(filename.c_str(), ".png")) { // set title to filename, chop this to just file+ext, not directory string filenameShort = filename; @@ -341,20 +338,29 @@ static uint32_t numberOfMipmapLevels(const Image& image) { bool isSRGB = (!isNormal && !isSDF); - if (!imageDataKTX.openPNG(path, isSRGB, image)) { - return nil; + if (!imageData.openPNG(path, isSRGB, image)) { + return NO; } - - return [self loadTextureFromImage:image originalFormat:originalFormat]; } else { - if (!imageDataKTX.open(path, image)) { - return nil; + if (!imageData.open(path, image)) { + return NO; } - - // route all data through the version that copies or does sync upload - return [self loadTextureFromImage:image originalFormat:originalFormat]; } + + return YES; +} + +- (nullable id)loadTextureFromURL:(nonnull NSURL *)url originalFormat:(nullable MTLPixelFormat*)originalFormat +{ + KTXImage image; + KTXImageData imageData; + + if (![self loadImageFromURL:url image:image imageData:imageData]) { + return nil; + } + + return [self loadTextureFromImage:image originalFormat:originalFormat]; } - (nullable id)createTexture:(const KTXImage&)image isPrivate:(bool)isPrivate { diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index 92defb1d..93da1eda 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -580,18 +580,29 @@ - (BOOL)loadTexture:(nonnull NSURL *)url // image can be decoded to rgba8u if platform can't display format natively // but still want to identify blockSize from original format if (isTextureChanged) { + // TODO: hold onto these, so can reference block data + KTXImage image; + KTXImageData imageData; + + if (![_loader loadImageFromURL:url image:image imageData:imageData]) { + return NO; + } MTLPixelFormat originalFormatMTL = MTLPixelFormatInvalid; - id texture = [_loader loadTextureFromURL:url originalFormat:&originalFormatMTL]; + id texture = [_loader loadTextureFromImage:image originalFormat:&originalFormatMTL]; if (!texture) { 
return NO; } // This doesn't look for or load corresponding normal map, but should - // TODO:: this reloads KTXImage twice over - _showSettings->imageInfo = kramInfoToString(fullFilename, false); - _showSettings->imageInfoVerbose = kramInfoToString(fullFilename, true); + // this is not the png data, but info on converted png to ktx level + // But this avoids loading the image 2 more times + _showSettings->imageInfo = kramInfoKTXToString(fullFilename, image, false); + _showSettings->imageInfoVerbose = kramInfoKTXToString(fullFilename, image, true); + + //_showSettings->imageInfo = kramInfoToString(fullFilename, image, false); + //_showSettings->imageInfoVerbose = kramInfoToString(fullFilename, image, true); _showSettings->originalFormat = (MyMTLPixelFormat)originalFormatMTL; _showSettings->decodedFormat = (MyMTLPixelFormat)texture.pixelFormat; diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index f1617780..d8a15a45 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -2262,7 +2262,7 @@ - (BOOL)loadTextureFromArchive } if (isFound) { - normalFilename = normalFilename.replace(searchPos, search.length(), "-n.ktx"); // works for + normalFilename = normalFilename.replace(searchPos, search.length(), "-n.ktx"); } //--------------------------- diff --git a/libkram/kram/Kram.cpp b/libkram/kram/Kram.cpp index 5ad78a4c..bd8a03d4 100644 --- a/libkram/kram/Kram.cpp +++ b/libkram/kram/Kram.cpp @@ -34,12 +34,19 @@ namespace kram { using namespace std; +template +void releaseVector(vector& v) { + v.clear(); + v.shrink_to_fit(); +} bool KTXImageData::open(const char* filename, KTXImage& image) { - bool useMmap = true; + close(); + + isMmap = true; if (!mmapHelper.open(filename)) { - useMmap = false; + isMmap = false; // open file, copy it to memory, then close it FileHelper fileHelper; @@ -61,7 +68,7 @@ bool KTXImageData::open(const char* filename, KTXImage& image) { const uint8_t* data; size_t dataSize; - if (useMmap) { + if (isMmap) { data = mmapHelper.data(); dataSize = mmapHelper.dataLength(); } @@ -71,17 +78,34 @@ bool KTXImageData::open(const char* filename, KTXImage& image) { } // read the KTXImage in from the data, it will alias mmap or fileData - if (!image.open(data, dataSize, isInfoOnly)) { + bool isLoaded = image.open(data, dataSize, isInfoOnly); + + // this means KTXImage is using it's own storage + if (!isLoaded || image.fileData != data) { + close(); + } + + if (!isLoaded) { return false; } return true; } +void KTXImageData::close() { + // don't need these anymore, singleImage holds the data + mmapHelper.close(); + releaseVector(fileData); + isMmap = false; +} + + bool KTXImageData::openPNG(const char* filename, bool isSrgb, KTXImage& image) { - bool useMmap = true; + close(); + + isMmap = true; if (!mmapHelper.open(filename)) { - useMmap = false; + isMmap = false; // open file, copy it to memory, then close it FileHelper fileHelper; @@ -103,7 +127,7 @@ bool KTXImageData::openPNG(const char* filename, bool isSrgb, KTXImage& image) { const uint8_t* data; size_t dataSize; - if (useMmap) { + if (isMmap) { data = mmapHelper.data(); dataSize = mmapHelper.dataLength(); } @@ -117,6 +141,10 @@ bool KTXImageData::openPNG(const char* filename, bool isSrgb, KTXImage& image) { Image singleImage; bool isLoaded = LoadPng(data, dataSize, false, false, singleImage); + + // don't need png data anymore + close(); + if (!isLoaded) { return false; } @@ -132,8 +160,11 @@ bool KTXImageData::openPNG(const char* filename, bool isSrgb, KTXImage& image) { image.textureType = 
MyMTLTextureType2D; image.pixelFormat = isSrgb ? MyMTLPixelFormatRGBA8Unorm_sRGB : MyMTLPixelFormatRGBA8Unorm; - // TODO: support mips with blitEncoder or Mipper - // TODO: support chunks, but may need to copy horizontal to vertical + // TODO: support mips with blitEncoder but tha confuses mipCount in KTXImage + // Mipper can also generate on cpu side. Mipped can do premul conversion though. + + // TODO: support chunks and striped png, but may need to copy horizontal to vertical + // TODO: png has 16u format useful for heights image.reserveImageData(); @@ -145,6 +176,10 @@ bool KTXImageData::openPNG(const char* filename, bool isSrgb, KTXImage& image) { bool KTXImageData::open(const uint8_t* data, size_t dataSize, KTXImage& image) { + close(); + + // image will likely alias incoming data, so KTXImageData is unused + if (!image.open(data, dataSize, isInfoOnly)) { return false; } @@ -335,9 +370,7 @@ bool SetupTmpFile(FileHelper& tmpFileHelper, const char* suffix) return tmpFileHelper.openTemporaryFile(suffix, "w+b"); } -bool SetupSourceImage(//MmapHelper& mmapHelper, FileHelper& fileHelper, - //vector& fileBuffer, - const string& srcFilename, Image& sourceImage, +bool SetupSourceImage(const string& srcFilename, Image& sourceImage, bool isPremulSrgb = false, bool isGray = false) { bool isKTX = endsWith(srcFilename, ".ktx") || endsWith(srcFilename, ".ktx2"); @@ -349,15 +382,19 @@ bool SetupSourceImage(//MmapHelper& mmapHelper, FileHelper& fileHelper, return false; } - // TODO: really KTXImageData + // TODO: basically KTXImageData, but the encode can't take in a KTXImage yet + // so here it's generate a single Image. Also here the LoadKTX converts + // 1/2/3/4 channel formats to 4. + MmapHelper mmapHelper; - FileHelper fileHelper; vector fileData; // first try mmap, and then use file -> buffer - bool useMmap = true; + bool isMmap = true; if (!mmapHelper.open(srcFilename.c_str())) { - useMmap = false; + isMmap = false; + + FileHelper fileHelper; // fallback to opening file if no mmap support or it didn't work if (!fileHelper.open(srcFilename.c_str(), "rb")) { @@ -379,30 +416,27 @@ bool SetupSourceImage(//MmapHelper& mmapHelper, FileHelper& fileHelper, } } + const uint8_t* data; + size_t dataSize; + if (isMmap) { + data = mmapHelper.data(); + dataSize = mmapHelper.dataLength(); + } + else { + data = fileData.data(); + dataSize = fileData.size(); + } + + //----------------------- + if (isPNG) { - if (useMmap) { - if (!LoadPng(mmapHelper.data(), mmapHelper.dataLength(), isPremulSrgb, isGray, - sourceImage)) { - return false; // error - } - } - else { - if (!LoadPng(fileData.data(), fileData.size(), isPremulSrgb, isGray, - sourceImage)) { - return false; // error - } + if (!LoadPng(data, dataSize, isPremulSrgb, isGray, sourceImage)) { + return false; // error } } else { - if (useMmap) { - if (!LoadKtx(mmapHelper.data(), mmapHelper.dataLength(), sourceImage)) { - return false; // error - } - } - else { - if (!LoadKtx(fileData.data(), fileData.size(), sourceImage)) { - return false; // error - } + if (!LoadKtx(data, dataSize, sourceImage)) { + return false; // error } } diff --git a/libkram/kram/Kram.h b/libkram/kram/Kram.h index 2b2051fa..f2e75afc 100644 --- a/libkram/kram/Kram.h +++ b/libkram/kram/Kram.h @@ -13,8 +13,8 @@ using namespace std; class Image; class KTXImage; -// This helper needs to stay alive since KTXImage aliases it -// May be able to fold these into KTXImage since it has an internal vector already +// This helper needs to stay alive since KTXImage may alias the data. 
+// KTXImage also has an internal vector already, but fileData may point to the mmap or vector here. class KTXImageData { public: // class keeps the data alive in mmapHelper or fileData @@ -23,11 +23,17 @@ class KTXImageData { // class aliases data, so caller must keep alive. Useful with bundle. bool open(const uint8_t* data, size_t dataSize, KTXImage& image); + // Open png image into a KTXImage as a single-level mip + // Only handles 2d case and only srgba/rgba conversion. bool openPNG(const char* filename, bool isSrgb, KTXImage& image); + // This releases all memory associated with this class + void close(); + private: MmapHelper mmapHelper; vector fileData; + bool isMmap = false; bool isInfoOnly = true; }; From b9e6444217609601c84360d431e6cc5db7cef244 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sat, 5 Jun 2021 11:16:49 -0700 Subject: [PATCH 110/901] Update README.md --- README.md | 37 ++++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 22dfb9cf..2abedd00 100644 --- a/README.md +++ b/README.md @@ -22,34 +22,38 @@ Many of the encoder sources can multithread a single image, but that is unused. Similar to a makefile system, the script sample kramtexture.py uses modstamps to skip textures that have already been processed. If the source png/ktx is older than the ktx output, then the file is skipped. Command line options are not yet compared, so if those change then use --force on the python script to rebuild all textures. Also a crc/hash could be used instead when modstamp isn't sufficient or the same data could come from different folders. ### About kramv -kramv is a viewer for the BC/ASTC/ETC2 and HDR KTX textures generated by kram from LDR PNG and LDR/HDR KTX sources. kramv decodes ASTC/ETC2 textures on macOS Intel, where the GPU doesn't support them. macOS with Apple Silicon supports all three formats, and doesn't need to decode. +kramv is a viewer for the BC/ASTC/ETC2 and HDR KTX/2 textures generated by kram from LDR PNG and LDR/HDR KTX/2 sources. kramv decodes ASTC/ETC2 textures on macOS Intel, where the GPU doesn't support them. macOS with Apple Silicon supports all three formats, and doesn't need to decode. -This is all in ObjC++ with the intent to port to Windows as time permits. It's adapted from Apple's Metal sample app. There's very little GUI and it's all controlled via keyboard to make the code easy to port and change, but the key features are useful for texture triage and analysis. Drag and drop, and click-launch are supported. Recently used textures are listed in the menu. The app is currently single-document only, but I'd like to fix that. Subsequent opens reuse the same document Window. Can drop zip bundles of KTX/KTX2 files, and advance through all textures in the archive. +kramv uses ObjC++ with the intent to port to Windows C++ as time permits. Uses menus, buttons, and keyboard handling useful for texture triage and analysis. Drag and drop folders, bundles, and click-to-launch are supported. Recently used textures/folders/bundles are listed in the menu. The app currently shows a single document at a time. Subsequent opens reuse the same document Window. With bundles and folders, kramv will attempt to pair albedo and normal maps together by filename for the preview. -Compute shaders are used to display a single pixel sample from the gpu texture. This simplifies adding more viewable formats in the future, but there is not a cpu fallback. 
The preview is rendered to a cube with a single face visible using shaders. Preview mode provides lighting, sdf cutoff, and mip visuals for a given texture. +Preview mode provides lighting, sdf cutoff, and mip visuals for a given texture. Multiple shapes can help identify inconsistent normal maps. The u-axis advances counterclockwise, and v-axis advances down on the shapes. +Y OpenGL normals are assumed, not -Y DirectX convention. Lighting appears up and to the right when normal maps are correctly specified. -In non-preview mode, point sampling in a pixel shader is used to show exact pixel values of a single mip, array, and face. Debug modes provide pixel analysis. KramLoader shows synchronous cpu upload to a private Metal texture, but does not yet supply the underlying KTXImage. Pinch zoom and panning tries to keep the image from onscreen, and zoom is to the cursor so navigating feels intuitive. +In non-preview mode, point sampling in a pixel shader is used to show exact pixel values of a single mip, array, and face. Debug modes provide pixel analysis. KramLoader shows synchronous cpu upload to a private Metal texture, but does not yet supply the underlying KTXImage. Pinch-zoom and pan tries to keep the image from onscreen, and zoom is to the cursor so navigating feels intuitive. + +Compute shaders are used to sample a single pixel sample from the gpu texture for the eyedropper. This simplifies adding more viewable formats in the future, but there is not a cpu fallback. Normal.z is reconstructed and displayed in the hud, and linear and srgb channels are shown. ``` Formats - R/RG/RGBA 8/16F/32F, BC/ETC2/ASTC, RGB has limited import support Container Types - KTX, KTX2, PNG Content Types - Albedo, Normal, SDF, Height -Debug modes - transparent, color, gray, +x, +y, xy >= 1 +Debug modes - transparent, color, non-zero, gray, +x, +y, xy >= 1 Texture Types - 1darray (no mips), 2d, 2darray, 3d (no mips), cube, cube array ⇧ decrement any advance listed below -/ - show keyboard shortcuts +?/ - show keyboard shortcuts O - toggle preview, disables debug mode, shows lit normals, and mips and filtering are enabled ⇧D - toggle through none, pixel grid, block grid, atlas grid (32, 64, 128, 256), must be zoomed-in to see pixel grid ⇧E - advance debug mode, this is texture content specific H - toggle hud +U - toggle ui +V - toggle vertical vs. horizontal buttons I - show texture info in overlay -W - toggle repeat filter, scales uv from [0,1] to [0,2] +W - toggle repeat filter, scales uv from [0,1] to [0,2] and changes sampler to wrap/repeat S - show all - arrays, faces, slices and mips all on-screen -R/G/B/A - show channel in isolation -P - toggle shader premul, the shader performs this after sampling but for point sampling it is correct +R/G/B/A - show channel in isolation, alpha as grayscale +P - toggle shader premul, shader does this post-sample so only correct for point-sampling not preview N - toggle signed/unsigned ⇧0 - refit the current mip image to 1x, or fit view. (at 1x with ⇧). 
@@ -58,8 +62,10 @@ N - toggle signed/unsigned ⇧Y advance array ⇧F advance face ⇧M advance mip +⇧8 advance shape (plane, unit box, sphere, capsule) + +⇧J advance bundle/folder image (can traverse zip of ktx/ktx2 files) -⇧J advance bundle image (can traverse zip of ktx/ktx2 files) ``` ### Limitations @@ -88,7 +94,7 @@ ETC2_RGB8A1 - disabled, broken in ETC2 optimizations ASTC LDR - rrr1, rrrg/gggr, rgb1, rgba must be followed to avoid endpoint storage, requires swizzles ASTC HDR - encoder uses 8-bit source image, need 16f/32f passed to encoder, no hw L+A mode -R/RG/RGBA 8/16F/32F - use ktx2ktx2 and ktx2sc KTX2 to supercompress, use as source formats +R/RG/RGBA 8/16F/32F - use kram or ktx2ktx2+ktx2sc to generate supercompressed ktx2 R8/RG8/R16F - input/output rowBytes not aligned to 4 bytes to match KTX spec, code changes needed PVRTC - unsupported, no open-source encoders, requires pow2 size @@ -96,12 +102,13 @@ PVRTC - unsupported, no open-source encoders, requires pow2 size Containers PVR/DDS/Basis/Crunch - unsupoorted -KTX - breaks loads of mips with 4 byte length offset at the start of each level of mips, - metadata/props aren't standardized and only ascii prop support so easy to dump out +KTX - only uncompressed, mip levels are unaligned to block size from 4 byte length at chunk 0 + metadata/props aren't standardized or prevalent + libkram supports only text props for display in kramv -KTX2 - works in kram and viewer, has aligned compressed levels of mips, +KTX2 - works in kram and viewer, has aligned levels of mips when uncompressed, libkram supports None/Zlib/Zstd supercompression for read/write - doesn't support UASTC or BasisLZ yet + libkram does not support UASTC or BasisLZ yet ``` From 08dc556ae021b55e515ecf5e7a22afccff42bd33 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sat, 5 Jun 2021 13:47:33 -0700 Subject: [PATCH 111/901] kramv - add tangent-less normals Good to compare this against explicit tangents for perf and quality. This saves storing/transforming tangents. Need to compare mirroring with that vs. explicit tangent case. 
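
For reference, the derivative-based basis this patch adds boils down to the
construction below (a condensed fp32 sketch; the committed shader mixes half
and float and folds this into transformNormalByBasis, and the helper name
here is only illustrative):

    // Build a TBN from screen-space derivatives of worldPos/uv and rotate the
    // tangent-space bump normal into world space. Fragment shader only, fp32.
    float3 bumpNormalFromDerivatives(float3 bumpNormal,   // unpacked tangent-space normal
                                     float3 vertexNormal, // normalized interpolated normal
                                     float3 worldPos, float2 uv)
    {
        // edge vectors of the pixel's triangle in position and uv space
        float3 dp1 = dfdx(worldPos);
        float3 dp2 = dfdy(worldPos);
        float2 duv1 = dfdx(uv);
        float2 duv2 = dfdy(uv);

        // solve for the tangent/bitangent that map the uv edges onto the position edges
        float3 dp2perp = cross(dp2, vertexNormal);
        float3 dp1perp = cross(vertexNormal, dp1);
        float3 T = dp2perp * duv1.x + dp1perp * duv2.x;
        float3 B = dp2perp * duv1.y + dp1perp * duv2.y;

        // scale-invariant frame: preserve the relative magnitude of T and B
        float invmax = rsqrt(max(length_squared(T), length_squared(B)));
        return normalize(float3x3(T * invmax, B * invmax, vertexNormal) * bumpNormal);
    }

The tradeoff is fp32 math and derivatives in the fragment shader versus storing
and skinning a per-vertex tangent, and derivative precision varies across hw.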
--- kramv/KramRenderer.mm | 5 +- kramv/KramShaders.metal | 130 +++++++++++++++++++++++++++------------- 2 files changed, 92 insertions(+), 43 deletions(-) diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index 93da1eda..04cdec54 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -542,7 +542,8 @@ - (BOOL)loadTextureFromImage:(const string&)fullFilename } } - // archive shouldn't contain png, so only support ktx/ktx2 here + // if archive contained png, then it's been converted to ktx + // so the info below may not reflect original data _showSettings->imageInfo = kramInfoKTXToString(fullFilename, image, false); _showSettings->imageInfoVerbose = kramInfoKTXToString(fullFilename, image, true); @@ -598,6 +599,8 @@ - (BOOL)loadTexture:(nonnull NSURL *)url // this is not the png data, but info on converted png to ktx level // But this avoids loading the image 2 more times + // Size of png is very different than decompressed or recompressed ktx + _showSettings->imageInfo = kramInfoKTXToString(fullFilename, image, false); _showSettings->imageInfoVerbose = kramInfoKTXToString(fullFilename, image, true); diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index faad3b9e..a0847c63 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -6,6 +6,9 @@ using namespace metal; +// whether to use model tangents or generate from normal in fragment shader +constant bool useTangent = false; + //--------------------------------- // helpers @@ -184,10 +187,46 @@ half3 toNormal(half3 n) } +// https://www.gamasutra.com/blogs/RobertBasler/20131122/205462/Three_Normal_Mapping_Techniques_Explained_For_the_Mathematically_Uninclined.php?print=1 +// http://www.thetenthplanet.de/archives/1180 +// This generates the TBN from vertex normal and p and uv derivatives +// Then transforms the bumpNormal to that space. No tangent is needed. +// The downside is this must all be fp32, and all done in fragment shader and use derivatives. +// Derivatives are known to be caclulated differently depending on hw and different precision. +half3 transformNormalByBasis(half3 vertexNormal, half3 bumpNormal, float3 worldPos, float2 uv) +{ + float3 N = toFloat(vertexNormal); + + // for OpenGL +Y convention, flip N.y + // but this doesn't match explicit tangents case, see if those are wrong. 
+ //N.y = -N.y; + + // get edge vectors of the pixel triangle + float3 dp1 = dfdx(worldPos); + float3 dp2 = dfdy(worldPos); + float2 duv1 = dfdx(uv); + float2 duv2 = dfdy(uv); + + // solve the linear system + float3 dp2perp = cross(dp2, N); + float3 dp1perp = cross(N, dp1); + float3 T = dp2perp * duv1.x + dp1perp * duv2.x; + float3 B = dp2perp * duv1.y + dp1perp * duv2.y; + float invmax = rsqrt(max(length_squared(T), length_squared(B))); + + // keeps relative magnitude of two vectors, they're not both unit vecs + T *= invmax; + B *= invmax; + + // construct a scale-invariant frame + // drop to half to match other call + bumpNormal = half3x3(toHalf(T), toHalf(B), vertexNormal) * bumpNormal; + return bumpNormal; +} // use mikktspace, gen bitan in frag shader with sign, don't normalize vb/vt // see http://www.mikktspace.com/ -half3 transformNormal(half3 bumpNormal, half4 tangent, half3 vertexNormal) +half3 transformNormalByBasis(half3 bumpNormal, half4 tangent, half3 vertexNormal) { // Normalize tangent/vertexNormal in vertex shader // but don't renormalize interpolated tangent, vertexNormal in fragment shader @@ -208,7 +247,8 @@ half3 transformNormal(half3 bumpNormal, half4 tangent, half3 vertexNormal) return normalize(bumpNormal); } -half3 transformNormal(half4 tangent, half3 vertexNormal, + +half3 transformNormal(half4 tangent, half3 vertexNormal, float3 worldPos, texture2d texture, sampler s, float2 uv, bool isSigned = true) { half4 nmap = texture.sample(s, uv); @@ -221,17 +261,23 @@ half3 transformNormal(half4 tangent, half3 vertexNormal, // rebuild the z term half3 bumpNormal = toNormal(nmap.xyz); - return transformNormal(bumpNormal, tangent, vertexNormal); + if (useTangent) + bumpNormal = transformNormalByBasis(bumpNormal, tangent, vertexNormal); + else + bumpNormal = transformNormalByBasis(bumpNormal, vertexNormal, worldPos, uv); + + return bumpNormal; } -float3 transformNormal(float4 nmap, half3 vertexNormal, half4 tangent, +half3 transformNormal(half4 nmap, half3 vertexNormal, half4 tangent, + float3 worldPos, float2 uv, // to gen TBN bool isSwizzleAGToRG, bool isSigned, bool isFrontFacing) { // add swizzle for ASTC/BC5nm, other 2 channels format can only store 01 in ba // could use hw swizzle for this if (isSwizzleAGToRG) { - nmap = float4(nmap.ag, 0, 1); + nmap = half4(nmap.ag, 0, 1); } // to signed, also for ASTC/BC5nm @@ -240,22 +286,30 @@ float3 transformNormal(float4 nmap, half3 vertexNormal, half4 tangent, nmap.rg = toSnorm8(nmap.rg); } - float3 bumpNormal = nmap.xyz; + half3 bumpNormal = nmap.xyz; bumpNormal = toNormal(bumpNormal); - // flip the normal if facing is flipped - // TODO: needed for tangent too? - if (!isFrontFacing) { - bumpNormal = -bumpNormal; - tangent.w = -tangent.w; + // handle the basis here (need worldPos and uv for other path) + if (useTangent) { + // flip the normal if facing is flipped + // TODO: needed for tangent too? 
+ if (!isFrontFacing) { + bumpNormal = -bumpNormal; + tangent.w = -tangent.w; + } + + bumpNormal = transformNormalByBasis(bumpNormal, tangent, vertexNormal); + } + else { + bumpNormal = transformNormalByBasis(bumpNormal, vertexNormal, worldPos, uv); } - // handle the basis here - bumpNormal = toFloat(transformNormal(toHalf(bumpNormal), tangent, vertexNormal)); return bumpNormal; } + + // TODO: have more bones, or read from texture instead of uniforms // can then do instanced skining, but vfetch lookup slower #define maxBones 128 @@ -301,7 +355,9 @@ void skinPosAndBasis(thread float4& position, thread float3& tangent, thread flo // see scale2 handling in transformBasis, a little different with transpose of 3x4 normal = (float4(normal, 0.0) * bindPoseToBoneTransform); - tangent = (float4(tangent, 0.0) * bindPoseToBoneTransform); + + if (useTangent) + tangent = (float4(tangent, 0.0) * bindPoseToBoneTransform); } float3x3 toFloat3x3(float4x4 m) @@ -319,33 +375,16 @@ void transformBasis(thread float3& normal, thread float3& tangent, // note this is RinvT * n = (Rt)t = R, this is for simple inverse, inv scale handled below // but uniform scale already handled by normalize normal = m * normal; - + normal *= invScale2; + normal = normalize(normal); + // question here of whether tangent is transformed by m or mInvT // most apps assume m, but after averaging it can be just as off the surface as the normal - tangent = m * tangent; - - // have to apply invSquare of scale here to approximate invT - // also make sure to identify inversion off determinant before instancing so that backfacing is correct - // this is only needed if non-uniform scale present in modelToWorldTfm, could precompute scale2 -// if (isScaled) -// { -// // compute scale squared from rows -// float3 scale2 = float3( -// length_squared(m[0].xyz), -// length_squared(m[1].xyz), -// length_squared(m[2].xyz)); -// -// // do a max(1e4), but really don't have scale be super small -// scale2 = recip(max(0.0001 * 0.0001, scale2)); - - // apply inverse - normal *= invScale2; + if (useTangent) { + tangent = m * tangent; tangent *= invScale2; -// } - - // vertex shader normalize, but the fragment shader should not - normal = normalize(normal); - tangent = normalize(tangent); + tangent = normalize(tangent); + } // make sure to preserve bitan sign in tangent.w } @@ -395,6 +434,8 @@ ColorInOut DrawImageFunc( transformBasis(normal, tangent, uniforms.modelMatrix, uniforms.modelMatrixInvScale2); out.normal = toHalf(normal); + + // may be invalid if useTangent is false out.tangent.xyz = toHalf(tangent); out.tangent.w = toHalf(in.tangent.w); } @@ -600,13 +641,15 @@ float4 DrawPixels( } else if (uniforms.isNormal) { // light the normal map + half4 nmapH = toHalf(c); - float3 n = transformNormal(c, in.normal, in.tangent, + half3 n = transformNormal(nmapH, in.normal, in.tangent, + in.worldPos, in.texCoord, // to build TBN uniforms.isSwizzleAGToRG, uniforms.isSigned, facing); float3 viewDir = normalize(in.worldPos - uniforms.cameraPosition); - c = doLighting(float4(1.0), viewDir, n); + c = doLighting(float4(1.0), viewDir, toFloat(n)); c.a = 1; } @@ -619,10 +662,13 @@ float4 DrawPixels( float3 viewDir = normalize(in.worldPos - uniforms.cameraPosition); if (uniforms.isNormalMapPreview) { - float3 n = transformNormal(nmap, in.normal, in.tangent, + half4 nmapH = toHalf(nmap); + + half3 n = transformNormal(nmapH, in.normal, in.tangent, + in.worldPos, in.texCoord, // to build TBN uniforms.isNormalMapSwizzleAGToRG, uniforms.isNormalMapSigned, facing); - c = 
doLighting(c, viewDir, n); + c = doLighting(c, viewDir, toFloat(n)); } else { c = doLighting(c, viewDir, toFloat(in.normal)); From 05d1be9a17afd71b9bd63fc4f68faf11a422bed5 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sat, 5 Jun 2021 13:58:10 -0700 Subject: [PATCH 112/901] kramv - contrast grid on white, helps with gray images and show alpha --- kramv/KramShaders.metal | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index a0847c63..92e5e65f 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -885,7 +885,17 @@ float4 DrawPixels( // Just visualize the grid lines directly float lineIntensity = 1.0 - min(line, 1.0); - c.rgb = float3(lineIntensity) + (1.0 - lineIntensity) * c.rgb; + // determine proximity of white color to pixel + // and ensure contrast on this blend + float cDist = distance(float3(1.0), c.rgb); + + float lineColor = 1.0; + if (cDist < 0.2) { + lineColor = 0.5; + } + + c.rgb = mix(c.rgb, float3(lineColor), lineIntensity); + // nothing for alpha? } } From 020de93fad6cd85746f60a3edac2364a1a3a86ed Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sat, 5 Jun 2021 15:41:22 -0700 Subject: [PATCH 113/901] kram - fix png to ktx creation reserveImageData() needs initMipLevels() called prior. --- libkram/kram/Kram.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/libkram/kram/Kram.cpp b/libkram/kram/Kram.cpp index bd8a03d4..5a02ec12 100644 --- a/libkram/kram/Kram.cpp +++ b/libkram/kram/Kram.cpp @@ -167,7 +167,10 @@ bool KTXImageData::openPNG(const char* filename, bool isSrgb, KTXImage& image) { // TODO: png has 16u format useful for heights + image.initMipLevels(sizeof(KTXHeader)); // TODO: could also make this ktx2 with zstd compress image.reserveImageData(); + memcpy((uint8_t*)image.fileData, &image.header, sizeof(KTXHeader)); + memcpy((uint8_t*)image.fileData + image.mipLevels[0].offset, singleImage.pixels().data(), image.levelLength(0)); return true; From 5c2ff4bc70d8a952482795418c3038ee48e29e3a Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 6 Jun 2021 12:12:13 -0700 Subject: [PATCH 114/901] kramv - fix sphere prim, add mirrored sphere, disable tan from normal The author of tan-from-normal removed my inquiry about the algorithm and mirrored uv, so I implemented a mirrored uv test case with a sphere. Note that the triangles aren't mirrored, only the uvs. So the normals are flipped, but not the triangles. Will add that other case later. Tangent generation can handle that case, since it can compute correct normal/tangent and flip them. But the tan-from-normal may need the faces inverted as well (and the normals flipped). Had to rotate the sphere to match the cube/capsule. It needed pos and normals rotated. Now lighting is consistent. Turned off specular in shader for now. Attenuate diffuse off dot(vertNormal, bumpNormal). So dark side doesn't have diffuse. 
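
The diffuse attenuation amounts to the following (a small sketch in isolation;
the committed change lives inside doLighting, and the standalone function name
here is only illustrative, with the 9.0 ramp taken from this patch rather than
being a general recipe):

    // Limit bump-lit diffuse by how far the bump normal strays from the
    // interpolated vertex normal, so the geometric dark side stays dark.
    float3 diffuseTerm(float3 lightColor, float3 lightDir,
                       float3 bumpNormal, float3 vertexNormal)
    {
        float dotNL = saturate(dot(bumpNormal, lightDir));

        // fades out as the bump normal approaches 90 degrees off the vertex normal
        float dotVertex = saturate(dot(vertexNormal, bumpNormal));
        dotNL *= saturate(9.0 * dotVertex);

        return dotNL * lightColor;
    }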
--- kramv/KramRenderer.mm | 136 ++++++++++++++++++++++++++++++++++++++-- kramv/KramShaders.metal | 43 +++++++++---- kramv/KramViewerBase.h | 2 +- 3 files changed, 165 insertions(+), 16 deletions(-) diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index 04cdec54..60e3689c 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -88,6 +88,7 @@ @implementation Renderer //MTKMesh *_meshPlane; // really a thin gox MTKMesh *_meshBox; MTKMesh *_meshSphere; + MTKMesh *_meshSphereMirrored; //MTKMesh *_meshCylinder; MTKMesh *_meshCapsule; MTKMeshBufferAllocator *_metalAllocator; @@ -415,10 +416,10 @@ - (MTKMesh*)_createMeshAsset:(const char*)name mdlMesh:(MDLMesh*)mdlMesh doFlipU if (doFlipUV) { id uvs = mdlMesh.vertexBuffers[BufferIndexMeshUV0]; - float2* uvData = (float2*)uvs.map.bytes; + packed_float2* uvData = (packed_float2*)uvs.map.bytes; for (uint32_t i = 0; i < mdlMesh.vertexCount; ++i) { - float2& uv = uvData[i]; + auto& uv = uvData[i]; uv.x = 1.0f - uv.x; } @@ -433,9 +434,14 @@ - (MTKMesh*)_createMeshAsset:(const char*)name mdlMesh:(MDLMesh*)mdlMesh doFlipU if (doFlipBitangent) { id uvs = mdlMesh.vertexBuffers[BufferIndexMeshTangent]; - float4* uvData = (float4*)uvs.map.bytes; + packed_float4* uvData = (packed_float4*)uvs.map.bytes; for (uint32_t i = 0; i < mdlMesh.vertexCount; ++i) { + if (uvData[i].w != -1.0f && uvData[i].w != 1.0f) { + int bp = 0; + bp = bp; + } + uvData[i].w = -uvData[i].w; } } @@ -458,6 +464,11 @@ - (MTKMesh*)_createMeshAsset:(const char*)name mdlMesh:(MDLMesh*)mdlMesh doFlipU return mesh; } +// why isn't this defined in simd lib? +struct packed_float3 { + float x,y,z; +}; + - (void)_loadAssets { /// Load assets into metal objects @@ -481,8 +492,124 @@ - (void)_loadAssets // All prims are viewed with +Y, not +Z up mdlMesh = [MDLMesh newEllipsoidWithRadii:(vector_float3){0.5, 0.5, 0.5} radialSegments:16 verticalSegments:16 geometryType:MDLGeometryTypeTriangles inwardNormals:NO hemisphere:NO allocator:_metalAllocator]; + + float angle = M_PI * 0.5; // TODO: + or - + float2 cosSin = float2m(cos(angle), sin(angle)); + + { + mdlMesh.vertexDescriptor = _mdlVertexDescriptor; + + id pos = mdlMesh.vertexBuffers[BufferIndexMeshPosition]; + packed_float3* posData = (packed_float3*)pos.map.bytes; + + id normals = mdlMesh.vertexBuffers[BufferIndexMeshNormal]; + packed_float3* normalData = (packed_float3*)normals.map.bytes; + + // vertexCount reports 306, but vertex 289+ are garbage + uint32_t numVertices = 289; // mdlMesh.vertexCount + + for (uint32_t i = 0; i < numVertices; ++i) { + { + auto& pos = posData[i]; + + // dumb rotate about Y-axis + auto copy = pos; + + pos.x = copy.x * cosSin.x - copy.z * cosSin.y; + pos.z = copy.x * cosSin.y + copy.z * cosSin.x; + } + + { + auto& normal = normalData[i]; + auto copy = normal; + normal.x = copy.x * cosSin.x - copy.z * cosSin.y; + normal.z = copy.x * cosSin.y + copy.z * cosSin.x; + } + } + + } _meshSphere = [self _createMeshAsset:"MeshSphere" mdlMesh:mdlMesh doFlipUV:true]; + + + mdlMesh = [MDLMesh newEllipsoidWithRadii:(vector_float3){0.5, 0.5, 0.5} radialSegments:16 verticalSegments:16 geometryType:MDLGeometryTypeTriangles inwardNormals:NO hemisphere:NO allocator:_metalAllocator]; + + + // ModelIO has the uv going counterclockwise on sphere/cylinder, but not on the box. + // And it also has a flipped bitangent.w. 
+ + // flip the u coordinate + bool doFlipUV = true; + if (doFlipUV) + { + mdlMesh.vertexDescriptor = _mdlVertexDescriptor; + + id uvs = mdlMesh.vertexBuffers[BufferIndexMeshUV0]; + packed_float2* uvData = (packed_float2*)uvs.map.bytes; + + // this is all aos + + id pos = mdlMesh.vertexBuffers[BufferIndexMeshPosition]; + packed_float3* posData = (packed_float3*)pos.map.bytes; + + id normals = mdlMesh.vertexBuffers[BufferIndexMeshNormal]; + packed_float3* normalData = (packed_float3*)normals.map.bytes; + + + // vertexCount reports 306, but vertex 289+ are garbage + uint32_t numVertices = 289; // mdlMesh.vertexCount + + for (uint32_t i = 0; i < numVertices; ++i) { + { + auto& pos = posData[i]; + + // dumb rotate about Y-axis + auto copy = pos; + pos.x = copy.x * cosSin.x - copy.z * cosSin.y; + pos.z = copy.x * cosSin.y + copy.z * cosSin.x; + } + + { + auto& normal = normalData[i]; + auto copy = normal; + normal.x = copy.x * cosSin.x - copy.z * cosSin.y; + normal.z = copy.x * cosSin.y + copy.z * cosSin.x; + } + + auto& uv = uvData[i]; + + if (uv.x < 0.0 || uv.x > 1.0) { + int bp = 0; + bp = bp; + } + + // this makes it counterclockwise 0 to 1 + float x = uv.x; + + x = 1.0f - x; + + // -1 to 1 counterclockwise + x = 2.0f * x - 1.0f; + + if (x <= 0) { + // now -1 to 0 is 0 to 1 clockwise with 1 in back + x = 1.0f + x; + } + else { + // 0 to 1, now 1 to 0 with 1 in back + x = 1.0f - x; + } + + uv.x = x; + } + + // TODO: may need to flip tangent on the inverted side + // otherwise lighting is just wrong, but tangents generated in _createMeshAsset + // move that here, and flip the tangents in the loop + } + + _meshSphereMirrored = [self _createMeshAsset:"MeshSphereMirrored" mdlMesh:mdlMesh doFlipUV:false]; + // this maps 1/3rd of texture to the caps, and just isn't a very good uv mapping, using capsule nistead // mdlMesh = [MDLMesh newCylinderWithHeight:1.0 @@ -858,8 +985,9 @@ - (void)_updateGameState case 0: _mesh = _meshBox; _showSettings->is3DView = false; break; case 1: _mesh = _meshBox; break; case 2: _mesh = _meshSphere; break; + case 3: _mesh = _meshSphereMirrored; break; //case 3: _mesh = _meshCylinder; break; - case 3: _mesh = _meshCapsule; break; + case 4: _mesh = _meshCapsule; break; } uniforms.is3DView = _showSettings->is3DView; diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index 92e5e65f..f3c47105 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -6,8 +6,10 @@ using namespace metal; -// whether to use model tangents or generate from normal in fragment shader -constant bool useTangent = false; +// Whether to use model tangents (true) or generate tangents from normal in fragment shader (false). +// When set false, the algorithm doesn't adjust for mirrored uv +// See meshSphereMirrored and set this to false. +constant bool useTangent = true; //--------------------------------- // helpers @@ -193,7 +195,7 @@ half3 toNormal(half3 n) // Then transforms the bumpNormal to that space. No tangent is needed. // The downside is this must all be fp32, and all done in fragment shader and use derivatives. // Derivatives are known to be caclulated differently depending on hw and different precision. 
-half3 transformNormalByBasis(half3 vertexNormal, half3 bumpNormal, float3 worldPos, float2 uv) +half3 transformNormalByBasis(half3 bumpNormal, half3 vertexNormal, float3 worldPos, float2 uv) { float3 N = toFloat(vertexNormal); @@ -220,7 +222,8 @@ half3 transformNormalByBasis(half3 vertexNormal, half3 bumpNormal, float3 worldP // construct a scale-invariant frame // drop to half to match other call - bumpNormal = half3x3(toHalf(T), toHalf(B), vertexNormal) * bumpNormal; + bumpNormal = toHalf(float3x3(T, B, N) * toFloat(bumpNormal)); + return bumpNormal; } @@ -233,15 +236,23 @@ half3 transformNormalByBasis(half3 bumpNormal, half4 tangent, half3 vertexNormal // Reconstruct bitan in frag shader // https://bgolus.medium.com/generating-perfect-normal-maps-for-unity-f929e673fc57 + + // so if eyevector + + + // TODO: there's facing too, could be inside model + + half bitangentSign = tangent.w; + half3 bitangent = bitangentSign * cross(vertexNormal, tangent.xyz); + // ModelIO not generating correct bitan sign // DONE: flip this on srcData, and not here //bitangentSign = -bitangentSign; // now transform by basis and normalize from any shearing, and since interpolated basis vectors // are not normalized - half3 bitangent = bitangentSign * cross(vertexNormal, tangent.xyz); half3x3 tbn = half3x3(tangent.xyz, bitangent, vertexNormal); bumpNormal = tbn * bumpNormal; return normalize(bumpNormal); @@ -552,19 +563,20 @@ vertex ColorInOut DrawVolumeVS( return out; } -float4 doLighting(float4 albedo, float3 viewDir, float3 n) { +float4 doLighting(float4 albedo, float3 viewDir, float3 n, float3 vertexNormal) { - float3 lightDir = normalize(float3(1,1,1)); + float3 lightDir = normalize(float3(1,1,1)); // looking down -Z axis float3 lightColor = float3(1,1,1); float3 specular = float3(0.0); float3 diffuse = float3(0.0); float3 ambient = float3(0.0); - bool doSpecular = true; + bool doSpecular = false; // this is a bit too bright, and can confuse bool doDiffuse = true; bool doAmbient = true; + if (doSpecular) { float3 ref = normalize(reflect(viewDir, n)); @@ -575,11 +587,20 @@ float4 doLighting(float4 albedo, float3 viewDir, float3 n) { } if (doDiffuse) { + float dotNL = saturate(dot(n, lightDir)); + + // soften the terminator off the vertNormal + // this is so no diffuse if normal completely off from vertex normal + // also limiting diffuse lighting bump to lighting by vertex normal + float dotVertex = saturate(dot(vertexNormal, n)); + dotNL *= saturate(9.0 * dotVertex); + diffuse = dotNL * lightColor.rgb; } if (doAmbient) { + // can misconstrue as diffuse with this, but make dark side not look flat float dotNLUnsat = dot(n, lightDir); ambient = mix(0.1, 0.3, saturate(dotNLUnsat * 0.5 + 0.5)); } @@ -649,7 +670,7 @@ float4 DrawPixels( float3 viewDir = normalize(in.worldPos - uniforms.cameraPosition); - c = doLighting(float4(1.0), viewDir, toFloat(n)); + c = doLighting(float4(1.0), viewDir, toFloat(n), toFloat(in.normal)); c.a = 1; } @@ -668,10 +689,10 @@ float4 DrawPixels( in.worldPos, in.texCoord, // to build TBN uniforms.isNormalMapSwizzleAGToRG, uniforms.isNormalMapSigned, facing); - c = doLighting(c, viewDir, toFloat(n)); + c = doLighting(c, viewDir, toFloat(n), toFloat(in.normal)); } else { - c = doLighting(c, viewDir, toFloat(in.normal)); + c = doLighting(c, viewDir, toFloat(in.normal), toFloat(in.normal)); } } diff --git a/kramv/KramViewerBase.h b/kramv/KramViewerBase.h index d0d5a2ac..68aa2d12 100644 --- a/kramv/KramViewerBase.h +++ b/kramv/KramViewerBase.h @@ -176,7 +176,7 @@ class ShowSettings { double 
lastTimestamp = 0.0; int32_t meshNumber = 0; - int32_t meshCount = 4; + int32_t meshCount = 5; }; float4x4 matrix4x4_translation(float tx, float ty, float tz); From f05b90fc5cca1ba41303e7a0c2c78ec6de7941c3 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 6 Jun 2021 14:25:11 -0700 Subject: [PATCH 115/901] kramv - add shape/mesh channel viewing Can look at depth, basis, or uv channels of the mesh visually. These are always hard to interpret since they're just rgb unorm intensity, but better than nothing for now. --- kramv/KramRenderer.mm | 35 +++++++++++++++++++++++-------- kramv/KramShaders.h | 21 ++++++++++++++++++- kramv/KramShaders.metal | 34 +++++++++++++++++++++++++----- kramv/KramViewerBase.cpp | 45 +++++++++++++++++++++++++++++----------- kramv/KramViewerBase.h | 23 ++++++++++++++++++-- kramv/KramViewerMain.mm | 31 +++++++++++++++++++++++---- 6 files changed, 156 insertions(+), 33 deletions(-) diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index 60e3689c..796f2933 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -416,7 +416,9 @@ - (MTKMesh*)_createMeshAsset:(const char*)name mdlMesh:(MDLMesh*)mdlMesh doFlipU if (doFlipUV) { id uvs = mdlMesh.vertexBuffers[BufferIndexMeshUV0]; - packed_float2* uvData = (packed_float2*)uvs.map.bytes; + MDLMeshBufferMap *uvsMap = [uvs map]; + + packed_float2* uvData = (packed_float2*)uvsMap.bytes; for (uint32_t i = 0; i < mdlMesh.vertexCount; ++i) { auto& uv = uvData[i]; @@ -434,7 +436,8 @@ - (MTKMesh*)_createMeshAsset:(const char*)name mdlMesh:(MDLMesh*)mdlMesh doFlipU if (doFlipBitangent) { id uvs = mdlMesh.vertexBuffers[BufferIndexMeshTangent]; - packed_float4* uvData = (packed_float4*)uvs.map.bytes; + MDLMeshBufferMap *uvsMap = [uvs map]; + packed_float4* uvData = (packed_float4*)uvsMap.bytes; for (uint32_t i = 0; i < mdlMesh.vertexCount; ++i) { if (uvData[i].w != -1.0f && uvData[i].w != 1.0f) { @@ -493,17 +496,19 @@ - (void)_loadAssets mdlMesh = [MDLMesh newEllipsoidWithRadii:(vector_float3){0.5, 0.5, 0.5} radialSegments:16 verticalSegments:16 geometryType:MDLGeometryTypeTriangles inwardNormals:NO hemisphere:NO allocator:_metalAllocator]; - float angle = M_PI * 0.5; // TODO: + or - + float angle = M_PI * 0.5; float2 cosSin = float2m(cos(angle), sin(angle)); { mdlMesh.vertexDescriptor = _mdlVertexDescriptor; id pos = mdlMesh.vertexBuffers[BufferIndexMeshPosition]; - packed_float3* posData = (packed_float3*)pos.map.bytes; + MDLMeshBufferMap *posMap = [pos map]; + packed_float3* posData = (packed_float3*)posMap.bytes; id normals = mdlMesh.vertexBuffers[BufferIndexMeshNormal]; - packed_float3* normalData = (packed_float3*)normals.map.bytes; + MDLMeshBufferMap *normalsMap = [normals map]; + packed_float3* normalData = (packed_float3*)normalsMap.bytes; // vertexCount reports 306, but vertex 289+ are garbage uint32_t numVertices = 289; // mdlMesh.vertexCount @@ -545,15 +550,18 @@ - (void)_loadAssets mdlMesh.vertexDescriptor = _mdlVertexDescriptor; id uvs = mdlMesh.vertexBuffers[BufferIndexMeshUV0]; - packed_float2* uvData = (packed_float2*)uvs.map.bytes; + MDLMeshBufferMap *uvsMap = [uvs map]; + packed_float2* uvData = (packed_float2*)uvsMap.bytes; // this is all aos id pos = mdlMesh.vertexBuffers[BufferIndexMeshPosition]; - packed_float3* posData = (packed_float3*)pos.map.bytes; + MDLMeshBufferMap *posMap = [pos map]; + packed_float3* posData = (packed_float3*)posMap.bytes; id normals = mdlMesh.vertexBuffers[BufferIndexMeshNormal]; - packed_float3* normalData = (packed_float3*)normals.map.bytes; + MDLMeshBufferMap 
*normalsMap = [normals map]; + packed_float3* normalData = (packed_float3*)normalsMap.bytes; // vertexCount reports 306, but vertex 289+ are garbage @@ -859,6 +867,8 @@ - (BOOL)loadTextureImpl:(const string&)fullFilename isTextureChanged:(BOOL)isTex // be supported debugMode for new texture _showSettings->debugMode = DebugMode::DebugModeNone; + _showSettings->shapeChannel = ShapeChannel::ShapeChannelNone; + // have one of these for each texture added to the viewer float scaleX = MAX(1, texture.width); float scaleY = MAX(1, texture.height); @@ -976,9 +986,16 @@ - (void)_updateGameState } // no debug mode when preview kicks on, make it possible to toggle back and forth more easily - uniforms.debugMode = _showSettings->isPreview ? ShaderDebugMode::ShDebugModeNone : (ShaderDebugMode)_showSettings->debugMode; + uniforms.debugMode = (ShaderDebugMode)_showSettings->debugMode; + uniforms.shapeChannel = (ShaderShapeChannel)_showSettings->shapeChannel; uniforms.channels = (ShaderTextureChannels)_showSettings->channels; + // turn these off in preview mode, but they may be useful? + if (_showSettings->isPreview) { + uniforms.debugMode = ShaderDebugMode::ShDebugModeNone; + uniforms.shapeChannel = ShaderShapeChannel::ShShapeChannelNone; + } + // crude shape experiment _showSettings->is3DView = true; switch(_showSettings->meshNumber) { diff --git a/kramv/KramShaders.h b/kramv/KramShaders.h index fd8bb48e..a556dfc8 100644 --- a/kramv/KramShaders.h +++ b/kramv/KramShaders.h @@ -92,6 +92,21 @@ typedef NS_ENUM(int32_t, ShaderDebugMode) ShDebugModeCount }; +// keep in sync with enum ShapeChannel +typedef NS_ENUM(int32_t, ShaderShapeChannel) +{ + ShShapeChannelNone = 0, + + ShShapeChannelDepth, + + ShShapeChannelUV0, + + ShShapeChannelNormal, + ShShapeChannelTangent, + ShShapeChannelBitangent +}; + + // TODO: placement of these elements in the struct breaks transfer // of data. This seems to work. Alignment issues with mixing these differently. struct Uniforms @@ -123,9 +138,13 @@ struct Uniforms uint32_t gridX; uint32_t gridY; - // can look at pixels that meet criteria of the debugMode + // View pixels that meet criteria of the debugMode ShaderDebugMode debugMode; + // View various aspects of shape geometry (depth, normal, tangent, ...) + ShaderShapeChannel shapeChannel; + + // View the r,g,b,a channels of the texture ShaderTextureChannels channels; // mask }; diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index f3c47105..23e84eef 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -9,6 +9,7 @@ using namespace metal; // Whether to use model tangents (true) or generate tangents from normal in fragment shader (false). // When set false, the algorithm doesn't adjust for mirrored uv // See meshSphereMirrored and set this to false. +// TODO: hook this up to uniform and pass into calls constant bool useTangent = true; //--------------------------------- @@ -329,8 +330,8 @@ half3 transformNormal(half4 nmap, half3 vertexNormal, half4 tangent, void skinPosAndBasis(thread float4& position, thread float3& tangent, thread float3& normal, uint4 indices, float4 weights, float3x4 bones[maxBones]) { - // TODO: might do this as up to 12x vtex lookup, fetch from buffer texture - // but uniforms after setup would be faster if many bones + // TODO: might do this as up to 3x vtex lookup per bone, fetch from buffer texture + // but uniforms after setup would be faster if many bones. Could support 1-n bones with vtex. 
// instances use same bones, but different indices/weights already // but could draw skinned variants with vtex lookup and not have so much upload prep @@ -576,7 +577,6 @@ float4 doLighting(float4 albedo, float3 viewDir, float3 n, float3 vertexNormal) bool doDiffuse = true; bool doAmbient = true; - if (doSpecular) { float3 ref = normalize(reflect(viewDir, n)); @@ -763,6 +763,30 @@ float4 DrawPixels( } } + if (uniforms.shapeChannel != ShShapeChannelNone) { + // TODO: Really hard to interpret direction from color + // see about use the vector flow fields + + if (uniforms.shapeChannel == ShShapeChannelUV0) { + c.rgb = fract(in.texCoordXYZ); + } + else if (uniforms.shapeChannel == ShShapeChannelNormal) { + c.rgb = toUnorm(toFloat(in.normal)); + } + else if (useTangent && uniforms.shapeChannel == ShShapeChannelTangent) { + // TODO: make this work with useTangent = false + c.rgb = toUnorm(toFloat(in.tangent.xyz)); + } + else if (uniforms.shapeChannel == ShShapeChannelBitangent) { + // TODO: make this work with useTangent = false + half3 bitangent = cross(in.tangent.xyz, in.normal) * in.tangent.w; + c.rgb = toUnorm(toFloat(bitangent)); + } + else if (uniforms.shapeChannel == ShShapeChannelDepth) { + c.rgb = saturate(in.position.z / in.position.w); + } + } + // mask to see one channel in isolation, this is really 0'ing out other channels // would be nice to be able to keep this set on each channel independently. switch(uniforms.channels) @@ -798,11 +822,11 @@ float4 DrawPixels( float selector = sign(fmod(checker.x + checker.y, 2.0)); float cb = mix(float(1), float(222.0/255.0), selector); - c.rgb = c.rgb + (1-c.a) * cb; + c.rgb = c.rgb + (1.0 - c.a) * cb; // nothing for alpha? } - + if (uniforms.debugMode != ShDebugModeNone && c.a != 0.0) { diff --git a/kramv/KramViewerBase.cpp b/kramv/KramViewerBase.cpp index e241bd5d..a5c5c032 100644 --- a/kramv/KramViewerBase.cpp +++ b/kramv/KramViewerBase.cpp @@ -10,15 +10,36 @@ int32_t ShowSettings::totalChunks() const { return std::max(one, faceCount) * std::max(one, arrayCount) * std::max(one, sliceCount); } -void ShowSettings::advanceDebugMode(bool isShiftKeyDown) { +void ShowSettings::advanceShapeChannel(bool decrement) { + int32_t numEnums = ShapeChannelCount; + int32_t mode = shapeChannel; + if (decrement) { + mode += numEnums - 1; + } + else { + mode += 1; + } + + shapeChannel = (ShapeChannel)(mode % numEnums); + + // skip this channel for now, in ortho it's mostly pure white + if (shapeChannel == ShapeChannelDepth) { + advanceShapeChannel(decrement); + } +} + +void ShowSettings::advanceDebugMode(bool decrement) { int32_t numEnums = DebugModeCount; - if (isShiftKeyDown) { - debugMode = (DebugMode)(((int32_t)debugMode - 1 + numEnums) % numEnums); + int32_t mode = debugMode; + if (decrement) { + mode += numEnums - 1; } else { - debugMode = (DebugMode)(((int32_t)debugMode + 1) % numEnums); + mode += 1; } + debugMode = (DebugMode)(mode % numEnums); + MyMTLPixelFormat format = (MyMTLPixelFormat)originalFormat; bool isHdr = isHdrFormat(format); @@ -27,20 +48,20 @@ void ShowSettings::advanceDebugMode(bool isShiftKeyDown) { bool isColor = isColorFormat(format); if (debugMode == DebugModeTransparent && (numChannels <= 3 || !isAlpha)) { - advanceDebugMode(isShiftKeyDown); + advanceDebugMode(decrement); } - // 2 channel textures don't really color or grayscale pixels + // 2 channel textures don't really have color or grayscale pixels if (debugMode == DebugModeColor && (numChannels <= 2 || !isColor)) { - advanceDebugMode(isShiftKeyDown); + 
advanceDebugMode(decrement); } if (debugMode == DebugModeGray && numChannels <= 2) { - advanceDebugMode(isShiftKeyDown); + advanceDebugMode(decrement); } if (debugMode == DebugModeHDR && !isHdr) { - advanceDebugMode(isShiftKeyDown); + advanceDebugMode(decrement); } // for 3 and for channel textures could skip these with more info about image (hasColor) @@ -48,13 +69,13 @@ void ShowSettings::advanceDebugMode(bool isShiftKeyDown) { // for normals show directions if (debugMode == DebugModePosX && !(isNormal || isSDF)) { - advanceDebugMode(isShiftKeyDown); + advanceDebugMode(decrement); } if (debugMode == DebugModePosY && !(isNormal)) { - advanceDebugMode(isShiftKeyDown); + advanceDebugMode(decrement); } if (debugMode == DebugModeCircleXY && !(isNormal)) { - advanceDebugMode(isShiftKeyDown); + advanceDebugMode(decrement); } // TODO: have a clipping mode against a variable range too, only show pixels within that range diff --git a/kramv/KramViewerBase.h b/kramv/KramViewerBase.h index 68aa2d12..b8eaed05 100644 --- a/kramv/KramViewerBase.h +++ b/kramv/KramViewerBase.h @@ -48,6 +48,22 @@ enum DebugMode DebugModeCount }; +enum ShapeChannel +{ + ShapeChannelNone = 0, + + ShapeChannelDepth, + + ShapeChannelUV0, + + ShapeChannelNormal, + ShapeChannelTangent, + ShapeChannelBitangent, + + ShapeChannelCount +}; + + class ShowSettings { public: // Can mask various channels (r/g/b/a only, vs. all), may also add toggle of channel @@ -160,6 +176,8 @@ class ShowSettings { DebugMode debugMode = DebugModeNone; + ShapeChannel shapeChannel = ShapeChannelNone; + float4x4 projectionViewModelMatrix; // cached on load, raw info about the texture from libkram @@ -170,8 +188,9 @@ class ShowSettings { MyMTLPixelFormat originalFormat; MyMTLPixelFormat decodedFormat; - void advanceDebugMode(bool isShiftKeyDown); - + void advanceDebugMode(bool decrement); + void advanceShapeChannel(bool decrement); + string lastFilename; double lastTimestamp = 0.0; diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index d8a15a45..f7f833cf 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -498,7 +498,7 @@ - (nonnull ShowSettings*)showSettings { } - (NSStackView*)_addButtons { - const int32_t numButtons = 26; // 13; + const int32_t numButtons = 27; // 13; const char* names[numButtons*2] = { "?", "Help", @@ -527,6 +527,7 @@ - (NSStackView*)_addButtons { "L", "Reload", "0", "Fit", "8", "Shape", + "6", "Shape Channel", // TODO: need to shift hud over a little // "UI", - add to show/hide buttons @@ -1378,6 +1379,7 @@ - (void)updateUIControlState auto faceState = toState(_showSettings->faceNumber > 0); auto mipState = toState(_showSettings->mipLOD > 0); auto meshState = toState(_showSettings->meshNumber > 0); + auto meshChannelState = toState(_showSettings->shapeChannel > 0); // TODO: rename to meshChannel // TODO: UI state, and vertical state auto uiState = toState(_buttonStack.hidden); @@ -1406,6 +1408,7 @@ - (void)updateUIControlState [self findButton:"S"].state = showAllState; [self findButton:"O"].state = previewState; [self findButton:"8"].state = meshState; + [self findButton:"6"].state = meshChannelState; [self findButton:"W"].state = wrapState; [self findButton:"D"].state = gridState; [self findButton:"E"].state = debugState; @@ -1436,6 +1439,8 @@ - (void)updateUIControlState [self findMenuItem:"S"].state = showAllState; [self findMenuItem:"O"].state = previewState; [self findMenuItem:"8"].state = meshState; + [self findMenuItem:"6"].state = meshChannelState; + [self findMenuItem:"W"].state = wrapState; 
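
The advanceDebugMode and advanceShapeChannel helpers in the hunk above share one pattern: step the enum forward or backward with modular arithmetic, then recurse past any entry the current texture cannot display. A minimal standalone sketch of that pattern in C++, with a hypothetical Channel enum and an isSupported stub standing in for the per-format tests (not kram code):

    enum Channel { ChannelNone = 0, ChannelDepth, ChannelUV0, ChannelNormal, ChannelCount };

    // stand-in for the per-format checks (isHdr, numChannels, ...) used by advanceDebugMode
    static bool isSupported(Channel c) {
        return c != ChannelDepth;
    }

    static Channel advanceChannel(Channel c, bool decrement) {
        int mode = int(c) + (decrement ? int(ChannelCount) - 1 : 1);
        Channel next = Channel(mode % int(ChannelCount));
        // recursion terminates because ChannelNone is always supported
        return isSupported(next) ? next : advanceChannel(next, decrement);
    }

Keeping the skip logic inside the recursive call means a single key press never lands on a mode that would render an empty view.
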
[self findMenuItem:"D"].state = gridState; [self findMenuItem:"E"].state = debugState; @@ -1509,11 +1514,14 @@ - (IBAction)handleAction:(id)sender { keyCode = Key::J; else if (title == "L") keyCode = Key::L; + else if (title == "0") keyCode = Key::Num0; else if (title == "8") keyCode = Key::Num8; - + else if (title == "6") + keyCode = Key::Num6; + else if (title == "R") keyCode = Key::R; else if (title == "G") @@ -1522,8 +1530,7 @@ - (IBAction)handleAction:(id)sender { keyCode = Key::B; else if (title == "A") keyCode = Key::A; - - + if (keyCode >= 0) [self handleKey:keyCode isShiftKeyDown:isShiftKeyDown]; } @@ -1648,6 +1655,22 @@ - (bool)handleKey:(uint32_t)keyCode isShiftKeyDown:(bool)isShiftKeyDown } break; + case Key::Num6: { + _showSettings->advanceShapeChannel(isShiftKeyDown); + + switch(_showSettings->shapeChannel) { + case ShapeChannelNone: text = "Show Off"; break; + case ShapeChannelUV0: text = "Show UV0"; break; + case ShapeChannelNormal: text = "Show Normal"; break; + case ShapeChannelTangent: text = "Show Tangent"; break; + case ShapeChannelBitangent: text = "Show Bitangent"; break; + case ShapeChannelDepth: text = "Show Depth"; break; + default: break; + } + + isChanged = true; + break; + } case Key::E: { _showSettings->advanceDebugMode(isShiftKeyDown); From 8a81fb8da45e0fb4fd42672688a7e6c647d90518 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 6 Jun 2021 20:56:28 -0700 Subject: [PATCH 116/901] kramv - more shape and sampler cleanup, pass useTangent in shaders, more shape channel modes --- kramv/KramRenderer.mm | 97 +++++++++++++++++++++++++++++++++------- kramv/KramShaders.h | 9 +++- kramv/KramShaders.metal | 63 +++++++++++++++++++------- kramv/KramViewerBase.cpp | 36 +++++++++++++++ kramv/KramViewerBase.h | 12 ++++- kramv/KramViewerMain.mm | 26 +---------- 6 files changed, 184 insertions(+), 59 deletions(-) diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index 796f2933..a48899a8 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -58,11 +58,18 @@ @implementation Renderer id _colorMap; id _normalMap; - id _colorMapSamplerWrap; - id _colorMapSamplerClamp; + // border is a better edge sample, but at edges it filters in the transparent color + // around the border which is undesirable. It would be better if the hw did + // clamp to edge until uv outside 0 to 1. This results in having to inset the uv by 0.5 px + // to avoid this artifact, but on small texturs that are 4x4, a 1 px inset is noticeable. 
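
To quantify the comment above: with bilinear filtering a fetch at normalized coordinate u touches texels at u*width - 0.5 and u*width + 0.5, so any u closer than half a texel to 0 or 1 pulls in a tap outside the texture, and under clamp-to-border that tap returns the transparent border color. A hedged sketch of that test (illustrative helper, not Metal API):

    static bool bilinearTapLeavesTexture(float u, float texelCount) {
        float halfTexel = 0.5f / texelCount;           // half a texel in normalized uv
        return u < halfTexel || u > 1.0f - halfTexel;  // a tap falls outside [0,1]
    }

On a 4x4 texture halfTexel is already 0.125, which is why the inset is so visible on textures that small.
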
- id _colorMapSamplerBilinearWrap; - id _colorMapSamplerBilinearClamp; + id _colorMapSamplerNearestWrap; + id _colorMapSamplerNearestBorder; + id _colorMapSamplerNearestEdge; + + id _colorMapSamplerFilterWrap; + id _colorMapSamplerFilterBorder; + id _colorMapSamplerFilterEdge; //id _sampleRT; id _sampleTex; @@ -128,32 +135,52 @@ - (void)_createSamplers samplerDescriptor.sAddressMode = MTLSamplerAddressModeRepeat; samplerDescriptor.tAddressMode = MTLSamplerAddressModeRepeat; samplerDescriptor.rAddressMode = MTLSamplerAddressModeRepeat; - samplerDescriptor.label = @"colorMapSamplerWrap"; + samplerDescriptor.label = @"colorMapSamplerNearestWrap"; - _colorMapSamplerWrap = [_device newSamplerStateWithDescriptor:samplerDescriptor]; + _colorMapSamplerNearestWrap = [_device newSamplerStateWithDescriptor:samplerDescriptor]; samplerDescriptor.sAddressMode = MTLSamplerAddressModeClampToBorderColor; samplerDescriptor.tAddressMode = MTLSamplerAddressModeClampToBorderColor; samplerDescriptor.rAddressMode = MTLSamplerAddressModeClampToBorderColor; - samplerDescriptor.label = @"colorMapSamplerClamp"; + samplerDescriptor.label = @"colorMapSamplerNearestBorder"; - _colorMapSamplerClamp = [_device newSamplerStateWithDescriptor:samplerDescriptor]; + _colorMapSamplerNearestBorder = [_device newSamplerStateWithDescriptor:samplerDescriptor]; + + samplerDescriptor.sAddressMode = MTLSamplerAddressModeClampToEdge; + samplerDescriptor.tAddressMode = MTLSamplerAddressModeClampToEdge; + samplerDescriptor.rAddressMode = MTLSamplerAddressModeClampToEdge; + samplerDescriptor.label = @"colorMapSamplerNearsetEdge"; + + _colorMapSamplerNearestEdge = [_device newSamplerStateWithDescriptor:samplerDescriptor]; + + // ----- // these are for preview mode // use the mips, and specify linear for min/mag for SDF case samplerDescriptor.minFilter = MTLSamplerMinMagFilterLinear; samplerDescriptor.magFilter = MTLSamplerMinMagFilterLinear; samplerDescriptor.mipFilter = MTLSamplerMipFilterLinear; - samplerDescriptor.label = @"colorMapSamplerBilinearClamp"; + + samplerDescriptor.sAddressMode = MTLSamplerAddressModeClampToBorderColor; + samplerDescriptor.tAddressMode = MTLSamplerAddressModeClampToBorderColor; + samplerDescriptor.rAddressMode = MTLSamplerAddressModeClampToBorderColor; + samplerDescriptor.label = @"colorMapSamplerFilterBorder"; - _colorMapSamplerBilinearClamp = [_device newSamplerStateWithDescriptor:samplerDescriptor]; + _colorMapSamplerFilterBorder = [_device newSamplerStateWithDescriptor:samplerDescriptor]; + + samplerDescriptor.sAddressMode = MTLSamplerAddressModeClampToEdge; + samplerDescriptor.tAddressMode = MTLSamplerAddressModeClampToEdge; + samplerDescriptor.rAddressMode = MTLSamplerAddressModeClampToEdge; + samplerDescriptor.label = @"colorMapSamplerFilterEdge"; + + _colorMapSamplerFilterEdge = [_device newSamplerStateWithDescriptor:samplerDescriptor]; samplerDescriptor.sAddressMode = MTLSamplerAddressModeRepeat; samplerDescriptor.tAddressMode = MTLSamplerAddressModeRepeat; samplerDescriptor.rAddressMode = MTLSamplerAddressModeRepeat; samplerDescriptor.label = @"colorMapSamplerBilinearWrap"; - _colorMapSamplerBilinearWrap = [_device newSamplerStateWithDescriptor:samplerDescriptor]; + _colorMapSamplerFilterWrap = [_device newSamplerStateWithDescriptor:samplerDescriptor]; } - (void)_createVertexDescriptor @@ -449,15 +476,35 @@ - (MTKMesh*)_createMeshAsset:(const char*)name mdlMesh:(MDLMesh*)mdlMesh doFlipU } } - // TODO: name the vertex attributes, can that be done in _mdlVertexDescriptor - // may have to set name on 
MTLBuffer range on IB and VB - + // now set it into mtk mesh MTKMesh* mesh = [[MTKMesh alloc] initWithMesh:mdlMesh device:_device error:&error]; mesh.name = [NSString stringWithUTF8String:name]; + + // these range names may onl show up when looking at geometry in capture + // These don't seem to appear as the buffer name that is suballocated from + { + // name the vertex range on the vb + MTKMeshBuffer* pos = mesh.vertexBuffers[BufferIndexMeshPosition]; + MTKMeshBuffer* uvs = mesh.vertexBuffers[BufferIndexMeshUV0]; + MTKMeshBuffer* normals = mesh.vertexBuffers[BufferIndexMeshNormal]; + MTKMeshBuffer* tangents = mesh.vertexBuffers[BufferIndexMeshTangent]; + + [pos.buffer addDebugMarker:@"Pos" range:NSMakeRange(pos.offset, pos.length)]; + [uvs.buffer addDebugMarker:@"UV" range:NSMakeRange(uvs.offset, uvs.length)]; + [normals.buffer addDebugMarker:@"Nor" range:NSMakeRange(normals.offset, normals.length)]; + [tangents.buffer addDebugMarker:@"Tan" range:NSMakeRange(tangents.offset, tangents.length)]; + + // This seems to already be named "ellisoid-Indices", + // need to do for ib as well + for (MTKSubmesh* submesh in mesh.submeshes) { + [submesh.indexBuffer.buffer addDebugMarker:mesh.name range:NSMakeRange(submesh.indexBuffer.offset, submesh.indexBuffer.length)]; + } + } + if(!mesh || error) { NSLog(@"Error creating MetalKit mesh %@", error.localizedDescription); @@ -531,6 +578,12 @@ - (void)_loadAssets normal.z = copy.x * cosSin.y + copy.z * cosSin.x; } } + + // Hack - knock out all bogus tris from ModelIO that lead to garbage tris + for (uint32_t i = numVertices; i < mdlMesh.vertexCount; ++i) { + auto& pos = posData[i]; + pos.x = NAN; + } } @@ -611,6 +664,12 @@ - (void)_loadAssets uv.x = x; } + // Hack - knock out all bogus tris from ModelIO that lead to garbage tris + for (uint32_t i = numVertices; i < mdlMesh.vertexCount; ++i) { + auto& pos = posData[i]; + pos.x = NAN; + } + // TODO: may need to flip tangent on the inverted side // otherwise lighting is just wrong, but tangents generated in _createMeshAsset // move that here, and flip the tangents in the loop @@ -966,6 +1025,10 @@ - (void)_updateGameState uniforms.isNormalMapSwizzleAGToRG = false; // TODO: need a prop for this } } + + // TODO: tie to UI + // a few things to fix before enabling this + uniforms.useTangent = false; uniforms.gridX = 0; uniforms.gridY = 0; @@ -1272,7 +1335,7 @@ - (void)drawMain:(id)commandBuffer view:(nonnull MTKView *)vie // use exisiting lod, and mip [renderEncoder setFragmentSamplerState: - (canWrap && _showSettings->isWrap) ? _colorMapSamplerBilinearWrap : _colorMapSamplerBilinearClamp + (canWrap && _showSettings->isWrap) ? _colorMapSamplerFilterWrap : _colorMapSamplerFilterBorder atIndex:SamplerIndexColor]; for(MTKSubmesh *submesh in _mesh.submeshes) @@ -1347,7 +1410,7 @@ - (void)drawMain:(id)commandBuffer view:(nonnull MTKView *)vie // force lod, and don't mip [renderEncoder setFragmentSamplerState: - (canWrap && _showSettings->isWrap) ? _colorMapSamplerWrap : _colorMapSamplerClamp + (canWrap && _showSettings->isWrap) ? _colorMapSamplerNearestWrap : _colorMapSamplerNearestBorder lodMinClamp:mip lodMaxClamp:mip + 1 atIndex:SamplerIndexColor]; @@ -1384,7 +1447,7 @@ - (void)drawMain:(id)commandBuffer view:(nonnull MTKView *)vie // force lod, and don't mip [renderEncoder setFragmentSamplerState: - (canWrap && _showSettings->isWrap) ? _colorMapSamplerWrap : _colorMapSamplerClamp + (canWrap && _showSettings->isWrap) ? 
_colorMapSamplerNearestWrap : _colorMapSamplerNearestBorder lodMinClamp:mip lodMaxClamp:mip + 1 atIndex:SamplerIndexColor]; diff --git a/kramv/KramShaders.h b/kramv/KramShaders.h index a556dfc8..33b60c6d 100644 --- a/kramv/KramShaders.h +++ b/kramv/KramShaders.h @@ -101,9 +101,13 @@ typedef NS_ENUM(int32_t, ShaderShapeChannel) ShShapeChannelUV0, + ShShapeChannelFaceNormal, + ShShapeChannelNormal, ShShapeChannelTangent, - ShShapeChannelBitangent + ShShapeChannelBitangent, + + // ShShapeChannelBumpNormal, }; @@ -132,6 +136,9 @@ struct Uniforms bool isNormalMapSigned; bool isNormalMapSwizzleAGToRG; + // this means pull tangent from vertex + bool useTangent; + uint32_t numChannels; // control the pixel grid dimensions, can be block size, or pixel size diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index 23e84eef..fc49f249 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -6,11 +6,10 @@ using namespace metal; -// Whether to use model tangents (true) or generate tangents from normal in fragment shader (false). -// When set false, the algorithm doesn't adjust for mirrored uv -// See meshSphereMirrored and set this to false. -// TODO: hook this up to uniform and pass into calls -constant bool useTangent = true; +// TODO: Getting weird triangle artifacts on AMC 5500m on 16" MBP with useTangent = false. +// Seems that uv derivatives used for basis generation are 0 in gpu capture +// even though the uv itself are not. That shouldn't be possible. +// This results in large triangular artitfacts at the bottom of the sphere/capsule. //--------------------------------- // helpers @@ -189,6 +188,14 @@ half3 toNormal(half3 n) return n; } +// This will result in comlier failed XPC_ERROR_CONNECTION_INTERRUPTED +// was based on forum suggestion. assert() does nothing in Metal. +//#define myMetalAssert(x) \ +// if (!(x)) { \ +// device float* f = 0; \ +// *f = 12; \ +// } +//#define myMetalAssert(x) assert(x) // https://www.gamasutra.com/blogs/RobertBasler/20131122/205462/Three_Normal_Mapping_Techniques_Explained_For_the_Mathematically_Uninclined.php?print=1 // http://www.thetenthplanet.de/archives/1180 @@ -210,6 +217,13 @@ half3 transformNormalByBasis(half3 bumpNormal, half3 vertexNormal, float3 worldP float2 duv1 = dfdx(uv); float2 duv2 = dfdy(uv); + // getting non-zere uv with 0 length duv1/2 on MBP 16", this leaves missing bump artifacts + // in large triangle error so this is a patch to avoid that. 
+ if ((length_squared(duv1) < 1e-12) && + (length_squared(duv2) < 1e-12)) { + return vertexNormal; + } + // solve the linear system float3 dp2perp = cross(dp2, N); float3 dp1perp = cross(N, dp1); @@ -218,7 +232,7 @@ half3 transformNormalByBasis(half3 bumpNormal, half3 vertexNormal, float3 worldP float invmax = rsqrt(max(length_squared(T), length_squared(B))); // keeps relative magnitude of two vectors, they're not both unit vecs - T *= invmax; + T *= -invmax; // had to flip this sign to get correct lighting B *= invmax; // construct a scale-invariant frame @@ -261,6 +275,7 @@ half3 transformNormalByBasis(half3 bumpNormal, half4 tangent, half3 vertexNormal half3 transformNormal(half4 tangent, half3 vertexNormal, float3 worldPos, + bool useTangent, texture2d texture, sampler s, float2 uv, bool isSigned = true) { half4 nmap = texture.sample(s, uv); @@ -283,8 +298,8 @@ half3 transformNormal(half4 tangent, half3 vertexNormal, float3 worldPos, half3 transformNormal(half4 nmap, half3 vertexNormal, half4 tangent, - float3 worldPos, float2 uv, // to gen TBN - bool isSwizzleAGToRG, bool isSigned, bool isFrontFacing) + float3 worldPos, float2 uv, bool useTangent, // to gen TBN from normal + bool isSwizzleAGToRG, bool isSigned, bool isFrontFacing) { // add swizzle for ASTC/BC5nm, other 2 channels format can only store 01 in ba // could use hw swizzle for this @@ -368,7 +383,8 @@ void skinPosAndBasis(thread float4& position, thread float3& tangent, thread flo normal = (float4(normal, 0.0) * bindPoseToBoneTransform); - if (useTangent) + // compiler will deadstrip if tangent unused by caller + //if (useTangent) tangent = (float4(tangent, 0.0) * bindPoseToBoneTransform); } @@ -379,7 +395,7 @@ float3x3 toFloat3x3(float4x4 m) // this is for vertex shader if tangent supplied void transformBasis(thread float3& normal, thread float3& tangent, - float4x4 modelToWorldTfm, float3 invScale2) + float4x4 modelToWorldTfm, float3 invScale2, bool useTangent) { float3x3 m = toFloat3x3(modelToWorldTfm); @@ -443,7 +459,7 @@ ColorInOut DrawImageFunc( if (uniforms.isNormalMapPreview) { float3 normal = in.normal; float3 tangent = in.tangent.xyz; - transformBasis(normal, tangent, uniforms.modelMatrix, uniforms.modelMatrixInvScale2); + transformBasis(normal, tangent, uniforms.modelMatrix, uniforms.modelMatrixInvScale2, uniforms.useTangent); out.normal = toHalf(normal); @@ -665,7 +681,7 @@ float4 DrawPixels( half4 nmapH = toHalf(c); half3 n = transformNormal(nmapH, in.normal, in.tangent, - in.worldPos, in.texCoord, // to build TBN + in.worldPos, in.texCoord, uniforms.useTangent, // to build TBN uniforms.isSwizzleAGToRG, uniforms.isSigned, facing); @@ -686,7 +702,7 @@ float4 DrawPixels( half4 nmapH = toHalf(nmap); half3 n = transformNormal(nmapH, in.normal, in.tangent, - in.worldPos, in.texCoord, // to build TBN + in.worldPos, in.texCoord, uniforms.useTangent, // to build TBN uniforms.isNormalMapSwizzleAGToRG, uniforms.isNormalMapSigned, facing); c = doLighting(c, viewDir, toFloat(n), toFloat(in.normal)); @@ -773,18 +789,35 @@ float4 DrawPixels( else if (uniforms.shapeChannel == ShShapeChannelNormal) { c.rgb = toUnorm(toFloat(in.normal)); } - else if (useTangent && uniforms.shapeChannel == ShShapeChannelTangent) { + else if (uniforms.useTangent && uniforms.shapeChannel == ShShapeChannelTangent) { // TODO: make this work with useTangent = false + // may have to call routine again, or pass back basis + c.rgb = toUnorm(toFloat(in.tangent.xyz)); } else if (uniforms.shapeChannel == ShShapeChannelBitangent) { // TODO: make this work 
with useTangent = false + // may have to call routine again, or pass back basis + half3 bitangent = cross(in.tangent.xyz, in.normal) * in.tangent.w; c.rgb = toUnorm(toFloat(bitangent)); } else if (uniforms.shapeChannel == ShShapeChannelDepth) { c.rgb = saturate(in.position.z / in.position.w); } + else if (uniforms.shapeChannel == ShShapeChannelFaceNormal) { + float3 faceNormal = -cross(dfdx(in.worldPos), dfdy(in.worldPos)); + faceNormal = normalize(faceNormal); + + // TODO: incorporate facing? + + c.rgb = saturate(toUnorm(faceNormal)); + } +// else if (uniforms.shapeChannel == ShShapeChannelBumpNormal) { +// c.rgb = saturate(bumpNormal); +// } + + c.a = 1.0; } // mask to see one channel in isolation, this is really 0'ing out other channels @@ -826,8 +859,6 @@ float4 DrawPixels( // nothing for alpha? } - - if (uniforms.debugMode != ShDebugModeNone && c.a != 0.0) { bool isHighlighted = false; diff --git a/kramv/KramViewerBase.cpp b/kramv/KramViewerBase.cpp index a5c5c032..643f340d 100644 --- a/kramv/KramViewerBase.cpp +++ b/kramv/KramViewerBase.cpp @@ -10,6 +10,42 @@ int32_t ShowSettings::totalChunks() const { return std::max(one, faceCount) * std::max(one, arrayCount) * std::max(one, sliceCount); } +const char* ShowSettings::shapeChannelText() const { + const char* text = ""; + + switch(shapeChannel) { + case ShapeChannelNone: text = "Show Off"; break; + case ShapeChannelUV0: text = "Show UV0"; break; + case ShapeChannelNormal: text = "Show Normal"; break; + case ShapeChannelTangent: text = "Show Tangent"; break; + case ShapeChannelBitangent: text = "Show Bitangent"; break; + case ShapeChannelDepth: text = "Show Depth"; break; + case ShapeChannelFaceNormal: text = "Show Faces"; break; + //case ShapeChannelBumpNormal: text = "Show Bumps"; break; + default: break; + } + + return text; +} + +const char* ShowSettings::debugModeText() const { + const char* text = ""; + + switch(debugMode) { + case DebugModeNone: text = "Debug Off"; break; + case DebugModeTransparent: text = "Debug Transparent"; break; + case DebugModeNonZero: text = "Debug NonZero"; break; + case DebugModeColor: text = "Debug Color"; break; + case DebugModeGray: text = "Debug Gray"; break; + case DebugModeHDR: text = "Debug HDR"; break; + case DebugModePosX: text = "Debug +X"; break; + case DebugModePosY: text = "Debug +Y"; break; + case DebugModeCircleXY: text = "Debug XY>=1"; break; + default: break; + } + return text; +} + void ShowSettings::advanceShapeChannel(bool decrement) { int32_t numEnums = ShapeChannelCount; int32_t mode = shapeChannel; diff --git a/kramv/KramViewerBase.h b/kramv/KramViewerBase.h index b8eaed05..5ba1568f 100644 --- a/kramv/KramViewerBase.h +++ b/kramv/KramViewerBase.h @@ -56,10 +56,17 @@ enum ShapeChannel ShapeChannelUV0, - ShapeChannelNormal, + ShapeChannelFaceNormal, // gen from dfdx and dfdy + + ShapeChannelNormal, // vertex normal ShapeChannelTangent, ShapeChannelBitangent, + // don't need bump, since can already see it, but what if combined diffuse + normal + // ShapeChannelBumpNormal, + + // ShapeChannelMipLevel, // can estimate mip chose off dfdx/dfdy, and pseudocolor + ShapeChannelCount }; @@ -191,6 +198,9 @@ class ShowSettings { void advanceDebugMode(bool decrement); void advanceShapeChannel(bool decrement); + const char* shapeChannelText() const; + const char* debugModeText() const; + string lastFilename; double lastTimestamp = 0.0; diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index f7f833cf..f1aaf7db 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ 
-1657,35 +1657,13 @@ - (bool)handleKey:(uint32_t)keyCode isShiftKeyDown:(bool)isShiftKeyDown case Key::Num6: { _showSettings->advanceShapeChannel(isShiftKeyDown); - - switch(_showSettings->shapeChannel) { - case ShapeChannelNone: text = "Show Off"; break; - case ShapeChannelUV0: text = "Show UV0"; break; - case ShapeChannelNormal: text = "Show Normal"; break; - case ShapeChannelTangent: text = "Show Tangent"; break; - case ShapeChannelBitangent: text = "Show Bitangent"; break; - case ShapeChannelDepth: text = "Show Depth"; break; - default: break; - } - + text = _showSettings->shapeChannelText(); isChanged = true; break; } case Key::E: { _showSettings->advanceDebugMode(isShiftKeyDown); - - switch(_showSettings->debugMode) { - case DebugModeNone: text = "Debug Off"; break; - case DebugModeTransparent: text = "Debug Transparent"; break; - case DebugModeNonZero: text = "Debug NonZero"; break; - case DebugModeColor: text = "Debug Color"; break; - case DebugModeGray: text = "Debug Gray"; break; - case DebugModeHDR: text = "Debug HDR"; break; - case DebugModePosX: text = "Debug +X"; break; - case DebugModePosY: text = "Debug +Y"; break; - case DebugModeCircleXY: text = "Debug XY>=1"; break; - default: break; - } + text = _showSettings->debugModeText(); isChanged = true; break; } From 31c556bb3cffef718e64e225065538f946d4ea6c Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 6 Jun 2021 21:43:13 -0700 Subject: [PATCH 117/901] kramv - add doInvertX test Shaders can get rendering shape and uv mirroring. Add inversion test, but that doesn't mean code passes when invert is on. Lighting looks flipped. Pass the determinant (sign is inversion) via the inverseScale2.w term. Add flip of winding for front vs. backfacing. Shape is a bit pancaked in capture since view does non-uniform scale but not on z-axis. 
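
Since the commit above leans on the determinant trick, here is a hedged, self-contained C++ sketch of the idea (plain structs rather than the simd types used in the patch below): the sign of the upper-3x3 determinant of the model matrix says whether the mesh is mirrored, and packing it into the w of the inverse-squared-scale vector lets the renderer flip winding for negatively scaled draws.

    #include <cmath>

    struct float3 { float x, y, z; };

    static float dot3(float3 a, float3 b)    { return a.x*b.x + a.y*b.y + a.z*b.z; }
    static float3 cross3(float3 a, float3 b) {
        return { a.y*b.z - a.z*b.y, a.z*b.x - a.x*b.z, a.x*b.y - a.y*b.x };
    }

    struct float3x3 { float3 c0, c1, c2; };  // columns of the upper 3x3
    struct float4   { float x, y, z, w; };

    // det < 0 means an odd number of axes are mirrored (e.g. scale.x = -1)
    static float determinant3x3(const float3x3& m) {
        return dot3(m.c0, cross3(m.c1, m.c2));
    }

    static float4 inverseScaleSquaredWithDet(const float3x3& m) {
        float sx2 = dot3(m.c0, m.c0), sy2 = dot3(m.c1, m.c1), sz2 = dot3(m.c2, m.c2);
        const float kMinScale2 = 1e-8f;  // avoid divide by zero on degenerate scale
        return { 1.0f / std::fmax(sx2, kMinScale2),
                 1.0f / std::fmax(sy2, kMinScale2),
                 1.0f / std::fmax(sz2, kMinScale2),
                 determinant3x3(m) };
    }

The inverseScaleSquared() change in the patch below does the same thing with simd types; a draw whose returned w is negative uses the opposite front-face winding.
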
--- kramv/KramRenderer.mm | 51 ++++++++++++++++++++++++++--------------- kramv/KramShaders.h | 2 +- kramv/KramShaders.metal | 2 +- kramv/KramViewerBase.h | 1 + 4 files changed, 36 insertions(+), 20 deletions(-) diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index a48899a8..33cb7d18 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -467,10 +467,10 @@ - (MTKMesh*)_createMeshAsset:(const char*)name mdlMesh:(MDLMesh*)mdlMesh doFlipU packed_float4* uvData = (packed_float4*)uvsMap.bytes; for (uint32_t i = 0; i < mdlMesh.vertexCount; ++i) { - if (uvData[i].w != -1.0f && uvData[i].w != 1.0f) { - int bp = 0; - bp = bp; - } +// if (uvData[i].w != -1.0f && uvData[i].w != 1.0f) { +// int bp = 0; +// bp = bp; +// } uvData[i].w = -uvData[i].w; } @@ -484,7 +484,7 @@ - (MTKMesh*)_createMeshAsset:(const char*)name mdlMesh:(MDLMesh*)mdlMesh doFlipU mesh.name = [NSString stringWithUTF8String:name]; - // these range names may onl show up when looking at geometry in capture + // these range names may only show up when looking at geometry in capture // These don't seem to appear as the buffer name that is suballocated from { // name the vertex range on the vb @@ -639,10 +639,10 @@ - (void)_loadAssets auto& uv = uvData[i]; - if (uv.x < 0.0 || uv.x > 1.0) { - int bp = 0; - bp = bp; - } +// if (uv.x < 0.0 || uv.x > 1.0) { +// int bp = 0; +// bp = bp; +// } // this makes it counterclockwise 0 to 1 float x = uv.x; @@ -678,7 +678,7 @@ - (void)_loadAssets _meshSphereMirrored = [self _createMeshAsset:"MeshSphereMirrored" mdlMesh:mdlMesh doFlipUV:false]; -// this maps 1/3rd of texture to the caps, and just isn't a very good uv mapping, using capsule nistead +// this maps 1/3rd of texture to the caps, and just isn't a very good uv mapping, using capsule instead // mdlMesh = [MDLMesh newCylinderWithHeight:1.0 // radii:(vector_float2){0.5, 0.5} // radialSegments:16 @@ -928,16 +928,19 @@ - (BOOL)loadTextureImpl:(const string&)fullFilename isTextureChanged:(BOOL)isTex _showSettings->shapeChannel = ShapeChannel::ShapeChannelNone; + // test rendering with inversion and mirroring + bool doInvertX = false; + // have one of these for each texture added to the viewer float scaleX = MAX(1, texture.width); float scaleY = MAX(1, texture.height); float scaleZ = MAX(scaleX, scaleY); // don't want 1.0f, or specular is all off due to extreme scale differences - _modelMatrix = float4x4(float4m(scaleX, scaleY, scaleZ, 1.0f)); // non uniform scale + _modelMatrix = float4x4(float4m(doInvertX ? -scaleX : scaleX, scaleY, scaleZ, 1.0f)); // non uniform scale _modelMatrix = _modelMatrix * matrix4x4_translation(0.0f, 0.0f, -1.0); // set z=-1 unit back // uniform scaled 3d primitiv float scale = MAX(scaleX, scaleY); - _modelMatrix3D = float4x4(float4m(scale, scale, scale, 1.0f)); // uniform scale + _modelMatrix3D = float4x4(float4m(doInvertX ? 
-scale : scale, scale, scale, 1.0f)); // uniform scale _modelMatrix3D = _modelMatrix3D * matrix4x4_translation(0.0f, 0.0f, -1.0f); // set z=-1 unit back return YES; @@ -948,6 +951,8 @@ - (float4x4)computeImageTransform:(float)panX panY:(float)panY zoom:(float)zoom float4x4 panTransform = matrix4x4_translation(-panX, panY, 0.0); // non-uniform scale is okay here, only affects ortho volume + // setting this to uniform zoom and object is not visible, zoom can be 20x in x and y + float4x4 viewMatrix = float4x4(float4m(zoom, zoom, 1.0f, 1.0f)); viewMatrix = panTransform * viewMatrix; @@ -964,7 +969,11 @@ bool almost_equal_elements(float3 v, float tol) { return (fabs(v.x - v.y) < tol) && (fabs(v.x - v.z) < tol); } -float3 inverseScaleSquared(float4x4 m) { +const float3x3& toFloat3x3(const float4x4& m) { + return (const float3x3&)m; +} + +float4 inverseScaleSquared(const float4x4& m) { float3 scaleSquared = float3m( length_squared(m.columns[0].xyz), length_squared(m.columns[1].xyz), @@ -978,11 +987,12 @@ float3 inverseScaleSquared(float4x4 m) { // don't divide by 0 float3 invScaleSquared = recip(simd::max(float3m(0.0001 * 0.0001), scaleSquared)); - // TODO: could also identify determinant here for flipping orientation + // identify determinant here for flipping orientation // all shapes with negative determinant need orientation flipped for backfacing - // and need to be rendered together + // and need to be grouned together if rendering with instancing + float det = determinant(toFloat3x3(m)); - return invScaleSquared; + return float4m(invScaleSquared, det); } - (void)_updateGameState @@ -1028,7 +1038,7 @@ - (void)_updateGameState // TODO: tie to UI // a few things to fix before enabling this - uniforms.useTangent = false; + uniforms.useTangent = true; uniforms.gridX = 0; uniforms.gridY = 0; @@ -1092,6 +1102,8 @@ - (void)_updateGameState uniforms.modelMatrixInvScale2 = inverseScaleSquared(_modelMatrix3D); + _showSettings->isInverted = uniforms.modelMatrixInvScale2.w < 0.0f; + // this was stored so view could use it, but now that code calcs the transform via computeImageTransform _showSettings->projectionViewModelMatrix = uniforms.projectionViewMatrix * uniforms.modelMatrix; @@ -1113,6 +1125,8 @@ - (void)_updateGameState uniforms.modelMatrixInvScale2 = inverseScaleSquared(_modelMatrix); + _showSettings->isInverted = uniforms.modelMatrixInvScale2.w < 0.0f; + // this was stored so view could use it, but now that code calcs the transform via computeImageTransform _showSettings->projectionViewModelMatrix = uniforms.projectionViewMatrix * uniforms.modelMatrix ; @@ -1241,7 +1255,8 @@ - (void)drawMain:(id)commandBuffer view:(nonnull MTKView *)vie renderEncoder.label = @"MainRender"; // set raster state - [renderEncoder setFrontFacingWinding:MTLWindingCounterClockwise]; + [renderEncoder setFrontFacingWinding:_showSettings->isInverted ? 
+ MTLWindingCounterClockwise : MTLWindingCounterClockwise]; [renderEncoder setCullMode:MTLCullModeBack]; [renderEncoder setDepthStencilState:_depthStateFull]; diff --git a/kramv/KramShaders.h b/kramv/KramShaders.h index 33b60c6d..f2027eab 100644 --- a/kramv/KramShaders.h +++ b/kramv/KramShaders.h @@ -117,7 +117,7 @@ struct Uniforms { simd::float4x4 projectionViewMatrix; simd::float4x4 modelMatrix; - simd::float3 modelMatrixInvScale2; // to supply inverse + simd::float4 modelMatrixInvScale2; // to supply inverse, w is determinant simd::float3 cameraPosition; // world-space bool isSigned; diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index fc49f249..d93fcc6c 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -459,7 +459,7 @@ ColorInOut DrawImageFunc( if (uniforms.isNormalMapPreview) { float3 normal = in.normal; float3 tangent = in.tangent.xyz; - transformBasis(normal, tangent, uniforms.modelMatrix, uniforms.modelMatrixInvScale2, uniforms.useTangent); + transformBasis(normal, tangent, uniforms.modelMatrix, uniforms.modelMatrixInvScale2.xyz, uniforms.useTangent); out.normal = toHalf(normal); diff --git a/kramv/KramViewerBase.h b/kramv/KramViewerBase.h index 5ba1568f..22a51403 100644 --- a/kramv/KramViewerBase.h +++ b/kramv/KramViewerBase.h @@ -186,6 +186,7 @@ class ShowSettings { ShapeChannel shapeChannel = ShapeChannelNone; float4x4 projectionViewModelMatrix; + bool isInverted; // cached on load, raw info about the texture from libkram string imageInfo; From 5b507b4547f341e8099a6f63a258b240e7dfa045 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 6 Jun 2021 22:21:40 -0700 Subject: [PATCH 118/901] kramv - fix zoom when doInvertX is true computing rect needs to take into absolute bound so it doesn't go negative. --- kramv/KramViewerMain.mm | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index f1aaf7db..bb5afa40 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -780,7 +780,11 @@ - (void)handleGesture:(NSGestureRecognizer *)gestureRecognizer //pt1 /= pt1.w; // see that rectangle intersects the view, view is -1 to 1 - CGRect imageRect = CGRectMake(pt0.x, pt0.y, pt1.x - pt0.x, pt1.y - pt0.y); + // this handles inversion + float2 ptOrigin = simd::min(pt0.xy, pt1.xy); + float2 ptSize = abs(pt0.xy - pt1.xy); + + CGRect imageRect = CGRectMake(ptOrigin.x, ptOrigin.y, ptSize.x, ptSize.y); CGRect viewRect = CGRectMake(-1.0f, -1.0f, 2.0f, 2.0f); int32_t numTexturesX = _showSettings->totalChunks(); @@ -1210,8 +1214,11 @@ - (void)scrollWheel:(NSEvent *)event //pt0 /= pt0.w; //pt1 /= pt1.w; + float2 ptOrigin = simd::min(pt0.xy, pt1.xy); + float2 ptSize = abs(pt0.xy - pt1.xy); + // see that rectangle intersects the view, view is -1 to 1 - CGRect imageRect = CGRectMake(pt0.x, pt0.y, pt1.x - pt0.x, pt1.y - pt0.y); + CGRect imageRect = CGRectMake(ptOrigin.x, ptOrigin.y, ptSize.x, ptSize.y); CGRect viewRect = CGRectMake(-1.0f, -1.0f, 2.0f, 2.0f); int32_t numTexturesX = _showSettings->totalChunks(); From f7b410703456463d86311512d7e639f632ec6502 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Tue, 8 Jun 2021 09:17:43 -0700 Subject: [PATCH 119/901] kramv - different address mode, split up update vs. reset settings, add shape name to hud, display png info Switch to clamp-to-edge from clamp-to-zero to avoid isHalfPixelInset and transparent pulldown at edges. 
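
For reference, the half-pixel inset that clamp-to-border otherwise forces on wrap-around shapes is just a uv remap; a minimal C++ sketch with assumed helper names (the isInsetByHalfPixel path in the shader below does something similar in spirit):

    struct float2 { float x, y; };

    static float insetCoord(float v, float texelCount) {
        float halfTexel = 0.5f / texelCount;              // half a texel in normalized uv
        return halfTexel + v * (1.0f - 2.0f * halfTexel); // remap [0,1] into [halfTexel, 1-halfTexel]
    }

    static float2 insetUVByHalfTexel(float2 uv, float width, float height) {
        return { insetCoord(uv.x, width), insetCoord(uv.y, height) };
    }

Clamp-to-edge removes the transparent pulldown at the borders, though the patch below still enables the inset on the sphere and capsule shapes whose uv wraps around.
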
--- kramv/KramRenderer.h | 3 +- kramv/KramRenderer.mm | 239 ++++++++++++++++++++++----------------- kramv/KramShaders.h | 3 + kramv/KramShaders.metal | 2 +- kramv/KramViewerBase.cpp | 28 +++++ kramv/KramViewerBase.h | 4 +- kramv/KramViewerMain.mm | 15 +-- 7 files changed, 179 insertions(+), 115 deletions(-) diff --git a/kramv/KramRenderer.h b/kramv/KramRenderer.h index 4f68d4ca..31490ae2 100644 --- a/kramv/KramRenderer.h +++ b/kramv/KramRenderer.h @@ -30,7 +30,8 @@ namespace kram { - (BOOL)loadTextureFromImage:(const std::string&)fullFilename timestamp:(double)timestamp image:(kram::KTXImage&)image - imageNormal:(nullable kram::KTXImage*)imageNormal; + imageNormal:(nullable kram::KTXImage*)imageNormal + isArchive:(BOOL)isArchive; - (BOOL)loadTexture:(nonnull NSURL *)url; diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index 33cb7d18..06d469c5 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -705,18 +705,23 @@ - (void)_loadAssets } +static bool isPNGFilename(const char* filename) { + // should really lookg at first 4 bytes of data + return endsWithExtension(filename, ".png") || endsWithExtension(filename, ".PNG"); +} + - (BOOL)loadTextureFromImage:(const string&)fullFilename timestamp:(double)timestamp image:(kram::KTXImage&)image imageNormal:(kram::KTXImage*)imageNormal + isArchive:(BOOL)isArchive { // image can be decoded to rgba8u if platform can't display format natively // but still want to identify blockSize from original format // Note that modstamp can change, but content data hash may be the same - bool isTextureChanged = - (fullFilename != _showSettings->lastFilename) || - (timestamp != _showSettings->lastTimestamp); + bool isNewFile = (fullFilename != _showSettings->lastFilename); + bool isTextureChanged = isNewFile || (timestamp != _showSettings->lastTimestamp); if (isTextureChanged) { // synchronously cpu upload from ktx file to buffer, with eventual gpu blit from buffer to returned texture. TODO: If buffer is full, then something needs to keep KTXImage and data alive. This load may also decode the texture to RGBA8. 
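
As the comment in isPNGFilename() above notes, sniffing the file signature is more reliable than matching the extension; a hedged sketch of that check (helper name assumed, not part of kram). Every PNG begins with the same fixed 8-byte signature:

    #include <cstddef>
    #include <cstdint>
    #include <cstring>

    static bool hasPNGSignature(const uint8_t* data, std::size_t dataSize) {
        static const uint8_t kSig[8] = { 0x89, 'P', 'N', 'G', '\r', '\n', 0x1A, '\n' };
        return dataSize >= sizeof(kSig) && std::memcmp(data, kSig, sizeof(kSig)) == 0;
    }

The extension test stays useful as a cheap first pass when only a filename is available, which is the case in the loader path above.
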
@@ -738,10 +743,19 @@ - (BOOL)loadTextureFromImage:(const string&)fullFilename // if archive contained png, then it's been converted to ktx // so the info below may not reflect original data + // Would need original png data to look at header + // This is only info on image, not on imageNormal + + bool isPNG = isPNGFilename(fullFilename.c_str()); + if (!isArchive && isPNG) { + _showSettings->imageInfo = kramInfoToString(fullFilename, false); + _showSettings->imageInfoVerbose = kramInfoToString(fullFilename, true); + } + else { + _showSettings->imageInfo = kramInfoKTXToString(fullFilename, image, false); + _showSettings->imageInfoVerbose = kramInfoKTXToString(fullFilename, image, true); + } - _showSettings->imageInfo = kramInfoKTXToString(fullFilename, image, false); - _showSettings->imageInfoVerbose = kramInfoKTXToString(fullFilename, image, true); - _showSettings->originalFormat = (MyMTLPixelFormat)originalFormatMTL; _showSettings->decodedFormat = (MyMTLPixelFormat)texture.pixelFormat; @@ -752,9 +766,13 @@ - (BOOL)loadTextureFromImage:(const string&)fullFilename _colorMap = texture; _normalMap = normalTexture; } + + [self updateImageSettings:fullFilename image:image]; } - return [self loadTextureImpl:fullFilename isTextureChanged:isTextureChanged]; + [self resetSomeImageSettings:isNewFile]; + + return YES; } - (BOOL)loadTexture:(nonnull NSURL *)url @@ -768,9 +786,9 @@ - (BOOL)loadTexture:(nonnull NSURL *)url // DONE: tie this to url and modstamp differences double timestamp = fileDate.timeIntervalSince1970; - bool isTextureChanged = - (fullFilename != _showSettings->lastFilename) || - (timestamp != _showSettings->lastTimestamp); + bool isNewFile = (fullFilename != _showSettings->lastFilename); + + bool isTextureChanged = isNewFile || (timestamp != _showSettings->lastTimestamp); // image can be decoded to rgba8u if platform can't display format natively // but still want to identify blockSize from original format @@ -794,12 +812,15 @@ - (BOOL)loadTexture:(nonnull NSURL *)url // this is not the png data, but info on converted png to ktx level // But this avoids loading the image 2 more times // Size of png is very different than decompressed or recompressed ktx - - _showSettings->imageInfo = kramInfoKTXToString(fullFilename, image, false); - _showSettings->imageInfoVerbose = kramInfoKTXToString(fullFilename, image, true); - - //_showSettings->imageInfo = kramInfoToString(fullFilename, image, false); - //_showSettings->imageInfoVerbose = kramInfoToString(fullFilename, image, true); + bool isPNG = isPNGFilename(fullFilename.c_str()); + if (isPNG) { + _showSettings->imageInfo = kramInfoToString(fullFilename, false); + _showSettings->imageInfoVerbose = kramInfoToString(fullFilename, true); + } + else { + _showSettings->imageInfo = kramInfoKTXToString(fullFilename, image, false); + _showSettings->imageInfoVerbose = kramInfoKTXToString(fullFilename, image, true); + } _showSettings->originalFormat = (MyMTLPixelFormat)originalFormatMTL; _showSettings->decodedFormat = (MyMTLPixelFormat)texture.pixelFormat; @@ -811,39 +832,33 @@ - (BOOL)loadTexture:(nonnull NSURL *)url _colorMap = texture; _normalMap = nil; } + + [self updateImageSettings:fullFilename image:image]; } - return [self loadTextureImpl:fullFilename isTextureChanged:isTextureChanged]; + [self resetSomeImageSettings:isNewFile]; + + return YES; } - - -- (BOOL)loadTextureImpl:(const string&)fullFilename isTextureChanged:(BOOL)isTextureChanged +// only called on new or modstamp-changed image +- (void)updateImageSettings:(const 
string&)fullFilename image:(KTXImage&)image { - if (isTextureChanged) { - Int2 blockDims = blockDimsOfFormat(_showSettings->originalFormat); - _showSettings->blockX = blockDims.x; - _showSettings->blockY = blockDims.y; - } - + // this is the actual format, may have been decoded id texture = _colorMap; - MyMTLPixelFormat format = (MyMTLPixelFormat)texture.pixelFormat; - MyMTLPixelFormat originalFormat = _showSettings->originalFormat; - // based on original or transcode? + // format may be trancoded to gpu-friendly format + MyMTLPixelFormat originalFormat = image.pixelFormat; + + _showSettings->blockX = image.blockDims().x; + _showSettings->blockY = image.blockDims().y; + _showSettings->isSigned = isSignedFormat(format); - // need a way to get at KTXImage, but would need to keep mmap alive - // this doesn't handle normals that are ASTC, so need more data from loader string fullFilenameCopy = fullFilename; - - // this is so unreadable string filename = toLower(fullFilenameCopy); - // could cycle between rrr1 and r001. - int32_t numChannels = numChannelsOfFormat(originalFormat); - // set title to filename, chop this to just file+ext, not directory string filenameShort = filename; const char* filenameSlash = strrchr(filenameShort.c_str(), '/'); @@ -858,6 +873,9 @@ - (BOOL)loadTextureImpl:(const string&)fullFilename isTextureChanged:(BOOL)isTex bool isNormal = false; bool isSDF = false; + // could cycle between rrr1 and r001. + int32_t numChannels = numChannelsOfFormat(originalFormat); + // note that decoded textures are 3/4 channel even though they are normal/sdf originally, so test those first if (numChannels == 2 || endsWith(filenameShort, "-n") || endsWith(filenameShort, "_normal")) { isNormal = true; @@ -878,8 +896,7 @@ - (BOOL)loadTextureImpl:(const string&)fullFilename isTextureChanged:(BOOL)isTex if (isAlbedo && endsWithExtension(filename.c_str(), ".png")) { _showSettings->isPremul = true; // convert to premul in shader, so can see other channels } - - if (isNormal || isSDF) { + else if (isNormal || isSDF) { _showSettings->isPremul = false; } @@ -891,28 +908,51 @@ - (BOOL)loadTextureImpl:(const string&)fullFilename isTextureChanged:(BOOL)isTex _showSettings->isSwizzleAGToRG = false; +// For best sdf and normal reconstruct from ASTC or BC3, must use RRR1 and GGGR or RRRG +// BC1nm multiply r*a in the shader, but just use BC5 anymore. // if (isASTCFormat(originalFormat) && isNormal) { // // channels after = "ag01" // _showSettings->isSwizzleAGToRG = true; // } - // then can manipulate this after loading - _showSettings->mipLOD = 0; - _showSettings->faceNumber = 0; - _showSettings->arrayNumber = 0; - _showSettings->sliceNumber = 0; - // can derive these from texture queries - _showSettings->maxLOD = (int32_t)texture.mipmapLevelCount; - _showSettings->faceCount = (texture.textureType == MTLTextureTypeCube || - texture.textureType == MTLTextureTypeCubeArray) ? 6 : 0; - _showSettings->arrayCount = (int32_t)texture.arrayLength; - _showSettings->sliceCount = (int32_t)texture.depth; - - _showSettings->channels = TextureChannels::ModeRGBA; + _showSettings->maxLOD = (int32_t)image.header.numberOfMipmapLevels; + _showSettings->faceCount = (image.textureType == MyMTLTextureTypeCube || + image.textureType == MyMTLTextureTypeCubeArray) ? 
6 : 0; + _showSettings->arrayCount = (int32_t)image.header.numberOfArrayElements; + _showSettings->sliceCount = (int32_t)image.depth; + + _showSettings->imageBoundsX = (int32_t)image.width; + _showSettings->imageBoundsY = (int32_t)image.height; +} + +- (void)resetSomeImageSettings:(BOOL)isNewFile { - _showSettings->imageBoundsX = (int32_t)texture.width; - _showSettings->imageBoundsY = (int32_t)texture.height; + // only reset these on new texture, but have to revalidate + if (isNewFile) { + // then can manipulate this after loading + _showSettings->mipLOD = 0; + _showSettings->faceNumber = 0; + _showSettings->arrayNumber = 0; + _showSettings->sliceNumber = 0; + + + _showSettings->channels = TextureChannels::ModeRGBA; + + // wish could keep existing setting, but new texture might not + // be supported debugMode for new texture + _showSettings->debugMode = DebugMode::DebugModeNone; + + _showSettings->shapeChannel = ShapeChannel::ShapeChannelNone; + } + else { + // reloaded file may have different limits + _showSettings->mipLOD = std::min(_showSettings->mipLOD, _showSettings->maxLOD); + _showSettings->faceNumber = std::min(_showSettings->faceNumber, _showSettings->faceCount); + _showSettings->arrayNumber = std::min(_showSettings->arrayNumber, _showSettings->arrayCount); + _showSettings->sliceNumber = std::min(_showSettings->sliceNumber, _showSettings->sliceCount); + } + [self updateViewTransforms]; @@ -922,18 +962,12 @@ - (BOOL)loadTextureImpl:(const string&)fullFilename isTextureChanged:(BOOL)isTex _showSettings->zoom = _showSettings->zoomFit; - // wish could keep existing setting, but new texture might not - // be supported debugMode for new texture - _showSettings->debugMode = DebugMode::DebugModeNone; - - _showSettings->shapeChannel = ShapeChannel::ShapeChannelNone; - // test rendering with inversion and mirroring bool doInvertX = false; // have one of these for each texture added to the viewer - float scaleX = MAX(1, texture.width); - float scaleY = MAX(1, texture.height); + float scaleX = MAX(1, _showSettings->imageBoundsX); + float scaleY = MAX(1, _showSettings->imageBoundsY); float scaleZ = MAX(scaleX, scaleY); // don't want 1.0f, or specular is all off due to extreme scale differences _modelMatrix = float4x4(float4m(doInvertX ? -scaleX : scaleX, scaleY, scaleZ, 1.0f)); // non uniform scale _modelMatrix = _modelMatrix * matrix4x4_translation(0.0f, 0.0f, -1.0); // set z=-1 unit back @@ -942,8 +976,6 @@ - (BOOL)loadTextureImpl:(const string&)fullFilename isTextureChanged:(BOOL)isTex float scale = MAX(scaleX, scaleY); _modelMatrix3D = float4x4(float4m(doInvertX ? -scale : scale, scale, scale, 1.0f)); // uniform scale _modelMatrix3D = _modelMatrix3D * matrix4x4_translation(0.0f, 0.0f, -1.0f); // set z=-1 unit back - - return YES; } - (float4x4)computeImageTransform:(float)panX panY:(float)panY zoom:(float)zoom { @@ -1017,12 +1049,12 @@ - (void)_updateGameState } uniforms.isCheckerboardShown = _showSettings->isCheckerboardShown; - bool canWrap = true; - if (textureType == MyMTLTextureTypeCube || textureType == MyMTLTextureTypeCubeArray) { - canWrap = false; - } - uniforms.isWrap = canWrap ? _showSettings->isWrap : false; + // addressing mode + bool isCube = (textureType == MyMTLTextureTypeCube || textureType == MyMTLTextureTypeCubeArray); + bool doWrap = !isCube && _showSettings->isWrap; + bool doEdge = !doWrap; + uniforms.isWrap = doWrap ? 
_showSettings->isWrap : false; uniforms.isPreview = _showSettings->isPreview; @@ -1081,6 +1113,13 @@ - (void)_updateGameState } uniforms.is3DView = _showSettings->is3DView; + // on small textures can really see missing pixel (3 instead of 4 pixels) + // so only do this on the sphere/capsule which wrap-around uv space + uniforms.isInsetByHalfPixel = false; + if (_showSettings->meshNumber >= 2 && doEdge) { + uniforms.isInsetByHalfPixel = true; + } + // translate float4x4 panTransform = matrix4x4_translation(-_showSettings->panX, _showSettings->panY, 0.0); @@ -1224,8 +1263,8 @@ - (void)drawInMTKView:(nonnull MTKView *)view } - (void)drawMain:(id)commandBuffer view:(nonnull MTKView *)view { - /// Delay getting the currentRenderPassDescriptor until absolutely needed. This avoids - /// holding onto the drawable and blocking the display pipeline any longer than necessary + // Delay getting the currentRenderPassDescriptor until absolutely needed. This avoids + // holding onto the drawable and blocking the display pipeline any longer than necessary MTLRenderPassDescriptor* renderPassDescriptor = view.currentRenderPassDescriptor; if (renderPassDescriptor == nil) { @@ -1245,7 +1284,7 @@ - (void)drawMain:(id)commandBuffer view:(nonnull MTKView *)vie return; } - /// Final pass rendering code here + // Final pass rendering code here id renderEncoder = [commandBuffer renderCommandEncoderWithDescriptor:renderPassDescriptor]; if (!renderEncoder) { @@ -1274,12 +1313,29 @@ - (void)drawMain:(id)commandBuffer view:(nonnull MTKView *)vie } } + //--------------------------------------- + // figure out the sampler + + id sampler; + + MyMTLTextureType textureType = (MyMTLTextureType)_colorMap.textureType; + + bool isCube = (textureType == MyMTLTextureTypeCube || textureType == MyMTLTextureTypeCubeArray); + bool doWrap = !isCube && _showSettings->isWrap; + bool doEdge = !doWrap; + + if (_showSettings->isPreview) { + sampler = doWrap ? _colorMapSamplerFilterWrap : (doEdge ? _colorMapSamplerFilterEdge : _colorMapSamplerFilterBorder); + } + else { + sampler = doWrap ? _colorMapSamplerNearestWrap : (doEdge ? _colorMapSamplerNearestEdge : _colorMapSamplerNearestBorder); + } + + //--------------------------------------- //for (texture in _textures) // TODO: setup //if (_colorMap) { // TODO: set texture specific uniforms, but using single _colorMap for now - bool canWrap = true; - switch(_colorMap.textureType) { case MTLTextureType1DArray: [renderEncoder setRenderPipelineState:_pipelineState1DArray]; @@ -1298,11 +1354,8 @@ - (void)drawMain:(id)commandBuffer view:(nonnull MTKView *)vie break; case MTLTextureTypeCube: [renderEncoder setRenderPipelineState:_pipelineStateCube]; - canWrap = false; - break; case MTLTextureTypeCubeArray: - canWrap = false; [renderEncoder setRenderPipelineState:_pipelineStateCubeArray]; break; @@ -1331,8 +1384,6 @@ - (void)drawMain:(id)commandBuffer view:(nonnull MTKView *)vie atIndex:TextureIndexNormal]; } - - UniformsLevel uniformsLevel; uniformsLevel.drawOffset = float2m(0.0f); @@ -1349,9 +1400,7 @@ - (void)drawMain:(id)commandBuffer view:(nonnull MTKView *)vie atIndex:BufferIndexUniformsLevel]; // use exisiting lod, and mip - [renderEncoder setFragmentSamplerState: - (canWrap && _showSettings->isWrap) ? 
_colorMapSamplerFilterWrap : _colorMapSamplerFilterBorder - atIndex:SamplerIndexColor]; + [renderEncoder setFragmentSamplerState:sampler atIndex:SamplerIndexColor]; for(MTKSubmesh *submesh in _mesh.submeshes) { @@ -1368,16 +1417,6 @@ - (void)drawMain:(id)commandBuffer view:(nonnull MTKView *)vie int32_t h = _colorMap.height; //int32_t d = _colorMap.depth; - MyMTLTextureType textureType = MyMTLTextureType2D; - if (_colorMap) { - textureType = (MyMTLTextureType)_colorMap.textureType; - } - - bool isCube = false; - if (textureType == MyMTLTextureTypeCube || textureType == MyMTLTextureTypeCubeArray) { - isCube = true; - } - // gap the contact sheet, note this 2 pixels is scaled on small textures by the zoom int32_t gap = _showSettings->showAllPixelGap; // * _showSettings->viewContentScaleFactor; @@ -1424,11 +1463,10 @@ - (void)drawMain:(id)commandBuffer view:(nonnull MTKView *)vie atIndex:BufferIndexUniformsLevel]; // force lod, and don't mip - [renderEncoder setFragmentSamplerState: - (canWrap && _showSettings->isWrap) ? _colorMapSamplerNearestWrap : _colorMapSamplerNearestBorder - lodMinClamp:mip - lodMaxClamp:mip + 1 - atIndex:SamplerIndexColor]; + [renderEncoder setFragmentSamplerState:sampler + lodMinClamp:mip + lodMaxClamp:mip + 1 + atIndex:SamplerIndexColor]; // TODO: since this isn't a preview, have mode to display all faces and mips on on screen @@ -1461,11 +1499,10 @@ - (void)drawMain:(id)commandBuffer view:(nonnull MTKView *)vie atIndex:BufferIndexUniformsLevel]; // force lod, and don't mip - [renderEncoder setFragmentSamplerState: - (canWrap && _showSettings->isWrap) ? _colorMapSamplerNearestWrap : _colorMapSamplerNearestBorder - lodMinClamp:mip - lodMaxClamp:mip + 1 - atIndex:SamplerIndexColor]; + [renderEncoder setFragmentSamplerState:sampler + lodMinClamp:mip + lodMaxClamp:mip + 1 + atIndex:SamplerIndexColor]; // TODO: since this isn't a preview, have mode to display all faces and mips on on screen diff --git a/kramv/KramShaders.h b/kramv/KramShaders.h index f2027eab..554acdb4 100644 --- a/kramv/KramShaders.h +++ b/kramv/KramShaders.h @@ -136,6 +136,9 @@ struct Uniforms bool isNormalMapSigned; bool isNormalMapSwizzleAGToRG; + // this is used on wrap-around objects to avoid black transparent using clampToZero + bool isInsetByHalfPixel; + // this means pull tangent from vertex bool useTangent; diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index d93fcc6c..9ead4bbc 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -483,7 +483,7 @@ ColorInOut DrawImageFunc( out.texCoord.xy = in.texCoord; out.texCoord.xy *= wrapAmount; } - else if (uniforms.is3DView && !uniforms.isWrap) { + else if (uniforms.is3DView && uniforms.isInsetByHalfPixel) { // inset from edge by a fraction of a pixel, to avoid clamp boundary error // does this have to adjust for mipLOD too? 
float2 onePixel = uniformsLevel.textureSize.zw; diff --git a/kramv/KramViewerBase.cpp b/kramv/KramViewerBase.cpp index 643f340d..bf1f4cdc 100644 --- a/kramv/KramViewerBase.cpp +++ b/kramv/KramViewerBase.cpp @@ -10,6 +10,21 @@ int32_t ShowSettings::totalChunks() const { return std::max(one, faceCount) * std::max(one, arrayCount) * std::max(one, sliceCount); } +const char* ShowSettings::meshNumberText() const { + const char* text = ""; + + switch(meshNumber) { + case 0: text = "Shape Plane"; break; + case 1: text = "Shape Box"; break; + case 2: text = "Shape Sphere"; break; + case 3: text = "Shape Sphere MirrorU"; break; + case 4: text = "Shape Capsule"; break; + default: break; + } + + return text; +} + const char* ShowSettings::shapeChannelText() const { const char* text = ""; @@ -46,6 +61,19 @@ const char* ShowSettings::debugModeText() const { return text; } +void ShowSettings::advanceMeshNumber(bool decrement) { + int32_t numEnums = meshCount; + int32_t number = meshNumber; + if (decrement) { + number += numEnums - 1; + } + else { + number += 1; + } + + meshNumber = number % numEnums; +} + void ShowSettings::advanceShapeChannel(bool decrement) { int32_t numEnums = ShapeChannelCount; int32_t mode = shapeChannel; diff --git a/kramv/KramViewerBase.h b/kramv/KramViewerBase.h index 22a51403..28261f03 100644 --- a/kramv/KramViewerBase.h +++ b/kramv/KramViewerBase.h @@ -195,10 +195,12 @@ class ShowSettings { // format before any transcode to supported formats MyMTLPixelFormat originalFormat; MyMTLPixelFormat decodedFormat; - + + void advanceMeshNumber(bool decrement); void advanceDebugMode(bool decrement); void advanceShapeChannel(bool decrement); + const char* meshNumberText() const; const char* shapeChannelText() const; const char* debugModeText() const; diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index bb5afa40..990a2392 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -1905,15 +1905,8 @@ - (bool)handleKey:(uint32_t)keyCode isShiftKeyDown:(bool)isShiftKeyDown // test out different shapes, not offiical support yet case Key::Num8: if (_showSettings->meshCount > 1) { - if (isShiftKeyDown) { - _showSettings->meshNumber = _showSettings->meshNumber + _showSettings->meshCount - 1; - } - else { - _showSettings->meshNumber++; - } - _showSettings->meshNumber = _showSettings->meshNumber % _showSettings->meshCount; - - sprintf(text, "Mesh %d %s", _showSettings->meshNumber, "Shape"); // TODO: put meshName in _showSettings + _showSettings->advanceMeshNumber(isShiftKeyDown); + text = _showSettings->meshNumberText(); isChanged = true; } break; @@ -2200,7 +2193,7 @@ - (BOOL)loadTextureFromFolder } Renderer* renderer = (Renderer*)self.delegate; - if (![renderer loadTextureFromImage:fullFilename timestamp:timestamp image:image imageNormal:hasNormal ? &imageNormal : nullptr]) { + if (![renderer loadTextureFromImage:fullFilename timestamp:timestamp image:image imageNormal:hasNormal ? &imageNormal : nullptr isArchive:NO]) { return NO; } @@ -2319,7 +2312,7 @@ - (BOOL)loadTextureFromArchive string fullFilename = filename; Renderer* renderer = (Renderer*)self.delegate; if (![renderer loadTextureFromImage:fullFilename timestamp:(double)timestamp - image:image imageNormal:hasNormal ? &imageNormal : nullptr]) + image:image imageNormal:hasNormal ? 
&imageNormal : nullptr isArchive:YES]) { return NO; } From b36cd3d447740f4298d7575db8a539c1c2c8baad Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Tue, 8 Jun 2021 09:22:26 -0700 Subject: [PATCH 120/901] kramv - fix address logic --- kramv/KramRenderer.mm | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index 06d469c5..ba61092e 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -1054,6 +1054,7 @@ - (void)_updateGameState bool isCube = (textureType == MyMTLTextureTypeCube || textureType == MyMTLTextureTypeCubeArray); bool doWrap = !isCube && _showSettings->isWrap; bool doEdge = !doWrap; + bool doZero = !doEdge; uniforms.isWrap = doWrap ? _showSettings->isWrap : false; uniforms.isPreview = _showSettings->isPreview; @@ -1116,7 +1117,7 @@ - (void)_updateGameState // on small textures can really see missing pixel (3 instead of 4 pixels) // so only do this on the sphere/capsule which wrap-around uv space uniforms.isInsetByHalfPixel = false; - if (_showSettings->meshNumber >= 2 && doEdge) { + if (_showSettings->meshNumber >= 2 && doZero) { uniforms.isInsetByHalfPixel = true; } From 0edac5bf033e9470a875cd57c01c769418169231 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Tue, 8 Jun 2021 21:04:32 -0700 Subject: [PATCH 121/901] kramv - support png with KTXImageData in folders/archives This is a lot more work to translate png to props. But they are prevalent as source in folders. --- kramv/KramLoader.mm | 7 +- kramv/KramRenderer.mm | 9 +-- kramv/KramViewerMain.mm | 158 ++++++++++++++++++++++++++++------------ libkram/kram/Kram.cpp | 45 ++++++++++-- libkram/kram/Kram.h | 15 +++- 5 files changed, 172 insertions(+), 62 deletions(-) diff --git a/kramv/KramLoader.mm b/kramv/KramLoader.mm index a882ed14..6053733e 100644 --- a/kramv/KramLoader.mm +++ b/kramv/KramLoader.mm @@ -338,9 +338,14 @@ - (BOOL)loadImageFromURL:(nonnull NSURL *)url image:(KTXImage&)image imageData:( bool isSRGB = (!isNormal && !isSDF); - if (!imageData.openPNG(path, isSRGB, image)) { + if (!imageData.open(path, image)) { return NO; } + + // have to adjust the format if srgb + if (isSRGB) { + image.pixelFormat = MyMTLPixelFormatRGBA8Unorm_sRGB; + } } else { if (!imageData.open(path, image)) { diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index ba61092e..8aa9148b 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -705,10 +705,7 @@ - (void)_loadAssets } -static bool isPNGFilename(const char* filename) { - // should really lookg at first 4 bytes of data - return endsWithExtension(filename, ".png") || endsWithExtension(filename, ".PNG"); -} + - (BOOL)loadTextureFromImage:(const string&)fullFilename timestamp:(double)timestamp @@ -892,8 +889,10 @@ - (void)updateImageSettings:(const string&)fullFilename image:(KTXImage&)image // textures are already premul, so don't need to premul in shader // should really have 3 modes, unmul, default, premul + bool isPNG = isPNGFilename(filename.c_str()); + _showSettings->isPremul = false; - if (isAlbedo && endsWithExtension(filename.c_str(), ".png")) { + if (isAlbedo && isPNG) { _showSettings->isPremul = true; // convert to premul in shader, so can see other channels } else if (isNormal || isSDF) { diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index 990a2392..dc6d08b2 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -2069,7 +2069,7 @@ -(BOOL)loadArchive:(const char*)zipFilename // filter out unsupported extensions - _zip.filterExtensions({".ktx", ".ktx2"}); + 
_zip.filterExtensions({".ktx", ".ktx2", ".png"}); // don't switch to empty archive if (_zip.zipEntrys().empty()) { @@ -2123,14 +2123,29 @@ -(BOOL)advanceTextureFromFolder:(BOOL)increment return [self loadTextureFromFolder]; } +- (BOOL)findFilenameInFolders:(const string&)filename { + // TODO: binary search for the filename in the array, but would have to be in same directory + + bool isFound = false; + for (const auto& search : _folderFiles) { + if (search == filename) { + isFound = true; + break; + } + } + return isFound; +} + - (BOOL)loadTextureFromFolder { // now lookup the filename and data at that entry const char* filename = _folderFiles[_fileFolderIndex].c_str(); + string fullFilename = filename; auto timestamp = FileHelper::modificationTimestamp(filename); // have already filtered filenames out, so this should never get hit - if (!(//endsWithExtension(filename, ".png") || + bool isPNG = isPNGFilename(filename); + if (!(isPNG || endsWithExtension(filename, ".ktx") || endsWithExtension(filename, ".ktx2")) ) { @@ -2138,29 +2153,47 @@ - (BOOL)loadTextureFromFolder } // first only do this on albedo/diffuse textures - string normalFilename = filename; + string normalFilename; - string search = "-a.ktx"; - auto searchPos = normalFilename.find(search); - bool isFound = searchPos != string::npos; + string search; + bool isFound = false; + string::size_type searchPos; - if (!isFound) { - search = "-d.ktx"; - searchPos = normalFilename.find(search); + if (isPNG) { + // find matching png + search = "-a.png"; + searchPos = fullFilename.find(search); isFound = searchPos != string::npos; + + if (!isFound) { + search = "-d.png"; + searchPos = fullFilename.find(search); + isFound = searchPos != string::npos; + } + } + else { + // find matching ktx/2 + search = "-a.ktx"; + searchPos = fullFilename.find(search); + isFound = searchPos != string::npos; + + if (!isFound) { + search = "-d.ktx"; + searchPos = fullFilename.find(search); + isFound = searchPos != string::npos; + } } + bool isSrgb = isFound; + if (isFound) { - normalFilename = normalFilename.replace(searchPos, search.length(), "-n.ktx"); // works for ktx or ktx2 file - - // binary search for the filename in the array, will have to be in same directory - isFound = false; - for (const auto& search : _folderFiles) { - if (search == normalFilename) { - isFound = true; - break; - } - } + // stupid stl mods fullFilename in the replace if not a copy + normalFilename = fullFilename; + + // this won't work for mix of png/ktx files, but that's okay + normalFilename = normalFilename.replace(searchPos, search.length(), isPNG ? "-n.png" : "-n.ktx"); + + isFound = [self findFilenameInFolders:normalFilename]; if (!isFound) { normalFilename.clear(); @@ -2176,7 +2209,7 @@ - (BOOL)loadTextureFromFolder KTXImageData imageNormalDataKTX; bool hasNormal = false; - string fullFilename = filename; + // this requires decode and conversion to RGBA8u if (!imageDataKTX.open(fullFilename.c_str(), image)) { return NO; } @@ -2192,6 +2225,10 @@ - (BOOL)loadTextureFromFolder } } + if (isPNG && isSrgb) { + image.pixelFormat = MyMTLPixelFormatRGBA8Unorm_sRGB; + } + Renderer* renderer = (Renderer*)self.delegate; if (![renderer loadTextureFromImage:fullFilename timestamp:timestamp image:image imageNormal:hasNormal ? 
&imageNormal : nullptr isArchive:NO]) { return NO; @@ -2239,49 +2276,73 @@ - (BOOL)loadTextureFromArchive // now lookup the filename and data at that entry const auto& entry = _zip.zipEntrys()[_fileArchiveIndex]; const char* filename = entry.filename; + string fullFilename = filename; double timestamp = (double)entry.modificationDate; // have already filtered filenames out, so this should never get hit - if (!(//endsWithExtension(filename, ".png") || + bool isPNG = isPNGFilename(filename); + + if (!(isPNG || endsWithExtension(filename, ".ktx") || endsWithExtension(filename, ".ktx2")) ) { return NO; } - string normalFilename = filename; + string normalFilename; // first only do this on albedo/diffuse textures - string search = "-a.ktx"; - auto searchPos = normalFilename.find(search); - bool isFound = searchPos != string::npos; - if (!isFound) { - search = "-d.ktx"; - searchPos = normalFilename.find(search); + string search; + bool isFound = false; + string::size_type searchPos; + + if (isPNG) { + search = "-a.png"; + searchPos = fullFilename.find(search); isFound = searchPos != string::npos; + + if (!isFound) { + search = "-d.png"; + searchPos = fullFilename.find(search); + isFound = searchPos != string::npos; + } } - - if (isFound) { - normalFilename = normalFilename.replace(searchPos, search.length(), "-n.ktx"); + else { + search = "-a.ktx"; + searchPos = fullFilename.find(search); + isFound = searchPos != string::npos; + + if (!isFound) { + search = "-d.ktx"; + searchPos = fullFilename.find(search); + isFound = searchPos != string::npos; + } } + bool isSrgb = isFound; + //--------------------------- const uint8_t* imageData = nullptr; uint64_t imageDataLength = 0; - + + const uint8_t* imageNormalData = nullptr; + uint64_t imageNormalDataLength = 0; + + // search for main file - can be albedo or normal if (!_zip.extractRaw(filename, &imageData, imageDataLength)) { return NO; } - const uint8_t* imageNormalData = nullptr; - uint64_t imageNormalDataLength = 0; - - // see if this is albedo, and then search for normal map in the same archive + // search for normal map in the same archive if (isFound) { + normalFilename = fullFilename; + + normalFilename = normalFilename.replace(searchPos, search.length(), isPNG ? "-n.png" : "-n.ktx"); + if (!_zip.extractRaw(normalFilename.c_str(), &imageNormalData, imageNormalDataLength)) { - // ignore failure case here, this is just guessing there's a -n file + // ignore failure case here, this is just guessing there's a related normal file } } @@ -2291,12 +2352,14 @@ - (BOOL)loadTextureFromArchive // That's why we can't just pass filenames to the renderer KTXImage image; KTXImageData imageDataKTX; + + KTXImage imageNormal; + KTXImageData imageNormalDataKTX; + if (!imageDataKTX.open(imageData, imageDataLength, image)) { return NO; } - KTXImage imageNormal; - KTXImageData imageNormalDataKTX; bool hasNormal = false; if (isFound && imageNormalDataKTX.open(imageNormalData, imageNormalDataLength, imageNormal)) { @@ -2309,7 +2372,10 @@ - (BOOL)loadTextureFromArchive } } - string fullFilename = filename; + if (isPNG && isSrgb) { + image.pixelFormat = MyMTLPixelFormatRGBA8Unorm_sRGB; + } + Renderer* renderer = (Renderer*)self.delegate; if (![renderer loadTextureFromImage:fullFilename timestamp:(double)timestamp image:image imageNormal:hasNormal ? 
&imageNormal : nullptr isArchive:YES]) @@ -2388,11 +2454,11 @@ - (BOOL)loadTextureFromURL:(NSURL*)url { const char* name = fileOrDirectoryURL.fileSystemRepresentation; // filter only types that are supported - if (endsWithExtension(name, ".ktx") || - endsWithExtension(name, ".ktx2") - // || endsWithExtension(name, ".png") // TODO: can't support with KTXImage load path, needs PNG loader - - ) + bool isPNG = isPNGFilename(name); + + if (isPNG || + endsWithExtension(name, ".ktx") || + endsWithExtension(name, ".ktx2")) { files.push_back(name); } @@ -2471,7 +2537,7 @@ - (BOOL)loadTextureFromURL:(NSURL*)url { // file is not a supported extension if (!(endsWithExtension(filename, ".zip") || - endsWithExtension(filename, ".png") || + isPNGFilename(filename) || endsWithExtension(filename, ".ktx") || endsWithExtension(filename, ".ktx2")) ) { diff --git a/libkram/kram/Kram.cpp b/libkram/kram/Kram.cpp index 5a02ec12..232bfd67 100644 --- a/libkram/kram/Kram.cpp +++ b/libkram/kram/Kram.cpp @@ -41,9 +41,33 @@ void releaseVector(vector& v) { } +bool isPNGFilename(const char* filename) { + // should really lookg at first 4 bytes of data + return endsWithExtension(filename, ".png"); +} + +bool isPNGFilename(const uint8_t* data, size_t dataSize) { + // read the 4 chars at the beginning of the file + const uint32_t numChars = 8; + if (dataSize < numChars) + return false; + + const uint8_t kPngSignature[numChars] = { 137, 80, 78, 71, 13, 10, 26, 10 }; + if (memcmp(data, kPngSignature, sizeof(kPngSignature)) != 0) { + return false; + } + + return true; +} + + bool KTXImageData::open(const char* filename, KTXImage& image) { close(); + if (isPNGFilename(filename)) { + return openPNG(filename, image); + } + isMmap = true; if (!mmapHelper.open(filename)) { isMmap = false; @@ -100,8 +124,8 @@ void KTXImageData::close() { } -bool KTXImageData::openPNG(const char* filename, bool isSrgb, KTXImage& image) { - close(); +bool KTXImageData::openPNG(const char* filename, KTXImage& image) { + //close(); isMmap = true; if (!mmapHelper.open(filename)) { @@ -135,9 +159,15 @@ bool KTXImageData::openPNG(const char* filename, bool isSrgb, KTXImage& image) { data = fileData.data(); dataSize = fileData.size(); } - + + return openPNG(data, dataSize, image); +} + +bool KTXImageData::openPNG(const uint8_t* data, size_t dataSize, KTXImage& image) { + //close(); + // the mmap/filehelper point to the png data - // use Image to + // use Image to Image singleImage; bool isLoaded = LoadPng(data, dataSize, false, false, singleImage); @@ -158,7 +188,7 @@ bool KTXImageData::openPNG(const char* filename, bool isSrgb, KTXImage& image) { image.header.numberOfArrayElements = 0; image.header.numberOfMipmapLevels = 1; image.textureType = MyMTLTextureType2D; - image.pixelFormat = isSrgb ? MyMTLPixelFormatRGBA8Unorm_sRGB : MyMTLPixelFormatRGBA8Unorm; + image.pixelFormat = /*isSrgb ? MyMTLPixelFormatRGBA8Unorm_sRGB : */ MyMTLPixelFormatRGBA8Unorm; // TODO: support mips with blitEncoder but tha confuses mipCount in KTXImage // Mipper can also generate on cpu side. Mipped can do premul conversion though. 
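The calling pattern these Kram.cpp changes enable is worth spelling out: open() now sniffs the PNG signature itself, so only the sRGB retag stays on the caller side, because openPNG() always reports non-sRGB RGBA8. A minimal sketch of that pattern (the wrapper name, include set, and the path/isSrgb parameters are illustrative, not viewer code):

    #include "Kram.h"       // KTXImageData, isPNGFilename
    #include "KTXImage.h"   // KTXImage, MyMTLPixelFormat
    using namespace kram;

    // hypothetical helper; imageData is passed in because it must outlive image,
    // since KTXImage can alias the mapped/loaded bytes it holds
    bool loadAsKTXImage(const char* path, bool isSrgb, KTXImage& image, KTXImageData& imageData)
    {
        if (!imageData.open(path, image)) {   // .png, .ktx, or .ktx2; PNG is detected by signature
            return false;
        }
        if (isPNGFilename(path) && isSrgb) {
            // openPNG only reports non-sRGB RGBA8, so albedo/diffuse sources get retagged by the caller
            image.pixelFormat = MyMTLPixelFormatRGBA8Unorm_sRGB;
        }
        return true;
    }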
@@ -176,11 +206,14 @@ bool KTXImageData::openPNG(const char* filename, bool isSrgb, KTXImage& image) { return true; } - bool KTXImageData::open(const uint8_t* data, size_t dataSize, KTXImage& image) { close(); + if (isPNGFilename(data, dataSize)) { + return openPNG(data, dataSize, image); + } + // image will likely alias incoming data, so KTXImageData is unused if (!image.open(data, dataSize, isInfoOnly)) { diff --git a/libkram/kram/Kram.h b/libkram/kram/Kram.h index f2e75afc..c8deab11 100644 --- a/libkram/kram/Kram.h +++ b/libkram/kram/Kram.h @@ -23,13 +23,18 @@ class KTXImageData { // class aliases data, so caller must keep alive. Useful with bundle. bool open(const uint8_t* data, size_t dataSize, KTXImage& image); + // This releases all memory associated with this class + void close(); + +private: // Open png image into a KTXImage as a single-level mip // Only handles 2d case and only srgba/rgba conversion. - bool openPNG(const char* filename, bool isSrgb, KTXImage& image); + // Only returns non-srgb RGBA8, but format can be changed after for srgb + bool openPNG(const char* filename, KTXImage& image); + + // The data version + bool openPNG(const uint8_t* data, size_t dataSize, KTXImage& image); - // This releases all memory associated with this class - void close(); - private: MmapHelper mmapHelper; vector fileData; @@ -37,6 +42,8 @@ class KTXImageData { bool isInfoOnly = true; }; +bool isPNGFilename(const char* filename); + // helpers to source from a png or single level of a ktx bool LoadKtx(const uint8_t* data, size_t dataSize, Image& sourceImage); bool LoadPng(const uint8_t* data, size_t dataSize, bool isPremulSrgb, bool isGray, Image& sourceImage); From 6d8906ac2df3be525577c6f434a13bd297756c96 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Tue, 8 Jun 2021 22:25:59 -0700 Subject: [PATCH 122/901] kramv - tangent button to compare Note that tan/bitan shapeChannel doesn't yet work with fragment tangents --- kramv/KramRenderer.mm | 3 +-- kramv/KramShaders.metal | 47 +++++++++++++---------------------------- kramv/KramViewerBase.h | 3 +++ kramv/KramViewerMain.mm | 34 +++++++++++++++++++++++------ 4 files changed, 47 insertions(+), 40 deletions(-) diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index 8aa9148b..55c586d7 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -1068,9 +1068,8 @@ - (void)_updateGameState } } - // TODO: tie to UI // a few things to fix before enabling this - uniforms.useTangent = true; + uniforms.useTangent = _showSettings->useTangent; uniforms.gridX = 0; uniforms.gridY = 0; diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index 9ead4bbc..c1eff9ce 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -232,9 +232,12 @@ half3 transformNormalByBasis(half3 bumpNormal, half3 vertexNormal, float3 worldP float invmax = rsqrt(max(length_squared(T), length_squared(B))); // keeps relative magnitude of two vectors, they're not both unit vecs - T *= -invmax; // had to flip this sign to get correct lighting + T *= invmax; B *= invmax; + // had to flip this sign to get lighting to match vertex data + T = -T; + // construct a scale-invariant frame // drop to half to match other call bumpNormal = toHalf(float3x3(T, B, N) * toFloat(bumpNormal)); @@ -251,17 +254,11 @@ half3 transformNormalByBasis(half3 bumpNormal, half4 tangent, half3 vertexNormal // Reconstruct bitan in frag shader // https://bgolus.medium.com/generating-perfect-normal-maps-for-unity-f929e673fc57 - - // so if eyevector - - // TODO: there's facing too, could 
be inside model - half bitangentSign = tangent.w; half3 bitangent = bitangentSign * cross(vertexNormal, tangent.xyz); - // ModelIO not generating correct bitan sign // DONE: flip this on srcData, and not here //bitangentSign = -bitangentSign; @@ -273,30 +270,6 @@ half3 transformNormalByBasis(half3 bumpNormal, half4 tangent, half3 vertexNormal return normalize(bumpNormal); } - -half3 transformNormal(half4 tangent, half3 vertexNormal, float3 worldPos, - bool useTangent, - texture2d texture, sampler s, float2 uv, bool isSigned = true) -{ - half4 nmap = texture.sample(s, uv); - - // unorm-only formats like ASTC need to convert - if (!isSigned) { - nmap.xy = toSnorm8(nmap.xy); - } - - // rebuild the z term - half3 bumpNormal = toNormal(nmap.xyz); - - if (useTangent) - bumpNormal = transformNormalByBasis(bumpNormal, tangent, vertexNormal); - else - bumpNormal = transformNormalByBasis(bumpNormal, vertexNormal, worldPos, uv); - - return bumpNormal; -} - - half3 transformNormal(half4 nmap, half3 vertexNormal, half4 tangent, float3 worldPos, float2 uv, bool useTangent, // to gen TBN from normal bool isSwizzleAGToRG, bool isSigned, bool isFrontFacing) @@ -335,7 +308,17 @@ half3 transformNormal(half4 nmap, half3 vertexNormal, half4 tangent, return bumpNormal; } - +half3 transformNormal(half4 tangent, half3 vertexNormal, float3 worldPos, + bool useTangent, + texture2d texture, sampler s, float2 uv, + bool isSigned, bool isSwizzleAGToRG, bool isFrontFacing) +{ + half4 nmap = texture.sample(s, uv); + + return transformNormal(nmap, vertexNormal, tangent, + worldPos, uv, useTangent, + isSwizzleAGToRG, isSigned, isFrontFacing); +} // TODO: have more bones, or read from texture instead of uniforms // can then do instanced skining, but vfetch lookup slower diff --git a/kramv/KramViewerBase.h b/kramv/KramViewerBase.h index 28261f03..fa64a926 100644 --- a/kramv/KramViewerBase.h +++ b/kramv/KramViewerBase.h @@ -130,6 +130,9 @@ class ShowSettings { // and disabling with a MTLView caused many flags to have to be set on MTLTexture //bool isSRGBShown = true; + // whether to use normal to tangent (false), or vertex tangents (true) + bool useTangent = true; + // draw with reverseZ to better match perspective bool isReverseZ = true; diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index dc6d08b2..94d00c25 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -498,7 +498,7 @@ - (nonnull ShowSettings*)showSettings { } - (NSStackView*)_addButtons { - const int32_t numButtons = 27; // 13; + const int32_t numButtons = 29; // 13; const char* names[numButtons*2] = { "?", "Help", @@ -526,8 +526,12 @@ - (NSStackView*)_addButtons { "J", "Next", "L", "Reload", "0", "Fit", + + "-", "", + "8", "Shape", "6", "Shape Channel", + "T", "Tangents", // TODO: need to shift hud over a little // "UI", - add to show/hide buttons @@ -1385,9 +1389,11 @@ - (void)updateUIControlState auto arrayState = toState(_showSettings->arrayNumber > 0); auto faceState = toState(_showSettings->faceNumber > 0); auto mipState = toState(_showSettings->mipLOD > 0); - auto meshState = toState(_showSettings->meshNumber > 0); - auto meshChannelState = toState(_showSettings->shapeChannel > 0); // TODO: rename to meshChannel + auto meshState = toState(_showSettings->meshNumber > 0); + auto meshChannelState = toState(_showSettings->shapeChannel > 0); + auto tangentState = toState(_showSettings->useTangent); + // TODO: UI state, and vertical state auto uiState = toState(_buttonStack.hidden); @@ -1419,6 +1425,7 @@ - 
(void)updateUIControlState [self findButton:"W"].state = wrapState; [self findButton:"D"].state = gridState; [self findButton:"E"].state = debugState; + [self findButton:"T"].state = tangentState; [self findButton:"P"].state = premulState; [self findButton:"N"].state = signedState; @@ -1447,7 +1454,8 @@ - (void)updateUIControlState [self findMenuItem:"O"].state = previewState; [self findMenuItem:"8"].state = meshState; [self findMenuItem:"6"].state = meshChannelState; - + [self findMenuItem:"T"].state = tangentState; + [self findMenuItem:"W"].state = wrapState; [self findMenuItem:"D"].state = gridState; [self findMenuItem:"E"].state = debugState; @@ -1519,15 +1527,20 @@ - (IBAction)handleAction:(id)sender { keyCode = Key::Y; else if (title == "J") keyCode = Key::J; + + // reload/refit else if (title == "L") keyCode = Key::L; - else if (title == "0") keyCode = Key::Num0; + + // mesh else if (title == "8") keyCode = Key::Num8; else if (title == "6") keyCode = Key::Num6; + else if (title == "T") + keyCode = Key::T; else if (title == "R") keyCode = Key::R; @@ -1537,7 +1550,7 @@ - (IBAction)handleAction:(id)sender { keyCode = Key::B; else if (title == "A") keyCode = Key::A; - + if (keyCode >= 0) [self handleKey:keyCode isShiftKeyDown:isShiftKeyDown]; } @@ -1668,6 +1681,15 @@ - (bool)handleKey:(uint32_t)keyCode isShiftKeyDown:(bool)isShiftKeyDown isChanged = true; break; } + case Key::T: { + _showSettings->useTangent = !_showSettings->useTangent; + if (_showSettings->useTangent) + text = "Vertex Tangents"; + else + text = "Fragment Tangents"; + isChanged = true; + break; + } case Key::E: { _showSettings->advanceDebugMode(isShiftKeyDown); text = _showSettings->debugModeText(); From 1787e99d4b1b2b6565ec17735a0ac3a3417b63b7 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Wed, 9 Jun 2021 14:02:37 -0700 Subject: [PATCH 123/901] kramv - simplify normal lookup --- kramv/KramViewerMain.mm | 116 ++++++++++++++++------------------------ 1 file changed, 47 insertions(+), 69 deletions(-) diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index 94d00c25..fc28d890 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -2174,52 +2174,37 @@ - (BOOL)loadTextureFromFolder return NO; } + const char* ext = strrchr(filename, '.'); + // first only do this on albedo/diffuse textures - string normalFilename; - string search; - bool isFound = false; - string::size_type searchPos; + // find matching png + string search = "-a"; + search += ext; - if (isPNG) { - // find matching png - search = "-a.png"; - searchPos = fullFilename.find(search); - isFound = searchPos != string::npos; + auto searchPos = fullFilename.find(search); + bool isFound = searchPos != string::npos; + + if (!isFound) { + search = "-d"; + search += ext; - if (!isFound) { - search = "-d.png"; - searchPos = fullFilename.find(search); - isFound = searchPos != string::npos; - } - } - else { - // find matching ktx/2 - search = "-a.ktx"; searchPos = fullFilename.find(search); isFound = searchPos != string::npos; - - if (!isFound) { - search = "-d.ktx"; - searchPos = fullFilename.find(search); - isFound = searchPos != string::npos; - } } bool isSrgb = isFound; + string normalFilename; + bool hasNormal = false; + if (isFound) { - // stupid stl mods fullFilename in the replace if not a copy normalFilename = fullFilename; + normalFilename = normalFilename.erase(searchPos); + normalFilename += "-n"; + normalFilename += ext; - // this won't work for mix of png/ktx files, but that's okay - normalFilename = 
normalFilename.replace(searchPos, search.length(), isPNG ? "-n.png" : "-n.ktx"); - - isFound = [self findFilenameInFolders:normalFilename]; - - if (!isFound) { - normalFilename.clear(); - } + hasNormal = [self findFilenameInFolders:normalFilename]; } //------------------------------- @@ -2229,21 +2214,22 @@ - (BOOL)loadTextureFromFolder KTXImage imageNormal; KTXImageData imageNormalDataKTX; - bool hasNormal = false; // this requires decode and conversion to RGBA8u if (!imageDataKTX.open(fullFilename.c_str(), image)) { return NO; } - if (isFound && imageNormalDataKTX.open(normalFilename.c_str(), imageNormal)) { - + if (hasNormal && imageNormalDataKTX.open(normalFilename.c_str(), imageNormal)) { // shaders only pull from albedo + normal on these texture types if (imageNormal.textureType == image.textureType && (imageNormal.textureType == MyMTLTextureType2D || imageNormal.textureType == MyMTLTextureType2DArray)) { - hasNormal = true; + //hasNormal = true; + } + else { + hasNormal = false; } } @@ -2310,36 +2296,24 @@ - (BOOL)loadTextureFromArchive { return NO; } - - string normalFilename; + + const char* ext = strrchr(filename, '.'); + // first only do this on albedo/diffuse textures - string search; - bool isFound = false; - string::size_type searchPos; + string search = "-a"; + search += ext; - if (isPNG) { - search = "-a.png"; - searchPos = fullFilename.find(search); - isFound = searchPos != string::npos; + auto searchPos = fullFilename.find(search); + bool isFound = searchPos != string::npos; + + if (!isFound) { + search = "-d"; + search += ext; - if (!isFound) { - search = "-d.png"; - searchPos = fullFilename.find(search); - isFound = searchPos != string::npos; - } - } - else { - search = "-a.ktx"; searchPos = fullFilename.find(search); isFound = searchPos != string::npos; - - if (!isFound) { - search = "-d.ktx"; - searchPos = fullFilename.find(search); - isFound = searchPos != string::npos; - } } bool isSrgb = isFound; @@ -2358,14 +2332,16 @@ - (BOOL)loadTextureFromArchive } // search for normal map in the same archive + string normalFilename; + bool hasNormal = false; + if (isFound) { normalFilename = fullFilename; + normalFilename = normalFilename.erase(searchPos); + normalFilename += "-n"; + normalFilename += ext; - normalFilename = normalFilename.replace(searchPos, search.length(), isPNG ? 
"-n.png" : "-n.ktx"); - - if (!_zip.extractRaw(normalFilename.c_str(), &imageNormalData, imageNormalDataLength)) { - // ignore failure case here, this is just guessing there's a related normal file - } + hasNormal = _zip.extractRaw(normalFilename.c_str(), &imageNormalData, imageNormalDataLength); } //--------------------------- @@ -2382,15 +2358,17 @@ - (BOOL)loadTextureFromArchive return NO; } - bool hasNormal = false; - if (isFound && imageNormalDataKTX.open(imageNormalData, imageNormalDataLength, imageNormal)) { - + if (hasNormal && imageNormalDataKTX.open(imageNormalData, imageNormalDataLength, imageNormal)) { + // shaders only pull from albedo + normal on these texture types if (imageNormal.textureType == image.textureType && (imageNormal.textureType == MyMTLTextureType2D || imageNormal.textureType == MyMTLTextureType2DArray)) { - hasNormal = true; + //hasNormal = true; + } + else { + hasNormal = false; } } From f9bb8fb60ab38203b87833888c81b31b8a6be614 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Thu, 10 Jun 2021 21:16:03 -0700 Subject: [PATCH 124/901] kramv - add 4x aniso to preview, and add mip level shape channel --- kramv/KramRenderer.mm | 1 + kramv/KramShaders.h | 2 ++ kramv/KramShaders.metal | 51 +++++++++++++++++++++++++++++++++++++++- kramv/KramViewerBase.cpp | 1 + kramv/KramViewerBase.h | 4 ++-- 5 files changed, 56 insertions(+), 3 deletions(-) diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index 55c586d7..78287aa4 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -160,6 +160,7 @@ - (void)_createSamplers samplerDescriptor.minFilter = MTLSamplerMinMagFilterLinear; samplerDescriptor.magFilter = MTLSamplerMinMagFilterLinear; samplerDescriptor.mipFilter = MTLSamplerMipFilterLinear; + samplerDescriptor.maxAnisotropy = 4; // 1,2,4,8,16 are choices samplerDescriptor.sAddressMode = MTLSamplerAddressModeClampToBorderColor; samplerDescriptor.tAddressMode = MTLSamplerAddressModeClampToBorderColor; diff --git a/kramv/KramShaders.h b/kramv/KramShaders.h index 554acdb4..f11491a4 100644 --- a/kramv/KramShaders.h +++ b/kramv/KramShaders.h @@ -107,6 +107,8 @@ typedef NS_ENUM(int32_t, ShaderShapeChannel) ShShapeChannelTangent, ShShapeChannelBitangent, + ShShapeChannelMipLevel, + // ShShapeChannelBumpNormal, }; diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index c1eff9ce..d71e86c4 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -144,6 +144,50 @@ float4 toFloat(half4 c) //------------------------------------------- // functions +// https://bgolus.medium.com/anti-aliased-alpha-test-the-esoteric-alpha-to-coverage-8b177335ae4f +float toMipLevel(float2 uv) +{ + float2 dx = dfdx(uv); + float2 dy = dfdy(uv); + + // a better approximation than fwidth + float deltaSquared = max(length_squared(dx), length_squared(dy)); + + // 0.5 because squared, find mip level + return max(0.0, 0.5 * log2(deltaSquared)); +} + +// Also see here: +// https://developer.nvidia.com/gpugems/gpugems2/part-iii-high-quality-rendering/chapter-28-mipmap-level-measurement +// 100 percent, 25 percent, 6.3 percent, and 1.6 percent) + +float4 toMipLevelColor(float2 uv) +{ + // yellow, blue, green, red, black/transparent + // 1, 0.75, 0.5, 0.25, 0 + // point sample from a texture with unique mip level colors + float lev = toMipLevel(uv); + float clev = saturate(lev / 4.0); + float alpha = saturate(1.0 - clev); + + const float3 colors[5] = { + float3(1,1,0), // yellow + float3(0,0,1), // blue + float3(0,1,0), // green + float3(1,0,0), // red + float3(0,0,0), // black 
+ }; + + float clev4 = clev * 4.0; + float3 low = colors[int(floor(clev4))]; + float3 hi = colors[int(round(clev4))]; + + float3 color = mix(low, hi, fract(clev4)); + + // grayscale for now, but use colors so can see mips + return float4(color, alpha); +} + // reconstruct normal from xy, n.z ignored float3 toNormal(float3 n) { @@ -796,11 +840,16 @@ float4 DrawPixels( c.rgb = saturate(toUnorm(faceNormal)); } + else if (uniforms.shapeChannel == ShShapeChannelMipLevel) { + c = toMipLevelColor(in.texCoord * textureSize.xy); // only for 2d textures + } // else if (uniforms.shapeChannel == ShShapeChannelBumpNormal) { // c.rgb = saturate(bumpNormal); // } - c.a = 1.0; + if (uniforms.shapeChannel != ShShapeChannelMipLevel) { + c.a = 1.0; + } } // mask to see one channel in isolation, this is really 0'ing out other channels diff --git a/kramv/KramViewerBase.cpp b/kramv/KramViewerBase.cpp index bf1f4cdc..2bdb8321 100644 --- a/kramv/KramViewerBase.cpp +++ b/kramv/KramViewerBase.cpp @@ -37,6 +37,7 @@ const char* ShowSettings::shapeChannelText() const { case ShapeChannelDepth: text = "Show Depth"; break; case ShapeChannelFaceNormal: text = "Show Faces"; break; //case ShapeChannelBumpNormal: text = "Show Bumps"; break; + case ShapeChannelMipLevel: text = "Show Mip Levels"; break; default: break; } diff --git a/kramv/KramViewerBase.h b/kramv/KramViewerBase.h index fa64a926..a0fc8eb1 100644 --- a/kramv/KramViewerBase.h +++ b/kramv/KramViewerBase.h @@ -62,11 +62,11 @@ enum ShapeChannel ShapeChannelTangent, ShapeChannelBitangent, + ShapeChannelMipLevel, // can estimate mip chose off dfdx/dfdy, and pseudocolor + // don't need bump, since can already see it, but what if combined diffuse + normal // ShapeChannelBumpNormal, - // ShapeChannelMipLevel, // can estimate mip chose off dfdx/dfdy, and pseudocolor - ShapeChannelCount }; From dbfcba74b3705c5983cc0ba05591e41a2539ab0a Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Thu, 10 Jun 2021 21:30:02 -0700 Subject: [PATCH 125/901] kramv - use premul for mip level colors --- kramv/KramShaders.metal | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index d71e86c4..225918ba 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -138,6 +138,15 @@ float4 toFloat(half4 c) return float4(c); } +float4 toPremul(float4 c) { + c.rgb *= c.a; + return c; +} +half4 toPremul(half4 c) { + c.rgb *= c.a; + return c; +} + // TODO: note that Metal must pass the same half3 from vertex to fragment shader // so can't mix a float vs with half fs. 
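For reference on the estimate that toMipLevelColor() visualizes: toMipLevel() works in texel units (the call site scales uv by textureSize.xy), and the 0.5 * log2 on squared lengths is just log2 of the per-pixel texel footprint without paying for a sqrt. A small host-side C++ sketch of the same math with a couple of worked values (color names refer to the colors[] ramp above):

    #include <algorithm>
    #include <cmath>

    // dx2, dy2: squared lengths of the per-pixel texel-space uv deltas (dfdx/dfdy in the shader)
    float toMipLevelRef(float dx2, float dy2)
    {
        // 0.5 * log2(d*d) == log2(d); the larger footprint axis drives the level
        return std::max(0.0f, 0.5f * std::log2(std::max(dx2, dy2)));
    }

    // 1 texel per pixel:  toMipLevelRef(1, 1)   -> 0.5*log2(1)  = 0  (mip 0, yellow)
    // 4 texels per pixel: toMipLevelRef(16, 16) -> 0.5*log2(16) = 2  (mip 2, green)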
@@ -182,10 +191,10 @@ float4 toMipLevelColor(float2 uv) float3 low = colors[int(floor(clev4))]; float3 hi = colors[int(round(clev4))]; + // lerp in unmul space float3 color = mix(low, hi, fract(clev4)); - // grayscale for now, but use colors so can see mips - return float4(color, alpha); + return toPremul(float4(color, alpha)); } // reconstruct normal from xy, n.z ignored From 7a8ab10aa8d6ee4780da1ce2d322337b41e77a33 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 13 Jun 2021 13:55:11 -0700 Subject: [PATCH 126/901] kramv - add render sampler to see pixels from drawable in shapeChannel, show all, preview, etc --- kramv/KramRenderer.mm | 172 +++++++++++++-------- kramv/KramShaders.metal | 7 +- kramv/KramViewerBase.cpp | 5 + kramv/KramViewerBase.h | 10 +- kramv/KramViewerMain.mm | 314 ++++++++++++++++++++++++++------------- 5 files changed, 342 insertions(+), 166 deletions(-) diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index 78287aa4..a59d21ae 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -30,50 +30,52 @@ @implementation Renderer { dispatch_semaphore_t _inFlightSemaphore; - id _device; - id _commandQueue; - - id _dynamicUniformBuffer[MaxBuffersInFlight]; - - id _pipelineState1DArray; - id _pipelineStateImage; - id _pipelineStateImageArray; - id _pipelineStateCube; - id _pipelineStateCubeArray; - id _pipelineStateVolume; - - id _pipelineState1DArrayCS; - id _pipelineStateImageCS; - id _pipelineStateImageArrayCS; - id _pipelineStateCubeCS; - id _pipelineStateCubeArrayCS; - id _pipelineStateVolumeCS; - - id _depthStateFull; - id _depthStateNone; + id _device; + id _commandQueue; + + id _dynamicUniformBuffer[MaxBuffersInFlight]; + + id _pipelineState1DArray; + id _pipelineStateImage; + id _pipelineStateImageArray; + id _pipelineStateCube; + id _pipelineStateCubeArray; + id _pipelineStateVolume; + + id _pipelineState1DArrayCS; + id _pipelineStateImageCS; + id _pipelineStateImageArrayCS; + id _pipelineStateCubeCS; + id _pipelineStateCubeArrayCS; + id _pipelineStateVolumeCS; + + id _depthStateFull; + id _depthStateNone; MTLVertexDescriptor *_mtlVertexDescriptor; // TODO: Array< id > _textures; - id _colorMap; - id _normalMap; + id _colorMap; + id _normalMap; + id _lastDrawableTexture; // border is a better edge sample, but at edges it filters in the transparent color // around the border which is undesirable. It would be better if the hw did // clamp to edge until uv outside 0 to 1. This results in having to inset the uv by 0.5 px // to avoid this artifact, but on small texturs that are 4x4, a 1 px inset is noticeable. 
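One way to apply that half-pixel inset, assuming textureSize.zw holds the reciprocal dimensions as the shader's onePixel name suggests (a sketch of the idea, not necessarily the exact math in DrawImageFunc):

    // remap one uv coordinate from [0,1] into [0.5*texel, 1 - 0.5*texel]
    float insetByHalfPixel(float uv, float invSize)
    {
        float halfTexel = 0.5f * invSize;
        return uv * (1.0f - invSize) + halfTexel;  // same as mix(halfTexel, 1 - halfTexel, uv)
    }

    // on a 4x4 texture: uv=0 -> 0.125, uv=1 -> 0.875, i.e. a full texel lost across the axis,
    // which is why the inset is limited to the wrap-around shapes (sphere/capsule)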
- id _colorMapSamplerNearestWrap; - id _colorMapSamplerNearestBorder; - id _colorMapSamplerNearestEdge; + id _colorMapSamplerNearestWrap; + id _colorMapSamplerNearestBorder; + id _colorMapSamplerNearestEdge; - id _colorMapSamplerFilterWrap; - id _colorMapSamplerFilterBorder; - id _colorMapSamplerFilterEdge; + id _colorMapSamplerFilterWrap; + id _colorMapSamplerFilterBorder; + id _colorMapSamplerFilterEdge; //id _sampleRT; - id _sampleTex; - + id _sampleComputeTex; + id _sampleRenderTex; + uint8_t _uniformBufferIndex; float4x4 _projectionMatrix; @@ -423,12 +425,23 @@ - (void)_createRenderPipelines:(MTKView*)view - (void)_createSampleRender { - // writing to this texture - MTLTextureDescriptor* textureDesc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:MTLPixelFormatRGBA32Float width:1 height:1 mipmapped:NO]; + { + // writing to this texture + MTLTextureDescriptor* textureDesc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:MTLPixelFormatRGBA32Float width:1 height:1 mipmapped:NO]; + + textureDesc.usage = MTLTextureUsageShaderWrite | MTLTextureUsageShaderRead; + textureDesc.storageMode = MTLStorageModeManaged; + _sampleComputeTex = [_device newTextureWithDescriptor:textureDesc]; + } - textureDesc.usage = MTLTextureUsageShaderWrite | MTLTextureUsageShaderRead; - textureDesc.storageMode = MTLStorageModeManaged; - _sampleTex = [_device newTextureWithDescriptor:textureDesc]; + { + // this must match drawable format due to using a blit to copy pixel out of drawable + MTLTextureDescriptor* textureDesc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:MTLPixelFormatRGBA16Float width:1 height:1 mipmapped:NO]; + //textureDesc.usage = MTLTextureUsageShaderWrite | MTLTextureUsageShaderRead; + textureDesc.storageMode = MTLStorageModeManaged; + + _sampleRenderTex = [_device newTextureWithDescriptor:textureDesc]; + } } - (MTKMesh*)_createMeshAsset:(const char*)name mdlMesh:(MDLMesh*)mdlMesh doFlipUV:(bool)doFlipUV @@ -916,7 +929,7 @@ - (void)updateImageSettings:(const string&)fullFilename image:(KTXImage&)image // } // can derive these from texture queries - _showSettings->maxLOD = (int32_t)image.header.numberOfMipmapLevels; + _showSettings->mipCount = (int32_t)image.header.numberOfMipmapLevels; _showSettings->faceCount = (image.textureType == MyMTLTextureTypeCube || image.textureType == MyMTLTextureTypeCubeArray) ? 
6 : 0; _showSettings->arrayCount = (int32_t)image.header.numberOfArrayElements; @@ -931,7 +944,7 @@ - (void)resetSomeImageSettings:(BOOL)isNewFile { // only reset these on new texture, but have to revalidate if (isNewFile) { // then can manipulate this after loading - _showSettings->mipLOD = 0; + _showSettings->mipNumber = 0; _showSettings->faceNumber = 0; _showSettings->arrayNumber = 0; _showSettings->sliceNumber = 0; @@ -947,7 +960,7 @@ - (void)resetSomeImageSettings:(BOOL)isNewFile { } else { // reloaded file may have different limits - _showSettings->mipLOD = std::min(_showSettings->mipLOD, _showSettings->maxLOD); + _showSettings->mipNumber = std::min(_showSettings->mipNumber, _showSettings->mipCount); _showSettings->faceNumber = std::min(_showSettings->faceNumber, _showSettings->faceCount); _showSettings->arrayNumber = std::min(_showSettings->arrayNumber, _showSettings->arrayCount); _showSettings->sliceNumber = std::min(_showSettings->sliceNumber, _showSettings->sliceCount); @@ -1257,6 +1270,9 @@ - (void)drawInMTKView:(nonnull MTKView *)view [self drawMain:commandBuffer view:view]; + // hold onto this for sampling from it via eyedropper + _lastDrawableTexture = view.currentDrawable.texture; + [commandBuffer presentDrawable:view.currentDrawable]; [commandBuffer commit]; } @@ -1389,7 +1405,7 @@ - (void)drawMain:(id)commandBuffer view:(nonnull MTKView *)vie if (_showSettings->isPreview) { // upload this on each face drawn, since want to be able to draw all mips/levels at once - [self _setUniformsLevel:uniformsLevel mipLOD:_showSettings->mipLOD]; + [self _setUniformsLevel:uniformsLevel mipLOD:_showSettings->mipNumber]; [renderEncoder setVertexBytes:&uniformsLevel length:sizeof(uniformsLevel) @@ -1420,7 +1436,7 @@ - (void)drawMain:(id)commandBuffer view:(nonnull MTKView *)vie // gap the contact sheet, note this 2 pixels is scaled on small textures by the zoom int32_t gap = _showSettings->showAllPixelGap; // * _showSettings->viewContentScaleFactor; - for (int32_t mip = 0; mip < _showSettings->maxLOD; ++mip) { + for (int32_t mip = 0; mip < _showSettings->mipCount; ++mip) { // upload this on each face drawn, since want to be able to draw all mips/levels at once [self _setUniformsLevel:uniformsLevel mipLOD:mip]; @@ -1485,7 +1501,7 @@ - (void)drawMain:(id)commandBuffer view:(nonnull MTKView *)vie } } else { - int32_t mip = _showSettings->mipLOD; + int32_t mip = _showSettings->mipNumber; // upload this on each face drawn, since want to be able to draw all mips/levels at once [self _setUniformsLevel:uniformsLevel mipLOD:mip]; @@ -1531,11 +1547,6 @@ - (void)drawMain:(id)commandBuffer view:(nonnull MTKView *)vie // want to run samples independent of redrawing the main view - (void)drawSample { - // Note: this is failing when running via Cmake - bool doSample = true; - if (!doSample) { - return; - } if (_colorMap == nil) { return; } @@ -1546,25 +1557,57 @@ - (void)drawSample commandBuffer.label = @"MyCommand"; + // this reads directly from compressed texture via a compute shader int32_t textureLookupX = _showSettings->textureLookupX; int32_t textureLookupY = _showSettings->textureLookupY; - int32_t textureLookupMipX = _showSettings->textureLookupMipX; - int32_t textureLookupMipY = _showSettings->textureLookupMipY; - - [self drawSamples:commandBuffer lookupX:textureLookupMipX lookupY:textureLookupMipY]; + bool isDrawableBlit = _showSettings->isEyedropperFromDrawable(); - // Synchronize the managed texture. 
- id blitCommandEncoder = [commandBuffer blitCommandEncoder]; - if (blitCommandEncoder) { - [blitCommandEncoder synchronizeResource:_sampleTex]; - [blitCommandEncoder endEncoding]; + // TODO: only don't blit for plane + no debug or shape + // otherwise want the pixel under the cursor, but this may include grid mixed in and other debug overlays + if (isDrawableBlit) { + MTLOrigin srcOrigin = MTLOriginMake(_showSettings->cursorX, _showSettings->cursorY, 0); + srcOrigin.x *= _showSettings->viewContentScaleFactor; + srcOrigin.y *= _showSettings->viewContentScaleFactor; + + // Note: here we don't know the uv in original texture, would have to write that out to another + // texture. Also on shapes, texel may not change but lighting might. + + // can simply blit the color out of the render buffer + id blitCommandEncoder = [commandBuffer blitCommandEncoder]; + if (blitCommandEncoder) { + [blitCommandEncoder copyFromTexture:_lastDrawableTexture + sourceSlice:0 sourceLevel:0 sourceOrigin:srcOrigin sourceSize:MTLSizeMake(1,1,1) + toTexture:_sampleRenderTex + destinationSlice:0 destinationLevel:0 destinationOrigin:MTLOriginMake(0,0,0) + ]; + [blitCommandEncoder synchronizeResource:_sampleRenderTex]; + [blitCommandEncoder endEncoding]; + } + } + else { + + int32_t textureLookupMipX = _showSettings->textureLookupMipX; + int32_t textureLookupMipY = _showSettings->textureLookupMipY; + + [self drawSamples:commandBuffer lookupX:textureLookupMipX lookupY:textureLookupMipY]; + + // Synchronize the managed texture. + id blitCommandEncoder = [commandBuffer blitCommandEncoder]; + if (blitCommandEncoder) { + [blitCommandEncoder synchronizeResource:_sampleComputeTex]; + [blitCommandEncoder endEncoding]; + } } // After synchonization, copy value back to the cpu - id texture = _sampleTex; - [commandBuffer addCompletedHandler:^(id /* buffer */) + id texture = isDrawableBlit ? 
_sampleRenderTex : _sampleComputeTex; + + [commandBuffer addCompletedHandler:^(id buffer) { + if (buffer.error != nil) { + return; + } // only 1 pixel in the texture right now float4 data; @@ -1574,7 +1617,14 @@ - (void)drawSample { 1, 1, 1 } // MTLSize }; - [texture getBytes:&data bytesPerRow:16 fromRegion:region mipmapLevel:0]; + if (isDrawableBlit) { + half4 data16f; + [texture getBytes:&data16f bytesPerRow:8 fromRegion:region mipmapLevel:0]; + data = toFloat4(data16f); + } + else { + [texture getBytes:&data bytesPerRow:16 fromRegion:region mipmapLevel:0]; + } // return the value at the sample _showSettings->textureResult = data; @@ -1608,7 +1658,7 @@ - (void)drawSamples:(id)commandBuffer lookupX:(int32_t)lookupX if (_showSettings->sliceNumber) { uniforms.arrayOrSlice = _showSettings->sliceNumber; } - uniforms.mipLOD = _showSettings->mipLOD; + uniforms.mipLOD = _showSettings->mipNumber; // run compute here, don't need a shape switch(_colorMap.textureType) { @@ -1642,7 +1692,7 @@ - (void)drawSamples:(id)commandBuffer lookupX:(int32_t)lookupX [renderEncoder setTexture:_colorMap atIndex:TextureIndexColor]; - [renderEncoder setTexture:_sampleTex + [renderEncoder setTexture:_sampleComputeTex atIndex:TextureIndexSamples]; [renderEncoder setBytes:&uniforms length:sizeof(UniformsCS) atIndex:BufferIndexUniformsCS]; diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index 225918ba..0af5919a 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -270,10 +270,11 @@ half3 transformNormalByBasis(half3 bumpNormal, half3 vertexNormal, float3 worldP float2 duv1 = dfdx(uv); float2 duv2 = dfdy(uv); - // getting non-zere uv with 0 length duv1/2 on MBP 16", this leaves missing bump artifacts + // getting non-zero uv with 0 length duv1/2 on MBP 16", this leaves missing bump artifacts // in large triangle error so this is a patch to avoid that. 
- if ((length_squared(duv1) < 1e-12) && - (length_squared(duv2) < 1e-12)) { + if ((length_squared(duv1) < 1e-10) && + (length_squared(duv2) < 1e-10)) { + //return 0.0h; // flag pixels with no bump return vertexNormal; } diff --git a/kramv/KramViewerBase.cpp b/kramv/KramViewerBase.cpp index 2bdb8321..d53fac5f 100644 --- a/kramv/KramViewerBase.cpp +++ b/kramv/KramViewerBase.cpp @@ -62,6 +62,11 @@ const char* ShowSettings::debugModeText() const { return text; } +bool ShowSettings::isEyedropperFromDrawable() { + return meshNumber > 0 || isPreview || isShowingAllLevelsAndMips || shapeChannel > 0; +} + + void ShowSettings::advanceMeshNumber(bool decrement) { int32_t numEnums = meshCount; int32_t number = meshNumber; diff --git a/kramv/KramViewerBase.h b/kramv/KramViewerBase.h index a0fc8eb1..f219534d 100644 --- a/kramv/KramViewerBase.h +++ b/kramv/KramViewerBase.h @@ -80,8 +80,8 @@ class ShowSettings { int32_t showAllPixelGap = 2; // These control which texture is viewed in single texture mode - int32_t mipLOD = 0; - int32_t maxLOD = 1; + int32_t mipNumber = 0; + int32_t mipCount = 1; int32_t faceNumber = 0; int32_t faceCount = 0; @@ -142,6 +142,9 @@ class ShowSettings { // whether files are pulled from folder(s) bool isFolder = false; + // can sample from drawable or from single source texture + bool isEyedropperFromDrawable(); + // can have up to 5 channels (xyz as xy, 2 other channels) int32_t numChannels = 0; @@ -161,6 +164,9 @@ class ShowSettings { int32_t textureLookupX = 0; int32_t textureLookupY = 0; + int32_t lastCursorX = 0; + int32_t lastCursorY = 0; + // exact pixel in the mip level int32_t textureLookupMipX = 0; int32_t textureLookupMipY = 0; diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index fc28d890..0b26aa62 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -710,7 +710,7 @@ - (void)doZoomMath:(float)newZoom newPan:(float2&)newPan { float minY = -0.5f; if (_showSettings->isShowingAllLevelsAndMips) { maxX += 1.0f * (_showSettings->totalChunks() - 1); - minY -= 1.0f * (_showSettings->maxLOD - 1); + minY -= 1.0f * (_showSettings->mipCount - 1); } // that's in model space (+/0.5f, +/0.5f), so convert to texture space @@ -792,7 +792,7 @@ - (void)handleGesture:(NSGestureRecognizer *)gestureRecognizer CGRect viewRect = CGRectMake(-1.0f, -1.0f, 2.0f, 2.0f); int32_t numTexturesX = _showSettings->totalChunks(); - int32_t numTexturesY = _showSettings->maxLOD; + int32_t numTexturesY = _showSettings->mipCount; if (_showSettings->isShowingAllLevelsAndMips) { imageRect.origin.y -= (numTexturesY - 1 ) * imageRect.size.height; @@ -906,6 +906,12 @@ float4 toSnorm8(float4 c) return (255.0 / 127.0) * c - (128 / 127.0); } +float4 toSnorm(float4 c) +{ + return 2.0f * c - 1.0f; +} + + - (void)updateEyedropper { if ((!_showSettings->isHudShown)) { @@ -919,6 +925,32 @@ - (void)updateEyedropper { // don't wait on renderer to update this matrix Renderer* renderer = (Renderer*)self.delegate; + + if (_showSettings->isEyedropperFromDrawable()) { + // this only needs the cursor location, but can't supply uv to displayPixelData + + if (_showSettings->lastCursorX != _showSettings->cursorX || + _showSettings->lastCursorY != _showSettings->cursorY) + { + // TODO: this means pan/zoom doesn't update data, may want to track some absolute + // location in virtal canvas. 
+ + _showSettings->lastCursorX = _showSettings->cursorX; + _showSettings->lastCursorY = _showSettings->cursorY; + + // This just samples from drawable, so no re-render is needed + [self showEyedropperData:float2m(0,0)]; + + // TODO: remove this, but only way to get drawSamples to execute right now, but then + // entire texture re-renders and that's not power efficient. Really just want to sample + // from the already rendered texture since content isn't animated. + + self.needsDisplay = YES; + } + + return; + } + float4x4 projectionViewModelMatrix = [renderer computeImageTransform:_showSettings->panX panY:_showSettings->panY zoom:_showSettings->zoom]; // convert to clip space, or else need to apply additional viewport transform @@ -952,8 +984,7 @@ - (void)updateEyedropper { pixel.x *= 0.999f; pixel.y *= 0.999f; - float uvX = pixel.x; - float uvY = pixel.y; + float2 uv = pixel.xy; // pixels are 0 based pixel.x *= _showSettings->imageBoundsX; @@ -962,7 +993,7 @@ - (void)updateEyedropper { // TODO: finish this logic, need to account for gaps too, and then isolate to a given level and mip to sample // if (_showSettings->isShowingAllLevelsAndMips) { // pixel.x *= _showSettings->totalChunks(); -// pixel.y *= _showSettings->maxLOD; +// pixel.y *= _showSettings->mipCount; // } // TODO: clearing out the last px visited makes it hard to gather data @@ -985,6 +1016,7 @@ - (void)updateEyedropper { return; } + // Note: fromView: nil returns isFlipped coordinate, fromView:self flips it back. int32_t newX = (int32_t)pixel.x; @@ -996,21 +1028,96 @@ - (void)updateEyedropper { // Note: this only samples from the original texture via compute shaders // so preview mode pixel colors are not conveyed. But can see underlying data driving preview. - MyMTLPixelFormat format = (MyMTLPixelFormat)_showSettings->originalFormat; - - // DONE: use these to format the text - bool isSrgb = isSrgbFormat(format); - bool isSigned = isSignedFormat(format); - bool isHdr = isHdrFormat(format); - int32_t numChannels = _showSettings->numChannels; - // %.0f rounds the value, but want truncation _showSettings->textureLookupX = newX; _showSettings->textureLookupY = newY; + + [self showEyedropperData:uv]; + + // TODO: remove this, but only way to get drawSamples to execute right now, but then + // entire texture re-renders and that's not power efficient. + self.needsDisplay = YES; + + } +} + +- (void)showEyedropperData:(float2)uv { + string text; + string tmp; + + float4 c = _showSettings->textureResult; + + // DONE: use these to format the text + MyMTLPixelFormat format = _showSettings->originalFormat; + bool isSrgb = isSrgbFormat(format); + bool isSigned = isSignedFormat(format); + + bool isHdr = isHdrFormat(format); + bool isFloat = isHdr; + + int32_t numChannels = _showSettings->numChannels; + + bool isNormal = _showSettings->isNormal; + bool isColor = !isNormal; + + bool isDirection = false; + bool isValue = false; + + if (_showSettings->isEyedropperFromDrawable()) { + // TODO: could write barycentric, then lookup uv from that + // then could show the block info. 
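For reference, a small standalone sketch of the uv-to-texel mapping the eyedropper uses above: the coordinate is nudged just inside [0,1) before scaling by the image bounds, so truncation never produces an index equal to the bounds. uvToTexel is a hypothetical name, not part of the patch.

#include <cstdint>

void uvToTexel(float u, float v, int32_t boundsX, int32_t boundsY,
               int32_t& px, int32_t& py) {
    u *= 0.999f;                 // keep u * boundsX strictly below boundsX
    v *= 0.999f;
    px = (int32_t)(u * boundsX); // truncation, not rounding
    py = (int32_t)(v * boundsY);
}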
+ + // interpret based on shapeChannel, debugMode, etc + switch(_showSettings->shapeChannel) { + case ShapeChannelDepth: + isValue = true; + isFloat = true; + numChannels = 1; + break; + case ShapeChannelUV0: + isValue = true; + isSigned = true; + numChannels = 2; // TODO: fix for 3d uvw + isFloat = true; + break; + + case ShapeChannelFaceNormal: + case ShapeChannelNormal: + case ShapeChannelTangent: + case ShapeChannelBitangent: + isSigned = false; // writing to 16f as unorm, so need conversion below + isDirection = true; + numChannels = 3; + + // convert unorm to snnorm + c = toSnorm(c); + break; + + case ShapeChannelMipLevel: + isValue = true; + isSigned = false; + isFloat = true; + + // viz is mipNumber as alpha + numChannels = 1; + c.r = 4.0 - (c.a * 4.0); + break; + + default: + break; + } + + // debug mode + + // preview vs. not + + + } + else { + // this will be out of sync with gpu eval, so may want to only display px from returned lookup // this will always be a linear color - float4 c = _showSettings->textureResult; int32_t x = _showSettings->textureResultX; int32_t y = _showSettings->textureResultY; @@ -1025,10 +1132,7 @@ - (void)updateEyedropper { append_sprintf(text, "px:%d %d\n", x, y); // show block num - int mipLOD = _showSettings->mipLOD; - - // TODO: these block numbers are not accurate on Toof at 4x4 - // there is resizing going on to the dimensions + int mipLOD = _showSettings->mipNumber; int mipX = _showSettings->imageBoundsX; int mipY = _showSettings->imageBoundsY; @@ -1039,8 +1143,8 @@ - (void)updateEyedropper { mipX = std::max(1, mipX); mipY = std::max(1, mipY); - mipX = (int32_t)(uvX * mipX); - mipY = (int32_t)(uvY * mipY); + mipX = (int32_t)(uv.x * mipX); + mipY = (int32_t)(uv.y * mipY); _showSettings->textureLookupMipX = mipX; _showSettings->textureLookupMipY = mipY; @@ -1063,96 +1167,102 @@ - (void)updateEyedropper { // TODO: more criteria here, can have 2 channel PBR metal-roughness // also have 4 channel normals where zw store other data. 
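The normal branch further below reconstructs z from the two stored channels. A standalone sketch of that reconstruction, not part of the patch: clamp the xy length so the square root stays real, and assume a positive z as tangent-space normals do.

#include <algorithm>
#include <cmath>

float reconstructNormalZ(float nx, float ny) {
    const float maxLen2 = 0.999f * 0.999f;            // same clamp as the viewer code
    float len2 = std::min(nx * nx + ny * ny, maxLen2);
    return std::sqrt(1.0f - len2);
}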
- bool isNormal = _showSettings->isNormal; - bool isFloat = isHdr; bool isDecodeSigned = isSignedFormat(_showSettings->decodedFormat); if (isSigned && !isDecodeSigned) { c = toSnorm8(c); } + } + + if (isValue) { + printChannels(tmp, "val: ", c, numChannels, isFloat, isSigned); + text += tmp; + } + else if (isDirection) { + // print direction + isFloat = true; + isSigned = true; - if (isNormal) { - float nx = c.x; - float ny = c.y; - - // unorm -> snorm - if (!isSigned) { - nx = toSnorm8(nx); - ny = toSnorm8(ny); - } - - // Note: not clamping nx,ny to < 1 like in shader - - // this is always postive on tan-space normals - // assuming we're not viewing world normals - const float maxLen2 = 0.999 * 0.999; - float len2 = nx * nx + ny * ny; - if (len2 > maxLen2) - len2 = maxLen2; - - float nz = sqrt(1.0f - len2); - - // print the underlying color (some nmaps are xy in 4 channels) - string tmp; - printChannels(tmp, "ln: ", c, numChannels, isFloat, isSigned); - text += tmp; + printChannels(tmp, "dir: ", c, numChannels, isFloat, isSigned); + text += tmp; + } + else if (isNormal) { + float nx = c.x; + float ny = c.y; + + // unorm -> snorm + if (!isSigned) { + nx = toSnorm8(nx); + ny = toSnorm8(ny); + } + + // Note: not clamping nx,ny to < 1 like in shader + + // this is always postive on tan-space normals + // assuming we're not viewing world normals + const float maxLen2 = 0.999 * 0.999; + float len2 = nx * nx + ny * ny; + if (len2 > maxLen2) + len2 = maxLen2; + + float nz = sqrt(1.0f - len2); + + // print the underlying color (some nmaps are xy in 4 channels) + printChannels(tmp, "lin: ", c, numChannels, isFloat, isSigned); + text += tmp; + + // print direction + float4 d = float4m(nx,ny,nz,0.0f); + isFloat = true; + isSigned = true; + printChannels(tmp, "dir: ", d, 3, isFloat, isSigned); + text += tmp; + } + else if (isColor) { + // DONE: write some print helpers based on float4 and length + printChannels(tmp, "lin: ", c, numChannels, isFloat, isSigned); + text += tmp; + + if (isSrgb) { + // this saturates the value, so don't use for extended srgb + float4 s = linearToSRGB(c); - // print direction - float4 d = float4m(nx,ny,nz,0.0f); - isFloat = true; - isSigned = true; - printChannels(tmp, "dr: ", d, 3, isFloat, isSigned); + printChannels(tmp, "srg: ", s, numChannels, isFloat, isSigned); text += tmp; } - else { - // DONE: write some print helpers based on float4 and length - string tmp; - printChannels(tmp, "ln: ", c, numChannels, isFloat, isSigned); + + // display the premul values too, but not fully transparent pixels + if (c.a > 0.0 && c.a < 1.0f) + { + printChannels(tmp, "lnp: ", toPremul(c), numChannels, isFloat, isSigned); text += tmp; + // TODO: do we need the premul srgb color too? if (isSrgb) { // this saturates the value, so don't use for extended srgb float4 s = linearToSRGB(c); - printChannels(tmp, "sr: ", s, numChannels, isFloat, isSigned); - text += tmp; - } - - // display the premul values too, but not fully transparent pixels - if (c.a > 0.0 && c.a < 1.0f) - { - printChannels(tmp, "lnp: ", toPremul(c), numChannels, isFloat, isSigned); + printChannels(tmp, "srp: ", toPremul(s), numChannels, isFloat, isSigned); text += tmp; - - // TODO: do we need the premul srgb color too? 
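The srg, lnp, and srp lines in this hunk rely on two standard conversions; a standalone C++ sketch of what helpers like linearToSRGB and toPremul presumably do per pixel, not part of the patch. The sRGB transfer function is clamped first, which is why the comment warns against using it for extended-range sRGB.

#include <algorithm>
#include <cmath>

// Piecewise sRGB transfer function applied to a linear channel in [0,1].
float linearToSRGB(float x) {
    x = std::min(1.0f, std::max(0.0f, x));
    return (x <= 0.0031308f) ? 12.92f * x
                             : 1.055f * std::pow(x, 1.0f / 2.4f) - 0.055f;
}

// Premultiplied alpha: scale rgb by a, leave a unchanged.
void toPremul(float rgba[4]) {
    rgba[0] *= rgba[3];
    rgba[1] *= rgba[3];
    rgba[2] *= rgba[3];
}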
- if (isSrgb) { - // this saturates the value, so don't use for extended srgb - float4 s = linearToSRGB(c); - - printChannels(tmp, "srp: ", toPremul(s), numChannels, isFloat, isSigned); - text += tmp; - } } } - - [self setEyedropperText:text.c_str()]; + } + + [self setEyedropperText:text.c_str()]; + + // TODO: range display of pixels is useful, only showing pixels that fall + // within a given range, but would need slider then, and determine range of pixels. + // TODO: Auto-range is also useful for depth (ignore far plane of 0 or 1). + + // TOOD: display histogram from compute, bin into buffer counts of pixels + + // DONE: stop clobbering hud text, need another set of labels + // and a zoom preview of the pixels under the cursor. + // Otherwise, can't really see the underlying color. + + // TODO: Stuff these on clipboard with a click, or use cmd+C? - // TODO: range display of pixels is useful, only showing pixels that fall - // within a given range, but would need slider then, and determine range of pixels. - // TODO: Auto-range is also useful for depth (ignore far plane of 0 or 1). - - // TOOD: display histogram from compute, bin into buffer counts of pixels - - // DONE: stop clobbering hud text, need another set of labels - // and a zoom preview of the pixels under the cursor. - // Otherwise, can't really see the underlying color. - - // TODO: Stuff these on clipboard with a click, or use cmd+C? - // TODO: remove this, but only way to get drawSamples to execute right now, but then - // entire texture re-renders and that's not power efficient. - self.needsDisplay = YES; - } } - (void)setEyedropperText:(const char*)text { @@ -1226,7 +1336,7 @@ - (void)scrollWheel:(NSEvent *)event CGRect viewRect = CGRectMake(-1.0f, -1.0f, 2.0f, 2.0f); int32_t numTexturesX = _showSettings->totalChunks(); - int32_t numTexturesY = _showSettings->maxLOD; + int32_t numTexturesY = _showSettings->mipCount; if (_showSettings->isShowingAllLevelsAndMips) { imageRect.origin.y -= (numTexturesY - 1 ) * imageRect.size.height; @@ -1298,11 +1408,11 @@ - (void)updateUIAfterLoad { // here and in HandleKey. // base on showSettings, hide some fo the buttons - bool isShowAllHidden = _showSettings->totalChunks() <= 1 && _showSettings->maxLOD <= 1; + bool isShowAllHidden = _showSettings->totalChunks() <= 1 && _showSettings->mipCount <= 1; bool isArrayHidden = _showSettings->arrayCount <= 1; bool isFaceSliceHidden = _showSettings->faceCount <= 1 && _showSettings->sliceCount <= 1; - bool isMipHidden = _showSettings->maxLOD <= 1; + bool isMipHidden = _showSettings->mipCount <= 1; bool isJumpToNextHidden = !(_showSettings->isArchive || _showSettings->isFolder); @@ -1388,7 +1498,7 @@ - (void)updateUIControlState auto arrayState = toState(_showSettings->arrayNumber > 0); auto faceState = toState(_showSettings->faceNumber > 0); - auto mipState = toState(_showSettings->mipLOD > 0); + auto mipState = toState(_showSettings->mipNumber > 0); auto meshState = toState(_showSettings->meshNumber > 0); auto meshChannelState = toState(_showSettings->shapeChannel > 0); @@ -1723,7 +1833,7 @@ - (bool)handleKey:(uint32_t)keyCode isShiftKeyDown:(bool)isShiftKeyDown // This zoom needs to be checked against zoom limits // there's a cap on the zoom multiplier. // This is reducing zoom which expands the image. 
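A small reference sketch, not part of the patch, of the mip dimension math behind the mip lookups above and the zoom factor immediately below: mip level N of a W x H image is max(1, W >> N) by max(1, H >> N), and the key handler below applies the matching 1/(1 << mipNumber) factor to the zoom.

#include <algorithm>
#include <cstdint>

void mipDimensions(int32_t w, int32_t h, int32_t mipNumber,
                   int32_t& mipW, int32_t& mipH) {
    mipW = std::max(1, w >> mipNumber);
    mipH = std::max(1, h >> mipNumber);
}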
- zoom *= 1.0f / (1 << _showSettings->mipLOD); + zoom *= 1.0f / (1 << _showSettings->mipNumber); // even if zoom same, still do this since it resets the pan _showSettings->zoom = zoom; @@ -1937,14 +2047,14 @@ - (bool)handleKey:(uint32_t)keyCode isShiftKeyDown:(bool)isShiftKeyDown // mip up/down case Key::M: - if (_showSettings->maxLOD > 1) { + if (_showSettings->mipCount > 1) { if (isShiftKeyDown) { - _showSettings->mipLOD = MAX(_showSettings->mipLOD - 1, 0); + _showSettings->mipNumber = MAX(_showSettings->mipNumber - 1, 0); } else { - _showSettings->mipLOD = MIN(_showSettings->mipLOD + 1, _showSettings->maxLOD - 1); + _showSettings->mipNumber = MIN(_showSettings->mipNumber + 1, _showSettings->mipCount - 1); } - sprintf(text, "Mip %d/%d", _showSettings->mipLOD, _showSettings->maxLOD); + sprintf(text, "Mip %d/%d", _showSettings->mipNumber, _showSettings->mipCount); isChanged = true; } break; @@ -2752,7 +2862,11 @@ - (void)viewDidLoad [super viewDidLoad]; _view = (MyMTKView *)self.view; - + + // have to disable this since reading back from textures + // that slows the blit to the screen + _view.framebufferOnly = NO; + _view.device = MTLCreateSystemDefaultDevice(); if(!_view.device) From f4a52142934ac6eab3f3bc2e35eebcc836b8d637 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 13 Jun 2021 17:09:49 -0700 Subject: [PATCH 127/901] kramv - debug fragment tangents, and fix bitangent shape shannel for vertex tangents --- kramv/KramShaders.metal | 71 +++++++++++++++++++++++++---------------- 1 file changed, 44 insertions(+), 27 deletions(-) diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index 0af5919a..e2d75080 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -256,7 +256,8 @@ half3 toNormal(half3 n) // Then transforms the bumpNormal to that space. No tangent is needed. // The downside is this must all be fp32, and all done in fragment shader and use derivatives. // Derivatives are known to be caclulated differently depending on hw and different precision. -half3 transformNormalByBasis(half3 bumpNormal, half3 vertexNormal, float3 worldPos, float2 uv) + +float3x3 generateFragmentTangentBasis(half3 vertexNormal, float3 worldPos, float2 uv) { float3 N = toFloat(vertexNormal); @@ -265,24 +266,24 @@ half3 transformNormalByBasis(half3 bumpNormal, half3 vertexNormal, float3 worldP //N.y = -N.y; // get edge vectors of the pixel triangle - float3 dp1 = dfdx(worldPos); - float3 dp2 = dfdy(worldPos); - float2 duv1 = dfdx(uv); - float2 duv2 = dfdy(uv); + float3 dpx = dfdx(worldPos); + float3 dpy = dfdy(worldPos); + float2 duvx = dfdx(uv); + float2 duvy = dfdy(uv); // getting non-zero uv with 0 length duv1/2 on MBP 16", this leaves missing bump artifacts // in large triangle error so this is a patch to avoid that. 
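The refactor below extracts the screen-space basis into generateFragmentTangentBasis. As background, a standalone C++ sketch of the underlying cotangent-frame solve: given the screen-space derivatives of world position and uv, find T and B so the position derivatives decompose along them according to the uv derivatives, then rescale both by the larger magnitude. The degenerate-derivative guard discussed above is omitted here, and the names and minimal vector type are local to the sketch.

#include <algorithm>
#include <array>
#include <cmath>

using V3 = std::array<float, 3>;
static V3 cross(const V3& a, const V3& b) {
    return { a[1]*b[2] - a[2]*b[1], a[2]*b[0] - a[0]*b[2], a[0]*b[1] - a[1]*b[0] };
}
static V3 madd(const V3& a, float s, const V3& b, float t) {
    return { a[0]*s + b[0]*t, a[1]*s + b[1]*t, a[2]*s + b[2]*t };
}
static float len2(const V3& a) { return a[0]*a[0] + a[1]*a[1] + a[2]*a[2]; }

// N is the (unit) vertex normal, dpx/dpy the derivatives of world position,
// duvx/duvy the derivatives of uv.
void cotangentFrame(const V3& N, const V3& dpx, const V3& dpy,
                    const float duvx[2], const float duvy[2], V3& T, V3& B) {
    V3 dp1perp = cross(N, dpx);
    V3 dp2perp = cross(dpy, N);
    T = madd(dp2perp, duvx[0], dp1perp, duvy[0]);
    B = madd(dp2perp, duvx[1], dp1perp, duvy[1]);
    float invmax = 1.0f / std::sqrt(std::max(len2(T), len2(B)));
    T = { T[0]*invmax, T[1]*invmax, T[2]*invmax };
    B = { B[0]*invmax, B[1]*invmax, B[2]*invmax };
}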
- if ((length_squared(duv1) < 1e-10) && - (length_squared(duv2) < 1e-10)) { - //return 0.0h; // flag pixels with no bump - return vertexNormal; - } +// if ((length_squared(duvx) < 1e-10) && +// (length_squared(duvy) < 1e-10)) { +// //return 0.0h; // flag pixels with no bump +// //return vertexNormal; +// } // solve the linear system - float3 dp2perp = cross(dp2, N); - float3 dp1perp = cross(N, dp1); - float3 T = dp2perp * duv1.x + dp1perp * duv2.x; - float3 B = dp2perp * duv1.y + dp1perp * duv2.y; + float3 dp2perp = cross(dpy, N); + float3 dp1perp = cross(N, dpx); + float3 T = dp2perp * duvx.x + dp1perp * duvy.x; + float3 B = dp2perp * duvx.y + dp1perp * duvy.y; float invmax = rsqrt(max(length_squared(T), length_squared(B))); // keeps relative magnitude of two vectors, they're not both unit vecs @@ -292,9 +293,17 @@ half3 transformNormalByBasis(half3 bumpNormal, half3 vertexNormal, float3 worldP // had to flip this sign to get lighting to match vertex data T = -T; + float3x3 basis = float3x3(T, B, N); + return basis; +} + +half3 transformNormalByBasis(half3 bumpNormal, half3 vertexNormal, float3 worldPos, float2 uv) +{ + float3x3 basis = generateFragmentTangentBasis(vertexNormal, worldPos, uv); + // construct a scale-invariant frame // drop to half to match other call - bumpNormal = toHalf(float3x3(T, B, N) * toFloat(bumpNormal)); + bumpNormal = toHalf(basis * toFloat(bumpNormal)); return bumpNormal; } @@ -817,27 +826,35 @@ float4 DrawPixels( } if (uniforms.shapeChannel != ShShapeChannelNone) { - // TODO: Really hard to interpret direction from color - // see about use the vector flow fields + // Hard to interpret direction from color, but have eyedropper to decipher render color. + // See about using the vector flow fields to see values across render, but needs fsqd pass. if (uniforms.shapeChannel == ShShapeChannelUV0) { + // fract so wrap will show repeating uv in 0,1, and never negative or large values + // don't have mirror address modes yet. 
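In the bitangent branch below, vertex tangents arrive as a float4 whose w component carries the handedness sign, so the bitangent is reconstructed rather than stored. A standalone sketch of that reconstruction, not part of the patch:

#include <array>

using V3 = std::array<float, 3>;

// B = cross(N, T.xyz) * T.w, with T.w = +1 or -1 for the handedness.
V3 reconstructBitangent(const V3& n, const V3& t, float tw) {
    return { (n[1]*t[2] - n[2]*t[1]) * tw,
             (n[2]*t[0] - n[0]*t[2]) * tw,
             (n[0]*t[1] - n[1]*t[0]) * tw };
}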
c.rgb = fract(in.texCoordXYZ); } else if (uniforms.shapeChannel == ShShapeChannelNormal) { c.rgb = toUnorm(toFloat(in.normal)); } - else if (uniforms.useTangent && uniforms.shapeChannel == ShShapeChannelTangent) { - // TODO: make this work with useTangent = false - // may have to call routine again, or pass back basis - - c.rgb = toUnorm(toFloat(in.tangent.xyz)); + else if (uniforms.shapeChannel == ShShapeChannelTangent) { + if (uniforms.useTangent) { + c.rgb = toUnorm(toFloat(in.tangent.xyz)); + } + else { + float3x3 basis = generateFragmentTangentBasis(in.normal, in.worldPos, in.texCoord); + c.rgb = toUnorm(basis[0]); + } } else if (uniforms.shapeChannel == ShShapeChannelBitangent) { - // TODO: make this work with useTangent = false - // may have to call routine again, or pass back basis - - half3 bitangent = cross(in.tangent.xyz, in.normal) * in.tangent.w; - c.rgb = toUnorm(toFloat(bitangent)); + if (uniforms.useTangent) { + half3 bitangent = cross(in.normal, in.tangent.xyz) * in.tangent.w; + c.rgb = toUnorm(toFloat(bitangent)); + } + else { + float3x3 basis = generateFragmentTangentBasis(in.normal, in.worldPos, in.texCoord); + c.rgb = toUnorm(basis[1]); // bitan + } } else if (uniforms.shapeChannel == ShShapeChannelDepth) { c.rgb = saturate(in.position.z / in.position.w); From 87a05fc8a04a0cab1a1cf77bc5c5027dee3f305d Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 13 Jun 2021 22:52:34 -0700 Subject: [PATCH 128/901] kramv - fix fragment basis --- kramv/KramRenderer.mm | 17 +++++-- kramv/KramShaders.metal | 107 ++++++++++++++++++++++++++++++++-------- kramv/KramViewerMain.mm | 8 +-- 3 files changed, 106 insertions(+), 26 deletions(-) diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index a59d21ae..517aadc8 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -975,19 +975,30 @@ - (void)resetSomeImageSettings:(BOOL)isNewFile { _showSettings->zoom = _showSettings->zoomFit; - // test rendering with inversion and mirroring + // test rendering with inversion and mirroring and non-uniform scale bool doInvertX = false; + bool doScaleX = false; // have one of these for each texture added to the viewer float scaleX = MAX(1, _showSettings->imageBoundsX); float scaleY = MAX(1, _showSettings->imageBoundsY); float scaleZ = MAX(scaleX, scaleY); // don't want 1.0f, or specular is all off due to extreme scale differences - _modelMatrix = float4x4(float4m(doInvertX ? -scaleX : scaleX, scaleY, scaleZ, 1.0f)); // non uniform scale + + float tmpScaleX = scaleX; + if (doInvertX) { + tmpScaleX = -tmpScaleX; + } + if (doScaleX) { + tmpScaleX *= 2.0f; + } + + _modelMatrix = float4x4(float4m(tmpScaleX, scaleY, scaleZ, 1.0f)); // non uniform scale _modelMatrix = _modelMatrix * matrix4x4_translation(0.0f, 0.0f, -1.0); // set z=-1 unit back // uniform scaled 3d primitiv float scale = MAX(scaleX, scaleY); - _modelMatrix3D = float4x4(float4m(doInvertX ? -scale : scale, scale, scale, 1.0f)); // uniform scale + + _modelMatrix3D = float4x4(float4m((doScaleX || doInvertX) ? tmpScaleX : scale, scale, scale, 1.0f)); // uniform scale _modelMatrix3D = _modelMatrix3D * matrix4x4_translation(0.0f, 0.0f, -1.0f); // set z=-1 unit back } diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index e2d75080..307e2287 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -257,13 +257,14 @@ half3 toNormal(half3 n) // The downside is this must all be fp32, and all done in fragment shader and use derivatives. 
// Derivatives are known to be caclulated differently depending on hw and different precision. -float3x3 generateFragmentTangentBasis(half3 vertexNormal, float3 worldPos, float2 uv) +float3x3 generateFragmentTangentBasis(half3 vertexNormal, float3 worldPos, float2 uv, thread bool& success) { + // normalizing this didn't help the reconstruction float3 N = toFloat(vertexNormal); // for OpenGL +Y convention, flip N.y // but this doesn't match explicit tangents case, see if those are wrong. - //N.y = -N.y; + // N.y = -N.y; // get edge vectors of the pixel triangle float3 dpx = dfdx(worldPos); @@ -271,24 +272,64 @@ float3x3 generateFragmentTangentBasis(half3 vertexNormal, float3 worldPos, float float2 duvx = dfdx(uv); float2 duvy = dfdy(uv); - // getting non-zero uv with 0 length duv1/2 on MBP 16", this leaves missing bump artifacts - // in large triangle error so this is a patch to avoid that. -// if ((length_squared(duvx) < 1e-10) && -// (length_squared(duvy) < 1e-10)) { -// //return 0.0h; // flag pixels with no bump -// //return vertexNormal; -// } + // May be pixel noise from this when up close and the derivatives exceed float precision + // so this to identify one failure case where the uv derivatives are clamped to zero. // solve the linear system float3 dp2perp = cross(dpy, N); float3 dp1perp = cross(N, dpx); float3 T = dp2perp * duvx.x + dp1perp * duvy.x; float3 B = dp2perp * duvx.y + dp1perp * duvy.y; - float invmax = rsqrt(max(length_squared(T), length_squared(B))); + + // The author talks about preserving non-uniform scale of the worldPos, but the problem is that + // the duvx/y also can be scaled with respect to one another, and the code doesn't + // knock that out. So with uniform scale and non-uniform uv, invmax also causes non-uniform scale of T/B. + // The normalize code below eliminates non-uniform worldPos scale and non-uniform uv scale. + // But we have a vertNormal that is also normalized. + + float Tlen = length_squared(T); + float Blen = length_squared(B); + + if (Tlen < 1e-10 || Blen < 1e-10) { + success = false; + return float3x3(0.0f); + } + + success = true; + +#if 1 + // Still see some less smooth gradation across sphere compared with vertex tangents + // Maybe N needs to be interpolated as float3 instead of half3 to use this? Bitan looks + // smoother than the tangent. + // Eliminate scale invariance to match vertex basis which is normalized before interpolation. + // This loses that hemisphere is 1x v vertically, and u is 2x rate around the sphere. Tan = 1/2 B then. + // Blocky triangles from this algorithm are because worldPos is linearly interpolated across + // the face of the flat poly, where vertex normals are smoothly interpolated across 3 points of triangle. + + + // Tangent looks much more blocky than Bitangent across the sphere. Why is that? + + T *= rsqrt(Tlen); + B *= rsqrt(Blen); + +#else + // math seems off when sphere u is 2x the rate, tangent is calculated as 0.5 length + // but the stretch is already accounted for by position vs. uv rate. + // Don't want to scale N.x by 0.5, since it's really v that is more squished on model. + + // Seeing tan/bitan that are 0.5 instead of 1.0 in length compared to the vertex tangents. + // This changes the lighting intensities since N is unit length. See explanation above. + + // Note: min gens larger than 1 directions, but the normals look more correct + // like it's the inverse normal transform. But lighting shifts. 
+ + float invmax = rsqrt(max(Tlen, Blen)); + // keeps relative magnitude of two vectors, they're not both unit vecs T *= invmax; B *= invmax; +#endif // had to flip this sign to get lighting to match vertex data T = -T; @@ -299,7 +340,12 @@ float3x3 generateFragmentTangentBasis(half3 vertexNormal, float3 worldPos, float half3 transformNormalByBasis(half3 bumpNormal, half3 vertexNormal, float3 worldPos, float2 uv) { - float3x3 basis = generateFragmentTangentBasis(vertexNormal, worldPos, uv); + bool success = false; + float3x3 basis = generateFragmentTangentBasis(vertexNormal, worldPos, uv, success); + + if (!success) { + return vertexNormal; + } // construct a scale-invariant frame // drop to half to match other call @@ -502,7 +548,14 @@ ColorInOut DrawImageFunc( // deal with full basis - if (uniforms.isNormalMapPreview) { + bool needsBasis = + uniforms.isNormalMapPreview || + // these need normal transformed to world space + uniforms.shapeChannel == ShaderShapeChannel::ShShapeChannelTangent || + uniforms.shapeChannel == ShaderShapeChannel::ShShapeChannelNormal || + uniforms.shapeChannel == ShaderShapeChannel::ShShapeChannelBitangent; + + if (needsBasis) { float3 normal = in.normal; float3 tangent = in.tangent.xyz; transformBasis(normal, tangent, uniforms.modelMatrix, uniforms.modelMatrixInvScale2.xyz, uniforms.useTangent); @@ -835,26 +888,40 @@ float4 DrawPixels( c.rgb = fract(in.texCoordXYZ); } else if (uniforms.shapeChannel == ShShapeChannelNormal) { - c.rgb = toUnorm(toFloat(in.normal)); + c.rgb = toFloat(in.normal); + + c.rgb = toUnorm(c.rgb); } else if (uniforms.shapeChannel == ShShapeChannelTangent) { if (uniforms.useTangent) { - c.rgb = toUnorm(toFloat(in.tangent.xyz)); + c.rgb = toFloat(in.tangent.xyz); } else { - float3x3 basis = generateFragmentTangentBasis(in.normal, in.worldPos, in.texCoord); - c.rgb = toUnorm(basis[0]); + bool success = false; + float3x3 basis = generateFragmentTangentBasis(in.normal, in.worldPos, in.texCoord, success); + if (!success) + c.rgb = 0; + else + c.rgb = basis[0]; } + + c.rgb = toUnorm(c.rgb); } else if (uniforms.shapeChannel == ShShapeChannelBitangent) { if (uniforms.useTangent) { half3 bitangent = cross(in.normal, in.tangent.xyz) * in.tangent.w; - c.rgb = toUnorm(toFloat(bitangent)); + c.rgb = toFloat(bitangent); } else { - float3x3 basis = generateFragmentTangentBasis(in.normal, in.worldPos, in.texCoord); - c.rgb = toUnorm(basis[1]); // bitan + bool success = false; + float3x3 basis = generateFragmentTangentBasis(in.normal, in.worldPos, in.texCoord, success); + if (!success) + c.rgb = 0; + else + c.rgb = basis[1]; // bitan } + + c.rgb = toUnorm(c.rgb); } else if (uniforms.shapeChannel == ShShapeChannelDepth) { c.rgb = saturate(in.position.z / in.position.w); @@ -865,7 +932,7 @@ float4 DrawPixels( // TODO: incorporate facing? 
- c.rgb = saturate(toUnorm(faceNormal)); + c.rgb = toUnorm(faceNormal); } else if (uniforms.shapeChannel == ShShapeChannelMipLevel) { c = toMipLevelColor(in.texCoord * textureSize.xy); // only for 2d textures diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index 0b26aa62..e170a41f 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -1071,22 +1071,24 @@ - (void)showEyedropperData:(float2)uv { // interpret based on shapeChannel, debugMode, etc switch(_showSettings->shapeChannel) { case ShapeChannelDepth: + isSigned = false; // using fract on uv + isValue = true; isFloat = true; numChannels = 1; break; case ShapeChannelUV0: + isSigned = false; // using fract on uv + isValue = true; - isSigned = true; - numChannels = 2; // TODO: fix for 3d uvw isFloat = true; + numChannels = 2; // TODO: fix for 3d uvw break; case ShapeChannelFaceNormal: case ShapeChannelNormal: case ShapeChannelTangent: case ShapeChannelBitangent: - isSigned = false; // writing to 16f as unorm, so need conversion below isDirection = true; numChannels = 3; From 0a4f25e5d06ee8572522acdf8a7840f10f3fecea Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Mon, 14 Jun 2021 08:56:04 -0700 Subject: [PATCH 129/901] kramv - world basis needed for preview This generates world space basis --- kramv/KramShaders.metal | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index 307e2287..075d54f0 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -548,14 +548,15 @@ ColorInOut DrawImageFunc( // deal with full basis - bool needsBasis = - uniforms.isNormalMapPreview || + bool needsWorldBasis = + uniforms.isPreview || + //uniforms.isNormalMapPreview || // these need normal transformed to world space uniforms.shapeChannel == ShaderShapeChannel::ShShapeChannelTangent || uniforms.shapeChannel == ShaderShapeChannel::ShShapeChannelNormal || uniforms.shapeChannel == ShaderShapeChannel::ShShapeChannelBitangent; - if (needsBasis) { + if (needsWorldBasis) { float3 normal = in.normal; float3 tangent = in.tangent.xyz; transformBasis(normal, tangent, uniforms.modelMatrix, uniforms.modelMatrixInvScale2.xyz, uniforms.useTangent); From 6fdcaa034b6bca0c56a7ed5efaa53fe731441570 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Mon, 14 Jun 2021 10:08:29 -0700 Subject: [PATCH 130/901] kramv - turn on specular --- kramv/KramRenderer.mm | 15 ++++++++++++++- kramv/KramShaders.metal | 32 ++++++++++++++++++++++++-------- 2 files changed, 38 insertions(+), 9 deletions(-) diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index 517aadc8..ce2894ee 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -939,6 +939,8 @@ - (void)updateImageSettings:(const string&)fullFilename image:(KTXImage&)image _showSettings->imageBoundsY = (int32_t)image.height; } +float zoom3D = 1.0f; + - (void)resetSomeImageSettings:(BOOL)isNewFile { // only reset these on new texture, but have to revalidate @@ -995,9 +997,17 @@ - (void)resetSomeImageSettings:(BOOL)isNewFile { _modelMatrix = float4x4(float4m(tmpScaleX, scaleY, scaleZ, 1.0f)); // non uniform scale _modelMatrix = _modelMatrix * matrix4x4_translation(0.0f, 0.0f, -1.0); // set z=-1 unit back - // uniform scaled 3d primitiv + // uniform scaled 3d primitive float scale = MAX(scaleX, scaleY); + // store the zoom into thew view matrix + // fragment tangents seem to break down at high model scale due to precision differences between worldPos and uv + static bool useZoom3D = false; + if (useZoom3D) { + zoom3D 
= scale; // * _showSettings->viewSizeX / 2.0f; + scale = 1.0; + } + _modelMatrix3D = float4x4(float4m((doScaleX || doInvertX) ? tmpScaleX : scale, scale, scale, 1.0f)); // uniform scale _modelMatrix3D = _modelMatrix3D * matrix4x4_translation(0.0f, 0.0f, -1.0f); // set z=-1 unit back } @@ -1008,6 +1018,9 @@ - (float4x4)computeImageTransform:(float)panX panY:(float)panY zoom:(float)zoom // non-uniform scale is okay here, only affects ortho volume // setting this to uniform zoom and object is not visible, zoom can be 20x in x and y + if (_showSettings->is3DView) { + zoom *= zoom3D; + } float4x4 viewMatrix = float4x4(float4m(zoom, zoom, 1.0f, 1.0f)); viewMatrix = panTransform * viewMatrix; diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index 075d54f0..32469d96 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -276,8 +276,13 @@ float3x3 generateFragmentTangentBasis(half3 vertexNormal, float3 worldPos, float // so this to identify one failure case where the uv derivatives are clamped to zero. // solve the linear system - float3 dp2perp = cross(dpy, N); float3 dp1perp = cross(N, dpx); + float3 dp2perp = cross(dpy, N); + + // When one of the duvx or duvy is 0 or close to it, then that's when I see + // tangent differences to the vertex tangents. dp2perp is knocked out by this. + // These artifacts are still present even moving scale into view matrix. + float3 T = dp2perp * duvx.x + dp1perp * duvy.x; float3 B = dp2perp * duvx.y + dp1perp * duvy.y; @@ -681,6 +686,8 @@ vertex ColorInOut DrawVolumeVS( } float4 doLighting(float4 albedo, float3 viewDir, float3 n, float3 vertexNormal) { + if (albedo.a == 0.0) + return albedo; float3 lightDir = normalize(float3(1,1,1)); // looking down -Z axis float3 lightColor = float3(1,1,1); @@ -689,22 +696,24 @@ float4 doLighting(float4 albedo, float3 viewDir, float3 n, float3 vertexNormal) float3 diffuse = float3(0.0); float3 ambient = float3(0.0); - bool doSpecular = false; // this is a bit too bright, and can confuse + bool doSpecular = true; // can confuse lighting review bool doDiffuse = true; bool doAmbient = true; + float dotNL = dot(n, lightDir); + if (doSpecular) { float3 ref = normalize(reflect(viewDir, n)); // above can be interpolated float dotRL = saturate(dot(ref, lightDir)); - dotRL = pow(dotRL, 4.0); // * saturate(dotNL * 8.0); // no spec without diffuse - specular = saturate(dotRL * lightColor.rgb); + dotRL = pow(dotRL, 8.0) * saturate(dotNL * 8.0); // no spec without diffuse + specular = dotRL * lightColor.rgb; } if (doDiffuse) { - float dotNL = saturate(dot(n, lightDir)); + float dotNLSat = saturate(dotNL); // soften the terminator off the vertNormal // this is so no diffuse if normal completely off from vertex normal @@ -712,13 +721,13 @@ float4 doLighting(float4 albedo, float3 viewDir, float3 n, float3 vertexNormal) float dotVertex = saturate(dot(vertexNormal, n)); dotNL *= saturate(9.0 * dotVertex); - diffuse = dotNL * lightColor.rgb; + diffuse = dotNLSat * lightColor.rgb; } if (doAmbient) { // can misconstrue as diffuse with this, but make dark side not look flat - float dotNLUnsat = dot(n, lightDir); - ambient = mix(0.1, 0.3, saturate(dotNLUnsat * 0.5 + 0.5)); + float dotNLUnsat = dotNL; + ambient = mix(0.1, 0.2, saturate(dotNLUnsat * 0.5 + 0.5)); } // attenuate, and not saturate below, so no HDR yet @@ -726,7 +735,14 @@ float4 doLighting(float4 albedo, float3 viewDir, float3 n, float3 vertexNormal) diffuse *= 0.7; //ambient *= 0.2; +#if 0 + // attenuating albedo with specular knocks it all out albedo.xyz *= 
saturate(ambient + diffuse + specular); +#else + albedo.xyz *= saturate(diffuse + ambient); + albedo.xyz += specular; + albedo.xyz = saturate(albedo.xyz); +#endif return albedo; } From 464d09a521ade367efdf0ed1851c72273fb9baa0 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Mon, 14 Jun 2021 11:13:42 -0700 Subject: [PATCH 131/901] kramv - remove tangent tolerance --- kramv/KramShaders.metal | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index 32469d96..0840f564 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -295,7 +295,9 @@ float3x3 generateFragmentTangentBasis(half3 vertexNormal, float3 worldPos, float float Tlen = length_squared(T); float Blen = length_squared(B); - if (Tlen < 1e-10 || Blen < 1e-10) { + // Tried 1e-10 tolerance here, but code hits that when zooming in closely to a shape. Normal map doesn't look good using vertNormal + // so instead only check for the zero case. + if (Tlen == 0.0 || Blen == 0.0) { success = false; return float3x3(0.0f); } @@ -696,7 +698,10 @@ float4 doLighting(float4 albedo, float3 viewDir, float3 n, float3 vertexNormal) float3 diffuse = float3(0.0); float3 ambient = float3(0.0); - bool doSpecular = true; // can confuse lighting review + // Need lighting control in UI, otherwise specular just adds a big bright + // circle to all texture previews since it's additive. + + bool doSpecular = false; // can confuse lighting review bool doDiffuse = true; bool doAmbient = true; From 6ea533cc85ccd3d03ba7e27379c576af4fd22255 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sat, 19 Jun 2021 14:41:19 -0700 Subject: [PATCH 132/901] kramv - improve fragment tangents, don't crash on resize --- kramv/KramRenderer.mm | 38 ++++++++----- kramv/KramShaders.metal | 117 +++++++++++++++------------------------- 2 files changed, 69 insertions(+), 86 deletions(-) diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index ce2894ee..0823232d 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -1575,6 +1575,10 @@ - (void)drawSample return; } + // this can occur during a resize + if (!_lastDrawableTexture) + return; + id commandBuffer = [_commandQueue commandBuffer]; if (!commandBuffer) return; @@ -1593,20 +1597,25 @@ - (void)drawSample MTLOrigin srcOrigin = MTLOriginMake(_showSettings->cursorX, _showSettings->cursorY, 0); srcOrigin.x *= _showSettings->viewContentScaleFactor; srcOrigin.y *= _showSettings->viewContentScaleFactor; - - // Note: here we don't know the uv in original texture, would have to write that out to another - // texture. Also on shapes, texel may not change but lighting might. - // can simply blit the color out of the render buffer - id blitCommandEncoder = [commandBuffer blitCommandEncoder]; - if (blitCommandEncoder) { - [blitCommandEncoder copyFromTexture:_lastDrawableTexture - sourceSlice:0 sourceLevel:0 sourceOrigin:srcOrigin sourceSize:MTLSizeMake(1,1,1) - toTexture:_sampleRenderTex - destinationSlice:0 destinationLevel:0 destinationOrigin:MTLOriginMake(0,0,0) - ]; - [blitCommandEncoder synchronizeResource:_sampleRenderTex]; - [blitCommandEncoder endEncoding]; + if ((srcOrigin.x >= 0 && srcOrigin.x < _lastDrawableTexture.width) && + (srcOrigin.y >= 0 && srcOrigin.y < _lastDrawableTexture.height)) + { + + // Note: here we don't know the uv in original texture, would have to write that out to another + // texture. Also on shapes, texel may not change but lighting might. 
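Looking back at the doLighting changes in the patch above: a scalar gloss is mapped to a specular exponent with exp2(gloss * 11) + 2, and Blinn-Phong (half vector) is compared against Phong (reflection vector). A standalone C++ sketch of those two specular terms, not part of the patch, with the energy-normalization factor left out and all names local to the sketch:

#include <algorithm>
#include <array>
#include <cmath>

using V3 = std::array<float, 3>;
static float dot(const V3& a, const V3& b) { return a[0]*b[0] + a[1]*b[1] + a[2]*b[2]; }
static V3 normalize(const V3& v) {
    float l = std::sqrt(dot(v, v));
    return { v[0]/l, v[1]/l, v[2]/l };
}

// n, lightDir, viewDir are unit vectors; gloss is in [0,1].
float specularTerm(const V3& n, const V3& lightDir, const V3& viewDir,
                   float gloss, bool blinnPhong) {
    float specularExp = std::exp2(gloss * 11.0f) + 2.0f;
    float amount;
    if (blinnPhong) {
        // half vector between the light and the eye (eye = -viewDir)
        V3 h = normalize({ lightDir[0] - viewDir[0],
                           lightDir[1] - viewDir[1],
                           lightDir[2] - viewDir[2] });
        amount = std::max(0.0f, dot(h, n));
        specularExp *= 4.0f; // rough Blinn-Phong <-> Phong exponent equivalence
    } else {
        // reflect viewDir about n: r = v - 2*dot(v,n)*n
        float k = dot(viewDir, n);
        V3 r = { viewDir[0] - 2.0f*n[0]*k,
                 viewDir[1] - 2.0f*n[1]*k,
                 viewDir[2] - 2.0f*n[2]*k };
        amount = std::max(0.0f, dot(r, lightDir));
    }
    return std::pow(amount, specularExp);
}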
+ + // can simply blit the color out of the render buffer + id blitCommandEncoder = [commandBuffer blitCommandEncoder]; + if (blitCommandEncoder) { + [blitCommandEncoder copyFromTexture:_lastDrawableTexture + sourceSlice:0 sourceLevel:0 sourceOrigin:srcOrigin sourceSize:MTLSizeMake(1,1,1) + toTexture:_sampleRenderTex + destinationSlice:0 destinationLevel:0 destinationOrigin:MTLOriginMake(0,0,0) + ]; + [blitCommandEncoder synchronizeResource:_sampleRenderTex]; + [blitCommandEncoder endEncoding]; + } } } else { @@ -1731,6 +1740,9 @@ - (void)drawSamples:(id)commandBuffer lookupX:(int32_t)lookupX - (void)mtkView:(nonnull MTKView *)view drawableSizeWillChange:(CGSize)size { + // Don't crashing trying to readback from the cached drawable during a resize. + _lastDrawableTexture = nil; + /// Respond to drawable size or orientation changes here _showSettings->viewSizeX = size.width; _showSettings->viewSizeY = size.height; diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index 0840f564..7971e633 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -256,99 +256,70 @@ half3 toNormal(half3 n) // Then transforms the bumpNormal to that space. No tangent is needed. // The downside is this must all be fp32, and all done in fragment shader and use derivatives. // Derivatives are known to be caclulated differently depending on hw and different precision. +float length_squared(float x) { + return x * x; +} -float3x3 generateFragmentTangentBasis(half3 vertexNormal, float3 worldPos, float2 uv, thread bool& success) +bool generateFragmentTangentBasis(half3 vertexNormal, float3 worldPos, float2 uv, thread float3x3& basis) { - // normalizing this didn't help the reconstruction float3 N = toFloat(vertexNormal); - // for OpenGL +Y convention, flip N.y - // but this doesn't match explicit tangents case, see if those are wrong. - // N.y = -N.y; + // normalizing this didn't help the reconstruction + //N = normalize(N); // get edge vectors of the pixel triangle float3 dpx = dfdx(worldPos); float3 dpy = dfdy(worldPos); + + // could also pass isFrontFacing, should this almost always be true + //float3 faceNormal = cross(dpy, dpx); // because dpy is down on screen + //bool isFlipped = dot(faceNormal, N) > 0; + + // These are much smaller in magnitude than the position derivatives float2 duvx = dfdx(uv); float2 duvy = dfdy(uv); - // May be pixel noise from this when up close and the derivatives exceed float precision - // so this to identify one failure case where the uv derivatives are clamped to zero. - // solve the linear system - float3 dp1perp = cross(N, dpx); - float3 dp2perp = cross(dpy, N); - + float3 dp1perp = cross(N, dpx); // vertical + float3 dp2perp = cross(dpy, N); // horizontal + // When one of the duvx or duvy is 0 or close to it, then that's when I see // tangent differences to the vertex tangents. dp2perp is knocked out by this. // These artifacts are still present even moving scale into view matrix. - float3 T = dp2perp * duvx.x + dp1perp * duvy.x; - float3 B = dp2perp * duvx.y + dp1perp * duvy.y; - - // The author talks about preserving non-uniform scale of the worldPos, but the problem is that - // the duvx/y also can be scaled with respect to one another, and the code doesn't - // knock that out. So with uniform scale and non-uniform uv, invmax also causes non-uniform scale of T/B. - // The normalize code below eliminates non-uniform worldPos scale and non-uniform uv scale. - // But we have a vertNormal that is also normalized. 
- float Tlen = length_squared(T); + float3 B = dp2perp * duvx.y + dp1perp * duvy.y; float Blen = length_squared(B); - // Tried 1e-10 tolerance here, but code hits that when zooming in closely to a shape. Normal map doesn't look good using vertNormal - // so instead only check for the zero case. - if (Tlen == 0.0 || Blen == 0.0) { - success = false; - return float3x3(0.0f); - } - - success = true; - -#if 1 - // Still see some less smooth gradation across sphere compared with vertex tangents - // Maybe N needs to be interpolated as float3 instead of half3 to use this? Bitan looks - // smoother than the tangent. - - // Eliminate scale invariance to match vertex basis which is normalized before interpolation. - // This loses that hemisphere is 1x v vertically, and u is 2x rate around the sphere. Tan = 1/2 B then. - // Blocky triangles from this algorithm are because worldPos is linearly interpolated across - // the face of the flat poly, where vertex normals are smoothly interpolated across 3 points of triangle. - - - // Tangent looks much more blocky than Bitangent across the sphere. Why is that? - - T *= rsqrt(Tlen); - B *= rsqrt(Blen); - -#else - // math seems off when sphere u is 2x the rate, tangent is calculated as 0.5 length - // but the stretch is already accounted for by position vs. uv rate. - // Don't want to scale N.x by 0.5, since it's really v that is more squished on model. - - // Seeing tan/bitan that are 0.5 instead of 1.0 in length compared to the vertex tangents. - // This changes the lighting intensities since N is unit length. See explanation above. - - // Note: min gens larger than 1 directions, but the normals look more correct - // like it's the inverse normal transform. But lighting shifts. - - float invmax = rsqrt(max(Tlen, Blen)); + // could use B = dp1perp + if (Blen == 0.0) + return false; + + // float x = length_squared(duvx.x) + length_squared(duvy.x); // used for tangent + // float y = length_squared(duvx.y) + length_squared(duvy.y); // used for bitangent + + float3 T; + //if (x <= y) { + B *= rsqrt(Blen); + T = cross(B, N); + // } +// else { +// T = dp2perp * duvx.x + dp1perp * duvy.x; +// float Tlen = length_squared(T); +// +// T *= rsqrt(Tlen); +// T = -T; +// B = cross(N, T); +// } - // keeps relative magnitude of two vectors, they're not both unit vecs - T *= invmax; - B *= invmax; -#endif - - // had to flip this sign to get lighting to match vertex data - T = -T; - - float3x3 basis = float3x3(T, B, N); - return basis; + basis = float3x3(T, B, N); + return true; } half3 transformNormalByBasis(half3 bumpNormal, half3 vertexNormal, float3 worldPos, float2 uv) { - bool success = false; - float3x3 basis = generateFragmentTangentBasis(vertexNormal, worldPos, uv, success); + float3x3 basis; + bool success = generateFragmentTangentBasis(vertexNormal, worldPos, uv, basis); if (!success) { return vertexNormal; @@ -919,8 +890,8 @@ float4 DrawPixels( c.rgb = toFloat(in.tangent.xyz); } else { - bool success = false; - float3x3 basis = generateFragmentTangentBasis(in.normal, in.worldPos, in.texCoord, success); + float3x3 basis; + bool success = generateFragmentTangentBasis(in.normal, in.worldPos, in.texCoord, basis); if (!success) c.rgb = 0; else @@ -935,8 +906,8 @@ float4 DrawPixels( c.rgb = toFloat(bitangent); } else { - bool success = false; - float3x3 basis = generateFragmentTangentBasis(in.normal, in.worldPos, in.texCoord, success); + float3x3 basis; + bool success = generateFragmentTangentBasis(in.normal, in.worldPos, in.texCoord, basis); if (!success) c.rgb = 0; 
else From e755bf8becf6de1dc6a524b5622403eff08e2bfd Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 20 Jun 2021 11:20:15 -0700 Subject: [PATCH 133/901] kramv - fix ortho lighting No camera orient, so viewDir is always 00-1 --- kramv/KramShaders.metal | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index 7971e633..2d0b155c 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -6,11 +6,6 @@ using namespace metal; -// TODO: Getting weird triangle artifacts on AMC 5500m on 16" MBP with useTangent = false. -// Seems that uv derivatives used for basis generation are 0 in gpu capture -// even though the uv itself are not. That shouldn't be possible. -// This results in large triangular artitfacts at the bottom of the sphere/capsule. - //--------------------------------- // helpers @@ -672,7 +667,7 @@ float4 doLighting(float4 albedo, float3 viewDir, float3 n, float3 vertexNormal) // Need lighting control in UI, otherwise specular just adds a big bright // circle to all texture previews since it's additive. - bool doSpecular = false; // can confuse lighting review + bool doSpecular = false; // can confuse lighting review, make option to enable or everything has bright white spot bool doDiffuse = true; bool doAmbient = true; @@ -723,6 +718,15 @@ float4 doLighting(float4 albedo, float3 viewDir, float3 n, float3 vertexNormal) return albedo; } +float3 calculateViewDir(float3 worldPos, float3 cameraPosition) { + // ortho case + return float3(0,0,-1); + + // TODO: need perspective preview + //return normalize(worldPos - cameraPosition); +} + + // TODO: eliminate the toUnorm() calls below, rendering to rgba16f but then present // doesn't have enough info to remap 16F to the display. @@ -777,7 +781,7 @@ float4 DrawPixels( uniforms.isSwizzleAGToRG, uniforms.isSigned, facing); - float3 viewDir = normalize(in.worldPos - uniforms.cameraPosition); + float3 viewDir = calculateViewDir(in.worldPos, uniforms.cameraPosition); c = doLighting(float4(1.0), viewDir, toFloat(n), toFloat(in.normal)); c.a = 1; @@ -788,7 +792,7 @@ float4 DrawPixels( c.xyz = toUnorm(c.xyz); } else { // TODO: need an isAlbedo test - float3 viewDir = normalize(in.worldPos - uniforms.cameraPosition); + float3 viewDir = calculateViewDir(in.worldPos, uniforms.cameraPosition); if (uniforms.isNormalMapPreview) { half4 nmapH = toHalf(nmap); From 9c2608de6461a41df74758c237b47ab52bd62cdd Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 27 Jun 2021 22:45:33 -0700 Subject: [PATCH 134/901] kramv - fix mirrored uv with fragment tangents, more specular lighting work --- kramv/KramShaders.metal | 164 ++++++++++++++++++++++++++++++---------- 1 file changed, 122 insertions(+), 42 deletions(-) diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index 2d0b155c..067f3599 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -9,6 +9,8 @@ using namespace metal; //--------------------------------- // helpers +//constant float PI = 3.1415927; + float toUnorm8(float c) { return (127.0 / 255.0) * c + (128.0 / 255.0); @@ -255,6 +257,11 @@ float length_squared(float x) { return x * x; } +// how is this not a built-in? 
+float cross(float2 lhs, float2 rhs) { + return lhs.x * rhs.y - rhs.x * lhs.y; +} + bool generateFragmentTangentBasis(half3 vertexNormal, float3 worldPos, float2 uv, thread float3x3& basis) { float3 N = toFloat(vertexNormal); @@ -262,11 +269,24 @@ bool generateFragmentTangentBasis(half3 vertexNormal, float3 worldPos, float2 uv // normalizing this didn't help the reconstruction //N = normalize(N); + // Original code pases viewDir, but that is constant for ortho view and would only work for perspective. + // Comment was that cameraPos drops out since it's constant, but perspective viewDir is also typically normalized too. + // Here using worldPos but it has much larger magnitude than uv then. + // get edge vectors of the pixel triangle float3 dpx = dfdx(worldPos); float3 dpy = dfdy(worldPos); + //N = normalize(cross(dpy, dpx)); + + //dpx.y = -dpx.y; + //dpy.y = -dpy.y; + // could also pass isFrontFacing, should this almost always be true + + // The math problem here seems related to that we're using the planar dpx/dpy. + // but the normal is interpolated on the sphere, and plane is likely closer to dNx/dNy. + //float3 faceNormal = cross(dpy, dpx); // because dpy is down on screen //bool isFlipped = dot(faceNormal, N) > 0; @@ -274,39 +294,65 @@ bool generateFragmentTangentBasis(half3 vertexNormal, float3 worldPos, float2 uv float2 duvx = dfdx(uv); float2 duvy = dfdy(uv); + // flip T based on uv direction to handle mirrored UV + float uvPlaneSign = sign(cross(duvy, duvx)); + +#if 1 + + // can't really tell this from using N + float3 useN; + + //float3 faceNormal = cross(dpy, dpx); + //useN = faceNormal; + + useN = N; + // solve the linear system - float3 dp1perp = cross(N, dpx); // vertical - float3 dp2perp = cross(dpy, N); // horizontal - - // When one of the duvx or duvy is 0 or close to it, then that's when I see - // tangent differences to the vertex tangents. dp2perp is knocked out by this. - // These artifacts are still present even moving scale into view matrix. + float3 dp1perp = cross(useN, dpx); // vertical + float3 dp2perp = cross(dpy, useN); // horizontal +#else + float3 dp1perp = -dpy; + float3 dp2perp = dpx; +#endif + // could use B = dp1perp + //if (Blen == 0.0) + // return false; - float3 B = dp2perp * duvx.y + dp1perp * duvy.y; + float3 T, B; + +#if 0 + B = normalize(dp1perp); + T = -normalize(dp2perp); +#elif 1 + B = dp2perp * duvx.y + dp1perp * duvy.y; float Blen = length_squared(B); + + // vertical ridges with T.y flipping sign + B *= rsqrt(Blen); + T = cross(B, N); + + // This switches to lhcs on left side of mirrored sphere + // May just be that ModelIO has generated bad basis on that left side. + T *= -uvPlaneSign; + +#elif 0 + // This calc just doesn't look as good + + // trapezoidal pattern wih T.y flipping sign + T = dp2perp * duvx.x + dp1perp * duvy.x; + float Tlen = length_squared(T); + + T *= rsqrt(Tlen); + + //T = -T; + + // Fixes tangent on mirrored sphere but Bitangent is wrong, does this mean uv wrap switches to lhcs instead of rhcs? 
+ T *= uvPlaneSign; + + B = cross(N, T); +#endif - // could use B = dp1perp - if (Blen == 0.0) - return false; - - // float x = length_squared(duvx.x) + length_squared(duvy.x); // used for tangent - // float y = length_squared(duvx.y) + length_squared(duvy.y); // used for bitangent - - float3 T; - //if (x <= y) { - B *= rsqrt(Blen); - T = cross(B, N); - // } -// else { -// T = dp2perp * duvx.x + dp1perp * duvy.x; -// float Tlen = length_squared(T); -// -// T *= rsqrt(Tlen); -// T = -T; -// B = cross(N, T); -// } - basis = float3x3(T, B, N); return true; } @@ -653,7 +699,7 @@ vertex ColorInOut DrawVolumeVS( return out; } -float4 doLighting(float4 albedo, float3 viewDir, float3 n, float3 vertexNormal) { +float4 doLighting(float4 albedo, float3 viewDir, float3 bumpNormal, float3 vertexNormal) { if (albedo.a == 0.0) return albedo; @@ -666,20 +712,55 @@ float4 doLighting(float4 albedo, float3 viewDir, float3 n, float3 vertexNormal) // Need lighting control in UI, otherwise specular just adds a big bright // circle to all texture previews since it's additive. - + bool doBlinnPhongSpecular = false; bool doSpecular = false; // can confuse lighting review, make option to enable or everything has bright white spot bool doDiffuse = true; bool doAmbient = true; - float dotNL = dot(n, lightDir); + // see here about energy normalization, not going to GGX just yet + // http://www.thetenthplanet.de/archives/255 + float dotVertexNL = dot(vertexNormal, lightDir); + + float dotNL = dot(bumpNormal, lightDir); if (doSpecular) { - float3 ref = normalize(reflect(viewDir, n)); - - // above can be interpolated - float dotRL = saturate(dot(ref, lightDir)); - dotRL = pow(dotRL, 8.0) * saturate(dotNL * 8.0); // no spec without diffuse - specular = dotRL * lightColor.rgb; + if (dotVertexNL > 0.0) { + float specularAmount; + + // in lieu of a roughness map, do this + // fake energy conservation by multiply with gloss + // https://www.youtube.com/watch?v=E4PHFnvMzFc&t=946s + float gloss = 0.6; + float specularExp = exp2(gloss * 11.0) + 2.0; + float energyNormalization = gloss; + + if (doBlinnPhongSpecular) { + // this doesn't look so good as a highlight in ortho at least + float3 E = -viewDir; + float3 H = normalize(lightDir + E); + float dotHN = saturate(dot(H, bumpNormal)); + specularAmount = dotHN; + + // to make dotHN look like dotRL + // https://en.wikipedia.org/wiki/Blinn%E2%80%93Phong_reflection_model + specularExp *= 4.0; + + //energyNormalization = (specularExp + 1.0) / (2.0 * PI); + } + else { + // phong + // and seem to recall a conversion to above but H = (L+V)/2, the normalize knocks out the 1/2 + float3 ref = normalize(reflect(viewDir, bumpNormal)); + float dotRL = saturate(dot(ref, lightDir)); + specularAmount = dotRL; + + //energyNormalization = (specularExp + 1.0) / (2.0 * PI); + } + + // above can be interpolated + specularAmount = pow(specularAmount, specularExp) * energyNormalization; + specular = specularAmount * lightColor.rgb; + } } if (doDiffuse) { @@ -689,7 +770,7 @@ float4 doLighting(float4 albedo, float3 viewDir, float3 n, float3 vertexNormal) // soften the terminator off the vertNormal // this is so no diffuse if normal completely off from vertex normal // also limiting diffuse lighting bump to lighting by vertex normal - float dotVertex = saturate(dot(vertexNormal, n)); + float dotVertex = saturate(dot(vertexNormal, bumpNormal)); dotNL *= saturate(9.0 * dotVertex); diffuse = dotNLSat * lightColor.rgb; @@ -726,9 +807,8 @@ float3 calculateViewDir(float3 worldPos, float3 cameraPosition) { 
//return normalize(worldPos - cameraPosition); } - -// TODO: eliminate the toUnorm() calls below, rendering to rgba16f but then present -// doesn't have enough info to remap 16F to the display. +// This is writing out to 16F and could write snorm data, but then that couldn't be displayed. +// So code first converts to Unorm. float4 DrawPixels( ColorInOut in [[stage_in]], @@ -1242,12 +1322,12 @@ fragment float4 DrawVolumePS( //-------------------------------------------------- + /* not using this yet, need a fsq and some frag coord to sample the normal map at discrete points // https://www.shadertoy.com/view/4s23DG // 2D vector field visualization by Morgan McGuire, @morgan3d, http://casual-effects.com -constant float PI = 3.1415927; constant int ARROW_V_STYLE = 1; constant int ARROW_LINE_STYLE = 2; From 98e7c85cd1d84eae67d72370c60b7373566979c3 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sat, 3 Jul 2021 15:20:02 -0700 Subject: [PATCH 135/901] kramv - add lighting controls, add IQ's reflect call Lighting is off key "5". Either ambient + diffuse, or + specular. The specular tends to leave bright highlights on flat shapes so want to be able to turn it off. --- LICENSE | 2 +- kramv/KramRenderer.mm | 1 + kramv/KramShaders.h | 8 ++++++ kramv/KramShaders.metal | 55 ++++++++++++++++++++++++++++++++-------- kramv/KramViewerBase.cpp | 25 ++++++++++++++++++ kramv/KramViewerBase.h | 11 ++++++++ kramv/KramViewerMain.mm | 16 ++++++++++-- 7 files changed, 105 insertions(+), 13 deletions(-) diff --git a/LICENSE b/LICENSE index e7212644..a48f6407 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2020 Alec Miller +Copyright (c) 2020-2021 Alec Miller Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index 0823232d..1c0e90db 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -1077,6 +1077,7 @@ - (void)_updateGameState uniforms.isSDF = _showSettings->isSDF; uniforms.numChannels = _showSettings->numChannels; + uniforms.lightingMode = (ShaderLightingMode)_showSettings->lightingMode; MyMTLTextureType textureType = MyMTLTextureType2D; MyMTLPixelFormat textureFormat = MyMTLPixelFormatInvalid; diff --git a/kramv/KramShaders.h b/kramv/KramShaders.h index f11491a4..b686db05 100644 --- a/kramv/KramShaders.h +++ b/kramv/KramShaders.h @@ -112,6 +112,11 @@ typedef NS_ENUM(int32_t, ShaderShapeChannel) // ShShapeChannelBumpNormal, }; +typedef NS_ENUM(int32_t, ShaderLightingMode) +{ + ShLightingModeDiffuse = 0, + ShLightingModeSpecular, +}; // TODO: placement of these elements in the struct breaks transfer // of data. This seems to work. Alignment issues with mixing these differently. 
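The viewer cycles its enum settings (debug mode, shape channel, and now the lighting mode bound to key "5") with a common wraparound pattern: stepping backwards adds count - 1 so the modulo result stays non-negative. A standalone sketch of that pattern using the two-value lighting enum introduced here, not part of the patch:

#include <cstdint>

enum LightingMode { LightingModeDiffuse = 0, LightingModeSpecular = 1, LightingModeCount };

LightingMode advanceLightingMode(LightingMode mode, bool decrement) {
    int32_t numEnums = LightingModeCount;
    int32_t number = (int32_t)mode;
    number += decrement ? (numEnums - 1) : 1;
    return (LightingMode)(number % numEnums);
}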
@@ -158,6 +163,9 @@ struct Uniforms // View the r,g,b,a channels of the texture ShaderTextureChannels channels; // mask + + // Can turn on/off specular + ShaderLightingMode lightingMode; }; // uploaded separately, so multiple mips, faces, array can be drawn to the screen at one time diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index 067f3599..3ab86961 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -699,7 +699,31 @@ vertex ColorInOut DrawVolumeVS( return out; } -float4 doLighting(float4 albedo, float3 viewDir, float3 bumpNormal, float3 vertexNormal) { + +float3 reflectIQ(float3 v, float3 n) +{ +#if 0 + // traditional refect + // v - 2 * n * dot(v n) + float3 r = reflect(v, n); + + return r; +#else + // Not sure why IQ uses the r notation + float3 r = n; + + // https://iquilezles.org/www/articles/dontflip/dontflip.htm + // works for any dimension + // also article has a clamp forumulation + + float k = dot(v, r); + + // reflect v if it's in the negative half plane defined by r + return (k > 0.0) ? v : (v - 2.0 * r * k); +#endif +} + +float4 doLighting(float4 albedo, float3 viewDir, float3 bumpNormal, float3 vertexNormal, ShaderLightingMode lightingMode) { if (albedo.a == 0.0) return albedo; @@ -713,24 +737,35 @@ float4 doLighting(float4 albedo, float3 viewDir, float3 bumpNormal, float3 verte // Need lighting control in UI, otherwise specular just adds a big bright // circle to all texture previews since it's additive. bool doBlinnPhongSpecular = false; - bool doSpecular = false; // can confuse lighting review, make option to enable or everything has bright white spot + + bool doSpecular = true; // can confuse lighting review, make option to enable or everything has bright white spot bool doDiffuse = true; bool doAmbient = true; + if (lightingMode == ShLightingModeDiffuse) + { + doSpecular = false; + } + // see here about energy normalization, not going to GGX just yet // http://www.thetenthplanet.de/archives/255 - float dotVertexNL = dot(vertexNormal, lightDir); + + // Note: this isn't the same as the faceNormal, the vertexNormal is interpolated + // see iq's trick for flipping lighting in reflectIQ. 
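A standalone sketch, not part of the patch, of the "don't flip" reflection used by the reflectIQ call added above (https://iquilezles.org/www/articles/dontflip/dontflip.htm): the vector is left alone when it already lies in the positive half-space of r, and mirrored across the plane perpendicular to r otherwise.

#include <array>

using V3 = std::array<float, 3>;
static float dot(const V3& a, const V3& b) { return a[0]*b[0] + a[1]*b[1] + a[2]*b[2]; }

V3 reflectIfBehind(const V3& v, const V3& r) {
    float k = dot(v, r);
    if (k > 0.0f)
        return v;
    return { v[0] - 2.0f*r[0]*k, v[1] - 2.0f*r[1]*k, v[2] - 2.0f*r[2]*k };
}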
+ + // Use reflectIQ to flip specular, + //float dotVertexNL = dot(vertexNormal, lightDir); float dotNL = dot(bumpNormal, lightDir); if (doSpecular) { - if (dotVertexNL > 0.0) { + //if (dotVertexNL > 0.0) { float specularAmount; // in lieu of a roughness map, do this // fake energy conservation by multiply with gloss // https://www.youtube.com/watch?v=E4PHFnvMzFc&t=946s - float gloss = 0.6; + float gloss = 0.3; float specularExp = exp2(gloss * 11.0) + 2.0; float energyNormalization = gloss; @@ -750,7 +785,7 @@ float4 doLighting(float4 albedo, float3 viewDir, float3 bumpNormal, float3 verte else { // phong // and seem to recall a conversion to above but H = (L+V)/2, the normalize knocks out the 1/2 - float3 ref = normalize(reflect(viewDir, bumpNormal)); + float3 ref = normalize(reflectIQ(viewDir, bumpNormal)); float dotRL = saturate(dot(ref, lightDir)); specularAmount = dotRL; @@ -760,7 +795,7 @@ float4 doLighting(float4 albedo, float3 viewDir, float3 bumpNormal, float3 verte // above can be interpolated specularAmount = pow(specularAmount, specularExp) * energyNormalization; specular = specularAmount * lightColor.rgb; - } + // } } if (doDiffuse) { @@ -862,7 +897,7 @@ float4 DrawPixels( float3 viewDir = calculateViewDir(in.worldPos, uniforms.cameraPosition); - c = doLighting(float4(1.0), viewDir, toFloat(n), toFloat(in.normal)); + c = doLighting(float4(1.0), viewDir, toFloat(n), toFloat(in.normal), uniforms.lightingMode); c.a = 1; } @@ -881,10 +916,10 @@ float4 DrawPixels( in.worldPos, in.texCoord, uniforms.useTangent, // to build TBN uniforms.isNormalMapSwizzleAGToRG, uniforms.isNormalMapSigned, facing); - c = doLighting(c, viewDir, toFloat(n), toFloat(in.normal)); + c = doLighting(c, viewDir, toFloat(n), toFloat(in.normal), uniforms.lightingMode); } else { - c = doLighting(c, viewDir, toFloat(in.normal), toFloat(in.normal)); + c = doLighting(c, viewDir, toFloat(in.normal), toFloat(in.normal), uniforms.lightingMode); } } diff --git a/kramv/KramViewerBase.cpp b/kramv/KramViewerBase.cpp index d53fac5f..b4316476 100644 --- a/kramv/KramViewerBase.cpp +++ b/kramv/KramViewerBase.cpp @@ -62,6 +62,17 @@ const char* ShowSettings::debugModeText() const { return text; } +const char* ShowSettings::lightingModeText() const { + const char* text = ""; + + switch(lightingMode) { + case LightingModeDiffuse: text = "Light Diffuse"; break; + case LightingModeSpecular: text = "Light Specular"; break; + default: break; + } + return text; +} + bool ShowSettings::isEyedropperFromDrawable() { return meshNumber > 0 || isPreview || isShowingAllLevelsAndMips || shapeChannel > 0; } @@ -98,6 +109,20 @@ void ShowSettings::advanceShapeChannel(bool decrement) { } } +void ShowSettings::advanceLightingMode(bool decrement) { + int32_t numEnums = LightingModeCount; + int32_t number = lightingMode; + if (decrement) { + number += numEnums - 1; + } + else { + number += 1; + } + + lightingMode = (LightingMode)(number % numEnums); +} + + void ShowSettings::advanceDebugMode(bool decrement) { int32_t numEnums = DebugModeCount; int32_t mode = debugMode; diff --git a/kramv/KramViewerBase.h b/kramv/KramViewerBase.h index f219534d..fc17a450 100644 --- a/kramv/KramViewerBase.h +++ b/kramv/KramViewerBase.h @@ -70,6 +70,13 @@ enum ShapeChannel ShapeChannelCount }; +enum LightingMode +{ + LightingModeDiffuse = 0, // amb + diffuse + LightingModeSpecular = 1, // amb + diffuse + specular + + LightingModeCount, +}; class ShowSettings { public: @@ -194,6 +201,8 @@ class ShowSettings { ShapeChannel shapeChannel = ShapeChannelNone; + LightingMode 
lightingMode = LightingModeDiffuse; + float4x4 projectionViewModelMatrix; bool isInverted; @@ -208,10 +217,12 @@ class ShowSettings { void advanceMeshNumber(bool decrement); void advanceDebugMode(bool decrement); void advanceShapeChannel(bool decrement); + void advanceLightingMode(bool decrement); const char* meshNumberText() const; const char* shapeChannelText() const; const char* debugModeText() const; + const char* lightingModeText() const; string lastFilename; double lastTimestamp = 0.0; diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index e170a41f..bc558e80 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -498,7 +498,7 @@ - (nonnull ShowSettings*)showSettings { } - (NSStackView*)_addButtons { - const int32_t numButtons = 29; // 13; + const int32_t numButtons = 30; const char* names[numButtons*2] = { "?", "Help", @@ -531,6 +531,7 @@ - (NSStackView*)_addButtons { "8", "Shape", "6", "Shape Channel", + "5", "Lighting", "T", "Tangents", // TODO: need to shift hud over a little @@ -1504,8 +1505,9 @@ - (void)updateUIControlState auto meshState = toState(_showSettings->meshNumber > 0); auto meshChannelState = toState(_showSettings->shapeChannel > 0); + auto lightingState = toState(_showSettings->lightingMode != LightingModeDiffuse); auto tangentState = toState(_showSettings->useTangent); - + // TODO: UI state, and vertical state auto uiState = toState(_buttonStack.hidden); @@ -1534,6 +1536,7 @@ - (void)updateUIControlState [self findButton:"O"].state = previewState; [self findButton:"8"].state = meshState; [self findButton:"6"].state = meshChannelState; + [self findButton:"5"].state = lightingState; [self findButton:"W"].state = wrapState; [self findButton:"D"].state = gridState; [self findButton:"E"].state = debugState; @@ -1566,6 +1569,7 @@ - (void)updateUIControlState [self findMenuItem:"O"].state = previewState; [self findMenuItem:"8"].state = meshState; [self findMenuItem:"6"].state = meshChannelState; + [self findMenuItem:"5"].state = lightingState; [self findMenuItem:"T"].state = tangentState; [self findMenuItem:"W"].state = wrapState; @@ -1651,6 +1655,8 @@ - (IBAction)handleAction:(id)sender { keyCode = Key::Num8; else if (title == "6") keyCode = Key::Num6; + else if (title == "5") + keyCode = Key::Num5; else if (title == "T") keyCode = Key::T; @@ -1793,6 +1799,12 @@ - (bool)handleKey:(uint32_t)keyCode isShiftKeyDown:(bool)isShiftKeyDown isChanged = true; break; } + case Key::Num5: { + _showSettings->advanceLightingMode(isShiftKeyDown); + text = _showSettings->lightingModeText(); + isChanged = true; + break; + } case Key::T: { _showSettings->useTangent = !_showSettings->useTangent; if (_showSettings->useTangent) From 49d81cad753116368940c0f660f9c7987b8d0e3f Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sat, 3 Jul 2021 22:42:56 -0700 Subject: [PATCH 136/901] kramv - add shader hotloading Run buildShaders.sh to rebuild the shaders into a .air file, then to a .metalllib. These are written to the bin folder, but can be placed anywhere. Drop this metallib onto the app, and then after running buildShaders.sh, can just select from the recently loaded menu item to hotload. All shaders and pipelines are rebuilt. The app starts off using the default.metallib that Xcode bundles into the app. Can iterate more rapidly on look and feel in kramv this way. 
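A minimal sketch of the iteration loop this enables, assuming the commands are run from the scripts/ folder and that the built app bundle is named kramv.app (both are assumptions, not verified here); paths follow the buildShaders.sh added in this patch:

  # rebuild KramShaders.metal into a loadable library
  ./buildShaders.sh      # writes ../bin/KramShaders.air and ../bin/KramShaders.metallib

  # hand the library to a running kramv: drag ../bin/KramShaders.metallib onto the
  # app window, or (assuming the kramv.app bundle name) open it from the command line;
  # after the first load it also appears in the recent-documents menu for quick reloads
  open -a kramv ../bin/KramShaders.metallib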
--- kramv/Info.plist | 16 ++++++ kramv/KramRenderer.h | 2 + kramv/KramRenderer.mm | 120 ++++++++++++++++++++++++++++------------ kramv/KramViewerMain.mm | 15 +++++ scripts/buildShaders.sh | 4 ++ 5 files changed, 123 insertions(+), 34 deletions(-) create mode 100755 scripts/buildShaders.sh diff --git a/kramv/Info.plist b/kramv/Info.plist index c3a1e263..63515502 100644 --- a/kramv/Info.plist +++ b/kramv/Info.plist @@ -70,6 +70,22 @@ NSDocumentClass KramDocument + + CFBundleTypeIconSystemGenerated + 1 + CFBundleTypeName + METALLIB + CFBundleTypeRole + Viewer + LSHandlerRank + Default + LSItemContentTypes + + application/octet-stream + + NSDocumentClass + KramDocument + CFBundleExecutable $(EXECUTABLE_NAME) diff --git a/kramv/KramRenderer.h b/kramv/KramRenderer.h index 31490ae2..7840b6d6 100644 --- a/kramv/KramRenderer.h +++ b/kramv/KramRenderer.h @@ -38,6 +38,8 @@ namespace kram { - (simd::float4x4)computeImageTransform:(float)panX panY:(float)panY zoom:(float)zoom; +- (BOOL)hotloadShaders:(nonnull const char*)filename; + @end diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index 1c0e90db..099cd710 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -27,6 +27,13 @@ using namespace kram; using namespace simd; +// Capture what we need to build the renderPieplines, without needing view +struct ViewFramebufferData { + MTLPixelFormat colorPixelFormat = MTLPixelFormatInvalid; + MTLPixelFormat depthStencilPixelFormat = MTLPixelFormatInvalid; + uint32_t sampleCount = 0; +}; + @implementation Renderer { dispatch_semaphore_t _inFlightSemaphore; @@ -102,6 +109,11 @@ @implementation Renderer MTKMesh *_meshCapsule; MTKMeshBufferAllocator *_metalAllocator; + id _shaderLibrary; + NSURL* _metallibFileURL; + NSDate* _metallibFileDate; + ViewFramebufferData _viewFramebuffer; + ShowSettings* _showSettings; } @@ -223,24 +235,29 @@ - (void)_createVertexDescriptor _mdlVertexDescriptor.attributes[VertexAttributeTexcoord].name = MDLVertexAttributeTextureCoordinate; _mdlVertexDescriptor.attributes[VertexAttributeNormal].name = MDLVertexAttributeNormal; _mdlVertexDescriptor.attributes[VertexAttributeTangent].name = MDLVertexAttributeTangent; - } + + - (void)_loadMetalWithView:(nonnull MTKView *)view { /// Load Metal state objects and initialize renderer dependent view properties - view.depthStencilPixelFormat = MTLPixelFormatDepth32Float_Stencil8; - //view.colorPixelFormat = MTLPixelFormatBGRA8Unorm_sRGB; // TODO: adjust this to draw srgb or not, prefer RGBA - - // have a mix of linear color and normals, don't want srgb conversion until displayed view.colorPixelFormat = MTLPixelFormatRGBA16Float; - + view.depthStencilPixelFormat = MTLPixelFormatDepth32Float_Stencil8; view.sampleCount = 1; + _viewFramebuffer.colorPixelFormat = view.colorPixelFormat; + _viewFramebuffer.depthStencilPixelFormat = view.depthStencilPixelFormat; + _viewFramebuffer.sampleCount = view.sampleCount; + [self _createVertexDescriptor]; - [self _createRenderPipelines:view]; + // first time use the default library, if reload is called then use different library + _shaderLibrary = [_device newDefaultLibrary]; + + + [self _createRenderPipelines]; //----------------------- @@ -272,51 +289,87 @@ - (void)_loadMetalWithView:(nonnull MTKView *)view [self _createSampleRender]; } -- (void)_createComputePipelines +- (BOOL)hotloadShaders:(const char*)filename { - id defaultLibrary = [_device newDefaultLibrary]; + NSURL* _metallibFileURL = [NSURL fileURLWithPath:[NSString stringWithUTF8String:filename]]; + + NSError* err = nil; + NSDate 
*fileDate = nil; + [_metallibFileURL getResourceValue:&fileDate forKey:NSURLContentModificationDateKey error:&err]; + + // only reload if the metallib changed timestamp, otherwise default.metallib has most recent copy + if (err != nil || [_metallibFileDate isEqualToDate:fileDate]) { + return NO; + } + _metallibFileDate = fileDate; + + // Now dynamically load the metallib + NSData* dataNS = [NSData dataWithContentsOfURL:_metallibFileURL options:NSDataReadingMappedIfSafe + error:&err]; + if (dataNS == nil) { + return NO; + } + dispatch_data_t data = dispatch_data_create(dataNS.bytes, dataNS.length, dispatch_get_main_queue(), DISPATCH_DATA_DESTRUCTOR_DEFAULT); + + id shaderLibrary = [_device newLibraryWithData:data error:&err]; + if (err != nil) { + return NO; + } + _shaderLibrary = shaderLibrary; + + // rebuild the shaders and pipelines that use the shader + [self _createRenderPipelines]; + [self _createComputePipelines]; + + [self _createSampleRender]; + + return YES; +} + +- (void)_createComputePipelines +{ NSError *error = NULL; id computeFunction; //----------------------- - computeFunction = [defaultLibrary newFunctionWithName:@"SampleImageCS"]; + computeFunction = [_shaderLibrary newFunctionWithName:@"SampleImageCS"]; _pipelineStateImageCS = [_device newComputePipelineStateWithFunction:computeFunction error:&error]; if (!_pipelineStateImageCS) { NSLog(@"Failed to create pipeline state, error %@", error); } - computeFunction = [defaultLibrary newFunctionWithName:@"SampleImageArrayCS"]; + computeFunction = [_shaderLibrary newFunctionWithName:@"SampleImageArrayCS"]; _pipelineStateImageArrayCS = [_device newComputePipelineStateWithFunction:computeFunction error:&error]; if (!_pipelineStateImageArrayCS) { NSLog(@"Failed to create pipeline state, error %@", error); } - computeFunction = [defaultLibrary newFunctionWithName:@"SampleVolumeCS"]; + computeFunction = [_shaderLibrary newFunctionWithName:@"SampleVolumeCS"]; _pipelineStateVolumeCS = [_device newComputePipelineStateWithFunction:computeFunction error:&error]; if (!_pipelineStateVolumeCS) { NSLog(@"Failed to create pipeline state, error %@", error); } - computeFunction = [defaultLibrary newFunctionWithName:@"SampleCubeCS"]; + computeFunction = [_shaderLibrary newFunctionWithName:@"SampleCubeCS"]; _pipelineStateCubeCS = [_device newComputePipelineStateWithFunction:computeFunction error:&error]; if (!_pipelineStateCubeCS) { NSLog(@"Failed to create pipeline state, error %@", error); } - computeFunction = [defaultLibrary newFunctionWithName:@"SampleCubeArrayCS"]; + computeFunction = [_shaderLibrary newFunctionWithName:@"SampleCubeArrayCS"]; _pipelineStateCubeArrayCS = [_device newComputePipelineStateWithFunction:computeFunction error:&error]; if (!_pipelineStateCubeArrayCS) { NSLog(@"Failed to create pipeline state, error %@", error); } - computeFunction = [defaultLibrary newFunctionWithName:@"SampleImage1DArrayCS"]; + computeFunction = [_shaderLibrary newFunctionWithName:@"SampleImage1DArrayCS"]; _pipelineState1DArrayCS = [_device newComputePipelineStateWithFunction:computeFunction error:&error]; if (!_pipelineState1DArrayCS) { @@ -324,30 +377,28 @@ - (void)_createComputePipelines } } -- (void)_createRenderPipelines:(MTKView*)view +- (void)_createRenderPipelines { - id defaultLibrary = [_device newDefaultLibrary]; - id vertexFunction; id fragmentFunction; MTLRenderPipelineDescriptor *pipelineStateDescriptor = [[MTLRenderPipelineDescriptor alloc] init]; pipelineStateDescriptor.label = @"DrawImagePipeline"; - 
pipelineStateDescriptor.sampleCount = view.sampleCount; + pipelineStateDescriptor.sampleCount = _viewFramebuffer.sampleCount; pipelineStateDescriptor.vertexDescriptor = _mtlVertexDescriptor; - pipelineStateDescriptor.colorAttachments[0].pixelFormat = view.colorPixelFormat; + pipelineStateDescriptor.colorAttachments[0].pixelFormat = _viewFramebuffer.colorPixelFormat; // TODO: could drop these for images, but want a 3D preview of content // or might make these memoryless. - pipelineStateDescriptor.depthAttachmentPixelFormat = view.depthStencilPixelFormat; - pipelineStateDescriptor.stencilAttachmentPixelFormat = view.depthStencilPixelFormat; + pipelineStateDescriptor.depthAttachmentPixelFormat = _viewFramebuffer.depthStencilPixelFormat; + pipelineStateDescriptor.stencilAttachmentPixelFormat = _viewFramebuffer.depthStencilPixelFormat; NSError *error = NULL; //----------------------- - vertexFunction = [defaultLibrary newFunctionWithName:@"DrawImageVS"]; - fragmentFunction = [defaultLibrary newFunctionWithName:@"DrawImagePS"]; + vertexFunction = [_shaderLibrary newFunctionWithName:@"DrawImageVS"]; + fragmentFunction = [_shaderLibrary newFunctionWithName:@"DrawImagePS"]; pipelineStateDescriptor.vertexFunction = vertexFunction; pipelineStateDescriptor.fragmentFunction = fragmentFunction; @@ -359,8 +410,8 @@ - (void)_createRenderPipelines:(MTKView*)view //----------------------- - vertexFunction = [defaultLibrary newFunctionWithName:@"DrawImageVS"]; // reused - fragmentFunction = [defaultLibrary newFunctionWithName:@"DrawImageArrayPS"]; + vertexFunction = [_shaderLibrary newFunctionWithName:@"DrawImageVS"]; // reused + fragmentFunction = [_shaderLibrary newFunctionWithName:@"DrawImageArrayPS"]; pipelineStateDescriptor.vertexFunction = vertexFunction; pipelineStateDescriptor.fragmentFunction = fragmentFunction; @@ -372,8 +423,8 @@ - (void)_createRenderPipelines:(MTKView*)view //----------------------- - vertexFunction = [defaultLibrary newFunctionWithName:@"DrawImageVS"]; - fragmentFunction = [defaultLibrary newFunctionWithName:@"Draw1DArrayPS"]; + vertexFunction = [_shaderLibrary newFunctionWithName:@"DrawImageVS"]; + fragmentFunction = [_shaderLibrary newFunctionWithName:@"Draw1DArrayPS"]; pipelineStateDescriptor.vertexFunction = vertexFunction; pipelineStateDescriptor.fragmentFunction = fragmentFunction; @@ -385,8 +436,8 @@ - (void)_createRenderPipelines:(MTKView*)view //----------------------- - vertexFunction = [defaultLibrary newFunctionWithName:@"DrawCubeVS"]; - fragmentFunction = [defaultLibrary newFunctionWithName:@"DrawCubePS"]; + vertexFunction = [_shaderLibrary newFunctionWithName:@"DrawCubeVS"]; + fragmentFunction = [_shaderLibrary newFunctionWithName:@"DrawCubePS"]; pipelineStateDescriptor.vertexFunction = vertexFunction; pipelineStateDescriptor.fragmentFunction = fragmentFunction; @@ -398,8 +449,8 @@ - (void)_createRenderPipelines:(MTKView*)view //----------------------- - vertexFunction = [defaultLibrary newFunctionWithName:@"DrawCubeVS"]; // reused - fragmentFunction = [defaultLibrary newFunctionWithName:@"DrawCubeArrayPS"]; + vertexFunction = [_shaderLibrary newFunctionWithName:@"DrawCubeVS"]; // reused + fragmentFunction = [_shaderLibrary newFunctionWithName:@"DrawCubeArrayPS"]; pipelineStateDescriptor.vertexFunction = vertexFunction; pipelineStateDescriptor.fragmentFunction = fragmentFunction; @@ -411,8 +462,8 @@ - (void)_createRenderPipelines:(MTKView*)view //----------------------- - vertexFunction = [defaultLibrary newFunctionWithName:@"DrawVolumeVS"]; - fragmentFunction = 
[defaultLibrary newFunctionWithName:@"DrawVolumePS"]; + vertexFunction = [_shaderLibrary newFunctionWithName:@"DrawVolumeVS"]; + fragmentFunction = [_shaderLibrary newFunctionWithName:@"DrawVolumePS"]; pipelineStateDescriptor.vertexFunction = vertexFunction; pipelineStateDescriptor.fragmentFunction = fragmentFunction; @@ -1259,6 +1310,7 @@ - (void)_setUniformsLevel:(UniformsLevel&)uniforms mipLOD:(int32_t)mipLOD - (void)drawInMTKView:(nonnull MTKView *)view { @autoreleasepool { + /// Per frame updates here // TODO: move this out, needs to get called off mouseMove, but don't want to call drawMain diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index bc558e80..20f6dfc4 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -2659,6 +2659,21 @@ - (BOOL)loadTextureFromURL:(NSURL*)url { //------------------- + if (endsWithExtension(filename, ".metallib")) { + + Renderer* renderer = (Renderer*)self.delegate; + if ([renderer hotloadShaders: filename]) { + NSURL* metallibFileURL = [NSURL fileURLWithPath:[NSString stringWithUTF8String:filename]]; + + // add to recent docs, so can reload quickly + NSDocumentController* dc = [NSDocumentController sharedDocumentController]; + [dc noteNewRecentDocumentURL:metallibFileURL]; + + return YES; + } + return NO; + } + // file is not a supported extension if (!(endsWithExtension(filename, ".zip") || isPNGFilename(filename) || diff --git a/scripts/buildShaders.sh b/scripts/buildShaders.sh new file mode 100755 index 00000000..12879a2a --- /dev/null +++ b/scripts/buildShaders.sh @@ -0,0 +1,4 @@ +#!/bin/zsh + +xcrun -sdk macosx metal -c ../kramv/KramShaders.metal -o ../bin/KramShaders.air +xcrun -sdk macosx metallib ../bin/KramShaders.air -o ../bin/KramShaders.metallib \ No newline at end of file From ce91383a6d391383ba7e2b140dba76c38e90cb73 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sat, 3 Jul 2021 23:18:24 -0700 Subject: [PATCH 137/901] kramv - remove the bin/.air file once the .metallib is built. 
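A quick sanity check of the cleanup, sketched under the assumption that the script is run from the scripts/ folder:

  ./buildShaders.sh
  ls ../bin      # KramShaders.metallib should be present; the KramShaders.air intermediate should be gone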
--- scripts/buildShaders.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/scripts/buildShaders.sh b/scripts/buildShaders.sh index 12879a2a..1c01481f 100755 --- a/scripts/buildShaders.sh +++ b/scripts/buildShaders.sh @@ -1,4 +1,7 @@ #!/bin/zsh xcrun -sdk macosx metal -c ../kramv/KramShaders.metal -o ../bin/KramShaders.air -xcrun -sdk macosx metallib ../bin/KramShaders.air -o ../bin/KramShaders.metallib \ No newline at end of file +xcrun -sdk macosx metallib ../bin/KramShaders.air -o ../bin/KramShaders.metallib + +# don't need this after metallib built +rm ../bin/KramShaders.air \ No newline at end of file From 2b39bc43eec24747035160305881568ac2e99587 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 4 Jul 2021 14:58:01 -0700 Subject: [PATCH 138/901] kramv - simplify compute/render shader/pipeline creation --- kramv/KramLoader.mm | 4 +- kramv/KramRenderer.mm | 171 +++++++++++++--------------------------- kramv/KramViewerMain.mm | 4 +- 3 files changed, 60 insertions(+), 119 deletions(-) diff --git a/kramv/KramLoader.mm b/kramv/KramLoader.mm index 6053733e..f29d0e80 100644 --- a/kramv/KramLoader.mm +++ b/kramv/KramLoader.mm @@ -28,7 +28,7 @@ using namespace simd; string kram::toLower(const string& text) { - return string([[[NSString stringWithUTF8String:text.c_str()] lowercaseString] UTF8String]); + return string([NSString stringWithUTF8String:text.c_str()].lowercaseString.UTF8String); } // defer data need to blit staging MTLBuffer to MTLTexture at the start of rendering @@ -309,7 +309,7 @@ static uint32_t numberOfMipmapLevels(const Image& image) { - (BOOL)loadImageFromURL:(nonnull NSURL *)url image:(KTXImage&)image imageData:(KTXImageData&)imageData { - const char *path = [url.absoluteURL.path UTF8String]; + const char *path = url.absoluteURL.path.UTF8String; // TODO: could also ignore extension, and look at header/signature instead // files can be renamed to the incorrect extensions diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index 099cd710..bd80117e 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -327,63 +327,47 @@ - (BOOL)hotloadShaders:(const char*)filename return YES; } -- (void)_createComputePipelines +- (id)_createComputePipeline:(const char*)name { - NSError *error = NULL; - id computeFunction; - - //----------------------- - - computeFunction = [_shaderLibrary newFunctionWithName:@"SampleImageCS"]; - _pipelineStateImageCS = [_device newComputePipelineStateWithFunction:computeFunction error:&error]; - if (!_pipelineStateImageCS) - { - NSLog(@"Failed to create pipeline state, error %@", error); - } - - computeFunction = [_shaderLibrary newFunctionWithName:@"SampleImageArrayCS"]; - _pipelineStateImageArrayCS = [_device newComputePipelineStateWithFunction:computeFunction error:&error]; - if (!_pipelineStateImageArrayCS) - { - NSLog(@"Failed to create pipeline state, error %@", error); - } - - computeFunction = [_shaderLibrary newFunctionWithName:@"SampleVolumeCS"]; - _pipelineStateVolumeCS = [_device newComputePipelineStateWithFunction:computeFunction error:&error]; - if (!_pipelineStateVolumeCS) - { - NSLog(@"Failed to create pipeline state, error %@", error); - } + NSString* nameNS = [NSString stringWithUTF8String:name]; + NSError *error = nil; + id computeFunction = [_shaderLibrary newFunctionWithName:nameNS]; - computeFunction = [_shaderLibrary newFunctionWithName:@"SampleCubeCS"]; - _pipelineStateCubeCS = [_device newComputePipelineStateWithFunction:computeFunction error:&error]; - if (!_pipelineStateCubeCS) - { - 
NSLog(@"Failed to create pipeline state, error %@", error); + id pipe; + if (computeFunction) { + computeFunction.label = nameNS; + + pipe = [_device newComputePipelineStateWithFunction:computeFunction error:&error]; } - - computeFunction = [_shaderLibrary newFunctionWithName:@"SampleCubeArrayCS"]; - _pipelineStateCubeArrayCS = [_device newComputePipelineStateWithFunction:computeFunction error:&error]; - if (!_pipelineStateCubeArrayCS) - { - NSLog(@"Failed to create pipeline state, error %@", error); + + if (!pipe) { + KLOGE("kramv", "Failed to create compute pipeline state for %s, error %s", name, error ? error.localizedDescription.UTF8String : ""); + return nil; } - computeFunction = [_shaderLibrary newFunctionWithName:@"SampleImage1DArrayCS"]; - _pipelineState1DArrayCS = [_device newComputePipelineStateWithFunction:computeFunction error:&error]; - if (!_pipelineState1DArrayCS) - { - NSLog(@"Failed to create pipeline state, error %@", error); - } + return pipe; } -- (void)_createRenderPipelines +- (void)_createComputePipelines { + _pipelineStateImageCS = [self _createComputePipeline:"SampleImageCS"]; + _pipelineStateImageArrayCS = [self _createComputePipeline:"SampleImageArrayCS"]; + _pipelineStateVolumeCS = [self _createComputePipeline:"SampleVolumeCS"]; + _pipelineStateCubeCS = [self _createComputePipeline:"SampleCubeCS"]; + _pipelineStateCubeArrayCS = [self _createComputePipeline:"SampleCubeArrayCS"]; + _pipelineState1DArrayCS = [self _createComputePipeline:"SampleImage1DArrayCS"]; +} + +- (id)_createRenderPipeline:(const char*)vs fs:(const char*)fs +{ + NSString* vsNameNS = [NSString stringWithUTF8String:vs]; + NSString* fsNameNS = [NSString stringWithUTF8String:fs]; + id vertexFunction; id fragmentFunction; MTLRenderPipelineDescriptor *pipelineStateDescriptor = [[MTLRenderPipelineDescriptor alloc] init]; - pipelineStateDescriptor.label = @"DrawImagePipeline"; + pipelineStateDescriptor.label = fsNameNS; pipelineStateDescriptor.sampleCount = _viewFramebuffer.sampleCount; pipelineStateDescriptor.vertexDescriptor = _mtlVertexDescriptor; pipelineStateDescriptor.colorAttachments[0].pixelFormat = _viewFramebuffer.colorPixelFormat; @@ -397,81 +381,38 @@ - (void)_createRenderPipelines //----------------------- - vertexFunction = [_shaderLibrary newFunctionWithName:@"DrawImageVS"]; - fragmentFunction = [_shaderLibrary newFunctionWithName:@"DrawImagePS"]; - pipelineStateDescriptor.vertexFunction = vertexFunction; - pipelineStateDescriptor.fragmentFunction = fragmentFunction; + vertexFunction = [_shaderLibrary newFunctionWithName:vsNameNS]; + fragmentFunction = [_shaderLibrary newFunctionWithName:fsNameNS]; - _pipelineStateImage = [_device newRenderPipelineStateWithDescriptor:pipelineStateDescriptor error:&error]; - if (!_pipelineStateImage) - { - NSLog(@"Failed to create pipeline state, error %@", error); - } - - //----------------------- - - vertexFunction = [_shaderLibrary newFunctionWithName:@"DrawImageVS"]; // reused - fragmentFunction = [_shaderLibrary newFunctionWithName:@"DrawImageArrayPS"]; - pipelineStateDescriptor.vertexFunction = vertexFunction; - pipelineStateDescriptor.fragmentFunction = fragmentFunction; - - _pipelineStateImageArray = [_device newRenderPipelineStateWithDescriptor:pipelineStateDescriptor error:&error]; - if (!_pipelineStateImageArray) - { - NSLog(@"Failed to create pipeline state, error %@", error); - } - - //----------------------- - - vertexFunction = [_shaderLibrary newFunctionWithName:@"DrawImageVS"]; - fragmentFunction = [_shaderLibrary 
newFunctionWithName:@"Draw1DArrayPS"]; - pipelineStateDescriptor.vertexFunction = vertexFunction; - pipelineStateDescriptor.fragmentFunction = fragmentFunction; + id pipe; - _pipelineState1DArray = [_device newRenderPipelineStateWithDescriptor:pipelineStateDescriptor error:&error]; - if (!_pipelineState1DArray) - { - NSLog(@"Failed to create pipeline state, error %@", error); - } - - //----------------------- - - vertexFunction = [_shaderLibrary newFunctionWithName:@"DrawCubeVS"]; - fragmentFunction = [_shaderLibrary newFunctionWithName:@"DrawCubePS"]; - pipelineStateDescriptor.vertexFunction = vertexFunction; - pipelineStateDescriptor.fragmentFunction = fragmentFunction; - - _pipelineStateCube = [_device newRenderPipelineStateWithDescriptor:pipelineStateDescriptor error:&error]; - if (!_pipelineStateCube) - { - NSLog(@"Failed to create pipeline state, error %@", error); + if (vertexFunction && fragmentFunction) { + vertexFunction.label = vsNameNS; + fragmentFunction.label = fsNameNS; + + pipelineStateDescriptor.vertexFunction = vertexFunction; + pipelineStateDescriptor.fragmentFunction = fragmentFunction; + + pipe = [_device newRenderPipelineStateWithDescriptor:pipelineStateDescriptor error:&error]; } - //----------------------- - - vertexFunction = [_shaderLibrary newFunctionWithName:@"DrawCubeVS"]; // reused - fragmentFunction = [_shaderLibrary newFunctionWithName:@"DrawCubeArrayPS"]; - pipelineStateDescriptor.vertexFunction = vertexFunction; - pipelineStateDescriptor.fragmentFunction = fragmentFunction; - - _pipelineStateCubeArray = [_device newRenderPipelineStateWithDescriptor:pipelineStateDescriptor error:&error]; - if (!_pipelineStateCubeArray) + if (!pipe) { - NSLog(@"Failed to create pipeline state, error %@", error); + KLOGE("kramv", "Failed to create render pipeline state for %s, error %s", fs, error ? 
error.description.UTF8String : ""); + return nil; } - //----------------------- - - vertexFunction = [_shaderLibrary newFunctionWithName:@"DrawVolumeVS"]; - fragmentFunction = [_shaderLibrary newFunctionWithName:@"DrawVolumePS"]; - pipelineStateDescriptor.vertexFunction = vertexFunction; - pipelineStateDescriptor.fragmentFunction = fragmentFunction; - - _pipelineStateVolume = [_device newRenderPipelineStateWithDescriptor:pipelineStateDescriptor error:&error]; - if (!_pipelineStateVolume) - { - NSLog(@"Failed to create pipeline state, error %@", error); - } + return pipe; +} + +- (void)_createRenderPipelines +{ + _pipelineStateImage = [self _createRenderPipeline:"DrawImageVS" fs:"DrawImagePS"]; + _pipelineStateImageArray = [self _createRenderPipeline:"DrawImageVS" fs:"DrawImageArrayPS"]; + _pipelineState1DArray = [self _createRenderPipeline:"DrawImageVS" fs:"Draw1DArrayPS"]; + _pipelineStateCube = [self _createRenderPipeline:"DrawCubeVS" fs:"DrawCubePS"]; + _pipelineStateCubeArray = [self _createRenderPipeline:"DrawCubeVS" fs:"DrawCubeArrayPS"]; + _pipelineStateVolume = [self _createRenderPipeline:"DrawVolumeVS" fs:"DrawVolumePS"]; } - (void)_createSampleRender @@ -572,7 +513,7 @@ - (MTKMesh*)_createMeshAsset:(const char*)name mdlMesh:(MDLMesh*)mdlMesh doFlipU if(!mesh || error) { - NSLog(@"Error creating MetalKit mesh %@", error.localizedDescription); + KLOGE("kramv", "Error creating MetalKit mesh %s", error.localizedDescription.UTF8String); return nil; } @@ -839,7 +780,7 @@ - (BOOL)loadTextureFromImage:(const string&)fullFilename - (BOOL)loadTexture:(nonnull NSURL *)url { - string fullFilename = [url.path UTF8String]; + string fullFilename = url.path.UTF8String; // can use this to pull, or use fstat on FileHelper NSDate *fileDate = nil; diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index 20f6dfc4..4827bedd 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -1595,11 +1595,11 @@ - (IBAction)handleAction:(id)sender { // sender is the UI element/NSButton if ([sender isKindOfClass:[NSButton class]]) { NSButton* button = (NSButton*)sender; - title = [button.title UTF8String]; + title = button.title.UTF8String; } else if ([sender isKindOfClass:[NSMenuItem class]]) { NSMenuItem* menuItem = (NSMenuItem*)sender; - title = [menuItem.toolTip UTF8String]; + title = menuItem.toolTip.UTF8String; } else { KLOGE("kram", "unknown UI element"); From 5bf199c0d7119dce17fcbbf92f26df420e9fb027 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Mon, 5 Jul 2021 23:01:00 -0700 Subject: [PATCH 139/901] kramv - small shader cleanup --- kramv/KramShaders.metal | 8 ++++---- libkram/CMakeLists.txt | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index 3ab86961..f48f5413 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -536,8 +536,8 @@ struct Vertex float2 texCoord [[attribute(VertexAttributeTexcoord)]]; // basis - float3 normal [[attribute(VertexAttributeNormal)]];; // consider hallf - float4 tangent [[attribute(VertexAttributeTangent)]];; // tan + bitanSign + float3 normal [[attribute(VertexAttributeNormal)]]; // consider half + float4 tangent [[attribute(VertexAttributeTangent)]]; // tan + bitanSign }; struct ColorInOut @@ -847,7 +847,7 @@ float3 calculateViewDir(float3 worldPos, float3 cameraPosition) { float4 DrawPixels( ColorInOut in [[stage_in]], - bool facing [[front_facing]], + bool facing, constant Uniforms& uniforms, float4 c, float4 nmap, @@ -882,7 +882,7 @@ float4 DrawPixels( // 
distance to edge in pixels (scalar) float pixelDist = dist * onePixel; - // typicaly source recommends smoothstep, so that get a soft instead of hard ramp of alpha at edges + // typically source recommends smoothstep, so that get a soft instead of hard ramp of alpha at edges // store as preml alpha c.rgba = saturate(pixelDist); diff --git a/libkram/CMakeLists.txt b/libkram/CMakeLists.txt index e5fd590b..7d5e3dbf 100644 --- a/libkram/CMakeLists.txt +++ b/libkram/CMakeLists.txt @@ -61,7 +61,7 @@ file(GLOB_RECURSE libSources CONFIGURE_DEPENDS "${SOURCE_DIR}/astc-encoder/*.cpp" "${SOURCE_DIR}/astc-encoder/*.h" - # ATE is Apple specifi+ macOS) + # ATE is Apple specific to macOS) "${SOURCE_DIR}/ate/*.mm" "${SOURCE_DIR}/ate/*.h" From 44de29f34d57cba271f3a69fa6d554ddfe789885 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Fri, 6 Aug 2021 00:26:50 -0700 Subject: [PATCH 140/901] kram - add eastl files --- .../eastl/include/EABase/config/eacompiler.h | 1778 +++++++ .../include/EABase/config/eacompilertraits.h | 2561 ++++++++++ .../eastl/include/EABase/config/eaplatform.h | 738 +++ libkram/eastl/include/EABase/eabase.h | 1011 ++++ libkram/eastl/include/EABase/eahave.h | 877 ++++ libkram/eastl/include/EABase/earesult.h | 62 + libkram/eastl/include/EABase/eastdarg.h | 99 + libkram/eastl/include/EABase/eaunits.h | 54 + libkram/eastl/include/EABase/int128.h | 1268 +++++ libkram/eastl/include/EABase/nullptr.h | 102 + libkram/eastl/include/EABase/version.h | 36 + libkram/eastl/include/EASTL/algorithm.h | 4221 +++++++++++++++++ libkram/eastl/include/EASTL/allocator.h | 395 ++ .../eastl/include/EASTL/allocator_malloc.h | 130 + libkram/eastl/include/EASTL/any.h | 652 +++ libkram/eastl/include/EASTL/array.h | 530 +++ libkram/eastl/include/EASTL/atomic.h | 1772 +++++++ libkram/eastl/include/EASTL/bitset.h | 2232 +++++++++ libkram/eastl/include/EASTL/bitvector.h | 1474 ++++++ libkram/eastl/include/EASTL/bonus/adaptors.h | 88 + .../eastl/include/EASTL/bonus/call_traits.h | 117 + .../include/EASTL/bonus/compressed_pair.h | 460 ++ .../include/EASTL/bonus/fixed_ring_buffer.h | 50 + .../include/EASTL/bonus/fixed_tuple_vector.h | 210 + .../include/EASTL/bonus/intrusive_sdlist.h | 694 +++ .../include/EASTL/bonus/intrusive_slist.h | 321 ++ libkram/eastl/include/EASTL/bonus/list_map.h | 932 ++++ libkram/eastl/include/EASTL/bonus/lru_cache.h | 424 ++ .../eastl/include/EASTL/bonus/ring_buffer.h | 1581 ++++++ .../eastl/include/EASTL/bonus/sort_extra.h | 204 + .../eastl/include/EASTL/bonus/tuple_vector.h | 1592 +++++++ libkram/eastl/include/EASTL/chrono.h | 744 +++ libkram/eastl/include/EASTL/core_allocator.h | 70 + .../include/EASTL/core_allocator_adapter.h | 368 ++ libkram/eastl/include/EASTL/deque.h | 2687 +++++++++++ libkram/eastl/include/EASTL/finally.h | 93 + libkram/eastl/include/EASTL/fixed_allocator.h | 455 ++ libkram/eastl/include/EASTL/fixed_function.h | 218 + libkram/eastl/include/EASTL/fixed_hash_map.h | 822 ++++ libkram/eastl/include/EASTL/fixed_hash_set.h | 782 +++ libkram/eastl/include/EASTL/fixed_list.h | 388 ++ libkram/eastl/include/EASTL/fixed_map.h | 580 +++ libkram/eastl/include/EASTL/fixed_set.h | 578 +++ libkram/eastl/include/EASTL/fixed_slist.h | 389 ++ libkram/eastl/include/EASTL/fixed_string.h | 805 ++++ libkram/eastl/include/EASTL/fixed_substring.h | 265 ++ libkram/eastl/include/EASTL/fixed_vector.h | 625 +++ libkram/eastl/include/EASTL/functional.h | 1266 +++++ libkram/eastl/include/EASTL/hash_map.h | 580 +++ libkram/eastl/include/EASTL/hash_set.h | 468 ++ libkram/eastl/include/EASTL/heap.h | 685 
+++ .../eastl/include/EASTL/initializer_list.h | 96 + .../include/EASTL/internal/atomic/arch/arch.h | 65 + .../internal/atomic/arch/arch_add_fetch.h | 173 + .../internal/atomic/arch/arch_and_fetch.h | 173 + .../atomic/arch/arch_cmpxchg_strong.h | 430 ++ .../internal/atomic/arch/arch_cmpxchg_weak.h | 430 ++ .../atomic/arch/arch_compiler_barrier.h | 19 + .../internal/atomic/arch/arch_cpu_pause.h | 25 + .../internal/atomic/arch/arch_exchange.h | 173 + .../internal/atomic/arch/arch_fetch_add.h | 173 + .../internal/atomic/arch/arch_fetch_and.h | 173 + .../internal/atomic/arch/arch_fetch_or.h | 173 + .../internal/atomic/arch/arch_fetch_sub.h | 173 + .../internal/atomic/arch/arch_fetch_xor.h | 173 + .../EASTL/internal/atomic/arch/arch_load.h | 125 + .../atomic/arch/arch_memory_barrier.h | 47 + .../internal/atomic/arch/arch_or_fetch.h | 173 + .../internal/atomic/arch/arch_signal_fence.h | 21 + .../EASTL/internal/atomic/arch/arch_store.h | 113 + .../internal/atomic/arch/arch_sub_fetch.h | 173 + .../internal/atomic/arch/arch_thread_fence.h | 49 + .../internal/atomic/arch/arch_xor_fetch.h | 173 + .../include/EASTL/internal/atomic/atomic.h | 252 + .../EASTL/internal/atomic/atomic_asserts.h | 75 + .../EASTL/internal/atomic/atomic_base_width.h | 346 ++ .../EASTL/internal/atomic/atomic_casts.h | 190 + .../EASTL/internal/atomic/atomic_flag.h | 170 + .../internal/atomic/atomic_flag_standalone.h | 69 + .../EASTL/internal/atomic/atomic_integral.h | 343 ++ .../EASTL/internal/atomic/atomic_macros.h | 67 + .../atomic/atomic_macros/atomic_macros.h | 145 + .../atomic_macros/atomic_macros_add_fetch.h | 98 + .../atomic_macros/atomic_macros_and_fetch.h | 98 + .../atomic/atomic_macros/atomic_macros_base.h | 65 + .../atomic_macros_cmpxchg_strong.h | 245 + .../atomic_macros_cmpxchg_weak.h | 245 + .../atomic_macros_compiler_barrier.h | 30 + .../atomic_macros/atomic_macros_cpu_pause.h | 22 + .../atomic_macros/atomic_macros_exchange.h | 98 + .../atomic_macros/atomic_macros_fetch_add.h | 98 + .../atomic_macros/atomic_macros_fetch_and.h | 98 + .../atomic_macros/atomic_macros_fetch_or.h | 98 + .../atomic_macros/atomic_macros_fetch_sub.h | 98 + .../atomic_macros/atomic_macros_fetch_xor.h | 98 + .../atomic/atomic_macros/atomic_macros_load.h | 75 + .../atomic_macros_memory_barrier.h | 38 + .../atomic_macros/atomic_macros_or_fetch.h | 98 + .../atomic_macros_signal_fence.h | 34 + .../atomic_macros/atomic_macros_store.h | 68 + .../atomic_macros/atomic_macros_sub_fetch.h | 98 + .../atomic_macros_thread_fence.h | 34 + .../atomic_macros/atomic_macros_xor_fetch.h | 98 + .../internal/atomic/atomic_memory_order.h | 44 + .../EASTL/internal/atomic/atomic_pointer.h | 281 ++ .../atomic/atomic_pop_compiler_options.h | 11 + .../atomic/atomic_push_compiler_options.h | 17 + .../internal/atomic/atomic_size_aligned.h | 197 + .../EASTL/internal/atomic/atomic_standalone.h | 470 ++ .../EASTL/internal/atomic/compiler/compiler.h | 120 + .../atomic/compiler/compiler_add_fetch.h | 173 + .../atomic/compiler/compiler_and_fetch.h | 173 + .../atomic/compiler/compiler_barrier.h | 36 + .../atomic/compiler/compiler_cmpxchg_strong.h | 430 ++ .../atomic/compiler/compiler_cmpxchg_weak.h | 430 ++ .../atomic/compiler/compiler_cpu_pause.h | 32 + .../atomic/compiler/compiler_exchange.h | 173 + .../atomic/compiler/compiler_fetch_add.h | 173 + .../atomic/compiler/compiler_fetch_and.h | 173 + .../atomic/compiler/compiler_fetch_or.h | 173 + .../atomic/compiler/compiler_fetch_sub.h | 173 + .../atomic/compiler/compiler_fetch_xor.h | 173 + 
.../internal/atomic/compiler/compiler_load.h | 139 + .../atomic/compiler/compiler_memory_barrier.h | 47 + .../atomic/compiler/compiler_or_fetch.h | 173 + .../atomic/compiler/compiler_signal_fence.h | 49 + .../internal/atomic/compiler/compiler_store.h | 113 + .../atomic/compiler/compiler_sub_fetch.h | 173 + .../atomic/compiler/compiler_thread_fence.h | 49 + .../atomic/compiler/compiler_xor_fetch.h | 173 + .../atomic/compiler/gcc/compiler_gcc.h | 154 + .../compiler/gcc/compiler_gcc_add_fetch.h | 118 + .../compiler/gcc/compiler_gcc_and_fetch.h | 118 + .../compiler/gcc/compiler_gcc_barrier.h | 30 + .../gcc/compiler_gcc_cmpxchg_strong.h | 182 + .../compiler/gcc/compiler_gcc_cmpxchg_weak.h | 182 + .../compiler/gcc/compiler_gcc_cpu_pause.h | 31 + .../compiler/gcc/compiler_gcc_exchange.h | 118 + .../compiler/gcc/compiler_gcc_fetch_add.h | 118 + .../compiler/gcc/compiler_gcc_fetch_and.h | 118 + .../compiler/gcc/compiler_gcc_fetch_or.h | 118 + .../compiler/gcc/compiler_gcc_fetch_sub.h | 118 + .../compiler/gcc/compiler_gcc_fetch_xor.h | 118 + .../atomic/compiler/gcc/compiler_gcc_load.h | 90 + .../compiler/gcc/compiler_gcc_or_fetch.h | 118 + .../compiler/gcc/compiler_gcc_signal_fence.h | 38 + .../atomic/compiler/gcc/compiler_gcc_store.h | 89 + .../compiler/gcc/compiler_gcc_sub_fetch.h | 118 + .../compiler/gcc/compiler_gcc_thread_fence.h | 38 + .../compiler/gcc/compiler_gcc_xor_fetch.h | 118 + .../atomic/compiler/msvc/compiler_msvc.h | 260 + .../compiler/msvc/compiler_msvc_add_fetch.h | 104 + .../compiler/msvc/compiler_msvc_and_fetch.h | 121 + .../compiler/msvc/compiler_msvc_barrier.h | 31 + .../msvc/compiler_msvc_cmpxchg_strong.h | 195 + .../msvc/compiler_msvc_cmpxchg_weak.h | 162 + .../compiler/msvc/compiler_msvc_cpu_pause.h | 27 + .../compiler/msvc/compiler_msvc_exchange.h | 125 + .../compiler/msvc/compiler_msvc_fetch_add.h | 101 + .../compiler/msvc/compiler_msvc_fetch_and.h | 118 + .../compiler/msvc/compiler_msvc_fetch_or.h | 118 + .../compiler/msvc/compiler_msvc_fetch_sub.h | 104 + .../compiler/msvc/compiler_msvc_fetch_xor.h | 118 + .../compiler/msvc/compiler_msvc_or_fetch.h | 121 + .../msvc/compiler_msvc_signal_fence.h | 34 + .../compiler/msvc/compiler_msvc_sub_fetch.h | 107 + .../compiler/msvc/compiler_msvc_xor_fetch.h | 121 + .../include/EASTL/internal/char_traits.h | 464 ++ libkram/eastl/include/EASTL/internal/config.h | 1877 ++++++++ .../eastl/include/EASTL/internal/copy_help.h | 215 + .../include/EASTL/internal/enable_shared.h | 83 + .../eastl/include/EASTL/internal/fill_help.h | 484 ++ .../eastl/include/EASTL/internal/fixed_pool.h | 1631 +++++++ .../eastl/include/EASTL/internal/function.h | 161 + .../include/EASTL/internal/function_detail.h | 673 +++ .../include/EASTL/internal/function_help.h | 51 + .../include/EASTL/internal/functional_base.h | 389 ++ .../include/EASTL/internal/generic_iterator.h | 208 + .../eastl/include/EASTL/internal/hashtable.h | 3222 +++++++++++++ .../eastl/include/EASTL/internal/in_place_t.h | 82 + .../include/EASTL/internal/integer_sequence.h | 74 + .../EASTL/internal/intrusive_hashtable.h | 989 ++++ libkram/eastl/include/EASTL/internal/mem_fn.h | 304 ++ .../include/EASTL/internal/memory_base.h | 37 + .../eastl/include/EASTL/internal/move_help.h | 162 + .../include/EASTL/internal/pair_fwd_decls.h | 16 + .../EASTL/internal/piecewise_construct_t.h | 46 + .../include/EASTL/internal/red_black_tree.h | 2400 ++++++++++ .../eastl/include/EASTL/internal/smart_ptr.h | 264 ++ .../include/EASTL/internal/thread_support.h | 244 + .../include/EASTL/internal/tuple_fwd_decls.h | 56 
+ .../include/EASTL/internal/type_compound.h | 800 ++++ .../include/EASTL/internal/type_fundamental.h | 289 ++ .../eastl/include/EASTL/internal/type_pod.h | 1945 ++++++++ .../include/EASTL/internal/type_properties.h | 380 ++ .../EASTL/internal/type_transformations.h | 606 +++ .../eastl/include/EASTL/intrusive_hash_map.h | 98 + .../eastl/include/EASTL/intrusive_hash_set.h | 100 + libkram/eastl/include/EASTL/intrusive_list.h | 1315 +++++ libkram/eastl/include/EASTL/intrusive_ptr.h | 426 ++ libkram/eastl/include/EASTL/iterator.h | 1192 +++++ libkram/eastl/include/EASTL/linked_array.h | 336 ++ libkram/eastl/include/EASTL/linked_ptr.h | 426 ++ libkram/eastl/include/EASTL/list.h | 2168 +++++++++ libkram/eastl/include/EASTL/map.h | 684 +++ libkram/eastl/include/EASTL/memory.h | 1685 +++++++ libkram/eastl/include/EASTL/meta.h | 222 + libkram/eastl/include/EASTL/numeric.h | 247 + libkram/eastl/include/EASTL/numeric_limits.h | 1718 +++++++ libkram/eastl/include/EASTL/optional.h | 708 +++ libkram/eastl/include/EASTL/priority_queue.h | 491 ++ libkram/eastl/include/EASTL/queue.h | 366 ++ libkram/eastl/include/EASTL/random.h | 254 + libkram/eastl/include/EASTL/ratio.h | 320 ++ libkram/eastl/include/EASTL/safe_ptr.h | 485 ++ libkram/eastl/include/EASTL/scoped_array.h | 237 + libkram/eastl/include/EASTL/scoped_ptr.h | 256 + .../eastl/include/EASTL/segmented_vector.h | 523 ++ libkram/eastl/include/EASTL/set.h | 655 +++ libkram/eastl/include/EASTL/shared_array.h | 434 ++ libkram/eastl/include/EASTL/shared_ptr.h | 1696 +++++++ libkram/eastl/include/EASTL/slist.h | 1930 ++++++++ libkram/eastl/include/EASTL/sort.h | 2019 ++++++++ libkram/eastl/include/EASTL/span.h | 427 ++ libkram/eastl/include/EASTL/stack.h | 346 ++ libkram/eastl/include/EASTL/string.h | 4100 ++++++++++++++++ libkram/eastl/include/EASTL/string_hash_map.h | 189 + libkram/eastl/include/EASTL/string_map.h | 167 + libkram/eastl/include/EASTL/string_view.h | 631 +++ libkram/eastl/include/EASTL/tuple.h | 1006 ++++ libkram/eastl/include/EASTL/type_traits.h | 1060 +++++ libkram/eastl/include/EASTL/unique_ptr.h | 732 +++ libkram/eastl/include/EASTL/unordered_map.h | 55 + libkram/eastl/include/EASTL/unordered_set.h | 53 + libkram/eastl/include/EASTL/utility.h | 872 ++++ libkram/eastl/include/EASTL/variant.h | 1236 +++++ libkram/eastl/include/EASTL/vector.h | 2055 ++++++++ libkram/eastl/include/EASTL/vector_map.h | 906 ++++ libkram/eastl/include/EASTL/vector_multimap.h | 843 ++++ libkram/eastl/include/EASTL/vector_multiset.h | 764 +++ libkram/eastl/include/EASTL/vector_set.h | 793 ++++ libkram/eastl/include/EASTL/version.h | 15 + libkram/eastl/include/EASTL/weak_ptr.h | 17 + libkram/eastl/source/allocator_eastl.cpp | 56 + libkram/eastl/source/assert.cpp | 108 + libkram/eastl/source/atomic.cpp | 25 + libkram/eastl/source/fixed_pool.cpp | 70 + libkram/eastl/source/hashtable.cpp | 177 + libkram/eastl/source/intrusive_list.cpp | 87 + libkram/eastl/source/numeric_limits.cpp | 572 +++ libkram/eastl/source/red_black_tree.cpp | 518 ++ libkram/eastl/source/string.cpp | 464 ++ libkram/eastl/source/thread_support.cpp | 121 + 253 files changed, 112522 insertions(+) create mode 100644 libkram/eastl/include/EABase/config/eacompiler.h create mode 100644 libkram/eastl/include/EABase/config/eacompilertraits.h create mode 100644 libkram/eastl/include/EABase/config/eaplatform.h create mode 100644 libkram/eastl/include/EABase/eabase.h create mode 100644 libkram/eastl/include/EABase/eahave.h create mode 100644 libkram/eastl/include/EABase/earesult.h create mode 100644 
libkram/eastl/include/EABase/eastdarg.h create mode 100644 libkram/eastl/include/EABase/eaunits.h create mode 100644 libkram/eastl/include/EABase/int128.h create mode 100644 libkram/eastl/include/EABase/nullptr.h create mode 100644 libkram/eastl/include/EABase/version.h create mode 100644 libkram/eastl/include/EASTL/algorithm.h create mode 100644 libkram/eastl/include/EASTL/allocator.h create mode 100644 libkram/eastl/include/EASTL/allocator_malloc.h create mode 100644 libkram/eastl/include/EASTL/any.h create mode 100644 libkram/eastl/include/EASTL/array.h create mode 100644 libkram/eastl/include/EASTL/atomic.h create mode 100644 libkram/eastl/include/EASTL/bitset.h create mode 100644 libkram/eastl/include/EASTL/bitvector.h create mode 100644 libkram/eastl/include/EASTL/bonus/adaptors.h create mode 100644 libkram/eastl/include/EASTL/bonus/call_traits.h create mode 100644 libkram/eastl/include/EASTL/bonus/compressed_pair.h create mode 100644 libkram/eastl/include/EASTL/bonus/fixed_ring_buffer.h create mode 100644 libkram/eastl/include/EASTL/bonus/fixed_tuple_vector.h create mode 100644 libkram/eastl/include/EASTL/bonus/intrusive_sdlist.h create mode 100644 libkram/eastl/include/EASTL/bonus/intrusive_slist.h create mode 100644 libkram/eastl/include/EASTL/bonus/list_map.h create mode 100644 libkram/eastl/include/EASTL/bonus/lru_cache.h create mode 100644 libkram/eastl/include/EASTL/bonus/ring_buffer.h create mode 100644 libkram/eastl/include/EASTL/bonus/sort_extra.h create mode 100644 libkram/eastl/include/EASTL/bonus/tuple_vector.h create mode 100644 libkram/eastl/include/EASTL/chrono.h create mode 100644 libkram/eastl/include/EASTL/core_allocator.h create mode 100644 libkram/eastl/include/EASTL/core_allocator_adapter.h create mode 100644 libkram/eastl/include/EASTL/deque.h create mode 100644 libkram/eastl/include/EASTL/finally.h create mode 100644 libkram/eastl/include/EASTL/fixed_allocator.h create mode 100644 libkram/eastl/include/EASTL/fixed_function.h create mode 100644 libkram/eastl/include/EASTL/fixed_hash_map.h create mode 100644 libkram/eastl/include/EASTL/fixed_hash_set.h create mode 100644 libkram/eastl/include/EASTL/fixed_list.h create mode 100644 libkram/eastl/include/EASTL/fixed_map.h create mode 100644 libkram/eastl/include/EASTL/fixed_set.h create mode 100644 libkram/eastl/include/EASTL/fixed_slist.h create mode 100644 libkram/eastl/include/EASTL/fixed_string.h create mode 100644 libkram/eastl/include/EASTL/fixed_substring.h create mode 100644 libkram/eastl/include/EASTL/fixed_vector.h create mode 100644 libkram/eastl/include/EASTL/functional.h create mode 100644 libkram/eastl/include/EASTL/hash_map.h create mode 100644 libkram/eastl/include/EASTL/hash_set.h create mode 100644 libkram/eastl/include/EASTL/heap.h create mode 100644 libkram/eastl/include/EASTL/initializer_list.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/arch/arch.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/arch/arch_add_fetch.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/arch/arch_and_fetch.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/arch/arch_cmpxchg_strong.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/arch/arch_cmpxchg_weak.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/arch/arch_compiler_barrier.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/arch/arch_cpu_pause.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/arch/arch_exchange.h create mode 100644 
libkram/eastl/include/EASTL/internal/atomic/arch/arch_fetch_add.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/arch/arch_fetch_and.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/arch/arch_fetch_or.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/arch/arch_fetch_sub.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/arch/arch_fetch_xor.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/arch/arch_load.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/arch/arch_memory_barrier.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/arch/arch_or_fetch.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/arch/arch_signal_fence.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/arch/arch_store.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/arch/arch_sub_fetch.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/arch/arch_thread_fence.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/arch/arch_xor_fetch.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/atomic.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/atomic_asserts.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/atomic_base_width.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/atomic_casts.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/atomic_flag.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/atomic_flag_standalone.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/atomic_integral.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/atomic_macros.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_add_fetch.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_and_fetch.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_base.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_cmpxchg_strong.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_cmpxchg_weak.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_compiler_barrier.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_cpu_pause.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_exchange.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_fetch_add.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_fetch_and.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_fetch_or.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_fetch_sub.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_fetch_xor.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_load.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_memory_barrier.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_or_fetch.h create mode 100644 
libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_signal_fence.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_store.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_sub_fetch.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_thread_fence.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_xor_fetch.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/atomic_memory_order.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/atomic_pointer.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/atomic_pop_compiler_options.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/atomic_push_compiler_options.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/atomic_size_aligned.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/atomic_standalone.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/compiler.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_add_fetch.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_and_fetch.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_barrier.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_cmpxchg_strong.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_cmpxchg_weak.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_cpu_pause.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_exchange.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_fetch_add.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_fetch_and.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_fetch_or.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_fetch_sub.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_fetch_xor.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_load.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_memory_barrier.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_or_fetch.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_signal_fence.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_store.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_sub_fetch.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_thread_fence.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_xor_fetch.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_add_fetch.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_and_fetch.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_barrier.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_cmpxchg_strong.h create mode 100644 
libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_cmpxchg_weak.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_cpu_pause.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_exchange.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_fetch_add.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_fetch_and.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_fetch_or.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_fetch_sub.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_fetch_xor.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_load.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_or_fetch.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_signal_fence.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_store.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_sub_fetch.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_thread_fence.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_xor_fetch.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_add_fetch.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_and_fetch.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_barrier.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_cmpxchg_strong.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_cmpxchg_weak.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_cpu_pause.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_exchange.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_fetch_add.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_fetch_and.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_fetch_or.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_fetch_sub.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_fetch_xor.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_or_fetch.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_signal_fence.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_sub_fetch.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_xor_fetch.h create mode 100644 libkram/eastl/include/EASTL/internal/char_traits.h create mode 100644 libkram/eastl/include/EASTL/internal/config.h create mode 100644 libkram/eastl/include/EASTL/internal/copy_help.h create mode 100644 libkram/eastl/include/EASTL/internal/enable_shared.h create mode 100644 
libkram/eastl/include/EASTL/internal/fill_help.h create mode 100644 libkram/eastl/include/EASTL/internal/fixed_pool.h create mode 100644 libkram/eastl/include/EASTL/internal/function.h create mode 100644 libkram/eastl/include/EASTL/internal/function_detail.h create mode 100644 libkram/eastl/include/EASTL/internal/function_help.h create mode 100644 libkram/eastl/include/EASTL/internal/functional_base.h create mode 100644 libkram/eastl/include/EASTL/internal/generic_iterator.h create mode 100644 libkram/eastl/include/EASTL/internal/hashtable.h create mode 100644 libkram/eastl/include/EASTL/internal/in_place_t.h create mode 100644 libkram/eastl/include/EASTL/internal/integer_sequence.h create mode 100644 libkram/eastl/include/EASTL/internal/intrusive_hashtable.h create mode 100644 libkram/eastl/include/EASTL/internal/mem_fn.h create mode 100644 libkram/eastl/include/EASTL/internal/memory_base.h create mode 100644 libkram/eastl/include/EASTL/internal/move_help.h create mode 100644 libkram/eastl/include/EASTL/internal/pair_fwd_decls.h create mode 100644 libkram/eastl/include/EASTL/internal/piecewise_construct_t.h create mode 100644 libkram/eastl/include/EASTL/internal/red_black_tree.h create mode 100644 libkram/eastl/include/EASTL/internal/smart_ptr.h create mode 100644 libkram/eastl/include/EASTL/internal/thread_support.h create mode 100644 libkram/eastl/include/EASTL/internal/tuple_fwd_decls.h create mode 100644 libkram/eastl/include/EASTL/internal/type_compound.h create mode 100644 libkram/eastl/include/EASTL/internal/type_fundamental.h create mode 100644 libkram/eastl/include/EASTL/internal/type_pod.h create mode 100644 libkram/eastl/include/EASTL/internal/type_properties.h create mode 100644 libkram/eastl/include/EASTL/internal/type_transformations.h create mode 100644 libkram/eastl/include/EASTL/intrusive_hash_map.h create mode 100644 libkram/eastl/include/EASTL/intrusive_hash_set.h create mode 100644 libkram/eastl/include/EASTL/intrusive_list.h create mode 100644 libkram/eastl/include/EASTL/intrusive_ptr.h create mode 100644 libkram/eastl/include/EASTL/iterator.h create mode 100644 libkram/eastl/include/EASTL/linked_array.h create mode 100644 libkram/eastl/include/EASTL/linked_ptr.h create mode 100644 libkram/eastl/include/EASTL/list.h create mode 100644 libkram/eastl/include/EASTL/map.h create mode 100644 libkram/eastl/include/EASTL/memory.h create mode 100644 libkram/eastl/include/EASTL/meta.h create mode 100644 libkram/eastl/include/EASTL/numeric.h create mode 100644 libkram/eastl/include/EASTL/numeric_limits.h create mode 100644 libkram/eastl/include/EASTL/optional.h create mode 100644 libkram/eastl/include/EASTL/priority_queue.h create mode 100644 libkram/eastl/include/EASTL/queue.h create mode 100644 libkram/eastl/include/EASTL/random.h create mode 100644 libkram/eastl/include/EASTL/ratio.h create mode 100644 libkram/eastl/include/EASTL/safe_ptr.h create mode 100644 libkram/eastl/include/EASTL/scoped_array.h create mode 100644 libkram/eastl/include/EASTL/scoped_ptr.h create mode 100644 libkram/eastl/include/EASTL/segmented_vector.h create mode 100644 libkram/eastl/include/EASTL/set.h create mode 100644 libkram/eastl/include/EASTL/shared_array.h create mode 100644 libkram/eastl/include/EASTL/shared_ptr.h create mode 100644 libkram/eastl/include/EASTL/slist.h create mode 100644 libkram/eastl/include/EASTL/sort.h create mode 100644 libkram/eastl/include/EASTL/span.h create mode 100644 libkram/eastl/include/EASTL/stack.h create mode 100644 libkram/eastl/include/EASTL/string.h create 
mode 100644 libkram/eastl/include/EASTL/string_hash_map.h create mode 100644 libkram/eastl/include/EASTL/string_map.h create mode 100644 libkram/eastl/include/EASTL/string_view.h create mode 100644 libkram/eastl/include/EASTL/tuple.h create mode 100644 libkram/eastl/include/EASTL/type_traits.h create mode 100644 libkram/eastl/include/EASTL/unique_ptr.h create mode 100644 libkram/eastl/include/EASTL/unordered_map.h create mode 100644 libkram/eastl/include/EASTL/unordered_set.h create mode 100644 libkram/eastl/include/EASTL/utility.h create mode 100644 libkram/eastl/include/EASTL/variant.h create mode 100644 libkram/eastl/include/EASTL/vector.h create mode 100644 libkram/eastl/include/EASTL/vector_map.h create mode 100644 libkram/eastl/include/EASTL/vector_multimap.h create mode 100644 libkram/eastl/include/EASTL/vector_multiset.h create mode 100644 libkram/eastl/include/EASTL/vector_set.h create mode 100644 libkram/eastl/include/EASTL/version.h create mode 100644 libkram/eastl/include/EASTL/weak_ptr.h create mode 100644 libkram/eastl/source/allocator_eastl.cpp create mode 100644 libkram/eastl/source/assert.cpp create mode 100644 libkram/eastl/source/atomic.cpp create mode 100644 libkram/eastl/source/fixed_pool.cpp create mode 100644 libkram/eastl/source/hashtable.cpp create mode 100644 libkram/eastl/source/intrusive_list.cpp create mode 100644 libkram/eastl/source/numeric_limits.cpp create mode 100644 libkram/eastl/source/red_black_tree.cpp create mode 100644 libkram/eastl/source/string.cpp create mode 100644 libkram/eastl/source/thread_support.cpp diff --git a/libkram/eastl/include/EABase/config/eacompiler.h b/libkram/eastl/include/EABase/config/eacompiler.h new file mode 100644 index 00000000..bd656ed9 --- /dev/null +++ b/libkram/eastl/include/EABase/config/eacompiler.h @@ -0,0 +1,1778 @@ +/*----------------------------------------------------------------------------- + * config/eacompiler.h + * + * Copyright (c) Electronic Arts Inc. All rights reserved. 
+ *----------------------------------------------------------------------------- + * Currently supported defines include: + * EA_COMPILER_GNUC + * EA_COMPILER_ARM + * EA_COMPILER_EDG + * EA_COMPILER_SN + * EA_COMPILER_MSVC + * EA_COMPILER_METROWERKS + * EA_COMPILER_INTEL + * EA_COMPILER_BORLANDC + * EA_COMPILER_IBM + * EA_COMPILER_QNX + * EA_COMPILER_GREEN_HILLS + * EA_COMPILER_CLANG + * EA_COMPILER_CLANG_CL + * + * EA_COMPILER_VERSION = + * EA_COMPILER_NAME = + * EA_COMPILER_STRING = + * + * EA_COMPILER_VA_COPY_REQUIRED + * + * C++98/03 functionality + * EA_COMPILER_NO_STATIC_CONSTANTS + * EA_COMPILER_NO_TEMPLATE_SPECIALIZATION + * EA_COMPILER_NO_TEMPLATE_PARTIAL_SPECIALIZATION + * EA_COMPILER_NO_MEMBER_TEMPLATES + * EA_COMPILER_NO_MEMBER_TEMPLATE_SPECIALIZATION + * EA_COMPILER_NO_TEMPLATE_TEMPLATES + * EA_COMPILER_NO_MEMBER_TEMPLATE_FRIENDS + * EA_COMPILER_NO_VOID_RETURNS + * EA_COMPILER_NO_COVARIANT_RETURN_TYPE + * EA_COMPILER_NO_DEDUCED_TYPENAME + * EA_COMPILER_NO_ARGUMENT_DEPENDENT_LOOKUP + * EA_COMPILER_NO_EXCEPTION_STD_NAMESPACE + * EA_COMPILER_NO_EXPLICIT_FUNCTION_TEMPLATE_ARGUMENTS + * EA_COMPILER_NO_RTTI + * EA_COMPILER_NO_EXCEPTIONS + * EA_COMPILER_NO_NEW_THROW_SPEC + * EA_THROW_SPEC_NEW / EA_THROW_SPEC_DELETE + * EA_COMPILER_NO_UNWIND + * EA_COMPILER_NO_STANDARD_CPP_LIBRARY + * EA_COMPILER_NO_STATIC_VARIABLE_INIT + * EA_COMPILER_NO_STATIC_FUNCTION_INIT + * EA_COMPILER_NO_VARIADIC_MACROS + * + * C++11 functionality + * EA_COMPILER_NO_RVALUE_REFERENCES + * EA_COMPILER_NO_EXTERN_TEMPLATE + * EA_COMPILER_NO_RANGE_BASED_FOR_LOOP + * EA_COMPILER_NO_CONSTEXPR + * EA_COMPILER_NO_OVERRIDE + * EA_COMPILER_NO_INHERITANCE_FINAL + * EA_COMPILER_NO_NULLPTR + * EA_COMPILER_NO_AUTO + * EA_COMPILER_NO_DECLTYPE + * EA_COMPILER_NO_DEFAULTED_FUNCTIONS + * EA_COMPILER_NO_DELETED_FUNCTIONS + * EA_COMPILER_NO_LAMBDA_EXPRESSIONS + * EA_COMPILER_NO_TRAILING_RETURN_TYPES + * EA_COMPILER_NO_STRONGLY_TYPED_ENUMS + * EA_COMPILER_NO_FORWARD_DECLARED_ENUMS + * EA_COMPILER_NO_VARIADIC_TEMPLATES + * EA_COMPILER_NO_TEMPLATE_ALIASES + * EA_COMPILER_NO_INITIALIZER_LISTS + * EA_COMPILER_NO_NORETURN + * EA_COMPILER_NO_CARRIES_DEPENDENCY + * EA_COMPILER_NO_FALLTHROUGH + * EA_COMPILER_NO_NODISCARD + * EA_COMPILER_NO_MAYBE_UNUSED + * EA_COMPILER_NO_NONSTATIC_MEMBER_INITIALIZERS + * EA_COMPILER_NO_RIGHT_ANGLE_BRACKETS + * EA_COMPILER_NO_ALIGNOF + * EA_COMPILER_NO_ALIGNAS + * EA_COMPILER_NO_DELEGATING_CONSTRUCTORS + * EA_COMPILER_NO_INHERITING_CONSTRUCTORS + * EA_COMPILER_NO_USER_DEFINED_LITERALS + * EA_COMPILER_NO_STANDARD_LAYOUT_TYPES + * EA_COMPILER_NO_EXTENDED_SIZEOF + * EA_COMPILER_NO_INLINE_NAMESPACES + * EA_COMPILER_NO_UNRESTRICTED_UNIONS + * EA_COMPILER_NO_EXPLICIT_CONVERSION_OPERATORS + * EA_COMPILER_NO_FUNCTION_TEMPLATE_DEFAULT_ARGS + * EA_COMPILER_NO_LOCAL_CLASS_TEMPLATE_PARAMETERS + * EA_COMPILER_NO_NOEXCEPT + * EA_COMPILER_NO_RAW_LITERALS + * EA_COMPILER_NO_UNICODE_STRING_LITERALS + * EA_COMPILER_NO_NEW_CHARACTER_TYPES + * EA_COMPILER_NO_UNICODE_CHAR_NAME_LITERALS + * EA_COMPILER_NO_UNIFIED_INITIALIZATION_SYNTAX + * EA_COMPILER_NO_EXTENDED_FRIEND_DECLARATIONS + * + * C++14 functionality + * EA_COMPILER_NO_VARIABLE_TEMPLATES + * + * C++17 functionality + * EA_COMPILER_NO_INLINE_VARIABLES + * EA_COMPILER_NO_ALIGNED_NEW + * + * C++20 functionality + * EA_COMPILER_NO_DESIGNATED_INITIALIZERS + * + *----------------------------------------------------------------------------- + * + * Supplemental documentation + * EA_COMPILER_NO_STATIC_CONSTANTS + * Code such as this is legal, but some compilers fail to compile 
it: + * struct A{ static const a = 1; }; + * + * EA_COMPILER_NO_TEMPLATE_SPECIALIZATION + * Some compilers fail to allow template specialization, such as with this: + * template <class U> void DoSomething(U u); + * void DoSomething(int x); + * + * EA_COMPILER_NO_TEMPLATE_PARTIAL_SPECIALIZATION + * Some compilers fail to allow partial template specialization, such as with this: + * template <class T, class Allocator> class vector{ }; // Primary templated class. + * template <class Allocator> class vector<bool, Allocator>{ }; // Partially specialized version. + * + * EA_COMPILER_NO_MEMBER_TEMPLATES + * Some compilers fail to allow member template functions such as this: + * struct A{ template <class U> void DoSomething(U u); }; + * + * EA_COMPILER_NO_MEMBER_TEMPLATE_SPECIALIZATION + * Some compilers fail to allow member template specialization, such as with this: + * struct A{ + * template <class U> void DoSomething(U u); + * void DoSomething(int x); + * }; + * + * EA_COMPILER_NO_TEMPLATE_TEMPLATES + * Code such as this is legal: + * template <typename T, template <typename> class U> + * U<T> SomeFunction(const U<T> x) { return x.DoSomething(); } + * + * EA_COMPILER_NO_MEMBER_TEMPLATE_FRIENDS + * Some compilers fail to compile templated friends, as with this: + * struct A{ template <class U> friend class SomeFriend; }; + * This is described in the C++ Standard at 14.5.3. + * + * EA_COMPILER_NO_VOID_RETURNS + * This is legal C++: + * void DoNothing1(){ }; + * void DoNothing2(){ return DoNothing1(); } + * + * EA_COMPILER_NO_COVARIANT_RETURN_TYPE + * See the C++ standard sec 10.3,p5. + * + * EA_COMPILER_NO_DEDUCED_TYPENAME + * Some compilers don't support the use of 'typename' for + * dependent types in deduced contexts, as with this: + * template <typename T> void Function(T, typename T::type); + * + * EA_COMPILER_NO_ARGUMENT_DEPENDENT_LOOKUP + * Also known as Koenig lookup. Basically, if you have a function + * that is in a namespace and you call that function without prefixing + * it with the namespace the compiler should look at any arguments + * you pass to that function call and search their namespace *first* + * to see if the given function exists there. + * + * EA_COMPILER_NO_EXCEPTION_STD_NAMESPACE + * <exception> is in namespace std. Some std libraries fail to + * put the contents of <exception> in namespace std. The following + * code should normally be legal: + * void Function(){ std::terminate(); } + * + * EA_COMPILER_NO_EXPLICIT_FUNCTION_TEMPLATE_ARGUMENTS + * Some compilers fail to execute DoSomething() properly, though they + * succeed in compiling it, as with this: + * template <int i> + * bool DoSomething(int j){ return i == j; }; + * DoSomething<1>(2); + * + * EA_COMPILER_NO_EXCEPTIONS + * The compiler is configured to disallow the use of try/throw/catch + * syntax (often to improve performance). Use of such syntax in this + * case will cause a compilation error. + * + * EA_COMPILER_NO_UNWIND + * The compiler is configured to allow the use of try/throw/catch + * syntax and behaviour but disables the generation of stack unwinding + * code for responding to exceptions (often to improve performance).
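[Editorial example, not part of this patch: a minimal sketch of how client code typically consumes the feature macros documented above, here EA_COMPILER_NO_EXCEPTIONS. The include path and the ParseValue helper are illustrative assumptions, not EABase or kram API.]

    #include <EABase/eabase.h>  // pulls in EABase/config/eacompiler.h (assumed include path)
    #include <string>
    #include <cstdio>

    // Parse an integer, degrading gracefully when the build has exceptions disabled.
    static bool ParseValue(const char* text, int& out)
    {
    #if !defined(EA_COMPILER_NO_EXCEPTIONS)
        try { out = std::stoi(text); return true; }   // exceptions available
        catch (...) { return false; }
    #else
        return std::sscanf(text, "%d", &out) == 1;    // try/throw/catch would not compile here
    #endif
    }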
+ * + *---------------------------------------------------------------------------*/ + +#ifndef INCLUDED_eacompiler_H +#define INCLUDED_eacompiler_H + + #include + + // Note: This is used to generate the EA_COMPILER_STRING macros + #ifndef INTERNAL_STRINGIZE + #define INTERNAL_STRINGIZE(x) INTERNAL_PRIMITIVE_STRINGIZE(x) + #endif + #ifndef INTERNAL_PRIMITIVE_STRINGIZE + #define INTERNAL_PRIMITIVE_STRINGIZE(x) #x + #endif + + // EA_COMPILER_HAS_FEATURE + #ifndef EA_COMPILER_HAS_FEATURE + #if defined(__clang__) + #define EA_COMPILER_HAS_FEATURE(x) __has_feature(x) + #else + #define EA_COMPILER_HAS_FEATURE(x) 0 + #endif + #endif + + + // EA_COMPILER_HAS_BUILTIN + #ifndef EA_COMPILER_HAS_BUILTIN + #if defined(__clang__) + #define EA_COMPILER_HAS_BUILTIN(x) __has_builtin(x) + #else + #define EA_COMPILER_HAS_BUILTIN(x) 0 + #endif + #endif + + + // EDG (EDG compiler front-end, used by other compilers such as SN) + #if defined(__EDG_VERSION__) + #define EA_COMPILER_EDG 1 + + #if defined(_MSC_VER) + #define EA_COMPILER_EDG_VC_MODE 1 + #endif + #if defined(__GNUC__) + #define EA_COMPILER_EDG_GCC_MODE 1 + #endif + #endif + + // EA_COMPILER_WINRTCX_ENABLED + // + // Defined as 1 if the compiler has its available C++/CX support enabled, else undefined. + // This specifically means the corresponding compilation unit has been built with Windows Runtime + // Components enabled, usually via the '-ZW' compiler flags being used. This option allows for using + // ref counted hat-type '^' objects and other C++/CX specific keywords like "ref new" + #if !defined(EA_COMPILER_WINRTCX_ENABLED) && defined(__cplusplus_winrt) + #define EA_COMPILER_WINRTCX_ENABLED 1 + #endif + + + // EA_COMPILER_CPP11_ENABLED + // + // Defined as 1 if the compiler has its available C++11 support enabled, else undefined. + // This does not mean that all of C++11 or any particular feature of C++11 is supported + // by the compiler. It means that whatever C++11 support the compiler has is enabled. + // This also includes existing and older compilers that still identify C++11 as C++0x. + // + // We cannot use (__cplusplus >= 201103L) alone because some compiler vendors have + // decided to not define __cplusplus like thus until they have fully completed their + // C++11 support. + // + #if !defined(EA_COMPILER_CPP11_ENABLED) && defined(__cplusplus) + #if (__cplusplus >= 201103L) // Clang and GCC defines this like so in C++11 mode. + #define EA_COMPILER_CPP11_ENABLED 1 + #elif defined(__GNUC__) && defined(__GXX_EXPERIMENTAL_CXX0X__) + #define EA_COMPILER_CPP11_ENABLED 1 + #elif defined(_MSC_VER) && _MSC_VER >= 1600 // Microsoft unilaterally enables its C++11 support; there is no way to disable it. + #define EA_COMPILER_CPP11_ENABLED 1 + #elif defined(__EDG_VERSION__) // && ??? + // To do: Is there a generic way to determine this? + #endif + #endif + + + // EA_COMPILER_CPP14_ENABLED + // + // Defined as 1 if the compiler has its available C++14 support enabled, else undefined. + // This does not mean that all of C++14 or any particular feature of C++14 is supported + // by the compiler. It means that whatever C++14 support the compiler has is enabled. + // + // We cannot use (__cplusplus >= 201402L) alone because some compiler vendors have + // decided to not define __cplusplus like thus until they have fully completed their + // C++14 support. + #if !defined(EA_COMPILER_CPP14_ENABLED) && defined(__cplusplus) + #if (__cplusplus >= 201402L) // Clang and GCC defines this like so in C++14 mode. 
+ #define EA_COMPILER_CPP14_ENABLED 1 + #elif defined(_MSC_VER) && (_MSC_VER >= 1900) // VS2015+ + #define EA_COMPILER_CPP14_ENABLED 1 + #endif + #endif + + + // EA_COMPILER_CPP17_ENABLED + // + // Defined as 1 if the compiler has its available C++17 support enabled, else undefined. + // This does not mean that all of C++17 or any particular feature of C++17 is supported + // by the compiler. It means that whatever C++17 support the compiler has is enabled. + // + // We cannot use (__cplusplus >= 201703L) alone because some compiler vendors have + // decided to not define __cplusplus like thus until they have fully completed their + // C++17 support. + #if !defined(EA_COMPILER_CPP17_ENABLED) && defined(__cplusplus) + #if (__cplusplus >= 201703L) + #define EA_COMPILER_CPP17_ENABLED 1 + #elif defined(_MSVC_LANG) && (_MSVC_LANG >= 201703L) // C++17+ + #define EA_COMPILER_CPP17_ENABLED 1 + #endif + #endif + + + // EA_COMPILER_CPP20_ENABLED + // + // Defined as 1 if the compiler has its available C++20 support enabled, else undefined. + // This does not mean that all of C++20 or any particular feature of C++20 is supported + // by the compiler. It means that whatever C++20 support the compiler has is enabled. + // + // We cannot use (__cplusplus >= 202003L) alone because some compiler vendors have + // decided to not define __cplusplus like thus until they have fully completed their + // C++20 support. + #if !defined(EA_COMPILER_CPP20_ENABLED) && defined(__cplusplus) + // TODO(rparoin): enable once a C++20 value for the __cplusplus macro has been published + // #if (__cplusplus >= 202003L) + // #define EA_COMPILER_CPP20_ENABLED 1 + // #elif defined(_MSVC_LANG) && (_MSVC_LANG >= 202003L) // C++20+ + // #define EA_COMPILER_CPP20_ENABLED 1 + // #endif + #endif + + + + #if defined(__ARMCC_VERSION) + // Note that this refers to the ARM RVCT compiler (armcc or armcpp), but there + // are other compilers that target ARM processors, such as GCC and Microsoft VC++. + // If you want to detect compiling for the ARM processor, check for EA_PROCESSOR_ARM + // being defined. + // This compiler is also identified by defined(__CC_ARM) || defined(__ARMCC__). + #define EA_COMPILER_RVCT 1 + #define EA_COMPILER_ARM 1 + #define EA_COMPILER_VERSION __ARMCC_VERSION + #define EA_COMPILER_NAME "RVCT" + //#define EA_COMPILER_STRING (defined below) + + // Clang's GCC-compatible driver. + #elif defined(__clang__) && !defined(_MSC_VER) + #define EA_COMPILER_CLANG 1 + #define EA_COMPILER_VERSION (__clang_major__ * 100 + __clang_minor__) + #define EA_COMPILER_NAME "clang" + #define EA_COMPILER_STRING EA_COMPILER_NAME __clang_version__ + + // GCC (a.k.a. GNUC) + #elif defined(__GNUC__) // GCC compilers exist for many platforms. + #define EA_COMPILER_GNUC 1 + #define EA_COMPILER_VERSION (__GNUC__ * 1000 + __GNUC_MINOR__) + #define EA_COMPILER_NAME "GCC" + #define EA_COMPILER_STRING EA_COMPILER_NAME " compiler, version " INTERNAL_STRINGIZE( __GNUC__ ) "." INTERNAL_STRINGIZE( __GNUC_MINOR__ ) + + #if (__GNUC__ == 2) && (__GNUC_MINOR__ < 95) // If GCC < 2.95... + #define EA_COMPILER_NO_MEMBER_TEMPLATES 1 + #endif + #if (__GNUC__ == 2) && (__GNUC_MINOR__ <= 97) // If GCC <= 2.97... + #define EA_COMPILER_NO_MEMBER_TEMPLATE_FRIENDS 1 + #endif + #if (__GNUC__ == 3) && ((__GNUC_MINOR__ == 1) || (__GNUC_MINOR__ == 2)) // If GCC 3.1 or 3.2 (but not pre 3.1 or post 3.2)... 
+ #define EA_COMPILER_NO_EXPLICIT_FUNCTION_TEMPLATE_ARGUMENTS 1 + #endif + + // Borland C++ + #elif defined(__BORLANDC__) + #define EA_COMPILER_BORLANDC 1 + #define EA_COMPILER_VERSION __BORLANDC__ + #define EA_COMPILER_NAME "Borland C" + //#define EA_COMPILER_STRING (defined below) + + #if (__BORLANDC__ <= 0x0550) // If Borland C++ Builder 4 and 5... + #define EA_COMPILER_NO_MEMBER_TEMPLATE_FRIENDS 1 + #endif + #if (__BORLANDC__ >= 0x561) && (__BORLANDC__ < 0x600) + #define EA_COMPILER_NO_MEMBER_FUNCTION_SPECIALIZATION 1 + #endif + + + // Intel C++ + // The Intel Windows compiler masquerades as VC++ and defines _MSC_VER. + // The Intel compiler is based on the EDG compiler front-end. + #elif defined(__ICL) || defined(__ICC) + #define EA_COMPILER_INTEL 1 + + // Should we enable the following? We probably should do so since enabling it does a lot more good than harm + // for users. The Intel Windows compiler does a pretty good job of emulating VC++ and so the user would likely + // have to handle few special cases where the Intel compiler doesn't emulate VC++ correctly. + #if defined(_MSC_VER) + #define EA_COMPILER_MSVC 1 + #define EA_COMPILER_MICROSOFT 1 + #endif + + // Should we enable the following? This isn't as clear because as of this writing we don't know if the Intel + // compiler truly emulates GCC well enough that enabling this does more good than harm. + #if defined(__GNUC__) + #define EA_COMPILER_GNUC 1 + #endif + + #if defined(__ICL) + #define EA_COMPILER_VERSION __ICL + #elif defined(__ICC) + #define EA_COMPILER_VERSION __ICC + #endif + #define EA_COMPILER_NAME "Intel C++" + #if defined(_MSC_VER) + #define EA_COMPILER_STRING EA_COMPILER_NAME " compiler, version " INTERNAL_STRINGIZE( EA_COMPILER_VERSION ) ", EDG version " INTERNAL_STRINGIZE( __EDG_VERSION__ ) ", VC++ version " INTERNAL_STRINGIZE( _MSC_VER ) + #elif defined(__GNUC__) + #define EA_COMPILER_STRING EA_COMPILER_NAME " compiler, version " INTERNAL_STRINGIZE( EA_COMPILER_VERSION ) ", EDG version " INTERNAL_STRINGIZE( __EDG_VERSION__ ) ", GCC version " INTERNAL_STRINGIZE( __GNUC__ ) + #else + #define EA_COMPILER_STRING EA_COMPILER_NAME " compiler, version " INTERNAL_STRINGIZE( EA_COMPILER_VERSION ) ", EDG version " INTERNAL_STRINGIZE( __EDG_VERSION__ ) + #endif + + + #elif defined(_MSC_VER) + #define EA_COMPILER_MSVC 1 + #define EA_COMPILER_MICROSOFT 1 + #define EA_COMPILER_VERSION _MSC_VER + #define EA_COMPILER_NAME "Microsoft Visual C++" + //#define EA_COMPILER_STRING (defined below) + + #if defined(__clang__) + // Clang's MSVC-compatible driver. + #define EA_COMPILER_CLANG_CL 1 + #endif + + #define EA_STANDARD_LIBRARY_MSVC 1 + #define EA_STANDARD_LIBRARY_MICROSOFT 1 + + #if (_MSC_VER <= 1200) // If VC6.x and earlier... + #if (_MSC_VER < 1200) + #define EA_COMPILER_MSVCOLD 1 + #else + #define EA_COMPILER_MSVC6 1 + #endif + + #if (_MSC_VER < 1200) // If VC5.x or earlier... + #define EA_COMPILER_NO_TEMPLATE_SPECIALIZATION 1 + #endif + #define EA_COMPILER_NO_EXPLICIT_FUNCTION_TEMPLATE_ARGUMENTS 1 // The compiler compiles this OK, but executes it wrong. Fixed in VC7.0 + #define EA_COMPILER_NO_VOID_RETURNS 1 // The compiler fails to compile such cases. Fixed in VC7.0 + #define EA_COMPILER_NO_EXCEPTION_STD_NAMESPACE 1 // The compiler fails to compile such cases. Fixed in VC7.0 + #define EA_COMPILER_NO_DEDUCED_TYPENAME 1 // The compiler fails to compile such cases. Fixed in VC7.0 + #define EA_COMPILER_NO_STATIC_CONSTANTS 1 // The compiler fails to compile such cases. 
Fixed in VC7.0 + #define EA_COMPILER_NO_COVARIANT_RETURN_TYPE 1 // The compiler fails to compile such cases. Fixed in VC7.1 + #define EA_COMPILER_NO_ARGUMENT_DEPENDENT_LOOKUP 1 // The compiler compiles this OK, but executes it wrong. Fixed in VC7.1 + #define EA_COMPILER_NO_TEMPLATE_TEMPLATES 1 // The compiler fails to compile such cases. Fixed in VC7.1 + #define EA_COMPILER_NO_TEMPLATE_PARTIAL_SPECIALIZATION 1 // The compiler fails to compile such cases. Fixed in VC7.1 + #define EA_COMPILER_NO_MEMBER_TEMPLATE_FRIENDS 1 // The compiler fails to compile such cases. Fixed in VC7.1 + //#define EA_COMPILER_NO_MEMBER_TEMPLATES 1 // VC6.x supports member templates properly 95% of the time. So do we flag the remaining 5%? + //#define EA_COMPILER_NO_MEMBER_TEMPLATE_SPECIALIZATION 1 // VC6.x supports member templates properly 95% of the time. So do we flag the remaining 5%? + + #elif (_MSC_VER <= 1300) // If VC7.0 and earlier... + #define EA_COMPILER_MSVC7 1 + + #define EA_COMPILER_NO_COVARIANT_RETURN_TYPE 1 // The compiler fails to compile such cases. Fixed in VC7.1 + #define EA_COMPILER_NO_ARGUMENT_DEPENDENT_LOOKUP 1 // The compiler compiles this OK, but executes it wrong. Fixed in VC7.1 + #define EA_COMPILER_NO_TEMPLATE_TEMPLATES 1 // The compiler fails to compile such cases. Fixed in VC7.1 + #define EA_COMPILER_NO_TEMPLATE_PARTIAL_SPECIALIZATION 1 // The compiler fails to compile such cases. Fixed in VC7.1 + #define EA_COMPILER_NO_MEMBER_TEMPLATE_FRIENDS 1 // The compiler fails to compile such cases. Fixed in VC7.1 + #define EA_COMPILER_NO_MEMBER_FUNCTION_SPECIALIZATION 1 // This is the case only for VC7.0 and not VC6 or VC7.1+. Fixed in VC7.1 + //#define EA_COMPILER_NO_MEMBER_TEMPLATES 1 // VC7.0 supports member templates properly 95% of the time. So do we flag the remaining 5%? + + #elif (_MSC_VER < 1400) // VS2003 _MSC_VER of 1300 means VC7 (VS2003) + // The VC7.1 and later compiler is fairly close to the C++ standard + // and thus has no compiler limitations that we are concerned about. + #define EA_COMPILER_MSVC7_2003 1 + #define EA_COMPILER_MSVC7_1 1 + + #elif (_MSC_VER < 1500) // VS2005 _MSC_VER of 1400 means VC8 (VS2005) + #define EA_COMPILER_MSVC8_2005 1 + #define EA_COMPILER_MSVC8_0 1 + + #elif (_MSC_VER < 1600) // VS2008. _MSC_VER of 1500 means VC9 (VS2008) + #define EA_COMPILER_MSVC9_2008 1 + #define EA_COMPILER_MSVC9_0 1 + + #elif (_MSC_VER < 1700) // VS2010 _MSC_VER of 1600 means VC10 (VS2010) + #define EA_COMPILER_MSVC_2010 1 + #define EA_COMPILER_MSVC10_0 1 + + #elif (_MSC_VER < 1800) // VS2012 _MSC_VER of 1700 means VS2011/VS2012 + #define EA_COMPILER_MSVC_2011 1 // Microsoft changed the name to VS2012 before shipping, despite referring to it as VS2011 up to just a few weeks before shipping. 
+ #define EA_COMPILER_MSVC11_0 1 + #define EA_COMPILER_MSVC_2012 1 + #define EA_COMPILER_MSVC12_0 1 + + #elif (_MSC_VER < 1900) // VS2013 _MSC_VER of 1800 means VS2013 + #define EA_COMPILER_MSVC_2013 1 + #define EA_COMPILER_MSVC13_0 1 + + #elif (_MSC_VER < 1910) // VS2015 _MSC_VER of 1900 means VS2015 + #define EA_COMPILER_MSVC_2015 1 + #define EA_COMPILER_MSVC14_0 1 + + #elif (_MSC_VER < 1911) // VS2017 _MSC_VER of 1910 means VS2017 + #define EA_COMPILER_MSVC_2017 1 + #define EA_COMPILER_MSVC15_0 1 + + #endif + + + // IBM + #elif defined(__xlC__) + #define EA_COMPILER_IBM 1 + #define EA_COMPILER_NAME "IBM XL C" + #define EA_COMPILER_VERSION __xlC__ + #define EA_COMPILER_STRING "IBM XL C compiler, version " INTERNAL_STRINGIZE( __xlC__ ) + + // Unknown + #else // Else the compiler is unknown + + #define EA_COMPILER_VERSION 0 + #define EA_COMPILER_NAME "Unknown" + + #endif + + #ifndef EA_COMPILER_STRING + #define EA_COMPILER_STRING EA_COMPILER_NAME " compiler, version " INTERNAL_STRINGIZE(EA_COMPILER_VERSION) + #endif + + + // Deprecated definitions + // For backwards compatibility, should be supported for at least the life of EABase v2.0.x. + #ifndef EA_COMPILER_NO_TEMPLATE_PARTIAL_SPECIALIZATION + #define EA_COMPILER_PARTIAL_TEMPLATE_SPECIALIZATION 1 + #endif + #ifndef EA_COMPILER_NO_TEMPLATE_SPECIALIZATION + #define EA_COMPILER_TEMPLATE_SPECIALIZATION 1 + #endif + #ifndef EA_COMPILER_NO_MEMBER_TEMPLATES + #define EA_COMPILER_MEMBER_TEMPLATES 1 + #endif + #ifndef EA_COMPILER_NO_MEMBER_TEMPLATE_SPECIALIZATION + #define EA_COMPILER_MEMBER_TEMPLATE_SPECIALIZATION 1 + #endif + + + + /////////////////////////////////////////////////////////////////////////////// + // EA_COMPILER_VA_COPY_REQUIRED + // + // Defines whether va_copy must be used to copy or save va_list objects between uses. + // Some compilers on some platforms implement va_list whereby its contents + // are destroyed upon usage, even if passed by value to another function. + // With these compilers you can use va_copy to save and restore a va_list. + // Known compiler/platforms that destroy va_list contents upon usage include: + // CodeWarrior on PowerPC + // GCC on x86-64 + // However, va_copy is part of the C99 standard and not part of earlier C and + // C++ standards. So not all compilers support it. VC++ doesn't support va_copy, + // but it turns out that VC++ doesn't usually need it on the platforms it supports, + // and va_copy can usually be implemented via memcpy(va_list, va_list) with VC++. + /////////////////////////////////////////////////////////////////////////////// + + #ifndef EA_COMPILER_VA_COPY_REQUIRED + #if ((defined(__GNUC__) && (__GNUC__ >= 3)) || defined(__clang__)) && (!defined(__i386__) || defined(__x86_64__)) && !defined(__ppc__) && !defined(__PPC__) && !defined(__PPC64__) + #define EA_COMPILER_VA_COPY_REQUIRED 1 + #endif + #endif + + + // EA_COMPILER_NO_RTTI + // + // If EA_COMPILER_NO_RTTI is defined, then RTTI (run-time type information) + // is not available (possibly due to being disabled by the user). 
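[Editorial example, not part of this patch: a minimal sketch of guarding a dynamic_cast behind the EA_COMPILER_NO_RTTI macro documented above. The Node/Leaf types and the include path are illustrative assumptions.]

    #include <EABase/eabase.h>  // assumed include path for eacompiler.h

    struct Node { virtual ~Node() {} };
    struct Leaf : Node { int value = 0; };

    // Returns the leaf's value, or 0 when the node is not a Leaf (or RTTI is unavailable).
    static int LeafValueOrZero(Node* node)
    {
    #if !defined(EA_COMPILER_NO_RTTI)
        if (Leaf* leaf = dynamic_cast<Leaf*>(node))   // RTTI available: checked downcast
            return leaf->value;
        return 0;
    #else
        (void)node;                                   // RTTI disabled: dynamic_cast unavailable
        return 0;
    #endif
    }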
+ // + #if defined(__EDG_VERSION__) && !defined(__RTTI) + #define EA_COMPILER_NO_RTTI 1 + #elif defined(__clang__) && !EA_COMPILER_HAS_FEATURE(cxx_rtti) + #define EA_COMPILER_NO_RTTI 1 + #elif defined(__IBMCPP__) && !defined(__RTTI_ALL__) + #define EA_COMPILER_NO_RTTI 1 + #elif defined(__GXX_ABI_VERSION) && !defined(__GXX_RTTI) + #define EA_COMPILER_NO_RTTI 1 + #elif defined(_MSC_VER) && !defined(_CPPRTTI) + #define EA_COMPILER_NO_RTTI 1 + #elif defined(__ARMCC_VERSION) && defined(__TARGET_CPU_MPCORE) && !defined(__RTTI) + #define EA_COMPILER_NO_RTTI 1 + #endif + + + + // EA_COMPILER_NO_EXCEPTIONS / EA_COMPILER_NO_UNWIND + // + // If EA_COMPILER_NO_EXCEPTIONS is defined, then the compiler is + // configured to not recognize C++ exception-handling statements + // such as try/catch/throw. Thus, when EA_COMPILER_NO_EXCEPTIONS is + // defined, code that attempts to use exception handling statements + // will usually cause a compilation error. If is often desirable + // for projects to disable exception handling because exception + // handling causes extra code and/or data generation which might + // not be needed, especially if it is known that exceptions won't + // be happening. When writing code that is to be portable between + // systems of which some enable exception handling while others + // don't, check for EA_COMPILER_NO_EXCEPTIONS being defined. + // + #if !defined(EA_COMPILER_NO_EXCEPTIONS) && !defined(EA_COMPILER_NO_UNWIND) + #if defined(EA_COMPILER_GNUC) && defined(_NO_EX) // GCC on some platforms defines _NO_EX when exceptions are disabled. + #define EA_COMPILER_NO_EXCEPTIONS 1 + + #elif (defined(EA_COMPILER_CLANG) || defined(EA_COMPILER_GNUC) || defined(EA_COMPILER_INTEL) || defined(EA_COMPILER_RVCT)) && !defined(__EXCEPTIONS) // GCC and most EDG-based compilers define __EXCEPTIONS when exception handling is enabled. + #define EA_COMPILER_NO_EXCEPTIONS 1 + + #elif (defined(EA_COMPILER_MSVC)) && !defined(_CPPUNWIND) + #define EA_COMPILER_NO_UNWIND 1 + + #endif // EA_COMPILER_NO_EXCEPTIONS / EA_COMPILER_NO_UNWIND + #endif // !defined(EA_COMPILER_NO_EXCEPTIONS) && !defined(EA_COMPILER_NO_UNWIND) + + + // ------------------------------------------------------------------------ + // EA_DISABLE_ALL_VC_WARNINGS / EA_RESTORE_ALL_VC_WARNINGS + // + // Disable and re-enable all warning(s) within code. + // + // Example usage: + // EA_DISABLE_ALL_VC_WARNINGS() + // + // EA_RESTORE_ALL_VC_WARNINGS() + // + //This is duplicated from EABase's eacompilertraits.h + #ifndef EA_DISABLE_ALL_VC_WARNINGS + #if defined(_MSC_VER) + #define EA_DISABLE_ALL_VC_WARNINGS() \ + __pragma(warning(push, 0)) \ + __pragma(warning(disable: 4244 4265 4267 4350 4472 4509 4548 4623 4710 4985 6320 4755 4625 4626 4702)) // Some warnings need to be explicitly called out. + #else + #define EA_DISABLE_ALL_VC_WARNINGS() + #endif + #endif + + //This is duplicated from EABase's eacompilertraits.h + #ifndef EA_RESTORE_ALL_VC_WARNINGS + #if defined(_MSC_VER) + #define EA_RESTORE_ALL_VC_WARNINGS() \ + __pragma(warning(pop)) + #else + #define EA_RESTORE_ALL_VC_WARNINGS() + #endif + #endif + + // Dinkumware + //This is duplicated from EABase's eahave.h + #if !defined(EA_HAVE_DINKUMWARE_CPP_LIBRARY) && !defined(EA_NO_HAVE_DINKUMWARE_CPP_LIBRARY) + #if defined(__cplusplus) + EA_DISABLE_ALL_VC_WARNINGS() + #include // Need to trigger the compilation of yvals.h without directly using because it might not exist. 
+ EA_RESTORE_ALL_VC_WARNINGS() + #endif + + #if defined(__cplusplus) && defined(_CPPLIB_VER) /* If using the Dinkumware Standard library... */ + #define EA_HAVE_DINKUMWARE_CPP_LIBRARY 1 + #else + #define EA_NO_HAVE_DINKUMWARE_CPP_LIBRARY 1 + #endif + #endif + + + // EA_COMPILER_NO_ALIGNED_NEW + // + // + #if !defined(EA_COMPILER_NO_ALIGNED_NEW) + #if defined(_HAS_ALIGNED_NEW) && _HAS_ALIGNED_NEW // VS2017 15.5 Preview + // supported. + #elif defined(EA_COMPILER_CPP17_ENABLED) + // supported. + #else + #define EA_COMPILER_NO_ALIGNED_NEW 1 + #endif + #endif + + // EA_COMPILER_NO_NEW_THROW_SPEC / EA_THROW_SPEC_NEW / EA_THROW_SPEC_DELETE + // + // If defined then the compiler's version of operator new is not decorated + // with a throw specification. This is useful for us to know because we + // often want to write our own overloaded operator new implementations. + // We need such operator new overrides to be declared identically to the + // way the compiler is defining operator new itself. + // + // Example usage: + // void* operator new(std::size_t) EA_THROW_SPEC_NEW(std::bad_alloc); + // void* operator new[](std::size_t) EA_THROW_SPEC_NEW(std::bad_alloc); + // void* operator new(std::size_t, const std::nothrow_t&) EA_THROW_SPEC_NEW_NONE(); + // void* operator new[](std::size_t, const std::nothrow_t&) EA_THROW_SPEC_NEW_NONE(); + // void operator delete(void*) EA_THROW_SPEC_DELETE_NONE(); + // void operator delete[](void*) EA_THROW_SPEC_DELETE_NONE(); + // void operator delete(void*, const std::nothrow_t&) EA_THROW_SPEC_DELETE_NONE(); + // void operator delete[](void*, const std::nothrow_t&) EA_THROW_SPEC_DELETE_NONE(); + // + #if defined(EA_HAVE_DINKUMWARE_CPP_LIBRARY) + #if defined(_MSC_VER) && (_MSC_VER >= 1912) // VS2017 15.3+ + #define EA_THROW_SPEC_NEW(x) noexcept(false) + #define EA_THROW_SPEC_NEW_NONE() noexcept + #define EA_THROW_SPEC_DELETE_NONE() noexcept + + #elif defined(_MSC_VER) && (_MSC_VER >= 1910) // VS2017+ + #define EA_THROW_SPEC_NEW(x) throw(x) + #define EA_THROW_SPEC_NEW_NONE() throw() + #define EA_THROW_SPEC_DELETE_NONE() throw() + + #else + #if defined(EA_PLATFORM_SONY) + #define EA_THROW_SPEC_NEW(X) _THROWS(X) + #elif defined(_MSC_VER) + // Disabled warning "nonstandard extension used: 'throw (...)'" as this warning is a W4 warning which is usually off by default + // and doesn't convey any important information but will still complain when building with /Wall (which most teams do) + #define EA_THROW_SPEC_NEW(X) __pragma(warning(push)) __pragma(warning(disable: 4987)) _THROWS(X) __pragma(warning(pop)) + #else + #define EA_THROW_SPEC_NEW(X) _THROW1(X) + #endif + #define EA_THROW_SPEC_NEW_NONE() _THROW0() + #define EA_THROW_SPEC_DELETE_NONE() _THROW0() + + #endif + #elif defined(EA_COMPILER_NO_EXCEPTIONS) && !defined(EA_COMPILER_RVCT) && !defined(EA_PLATFORM_LINUX) && !defined(EA_PLATFORM_APPLE) && !defined(CS_UNDEFINED_STRING) + #define EA_COMPILER_NO_NEW_THROW_SPEC 1 + + #define EA_THROW_SPEC_NEW(x) + #define EA_THROW_SPEC_NEW_NONE() + #define EA_THROW_SPEC_DELETE_NONE() + #else + #define EA_THROW_SPEC_NEW(x) throw(x) + #define EA_THROW_SPEC_NEW_NONE() throw() + #define EA_THROW_SPEC_DELETE_NONE() throw() + #endif + + + // EA_COMPILER_NO_STANDARD_CPP_LIBRARY + // + // If defined, then the compiler doesn't provide a Standard C++ library. + // + #if defined(EA_PLATFORM_ANDROID) + // Disabled because EA's eaconfig/android_config/android_sdk packages currently + // don't support linking STL libraries. 
Perhaps we can figure out what linker arguments + // are needed for an app so we can manually specify them and then re-enable this code. + //#include + // + //#if (__ANDROID_API__ < 9) // Earlier versions of Android provide no std C++ STL implementation. + #define EA_COMPILER_NO_STANDARD_CPP_LIBRARY 1 + //#endif + #endif + + + // EA_COMPILER_NO_STATIC_VARIABLE_INIT + // + // If defined, it means that global or static C++ variables will be + // constructed. Not all compiler/platorm combinations support this. + // User code that needs to be portable must avoid having C++ variables + // that construct before main. + // + //#if defined(EA_PLATFORM_MOBILE) + // #define EA_COMPILER_NO_STATIC_VARIABLE_INIT 1 + //#endif + + + // EA_COMPILER_NO_STATIC_FUNCTION_INIT + // + // If defined, it means that functions marked as startup functions + // (e.g. __attribute__((constructor)) in GCC) are supported. It may + // be that some compiler/platform combinations don't support this. + // + //#if defined(XXX) // So far, all compiler/platforms we use support this. + // #define EA_COMPILER_NO_STATIC_VARIABLE_INIT 1 + //#endif + + // EA_COMPILER_NO_VARIADIC_MACROS + // + // If defined, the compiler doesn't support C99/C++11 variadic macros. + // With a variadic macro, you can do this: + // #define MY_PRINTF(format, ...) printf(format, __VA_ARGS__) + // + #if !defined(EA_COMPILER_NO_VARIADIC_MACROS) + #if defined(_MSC_VER) && (_MSC_VER < 1500) // If earlier than VS2008.. + #define EA_COMPILER_NO_VARIADIC_MACROS 1 + #elif defined(__GNUC__) && (((__GNUC__ * 100) + __GNUC_MINOR__)) < 401 // If earlier than GCC 4.1.. + #define EA_COMPILER_NO_VARIADIC_MACROS 1 + #elif defined(EA_COMPILER_EDG) // Includes other compilers + // variadic macros are supported + #endif + #endif + + + // EA_COMPILER_NO_RVALUE_REFERENCES + // + // If defined, the compiler doesn't fully support C++11 rvalue reference semantics. + // This applies to the compiler only and not the Standard Library in use with the compiler, + // which is required by the Standard to have some support itself. + // + #if !defined(EA_COMPILER_NO_RVALUE_REFERENCES) + #if defined(EA_COMPILER_CPP11_ENABLED) && defined(_MSC_VER) && (_MSC_VER >= 1600) // VS2010+ + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__EDG_VERSION__) && (__EDG_VERSION__ >= 403) // EDG 4.3+. + // supported. Earlier EDG supported a subset of rvalue references. Implicit move constructors and assignment operators aren't supported until EDG 4.5. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && EA_COMPILER_HAS_FEATURE(cxx_rvalue_references) + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4005) // GCC 4.5+ + // supported. + #else + #define EA_COMPILER_NO_RVALUE_REFERENCES 1 + #endif + #endif + + + // EA_COMPILER_NO_EXTERN_TEMPLATE + // + // If defined, the compiler doesn't support C++11 extern template. + // With extern templates, you can do this: + // extern template void DoSomething(KnownType u); + // + #if !defined(EA_COMPILER_NO_EXTERN_TEMPLATE) + #if defined(EA_COMPILER_CPP11_ENABLED) && defined(_MSC_VER) && (_MSC_VER >= 1700) // VS2012+... + // Extern template is supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__EDG_VERSION__) && (__EDG_VERSION__ >= 401) // EDG 4.1+. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && defined(__apple_build_version__) && (EA_COMPILER_VERSION >= 401) + // Extern template is supported. 
+ #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && !defined(__apple_build_version__) // Clang other than Apple's Clang + // Extern template is supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4006) // GCC 4.6+ + // Extern template is supported. + #else + #define EA_COMPILER_NO_EXTERN_TEMPLATE 1 + #endif + #endif + + + // EA_COMPILER_NO_RANGE_BASED_FOR_LOOP + // + // If defined, the compiler doesn't support C++11 range-based for loops. + // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2009/n2930.html + // You must #include <iterator> for range-based for loops to work. + // Example usage: + // #include <iterator> + // #include <vector> + // std::vector<float> floatVector; + // for(float& f : floatVector) + // f += 1.0; + // + #if !defined(EA_COMPILER_NO_RANGE_BASED_FOR_LOOP) + #if defined(EA_COMPILER_CPP11_ENABLED) && (defined(_MSC_VER) && (EA_COMPILER_VERSION >= 1700)) // VS2012+... + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__EDG_VERSION__) && (__EDG_VERSION__ >= 405) // EDG 4.5+. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && (defined(__clang__) && (EA_COMPILER_VERSION >= 300)) // Clang 3.x+ + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && (defined(__GNUC__) && (EA_COMPILER_VERSION >= 4006)) // GCC 4.6+ + // supported. + #else + #define EA_COMPILER_NO_RANGE_BASED_FOR_LOOP 1 + #endif + #endif + + + // EA_COMPILER_NO_CONSTEXPR + // + // Refers to C++11 = constexpr (const expression) declarations. + // + #if !defined(EA_COMPILER_NO_CONSTEXPR) + #if defined(EA_COMPILER_CPP11_ENABLED) && (defined(_MSC_VER) && (EA_COMPILER_VERSION >= 1900)) // VS2015+... Not present in VC++ up to and including VS2013. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__EDG_VERSION__) && (__EDG_VERSION__ >= 406) // EDG 4.6+. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && EA_COMPILER_HAS_FEATURE(cxx_constexpr) + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4006) // GCC 4.6+ + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(_MSC_VER) && (EA_COMPILER_VERSION >= 1900) // VS 2015+ + // supported. + #else + #define EA_COMPILER_NO_CONSTEXPR 1 + #endif + #endif + + + // EA_COMPILER_NO_CONSTEXPR_IF + // + // Refers to C++17 = constexpr if(const expression) conditionals. + // + #if !defined(EA_COMPILER_NO_CONSTEXPR_IF) + #if defined(EA_COMPILER_CPP17_ENABLED) && (defined(_MSC_VER) && (EA_COMPILER_VERSION >= 1911)) // VS2017 15.3+ + // supported. + #elif defined(EA_COMPILER_CPP17_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 309) // Clang 3.9+ + // supported. + #elif defined(EA_COMPILER_CPP17_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 7000) // GCC 7+ + // supported. + #else + #define EA_COMPILER_NO_CONSTEXPR_IF 1 + #endif + #endif + + + // EA_COMPILER_NO_OVERRIDE + // + // Refers to the C++11 override specifier. + // + #ifndef EA_COMPILER_NO_OVERRIDE + #if defined(EA_COMPILER_CPP11_ENABLED) && defined(_MSC_VER) && (EA_COMPILER_VERSION > 1600) // VC++ > VS2010, even without C++11 support. VS2010 does support override, however will generate warnings due to the keyword being 'non-standard' + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 209) // Clang 2.9+ + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4007) // GCC 4.7+ + // supported.
+ #else + #define EA_COMPILER_NO_OVERRIDE 1 + #endif + #endif + + + // EA_COMPILER_NO_INHERITANCE_FINAL + // + // Refers to the C++11 final specifier. + // + #ifndef EA_COMPILER_NO_INHERITANCE_FINAL + #if defined(EA_COMPILER_CPP11_ENABLED) && defined(_MSC_VER) && (EA_COMPILER_VERSION >= 1500) // VS2008+, even without C++11 support. + // supported, though you need to use EA_INHERITANCE_FINAL for it to work with VS versions prior to 2012. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 209) // Clang 2.9+ + // supported + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4007) // GCC 4.7+ + // supported + #else + #define EA_COMPILER_NO_INHERITANCE_FINAL 1 + #endif + #endif + + + // EA_COMPILER_NO_AUTO + // + // Refers to C++11 auto. + // + #if !defined(EA_COMPILER_NO_AUTO) + #if defined(EA_COMPILER_CPP11_ENABLED) && defined(_MSC_VER) && (EA_COMPILER_VERSION >= 1600) // VS2010+ + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__EDG_VERSION__) && (__EDG_VERSION__ >= 401) // EDG 4.1+. + // supported with the exception of the usage of braced initializer lists as of EDG 4.3. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 209) // Clang 2.9+, including Apple's Clang. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4004) // GCC 4.4+ + // supported. + #else + #define EA_COMPILER_NO_AUTO 1 + #endif + #endif + + + // EA_COMPILER_NO_NULLPTR + // + // Refers to C++11 nullptr (which is a built in type). std::nullptr_t is defined in C++11 . + // Note that implements a portable nullptr implementation. + // + #if !defined(EA_COMPILER_NO_NULLPTR) + #if (defined(_MSC_VER) && (_MSC_VER >= 1600)) && defined(EA_COMPILER_CPP11_ENABLED) + // supported + #elif defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4006) && defined(EA_COMPILER_CPP11_ENABLED) + // supported + #elif defined(__clang__) && defined(EA_COMPILER_CPP11_ENABLED) + // supported + #elif defined(__EDG_VERSION__) && (__EDG_VERSION__ >= 403) && defined(EA_COMPILER_CPP11_ENABLED) + // supported + #else + #define EA_COMPILER_NO_NULLPTR 1 + #endif + #endif + + + // EA_COMPILER_NO_DECLTYPE + // + // Refers to C++11 decltype. + // + #if !defined(EA_COMPILER_NO_DECLTYPE) + #if defined(EA_COMPILER_CPP11_ENABLED) && defined(_MSC_VER) && (EA_COMPILER_VERSION >= 1600) // VS2010+ + // supported, though VS2010 doesn't support the spec completely as specified in the final standard. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__EDG_VERSION__) && (__EDG_VERSION__ >= 401) // EDG 4.1+. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 209) // Clang 2.9+, including Apple's Clang. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4003) // GCC 4.3+ + // supported. + #else + #define EA_COMPILER_NO_DECLTYPE 1 + #endif + #endif + + + + // EA_COMPILER_NO_DEFAULTED_FUNCTIONS + // EA_COMPILER_NO_DELETED_FUNCTIONS + // + // Refers to C++11 = default and = delete function declarations. + // + #if !defined(EA_COMPILER_NO_DEFAULTED_FUNCTIONS) + #if defined(EA_COMPILER_CPP11_ENABLED) && defined(_MSC_VER) && (EA_COMPILER_VERSION >= 1800) // VS2013+ + // supported, but as of VS2013 it isn't supported for defaulted move constructors and move assignment operators. 
+ #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__EDG_VERSION__) && (__EDG_VERSION__ >= 401) // EDG 4.1+. + // supported, but as of EDG 4.3 it isn't supported for defaulted move constructors and move assignment operators until EDG 4.5. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 300) // Clang 3.0+, including Apple's Clang + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4004) // GCC 4.4+ + // supported. + #else + // VC++ doesn't support it as of VS2012. + #define EA_COMPILER_NO_DEFAULTED_FUNCTIONS 1 + #endif + #endif + + #if !defined(EA_COMPILER_NO_DELETED_FUNCTIONS) + #if defined(EA_COMPILER_CPP11_ENABLED) && defined(_MSC_VER) && (EA_COMPILER_VERSION >= 1800) // VS2013+ + // supported, but as of VS2013 it isn't supported for defaulted move constructors and move assignment operators. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__EDG_VERSION__) && (__EDG_VERSION__ >= 401) // EDG 4.1+. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 209) // Clang 2.9+ + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4004) // GCC 4.4+ + // supported. + #else + // VC++ doesn't support it as of VS2012. + #define EA_COMPILER_NO_DELETED_FUNCTIONS 1 + #endif + #endif + + + // EA_COMPILER_NO_LAMBDA_EXPRESSIONS + // + // Refers to C++11 lambda expressions. + // + #if !defined(EA_COMPILER_NO_LAMBDA_EXPRESSIONS) + #if defined(EA_COMPILER_CPP11_ENABLED) && defined(_MSC_VER) && (EA_COMPILER_VERSION >= 1600) // VS2010+ + // supported, though VS2010 doesn't support the spec completely as specified in the final standard. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__EDG_VERSION__) && (__EDG_VERSION__ >= 401) // EDG 4.1+. + // supported. However, converting lambdas to function pointers is not supported until EDG 4.5. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 401) && defined(__apple_build_version__) + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 301) && !defined(__apple_build_version__) // Clang 3.1+, not including Apple's Clang. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4004) // GCC 4.4+ + // supported. + #else + #define EA_COMPILER_NO_LAMBDA_EXPRESSIONS 1 + #endif + #endif + + + // EA_COMPILER_NO_TRAILING_RETURN_TYPES + // + // Refers to C++11 trailing-return-type. Also sometimes referred to as "incomplete return type". + // + #if !defined(EA_COMPILER_NO_TRAILING_RETURN_TYPES) + #if defined(EA_COMPILER_CPP11_ENABLED) && defined(_MSC_VER) && (EA_COMPILER_VERSION >= 1600) // VS2010+ + // supported, though VS2010 doesn't support the spec completely as specified in the final standard. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__EDG_VERSION__) && (__EDG_VERSION__ >= 402) // EDG 4.2+. + // supported. However, use of "this" in trailing return types is not supported untiil EDG 4.4 + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 401) && defined(__apple_build_version__) + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 301) && !defined(__apple_build_version__) // Clang 3.1+, not including Apple's Clang. + // supported. 
+ #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4004) // GCC 4.4+ + // supported. + #else + #define EA_COMPILER_NO_TRAILING_RETURN_TYPES 1 + #endif + #endif + + + // EA_COMPILER_NO_STRONGLY_TYPED_ENUMS + // + // Refers to C++11 strongly typed enums, which includes enum classes and sized enums. Doesn't include forward-declared enums. + // + #if !defined(EA_COMPILER_NO_STRONGLY_TYPED_ENUMS) + #if defined(EA_COMPILER_CPP11_ENABLED) && defined(_MSC_VER) && (EA_COMPILER_VERSION >= 1700) // VS2012+ + // supported. A subset of this is actually supported by VS2010. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__EDG_VERSION__) && (__EDG_VERSION__ >= 400) // EDG 4.0+. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 209) // Clang 2.9+, including Apple's Clang. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4004) // GCC 4.4+ + // supported. + #else + #define EA_COMPILER_NO_STRONGLY_TYPED_ENUMS 1 + #endif + #endif + + + // EA_COMPILER_NO_FORWARD_DECLARED_ENUMS + // + // Refers to C++11 forward declared enums. + // + #if !defined(EA_COMPILER_NO_FORWARD_DECLARED_ENUMS) + #if defined(EA_COMPILER_CPP11_ENABLED) && defined(_MSC_VER) && (EA_COMPILER_VERSION >= 1700) // VS2012+ + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__EDG_VERSION__) && (__EDG_VERSION__ >= 405) // EDG 4.5+. + // supported. EDG 4.3 supports basic forward-declared enums, but not forward-declared strongly typed enums. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 401) && defined(__apple_build_version__) + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 301) && !defined(__apple_build_version__) // Clang 3.1+, not including Apple's Clang. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4006) // GCC 4.6+ + // supported. + #else + #define EA_COMPILER_NO_FORWARD_DECLARED_ENUMS 1 + #endif + #endif + + + // EA_COMPILER_NO_VARIADIC_TEMPLATES + // + // Refers to C++11 variadic templates. + // + #if !defined(EA_COMPILER_NO_VARIADIC_TEMPLATES) + #if defined(EA_COMPILER_CPP11_ENABLED) && defined(_MSC_VER) && (EA_COMPILER_VERSION >= 1800) // VS2013+. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(_MSC_VER) && (_MSC_FULL_VER == 170051025) // VS2012 November Preview for Windows only. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__EDG_VERSION__) && (__EDG_VERSION__ >= 403) // EDG 4.3+. + // supported, though 4.1 has partial support for variadic templates. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 209) // Clang 2.9+, including Apple's Clang. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4004) // GCC 4.4+ + // supported, though GCC 4.3 has partial support for variadic templates. + #else + #define EA_COMPILER_NO_VARIADIC_TEMPLATES 1 + #endif + #endif + + + // EA_COMPILER_NO_TEMPLATE_ALIASES + // + // Refers to C++11 alias templates. + // Example alias template usage: + // template + // using Dictionary = eastl::map; + // + // Dictionary StringIntDictionary; + // + #if !defined(EA_COMPILER_NO_TEMPLATE_ALIASES) + #if defined(EA_COMPILER_CPP11_ENABLED) && defined(_MSC_VER) && (EA_COMPILER_VERSION >= 1800) // VS2013+. + // supported. 
+ #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 401) && defined(__apple_build_version__) + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__EDG_VERSION__) && (__EDG_VERSION__ >= 402) // EDG 4.2+. + // supported, though 4.1 has partial support for variadic templates. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 300) && !defined(__apple_build_version__) // Clang 3.0+, not including Apple's Clang. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4007) // GCC 4.7+ + // supported, though GCC 4.3 has partial support for variadic templates. + #else + #define EA_COMPILER_NO_TEMPLATE_ALIASES 1 + #endif + #endif + + + // EA_COMPILER_NO_VARIABLE_TEMPLATES + // + // Refers to C++14 variable templates. + // Example variable template usage: + // template + // constexpr T pi = T(3.1415926535897932385); + // + #if !defined(EA_COMPILER_NO_VARIABLE_TEMPLATES) + #if defined(_MSC_VER) && (_MSC_FULL_VER >= 190023918) // VS2015 Update 2 and above. + // supported. + #elif defined(EA_COMPILER_CPP14_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 304) && !defined(__apple_build_version__) // Clang 3.4+, not including Apple's Clang. + // supported. + #elif defined(EA_COMPILER_CPP14_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 5000) // GCC 5+ + // supported. + #elif !defined(EA_COMPILER_CPP14_ENABLED) + #define EA_COMPILER_NO_VARIABLE_TEMPLATES 1 + #endif + #endif + + + // EA_COMPILER_NO_INLINE_VARIABLES + // + // Refers to C++17 inline variables that allows the definition of variables in header files + // + // Example usage: + // struct Foo + // { + // static inline constexpr int kConstant = 42; // no out of class definition + // }; + // + // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2015/n4424.pdf + // http://en.cppreference.com/w/cpp/language/inline + // + #if !defined(EA_COMPILER_NO_INLINE_VARIABLES) + #define EA_COMPILER_NO_INLINE_VARIABLES 1 + #endif + + + // EA_COMPILER_NO_INITIALIZER_LISTS + // + // Refers to C++11 initializer lists. + // This refers to the compiler support for this and not the Standard Library support (std::initializer_list). + // + #if !defined(EA_COMPILER_NO_INITIALIZER_LISTS) + #if defined(EA_COMPILER_CPP11_ENABLED) && defined(_MSC_VER) && (EA_COMPILER_VERSION >= 1800) // VS2013+. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(_MSC_VER) && (_MSC_FULL_VER == 170051025) // VS2012 November Preview for Windows only. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__EDG_VERSION__) && (__EDG_VERSION__ >= 405) // EDG 4.5+. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 401) && defined(__apple_build_version__) + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 301) && !defined(__apple_build_version__) // Clang 3.1+, not including Apple's Clang. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4004) // GCC 4.4+ + // supported, though GCC 4.3 has partial support for it. + #else + #define EA_COMPILER_NO_INITIALIZER_LISTS 1 + #endif + #endif + + + // EA_COMPILER_NO_NORETURN + // + // Refers to C++11 declaration attribute: noreturn. 
+ // http://en.cppreference.com/w/cpp/language/attributes + // http://blog.aaronballman.com/2011/09/understanding-attributes/ + // + #if !defined(EA_COMPILER_NO_NORETURN) + #if defined(EA_COMPILER_MSVC) && (EA_COMPILER_VERSION >= 1300) // VS2003+ + // supported via __declspec(noreturn). You need to use that or EA_NORETURN. VC++ up to VS2013 doesn't support any C++11 attribute types. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__EDG_VERSION__) && (__EDG_VERSION__ >= 402) // EDG 4.2+. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 401) && defined(__apple_build_version__) + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 300) && !defined(__apple_build_version__) // Clang 3.0+, not including Apple's Clang. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4008) // GCC 4.8+ + // supported. + #else + #define EA_COMPILER_NO_NORETURN 1 + #endif + #endif + + + // EA_COMPILER_NO_CARRIES_DEPENDENCY + // + // Refers to C++11 declaration attribute: carries_dependency. + // http://en.cppreference.com/w/cpp/language/attributes + // http://blog.aaronballman.com/2011/09/understanding-attributes/ + // + #if !defined(EA_COMPILER_NO_CARRIES_DEPENDENCY) + #if defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 401) && defined(__apple_build_version__) // Apple clang 4.1+ + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__EDG_VERSION__) && (__EDG_VERSION__ >= 402) // EDG 4.2+. + // supported; stricter than other compilers in its usage. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 300) && !defined(__apple_build_version__) // Clang 3.0+, not including Apple's Clang. + // supported. + // Currently GNUC doesn't appear to support this attribute. + //#elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4008) // GCC 4.8+ + // // supported. + #else + #define EA_COMPILER_NO_CARRIES_DEPENDENCY 1 + #endif + #endif + + + // EA_COMPILER_NO_FALLTHROUGH + // + // Refers to C++17 declaration attribute: fallthrough. + // http://en.cppreference.com/w/cpp/language/attributes + // + #if !defined(EA_COMPILER_NO_FALLTHROUGH) + #if defined(EA_COMPILER_CPP17_ENABLED) + // supported. + #else + #define EA_COMPILER_NO_FALLTHROUGH 1 + #endif + #endif + + + // EA_COMPILER_NO_NODISCARD + // + // Refers to C++17 declaration attribute: nodiscard. + // http://en.cppreference.com/w/cpp/language/attributes + // + #if !defined(EA_COMPILER_NO_NODISCARD) + #if defined(EA_COMPILER_CPP17_ENABLED) + // supported. + #else + #define EA_COMPILER_NO_NODISCARD 1 + #endif + #endif + + + // EA_COMPILER_NO_MAYBE_UNUSED + // + // Refers to C++17 declaration attribute: maybe_unused. + // http://en.cppreference.com/w/cpp/language/attributes + // + #if !defined(EA_COMPILER_NO_MAYBE_UNUSED) + #if defined(EA_COMPILER_CPP17_ENABLED) + // supported. + #elif defined(EA_COMPILER_MSVC) && (EA_COMPILER_VERSION >= 1912) // VS2017 15.3+ + // supported. + #else + #define EA_COMPILER_NO_MAYBE_UNUSED 1 + #endif + #endif + + + // EA_COMPILER_NO_STRUCTURED_BINDING + // + // Indicates if target compiler supports the C++17 "structured binding" language feature. + // https://en.cppreference.com/w/cpp/language/structured_binding + // + // + #if !defined(EA_COMPILER_NO_STRUCTURED_BINDING) + #if defined(EA_COMPILER_CPP17_ENABLED) + // supported. 
+ #elif defined(EA_COMPILER_MSVC) && (EA_COMPILER_VERSION >= 1912) // VS2017 15.3+
+ // supported.
+ #else
+ #define EA_COMPILER_NO_STRUCTURED_BINDING 1
+ #endif
+ #endif
+
+
+ // EA_COMPILER_NO_DESIGNATED_INITIALIZERS
+ //
+ // Indicates the target compiler supports the C++20 "designated initializer" language feature.
+ // https://en.cppreference.com/w/cpp/language/aggregate_initialization
+ //
+ // Example:
+ // struct A { int x; int y; };
+ // A a = { .y = 42, .x = 1 };
+ //
+ #if !defined(EA_COMPILER_NO_DESIGNATED_INITIALIZERS)
+ #if defined(EA_COMPILER_CPP20_ENABLED)
+ // supported.
+ #else
+ #define EA_COMPILER_NO_DESIGNATED_INITIALIZERS 1
+ #endif
+ #endif
+
+
+ // EA_COMPILER_NO_NONSTATIC_MEMBER_INITIALIZERS
+ //
+ // Refers to C++11 non-static data member initializers (in-class member initializers).
+ // http://www.open-std.org/JTC1/SC22/WG21/docs/papers/2008/n2756.htm
+ //
+ #if !defined(EA_COMPILER_NO_NONSTATIC_MEMBER_INITIALIZERS)
+ #if defined(EA_COMPILER_CPP11_ENABLED) && defined(_MSC_VER) && (EA_COMPILER_VERSION >= 1800) // VS2013+.
+ // supported.
+ #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 401) && defined(__apple_build_version__) // Apple clang 4.1+
+ // supported.
+ #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 300) && !defined(__apple_build_version__) // Clang 3.0+, not including Apple's Clang.
+ // supported.
+ #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4007) // GCC 4.7+
+ // supported.
+ #else
+ #define EA_COMPILER_NO_NONSTATIC_MEMBER_INITIALIZERS 1
+ #endif
+ #endif
+
+
+ // EA_COMPILER_NO_RIGHT_ANGLE_BRACKETS
+ //
+ // Defines if the compiler supports >> (as opposed to > >) in template
+ // declarations such as typedef eastl::list<eastl::list<int>> ListList;
+ //
+ #if !defined(EA_COMPILER_NO_RIGHT_ANGLE_BRACKETS)
+ #if defined(EA_COMPILER_CPP11_ENABLED) && defined(_MSC_VER) && (EA_COMPILER_VERSION >= 1600) // VS2010+
+ // supported.
+ #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__EDG_VERSION__) && (__EDG_VERSION__ >= 401) // EDG 4.1+.
+ // supported.
+ #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 209) // Clang 2.9+, including Apple's Clang.
+ // supported.
+ #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4003) // GCC 4.3+
+ // supported.
+ #else
+ #define EA_COMPILER_NO_RIGHT_ANGLE_BRACKETS 1
+ #endif
+ #endif
+
+
+ // EA_COMPILER_NO_ALIGNOF
+ //
+ // Refers specifically to C++11 alignof and not old compiler extensions such as __alignof__().
+ // However, EABase provides a portable EA_ALIGN_OF which works for all compilers.
+ //
+ #if !defined(EA_COMPILER_NO_ALIGNOF)
+ // Not supported by VC++ as of VS2013, though EA_ALIGN_OF is supported on all compilers as an alternative.
+ #if defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 209) // Clang 2.9+, including Apple's Clang.
+ // supported.
+ #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4005) // GCC 4.5+
+ // supported.
+ #else
+ #define EA_COMPILER_NO_ALIGNOF 1
+ #endif
+ #endif
+
+
+ // EA_COMPILER_NO_ALIGNAS
+ //
+ // Refers to C++11 alignas.
+ //
+ #if !defined(EA_COMPILER_NO_ALIGNAS)
+ // Not supported by VC++ as of VS2013.
+ #if defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 401) && defined(__apple_build_version__) // Apple clang 4.1+
+ // supported.
+ #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 300) && !defined(__apple_build_version__) // Clang 3.0+, not including Apple's Clang. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4008) // GCC 4.8+ + // supported. + #else + #define EA_COMPILER_NO_ALIGNAS 1 + #endif + #endif + + + // EA_COMPILER_NO_DELEGATING_CONSTRUCTORS + // + // Refers to C++11 constructor delegation. + // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2006/n1986.pdf + // https://www.ibm.com/developerworks/mydeveloperworks/blogs/5894415f-be62-4bc0-81c5-3956e82276f3/entry/c_0x_delegating_constructors + // + #if !defined(EA_COMPILER_NO_DELEGATING_CONSTRUCTORS) + #if defined(EA_COMPILER_CPP11_ENABLED) && defined(_MSC_VER) && (EA_COMPILER_VERSION >= 1800) // VS2013+. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__EDG_VERSION__) && (__EDG_VERSION__ >= 407) // EDG 4.7+. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 401) && defined(__apple_build_version__) // Apple clang 4.1+ + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 300) && !defined(__apple_build_version__) // Clang 3.0+, not including Apple's Clang. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4007) // GCC 4.7+ + // supported. + #else + #define EA_COMPILER_NO_DELEGATING_CONSTRUCTORS 1 + #endif + #endif + + + // EA_COMPILER_NO_INHERITING_CONSTRUCTORS + // + // Refers to C++11 constructor inheritance via 'using'. + // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2008/n2540.htm + // + #if !defined(EA_COMPILER_NO_INHERITING_CONSTRUCTORS) + // Not supported by VC++ as of VS2013. + #if defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && EA_COMPILER_HAS_FEATURE(cxx_inheriting_constructors) // Clang + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4008) // GCC 4.8+ + // supported. + #else + #define EA_COMPILER_NO_INHERITING_CONSTRUCTORS 1 + #endif + #endif + + + // EA_COMPILER_NO_USER_DEFINED_LITERALS + // + // http://en.cppreference.com/w/cpp/language/user_literal + // http://stackoverflow.com/questions/237804/what-new-capabilities-do-user-defined-literals-add-to-c + // + #if !defined(EA_COMPILER_NO_USER_DEFINED_LITERALS) + // Not supported by VC++ as of VS2013. + #if defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 401) && defined(__apple_build_version__) // Apple clang 4.1+ + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 301) && !defined(__apple_build_version__) // Clang 3.1+, not including Apple's Clang. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4007) // GCC 4.7+ + // supported. + #else + #define EA_COMPILER_NO_USER_DEFINED_LITERALS 1 + #endif + #endif + + + // EA_COMPILER_NO_STANDARD_LAYOUT_TYPES + // a.k.a. POD relaxation + // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2007/n2342.htm + // + #if !defined(EA_COMPILER_NO_STANDARD_LAYOUT_TYPES) + #if defined(EA_COMPILER_CPP11_ENABLED) && defined(_MSC_VER) && (EA_COMPILER_VERSION >= 1700) // VS2012+ + // supported. 
+ #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 401) && defined(__apple_build_version__) // Apple clang 4.1+ + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 300) && !defined(__apple_build_version__) // Clang 3.0+, not including Apple's Clang. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4005) // GCC 4.5+ + // supported. + #else + #define EA_COMPILER_NO_STANDARD_LAYOUT_TYPES 1 + #endif + #endif + + + // EA_COMPILER_NO_EXTENDED_SIZEOF + // + // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2007/n2253.html + // Allows you to do this: sizeof(SomeClass::mSomeMember) + // + #if !defined(EA_COMPILER_NO_EXTENDED_SIZEOF) + // Not supported by VC++ as of VS2013. + #if defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 401) && defined(__apple_build_version__) // Apple clang 4.1+ + // supported. + // Versions of EDG prior to 4.5 only support extended sizeof in non-member functions. Full support was added in 4.5 + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__EDG_VERSION__) && (__EDG_VERSION__ >= 405) // EDG 4.5+. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 301) && !defined(__apple_build_version__) // Clang 3.1+, not including Apple's Clang. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4005) // GCC 4.5+ + // supported. + #else + #define EA_COMPILER_NO_EXTENDED_SIZEOF 1 + #endif + #endif + + + // EA_COMPILER_NO_INLINE_NAMESPACES + // + // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2008/n2535.htm + // http://blog.aaronballman.com/2011/07/inline-namespaces/ + // + #if !defined(EA_COMPILER_NO_INLINE_NAMESPACES) + // Not supported by VC++ as of VS2013. + #if defined(EA_COMPILER_CPP11_ENABLED) && defined(__EDG_VERSION__) && (__EDG_VERSION__ >= 405) // EDG 4.5+. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 209) // Clang 2.9+, including Apple's Clang. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4004) // GCC 4.4+ + // supported. + #else + #define EA_COMPILER_NO_INLINE_NAMESPACES 1 + #endif + #endif + + + // EA_COMPILER_NO_UNRESTRICTED_UNIONS + // + // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2008/n2544.pdf + // + #if !defined(EA_COMPILER_NO_UNRESTRICTED_UNIONS) + // Not supported by VC++ as of VS2013. + #if defined(EA_COMPILER_CPP11_ENABLED) && defined(__EDG_VERSION__) && (__EDG_VERSION__ >= 406) // EDG 4.6+. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 401) && defined(__apple_build_version__) // Apple clang 4.1+ + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 301) && !defined(__apple_build_version__) // Clang 3.1+, not including Apple's Clang. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4006) // GCC 4.6+ + // supported. 
+ #else + #define EA_COMPILER_NO_UNRESTRICTED_UNIONS 1 + #endif + #endif + + + // EA_COMPILER_NO_EXPLICIT_CONVERSION_OPERATORS + // + // http://en.wikipedia.org/wiki/C%2B%2B11#Explicit_conversion_operators + // + #if !defined(EA_COMPILER_NO_EXPLICIT_CONVERSION_OPERATORS) + #if defined(EA_COMPILER_CPP11_ENABLED) && defined(_MSC_VER) && (EA_COMPILER_VERSION >= 1800) // VS2013+. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(_MSC_VER) && (_MSC_FULL_VER == 170051025) // VS2012 November Preview for Windows only. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__EDG_VERSION__) && (__EDG_VERSION__ >= 404) // EDG 4.4+. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 401) && defined(__apple_build_version__) // Apple clang 4.1+ + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 300) && !defined(__apple_build_version__) // Clang 3.0+, not including Apple's Clang. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4005) // GCC 4.5+ + // supported. + #else + #define EA_COMPILER_NO_EXPLICIT_CONVERSION_OPERATORS 1 + #endif + #endif + + + // EA_COMPILER_NO_FUNCTION_TEMPLATE_DEFAULT_ARGS + // + // The compiler does not support default template arguments for function templates. + // http://stackoverflow.com/questions/2447458/default-template-arguments-for-function-templates + // + #if !defined(EA_COMPILER_NO_FUNCTION_TEMPLATE_DEFAULT_ARGS) + #if defined(EA_COMPILER_CPP11_ENABLED) && defined(_MSC_VER) && (EA_COMPILER_VERSION >= 1800) // VS2013+. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__EDG_VERSION__) && (__EDG_VERSION__ >= 403) // EDG 4.4+. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 209) // Clang 2.9+, including Apple's Clang. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4003) // GCC 4.3+ + // supported. + #else + #define EA_COMPILER_NO_FUNCTION_TEMPLATE_DEFAULT_ARGS 1 + #endif + #endif + + + // EA_COMPILER_NO_LOCAL_CLASS_TEMPLATE_PARAMETERS + // + // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2008/n2657.htm + // http://stackoverflow.com/questions/5751977/local-type-as-template-arguments-in-c + // + #if !defined(EA_COMPILER_NO_LOCAL_CLASS_TEMPLATE_PARAMETERS) + #if defined(EA_COMPILER_CPP11_ENABLED) && defined(_MSC_VER) && (EA_COMPILER_VERSION >= 1600) // VS2010+ + // supported. + #if (EA_COMPILER_VERSION < 1700) // VS2010 generates a warning, but the C++ language now allows it. + #pragma warning(disable: 4836) // nonstandard extension used: local types or unnamed types cannot be used as template arguments. + #endif + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__EDG_VERSION__) && (__EDG_VERSION__ >= 402) // EDG 4.2+. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 209) // Clang 2.9+, including Apple's Clang. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4005) // GCC 4.5+ + // supported. 
+ #else + #define EA_COMPILER_NO_LOCAL_CLASS_TEMPLATE_PARAMETERS 1 + #endif + #endif + + + // EA_COMPILER_NO_NOEXCEPT + // + // C++11 noexcept + // http://en.cppreference.com/w/cpp/language/attributes + // http://en.cppreference.com/w/cpp/language/noexcept + // + #if !defined(EA_COMPILER_NO_NOEXCEPT) + #if defined(EA_COMPILER_CPP11_ENABLED) && defined(_MSC_VER) && (EA_COMPILER_VERSION >= 1900) // VS2014+ + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 401) && defined(__apple_build_version__) // Apple clang 4.1+ + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__EDG_VERSION__) && (__EDG_VERSION__ >= 405) // EDG 4.5+. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 300) && !defined(__apple_build_version__) // Clang 3.0+, not including Apple's Clang. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4006) // GCC 4.6+ + // supported. + #else + #define EA_COMPILER_NO_NOEXCEPT 1 + #endif + #endif + + + // EA_COMPILER_NO_RAW_LITERALS + // + // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2007/n2442.htm + // http://en.wikipedia.org/wiki/C%2B%2B11#New_string_literals + // + #if !defined(EA_COMPILER_NO_RAW_LITERALS) + #if defined(EA_COMPILER_CPP11_ENABLED) && defined(_MSC_VER) && (EA_COMPILER_VERSION >= 1800) // VS2013+. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__EDG_VERSION__) && (__EDG_VERSION__ >= 407) // EDG 4.7+. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 401) && defined(__apple_build_version__) // Apple clang 4.1+ + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 300) && !defined(__apple_build_version__) // Clang 3.0+, not including Apple's Clang. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4005) // GCC 4.5+ + // supported. + #else + #define EA_COMPILER_NO_RAW_LITERALS 1 + #endif + #endif + + + // EA_COMPILER_NO_UNICODE_STRING_LITERALS + // + // http://en.wikipedia.org/wiki/C%2B%2B11#New_string_literals + // + #if !defined(EA_COMPILER_NO_UNICODE_STRING_LITERALS) + // Not supported by VC++ as of VS2013. + #if defined(EA_COMPILER_CPP11_ENABLED) && defined(__EDG_VERSION__) && (__EDG_VERSION__ >= 407) // EDG 4.7+. + // supported. It's not clear if it's v4.4 or v4.7 that adds this support. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 401) && defined(__apple_build_version__) // Apple clang 4.1+ + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 300) && !defined(__apple_build_version__) // Clang 3.0+, not including Apple's Clang. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4004) // GCC 4.4+ + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__EDG_VERSION__) && (__EDG_VERSION__ >= 407) // EDG 4.7+. + // supported. It's not clear if it's v4.4 or v4.7 that adds this support. + #else + #define EA_COMPILER_NO_UNICODE_STRING_LITERALS 1 + #endif + #endif + + + // EA_COMPILER_NO_NEW_CHARACTER_TYPES + // + // Refers to char16_t and char32_t as true native types (and not something simply typedef'd from uint16_t and uint32_t). 
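+ //
+ // Illustrative example (an added sketch, not upstream EABase text): when this macro is not
+ // defined, char16_t and char32_t are distinct native types, so the two declarations below
+ // are separate overloads rather than collisions with integer typedefs.
+ // void PrintChar(char16_t c); // hypothetical function
+ // void PrintChar(char32_t c); // a distinct overload, not a redeclaration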
+ // http://en.cppreference.com/w/cpp/language/types + // + #if !defined(EA_COMPILER_NO_NEW_CHARACTER_TYPES) + #if defined(EA_COMPILER_NO_UNICODE_STRING_LITERALS) // Some compilers have had support for char16_t prior to support for u"", but it's not useful to have the former without the latter. + #define EA_COMPILER_NO_NEW_CHARACTER_TYPES 1 + #endif + #endif + + + // EA_COMPILER_NO_UNICODE_CHAR_NAME_LITERALS + // + // C++ 11 relaxed \u\U sequences in strings. + // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2007/n2170.html + // + #if !defined(EA_COMPILER_NO_UNICODE_CHAR_NAME_LITERALS) + // VC++ up till at least VS2013 supports \u and \U but supports them wrong with respect to the C++11 Standard. + + #if defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 401) && defined(__apple_build_version__) // Apple clang 4.1+ + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 301) && !defined(__apple_build_version__) // Clang 3.1+, not including Apple's Clang. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4005) // GCC 4.5+ + // supported. + #else + #define EA_COMPILER_NO_UNICODE_CHAR_NAME_LITERALS 1 + #endif + #endif + + + // EA_COMPILER_NO_UNIFIED_INITIALIZATION_SYNTAX + // + // http://en.wikipedia.org/wiki/C%2B%2B11#Uniform_initialization + // + #if !defined(EA_COMPILER_NO_UNIFIED_INITIALIZATION_SYNTAX) + #if defined(EA_COMPILER_CPP11_ENABLED) && defined(_MSC_VER) && (EA_COMPILER_VERSION >= 1800) // VS2013+. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 401) && defined(__apple_build_version__) // Apple clang 4.1+ + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 301) && !defined(__apple_build_version__) // Clang 3.1+, not including Apple's Clang. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4004) // GCC 4.4+ + // supported. + #else + #define EA_COMPILER_NO_UNIFIED_INITIALIZATION_SYNTAX 1 + #endif + #endif + + + // EA_COMPILER_NO_EXTENDED_FRIEND_DECLARATIONS + // + // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2005/n1791.pdf + // + #if !defined(EA_COMPILER_NO_EXTENDED_FRIEND_DECLARATIONS) + #if defined(EA_COMPILER_CPP11_ENABLED) && defined(_MSC_VER) && (EA_COMPILER_VERSION >= 1600) // VS2010+ + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__EDG_VERSION__) && (__EDG_VERSION__ >= 401) // EDG 4.1+. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 209) // Clang 2.9+, including Apple's Clang. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4007) // GCC 4.7+ + // supported. + #else + #define EA_COMPILER_NO_EXTENDED_FRIEND_DECLARATIONS 1 + #endif + #endif + + + // EA_COMPILER_NO_THREAD_LOCAL + // + // Refers specifically to C++ thread_local, which is like compiler __thread implementations except + // that it also supports non-trivial classes (e.g. with ctors). EA_COMPILER_NO_THREAD_LOCAL refers + // specifically to full C++11 thread_local support. The EAThread package provides a wrapper for + // __thread via EA_THREAD_LOCAL (which unfortunately sounds like C++ thread_local). 
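+ //
+ // Illustrative example (an added sketch, not upstream EABase text): unlike plain __thread,
+ // full C++11 thread_local supports non-trivial per-thread construction and destruction.
+ // struct Counter { Counter(); ~Counter(); int value; }; // hypothetical type with a ctor/dtor
+ // thread_local Counter gTlsCounter; // one instance per thread; ctor/dtor run on each thread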
+ // + // https://en.cppreference.com/w/cpp/keyword/thread_local + // + #if !defined(EA_COMPILER_NO_THREAD_LOCAL) + #if defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && EA_COMPILER_HAS_FEATURE(cxx_thread_local) + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(_MSC_VER) && (EA_COMPILER_VERSION >= 1900) // VS2015+ + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4008) // GCC 4.8+ + // supported. + #else + #define EA_COMPILER_NO_THREAD_LOCAL 1 + #endif + #endif + + +#endif // INCLUDED_eacompiler_H + + + + + diff --git a/libkram/eastl/include/EABase/config/eacompilertraits.h b/libkram/eastl/include/EABase/config/eacompilertraits.h new file mode 100644 index 00000000..1d8bcb43 --- /dev/null +++ b/libkram/eastl/include/EABase/config/eacompilertraits.h @@ -0,0 +1,2561 @@ +/*----------------------------------------------------------------------------- + * config/eacompilertraits.h + * + * Copyright (c) Electronic Arts Inc. All rights reserved. + *----------------------------------------------------------------------------- + * Currently supported defines include: + * EA_PREPROCESSOR_JOIN + * + * EA_COMPILER_IS_ANSIC + * EA_COMPILER_IS_C99 + * EA_COMPILER_IS_C11 + * EA_COMPILER_HAS_C99_TYPES + * EA_COMPILER_IS_CPLUSPLUS + * EA_COMPILER_MANAGED_CPP + * EA_COMPILER_INTMAX_SIZE + * EA_OFFSETOF + * EA_SIZEOF_MEMBER + * + * EA_ALIGN_OF() + * EA_ALIGN_MAX_STATIC / EA_ALIGN_MAX_AUTOMATIC + * EA_ALIGN() / EA_PREFIX_ALIGN() / EA_POSTFIX_ALIGN() + * EA_ALIGNED() + * EA_PACKED() + * + * EA_LIKELY() + * EA_UNLIKELY() + * EA_INIT_PRIORITY() + * EA_MAY_ALIAS() + * EA_ASSUME() + * EA_ANALYSIS_ASSUME() + * EA_PURE + * EA_WEAK + * EA_UNUSED() + * EA_EMPTY() + * + * EA_WCHAR_T_NON_NATIVE + * EA_WCHAR_SIZE = + * + * EA_RESTRICT + * EA_DEPRECATED / EA_PREFIX_DEPRECATED / EA_POSTFIX_DEPRECATED + * EA_FORCE_INLINE / EA_PREFIX_FORCE_INLINE / EA_POSTFIX_FORCE_INLINE + * EA_NO_INLINE / EA_PREFIX_NO_INLINE / EA_POSTFIX_NO_INLINE + * EA_NO_VTABLE / EA_CLASS_NO_VTABLE / EA_STRUCT_NO_VTABLE + * EA_PASCAL + * EA_PASCAL_FUNC() + * EA_SSE = [0 | 1] + * EA_IMPORT + * EA_EXPORT + * EA_PRAGMA_ONCE_SUPPORTED + * EA_ONCE + * EA_OVERRIDE + * EA_INHERITANCE_FINAL + * EA_SEALED + * EA_ABSTRACT + * EA_CONSTEXPR / EA_CONSTEXPR_OR_CONST + * EA_CONSTEXPR_IF + * EA_EXTERN_TEMPLATE + * EA_NOEXCEPT + * EA_NORETURN + * EA_CARRIES_DEPENDENCY + * EA_NON_COPYABLE / struct EANonCopyable + * EA_OPTIMIZE_OFF / EA_OPTIMIZE_ON + * EA_SIGNED_RIGHT_SHIFT_IS_UNSIGNED + * + * EA_DISABLE_VC_WARNING / EA_RESTORE_VC_WARNING / EA_DISABLE_ALL_VC_WARNINGS / EA_RESTORE_ALL_VC_WARNINGS + * EA_DISABLE_GCC_WARNING / EA_RESTORE_GCC_WARNING + * EA_DISABLE_CLANG_WARNING / EA_RESTORE_CLANG_WARNING + * EA_DISABLE_SN_WARNING / EA_RESTORE_SN_WARNING / EA_DISABLE_ALL_SN_WARNINGS / EA_RESTORE_ALL_SN_WARNINGS + * EA_DISABLE_GHS_WARNING / EA_RESTORE_GHS_WARNING + * EA_DISABLE_EDG_WARNING / EA_RESTORE_EDG_WARNING + * EA_DISABLE_CW_WARNING / EA_RESTORE_CW_WARNING + * + * EA_DISABLE_DEFAULT_CTOR + * EA_DISABLE_COPY_CTOR + * EA_DISABLE_MOVE_CTOR + * EA_DISABLE_ASSIGNMENT_OPERATOR + * EA_DISABLE_MOVE_OPERATOR + * + * Todo: + * Find a way to reliably detect wchar_t size at preprocessor time and + * implement it below for EA_WCHAR_SIZE. + * + * Todo: + * Find out how to support EA_PASCAL and EA_PASCAL_FUNC for systems in + * which it hasn't yet been found out for. 
+ *---------------------------------------------------------------------------*/ + + +#ifndef INCLUDED_eacompilertraits_H +#define INCLUDED_eacompilertraits_H + + #include + #include + + + // Metrowerks uses #defines in its core C header files to define + // the kind of information we need below (e.g. C99 compatibility) + + + + // Determine if this compiler is ANSI C compliant and if it is C99 compliant. + #if defined(__STDC__) + #define EA_COMPILER_IS_ANSIC 1 // The compiler claims to be ANSI C + + // Is the compiler a C99 compiler or equivalent? + // From ISO/IEC 9899:1999: + // 6.10.8 Predefined macro names + // __STDC_VERSION__ The integer constant 199901L. (150) + // + // 150) This macro was not specified in ISO/IEC 9899:1990 and was + // specified as 199409L in ISO/IEC 9899/AMD1:1995. The intention + // is that this will remain an integer constant of type long int + // that is increased with each revision of this International Standard. + // + #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) + #define EA_COMPILER_IS_C99 1 + #endif + + // Is the compiler a C11 compiler? + // From ISO/IEC 9899:2011: + // Page 176, 6.10.8.1 (Predefined macro names) : + // __STDC_VERSION__ The integer constant 201112L. (178) + // + #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) + #define EA_COMPILER_IS_C11 1 + #endif + #endif + + // Some compilers (e.g. GCC) define __USE_ISOC99 if they are not + // strictly C99 compilers (or are simply C++ compilers) but are set + // to use C99 functionality. Metrowerks defines _MSL_C99 as 1 in + // this case, but 0 otherwise. + #if (defined(__USE_ISOC99) || (defined(_MSL_C99) && (_MSL_C99 == 1))) && !defined(EA_COMPILER_IS_C99) + #define EA_COMPILER_IS_C99 1 + #endif + + // Metrowerks defines C99 types (e.g. intptr_t) instrinsically when in C99 mode (-lang C99 on the command line). + #if (defined(_MSL_C99) && (_MSL_C99 == 1)) + #define EA_COMPILER_HAS_C99_TYPES 1 + #endif + + #if defined(__GNUC__) + #if (((__GNUC__ * 100) + __GNUC_MINOR__) >= 302) // Also, GCC defines _HAS_C9X. + #define EA_COMPILER_HAS_C99_TYPES 1 // The compiler is not necessarily a C99 compiler, but it defines C99 types. + + #ifndef __STDC_LIMIT_MACROS + #define __STDC_LIMIT_MACROS 1 + #endif + + #ifndef __STDC_CONSTANT_MACROS + #define __STDC_CONSTANT_MACROS 1 // This tells the GCC compiler that we want it to use its native C99 types. + #endif + #endif + #endif + + #if defined(_MSC_VER) && (_MSC_VER >= 1600) + #define EA_COMPILER_HAS_C99_TYPES 1 + #endif + + #ifdef __cplusplus + #define EA_COMPILER_IS_CPLUSPLUS 1 + #endif + + + // ------------------------------------------------------------------------ + // EA_PREPROCESSOR_JOIN + // + // This macro joins the two arguments together, even when one of + // the arguments is itself a macro (see 16.3.1 in C++98 standard). + // This is often used to create a unique name with __LINE__. + // + // For example, this declaration: + // char EA_PREPROCESSOR_JOIN(unique_, __LINE__); + // expands to this: + // char unique_73; + // + // Note that all versions of MSVC++ up to at least version 7.1 + // fail to properly compile macros that use __LINE__ in them + // when the "program database for edit and continue" option + // is enabled. The result is that __LINE__ gets converted to + // something like __LINE__(Var+37). 
+ // + #ifndef EA_PREPROCESSOR_JOIN + #define EA_PREPROCESSOR_JOIN(a, b) EA_PREPROCESSOR_JOIN1(a, b) + #define EA_PREPROCESSOR_JOIN1(a, b) EA_PREPROCESSOR_JOIN2(a, b) + #define EA_PREPROCESSOR_JOIN2(a, b) a##b + #endif + + + // ------------------------------------------------------------------------ + // EA_STRINGIFY + // + // Example usage: + // printf("Line: %s", EA_STRINGIFY(__LINE__)); + // + #ifndef EA_STRINGIFY + #define EA_STRINGIFY(x) EA_STRINGIFYIMPL(x) + #define EA_STRINGIFYIMPL(x) #x + #endif + + + // ------------------------------------------------------------------------ + // EA_IDENTITY + // + #ifndef EA_IDENTITY + #define EA_IDENTITY(x) x + #endif + + + // ------------------------------------------------------------------------ + // EA_COMPILER_MANAGED_CPP + // Defined if this is being compiled with Managed C++ extensions + #ifdef EA_COMPILER_MSVC + #if EA_COMPILER_VERSION >= 1300 + #ifdef _MANAGED + #define EA_COMPILER_MANAGED_CPP 1 + #endif + #endif + #endif + + + // ------------------------------------------------------------------------ + // EA_COMPILER_INTMAX_SIZE + // + // This is related to the concept of intmax_t uintmax_t, but is available + // in preprocessor form as opposed to compile-time form. At compile-time + // you can use intmax_t and uintmax_t to use the actual types. + // + #if defined(__GNUC__) && defined(__x86_64__) + #define EA_COMPILER_INTMAX_SIZE 16 // intmax_t is __int128_t (GCC extension) and is 16 bytes. + #else + #define EA_COMPILER_INTMAX_SIZE 8 // intmax_t is int64_t and is 8 bytes. + #endif + + + + // ------------------------------------------------------------------------ + // EA_LPAREN / EA_RPAREN / EA_COMMA / EA_SEMI + // + // These are used for using special characters in macro-using expressions. + // Note that this macro intentionally uses (), as in some cases it can't + // work unless it does. + // + // Example usage: + // int x = SOME_MACRO(SomeTemplate); + // + #ifndef EA_LPAREN + #define EA_LPAREN() ( + #endif + #ifndef EA_RPAREN + #define EA_RPAREN() ) + #endif + #ifndef EA_COMMA + #define EA_COMMA() , + #endif + #ifndef EA_SEMI + #define EA_SEMI() ; + #endif + + + + + // ------------------------------------------------------------------------ + // EA_OFFSETOF + // Implements a portable version of the non-standard offsetof macro. + // + // The offsetof macro is guaranteed to only work with POD types. However, we wish to use + // it for non-POD types but where we know that offsetof will still work for the cases + // in which we use it. GCC unilaterally gives a warning when using offsetof with a non-POD, + // even if the given usage happens to work. So we make a workaround version of offsetof + // here for GCC which has the same effect but tricks the compiler into not issuing the warning. + // The 65536 does the compiler fooling; the reinterpret_cast prevents the possibility of + // an overloaded operator& for the class getting in the way. + // + // Example usage: + // struct A{ int x; int y; }; + // size_t n = EA_OFFSETOF(A, y); + // + #if defined(__GNUC__) // We can't use GCC 4's __builtin_offsetof because it mistakenly complains about non-PODs that are really PODs. + #define EA_OFFSETOF(struct_, member_) ((size_t)(((uintptr_t)&reinterpret_cast((((struct_*)65536)->member_))) - 65536)) + #else + #define EA_OFFSETOF(struct_, member_) offsetof(struct_, member_) + #endif + + // ------------------------------------------------------------------------ + // EA_SIZEOF_MEMBER + // Implements a portable way to determine the size of a member. 
+ // + // The EA_SIZEOF_MEMBER simply returns the size of a member within a class or struct; member + // access rules still apply. We offer two approaches depending on the compiler's support for non-static member + // initializers although most C++11 compilers support this. + // + // Example usage: + // struct A{ int x; int y; }; + // size_t n = EA_SIZEOF_MEMBER(A, y); + // + #ifndef EA_COMPILER_NO_EXTENDED_SIZEOF + #define EA_SIZEOF_MEMBER(struct_, member_) (sizeof(struct_::member_)) + #else + #define EA_SIZEOF_MEMBER(struct_, member_) (sizeof(((struct_*)0)->member_)) + #endif + + // ------------------------------------------------------------------------ + // alignment expressions + // + // Here we define + // EA_ALIGN_OF(type) // Returns size_t. + // EA_ALIGN_MAX_STATIC // The max align value that the compiler will respect for EA_ALIGN for static data (global and static variables). Some compilers allow high values, some allow no more than 8. EA_ALIGN_MIN is assumed to be 1. + // EA_ALIGN_MAX_AUTOMATIC // The max align value for automatic variables (variables declared as local to a function). + // EA_ALIGN(n) // Used as a prefix. n is byte alignment, with being a power of two. Most of the time you can use this and avoid using EA_PREFIX_ALIGN/EA_POSTFIX_ALIGN. + // EA_ALIGNED(t, v, n) // Type, variable, alignment. Used to align an instance. You should need this only for unusual compilers. + // EA_PACKED // Specifies that the given structure be packed (and not have its members aligned). + // + // Also we define the following for rare cases that it's needed. + // EA_PREFIX_ALIGN(n) // n is byte alignment, with being a power of two. You should need this only for unusual compilers. + // EA_POSTFIX_ALIGN(n) // Valid values for n are 1, 2, 4, 8, etc. You should need this only for unusual compilers. + // + // Example usage: + // size_t x = EA_ALIGN_OF(int); Non-aligned equivalents. Meaning + // EA_PREFIX_ALIGN(8) int x = 5; int x = 5; Align x on 8 for compilers that require prefix attributes. Can just use EA_ALIGN instead. + // EA_ALIGN(8) int x; int x; Align x on 8 for compilers that allow prefix attributes. + // int x EA_POSTFIX_ALIGN(8); int x; Align x on 8 for compilers that require postfix attributes. + // int x EA_POSTFIX_ALIGN(8) = 5; int x = 5; Align x on 8 for compilers that require postfix attributes. + // int x EA_POSTFIX_ALIGN(8)(5); int x(5); Align x on 8 for compilers that require postfix attributes. + // struct EA_PREFIX_ALIGN(8) X { int x; } EA_POSTFIX_ALIGN(8); struct X { int x; }; Define X as a struct which is aligned on 8 when used. + // EA_ALIGNED(int, x, 8) = 5; int x = 5; Align x on 8. + // EA_ALIGNED(int, x, 16)(5); int x(5); Align x on 16. + // EA_ALIGNED(int, x[3], 16); int x[3]; Align x array on 16. + // EA_ALIGNED(int, x[3], 16) = { 1, 2, 3 }; int x[3] = { 1, 2, 3 }; Align x array on 16. + // int x[3] EA_PACKED; int x[3]; Pack the 3 ints of the x array. GCC doesn't seem to support packing of int arrays. + // struct EA_ALIGN(32) X { int x; int y; }; struct X { int x; }; Define A as a struct which is aligned on 32 when used. + // EA_ALIGN(32) struct X { int x; int y; } Z; struct X { int x; } Z; Define A as a struct, and align the instance Z on 32. + // struct X { int x EA_PACKED; int y EA_PACKED; }; struct X { int x; int y; }; Pack the x and y members of struct X. + // struct X { int x; int y; } EA_PACKED; struct X { int x; int y; }; Pack the members of struct X. 
+ // typedef EA_ALIGNED(int, int16, 16); int16 n16; typedef int int16; int16 n16; Define int16 as an int which is aligned on 16. + // typedef EA_ALIGNED(X, X16, 16); X16 x16; typedef X X16; X16 x16; Define X16 as an X which is aligned on 16. + + #if !defined(EA_ALIGN_MAX) // If the user hasn't globally set an alternative value... + #if defined(EA_PROCESSOR_ARM) // ARM compilers in general tend to limit automatic variables to 8 or less. + #define EA_ALIGN_MAX_STATIC 1048576 + #define EA_ALIGN_MAX_AUTOMATIC 1 // Typically they support only built-in natural aligment types (both arm-eabi and apple-abi). + #elif defined(EA_PLATFORM_APPLE) + #define EA_ALIGN_MAX_STATIC 1048576 + #define EA_ALIGN_MAX_AUTOMATIC 16 + #else + #define EA_ALIGN_MAX_STATIC 1048576 // Arbitrarily high value. What is the actual max? + #define EA_ALIGN_MAX_AUTOMATIC 1048576 + #endif + #endif + + // EDG intends to be compatible with GCC but has a bug whereby it + // fails to support calling a constructor in an aligned declaration when + // using postfix alignment attributes. Prefix works for alignment, but does not align + // the size like postfix does. Prefix also fails on templates. So gcc style post fix + // is still used, but the user will need to use EA_POSTFIX_ALIGN before the constructor parameters. + #if defined(__GNUC__) && (__GNUC__ < 3) + #define EA_ALIGN_OF(type) ((size_t)__alignof__(type)) + #define EA_ALIGN(n) + #define EA_PREFIX_ALIGN(n) + #define EA_POSTFIX_ALIGN(n) __attribute__((aligned(n))) + #define EA_ALIGNED(variable_type, variable, n) variable_type variable __attribute__((aligned(n))) + #define EA_PACKED __attribute__((packed)) + + // GCC 3.x+, IBM, and clang support prefix attributes. + #elif (defined(__GNUC__) && (__GNUC__ >= 3)) || defined(__xlC__) || defined(__clang__) + #define EA_ALIGN_OF(type) ((size_t)__alignof__(type)) + #define EA_ALIGN(n) __attribute__((aligned(n))) + #define EA_PREFIX_ALIGN(n) + #define EA_POSTFIX_ALIGN(n) __attribute__((aligned(n))) + #define EA_ALIGNED(variable_type, variable, n) variable_type variable __attribute__((aligned(n))) + #define EA_PACKED __attribute__((packed)) + + // Metrowerks supports prefix attributes. + // Metrowerks does not support packed alignment attributes. + #elif defined(EA_COMPILER_INTEL) || defined(CS_UNDEFINED_STRING) || (defined(EA_COMPILER_MSVC) && (EA_COMPILER_VERSION >= 1300)) + #define EA_ALIGN_OF(type) ((size_t)__alignof(type)) + #define EA_ALIGN(n) __declspec(align(n)) + #define EA_PREFIX_ALIGN(n) EA_ALIGN(n) + #define EA_POSTFIX_ALIGN(n) + #define EA_ALIGNED(variable_type, variable, n) EA_ALIGN(n) variable_type variable + #define EA_PACKED // See EA_PRAGMA_PACK_VC for an alternative. + + // Arm brand compiler + #elif defined(EA_COMPILER_ARM) + #define EA_ALIGN_OF(type) ((size_t)__ALIGNOF__(type)) + #define EA_ALIGN(n) __align(n) + #define EA_PREFIX_ALIGN(n) __align(n) + #define EA_POSTFIX_ALIGN(n) + #define EA_ALIGNED(variable_type, variable, n) __align(n) variable_type variable + #define EA_PACKED __packed + + #else // Unusual compilers + // There is nothing we can do about some of these. This is not as bad a problem as it seems. + // If the given platform/compiler doesn't support alignment specifications, then it's somewhat + // likely that alignment doesn't matter for that platform. Otherwise they would have defined + // functionality to manipulate alignment. 
+ #define EA_ALIGN(n) + #define EA_PREFIX_ALIGN(n) + #define EA_POSTFIX_ALIGN(n) + #define EA_ALIGNED(variable_type, variable, n) variable_type variable + #define EA_PACKED + + #ifdef __cplusplus + template struct EAAlignOf1 { enum { s = sizeof (T), value = s ^ (s & (s - 1)) }; }; + template struct EAAlignOf2; + template struct helper { template struct Val { enum { value = size_diff }; }; }; + template <> struct helper<0> { template struct Val { enum { value = EAAlignOf2::value }; }; }; + template struct EAAlignOf2 { struct Big { T x; char c; }; + enum { diff = sizeof (Big) - sizeof (T), value = helper::template Val::value }; }; + template struct EAAlignof3 { enum { x = EAAlignOf2::value, y = EAAlignOf1::value, value = x < y ? x : y }; }; + #define EA_ALIGN_OF(type) ((size_t)EAAlignof3::value) + + #else + // C implementation of EA_ALIGN_OF + // This implementation works for most cases, but doesn't directly work + // for types such as function pointer declarations. To work with those + // types you need to typedef the type and then use the typedef in EA_ALIGN_OF. + #define EA_ALIGN_OF(type) ((size_t)offsetof(struct { char c; type m; }, m)) + #endif + #endif + + // EA_PRAGMA_PACK_VC + // + // Wraps #pragma pack in a way that allows for cleaner code. + // + // Example usage: + // EA_PRAGMA_PACK_VC(push, 1) + // struct X{ char c; int i; }; + // EA_PRAGMA_PACK_VC(pop) + // + #if !defined(EA_PRAGMA_PACK_VC) + #if defined(EA_COMPILER_MSVC) + #define EA_PRAGMA_PACK_VC(...) __pragma(pack(__VA_ARGS__)) + #elif !defined(EA_COMPILER_NO_VARIADIC_MACROS) + #define EA_PRAGMA_PACK_VC(...) + #else + // No support. However, all compilers of significance to us support variadic macros. + #endif + #endif + + + // ------------------------------------------------------------------------ + // EA_LIKELY / EA_UNLIKELY + // + // Defined as a macro which gives a hint to the compiler for branch + // prediction. GCC gives you the ability to manually give a hint to + // the compiler about the result of a comparison, though it's often + // best to compile shipping code with profiling feedback under both + // GCC (-fprofile-arcs) and VC++ (/LTCG:PGO, etc.). However, there + // are times when you feel very sure that a boolean expression will + // usually evaluate to either true or false and can help the compiler + // by using an explicity directive... + // + // Example usage: + // if(EA_LIKELY(a == 0)) // Tell the compiler that a will usually equal 0. + // { ... } + // + // Example usage: + // if(EA_UNLIKELY(a == 0)) // Tell the compiler that a will usually not equal 0. + // { ... } + // + #ifndef EA_LIKELY + #if (defined(__GNUC__) && (__GNUC__ >= 3)) || defined(__clang__) + #if defined(__cplusplus) + #define EA_LIKELY(x) __builtin_expect(!!(x), true) + #define EA_UNLIKELY(x) __builtin_expect(!!(x), false) + #else + #define EA_LIKELY(x) __builtin_expect(!!(x), 1) + #define EA_UNLIKELY(x) __builtin_expect(!!(x), 0) + #endif + #else + #define EA_LIKELY(x) (x) + #define EA_UNLIKELY(x) (x) + #endif + #endif + + // ------------------------------------------------------------------------ + // EA_HAS_INCLUDE_AVAILABLE + // + // Used to guard against the EA_HAS_INCLUDE() macro on compilers that do not + // support said feature. 
+ // + // Example usage: + // + // #if EA_HAS_INCLUDE_AVAILABLE + // #if EA_HAS_INCLUDE("myinclude.h") + // #include "myinclude.h" + // #endif + // #endif + #if !defined(EA_HAS_INCLUDE_AVAILABLE) + #if EA_COMPILER_CPP17_ENABLED || EA_COMPILER_CLANG || EA_COMPILER_GNUC + #define EA_HAS_INCLUDE_AVAILABLE 1 + #else + #define EA_HAS_INCLUDE_AVAILABLE 0 + #endif + #endif + + + // ------------------------------------------------------------------------ + // EA_HAS_INCLUDE + // + // May be used in #if and #elif expressions to test for the existence + // of the header referenced in the operand. If possible it evaluates to a + // non-zero value and zero otherwise. The operand is the same form as the file + // in a #include directive. + // + // Example usage: + // + // #if EA_HAS_INCLUDE("myinclude.h") + // #include "myinclude.h" + // #endif + // + // #if EA_HAS_INCLUDE() + // #include + // #endif + + #if !defined(EA_HAS_INCLUDE) + #if EA_COMPILER_CPP17_ENABLED + #define EA_HAS_INCLUDE(x) __has_include(x) + #elif EA_COMPILER_CLANG + #define EA_HAS_INCLUDE(x) __has_include(x) + #elif EA_COMPILER_GNUC + #define EA_HAS_INCLUDE(x) __has_include(x) + #endif + #endif + + + // ------------------------------------------------------------------------ + // EA_INIT_PRIORITY_AVAILABLE + // + // This value is either not defined, or defined to 1. + // Defines if the GCC attribute init_priority is supported by the compiler. + // + #if !defined(EA_INIT_PRIORITY_AVAILABLE) + #if defined(__GNUC__) && !defined(__EDG__) // EDG typically #defines __GNUC__ but doesn't implement init_priority. + #define EA_INIT_PRIORITY_AVAILABLE 1 + #elif defined(__clang__) + #define EA_INIT_PRIORITY_AVAILABLE 1 // Clang implements init_priority + #endif + #endif + + + // ------------------------------------------------------------------------ + // EA_INIT_PRIORITY + // + // This is simply a wrapper for the GCC init_priority attribute that allows + // multiplatform code to be easier to read. This attribute doesn't apply + // to VC++ because VC++ uses file-level pragmas to control init ordering. + // + // Example usage: + // SomeClass gSomeClass EA_INIT_PRIORITY(2000); + // + #if !defined(EA_INIT_PRIORITY) + #if defined(EA_INIT_PRIORITY_AVAILABLE) + #define EA_INIT_PRIORITY(x) __attribute__ ((init_priority (x))) + #else + #define EA_INIT_PRIORITY(x) + #endif + #endif + + + // ------------------------------------------------------------------------ + // EA_INIT_SEG_AVAILABLE + // + // + #if !defined(EA_INIT_SEG_AVAILABLE) + #if defined(_MSC_VER) + #define EA_INIT_SEG_AVAILABLE 1 + #endif + #endif + + + // ------------------------------------------------------------------------ + // EA_INIT_SEG + // + // Specifies a keyword or code section that affects the order in which startup code is executed. + // + // https://docs.microsoft.com/en-us/cpp/preprocessor/init-seg?view=vs-2019 + // + // Example: + // EA_INIT_SEG(compiler) MyType gMyTypeGlobal; + // EA_INIT_SEG("my_section") MyOtherType gMyOtherTypeGlobal; + // + #if !defined(EA_INIT_SEG) + #if defined(EA_INIT_SEG_AVAILABLE) + #define EA_INIT_SEG(x) \ + __pragma(warning(push)) __pragma(warning(disable : 4074)) __pragma(warning(disable : 4075)) __pragma(init_seg(x)) \ + __pragma(warning(pop)) + #else + #define EA_INIT_SEG(x) + #endif + #endif + + + // ------------------------------------------------------------------------ + // EA_MAY_ALIAS_AVAILABLE + // + // Defined as 0, 1, or 2. + // Defines if the GCC attribute may_alias is supported by the compiler. 
+ // Consists of a value 0 (unsupported, shouldn't be used), 1 (some support), + // or 2 (full proper support). + // + #ifndef EA_MAY_ALIAS_AVAILABLE + #if defined(__GNUC__) && (((__GNUC__ * 100) + __GNUC_MINOR__) >= 303) + #if !defined(__EDG__) // define it as 1 while defining GCC's support as 2. + #define EA_MAY_ALIAS_AVAILABLE 2 + #else + #define EA_MAY_ALIAS_AVAILABLE 0 + #endif + #else + #define EA_MAY_ALIAS_AVAILABLE 0 + #endif + #endif + + + // EA_MAY_ALIAS + // + // Defined as a macro that wraps the GCC may_alias attribute. This attribute + // has no significance for VC++ because VC++ doesn't support the concept of + // strict aliasing. Users should avoid writing code that breaks strict + // aliasing rules; EA_MAY_ALIAS is for cases with no alternative. + // + // Example usage: + // void* EA_MAY_ALIAS gPtr = NULL; + // + // Example usage: + // typedef void* EA_MAY_ALIAS pvoid_may_alias; + // pvoid_may_alias gPtr = NULL; + // + #if EA_MAY_ALIAS_AVAILABLE + #define EA_MAY_ALIAS __attribute__((__may_alias__)) + #else + #define EA_MAY_ALIAS + #endif + + + // ------------------------------------------------------------------------ + // EA_ASSUME + // + // This acts the same as the VC++ __assume directive and is implemented + // simply as a wrapper around it to allow portable usage of it and to take + // advantage of it if and when it appears in other compilers. + // + // Example usage: + // void Function(int a) { + // switch(a) { + // case 1: + // DoSomething(1); + // break; + // case 2: + // DoSomething(-1); + // break; + // default: + // EA_ASSUME(0); // This tells the optimizer that the default cannot be reached. + // } + // } + // + #ifndef EA_ASSUME + #if defined(_MSC_VER) && (_MSC_VER >= 1300) // If VC7.0 and later + #define EA_ASSUME(x) __assume(x) + #else + #define EA_ASSUME(x) + #endif + #endif + + + + // ------------------------------------------------------------------------ + // EA_ANALYSIS_ASSUME + // + // This acts the same as the VC++ __analysis_assume directive and is implemented + // simply as a wrapper around it to allow portable usage of it and to take + // advantage of it if and when it appears in other compilers. + // + // Example usage: + // char Function(char* p) { + // EA_ANALYSIS_ASSUME(p != NULL); + // return *p; + // } + // + #ifndef EA_ANALYSIS_ASSUME + #if defined(_MSC_VER) && (_MSC_VER >= 1300) // If VC7.0 and later + #define EA_ANALYSIS_ASSUME(x) __analysis_assume(!!(x)) // !! because that allows for convertible-to-bool in addition to bool. + #else + #define EA_ANALYSIS_ASSUME(x) + #endif + #endif + + + + // ------------------------------------------------------------------------ + // EA_DISABLE_VC_WARNING / EA_RESTORE_VC_WARNING + // + // Disable and re-enable warning(s) within code. + // This is simply a wrapper for VC++ #pragma warning(disable: nnnn) for the + // purpose of making code easier to read due to avoiding nested compiler ifdefs + // directly in code. 
+ // + // Example usage: + // EA_DISABLE_VC_WARNING(4127 3244) + // + // EA_RESTORE_VC_WARNING() + // + #ifndef EA_DISABLE_VC_WARNING + #if defined(_MSC_VER) + #define EA_DISABLE_VC_WARNING(w) \ + __pragma(warning(push)) \ + __pragma(warning(disable:w)) + #else + #define EA_DISABLE_VC_WARNING(w) + #endif + #endif + + #ifndef EA_RESTORE_VC_WARNING + #if defined(_MSC_VER) + #define EA_RESTORE_VC_WARNING() \ + __pragma(warning(pop)) + #else + #define EA_RESTORE_VC_WARNING() + #endif + #endif + + + // ------------------------------------------------------------------------ + // EA_ENABLE_VC_WARNING_AS_ERROR / EA_DISABLE_VC_WARNING_AS_ERROR + // + // Disable and re-enable treating a warning as error within code. + // This is simply a wrapper for VC++ #pragma warning(error: nnnn) for the + // purpose of making code easier to read due to avoiding nested compiler ifdefs + // directly in code. + // + // Example usage: + // EA_ENABLE_VC_WARNING_AS_ERROR(4996) + // + // EA_DISABLE_VC_WARNING_AS_ERROR() + // + #ifndef EA_ENABLE_VC_WARNING_AS_ERROR + #if defined(_MSC_VER) + #define EA_ENABLE_VC_WARNING_AS_ERROR(w) \ + __pragma(warning(push)) \ + __pragma(warning(error:w)) + #else + #define EA_ENABLE_VC_WARNING_AS_ERROR(w) + #endif + #endif + + #ifndef EA_DISABLE_VC_WARNING_AS_ERROR + #if defined(_MSC_VER) + #define EA_DISABLE_VC_WARNING_AS_ERROR() \ + __pragma(warning(pop)) + #else + #define EA_DISABLE_VC_WARNING_AS_ERROR() + #endif + #endif + + + // ------------------------------------------------------------------------ + // EA_DISABLE_GCC_WARNING / EA_RESTORE_GCC_WARNING + // + // Example usage: + // // Only one warning can be ignored per statement, due to how GCC works. + // EA_DISABLE_GCC_WARNING(-Wuninitialized) + // EA_DISABLE_GCC_WARNING(-Wunused) + // + // EA_RESTORE_GCC_WARNING() + // EA_RESTORE_GCC_WARNING() + // + #ifndef EA_DISABLE_GCC_WARNING + #if defined(EA_COMPILER_GNUC) + #define EAGCCWHELP0(x) #x + #define EAGCCWHELP1(x) EAGCCWHELP0(GCC diagnostic ignored x) + #define EAGCCWHELP2(x) EAGCCWHELP1(#x) + #endif + + #if defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4006) // Can't test directly for __GNUC__ because some compilers lie. + #define EA_DISABLE_GCC_WARNING(w) \ + _Pragma("GCC diagnostic push") \ + _Pragma(EAGCCWHELP2(w)) + #elif defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4004) + #define EA_DISABLE_GCC_WARNING(w) \ + _Pragma(EAGCCWHELP2(w)) + #else + #define EA_DISABLE_GCC_WARNING(w) + #endif + #endif + + #ifndef EA_RESTORE_GCC_WARNING + #if defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4006) + #define EA_RESTORE_GCC_WARNING() \ + _Pragma("GCC diagnostic pop") + #else + #define EA_RESTORE_GCC_WARNING() + #endif + #endif + + + // ------------------------------------------------------------------------ + // EA_DISABLE_ALL_GCC_WARNINGS / EA_RESTORE_ALL_GCC_WARNINGS + // + // This isn't possible except via using _Pragma("GCC system_header"), though + // that has some limitations in how it works. Another means is to manually + // disable individual warnings within a GCC diagnostic push statement. + // GCC doesn't have as many warnings as VC++ and EDG and so this may be feasible. + // ------------------------------------------------------------------------ + + + // ------------------------------------------------------------------------ + // EA_ENABLE_GCC_WARNING_AS_ERROR / EA_DISABLE_GCC_WARNING_AS_ERROR + // + // Example usage: + // // Only one warning can be treated as an error per statement, due to how GCC works. 
+ // EA_ENABLE_GCC_WARNING_AS_ERROR(-Wuninitialized) + // EA_ENABLE_GCC_WARNING_AS_ERROR(-Wunused) + // + // EA_DISABLE_GCC_WARNING_AS_ERROR() + // EA_DISABLE_GCC_WARNING_AS_ERROR() + // + #ifndef EA_ENABLE_GCC_WARNING_AS_ERROR + #if defined(EA_COMPILER_GNUC) + #define EAGCCWERRORHELP0(x) #x + #define EAGCCWERRORHELP1(x) EAGCCWERRORHELP0(GCC diagnostic error x) + #define EAGCCWERRORHELP2(x) EAGCCWERRORHELP1(#x) + #endif + + #if defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4006) // Can't test directly for __GNUC__ because some compilers lie. + #define EA_ENABLE_GCC_WARNING_AS_ERROR(w) \ + _Pragma("GCC diagnostic push") \ + _Pragma(EAGCCWERRORHELP2(w)) + #elif defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4004) + #define EA_DISABLE_GCC_WARNING(w) \ + _Pragma(EAGCCWERRORHELP2(w)) + #else + #define EA_DISABLE_GCC_WARNING(w) + #endif + #endif + + #ifndef EA_DISABLE_GCC_WARNING_AS_ERROR + #if defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4006) + #define EA_DISABLE_GCC_WARNING_AS_ERROR() \ + _Pragma("GCC diagnostic pop") + #else + #define EA_DISABLE_GCC_WARNING_AS_ERROR() + #endif + #endif + + + // ------------------------------------------------------------------------ + // EA_DISABLE_CLANG_WARNING / EA_RESTORE_CLANG_WARNING + // + // Example usage: + // // Only one warning can be ignored per statement, due to how clang works. + // EA_DISABLE_CLANG_WARNING(-Wuninitialized) + // EA_DISABLE_CLANG_WARNING(-Wunused) + // + // EA_RESTORE_CLANG_WARNING() + // EA_RESTORE_CLANG_WARNING() + // + #ifndef EA_DISABLE_CLANG_WARNING + #if defined(EA_COMPILER_CLANG) || defined(EA_COMPILER_CLANG_CL) + #define EACLANGWHELP0(x) #x + #define EACLANGWHELP1(x) EACLANGWHELP0(clang diagnostic ignored x) + #define EACLANGWHELP2(x) EACLANGWHELP1(#x) + + #define EA_DISABLE_CLANG_WARNING(w) \ + _Pragma("clang diagnostic push") \ + _Pragma(EACLANGWHELP2(-Wunknown-warning-option))\ + _Pragma(EACLANGWHELP2(w)) + #else + #define EA_DISABLE_CLANG_WARNING(w) + #endif + #endif + + #ifndef EA_RESTORE_CLANG_WARNING + #if defined(EA_COMPILER_CLANG) || defined(EA_COMPILER_CLANG_CL) + #define EA_RESTORE_CLANG_WARNING() \ + _Pragma("clang diagnostic pop") + #else + #define EA_RESTORE_CLANG_WARNING() + #endif + #endif + + + // ------------------------------------------------------------------------ + // EA_DISABLE_ALL_CLANG_WARNINGS / EA_RESTORE_ALL_CLANG_WARNINGS + // + // The situation for clang is the same as for GCC. See above. + // ------------------------------------------------------------------------ + + + // ------------------------------------------------------------------------ + // EA_ENABLE_CLANG_WARNING_AS_ERROR / EA_DISABLE_CLANG_WARNING_AS_ERROR + // + // Example usage: + // // Only one warning can be treated as an error per statement, due to how clang works. 
+ // EA_ENABLE_CLANG_WARNING_AS_ERROR(-Wuninitialized) + // EA_ENABLE_CLANG_WARNING_AS_ERROR(-Wunused) + // + // EA_DISABLE_CLANG_WARNING_AS_ERROR() + // EA_DISABLE_CLANG_WARNING_AS_ERROR() + // + #ifndef EA_ENABLE_CLANG_WARNING_AS_ERROR + #if defined(EA_COMPILER_CLANG) || defined(EA_COMPILER_CLANG_CL) + #define EACLANGWERRORHELP0(x) #x + #define EACLANGWERRORHELP1(x) EACLANGWERRORHELP0(clang diagnostic error x) + #define EACLANGWERRORHELP2(x) EACLANGWERRORHELP1(#x) + + #define EA_ENABLE_CLANG_WARNING_AS_ERROR(w) \ + _Pragma("clang diagnostic push") \ + _Pragma(EACLANGWERRORHELP2(w)) + #else + #define EA_DISABLE_CLANG_WARNING(w) + #endif + #endif + + #ifndef EA_DISABLE_CLANG_WARNING_AS_ERROR + #if defined(EA_COMPILER_CLANG) || defined(EA_COMPILER_CLANG_CL) + #define EA_DISABLE_CLANG_WARNING_AS_ERROR() \ + _Pragma("clang diagnostic pop") + #else + #define EA_DISABLE_CLANG_WARNING_AS_ERROR() + #endif + #endif + + + // ------------------------------------------------------------------------ + // EA_DISABLE_SN_WARNING / EA_RESTORE_SN_WARNING + // + // Note that we define this macro specifically for the SN compiler instead of + // having a generic one for EDG-based compilers. The reason for this is that + // while SN is indeed based on EDG, SN has different warning value mappings + // and thus warning 1234 for SN is not the same as 1234 for all other EDG compilers. + // + // Example usage: + // // Currently we are limited to one warning per line. + // EA_DISABLE_SN_WARNING(1787) + // EA_DISABLE_SN_WARNING(552) + // + // EA_RESTORE_SN_WARNING() + // EA_RESTORE_SN_WARNING() + // + #ifndef EA_DISABLE_SN_WARNING + #define EA_DISABLE_SN_WARNING(w) + #endif + + #ifndef EA_RESTORE_SN_WARNING + #define EA_RESTORE_SN_WARNING() + #endif + + + // ------------------------------------------------------------------------ + // EA_DISABLE_ALL_SN_WARNINGS / EA_RESTORE_ALL_SN_WARNINGS + // + // Example usage: + // EA_DISABLE_ALL_SN_WARNINGS() + // + // EA_RESTORE_ALL_SN_WARNINGS() + // + #ifndef EA_DISABLE_ALL_SN_WARNINGS + #define EA_DISABLE_ALL_SN_WARNINGS() + #endif + + #ifndef EA_RESTORE_ALL_SN_WARNINGS + #define EA_RESTORE_ALL_SN_WARNINGS() + #endif + + + + // ------------------------------------------------------------------------ + // EA_DISABLE_GHS_WARNING / EA_RESTORE_GHS_WARNING + // + // Disable warnings from the Green Hills compiler. + // + // Example usage: + // EA_DISABLE_GHS_WARNING(193) + // EA_DISABLE_GHS_WARNING(236, 5323) + // + // EA_RESTORE_GHS_WARNING() + // EA_RESTORE_GHS_WARNING() + // + #ifndef EA_DISABLE_GHS_WARNING + #define EA_DISABLE_GHS_WARNING(w) + #endif + + #ifndef EA_RESTORE_GHS_WARNING + #define EA_RESTORE_GHS_WARNING() + #endif + + + // ------------------------------------------------------------------------ + // EA_DISABLE_ALL_GHS_WARNINGS / EA_RESTORE_ALL_GHS_WARNINGS + // + // #ifndef EA_DISABLE_ALL_GHS_WARNINGS + // #if defined(EA_COMPILER_GREEN_HILLS) + // #define EA_DISABLE_ALL_GHS_WARNINGS(w) \_ + // _Pragma("_________") + // #else + // #define EA_DISABLE_ALL_GHS_WARNINGS(w) + // #endif + // #endif + // + // #ifndef EA_RESTORE_ALL_GHS_WARNINGS + // #if defined(EA_COMPILER_GREEN_HILLS) + // #define EA_RESTORE_ALL_GHS_WARNINGS() \_ + // _Pragma("_________") + // #else + // #define EA_RESTORE_ALL_GHS_WARNINGS() + // #endif + // #endif + + + + // ------------------------------------------------------------------------ + // EA_DISABLE_EDG_WARNING / EA_RESTORE_EDG_WARNING + // + // Example usage: + // // Currently we are limited to one warning per line. 
+ // EA_DISABLE_EDG_WARNING(193) + // EA_DISABLE_EDG_WARNING(236) + // + // EA_RESTORE_EDG_WARNING() + // EA_RESTORE_EDG_WARNING() + // + #ifndef EA_DISABLE_EDG_WARNING + // EDG-based compilers are inconsistent in how the implement warning pragmas. + #if defined(EA_COMPILER_EDG) && !defined(EA_COMPILER_INTEL) && !defined(EA_COMPILER_RVCT) + #define EAEDGWHELP0(x) #x + #define EAEDGWHELP1(x) EAEDGWHELP0(diag_suppress x) + + #define EA_DISABLE_EDG_WARNING(w) \ + _Pragma("control %push diag") \ + _Pragma(EAEDGWHELP1(w)) + #else + #define EA_DISABLE_EDG_WARNING(w) + #endif + #endif + + #ifndef EA_RESTORE_EDG_WARNING + #if defined(EA_COMPILER_EDG) && !defined(EA_COMPILER_INTEL) && !defined(EA_COMPILER_RVCT) + #define EA_RESTORE_EDG_WARNING() \ + _Pragma("control %pop diag") + #else + #define EA_RESTORE_EDG_WARNING() + #endif + #endif + + + // ------------------------------------------------------------------------ + // EA_DISABLE_ALL_EDG_WARNINGS / EA_RESTORE_ALL_EDG_WARNINGS + // + //#ifndef EA_DISABLE_ALL_EDG_WARNINGS + // #if defined(EA_COMPILER_EDG) && !defined(EA_COMPILER_SN) + // #define EA_DISABLE_ALL_EDG_WARNINGS(w) \_ + // _Pragma("_________") + // #else + // #define EA_DISABLE_ALL_EDG_WARNINGS(w) + // #endif + //#endif + // + //#ifndef EA_RESTORE_ALL_EDG_WARNINGS + // #if defined(EA_COMPILER_EDG) && !defined(EA_COMPILER_SN) + // #define EA_RESTORE_ALL_EDG_WARNINGS() \_ + // _Pragma("_________") + // #else + // #define EA_RESTORE_ALL_EDG_WARNINGS() + // #endif + //#endif + + + + // ------------------------------------------------------------------------ + // EA_DISABLE_CW_WARNING / EA_RESTORE_CW_WARNING + // + // Note that this macro can only control warnings via numbers and not by + // names. The reason for this is that the compiler's syntax for such + // warnings is not the same as for numbers. + // + // Example usage: + // // Currently we are limited to one warning per line and must also specify the warning in the restore macro. + // EA_DISABLE_CW_WARNING(10317) + // EA_DISABLE_CW_WARNING(10324) + // + // EA_RESTORE_CW_WARNING(10317) + // EA_RESTORE_CW_WARNING(10324) + // + #ifndef EA_DISABLE_CW_WARNING + #define EA_DISABLE_CW_WARNING(w) + #endif + + #ifndef EA_RESTORE_CW_WARNING + + #define EA_RESTORE_CW_WARNING(w) + + #endif + + + // ------------------------------------------------------------------------ + // EA_DISABLE_ALL_CW_WARNINGS / EA_RESTORE_ALL_CW_WARNINGS + // + #ifndef EA_DISABLE_ALL_CW_WARNINGS + #define EA_DISABLE_ALL_CW_WARNINGS() + + #endif + + #ifndef EA_RESTORE_ALL_CW_WARNINGS + #define EA_RESTORE_ALL_CW_WARNINGS() + #endif + + + + // ------------------------------------------------------------------------ + // EA_PURE + // + // This acts the same as the GCC __attribute__ ((pure)) directive and is + // implemented simply as a wrapper around it to allow portable usage of + // it and to take advantage of it if and when it appears in other compilers. + // + // A "pure" function is one that has no effects except its return value and + // its return value is a function of only the function's parameters or + // non-volatile global variables. Any parameter or global variable access + // must be read-only. Loop optimization and subexpression elimination can be + // applied to such functions. A common example is strlen(): Given identical + // inputs, the function's return value (its only effect) is invariant across + // multiple invocations and thus can be pulled out of a loop and called but once. 
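+ //
+ // Illustrative sketch (added, not upstream EABase text): a strlen-style helper is a typical
+ // candidate, since its result depends only on its argument and the memory it reads.
+ // EA_PURE size_t StringLength(const char* pString); // hypothetical function; calls can be hoisted out of loops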
+ // + // Example usage: + // EA_PURE void Function(); + // + #ifndef EA_PURE + #if defined(EA_COMPILER_GNUC) + #define EA_PURE __attribute__((pure)) + #elif defined(EA_COMPILER_ARM) // Arm brand compiler for ARM CPU + #define EA_PURE __pure + #else + #define EA_PURE + #endif + #endif + + + + // ------------------------------------------------------------------------ + // EA_WEAK + // EA_WEAK_SUPPORTED -- defined as 0 or 1. + // + // GCC + // The weak attribute causes the declaration to be emitted as a weak + // symbol rather than a global. This is primarily useful in defining + // library functions which can be overridden in user code, though it + // can also be used with non-function declarations. + // + // VC++ + // At link time, if multiple definitions of a COMDAT are seen, the linker + // picks one and discards the rest. If the linker option /OPT:REF + // is selected, then COMDAT elimination will occur to remove all the + // unreferenced data items in the linker output. + // + // Example usage: + // EA_WEAK void Function(); + // + #ifndef EA_WEAK + #if defined(_MSC_VER) && (_MSC_VER >= 1300) // If VC7.0 and later + #define EA_WEAK __declspec(selectany) + #define EA_WEAK_SUPPORTED 1 + #elif defined(_MSC_VER) || (defined(__GNUC__) && defined(__CYGWIN__)) + #define EA_WEAK + #define EA_WEAK_SUPPORTED 0 + #elif defined(EA_COMPILER_ARM) // Arm brand compiler for ARM CPU + #define EA_WEAK __weak + #define EA_WEAK_SUPPORTED 1 + #else // GCC and IBM compilers, others. + #define EA_WEAK __attribute__((weak)) + #define EA_WEAK_SUPPORTED 1 + #endif + #endif + + + + // ------------------------------------------------------------------------ + // EA_UNUSED + // + // Makes compiler warnings about unused variables go away. + // + // Example usage: + // void Function(int x) + // { + // int y; + // EA_UNUSED(x); + // EA_UNUSED(y); + // } + // + #ifndef EA_UNUSED + // The EDG solution below is pretty weak and needs to be augmented or replaced. + // It can't handle the C language, is limited to places where template declarations + // can be used, and requires the type x to be usable as a functions reference argument. + #if defined(__cplusplus) && defined(__EDG__) + template + inline void EABaseUnused(T const volatile & x) { (void)x; } + #define EA_UNUSED(x) EABaseUnused(x) + #else + #define EA_UNUSED(x) (void)x + #endif + #endif + + + + // ------------------------------------------------------------------------ + // EA_EMPTY + // + // Allows for a null statement, usually for the purpose of avoiding compiler warnings. + // + // Example usage: + // #ifdef EA_DEBUG + // #define MyDebugPrintf(x, y) printf(x, y) + // #else + // #define MyDebugPrintf(x, y) EA_EMPTY + // #endif + // + #ifndef EA_EMPTY + #define EA_EMPTY (void)0 + #endif + + + // ------------------------------------------------------------------------ + // EA_CURRENT_FUNCTION + // + // Provides a consistent way to get the current function name as a macro + // like the __FILE__ and __LINE__ macros work. The C99 standard specifies + // that __func__ be provided by the compiler, but most compilers don't yet + // follow that convention. However, many compilers have an alternative. + // + // We also define EA_CURRENT_FUNCTION_SUPPORTED for when it is not possible + // to have EA_CURRENT_FUNCTION work as expected. + // + // Defined inside a function because otherwise the macro might not be + // defined and code below might not compile. This happens with some + // compilers. 
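// A minimal usage sketch (MY_TRACE_ENTER below is a hypothetical macro, not part
// of EABase). EA_CURRENT_FUNCTION expands to a character string that can be used
// wherever a const char* is expected, alongside __FILE__ and __LINE__:
//
//     #include <cstdio>
//
//     #define MY_TRACE_ENTER() printf("entering %s (%s:%d)\n", EA_CURRENT_FUNCTION, __FILE__, __LINE__)
//
//     void LoadAssets()
//     {
//         MY_TRACE_ENTER();   // e.g. prints "entering void LoadAssets() (Assets.cpp:42)"
//     }
//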
+ // + #ifndef EA_CURRENT_FUNCTION + #if defined __GNUC__ || (defined __ICC && __ICC >= 600) + #define EA_CURRENT_FUNCTION __PRETTY_FUNCTION__ + #elif defined(__FUNCSIG__) + #define EA_CURRENT_FUNCTION __FUNCSIG__ + #elif (defined __INTEL_COMPILER && __INTEL_COMPILER >= 600) || (defined __IBMCPP__ && __IBMCPP__ >= 500) || (defined CS_UNDEFINED_STRING && CS_UNDEFINED_STRING >= 0x4200) + #define EA_CURRENT_FUNCTION __FUNCTION__ + #elif defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901 + #define EA_CURRENT_FUNCTION __func__ + #else + #define EA_CURRENT_FUNCTION "(unknown function)" + #endif + #endif + + + // ------------------------------------------------------------------------ + // wchar_t + // Here we define: + // EA_WCHAR_T_NON_NATIVE + // EA_WCHAR_SIZE = + // + #ifndef EA_WCHAR_T_NON_NATIVE + // Compilers that always implement wchar_t as native include: + // COMEAU, new SN, and other EDG-based compilers. + // GCC + // Borland + // SunPro + // IBM Visual Age + #if defined(EA_COMPILER_INTEL) + #if (EA_COMPILER_VERSION < 700) + #define EA_WCHAR_T_NON_NATIVE 1 + #else + #if (!defined(_WCHAR_T_DEFINED) && !defined(_WCHAR_T)) + #define EA_WCHAR_T_NON_NATIVE 1 + #endif + #endif + #elif defined(EA_COMPILER_MSVC) || (defined(EA_COMPILER_CLANG) && defined(EA_PLATFORM_WINDOWS)) + #ifndef _NATIVE_WCHAR_T_DEFINED + #define EA_WCHAR_T_NON_NATIVE 1 + #endif + #elif defined(__EDG_VERSION__) && (!defined(_WCHAR_T) && (__EDG_VERSION__ < 400)) // EDG prior to v4 uses _WCHAR_T to indicate if wchar_t is native. v4+ may define something else, but we're not currently aware of it. + #define EA_WCHAR_T_NON_NATIVE 1 + #endif + #endif + + #ifndef EA_WCHAR_SIZE // If the user hasn't specified that it is a given size... + #if defined(__WCHAR_MAX__) // GCC defines this for most platforms. + #if (__WCHAR_MAX__ == 2147483647) || (__WCHAR_MAX__ == 4294967295) + #define EA_WCHAR_SIZE 4 + #elif (__WCHAR_MAX__ == 32767) || (__WCHAR_MAX__ == 65535) + #define EA_WCHAR_SIZE 2 + #elif (__WCHAR_MAX__ == 127) || (__WCHAR_MAX__ == 255) + #define EA_WCHAR_SIZE 1 + #else + #define EA_WCHAR_SIZE 4 + #endif + #elif defined(WCHAR_MAX) // The SN and Arm compilers define this. + #if (WCHAR_MAX == 2147483647) || (WCHAR_MAX == 4294967295) + #define EA_WCHAR_SIZE 4 + #elif (WCHAR_MAX == 32767) || (WCHAR_MAX == 65535) + #define EA_WCHAR_SIZE 2 + #elif (WCHAR_MAX == 127) || (WCHAR_MAX == 255) + #define EA_WCHAR_SIZE 1 + #else + #define EA_WCHAR_SIZE 4 + #endif + #elif defined(__WCHAR_BIT) // Green Hills (and other versions of EDG?) uses this. + #if (__WCHAR_BIT == 16) + #define EA_WCHAR_SIZE 2 + #elif (__WCHAR_BIT == 32) + #define EA_WCHAR_SIZE 4 + #elif (__WCHAR_BIT == 8) + #define EA_WCHAR_SIZE 1 + #else + #define EA_WCHAR_SIZE 4 + #endif + #elif defined(_WCMAX) // The SN and Arm compilers define this. + #if (_WCMAX == 2147483647) || (_WCMAX == 4294967295) + #define EA_WCHAR_SIZE 4 + #elif (_WCMAX == 32767) || (_WCMAX == 65535) + #define EA_WCHAR_SIZE 2 + #elif (_WCMAX == 127) || (_WCMAX == 255) + #define EA_WCHAR_SIZE 1 + #else + #define EA_WCHAR_SIZE 4 + #endif + #elif defined(EA_PLATFORM_UNIX) + // It is standard on Unix to have wchar_t be int32_t or uint32_t. + // All versions of GNUC default to a 32 bit wchar_t, but EA has used + // the -fshort-wchar GCC command line option to force it to 16 bit. + // If you know that the compiler is set to use a wchar_t of other than + // the default, you need to manually define EA_WCHAR_SIZE for the build. 
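// As a sketch of the sanity check implied above: user code can verify the chosen
// value against the compiler after including this header (assumes C++11 or later
// for static_assert at namespace scope):
//
//     static_assert(EA_WCHAR_SIZE == sizeof(wchar_t),
//                   "EA_WCHAR_SIZE disagrees with the compiler's wchar_t; define it manually for this build.");
//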
+ #define EA_WCHAR_SIZE 4 + #else + // It is standard on Windows to have wchar_t be uint16_t. GCC + // defines wchar_t as int by default. Electronic Arts has + // standardized on wchar_t being an unsigned 16 bit value on all + // console platforms. Given that there is currently no known way to + // tell at preprocessor time what the size of wchar_t is, we declare + // it to be 2, as this is the Electronic Arts standard. If you have + // EA_WCHAR_SIZE != sizeof(wchar_t), then your code might not be + // broken, but it also won't work with wchar libraries and data from + // other parts of EA. Under GCC, you can force wchar_t to two bytes + // with the -fshort-wchar compiler argument. + #define EA_WCHAR_SIZE 2 + #endif + #endif + + + // ------------------------------------------------------------------------ + // EA_RESTRICT + // + // The C99 standard defines a new keyword, restrict, which allows for the + // improvement of code generation regarding memory usage. Compilers can + // generate significantly faster code when you are able to use restrict. + // + // Example usage: + // void DoSomething(char* EA_RESTRICT p1, char* EA_RESTRICT p2); + // + #ifndef EA_RESTRICT + #if defined(EA_COMPILER_MSVC) && (EA_COMPILER_VERSION >= 1400) // If VC8 (VS2005) or later... + #define EA_RESTRICT __restrict + #elif defined(EA_COMPILER_CLANG) + #define EA_RESTRICT __restrict + #elif defined(EA_COMPILER_GNUC) // Includes GCC and other compilers emulating GCC. + #define EA_RESTRICT __restrict // GCC defines 'restrict' (as opposed to __restrict) in C99 mode only. + #elif defined(EA_COMPILER_ARM) + #define EA_RESTRICT __restrict + #elif defined(EA_COMPILER_IS_C99) + #define EA_RESTRICT restrict + #else + // If the compiler didn't support restricted pointers, defining EA_RESTRICT + // away would result in compiling and running fine but you just wouldn't + // the same level of optimization. On the other hand, all the major compilers + // support restricted pointers. + #define EA_RESTRICT + #endif + #endif + + + // ------------------------------------------------------------------------ + // EA_DEPRECATED // Used as a prefix. + // EA_PREFIX_DEPRECATED // You should need this only for unusual compilers. + // EA_POSTFIX_DEPRECATED // You should need this only for unusual compilers. + // EA_DEPRECATED_MESSAGE // Used as a prefix and provides a deprecation message. + // + // Example usage: + // EA_DEPRECATED void Function(); + // EA_DEPRECATED_MESSAGE("Use 1.0v API instead") void Function(); + // + // or for maximum portability: + // EA_PREFIX_DEPRECATED void Function() EA_POSTFIX_DEPRECATED; + // + + #ifndef EA_DEPRECATED + #if defined(EA_COMPILER_CPP14_ENABLED) + #define EA_DEPRECATED [[deprecated]] + #elif defined(EA_COMPILER_MSVC) && (EA_COMPILER_VERSION > 1300) // If VC7 (VS2003) or later... + #define EA_DEPRECATED __declspec(deprecated) + #elif defined(EA_COMPILER_MSVC) + #define EA_DEPRECATED + #else + #define EA_DEPRECATED __attribute__((deprecated)) + #endif + #endif + + #ifndef EA_PREFIX_DEPRECATED + #if defined(EA_COMPILER_CPP14_ENABLED) + #define EA_PREFIX_DEPRECATED [[deprecated]] + #define EA_POSTFIX_DEPRECATED + #elif defined(EA_COMPILER_MSVC) && (EA_COMPILER_VERSION > 1300) // If VC7 (VS2003) or later... 
+ #define EA_PREFIX_DEPRECATED __declspec(deprecated) + #define EA_POSTFIX_DEPRECATED + #elif defined(EA_COMPILER_MSVC) + #define EA_PREFIX_DEPRECATED + #define EA_POSTFIX_DEPRECATED + #else + #define EA_PREFIX_DEPRECATED + #define EA_POSTFIX_DEPRECATED __attribute__((deprecated)) + #endif + #endif + + #ifndef EA_DEPRECATED_MESSAGE + #if defined(EA_COMPILER_CPP14_ENABLED) + #define EA_DEPRECATED_MESSAGE(msg) [[deprecated(#msg)]] + #else + // Compiler does not support depreaction messages, explicitly drop the msg but still mark the function as deprecated + #define EA_DEPRECATED_MESSAGE(msg) EA_DEPRECATED + #endif + #endif + + + // ------------------------------------------------------------------------ + // EA_FORCE_INLINE // Used as a prefix. + // EA_PREFIX_FORCE_INLINE // You should need this only for unusual compilers. + // EA_POSTFIX_FORCE_INLINE // You should need this only for unusual compilers. + // + // Example usage: + // EA_FORCE_INLINE void Foo(); // Implementation elsewhere. + // EA_PREFIX_FORCE_INLINE void Foo() EA_POSTFIX_FORCE_INLINE; // Implementation elsewhere. + // + // Note that when the prefix version of this function is used, it replaces + // the regular C++ 'inline' statement. Thus you should not use both the + // C++ inline statement and this macro with the same function declaration. + // + // To force inline usage under GCC 3.1+, you use this: + // inline void Foo() __attribute__((always_inline)); + // or + // inline __attribute__((always_inline)) void Foo(); + // + // The CodeWarrior compiler doesn't have the concept of forcing inlining per function. + // + #ifndef EA_FORCE_INLINE + #if defined(EA_COMPILER_MSVC) + #define EA_FORCE_INLINE __forceinline + #elif defined(EA_COMPILER_GNUC) && (((__GNUC__ * 100) + __GNUC_MINOR__) >= 301) || defined(EA_COMPILER_CLANG) + #if defined(__cplusplus) + #define EA_FORCE_INLINE inline __attribute__((always_inline)) + #else + #define EA_FORCE_INLINE __inline__ __attribute__((always_inline)) + #endif + #else + #if defined(__cplusplus) + #define EA_FORCE_INLINE inline + #else + #define EA_FORCE_INLINE __inline + #endif + #endif + #endif + + #if defined(EA_COMPILER_GNUC) && (((__GNUC__ * 100) + __GNUC_MINOR__) >= 301) || defined(EA_COMPILER_CLANG) + #define EA_PREFIX_FORCE_INLINE inline + #define EA_POSTFIX_FORCE_INLINE __attribute__((always_inline)) + #else + #define EA_PREFIX_FORCE_INLINE inline + #define EA_POSTFIX_FORCE_INLINE + #endif + + + // ------------------------------------------------------------------------ + // EA_FORCE_INLINE_LAMBDA + // + // EA_FORCE_INLINE_LAMBDA is used to force inline a call to a lambda when possible. + // Force inlining a lambda can be useful to reduce overhead in situations where a lambda may + // may only be called once, or inlining allows the compiler to apply other optimizations that wouldn't + // otherwise be possible. + // + // The ability to force inline a lambda is currently only available on a subset of compilers. + // + // Example usage: + // + // auto lambdaFunction = []() EA_FORCE_INLINE_LAMBDA + // { + // }; + // + #ifndef EA_FORCE_INLINE_LAMBDA + #if defined(EA_COMPILER_GNUC) || defined(EA_COMPILER_CLANG) + #define EA_FORCE_INLINE_LAMBDA __attribute__((always_inline)) + #else + #define EA_FORCE_INLINE_LAMBDA + #endif + #endif + + + // ------------------------------------------------------------------------ + // EA_NO_INLINE // Used as a prefix. + // EA_PREFIX_NO_INLINE // You should need this only for unusual compilers. 
+ // EA_POSTFIX_NO_INLINE // You should need this only for unusual compilers. + // + // Example usage: + // EA_NO_INLINE void Foo(); // Implementation elsewhere. + // EA_PREFIX_NO_INLINE void Foo() EA_POSTFIX_NO_INLINE; // Implementation elsewhere. + // + // That this declaration is incompatbile with C++ 'inline' and any + // variant of EA_FORCE_INLINE. + // + // To disable inline usage under VC++ priof to VS2005, you need to use this: + // #pragma inline_depth(0) // Disable inlining. + // void Foo() { ... } + // #pragma inline_depth() // Restore to default. + // + // Since there is no easy way to disable inlining on a function-by-function + // basis in VC++ prior to VS2005, the best strategy is to write platform-specific + // #ifdefs in the code or to disable inlining for a given module and enable + // functions individually with EA_FORCE_INLINE. + // + #ifndef EA_NO_INLINE + #if defined(EA_COMPILER_MSVC) && (EA_COMPILER_VERSION >= 1400) // If VC8 (VS2005) or later... + #define EA_NO_INLINE __declspec(noinline) + #elif defined(EA_COMPILER_MSVC) + #define EA_NO_INLINE + #else + #define EA_NO_INLINE __attribute__((noinline)) + #endif + #endif + + #if defined(EA_COMPILER_MSVC) && (EA_COMPILER_VERSION >= 1400) // If VC8 (VS2005) or later... + #define EA_PREFIX_NO_INLINE __declspec(noinline) + #define EA_POSTFIX_NO_INLINE + #elif defined(EA_COMPILER_MSVC) + #define EA_PREFIX_NO_INLINE + #define EA_POSTFIX_NO_INLINE + #else + #define EA_PREFIX_NO_INLINE + #define EA_POSTFIX_NO_INLINE __attribute__((noinline)) + #endif + + + // ------------------------------------------------------------------------ + // EA_NO_VTABLE + // + // Example usage: + // class EA_NO_VTABLE X { + // virtual void InterfaceFunction(); + // }; + // + // EA_CLASS_NO_VTABLE(X) { + // virtual void InterfaceFunction(); + // }; + // + #ifdef EA_COMPILER_MSVC + #define EA_NO_VTABLE __declspec(novtable) + #define EA_CLASS_NO_VTABLE(x) class __declspec(novtable) x + #define EA_STRUCT_NO_VTABLE(x) struct __declspec(novtable) x + #else + #define EA_NO_VTABLE + #define EA_CLASS_NO_VTABLE(x) class x + #define EA_STRUCT_NO_VTABLE(x) struct x + #endif + + + // ------------------------------------------------------------------------ + // EA_PASCAL + // + // Also known on PC platforms as stdcall. + // This convention causes the compiler to assume that the called function + // will pop off the stack space used to pass arguments, unless it takes a + // variable number of arguments. + // + // Example usage: + // this: + // void DoNothing(int x); + // void DoNothing(int x){} + // would be written as this: + // void EA_PASCAL_FUNC(DoNothing(int x)); + // void EA_PASCAL_FUNC(DoNothing(int x)){} + // + #ifndef EA_PASCAL + #if defined(EA_COMPILER_MSVC) + #define EA_PASCAL __stdcall + #elif defined(EA_COMPILER_GNUC) && defined(EA_PROCESSOR_X86) + #define EA_PASCAL __attribute__((stdcall)) + #else + // Some compilers simply don't support pascal calling convention. + // As a result, there isn't an issue here, since the specification of + // pascal calling convention is for the purpose of disambiguating the + // calling convention that is applied. 
+ #define EA_PASCAL + #endif + #endif + + #ifndef EA_PASCAL_FUNC + #if defined(EA_COMPILER_MSVC) + #define EA_PASCAL_FUNC(funcname_and_paramlist) __stdcall funcname_and_paramlist + #elif defined(EA_COMPILER_GNUC) && defined(EA_PROCESSOR_X86) + #define EA_PASCAL_FUNC(funcname_and_paramlist) __attribute__((stdcall)) funcname_and_paramlist + #else + #define EA_PASCAL_FUNC(funcname_and_paramlist) funcname_and_paramlist + #endif + #endif + + + // ------------------------------------------------------------------------ + // EA_SSE + // Visual C Processor Packs define _MSC_FULL_VER and are needed for SSE + // Intel C also has SSE support. + // EA_SSE is used to select FPU or SSE versions in hw_select.inl + // + // EA_SSE defines the level of SSE support: + // 0 indicates no SSE support + // 1 indicates SSE1 is supported + // 2 indicates SSE2 is supported + // 3 indicates SSE3 (or greater) is supported + // + // Note: SSE support beyond SSE3 can't be properly represented as a single + // version number. Instead users should use specific SSE defines (e.g. + // EA_SSE4_2) to detect what specific support is available. EA_SSE being + // equal to 3 really only indicates that SSE3 or greater is supported. + #ifndef EA_SSE + #if defined(EA_COMPILER_GNUC) || defined(EA_COMPILER_CLANG) + #if defined(__SSE3__) + #define EA_SSE 3 + #elif defined(__SSE2__) + #define EA_SSE 2 + #elif defined(__SSE__) && __SSE__ + #define EA_SSE 1 + #else + #define EA_SSE 0 + #endif + #elif (defined(EA_SSE3) && EA_SSE3) || defined EA_PLATFORM_XBOXONE || defined CS_UNDEFINED_STRING + #define EA_SSE 3 + #elif defined(EA_SSE2) && EA_SSE2 + #define EA_SSE 2 + #elif defined(EA_PROCESSOR_X86) && defined(_MSC_FULL_VER) && !defined(__NOSSE__) && defined(_M_IX86_FP) + #define EA_SSE _M_IX86_FP + #elif defined(EA_PROCESSOR_X86) && defined(EA_COMPILER_INTEL) && !defined(__NOSSE__) + #define EA_SSE 1 + #elif defined(EA_PROCESSOR_X86_64) + // All x64 processors support SSE2 or higher + #define EA_SSE 2 + #else + #define EA_SSE 0 + #endif + #endif + + // ------------------------------------------------------------------------ + // We define separate defines for SSE support beyond SSE1. These defines + // are particularly useful for detecting SSE4.x features since there isn't + // a single concept of SSE4. + // + // The following SSE defines are always defined. 0 indicates the + // feature/level of SSE is not supported, and 1 indicates support is + // available. 
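// For illustration (AddFloat4 below is a hypothetical function, not part of
// EABase), code usually selects an implementation at preprocessing time based on
// these values, with a scalar fallback when the level is 0:
//
//     #if EA_SSE >= 1
//         #include <xmmintrin.h>
//         inline void AddFloat4(float* pDst, const float* pA, const float* pB)
//         {
//             _mm_storeu_ps(pDst, _mm_add_ps(_mm_loadu_ps(pA), _mm_loadu_ps(pB)));
//         }
//     #else
//         inline void AddFloat4(float* pDst, const float* pA, const float* pB)
//         {
//             for (int i = 0; i < 4; ++i)
//                 pDst[i] = pA[i] + pB[i];
//         }
//     #endif
//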
+ #ifndef EA_SSE2 + #if EA_SSE >= 2 + #define EA_SSE2 1 + #else + #define EA_SSE2 0 + #endif + #endif + #ifndef EA_SSE3 + #if EA_SSE >= 3 + #define EA_SSE3 1 + #else + #define EA_SSE3 0 + #endif + #endif + #ifndef EA_SSSE3 + #if defined __SSSE3__ || defined EA_PLATFORM_XBOXONE || defined CS_UNDEFINED_STRING + #define EA_SSSE3 1 + #else + #define EA_SSSE3 0 + #endif + #endif + #ifndef EA_SSE4_1 + #if defined __SSE4_1__ || defined EA_PLATFORM_XBOXONE || defined CS_UNDEFINED_STRING + #define EA_SSE4_1 1 + #else + #define EA_SSE4_1 0 + #endif + #endif + #ifndef EA_SSE4_2 + #if defined __SSE4_2__ || defined EA_PLATFORM_XBOXONE || defined CS_UNDEFINED_STRING + #define EA_SSE4_2 1 + #else + #define EA_SSE4_2 0 + #endif + #endif + #ifndef EA_SSE4A + #if defined __SSE4A__ || defined EA_PLATFORM_XBOXONE || defined CS_UNDEFINED_STRING + #define EA_SSE4A 1 + #else + #define EA_SSE4A 0 + #endif + #endif + + // ------------------------------------------------------------------------ + // EA_AVX + // EA_AVX may be used to determine if Advanced Vector Extensions are available for the target architecture + // + // EA_AVX defines the level of AVX support: + // 0 indicates no AVX support + // 1 indicates AVX1 is supported + // 2 indicates AVX2 is supported + #ifndef EA_AVX + #if defined __AVX2__ + #define EA_AVX 2 + #elif defined __AVX__ || defined EA_PLATFORM_XBOXONE || defined CS_UNDEFINED_STRING + #define EA_AVX 1 + #else + #define EA_AVX 0 + #endif + #endif + #ifndef EA_AVX2 + #if EA_AVX >= 2 + #define EA_AVX2 1 + #else + #define EA_AVX2 0 + #endif + #endif + + // EA_FP16C may be used to determine the existence of float <-> half conversion operations on an x86 CPU. + // (For example to determine if _mm_cvtph_ps or _mm_cvtps_ph could be used.) + #ifndef EA_FP16C + #if defined __F16C__ || defined EA_PLATFORM_XBOXONE || defined CS_UNDEFINED_STRING + #define EA_FP16C 1 + #else + #define EA_FP16C 0 + #endif + #endif + + // EA_FP128 may be used to determine if __float128 is a supported type for use. This type is enabled by a GCC extension (_GLIBCXX_USE_FLOAT128) + // but has support by some implementations of clang (__FLOAT128__) + // PS4 does not support __float128 as of SDK 5.500 https://ps4.siedev.net/resources/documents/SDK/5.500/CPU_Compiler_ABI-Overview/0003.html + #ifndef EA_FP128 + #if (defined __FLOAT128__ || defined _GLIBCXX_USE_FLOAT128) && !defined(EA_PLATFORM_SONY) + #define EA_FP128 1 + #else + #define EA_FP128 0 + #endif + #endif + + // ------------------------------------------------------------------------ + // EA_ABM + // EA_ABM may be used to determine if Advanced Bit Manipulation sets are available for the target architecture (POPCNT, LZCNT) + // + #ifndef EA_ABM + #if defined(__ABM__) || defined(EA_PLATFORM_XBOXONE) || defined(EA_PLATFORM_SONY) || defined(CS_UNDEFINED_STRING) + #define EA_ABM 1 + #else + #define EA_ABM 0 + #endif + #endif + + // ------------------------------------------------------------------------ + // EA_NEON + // EA_NEON may be used to determine if NEON is supported. 
+ #ifndef EA_NEON + #if defined(__ARM_NEON__) || defined(__ARM_NEON) + #define EA_NEON 1 + #else + #define EA_NEON 0 + #endif + #endif + + // ------------------------------------------------------------------------ + // EA_BMI + // EA_BMI may be used to determine if Bit Manipulation Instruction sets are available for the target architecture + // + // EA_BMI defines the level of BMI support: + // 0 indicates no BMI support + // 1 indicates BMI1 is supported + // 2 indicates BMI2 is supported + #ifndef EA_BMI + #if defined(__BMI2__) + #define EA_BMI 2 + #elif defined(__BMI__) || defined(EA_PLATFORM_XBOXONE) || defined(CS_UNDEFINED_STRING) + #define EA_BMI 1 + #else + #define EA_BMI 0 + #endif + #endif + #ifndef EA_BMI2 + #if EA_BMI >= 2 + #define EA_BMI2 1 + #else + #define EA_BMI2 0 + #endif + #endif + + // ------------------------------------------------------------------------ + // EA_FMA3 + // EA_FMA3 may be used to determine if Fused Multiply Add operations are available for the target architecture + // __FMA__ is defined only by GCC, Clang, and ICC; MSVC only defines __AVX__ and __AVX2__ + // FMA3 was introduced alongside AVX2 on Intel Haswell + // All AMD processors support FMA3 if AVX2 is also supported + // + // EA_FMA3 defines the level of FMA3 support: + // 0 indicates no FMA3 support + // 1 indicates FMA3 is supported + #ifndef EA_FMA3 + #if defined(__FMA__) || EA_AVX2 >= 1 + #define EA_FMA3 1 + #else + #define EA_FMA3 0 + #endif + #endif + + // ------------------------------------------------------------------------ + // EA_TBM + // EA_TBM may be used to determine if Trailing Bit Manipulation instructions are available for the target architecture + #ifndef EA_TBM + #if defined(__TBM__) + #define EA_TBM 1 + #else + #define EA_TBM 0 + #endif + #endif + + + // ------------------------------------------------------------------------ + // EA_IMPORT + // import declaration specification + // specifies that the declared symbol is imported from another dynamic library. + #ifndef EA_IMPORT + #if defined(EA_COMPILER_MSVC) + #define EA_IMPORT __declspec(dllimport) + #else + #define EA_IMPORT + #endif + #endif + + + // ------------------------------------------------------------------------ + // EA_EXPORT + // export declaration specification + // specifies that the declared symbol is exported from the current dynamic library. + // this is not the same as the C++ export keyword. The C++ export keyword has been + // removed from the language as of C++11. + #ifndef EA_EXPORT + #if defined(EA_COMPILER_MSVC) + #define EA_EXPORT __declspec(dllexport) + #else + #define EA_EXPORT + #endif + #endif + + + // ------------------------------------------------------------------------ + // EA_PRAGMA_ONCE_SUPPORTED + // + // This is a wrapper for the #pragma once preprocessor directive. + // It allows for some compilers (in particular VC++) to implement signifcantly + // faster include file preprocessing. #pragma once can be used to replace + // header include guards or to augment them. However, #pragma once isn't + // necessarily supported by all compilers and isn't guaranteed to be so in + // the future, so using #pragma once to replace traditional include guards + // is not strictly portable. Note that a direct #define for #pragma once is + // impossible with VC++, due to limitations, but can be done with other + // compilers/preprocessors via _Pragma("once"). 
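// Returning to EA_IMPORT / EA_EXPORT above: they are typically wrapped in a
// per-library API macro. A common sketch (MYLIB_API, MYLIB_BUILDING_DLL and
// MYLIB_USING_DLL are hypothetical names, not part of EABase):
//
//     #if defined(MYLIB_BUILDING_DLL)
//         #define MYLIB_API EA_EXPORT   // building the shared library: export symbols
//     #elif defined(MYLIB_USING_DLL)
//         #define MYLIB_API EA_IMPORT   // consuming the shared library: import symbols
//     #else
//         #define MYLIB_API             // static linking, or a compiler where both expand to nothing
//     #endif
//
//     MYLIB_API void Initialize();
//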
+ // + // Example usage (which includes traditional header guards for portability): + // #ifndef SOMEPACKAGE_SOMEHEADER_H + // #define SOMEPACKAGE_SOMEHEADER_H + // + // #if defined(EA_PRAGMA_ONCE_SUPPORTED) + // #pragma once + // #endif + // + // + // + // #endif + // + #if defined(_MSC_VER) || defined(__GNUC__) || defined(__EDG__) || defined(__APPLE__) + #define EA_PRAGMA_ONCE_SUPPORTED 1 + #endif + + + + // ------------------------------------------------------------------------ + // EA_ONCE + // + // Example usage (which includes traditional header guards for portability): + // #ifndef SOMEPACKAGE_SOMEHEADER_H + // #define SOMEPACKAGE_SOMEHEADER_H + // + // EA_ONCE() + // + // + // + // #endif + // + #if defined(EA_PRAGMA_ONCE_SUPPORTED) + #if defined(_MSC_VER) + #define EA_ONCE() __pragma(once) + #else + #define EA_ONCE() // _Pragma("once") It turns out that _Pragma("once") isn't supported by many compilers. + #endif + #endif + + + + // ------------------------------------------------------------------------ + // EA_OVERRIDE + // + // C++11 override + // See http://msdn.microsoft.com/en-us/library/jj678987.aspx for more information. + // You can use EA_FINAL_OVERRIDE to combine usage of EA_OVERRIDE and EA_INHERITANCE_FINAL in a single statement. + // + // Example usage: + // struct B { virtual void f(int); }; + // struct D : B { void f(int) EA_OVERRIDE; }; + // + #ifndef EA_OVERRIDE + #if defined(EA_COMPILER_NO_OVERRIDE) + #define EA_OVERRIDE + #else + #define EA_OVERRIDE override + #endif + #endif + + + // ------------------------------------------------------------------------ + // EA_INHERITANCE_FINAL + // + // Portably wraps the C++11 final specifier. + // See http://msdn.microsoft.com/en-us/library/jj678985.aspx for more information. + // You can use EA_FINAL_OVERRIDE to combine usage of EA_OVERRIDE and EA_INHERITANCE_FINAL in a single statement. + // This is not called EA_FINAL because that term is used within EA to denote debug/release/final builds. + // + // Example usage: + // struct B { virtual void f() EA_INHERITANCE_FINAL; }; + // + #ifndef EA_INHERITANCE_FINAL + #if defined(EA_COMPILER_NO_INHERITANCE_FINAL) + #define EA_INHERITANCE_FINAL + #elif (defined(_MSC_VER) && (EA_COMPILER_VERSION < 1700)) // Pre-VS2012 + #define EA_INHERITANCE_FINAL sealed + #else + #define EA_INHERITANCE_FINAL final + #endif + #endif + + + // ------------------------------------------------------------------------ + // EA_FINAL_OVERRIDE + // + // Portably wraps the C++11 override final specifiers combined. + // + // Example usage: + // struct A { virtual void f(); }; + // struct B : public A { virtual void f() EA_FINAL_OVERRIDE; }; + // + #ifndef EA_FINAL_OVERRIDE + #define EA_FINAL_OVERRIDE EA_OVERRIDE EA_INHERITANCE_FINAL + #endif + + + // ------------------------------------------------------------------------ + // EA_SEALED + // + // This is deprecated, as the C++11 Standard has final (EA_INHERITANCE_FINAL) instead. + // See http://msdn.microsoft.com/en-us/library/0w2w91tf.aspx for more information. + // Example usage: + // struct B { virtual void f() EA_SEALED; }; + // + #ifndef EA_SEALED + #if defined(EA_COMPILER_MSVC) && (EA_COMPILER_VERSION >= 1400) // VS2005 (VC8) and later + #define EA_SEALED sealed + #else + #define EA_SEALED + #endif + #endif + + + // ------------------------------------------------------------------------ + // EA_ABSTRACT + // + // This is a Microsoft language extension. + // See http://msdn.microsoft.com/en-us/library/b0z6b513.aspx for more information. 
+ // Example usage: + // struct X EA_ABSTRACT { virtual void f(){} }; + // + #ifndef EA_ABSTRACT + #if defined(EA_COMPILER_MSVC) && (EA_COMPILER_VERSION >= 1400) // VS2005 (VC8) and later + #define EA_ABSTRACT abstract + #else + #define EA_ABSTRACT + #endif + #endif + + + // ------------------------------------------------------------------------ + // EA_CONSTEXPR + // EA_CONSTEXPR_OR_CONST + // + // Portable wrapper for C++11's 'constexpr' support. + // + // See http://www.cprogramming.com/c++11/c++11-compile-time-processing-with-constexpr.html for more information. + // Example usage: + // EA_CONSTEXPR int GetValue() { return 37; } + // EA_CONSTEXPR_OR_CONST double gValue = std::sin(kTwoPi); + // + #if !defined(EA_CONSTEXPR) + #if defined(EA_COMPILER_NO_CONSTEXPR) + #define EA_CONSTEXPR + #else + #define EA_CONSTEXPR constexpr + #endif + #endif + + #if !defined(EA_CONSTEXPR_OR_CONST) + #if defined(EA_COMPILER_NO_CONSTEXPR) + #define EA_CONSTEXPR_OR_CONST const + #else + #define EA_CONSTEXPR_OR_CONST constexpr + #endif + #endif + + // ------------------------------------------------------------------------ + // EA_CONSTEXPR_IF + // + // Portable wrapper for C++17's 'constexpr if' support. + // + // https://en.cppreference.com/w/cpp/language/if + // + // Example usage: + // + // EA_CONSTEXPR_IF(eastl::is_copy_constructible_v) + // { ... } + // + #if !defined(EA_CONSTEXPR_IF) + #if defined(EA_COMPILER_NO_CONSTEXPR_IF) + #define EA_CONSTEXPR_IF(predicate) if ((predicate)) + #else + #define EA_CONSTEXPR_IF(predicate) if constexpr ((predicate)) + #endif + #endif + + + + // ------------------------------------------------------------------------ + // EA_EXTERN_TEMPLATE + // + // Portable wrapper for C++11's 'extern template' support. + // + // Example usage: + // EA_EXTERN_TEMPLATE(class basic_string); + // + #if !defined(EA_EXTERN_TEMPLATE) + #if defined(EA_COMPILER_NO_EXTERN_TEMPLATE) + #define EA_EXTERN_TEMPLATE(declaration) + #else + #define EA_EXTERN_TEMPLATE(declaration) extern template declaration + #endif + #endif + + + // ------------------------------------------------------------------------ + // EA_NOEXCEPT + // EA_NOEXCEPT_IF(predicate) + // EA_NOEXCEPT_EXPR(expression) + // + // Portable wrapper for C++11 noexcept + // http://en.cppreference.com/w/cpp/language/noexcept + // http://en.cppreference.com/w/cpp/language/noexcept_spec + // + // Example usage: + // EA_NOEXCEPT + // EA_NOEXCEPT_IF(predicate) + // EA_NOEXCEPT_EXPR(expression) + // + // This function never throws an exception. + // void DoNothing() EA_NOEXCEPT + // { } + // + // This function throws an exception of T::T() throws an exception. + // template + // void DoNothing() EA_NOEXCEPT_IF(EA_NOEXCEPT_EXPR(T())) + // { T t; } + // + #if !defined(EA_NOEXCEPT) + #if defined(EA_COMPILER_NO_NOEXCEPT) + #define EA_NOEXCEPT + #define EA_NOEXCEPT_IF(predicate) + #define EA_NOEXCEPT_EXPR(expression) false + #else + #define EA_NOEXCEPT noexcept + #define EA_NOEXCEPT_IF(predicate) noexcept((predicate)) + #define EA_NOEXCEPT_EXPR(expression) noexcept((expression)) + #endif + #endif + + + // ------------------------------------------------------------------------ + // EA_NORETURN + // + // Wraps the C++11 noreturn attribute. 
See EA_COMPILER_NO_NORETURN + // http://en.cppreference.com/w/cpp/language/attributes + // http://msdn.microsoft.com/en-us/library/k6ktzx3s%28v=vs.80%29.aspx + // http://blog.aaronballman.com/2011/09/understanding-attributes/ + // + // Example usage: + // EA_NORETURN void SomeFunction() + // { throw "error"; } + // + #if !defined(EA_NORETURN) + #if defined(EA_COMPILER_MSVC) && (EA_COMPILER_VERSION >= 1300) // VS2003 (VC7) and later + #define EA_NORETURN __declspec(noreturn) + #elif defined(EA_COMPILER_NO_NORETURN) + #define EA_NORETURN + #else + #define EA_NORETURN [[noreturn]] + #endif + #endif + + + // ------------------------------------------------------------------------ + // EA_CARRIES_DEPENDENCY + // + // Wraps the C++11 carries_dependency attribute + // http://en.cppreference.com/w/cpp/language/attributes + // http://blog.aaronballman.com/2011/09/understanding-attributes/ + // + // Example usage: + // EA_CARRIES_DEPENDENCY int* SomeFunction() + // { return &mX; } + // + // + #if !defined(EA_CARRIES_DEPENDENCY) + #if defined(EA_COMPILER_NO_CARRIES_DEPENDENCY) + #define EA_CARRIES_DEPENDENCY + #else + #define EA_CARRIES_DEPENDENCY [[carries_dependency]] + #endif + #endif + + + // ------------------------------------------------------------------------ + // EA_FALLTHROUGH + // + // [[fallthrough] is a C++17 standard attribute that appears in switch + // statements to indicate that the fallthrough from the previous case in the + // switch statement is intentially and not a bug. + // + // http://en.cppreference.com/w/cpp/language/attributes + // + // Example usage: + // void f(int n) + // { + // switch(n) + // { + // case 1: + // DoCase1(); + // // Compiler may generate a warning for fallthrough behaviour + // + // case 2: + // DoCase2(); + // + // EA_FALLTHROUGH; + // case 3: + // DoCase3(); + // } + // } + // + #if !defined(EA_FALLTHROUGH) + #if defined(EA_COMPILER_NO_FALLTHROUGH) + #define EA_FALLTHROUGH + #else + #define EA_FALLTHROUGH [[fallthrough]] + #endif + #endif + + + + // ------------------------------------------------------------------------ + // EA_NODISCARD + // + // [[nodiscard]] is a C++17 standard attribute that can be applied to a + // function declaration, enum, or class declaration. If a any of the list + // previously are returned from a function (without the user explicitly + // casting to void) the addition of the [[nodiscard]] attribute encourages + // the compiler to generate a warning about the user discarding the return + // value. This is a useful practice to encourage client code to check API + // error codes. + // + // http://en.cppreference.com/w/cpp/language/attributes + // + // Example usage: + // + // EA_NODISCARD int baz() { return 42; } + // + // void foo() + // { + // baz(); // warning: ignoring return value of function declared with 'nodiscard' attribute + // } + // + #if !defined(EA_NODISCARD) + #if defined(EA_COMPILER_NO_NODISCARD) + #define EA_NODISCARD + #else + #define EA_NODISCARD [[nodiscard]] + #endif + #endif + + + // ------------------------------------------------------------------------ + // EA_MAYBE_UNUSED + // + // [[maybe_unused]] is a C++17 standard attribute that suppresses warnings + // on unused entities that are declared as maybe_unused. + // + // http://en.cppreference.com/w/cpp/language/attributes + // + // Example usage: + // void foo(EA_MAYBE_UNUSED int i) + // { + // assert(i == 42); // warning suppressed when asserts disabled. 
+ // } + // + #if !defined(EA_MAYBE_UNUSED) + #if defined(EA_COMPILER_NO_MAYBE_UNUSED) + #define EA_MAYBE_UNUSED + #else + #define EA_MAYBE_UNUSED [[maybe_unused]] + #endif + #endif + + + // ------------------------------------------------------------------------ + // EA_NO_UBSAN + // + // The LLVM/Clang undefined behaviour sanitizer will not analyse a function tagged with the following attribute. + // + // https://clang.llvm.org/docs/UndefinedBehaviorSanitizer.html#disabling-instrumentation-with-attribute-no-sanitize-undefined + // + // Example usage: + // EA_NO_UBSAN int SomeFunction() { ... } + // + #ifndef EA_NO_UBSAN + #if defined(EA_COMPILER_CLANG) + #define EA_NO_UBSAN __attribute__((no_sanitize("undefined"))) + #else + #define EA_NO_UBSAN + #endif + #endif + + + // ------------------------------------------------------------------------ + // EA_NO_ASAN + // + // The LLVM/Clang address sanitizer will not analyse a function tagged with the following attribute. + // + // https://clang.llvm.org/docs/AddressSanitizer.html#disabling-instrumentation-with-attribute-no-sanitize-address + // + // Example usage: + // EA_NO_ASAN int SomeFunction() { ... } + // + #ifndef EA_NO_ASAN + #if defined(EA_COMPILER_CLANG) + #define EA_NO_ASAN __attribute__((no_sanitize("address"))) + #else + #define EA_NO_ASAN + #endif + #endif + + + // ------------------------------------------------------------------------ + // EA_ASAN_ENABLED + // + // Defined as 0 or 1. It's value depends on the compile environment. + // Specifies whether the code is being built with Clang's Address Sanitizer. + // + #if defined(__has_feature) + #if __has_feature(address_sanitizer) + #define EA_ASAN_ENABLED 1 + #else + #define EA_ASAN_ENABLED 0 + #endif + #else + #define EA_ASAN_ENABLED 0 + #endif + + + // ------------------------------------------------------------------------ + // EA_NON_COPYABLE + // + // This macro defines as a class as not being copy-constructable + // or assignable. This is useful for preventing class instances + // from being passed to functions by value, is useful for preventing + // compiler warnings by some compilers about the inability to + // auto-generate a copy constructor and assignment, and is useful + // for simply declaring in the interface that copy semantics are + // not supported by the class. Your class needs to have at least a + // default constructor when using this macro. + // + // Beware that this class works by declaring a private: section of + // the class in the case of compilers that don't support C++11 deleted + // functions. + // + // Note: With some pre-C++11 compilers (e.g. Green Hills), you may need + // to manually define an instances of the hidden functions, even + // though they are not used. + // + // Example usage: + // class Widget { + // Widget(); + // . . . 
+ // EA_NON_COPYABLE(Widget) + // }; + // + #if !defined(EA_NON_COPYABLE) + #if defined(EA_COMPILER_NO_DELETED_FUNCTIONS) + #define EA_NON_COPYABLE(EAClass_) \ + private: \ + EA_DISABLE_VC_WARNING(4822); /* local class member function does not have a body */ \ + EAClass_(const EAClass_&); \ + void operator=(const EAClass_&); \ + EA_RESTORE_VC_WARNING(); + #else + #define EA_NON_COPYABLE(EAClass_) \ + EA_DISABLE_VC_WARNING(4822); /* local class member function does not have a body */ \ + EAClass_(const EAClass_&) = delete; \ + void operator=(const EAClass_&) = delete; \ + EA_RESTORE_VC_WARNING(); + #endif + #endif + + + // ------------------------------------------------------------------------ + // EA_FUNCTION_DELETE + // + // Semi-portable way of specifying a deleted function which allows for + // cleaner code in class declarations. + // + // Example usage: + // + // class Example + // { + // private: // For portability with pre-C++11 compilers, make the function private. + // void foo() EA_FUNCTION_DELETE; + // }; + // + // Note: EA_FUNCTION_DELETE'd functions should be private to prevent the + // functions from being called even when the compiler does not support + // deleted functions. Some compilers (e.g. Green Hills) that don't support + // C++11 deleted functions can require that you define the function, + // which you can do in the associated source file for the class. + // + #if defined(EA_COMPILER_NO_DELETED_FUNCTIONS) + #define EA_FUNCTION_DELETE + #else + #define EA_FUNCTION_DELETE = delete + #endif + + // ------------------------------------------------------------------------ + // EA_DISABLE_DEFAULT_CTOR + // + // Disables the compiler generated default constructor. This macro is + // provided to improve portability and clarify intent of code. + // + // Example usage: + // + // class Example + // { + // private: + // EA_DISABLE_DEFAULT_CTOR(Example); + // }; + // + #define EA_DISABLE_DEFAULT_CTOR(ClassName) ClassName() EA_FUNCTION_DELETE + + // ------------------------------------------------------------------------ + // EA_DISABLE_COPY_CTOR + // + // Disables the compiler generated copy constructor. This macro is + // provided to improve portability and clarify intent of code. + // + // Example usage: + // + // class Example + // { + // private: + // EA_DISABLE_COPY_CTOR(Example); + // }; + // + #define EA_DISABLE_COPY_CTOR(ClassName) ClassName(const ClassName &) EA_FUNCTION_DELETE + + // ------------------------------------------------------------------------ + // EA_DISABLE_MOVE_CTOR + // + // Disables the compiler generated move constructor. This macro is + // provided to improve portability and clarify intent of code. + // + // Example usage: + // + // class Example + // { + // private: + // EA_DISABLE_MOVE_CTOR(Example); + // }; + // + #define EA_DISABLE_MOVE_CTOR(ClassName) ClassName(ClassName&&) EA_FUNCTION_DELETE + + // ------------------------------------------------------------------------ + // EA_DISABLE_ASSIGNMENT_OPERATOR + // + // Disables the compiler generated assignment operator. This macro is + // provided to improve portability and clarify intent of code. + // + // Example usage: + // + // class Example + // { + // private: + // EA_DISABLE_ASSIGNMENT_OPERATOR(Example); + // }; + // + #define EA_DISABLE_ASSIGNMENT_OPERATOR(ClassName) ClassName & operator=(const ClassName &) EA_FUNCTION_DELETE + + // ------------------------------------------------------------------------ + // EA_DISABLE_MOVE_OPERATOR + // + // Disables the compiler generated move operator. 
This macro is + // provided to improve portability and clarify intent of code. + // + // Example usage: + // + // class Example + // { + // private: + // EA_DISABLE_MOVE_OPERATOR(Example); + // }; + // + #define EA_DISABLE_MOVE_OPERATOR(ClassName) ClassName & operator=(ClassName&&) EA_FUNCTION_DELETE + + // ------------------------------------------------------------------------ + // EANonCopyable + // + // Declares a class as not supporting copy construction or assignment. + // May be more reliable with some situations that EA_NON_COPYABLE alone, + // though it may result in more code generation. + // + // Note that VC++ will generate warning C4625 and C4626 if you use EANonCopyable + // and you are compiling with /W4 and /Wall. There is no resolution but + // to redelare EA_NON_COPYABLE in your subclass or disable the warnings with + // code like this: + // EA_DISABLE_VC_WARNING(4625 4626) + // ... + // EA_RESTORE_VC_WARNING() + // + // Example usage: + // struct Widget : EANonCopyable { + // . . . + // }; + // + #ifdef __cplusplus + struct EANonCopyable + { + #if defined(EA_COMPILER_NO_DEFAULTED_FUNCTIONS) || defined(__EDG__) + // EDG doesn't appear to behave properly for the case of defaulted constructors; + // it generates a mistaken warning about missing default constructors. + EANonCopyable() {} // Putting {} here has the downside that it allows a class to create itself, + ~EANonCopyable() {} // but avoids linker errors that can occur with some compilers (e.g. Green Hills). + #else + EANonCopyable() = default; + ~EANonCopyable() = default; + #endif + + EA_NON_COPYABLE(EANonCopyable) + }; + #endif + + + // ------------------------------------------------------------------------ + // EA_OPTIMIZE_OFF / EA_OPTIMIZE_ON + // + // Implements portable inline optimization enabling/disabling. + // Usage of these macros must be in order OFF then ON. This is + // because the OFF macro pushes a set of settings and the ON + // macro pops them. The nesting of OFF/ON sets (e.g. OFF, OFF, ON, ON) + // is not guaranteed to work on all platforms. + // + // This is often used to allow debugging of some code that's + // otherwise compiled with undebuggable optimizations. It's also + // useful for working around compiler code generation problems + // that occur in optimized builds. + // + // Some compilers (e.g. VC++) don't allow doing this within a function and + // so the usage must be outside a function, as with the example below. + // GCC on x86 appears to have some problem with argument passing when + // using EA_OPTIMIZE_OFF in optimized builds. + // + // Example usage: + // // Disable optimizations for SomeFunction. + // EA_OPTIMIZE_OFF() + // void SomeFunction() + // { + // ... + // } + // EA_OPTIMIZE_ON() + // + #if !defined(EA_OPTIMIZE_OFF) + #if defined(EA_COMPILER_MSVC) + #define EA_OPTIMIZE_OFF() __pragma(optimize("", off)) + #elif defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION > 4004) && (defined(__i386__) || defined(__x86_64__)) // GCC 4.4+ - Seems to work only on x86/Linux so far. However, GCC 4.4 itself appears broken and screws up parameter passing conventions. 
+ #define EA_OPTIMIZE_OFF() \ + _Pragma("GCC push_options") \ + _Pragma("GCC optimize 0") + #elif defined(EA_COMPILER_CLANG) && (!defined(EA_PLATFORM_ANDROID) || (EA_COMPILER_VERSION >= 380)) + #define EA_OPTIMIZE_OFF() \ + EA_DISABLE_CLANG_WARNING(-Wunknown-pragmas) \ + _Pragma("clang optimize off") \ + EA_RESTORE_CLANG_WARNING() + #else + #define EA_OPTIMIZE_OFF() + #endif + #endif + + #if !defined(EA_OPTIMIZE_ON) + #if defined(EA_COMPILER_MSVC) + #define EA_OPTIMIZE_ON() __pragma(optimize("", on)) + #elif defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION > 4004) && (defined(__i386__) || defined(__x86_64__)) // GCC 4.4+ - Seems to work only on x86/Linux so far. However, GCC 4.4 itself appears broken and screws up parameter passing conventions. + #define EA_OPTIMIZE_ON() _Pragma("GCC pop_options") + #elif defined(EA_COMPILER_CLANG) && (!defined(EA_PLATFORM_ANDROID) || (EA_COMPILER_VERSION >= 380)) + #define EA_OPTIMIZE_ON() \ + EA_DISABLE_CLANG_WARNING(-Wunknown-pragmas) \ + _Pragma("clang optimize on") \ + EA_RESTORE_CLANG_WARNING() + #else + #define EA_OPTIMIZE_ON() + #endif + #endif + + + + // ------------------------------------------------------------------------ + // EA_SIGNED_RIGHT_SHIFT_IS_UNSIGNED + // + // Defined if right shifts of signed integers (i.e. arithmetic shifts) fail + // to propogate the high bit downward, and thus preserve sign. Most hardware + // and their corresponding compilers do this. + // + // + +#endif // Header include guard + + + + + + + + + + diff --git a/libkram/eastl/include/EABase/config/eaplatform.h b/libkram/eastl/include/EABase/config/eaplatform.h new file mode 100644 index 00000000..37c1350a --- /dev/null +++ b/libkram/eastl/include/EABase/config/eaplatform.h @@ -0,0 +1,738 @@ +/*----------------------------------------------------------------------------- + * config/eaplatform.h + * + * Copyright (c) Electronic Arts Inc. All rights reserved. + *----------------------------------------------------------------------------- + * Currently supported platform indentification defines include: + */ +#ifdef EA_PLATFORM_PS4 // ifdef for code stripping purposes +// EA_PLATFORM_PS4 (EA_PLATFORM_KETTLE) +#endif +#ifdef EA_PLATFORM_XBOXONE // ifdef for code stripping purposes + // EA_PLATFORM_XBOXONE (EA_PLATFORM_CAPILANO) + // EA_PLATFORM_XBOXONE_XDK (EA_PLATFORM_CAPILANO_XDK), set by capilano_config package + // EA_PLATFORM_XBOXONE_ADK (EA_PLATFORM_CAPILANO_ADK), set by capilano_config package +#endif +// EA_PLATFORM_ANDROID +// EA_PLATFORM_APPLE +// EA_PLATFORM_IPHONE +// EA_PLATFORM_IPHONE_SIMULATOR +// EA_PLATFORM_OSX +// EA_PLATFORM_LINUX +// EA_PLATFORM_SAMSUNG_TV +// EA_PLATFORM_WINDOWS +// EA_PLATFORM_WIN32 +// EA_PLATFORM_WIN64 +// EA_PLATFORM_WINDOWS_PHONE +// EA_PLATFORM_WINRT +// EA_PLATFORM_SUN +// EA_PLATFORM_LRB (Larrabee) +// EA_PLATFORM_POSIX (pseudo-platform; may be defined along with another platform like EA_PLATFORM_LINUX, EA_PLATFORM_UNIX, EA_PLATFORM_QNX) +// EA_PLATFORM_UNIX (pseudo-platform; may be defined along with another platform like EA_PLATFORM_LINUX) +// EA_PLATFORM_CYGWIN (pseudo-platform; may be defined along with another platform like EA_PLATFORM_LINUX) +// EA_PLATFORM_MINGW (pseudo-platform; may be defined along with another platform like EA_PLATFORM_WINDOWS) +// EA_PLATFORM_MICROSOFT (pseudo-platform; may be defined along with another platform like EA_PLATFORM_WINDOWS) +// +// EA_ABI_ARM_LINUX (a.k.a. "eabi". 
for all platforms that use the CodeSourcery GNU/Linux toolchain, like Android) +// EA_ABI_ARM_APPLE (similar to eabi but not identical) +// EA_ABI_ARM64_APPLE (similar to eabi but not identical) https://developer.apple.com/library/ios/documentation/Xcode/Conceptual/iPhoneOSABIReference/Articles/ARM64FunctionCallingConventions.html +// EA_ABI_ARM_WINCE (similar to eabi but not identical) +// +// Other definitions emanated from this file inclue: +// EA_PLATFORM_NAME = +// EA_PLATFORM_DESCRIPTION = +// EA_PROCESSOR_XXX +// EA_MISALIGNED_SUPPORT_LEVEL=0|1|2 +// EA_SYSTEM_LITTLE_ENDIAN | EA_SYSTEM_BIG_ENDIAN +// EA_ASM_STYLE_ATT | EA_ASM_STYLE_INTEL | EA_ASM_STYLE_MOTOROLA +// EA_PLATFORM_PTR_SIZE = +// EA_PLATFORM_WORD_SIZE = +// EA_CACHE_LINE_SIZE = +//--------------------------------------------------------------------------- + +/* + EA_PLATFORM_MOBILE + EA_PLATFORM_MOBILE is a peer to EA_PLATORM_DESKTOP and EA_PLATFORM_CONSOLE. Their definition is qualitative rather + than quantitative, and refers to the general (usually weaker) capabilities of the machine. Mobile devices have a + similar set of weaknesses that are useful to generally categorize. The primary motivation is to avoid code that + tests for multiple mobile platforms on a line and needs to be updated every time we get a new one. + For example, mobile platforms tend to have weaker ARM processors, don't have full multiple processor support, + are hand-held, don't have mice (though may have touch screens or basic cursor controls), have writable solid + state permanent storage. Production user code shouldn't have too many expectations about the meaning of this define. + + EA_PLATFORM_DESKTOP + This is similar to EA_PLATFORM_MOBILE in its qualitative nature and refers to platforms that are powerful. + For example, they nearly always have virtual memory, mapped memory, hundreds of GB of writable disk storage, + TCP/IP network connections, mice, keyboards, 512+ MB of RAM, multiprocessing, multiple display support. + Production user code shouldn't have too many expectations about the meaning of this define. + + EA_PLATFORM_CONSOLE + This is similar to EA_PLATFORM_MOBILE in its qualitative nature and refers to platforms that are consoles. + This means platforms that are connected to TVs, are fairly powerful (especially graphics-wise), are tightly + controlled by vendors, tend not to have mapped memory, tend to have TCP/IP, don't have multiple process support + though they might have multiple CPUs, support TV output only. Production user code shouldn't have too many + expectations about the meaning of this define. + +*/ + + +#ifndef INCLUDED_eaplatform_H +#define INCLUDED_eaplatform_H + + +// Cygwin +// This is a pseudo-platform which will be defined along with EA_PLATFORM_LINUX when +// using the Cygwin build environment. +#if defined(__CYGWIN__) + #define EA_PLATFORM_CYGWIN 1 + #define EA_PLATFORM_DESKTOP 1 +#endif + +// MinGW +// This is a pseudo-platform which will be defined along with EA_PLATFORM_WINDOWS when +// using the MinGW Windows build environment. +#if defined(__MINGW32__) || defined(__MINGW64__) + #define EA_PLATFORM_MINGW 1 + #define EA_PLATFORM_DESKTOP 1 +#endif + +#if defined(EA_PLATFORM_PS4) || defined(__ORBIS__) || defined(EA_PLATFORM_KETTLE) + // PlayStation 4 + // Orbis was Sony's code-name for the platform, which is now obsolete. + // Kettle was an EA-specific code-name for the platform, which is now obsolete. 
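// As an illustrative sketch of how the defines listed above are usually consumed
// (ToBigEndian below is a hypothetical helper, not part of EABase):
//
//     #include <cstdint>
//
//     #if defined(EA_SYSTEM_LITTLE_ENDIAN)
//         inline uint32_t ToBigEndian(uint32_t x)
//             { return (x >> 24) | ((x >> 8) & 0xff00u) | ((x << 8) & 0xff0000u) | (x << 24); }
//     #else
//         inline uint32_t ToBigEndian(uint32_t x) { return x; }   // already big-endian
//     #endif
//
//     #if defined(EA_PLATFORM_MICROSOFT)
//         // Windows-family path, chosen via EA_PLATFORM_* rather than raw compiler macros.
//     #elif defined(EA_PLATFORM_POSIX)
//         // Shared path for Linux, Apple, Sony and other POSIX-like platforms.
//     #endif
//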
+ #if defined(EA_PLATFORM_PS4) + #undef EA_PLATFORM_PS4 + #endif + #define EA_PLATFORM_PS4 1 + + // Backward compatibility: + #if defined(EA_PLATFORM_KETTLE) + #undef EA_PLATFORM_KETTLE + #endif + // End backward compatbility + + #define EA_PLATFORM_KETTLE 1 + #define EA_PLATFORM_NAME "PS4" + #define EA_SYSTEM_LITTLE_ENDIAN 1 + #define EA_PLATFORM_DESCRIPTION "PS4 on x64" + #define EA_PLATFORM_CONSOLE 1 + #define EA_PLATFORM_SONY 1 + #define EA_PLATFORM_POSIX 1 + // #define EA_POSIX_THREADS_AVAILABLE 1 // POSIX threading API is available but discouraged. Sony indicated use of the scePthreads* API is preferred. + #define EA_PROCESSOR_X86_64 1 + #if defined(__GNUC__) || defined(__clang__) + #define EA_ASM_STYLE_ATT 1 + #endif + +#elif defined(EA_PLATFORM_XBOXONE) || defined(_DURANGO) || defined(_XBOX_ONE) || defined(EA_PLATFORM_CAPILANO) || defined(_GAMING_XBOX) + // XBox One + // Durango was Microsoft's code-name for the platform, which is now obsolete. + // Microsoft uses _DURANGO instead of some variation of _XBOX, though it's not natively defined by the compiler. + // Capilano was an EA-specific code-name for the platform, which is now obsolete. + #if defined(EA_PLATFORM_XBOXONE) + #undef EA_PLATFORM_XBOXONE + #endif + #define EA_PLATFORM_XBOXONE 1 + + // Backward compatibility: + #if defined(EA_PLATFORM_CAPILANO) + #undef EA_PLATFORM_CAPILANO + #endif + #define EA_PLATFORM_CAPILANO 1 + #if defined(EA_PLATFORM_CAPILANO_XDK) && !defined(EA_PLATFORM_XBOXONE_XDK) + #define EA_PLATFORM_XBOXONE_XDK 1 + #endif + #if defined(EA_PLATFORM_CAPILANO_ADK) && !defined(EA_PLATFORM_XBOXONE_ADK) + #define EA_PLATFORM_XBOXONE_ADK 1 + #endif + // End backward compatibility + + #if !defined(_DURANGO) + #define _DURANGO + #endif + #define EA_PLATFORM_NAME "XBox One" + //#define EA_PROCESSOR_X86 Currently our policy is that we don't define this, even though x64 is something of a superset of x86. + #define EA_PROCESSOR_X86_64 1 + #define EA_SYSTEM_LITTLE_ENDIAN 1 + #define EA_PLATFORM_DESCRIPTION "XBox One on x64" + #define EA_ASM_STYLE_INTEL 1 + #define EA_PLATFORM_CONSOLE 1 + #define EA_PLATFORM_MICROSOFT 1 + + // WINAPI_FAMILY defines - mirrored from winapifamily.h + #define EA_WINAPI_FAMILY_APP 1000 + #define EA_WINAPI_FAMILY_DESKTOP_APP 1001 + #define EA_WINAPI_FAMILY_PHONE_APP 1002 + #define EA_WINAPI_FAMILY_TV_APP 1003 + #define EA_WINAPI_FAMILY_TV_TITLE 1004 + #define EA_WINAPI_FAMILY_GAMES 1006 + + #if defined(WINAPI_FAMILY) + #include + #if defined(WINAPI_FAMILY_TV_TITLE) && WINAPI_FAMILY == WINAPI_FAMILY_TV_TITLE + #define EA_WINAPI_FAMILY EA_WINAPI_FAMILY_TV_TITLE + #elif defined(WINAPI_FAMILY_DESKTOP_APP) && WINAPI_FAMILY == WINAPI_FAMILY_DESKTOP_APP + #define EA_WINAPI_FAMILY EA_WINAPI_FAMILY_DESKTOP_APP + #elif defined(WINAPI_FAMILY_GAMES) && WINAPI_FAMILY == WINAPI_FAMILY_GAMES + #define EA_WINAPI_FAMILY EA_WINAPI_FAMILY_GAMES + #else + #error Unsupported WINAPI_FAMILY + #endif + #else + #error WINAPI_FAMILY should always be defined on Capilano. + #endif + + // Macro to determine if a partition is enabled. 
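// Usage sketch for the partition macro defined just below; it is tested the same
// way the Windows SDK's WINAPI_FAMILY_PARTITION macro is:
//
//     #if EA_WINAPI_FAMILY_PARTITION(EA_WINAPI_PARTITION_TV_TITLE)
//         // code that may rely on TV-title (XDK) APIs
//     #else
//         // fallback for other partitions
//     #endif
//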
+ #define EA_WINAPI_FAMILY_PARTITION(Partition) (Partition) + + #if EA_WINAPI_FAMILY == EA_WINAPI_FAMILY_DESKTOP_APP + #define EA_WINAPI_PARTITION_CORE 1 + #define EA_WINAPI_PARTITION_DESKTOP 1 + #define EA_WINAPI_PARTITION_APP 1 + #define EA_WINAPI_PARTITION_PC_APP 0 + #define EA_WIANPI_PARTITION_PHONE 0 + #define EA_WINAPI_PARTITION_TV_APP 0 + #define EA_WINAPI_PARTITION_TV_TITLE 0 + #define EA_WINAPI_PARTITION_GAMES 0 + #elif EA_WINAPI_FAMILY == EA_WINAPI_FAMILY_TV_TITLE + #define EA_WINAPI_PARTITION_CORE 1 + #define EA_WINAPI_PARTITION_DESKTOP 0 + #define EA_WINAPI_PARTITION_APP 0 + #define EA_WINAPI_PARTITION_PC_APP 0 + #define EA_WIANPI_PARTITION_PHONE 0 + #define EA_WINAPI_PARTITION_TV_APP 0 + #define EA_WINAPI_PARTITION_TV_TITLE 1 + #define EA_WINAPI_PARTITION_GAMES 0 + #elif EA_WINAPI_FAMILY == EA_WINAPI_FAMILY_GAMES + #define EA_WINAPI_PARTITION_CORE 1 + #define EA_WINAPI_PARTITION_DESKTOP 0 + #define EA_WINAPI_PARTITION_APP 0 + #define EA_WINAPI_PARTITION_PC_APP 0 + #define EA_WIANPI_PARTITION_PHONE 0 + #define EA_WINAPI_PARTITION_TV_APP 0 + #define EA_WINAPI_PARTITION_TV_TITLE 0 + #define EA_WINAPI_PARTITION_GAMES 1 + #else + #error Unsupported WINAPI_FAMILY + #endif + + #if EA_WINAPI_FAMILY_PARTITION(EA_WINAPI_PARTITION_GAMES) + #define CS_UNDEFINED_STRING 1 + #define CS_UNDEFINED_STRING 1 + #endif + + #if EA_WINAPI_FAMILY_PARTITION(EA_WINAPI_PARTITION_TV_TITLE) + #define EA_PLATFORM_XBOXONE_XDK 1 + #endif +#elif defined(EA_PLATFORM_LRB) || defined(__LRB__) || (defined(__EDG__) && defined(__ICC) && defined(__x86_64__)) + #undef EA_PLATFORM_LRB + #define EA_PLATFORM_LRB 1 + #define EA_PLATFORM_NAME "Larrabee" + #define EA_PLATFORM_DESCRIPTION "Larrabee on LRB1" + #define EA_PROCESSOR_X86_64 1 + #if defined(BYTE_ORDER) && (BYTE_ORDER == 4321) + #define EA_SYSTEM_BIG_ENDIAN 1 + #else + #define EA_SYSTEM_LITTLE_ENDIAN 1 + #endif + #define EA_PROCESSOR_LRB 1 + #define EA_PROCESSOR_LRB1 1 // Larrabee version 1 + #define EA_ASM_STYLE_ATT 1 // Both types of asm style + #define EA_ASM_STYLE_INTEL 1 // are supported. + #define EA_PLATFORM_DESKTOP 1 + +// Android (Google phone OS) +#elif defined(EA_PLATFORM_ANDROID) || defined(__ANDROID__) + #undef EA_PLATFORM_ANDROID + #define EA_PLATFORM_ANDROID 1 + #define EA_PLATFORM_LINUX 1 + #define EA_PLATFORM_UNIX 1 + #define EA_PLATFORM_POSIX 1 + #define EA_PLATFORM_NAME "Android" + #define EA_ASM_STYLE_ATT 1 + #if defined(__arm__) + #define EA_ABI_ARM_LINUX 1 // a.k.a. "ARM eabi" + #define EA_PROCESSOR_ARM32 1 + #define EA_PLATFORM_DESCRIPTION "Android on ARM" + #elif defined(__aarch64__) + #define EA_PROCESSOR_ARM64 1 + #define EA_PLATFORM_DESCRIPTION "Android on ARM64" + #elif defined(__i386__) + #define EA_PROCESSOR_X86 1 + #define EA_PLATFORM_DESCRIPTION "Android on x86" + #elif defined(__x86_64) + #define EA_PROCESSOR_X86_64 1 + #define EA_PLATFORM_DESCRIPTION "Android on x64" + #else + #error Unknown processor + #endif + #if !defined(EA_SYSTEM_BIG_ENDIAN) && !defined(EA_SYSTEM_LITTLE_ENDIAN) + #define EA_SYSTEM_LITTLE_ENDIAN 1 + #endif + #define EA_PLATFORM_MOBILE 1 + +// Samsung SMART TV - a Linux-based smart TV +#elif defined(EA_PLATFORM_SAMSUNG_TV) + #undef EA_PLATFORM_SAMSUNG_TV + #define EA_PLATFORM_SAMSUNG_TV 1 + #define EA_PLATFORM_LINUX 1 + #define EA_PLATFORM_UNIX 1 + #define EA_PLATFORM_POSIX 1 + #define EA_PLATFORM_NAME "SamsungTV" + #define EA_PLATFORM_DESCRIPTION "Samsung SMART TV on ARM" + #define EA_ASM_STYLE_ATT 1 + #define EA_SYSTEM_LITTLE_ENDIAN 1 + #define EA_PROCESSOR_ARM32 1 + #define EA_ABI_ARM_LINUX 1 // a.k.a. 
"ARM eabi" + #define EA_PROCESSOR_ARM7 1 + +#elif defined(__APPLE__) && __APPLE__ + #include + + // Apple family of operating systems. + #define EA_PLATFORM_APPLE + #define EA_PLATFORM_POSIX 1 + + // iPhone + // TARGET_OS_IPHONE will be undefined on an unknown compiler, and will be defined on gcc. + #if defined(EA_PLATFORM_IPHONE) || defined(__IPHONE__) || (defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE) || (defined(TARGET_IPHONE_SIMULATOR) && TARGET_IPHONE_SIMULATOR) + #undef EA_PLATFORM_IPHONE + #define EA_PLATFORM_IPHONE 1 + #define EA_PLATFORM_NAME "iPhone" + #define EA_ASM_STYLE_ATT 1 + #define EA_POSIX_THREADS_AVAILABLE 1 + #if defined(__arm__) + #define EA_ABI_ARM_APPLE 1 + #define EA_PROCESSOR_ARM32 1 + #define EA_SYSTEM_LITTLE_ENDIAN 1 + #define EA_PLATFORM_DESCRIPTION "iPhone on ARM" + #elif defined(__aarch64__) || defined(__AARCH64) + #define EA_ABI_ARM64_APPLE 1 + #define EA_PROCESSOR_ARM64 1 + #define EA_SYSTEM_LITTLE_ENDIAN 1 + #define EA_PLATFORM_DESCRIPTION "iPhone on ARM64" + #elif defined(__i386__) + #define EA_PLATFORM_IPHONE_SIMULATOR 1 + #define EA_PROCESSOR_X86 1 + #define EA_SYSTEM_LITTLE_ENDIAN 1 + #define EA_PLATFORM_DESCRIPTION "iPhone simulator on x86" + #elif defined(__x86_64) || defined(__amd64) + #define EA_PROCESSOR_X86_64 1 + #define EA_SYSTEM_LITTLE_ENDIAN 1 + #define EA_PLATFORM_DESCRIPTION "iPhone simulator on x64" + #else + #error Unknown processor + #endif + #define EA_PLATFORM_MOBILE 1 + + // Macintosh OSX + // TARGET_OS_MAC is defined by the Metrowerks and older AppleC compilers. + // Howerver, TARGET_OS_MAC is defined to be 1 in all cases. + // __i386__ and __intel__ are defined by the GCC compiler. + // __dest_os is defined by the Metrowerks compiler. + // __MACH__ is defined by the Metrowerks and GCC compilers. + // powerc and __powerc are defined by the Metrowerks and GCC compilers. + #elif defined(EA_PLATFORM_OSX) || defined(__MACH__) || (defined(__MSL__) && (__dest_os == __mac_os_x)) + #undef EA_PLATFORM_OSX + #define EA_PLATFORM_OSX 1 + #define EA_PLATFORM_UNIX 1 + #define EA_PLATFORM_POSIX 1 + //#define EA_PLATFORM_BSD 1 We don't currently define this. OSX has some BSD history but a lot of the API is different. 
+ #define EA_PLATFORM_NAME "OSX" + #if defined(__i386__) || defined(__intel__) + #define EA_PROCESSOR_X86 1 + #define EA_SYSTEM_LITTLE_ENDIAN 1 + #define EA_PLATFORM_DESCRIPTION "OSX on x86" + #elif defined(__x86_64) || defined(__amd64) + #define EA_PROCESSOR_X86_64 1 + #define EA_SYSTEM_LITTLE_ENDIAN 1 + #define EA_PLATFORM_DESCRIPTION "OSX on x64" + #elif defined(__arm__) + #define EA_ABI_ARM_APPLE 1 + #define EA_PROCESSOR_ARM32 1 + #define EA_SYSTEM_LITTLE_ENDIAN 1 + #define EA_PLATFORM_DESCRIPTION "OSX on ARM" + #elif defined(__aarch64__) || defined(__AARCH64) + #define EA_ABI_ARM64_APPLE 1 + #define EA_PROCESSOR_ARM64 1 + #define EA_SYSTEM_LITTLE_ENDIAN 1 + #define EA_PLATFORM_DESCRIPTION "OSX on ARM64" + #elif defined(__POWERPC64__) || defined(__powerpc64__) + #define EA_PROCESSOR_POWERPC 1 + #define EA_PROCESSOR_POWERPC_64 1 + #define EA_SYSTEM_BIG_ENDIAN 1 + #define EA_PLATFORM_DESCRIPTION "OSX on PowerPC 64" + #elif defined(__POWERPC__) || defined(__powerpc__) + #define EA_PROCESSOR_POWERPC 1 + #define EA_PROCESSOR_POWERPC_32 1 + #define EA_SYSTEM_BIG_ENDIAN 1 + #define EA_PLATFORM_DESCRIPTION "OSX on PowerPC" + #else + #error Unknown processor + #endif + #if defined(__GNUC__) + #define EA_ASM_STYLE_ATT 1 + #else + #define EA_ASM_STYLE_MOTOROLA 1 + #endif + #define EA_PLATFORM_DESKTOP 1 + #else + #error Unknown Apple Platform + #endif + +// Linux +// __linux and __linux__ are defined by the GCC and Borland compiler. +// __i386__ and __intel__ are defined by the GCC compiler. +// __i386__ is defined by the Metrowerks compiler. +// _M_IX86 is defined by the Borland compiler. +// __sparc__ is defined by the GCC compiler. +// __powerpc__ is defined by the GCC compiler. +// __ARM_EABI__ is defined by GCC on an ARM v6l (Raspberry Pi 1) +// __ARM_ARCH_7A__ is defined by GCC on an ARM v7l (Raspberry Pi 2) +#elif defined(EA_PLATFORM_LINUX) || (defined(__linux) || defined(__linux__)) + #undef EA_PLATFORM_LINUX + #define EA_PLATFORM_LINUX 1 + #define EA_PLATFORM_UNIX 1 + #define EA_PLATFORM_POSIX 1 + #define EA_PLATFORM_NAME "Linux" + #if defined(__i386__) || defined(__intel__) || defined(_M_IX86) + #define EA_PROCESSOR_X86 1 + #define EA_SYSTEM_LITTLE_ENDIAN 1 + #define EA_PLATFORM_DESCRIPTION "Linux on x86" + #elif defined(__ARM_ARCH_7A__) || defined(__ARM_EABI__) + #define EA_ABI_ARM_LINUX 1 + #define EA_PROCESSOR_ARM32 1 + #define EA_PLATFORM_DESCRIPTION "Linux on ARM 6/7 32-bits" + #elif defined(__aarch64__) || defined(__AARCH64) + #define EA_PROCESSOR_ARM64 1 + #define EA_PLATFORM_DESCRIPTION "Linux on ARM64" + #elif defined(__x86_64__) + #define EA_PROCESSOR_X86_64 1 + #define EA_SYSTEM_LITTLE_ENDIAN 1 + #define EA_PLATFORM_DESCRIPTION "Linux on x64" + #elif defined(__powerpc64__) + #define EA_PROCESSOR_POWERPC 1 + #define EA_PROCESSOR_POWERPC_64 1 + #define EA_SYSTEM_BIG_ENDIAN 1 + #define EA_PLATFORM_DESCRIPTION "Linux on PowerPC 64" + #elif defined(__powerpc__) + #define EA_PROCESSOR_POWERPC 1 + #define EA_PROCESSOR_POWERPC_32 1 + #define EA_SYSTEM_BIG_ENDIAN 1 + #define EA_PLATFORM_DESCRIPTION "Linux on PowerPC" + #else + #error Unknown processor + #error Unknown endianness + #endif + #if defined(__GNUC__) + #define EA_ASM_STYLE_ATT 1 + #endif + #define EA_PLATFORM_DESKTOP 1 + + +#elif defined(EA_PLATFORM_BSD) || (defined(__BSD__) || defined(__FreeBSD__)) + #undef EA_PLATFORM_BSD + #define EA_PLATFORM_BSD 1 + #define EA_PLATFORM_UNIX 1 + #define EA_PLATFORM_POSIX 1 // BSD's posix complaince is not identical to Linux's + #define EA_PLATFORM_NAME "BSD Unix" + #if defined(__i386__) 
|| defined(__intel__) + #define EA_PROCESSOR_X86 1 + #define EA_SYSTEM_LITTLE_ENDIAN 1 + #define EA_PLATFORM_DESCRIPTION "BSD on x86" + #elif defined(__x86_64__) + #define EA_PROCESSOR_X86_64 1 + #define EA_SYSTEM_LITTLE_ENDIAN 1 + #define EA_PLATFORM_DESCRIPTION "BSD on x64" + #elif defined(__powerpc64__) + #define EA_PROCESSOR_POWERPC 1 + #define EA_PROCESSOR_POWERPC_64 1 + #define EA_SYSTEM_BIG_ENDIAN 1 + #define EA_PLATFORM_DESCRIPTION "BSD on PowerPC 64" + #elif defined(__powerpc__) + #define EA_PROCESSOR_POWERPC 1 + #define EA_PROCESSOR_POWERPC_32 1 + #define EA_SYSTEM_BIG_ENDIAN 1 + #define EA_PLATFORM_DESCRIPTION "BSD on PowerPC" + #else + #error Unknown processor + #error Unknown endianness + #endif + #if !defined(EA_PLATFORM_FREEBSD) && defined(__FreeBSD__) + #define EA_PLATFORM_FREEBSD 1 // This is a variation of BSD. + #endif + #if defined(__GNUC__) + #define EA_ASM_STYLE_ATT 1 + #endif + #define EA_PLATFORM_DESKTOP 1 + + +#elif defined(EA_PLATFORM_WINDOWS_PHONE) + #undef EA_PLATFORM_WINDOWS_PHONE + #define EA_PLATFORM_WINDOWS_PHONE 1 + #define EA_PLATFORM_NAME "Windows Phone" + #if defined(_M_AMD64) || defined(_AMD64_) || defined(__x86_64__) + #define EA_PROCESSOR_X86_64 1 + #define EA_SYSTEM_LITTLE_ENDIAN 1 + #define EA_PLATFORM_DESCRIPTION "Windows Phone on x64" + #elif defined(_M_IX86) || defined(_X86_) + #define EA_PROCESSOR_X86 1 + #define EA_SYSTEM_LITTLE_ENDIAN 1 + #define EA_PLATFORM_DESCRIPTION "Windows Phone on X86" + #elif defined(_M_ARM) + #define EA_ABI_ARM_WINCE 1 + #define EA_PROCESSOR_ARM32 1 + #define EA_SYSTEM_LITTLE_ENDIAN 1 + #define EA_PLATFORM_DESCRIPTION "Windows Phone on ARM" + #else //Possibly other Windows Phone variants + #error Unknown processor + #error Unknown endianness + #endif + #define EA_PLATFORM_MICROSOFT 1 + + // WINAPI_FAMILY defines - mirrored from winapifamily.h + #define EA_WINAPI_FAMILY_APP 1 + #define EA_WINAPI_FAMILY_DESKTOP_APP 2 + #define EA_WINAPI_FAMILY_PHONE_APP 3 + + #if defined(WINAPI_FAMILY) + #include + #if WINAPI_FAMILY == WINAPI_FAMILY_PHONE_APP + #define EA_WINAPI_FAMILY EA_WINAPI_FAMILY_PHONE_APP + #else + #error Unsupported WINAPI_FAMILY for Windows Phone + #endif + #else + #error WINAPI_FAMILY should always be defined on Windows Phone. + #endif + + // Macro to determine if a partition is enabled. + #define EA_WINAPI_FAMILY_PARTITION(Partition) (Partition) + + // Enable the appropriate partitions for the current family + #if EA_WINAPI_FAMILY == EA_WINAPI_FAMILY_PHONE_APP + # define EA_WINAPI_PARTITION_CORE 1 + # define EA_WINAPI_PARTITION_PHONE 1 + # define EA_WINAPI_PARTITION_APP 1 + #else + # error Unsupported WINAPI_FAMILY for Windows Phone + #endif + + +// Windows +// _WIN32 is defined by the VC++, Intel and GCC compilers. +// _WIN64 is defined by the VC++, Intel and GCC compilers. +// __WIN32__ is defined by the Borland compiler. +// __INTEL__ is defined by the Metrowerks compiler. +// _M_IX86, _M_AMD64 and _M_IA64 are defined by the VC++, Intel, and Borland compilers. +// _X86_, _AMD64_, and _IA64_ are defined by the Metrowerks compiler. +// _M_ARM is defined by the VC++ compiler. +#elif (defined(EA_PLATFORM_WINDOWS) || (defined(_WIN32) || defined(__WIN32__) || defined(_WIN64))) && !defined(CS_UNDEFINED_STRING) + #undef EA_PLATFORM_WINDOWS + #define EA_PLATFORM_WINDOWS 1 + #define EA_PLATFORM_NAME "Windows" + #ifdef _WIN64 // VC++ defines both _WIN32 and _WIN64 when compiling for Win64. 
+ #define EA_PLATFORM_WIN64 1 + #else + #define EA_PLATFORM_WIN32 1 + #endif + #if defined(_M_AMD64) || defined(_AMD64_) || defined(__x86_64__) + #define EA_PROCESSOR_X86_64 1 + #define EA_SYSTEM_LITTLE_ENDIAN 1 + #define EA_PLATFORM_DESCRIPTION "Windows on x64" + #elif defined(_M_IX86) || defined(_X86_) + #define EA_PROCESSOR_X86 1 + #define EA_SYSTEM_LITTLE_ENDIAN 1 + #define EA_PLATFORM_DESCRIPTION "Windows on X86" + #elif defined(_M_IA64) || defined(_IA64_) + #define EA_PROCESSOR_IA64 1 + #define EA_SYSTEM_LITTLE_ENDIAN 1 + #define EA_PLATFORM_DESCRIPTION "Windows on IA-64" + #elif defined(_M_ARM) + #define EA_ABI_ARM_WINCE 1 + #define EA_PROCESSOR_ARM32 1 + #define EA_SYSTEM_LITTLE_ENDIAN 1 + #define EA_PLATFORM_DESCRIPTION "Windows on ARM" + #elif defined(_M_ARM64) + #define EA_PROCESSOR_ARM64 1 + #define EA_SYSTEM_LITTLE_ENDIAN 1 + #define EA_PLATFORM_DESCRIPTION "Windows on ARM64" + #else //Possibly other Windows CE variants + #error Unknown processor + #error Unknown endianness + #endif + #if defined(__GNUC__) + #define EA_ASM_STYLE_ATT 1 + #elif defined(_MSC_VER) || defined(__BORLANDC__) || defined(__ICL) + #define EA_ASM_STYLE_INTEL 1 + #endif + #define EA_PLATFORM_DESKTOP 1 + #define EA_PLATFORM_MICROSOFT 1 + + // WINAPI_FAMILY defines to support Windows 8 Metro Apps - mirroring winapifamily.h in the Windows 8 SDK + #define EA_WINAPI_FAMILY_APP 1000 + #define EA_WINAPI_FAMILY_DESKTOP_APP 1001 + #define EA_WINAPI_FAMILY_GAMES 1006 + + #if defined(WINAPI_FAMILY) + #if defined(_MSC_VER) + #pragma warning(push, 0) + #endif + #include + #if defined(_MSC_VER) + #pragma warning(pop) + #endif + #if defined(WINAPI_FAMILY_DESKTOP_APP) && WINAPI_FAMILY == WINAPI_FAMILY_DESKTOP_APP + #define EA_WINAPI_FAMILY EA_WINAPI_FAMILY_DESKTOP_APP + #elif defined(WINAPI_FAMILY_APP) && WINAPI_FAMILY == WINAPI_FAMILY_APP + #define EA_WINAPI_FAMILY EA_WINAPI_FAMILY_APP + #elif defined(WINAPI_FAMILY_GAMES) && WINAPI_FAMILY == WINAPI_FAMILY_GAMES + #define EA_WINAPI_FAMILY EA_WINAPI_FAMILY_GAMES + #else + #error Unsupported WINAPI_FAMILY + #endif + #else + #define EA_WINAPI_FAMILY EA_WINAPI_FAMILY_DESKTOP_APP + #endif + + #define EA_WINAPI_PARTITION_DESKTOP 1 + #define EA_WINAPI_PARTITION_APP 1 + #define EA_WINAPI_PARTITION_GAMES (EA_WINAPI_FAMILY == EA_WINAPI_FAMILY_GAMES) + + #define EA_WINAPI_FAMILY_PARTITION(Partition) (Partition) + + // EA_PLATFORM_WINRT + // This is a subset of Windows which is used for tablets and the "Metro" (restricted) Windows user interface. + // WinRT doesn't doesn't have access to the Windows "desktop" API, but WinRT can nevertheless run on + // desktop computers in addition to tablets. The Windows Phone API is a subset of WinRT and is not included + // in it due to it being only a part of the API. + #if defined(__cplusplus_winrt) + #define EA_PLATFORM_WINRT 1 + #endif + +// Sun (Solaris) +// __SUNPRO_CC is defined by the Sun compiler. +// __sun is defined by the GCC compiler. +// __i386 is defined by the Sun and GCC compilers. +// __sparc is defined by the Sun and GCC compilers. +#else + #error Unknown platform + #error Unknown processor + #error Unknown endianness +#endif + +#ifndef EA_PROCESSOR_ARM + #if defined(EA_PROCESSOR_ARM32) || defined(EA_PROCESSOR_ARM64) || defined(EA_PROCESSOR_ARM7) + #define EA_PROCESSOR_ARM + #endif +#endif + +// EA_PLATFORM_PTR_SIZE +// Platform pointer size; same as sizeof(void*). +// This is not the same as sizeof(int), as int is usually 32 bits on +// even 64 bit platforms. +// +// _WIN64 is defined by Win64 compilers, such as VC++. 
+// _M_IA64 is defined by VC++ and Intel compilers for IA64 processors. +// __LP64__ is defined by HP compilers for the LP64 standard. +// _LP64 is defined by the GCC and Sun compilers for the LP64 standard. +// __ia64__ is defined by the GCC compiler for IA64 processors. +// __arch64__ is defined by the Sparc compiler for 64 bit processors. +// __mips64__ is defined by the GCC compiler for MIPS processors. +// __powerpc64__ is defined by the GCC compiler for PowerPC processors. +// __64BIT__ is defined by the AIX compiler for 64 bit processors. +// __sizeof_ptr is defined by the ARM compiler (armcc, armcpp). +// +#ifndef EA_PLATFORM_PTR_SIZE + #if defined(__WORDSIZE) // Defined by some variations of GCC. + #define EA_PLATFORM_PTR_SIZE ((__WORDSIZE) / 8) + #elif defined(_WIN64) || defined(__LP64__) || defined(_LP64) || defined(_M_IA64) || defined(__ia64__) || defined(__arch64__) || defined(__aarch64__) || defined(__mips64__) || defined(__64BIT__) || defined(__Ptr_Is_64) + #define EA_PLATFORM_PTR_SIZE 8 + #elif defined(__CC_ARM) && (__sizeof_ptr == 8) + #define EA_PLATFORM_PTR_SIZE 8 + #else + #define EA_PLATFORM_PTR_SIZE 4 + #endif +#endif + + + +// EA_PLATFORM_WORD_SIZE +// This defines the size of a machine word. This will be the same as +// the size of registers on the machine but not necessarily the same +// as the size of pointers on the machine. A number of 64 bit platforms +// have 64 bit registers but 32 bit pointers. +// +#ifndef EA_PLATFORM_WORD_SIZE + #define EA_PLATFORM_WORD_SIZE EA_PLATFORM_PTR_SIZE +#endif + +// EA_PLATFORM_MIN_MALLOC_ALIGNMENT +// This defines the minimal alignment that the platform's malloc +// implementation will return. This should be used when writing custom +// allocators to ensure that the alignment matches that of malloc +#ifndef EA_PLATFORM_MIN_MALLOC_ALIGNMENT + #if defined(EA_PLATFORM_APPLE) + #define EA_PLATFORM_MIN_MALLOC_ALIGNMENT 16 + #elif defined(EA_PLATFORM_ANDROID) && defined(EA_PROCESSOR_ARM) + #define EA_PLATFORM_MIN_MALLOC_ALIGNMENT 8 + #elif defined(EA_PLATFORM_ANDROID) && defined(EA_PROCESSOR_X86_64) + #define EA_PLATFORM_MIN_MALLOC_ALIGNMENT 8 + #else + #define EA_PLATFORM_MIN_MALLOC_ALIGNMENT (EA_PLATFORM_PTR_SIZE * 2) + #endif +#endif + + +// EA_MISALIGNED_SUPPORT_LEVEL +// Specifies if the processor can read and write built-in types that aren't +// naturally aligned. +// 0 - not supported. Likely causes an exception. +// 1 - supported but slow. +// 2 - supported and fast. +// +#ifndef EA_MISALIGNED_SUPPORT_LEVEL + #if defined(EA_PROCESSOR_X86_64) + #define EA_MISALIGNED_SUPPORT_LEVEL 2 + #else + #define EA_MISALIGNED_SUPPORT_LEVEL 0 + #endif +#endif + +// Macro to determine if a Windows API partition is enabled. Always false on non Microsoft platforms. +#if !defined(EA_WINAPI_FAMILY_PARTITION) + #define EA_WINAPI_FAMILY_PARTITION(Partition) (0) +#endif + + +// EA_CACHE_LINE_SIZE +// Specifies the cache line size broken down by compile target. +// This the expected best guess values for the targets that we can make at compilation time. + +#ifndef EA_CACHE_LINE_SIZE + #if defined(EA_PROCESSOR_X86) + #define EA_CACHE_LINE_SIZE 32 // This is the minimum possible value. + #elif defined(EA_PROCESSOR_X86_64) + #define EA_CACHE_LINE_SIZE 64 // This is the minimum possible value + #elif defined(EA_PROCESSOR_ARM32) + #define EA_CACHE_LINE_SIZE 32 // This varies between implementations and is usually 32 or 64. 
+ #elif defined(EA_PROCESSOR_ARM64) + #define EA_CACHE_LINE_SIZE 64 // Cache line Cortex-A8 (64 bytes) http://shervinemami.info/armAssembly.html however this remains to be mostly an assumption at this stage + #elif (EA_PLATFORM_WORD_SIZE == 4) + #define EA_CACHE_LINE_SIZE 32 // This is the minimum possible value + #else + #define EA_CACHE_LINE_SIZE 64 // This is the minimum possible value + #endif +#endif + + +#endif // INCLUDED_eaplatform_H + + + + + + + + + diff --git a/libkram/eastl/include/EABase/eabase.h b/libkram/eastl/include/EABase/eabase.h new file mode 100644 index 00000000..dab9e467 --- /dev/null +++ b/libkram/eastl/include/EABase/eabase.h @@ -0,0 +1,1011 @@ +/*----------------------------------------------------------------------------- + * eabase.h + * + * Copyright (c) Electronic Arts Inc. All rights reserved. + *---------------------------------------------------------------------------*/ + + +#ifndef INCLUDED_eabase_H +#define INCLUDED_eabase_H + + +// Identify the compiler and declare the EA_COMPILER_xxxx defines +#include + +// Identify traits which this compiler supports, or does not support +#include + +// Identify the platform and declare the EA_xxxx defines +#include + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + +// Always include version.h for backwards compatibility. +#include + +// Define common SI unit macros +#include + + +// ------------------------------------------------------------------------ +// The C++ standard defines size_t as a built-in type. Some compilers are +// not standards-compliant in this respect, so we need an additional include. +// The case is similar with wchar_t under C++. + +#if defined(EA_COMPILER_GNUC) || defined(EA_COMPILER_MSVC) || defined(EA_WCHAR_T_NON_NATIVE) || defined(EA_PLATFORM_SONY) + #if defined(EA_COMPILER_MSVC) + #pragma warning(push, 0) + #pragma warning(disable: 4265 4365 4836 4574) + #endif + #include + #if defined(EA_COMPILER_MSVC) + #pragma warning(pop) + #endif +#endif + +// ------------------------------------------------------------------------ +// Include stddef.h on Apple's clang compiler to ensure the ptrdiff_t type +// is defined. +#if defined(EA_COMPILER_CLANG) && defined(EA_PLATFORM_APPLE) + #include +#endif + +// ------------------------------------------------------------------------ +// Include assert.h on C11 supported compilers so we may allow static_assert usage +// http://en.cppreference.com/w/c/error/static_assert +// C11 standard(ISO / IEC 9899:2011) : +// 7.2/3 Diagnostics (p : 186) +#if !defined(__cplusplus) && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201100L + #include +#endif + + +// ------------------------------------------------------------------------ +// By default, GCC defines NULL as ((void*)0), which is the +// C definition. This causes all sort of problems for C++ code, so it is +// worked around by undefining NULL. + +#if defined(NULL) + #undef NULL +#endif + + +// ------------------------------------------------------------------------ +// Define the NULL pointer. This is normally defined in , but we +// don't want to force a global dependency on that header, so the definition +// is duplicated here. + +#if defined(__cplusplus) + #define NULL 0 +#else + #define NULL ((void*)0) +#endif + + +// ------------------------------------------------------------------------ +// C98/99 Standard typedefs. 
From the ANSI ISO/IEC 9899 standards document +// Most recent versions of the gcc-compiler come with these defined in +// inttypes.h or stddef.h. Determining if they are predefined can be +// tricky, so we expect some problems on non-standard compilers + +//#if (defined(_INTTYPES_H) || defined(_INTTYPES_H_)) && !defined(PRId64) +// #error " was #included before eabase.h, but without __STDC_FORMAT_MACROS #defined. You must #include eabase.h or an equivalent before #including C99 headers, or you must define __STDC_FORMAT_MACRO before #including system headrs." +//#endif + +// ------------------------------------------------------------------------ +// We need to test this after we potentially include stddef.h, otherwise we +// would have put this into the compilertraits header. +#if !defined(EA_COMPILER_HAS_INTTYPES) && (!defined(_MSC_VER) || (_MSC_VER > 1500)) && (defined(EA_COMPILER_IS_C99) || defined(INT8_MIN) || defined(EA_COMPILER_HAS_C99_TYPES) || defined(_SN_STDINT_H)) + #define EA_COMPILER_HAS_INTTYPES +#endif + +#ifdef EA_COMPILER_HAS_INTTYPES // If the compiler supports inttypes... + // ------------------------------------------------------------------------ + // Include the stdint header to define and derive the required types. + // Additionally include inttypes.h as many compilers, including variations + // of GCC define things in inttypes.h that the C99 standard says goes + // in stdint.h. + // + // The C99 standard specifies that inttypes.h only define printf/scanf + // format macros if __STDC_FORMAT_MACROS is defined before #including + // inttypes.h. For consistency, we do that here. + #ifndef __STDC_FORMAT_MACROS + #define __STDC_FORMAT_MACROS + #endif + // The GCC PSP compiler defines standard int types (e.g. uint32_t) but not PRId8, etc. + // MSVC added support for inttypes.h header in VS2013. + #if !defined(EA_COMPILER_MSVC) || (defined(EA_COMPILER_MSVC) && EA_COMPILER_VERSION >= 1800) + #include // PRId8, SCNd8, etc. + #endif + #if defined(_MSC_VER) + #pragma warning(push, 0) + #endif + #include // int32_t, INT64_C, UINT8_MAX, etc. + #include // float_t, double_t, etc. + #include // FLT_EVAL_METHOD. + #if defined(_MSC_VER) + #pragma warning(pop) + #endif + + #if !defined(FLT_EVAL_METHOD) && (defined(__FLT_EVAL_METHOD__) || defined(_FEVAL)) // GCC 3.x defines __FLT_EVAL_METHOD__ instead of the C99 standard FLT_EVAL_METHOD. + #ifdef __FLT_EVAL_METHOD__ + #define FLT_EVAL_METHOD __FLT_EVAL_METHOD__ + #else + #define FLT_EVAL_METHOD _FEVAL + #endif + #endif + + // MinGW GCC (up to at least v4.3.0-20080502) mistakenly neglects to define float_t and double_t. + // This appears to be an acknowledged bug as of March 2008 and is scheduled to be fixed. + // Similarly, Android uses a mix of custom standard library headers which prior to SDK API level 21 + // don't define float_t and double_t. + #if defined(__MINGW32__) || (defined(EA_PLATFORM_ANDROID) && !(defined(EA_ANDROID_SDK_LEVEL) && EA_ANDROID_SDK_LEVEL >= 21)) + #if defined(__FLT_EVAL_METHOD__) + #if(__FLT_EVAL_METHOD__== 0) + typedef float float_t; + typedef double double_t; + #elif(__FLT_EVAL_METHOD__ == 1) + typedef double float_t; + typedef double double_t; + #elif(__FLT_EVAL_METHOD__ == 2) + typedef long double float_t; + typedef long double double_t; + #endif + #else + typedef float float_t; + typedef double double_t; + #endif + #endif + + // The CodeSourcery definitions of PRIxPTR and SCNxPTR are broken for 32 bit systems. 
+ #if defined(__SIZEOF_SIZE_T__) && (__SIZEOF_SIZE_T__ == 4) && (defined(__have_long64) || defined(__have_longlong64)) + #undef PRIdPTR + #define PRIdPTR "d" + #undef PRIiPTR + #define PRIiPTR "i" + #undef PRIoPTR + #define PRIoPTR "o" + #undef PRIuPTR + #define PRIuPTR "u" + #undef PRIxPTR + #define PRIxPTR "x" + #undef PRIXPTR + #define PRIXPTR "X" + + #undef SCNdPTR + #define SCNdPTR "d" + #undef SCNiPTR + #define SCNiPTR "i" + #undef SCNoPTR + #define SCNoPTR "o" + #undef SCNuPTR + #define SCNuPTR "u" + #undef SCNxPTR + #define SCNxPTR "x" + #endif +#else // else we must implement types ourselves. + + #if !defined(__BIT_TYPES_DEFINED__) && !defined(__int8_t_defined) + typedef signed char int8_t; //< 8 bit signed integer + #endif + #if !defined( __int8_t_defined ) + typedef signed short int16_t; //< 16 bit signed integer + typedef signed int int32_t; //< 32 bit signed integer. This works for both 32 bit and 64 bit platforms, as we assume the LP64 is followed. + #define __int8_t_defined + #endif + typedef unsigned char uint8_t; //< 8 bit unsigned integer + typedef unsigned short uint16_t; //< 16 bit unsigned integer + #if !defined( __uint32_t_defined ) + typedef unsigned int uint32_t; //< 32 bit unsigned integer. This works for both 32 bit and 64 bit platforms, as we assume the LP64 is followed. + #define __uint32_t_defined + #endif + + // According to the C98/99 standard, FLT_EVAL_METHOD defines control the + // width used for floating point _t types. + #if defined(_MSC_VER) && _MSC_VER >= 1800 + // MSVC's math.h provides float_t, double_t under this condition. + #elif defined(FLT_EVAL_METHOD) + #if (FLT_EVAL_METHOD == 0) + typedef float float_t; + typedef double double_t; + #elif (FLT_EVAL_METHOD == 1) + typedef double float_t; + typedef double double_t; + #elif (FLT_EVAL_METHOD == 2) + typedef long double float_t; + typedef long double double_t; + #endif + #endif + + #if defined(EA_COMPILER_MSVC) + typedef signed __int64 int64_t; + typedef unsigned __int64 uint64_t; + + #else + typedef signed long long int64_t; + typedef unsigned long long uint64_t; + #endif +#endif + + +// ------------------------------------------------------------------------ +// macros for declaring constants in a portable way. +// +// e.g. int64_t x = INT64_C(1234567812345678); +// e.g. int64_t x = INT64_C(0x1111111122222222); +// e.g. uint64_t x = UINT64_C(0x1111111122222222); +// +// Microsoft VC++'s definitions of INT8_C/UINT8_C/INT16_C/UINT16_C are like so: +// #define INT8_C(x) (x) +// #define INT16_C(x) (x) +// #define UINT8_C(x) (x) +// #define UINT16_C(x) (x) +// To consider: undefine Microsoft's and use the casting versions below. +// ------------------------------------------------------------------------ + +#ifndef INT8_C_DEFINED // If the user hasn't already defined these... + #define INT8_C_DEFINED + + #ifndef INT8_C + #define INT8_C(x) int8_t(x) // For the majority of compilers and platforms, long is 32 bits and long long is 64 bits. + #endif + #ifndef UINT8_C + #define UINT8_C(x) uint8_t(x) + #endif + #ifndef INT16_C + #define INT16_C(x) int16_t(x) + #endif + #ifndef UINT16_C + #define UINT16_C(x) uint16_t(x) // Possibly we should make this be uint16_t(x##u). Let's see how compilers react before changing this. + #endif + #ifndef INT32_C + #define INT32_C(x) x##L + #endif + #ifndef UINT32_C + #define UINT32_C(x) x##UL + #endif + #ifndef INT64_C + #define INT64_C(x) x##LL // The way to deal with this is to compare ULONG_MAX to 0xffffffff and if not equal, then remove the L. 
+ #endif + #ifndef UINT64_C + #define UINT64_C(x) x##ULL // We need to follow a similar approach for LL. + #endif + #ifndef UINTMAX_C + #define UINTMAX_C(x) UINT64_C(x) + #endif +#endif + +// ------------------------------------------------------------------------ +// type sizes +#ifndef INT8_MAX_DEFINED // If the user hasn't already defined these... + #define INT8_MAX_DEFINED + + // The value must be 2^(n-1)-1 + #ifndef INT8_MAX + #define INT8_MAX 127 + #endif + #ifndef INT16_MAX + #define INT16_MAX 32767 + #endif + #ifndef INT32_MAX + #define INT32_MAX 2147483647 + #endif + #ifndef INT64_MAX + #define INT64_MAX INT64_C(9223372036854775807) + #endif + #ifndef INTMAX_MAX + #define INTMAX_MAX INT64_MAX + #endif + #ifndef INTPTR_MAX + #if EA_PLATFORM_PTR_SIZE == 4 + #define INTPTR_MAX INT32_MAX + #else + #define INTPTR_MAX INT64_MAX + #endif + #endif + + // The value must be either -2^(n-1) or 1-2(n-1). + #ifndef INT8_MIN + #define INT8_MIN -128 + #endif + #ifndef INT16_MIN + #define INT16_MIN -32768 + #endif + #ifndef INT32_MIN + #define INT32_MIN (-INT32_MAX - 1) // -2147483648 + #endif + #ifndef INT64_MIN + #define INT64_MIN (-INT64_MAX - 1) // -9223372036854775808 + #endif + #ifndef INTMAX_MIN + #define INTMAX_MIN INT64_MIN + #endif + #ifndef INTPTR_MIN + #if EA_PLATFORM_PTR_SIZE == 4 + #define INTPTR_MIN INT32_MIN + #else + #define INTPTR_MIN INT64_MIN + #endif + #endif + + // The value must be 2^n-1 + #ifndef UINT8_MAX + #define UINT8_MAX 0xffU // 255 + #endif + #ifndef UINT16_MAX + #define UINT16_MAX 0xffffU // 65535 + #endif + #ifndef UINT32_MAX + #define UINT32_MAX UINT32_C(0xffffffff) // 4294967295 + #endif + #ifndef UINT64_MAX + #define UINT64_MAX UINT64_C(0xffffffffffffffff) // 18446744073709551615 + #endif + #ifndef UINTMAX_MAX + #define UINTMAX_MAX UINT64_MAX + #endif + #ifndef UINTPTR_MAX + #if EA_PLATFORM_PTR_SIZE == 4 + #define UINTPTR_MAX UINT32_MAX + #else + #define UINTPTR_MAX UINT64_MAX + #endif + #endif +#endif + +#ifndef FLT_EVAL_METHOD + #define FLT_EVAL_METHOD 0 + typedef float float_t; + typedef double double_t; +#endif + +#if defined(EA_COMPILER_HAS_INTTYPES) && (!defined(EA_COMPILER_MSVC) || (defined(EA_COMPILER_MSVC) && EA_COMPILER_VERSION >= 1800)) + #define EA_COMPILER_HAS_C99_FORMAT_MACROS +#endif + +#ifndef EA_COMPILER_HAS_C99_FORMAT_MACROS + // ------------------------------------------------------------------------ + // sized printf and scanf format specifiers + // See the C99 standard, section 7.8.1 -- Macros for format specifiers. + // + // The C99 standard specifies that inttypes.h only define printf/scanf + // format macros if __STDC_FORMAT_MACROS is defined before #including + // inttypes.h. For consistency, we define both __STDC_FORMAT_MACROS and + // the printf format specifiers here. We also skip the "least/most" + // variations of these specifiers, as we've decided to do so with + // basic types. + // + // For 64 bit systems, we assume the LP64 standard is followed + // (as opposed to ILP64, etc.) For 32 bit systems, we assume the + // ILP32 standard is followed. See: + // http://www.opengroup.org/public/tech/aspen/lp64_wp.htm + // for information about this. Thus, on both 32 and 64 bit platforms, + // %l refers to 32 bit data while %ll refers to 64 bit data. + + #ifndef __STDC_FORMAT_MACROS + #define __STDC_FORMAT_MACROS + #endif + + #if defined(EA_COMPILER_MSVC) // VC++ 7.1+ understands long long as a data type but doesn't accept %ll as a printf specifier. 
+ #define EA_PRI_64_LENGTH_SPECIFIER "I64" + #define EA_SCN_64_LENGTH_SPECIFIER "I64" + #else + #define EA_PRI_64_LENGTH_SPECIFIER "ll" + #define EA_SCN_64_LENGTH_SPECIFIER "ll" + #endif // It turns out that some platforms use %q to represent a 64 bit value, but these are not relevant to us at this time. + + // Printf format specifiers + #if defined(EA_COMPILER_IS_C99) || defined(EA_COMPILER_GNUC) + #define PRId8 "hhd" + #define PRIi8 "hhi" + #define PRIo8 "hho" + #define PRIu8 "hhu" + #define PRIx8 "hhx" + #define PRIX8 "hhX" + #else // VC++, Borland, etc. which have no way to specify 8 bit values other than %c. + #define PRId8 "c" // This may not work properly but it at least will not crash. Try using 16 bit versions instead. + #define PRIi8 "c" // " + #define PRIo8 "o" // " + #define PRIu8 "u" // " + #define PRIx8 "x" // " + #define PRIX8 "X" // " + #endif + + #define PRId16 "hd" + #define PRIi16 "hi" + #define PRIo16 "ho" + #define PRIu16 "hu" + #define PRIx16 "hx" + #define PRIX16 "hX" + + #define PRId32 "d" // This works for both 32 bit and 64 bit systems, as we assume LP64 conventions. + #define PRIi32 "i" + #define PRIo32 "o" + #define PRIu32 "u" + #define PRIx32 "x" + #define PRIX32 "X" + + #define PRId64 EA_PRI_64_LENGTH_SPECIFIER "d" + #define PRIi64 EA_PRI_64_LENGTH_SPECIFIER "i" + #define PRIo64 EA_PRI_64_LENGTH_SPECIFIER "o" + #define PRIu64 EA_PRI_64_LENGTH_SPECIFIER "u" + #define PRIx64 EA_PRI_64_LENGTH_SPECIFIER "x" + #define PRIX64 EA_PRI_64_LENGTH_SPECIFIER "X" + + #if (EA_PLATFORM_PTR_SIZE == 4) + #define PRIdPTR PRId32 // Usage of pointer values will generate warnings with + #define PRIiPTR PRIi32 // some compilers because they are defined in terms of + #define PRIoPTR PRIo32 // integers. However, you can't simply use "p" because + #define PRIuPTR PRIu32 // 'p' is interpreted in a specific and often different + #define PRIxPTR PRIx32 // way by the library. + #define PRIXPTR PRIX32 + #elif (EA_PLATFORM_PTR_SIZE == 8) + #define PRIdPTR PRId64 + #define PRIiPTR PRIi64 + #define PRIoPTR PRIo64 + #define PRIuPTR PRIu64 + #define PRIxPTR PRIx64 + #define PRIXPTR PRIX64 + #endif + + // Scanf format specifiers + #if defined(EA_COMPILER_IS_C99) || defined(EA_COMPILER_GNUC) + #define SCNd8 "hhd" + #define SCNi8 "hhi" + #define SCNo8 "hho" + #define SCNu8 "hhu" + #define SCNx8 "hhx" + #else // VC++, Borland, etc. which have no way to specify 8 bit values other than %c. + #define SCNd8 "c" // This will not work properly but it at least will not crash. Try using 16 bit versions instead. + #define SCNi8 "c" // " + #define SCNo8 "c" // " + #define SCNu8 "c" // " + #define SCNx8 "c" // " + #endif + + #define SCNd16 "hd" + #define SCNi16 "hi" + #define SCNo16 "ho" + #define SCNu16 "hu" + #define SCNx16 "hx" + + #define SCNd32 "d" // This works for both 32 bit and 64 bit systems, as we assume LP64 conventions. 
+ #define SCNi32 "i" + #define SCNo32 "o" + #define SCNu32 "u" + #define SCNx32 "x" + + #define SCNd64 EA_SCN_64_LENGTH_SPECIFIER "d" + #define SCNi64 EA_SCN_64_LENGTH_SPECIFIER "i" + #define SCNo64 EA_SCN_64_LENGTH_SPECIFIER "o" + #define SCNu64 EA_SCN_64_LENGTH_SPECIFIER "u" + #define SCNx64 EA_SCN_64_LENGTH_SPECIFIER "x" + + #if defined(EA_COMPILER_MSVC) && (EA_COMPILER_VERSION >= 1900) + #define SCNdPTR PRIdPTR + #define SCNiPTR PRIiPTR + #define SCNoPTR PRIoPTR + #define SCNuPTR PRIuPTR + #define SCNxPTR PRIxPTR + #elif (EA_PLATFORM_PTR_SIZE == 4) + #define SCNdPTR SCNd32 // Usage of pointer values will generate warnings with + #define SCNiPTR SCNi32 // some compilers because they are defined in terms of + #define SCNoPTR SCNo32 // integers. However, you can't simply use "p" because + #define SCNuPTR SCNu32 // 'p' is interpreted in a specific and often different + #define SCNxPTR SCNx32 // way by the library. + #elif (EA_PLATFORM_PTR_SIZE == 8) + #define SCNdPTR SCNd64 + #define SCNiPTR SCNi64 + #define SCNoPTR SCNo64 + #define SCNuPTR SCNu64 + #define SCNxPTR SCNx64 + #endif +#endif + + +// ------------------------------------------------------------------------ +// bool8_t +// The definition of a bool8_t is controversial with some, as it doesn't +// act just like built-in bool. For example, you can assign -100 to it. +// +#ifndef BOOL8_T_DEFINED // If the user hasn't already defined this... + #define BOOL8_T_DEFINED + #if defined(EA_COMPILER_MSVC) || (defined(EA_COMPILER_INTEL) && defined(EA_PLATFORM_WINDOWS)) + #if defined(__cplusplus) + typedef bool bool8_t; + #else + typedef int8_t bool8_t; + #endif + #else // EA_COMPILER_GNUC generally uses 4 bytes per bool. + typedef int8_t bool8_t; + #endif +#endif + + +// ------------------------------------------------------------------------ +// intptr_t / uintptr_t +// Integer type guaranteed to be big enough to hold +// a native pointer ( intptr_t is defined in STDDEF.H ) +// +#if !defined(_INTPTR_T_DEFINED) && !defined(_intptr_t_defined) && !defined(EA_COMPILER_HAS_C99_TYPES) + #if (EA_PLATFORM_PTR_SIZE == 4) + typedef int32_t intptr_t; + #elif (EA_PLATFORM_PTR_SIZE == 8) + typedef int64_t intptr_t; + #endif + + #define _intptr_t_defined + #define _INTPTR_T_DEFINED +#endif + +#if !defined(_UINTPTR_T_DEFINED) && !defined(_uintptr_t_defined) && !defined(EA_COMPILER_HAS_C99_TYPES) + #if (EA_PLATFORM_PTR_SIZE == 4) + typedef uint32_t uintptr_t; + #elif (EA_PLATFORM_PTR_SIZE == 8) + typedef uint64_t uintptr_t; + #endif + + #define _uintptr_t_defined + #define _UINTPTR_T_DEFINED +#endif + +#if !defined(EA_COMPILER_HAS_INTTYPES) + #ifndef INTMAX_T_DEFINED + #define INTMAX_T_DEFINED + + // At this time, all supported compilers have int64_t as the max + // integer type. Some compilers support a 128 bit integer type, + // but in some cases it is not a true int128_t but rather a + // crippled data type. Also, it turns out that Unix 64 bit ABIs + // require that intmax_t be int64_t and nothing larger. So we + // play it safe here and set intmax_t to int64_t, even though + // an int128_t type may exist. + + typedef int64_t intmax_t; + typedef uint64_t uintmax_t; + #endif +#endif + + +// ------------------------------------------------------------------------ +// ssize_t +// signed equivalent to size_t. +// This is defined by GCC (except the QNX implementation of GCC) but not by other compilers. +// +#if !defined(__GNUC__) + // As of this writing, all non-GCC compilers significant to us implement + // uintptr_t the same as size_t. 
However, this isn't guaranteed to be + // so for all compilers, as size_t may be based on int, long, or long long. + #if !defined(_SSIZE_T_) && !defined(_SSIZE_T_DEFINED) + #define _SSIZE_T_ + #define _SSIZE_T_DEFINED + + #if defined(_MSC_VER) && (EA_PLATFORM_PTR_SIZE == 8) + typedef __int64 ssize_t; + #else + typedef long ssize_t; + #endif + #endif +#else + #include +#endif + + +// ------------------------------------------------------------------------ +// Character types +// +#if defined(EA_COMPILER_MSVC) + #if defined(EA_WCHAR_T_NON_NATIVE) + // In this case, wchar_t is not defined unless we include + // wchar.h or if the compiler makes it built-in. + #ifdef EA_COMPILER_MSVC + #pragma warning(push, 3) + #endif + #include + #ifdef EA_COMPILER_MSVC + #pragma warning(pop) + #endif + #endif +#endif + + +// ------------------------------------------------------------------------ +// char8_t -- Guaranteed to be equal to the compiler's char data type. +// Some compilers implement char8_t as unsigned, though char +// is usually set to be signed. +// +// char16_t -- This is set to be an unsigned 16 bit value. If the compiler +// has wchar_t as an unsigned 16 bit value, then char16_t is +// set to be the same thing as wchar_t in order to allow the +// user to use char16_t with standard wchar_t functions. +// +// char32_t -- This is set to be an unsigned 32 bit value. If the compiler +// has wchar_t as an unsigned 32 bit value, then char32_t is +// set to be the same thing as wchar_t in order to allow the +// user to use char32_t with standard wchar_t functions. +// +// EA_CHAR8_UNIQUE +// EA_CHAR16_NATIVE +// EA_CHAR32_NATIVE +// EA_WCHAR_UNIQUE +// +// VS2010 unilaterally defines char16_t and char32_t in its yvals.h header +// unless _HAS_CHAR16_T_LANGUAGE_SUPPORT or _CHAR16T are defined. +// However, VS2010 does not support the C++0x u"" and U"" string literals, +// which makes its definition of char16_t and char32_t somewhat useless. +// Until VC++ supports string literals, the build system should define +// _CHAR16T and let EABase define char16_t and EA_CHAR16. +// +// GCC defines char16_t and char32_t in the C compiler in -std=gnu99 mode, +// as __CHAR16_TYPE__ and __CHAR32_TYPE__, and for the C++ compiler +// in -std=c++0x and -std=gnu++0x modes, as char16_t and char32_t too. +// +// The EA_WCHAR_UNIQUE symbol is defined to 1 if wchar_t is distinct from +// char8_t, char16_t, and char32_t, and defined to 0 if not. In some cases, +// if the compiler does not support char16_t/char32_t, one of these two types +// is typically a typedef or define of wchar_t. For compilers that support +// the C++11 unicode character types often overloads must be provided to +// support existing code that passes a wide char string to a function that +// takes a unicode string. +// +// The EA_CHAR8_UNIQUE symbol is defined to 1 if char8_t is distinct type +// from char in the type system, and defined to 0 if otherwise. + +#if !defined(EA_CHAR16_NATIVE) + // To do: Change this to be based on EA_COMPILER_NO_NEW_CHARACTER_TYPES. 
+ #if defined(_MSC_VER) && (_MSC_VER >= 1600) && defined(_HAS_CHAR16_T_LANGUAGE_SUPPORT) && _HAS_CHAR16_T_LANGUAGE_SUPPORT // VS2010+ + #define EA_CHAR16_NATIVE 1 + #elif defined(EA_COMPILER_CLANG) && defined(EA_COMPILER_CPP11_ENABLED) + #if __has_feature(cxx_unicode_literals) + #define EA_CHAR16_NATIVE 1 + #elif (EA_COMPILER_VERSION >= 300) && !(defined(EA_PLATFORM_IPHONE) || defined(EA_PLATFORM_OSX)) + #define EA_CHAR16_NATIVE 1 + #elif defined(EA_PLATFORM_APPLE) + #define EA_CHAR16_NATIVE 1 + #else + #define EA_CHAR16_NATIVE 0 + #endif + #elif defined(__EDG_VERSION__) && (__EDG_VERSION__ >= 404) && defined(__CHAR16_TYPE__) && defined(EA_COMPILER_CPP11_ENABLED)// EDG 4.4+. + #define EA_CHAR16_NATIVE 1 + #elif defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4004) && !defined(EA_COMPILER_EDG) && (defined(EA_COMPILER_CPP11_ENABLED) || defined(__STDC_VERSION__)) // g++ (C++ compiler) 4.4+ with -std=c++0x or gcc (C compiler) 4.4+ with -std=gnu99 + #define EA_CHAR16_NATIVE 1 + #else + #define EA_CHAR16_NATIVE 0 + #endif +#endif + +#if !defined(EA_CHAR32_NATIVE) // Microsoft currently ties char32_t language support to char16_t language support. So we use CHAR16_T here. + // To do: Change this to be based on EA_COMPILER_NO_NEW_CHARACTER_TYPES. + #if defined(_MSC_VER) && (_MSC_VER >= 1600) && defined(_HAS_CHAR16_T_LANGUAGE_SUPPORT) && _HAS_CHAR16_T_LANGUAGE_SUPPORT // VS2010+ + #define EA_CHAR32_NATIVE 1 + #elif defined(EA_COMPILER_CLANG) && defined(EA_COMPILER_CPP11_ENABLED) + #if __has_feature(cxx_unicode_literals) + #define EA_CHAR32_NATIVE 1 + #elif (EA_COMPILER_VERSION >= 300) && !(defined(EA_PLATFORM_IPHONE) || defined(EA_PLATFORM_OSX)) + #define EA_CHAR32_NATIVE 1 + #elif defined(EA_PLATFORM_APPLE) + #define EA_CHAR32_NATIVE 1 + #else + #define EA_CHAR32_NATIVE 0 + #endif + #elif defined(__EDG_VERSION__) && (__EDG_VERSION__ >= 404) && defined(__CHAR32_TYPE__) && defined(EA_COMPILER_CPP11_ENABLED)// EDG 4.4+. + #define EA_CHAR32_NATIVE 1 + #elif defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4004) && !defined(EA_COMPILER_EDG) && (defined(EA_COMPILER_CPP11_ENABLED) || defined(__STDC_VERSION__)) // g++ (C++ compiler) 4.4+ with -std=c++0x or gcc (C compiler) 4.4+ with -std=gnu99 + #define EA_CHAR32_NATIVE 1 + #else + #define EA_CHAR32_NATIVE 0 + #endif +#endif + + +#if EA_CHAR16_NATIVE || EA_CHAR32_NATIVE + #define EA_WCHAR_UNIQUE 1 +#else + #define EA_WCHAR_UNIQUE 0 +#endif + + +// EA_CHAR8_UNIQUE +// +// Check for char8_t support in the cpp type system. Moving forward from c++20, +// the char8_t type allows users to overload function for character encoding. +// +// EA_CHAR8_UNIQUE is 1 when the type is a unique in the type system and +// can there be used as a valid overload. EA_CHAR8_UNIQUE is 0 otherwise. +// +// http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2018/p0482r6.html +// +#ifdef __cpp_char8_t + #define CHAR8_T_DEFINED + #define EA_CHAR8_UNIQUE 1 +#else + #define EA_CHAR8_UNIQUE 0 +#endif + + +#ifndef CHAR8_T_DEFINED // If the user hasn't already defined these... + #define CHAR8_T_DEFINED + #if defined(EA_PLATFORM_APPLE) + #define char8_t char // The Apple debugger is too stupid to realize char8_t is typedef'd to char, so we #define it. + #else + typedef char char8_t; + #endif + + #if EA_CHAR16_NATIVE + // In C++, char16_t and char32_t are already defined by the compiler. + // In MS C, char16_t and char32_t are already defined by the compiler/standard library. 
+ // In GCC C, __CHAR16_TYPE__ and __CHAR32_TYPE__ are defined instead, and we must define char16_t and char32_t from these. + #if defined(__GNUC__) && !defined(__GXX_EXPERIMENTAL_CXX0X__) && defined(__CHAR16_TYPE__) // If using GCC and compiling in C... + typedef __CHAR16_TYPE__ char16_t; + typedef __CHAR32_TYPE__ char32_t; + #endif + #elif (EA_WCHAR_SIZE == 2) + #if (defined(_MSC_VER) && (_MSC_VER >= 1600)) // if VS2010+ or using platforms that use Dinkumware under a compiler that doesn't natively support C++11 char16_t. + #if !defined(_CHAR16T) + #define _CHAR16T + #endif + #if !defined(_HAS_CHAR16_T_LANGUAGE_SUPPORT) || !_HAS_CHAR16_T_LANGUAGE_SUPPORT + typedef wchar_t char16_t; + typedef uint32_t char32_t; + #endif + #else + typedef wchar_t char16_t; + typedef uint32_t char32_t; + #endif + #else + typedef uint16_t char16_t; + #if defined(__cplusplus) + typedef wchar_t char32_t; + #else + typedef uint32_t char32_t; + #endif + #endif +#endif + + +// CHAR8_MIN, CHAR8_MAX, etc. +// +#define EA_LIMITS_DIGITS_S(T) ((sizeof(T) * 8) - 1) +#define EA_LIMITS_DIGITS_U(T) ((sizeof(T) * 8)) +#define EA_LIMITS_DIGITS(T) ((EA_LIMITS_IS_SIGNED(T) ? EA_LIMITS_DIGITS_S(T) : EA_LIMITS_DIGITS_U(T))) +#define EA_LIMITS_IS_SIGNED(T) ((T)(-1) < 0) +#define EA_LIMITS_MIN_S(T) ((T)((T)1 << EA_LIMITS_DIGITS_S(T))) +#define EA_LIMITS_MIN_U(T) ((T)0) +#define EA_LIMITS_MIN(T) ((EA_LIMITS_IS_SIGNED(T) ? EA_LIMITS_MIN_S(T) : EA_LIMITS_MIN_U(T))) +#define EA_LIMITS_MAX_S(T) ((T)(((((T)1 << (EA_LIMITS_DIGITS(T) - 1)) - 1) << 1) + 1)) +#define EA_LIMITS_MAX_U(T) ((T)~(T)0) +#define EA_LIMITS_MAX(T) ((EA_LIMITS_IS_SIGNED(T) ? EA_LIMITS_MAX_S(T) : EA_LIMITS_MAX_U(T))) + +#if !defined(CHAR8_MIN) + #define CHAR8_MIN EA_LIMITS_MIN(char8_t) +#endif + +#if !defined(CHAR8_MAX) + #define CHAR8_MAX EA_LIMITS_MAX(char8_t) +#endif + +#if !defined(CHAR16_MIN) + #define CHAR16_MIN EA_LIMITS_MIN(char16_t) +#endif + +#if !defined(CHAR16_MAX) + #define CHAR16_MAX EA_LIMITS_MAX(char16_t) +#endif + +#if !defined(CHAR32_MIN) + #define CHAR32_MIN EA_LIMITS_MIN(char32_t) +#endif + +#if !defined(CHAR32_MAX) + #define CHAR32_MAX EA_LIMITS_MAX(char32_t) +#endif + + + +// EA_CHAR8 / EA_CHAR16 / EA_CHAR32 / EA_WCHAR +// +// Supports usage of portable string constants. +// +// Example usage: +// const char16_t* str = EA_CHAR16("Hello world"); +// const char32_t* str = EA_CHAR32("Hello world"); +// const char16_t c = EA_CHAR16('\x3001'); +// const char32_t c = EA_CHAR32('\x3001'); +// +#ifndef EA_CHAR8 + #if EA_CHAR8_UNIQUE + #define EA_CHAR8(s) u8 ## s + #else + #define EA_CHAR8(s) s + #endif +#endif + +#ifndef EA_WCHAR + #define EA_WCHAR_(s) L ## s + #define EA_WCHAR(s) EA_WCHAR_(s) +#endif + +#ifndef EA_CHAR16 + #if EA_CHAR16_NATIVE && !defined(_MSC_VER) // Microsoft doesn't support char16_t string literals. + #define EA_CHAR16_(s) u ## s + #define EA_CHAR16(s) EA_CHAR16_(s) + #elif (EA_WCHAR_SIZE == 2) + #if defined(_MSC_VER) && (_MSC_VER >= 1900) && defined(__cplusplus) // VS2015 supports u"" string literals. + #define EA_CHAR16_(s) u ## s + #define EA_CHAR16(s) EA_CHAR16_(s) + #else + #define EA_CHAR16_(s) L ## s + #define EA_CHAR16(s) EA_CHAR16_(s) + #endif + #else + //#define EA_CHAR16(s) // Impossible to implement efficiently. + #endif +#endif + +#ifndef EA_CHAR32 + #if EA_CHAR32_NATIVE && !defined(_MSC_VER) // Microsoft doesn't support char32_t string literals. 
+ #define EA_CHAR32_(s) U ## s + #define EA_CHAR32(s) EA_CHAR32_(s) + #elif (EA_WCHAR_SIZE == 2) + #if defined(_MSC_VER) && (_MSC_VER >= 1900) && defined(__cplusplus) // VS2015 supports u"" string literals. + #define EA_CHAR32_(s) U ## s + #define EA_CHAR32(s) EA_CHAR32_(s) + #else + //#define EA_CHAR32(s) // Impossible to implement. + #endif + #elif (EA_WCHAR_SIZE == 4) + #define EA_CHAR32_(s) L ## s + #define EA_CHAR32(s) EA_CHAR32_(s) + #else + #error Unexpected size of wchar_t + #endif +#endif + +// EAText8 / EAText16 +// +// Provided for backwards compatibility with older code. +// +#if defined(EABASE_ENABLE_EATEXT_MACROS) + #define EAText8(x) x + #define EAChar8(x) x + + #define EAText16(x) EA_CHAR16(x) + #define EAChar16(x) EA_CHAR16(x) +#endif + + + + +// ------------------------------------------------------------------------ +// EAArrayCount +// +// Returns the count of items in a built-in C array. This is a common technique +// which is often used to help properly calculate the number of items in an +// array at runtime in order to prevent overruns, etc. +// +// Example usage: +// int array[75]; +// size_t arrayCount = EAArrayCount(array); // arrayCount is 75. +// +#if defined(EA_COMPILER_NO_CONSTEXPR) + #ifndef EAArrayCount + #define EAArrayCount(x) (sizeof(x) / sizeof(x[0])) + #endif +#else + // This C++11 version is a little smarter than the macro version above; + // it can tell the difference between arrays and pointers. Other simpler + // templated versions have failed in various subtle ways. + + template + char (&EAArraySizeHelper(T (&x)[N]))[N]; + + template + char (&EAArraySizeHelper(T (&&x)[N]))[N]; + + #define EAArrayCount(x) (sizeof(EAArraySizeHelper(x))) +#endif + + +// ------------------------------------------------------------------------ +// static_assert +// +// C++11 static_assert (a.k.a. compile-time assert). +// +// Specification: +// void static_assert(bool const_expression, const char* description); +// +// Example usage: +// static_assert(sizeof(int) == 4, "int must be 32 bits"); +// +#if defined(_MSC_VER) && (_MSC_VER >= 1600) && defined(__cplusplus) + // static_assert is defined by the compiler for both C and C++. +#elif !defined(__cplusplus) && defined(EA_PLATFORM_ANDROID) && ((defined(__STDC_VERSION__) && __STDC_VERSION__ < 201100L) || !defined(__STDC_VERSION__)) + // AndroidNDK does not support static_assert despite claiming it's a C11 compiler + #define NEED_CUSTOM_STATIC_ASSERT +#elif defined(__clang__) && defined(__cplusplus) + // We need to separate these checks on a new line, as the pre-processor on other compilers will fail on the _has_feature macros + #if !(__has_feature(cxx_static_assert) || __has_extension(cxx_static_assert)) + #define NEED_CUSTOM_STATIC_ASSERT + #endif +#elif defined(__GNUC__) && (defined(__GXX_EXPERIMENTAL_CXX0X__) || (defined(__cplusplus) && (__cplusplus >= 201103L))) + // static_assert is defined by the compiler. +#elif defined(__EDG_VERSION__) && (__EDG_VERSION__ >= 401) && defined(EA_COMPILER_CPP11_ENABLED) + // static_assert is defined by the compiler. +#elif !defined(__cplusplus) && defined(__GLIBC__) && defined(__USE_ISOC11) + // static_assert is defined by the compiler. +#elif !defined(__cplusplus) && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201100L + // static_assert is defined by the compiler. 
+#else + #define NEED_CUSTOM_STATIC_ASSERT +#endif + +#ifdef NEED_CUSTOM_STATIC_ASSERT + #ifdef __GNUC__ + // On GCC the 'unused' attribute can be used to indicate a typedef is not actually used + // (such as in the static_assert implementation below). New versions of GCC generate + // warnings for unused typedefs in function/method scopes. + #define EA_STATIC_ASSERT_UNUSED_ATTRIBUTE __attribute__((unused)) + #else + #define EA_STATIC_ASSERT_UNUSED_ATTRIBUTE + #endif + #define EA_STATIC_ASSERT_TOKEN_PASTE(a,b) a ## b + #define EA_STATIC_ASSERT_CONCATENATE_HELPER(a,b) EA_STATIC_ASSERT_TOKEN_PASTE(a,b) + + #if defined(__COUNTER__) // If this extension is available, which allows multiple statements per line... + #define static_assert(expression, description) typedef char EA_STATIC_ASSERT_CONCATENATE_HELPER(compileTimeAssert,__COUNTER__) [((expression) != 0) ? 1 : -1] EA_STATIC_ASSERT_UNUSED_ATTRIBUTE + #else + #define static_assert(expression, description) typedef char EA_STATIC_ASSERT_CONCATENATE_HELPER(compileTimeAssert,__LINE__) [((expression) != 0) ? 1 : -1] EA_STATIC_ASSERT_UNUSED_ATTRIBUTE + #endif + + #undef NEED_CUSTOM_STATIC_ASSERT +#endif + +// ------------------------------------------------------------------------ +// EA_IS_ENABLED +// +// EA_IS_ENABLED is intended to be used for detecting if compile time features are enabled or disabled. +// +// It has some advantages over using a standard #if or #ifdef tests: +// 1) Fails to compile when passes numeric macro values. Valid options are strictly enabled or disabled. +// 2) Fails to compile when passed undefined macro values rather than disabling by default +// 3) Fails to compile when the passed macro is defined to but empty +// +// To use the macro, the calling code should create a define for the feature to enable or disable. This feature define +// must be set to either EA_ENABLED or EA_DISABLED. (Do not try to set the feature define directly to some other +// value.) +// +// Note: These macros are analogous to the Frostbite macro FB_USING used in combination with FB_OFF / FB_ON and are +// designed to be compatible to support gradual migration. +// +// Example usage: +// +// // The USER_PROVIDED_FEATURE_DEFINE should be defined as either +// // EA_ENABLED or EA_DISABLED. +// #define USER_PROVIDED_FEATURE_DEFINE EA_ENABLED +// +// #if EA_IS_ENABLED(USER_PROVIDED_FEATURE_DEFINE) +// // USER_PROVIDED_FEATURE_DEFINE is enabled +// #else +// // USER_PROVIDED_FEATURE_DEFINE is disabled +// #endif +// +#define EA_ENABLED 111- +#define EA_DISABLED 333- +// NOTE: Numeric values for x will produce a parse error while empty values produce a divide by zero, and the test is a bool for proper negation behavior +#define EA_IS_ENABLED(x) (333 == 333 * 111 / ((x 0) * (((x 0) == 333 ? 1 : 0) + ((x 0) == 111 ? 1 : 0)))) + + + +// Define int128_t / uint128_t types. +// NOTE(rparolin): include file at the end because we want all the signed integral types defined. +#ifdef __cplusplus + #include +#endif + +#endif // Header include guard + + + + diff --git a/libkram/eastl/include/EABase/eahave.h b/libkram/eastl/include/EABase/eahave.h new file mode 100644 index 00000000..b0987be7 --- /dev/null +++ b/libkram/eastl/include/EABase/eahave.h @@ -0,0 +1,877 @@ +/*----------------------------------------------------------------------------- + * eahave.h + * + * Copyright (c) Electronic Arts Inc. All rights reserved. 
+ *---------------------------------------------------------------------------*/ + + +/*----------------------------------------------------------------------------- + This file's functionality is preliminary and won't be considered stable until + a future EABase version. + *---------------------------------------------------------------------------*/ + + +/*----------------------------------------------------------------------------- + This header identifies if the given facilities are available in the + standard build environment the current compiler/linker/standard library/ + operating system combination. This file may in some cases #include standard + headers in order to make availability determinations, such as to check + compiler or SDK version numbers. However, it cannot be perfect. + This header does not identify compiler features, as those are defined in + eacompiler.h and eacompilertraits.h. Rather this header is about library support. + This header does not identify platform or library conventions either, such + as whether the file paths use \ or / for directory separators. + + We provide three types of HAVE features here: + + - EA_HAVE_XXX_FEATURE - Have compiler feature. + Identifies if the compiler has or lacks some feature in the + current build. Sometimes you need to check to see if the + compiler is running in some mode in able to write portable code + against it. For example, some compilers (e.g. VC++) have a + mode in which all language extensions are disabled. If you want + to write code that works with that but still uses the extensions + when available then you can check #if defined(EA_HAVE_EXTENSIONS_FEATURE). + Features can be forcibly cancelled via EA_NO_HAVE_XXX_FEATURE. + EA_NO_HAVE is useful for a build system or user to override the + defaults because it happens to know better. + + - EA_HAVE_XXX_H - Have header file information. + Identifies if a given header file is available to the current + compile configuration. For example, some compilers provide a + malloc.h header, while others don't. For the former we define + EA_HAVE_MALLOC_H, while for the latter it remains undefined. + If a header is missing then it may still be that the functions + the header usually declares are declared in some other header. + EA_HAVE_XXX does not include the possibility that our own code + provides versions of these headers, and in fact a purpose of + EA_HAVE_XXX is to decide if we should be using our own because + the system doesn't provide one. + Header availability can be forcibly cancelled via EA_NO_HAVE_XXX_H. + EA_NO_HAVE is useful for a build system or user to override the + defaults because it happens to know better. + + - EA_HAVE_XXX_DECL - Have function declaration information. + Identifies if a given function declaration is provided by + the current compile configuration. For example, some compiler + standard libraries declare a wcslen function, while others + don't. For the former we define EA_HAVE_WCSLEN_DECL, while for + the latter it remains undefined. If a declaration of a function + is missing then we assume the implementation is missing as well. + EA_HAVE_XXX_DECL does not include the possibility that our + own code provides versions of these declarations, and in fact a + purpose of EA_HAVE_XXX_DECL is to decide if we should be using + our own because the system doesn't provide one. + Declaration availability can be forcibly cancelled via EA_NO_HAVE_XXX_DECL. 
+ EA_NO_HAVE is useful for a build system or user to override the + defaults because it happens to know better. + + - EA_HAVE_XXX_IMPL - Have function implementation information. + Identifies if a given function implementation is provided by + the current compile and link configuration. For example, it's + commonly the case that console platforms declare a getenv function + but don't provide a linkable implementation. + In this case the user needs to provide such a function manually + as part of the link. If the implementation is available then + we define EA_HAVE_GETENV_IMPL, otherwise it remains undefined. + Beware that sometimes a function may not seem to be present in + the Standard Library but in reality you need to link some auxiliary + provided library for it. An example of this is the Unix real-time + functions such as clock_gettime. + EA_HAVE_XXX_IMPL does not include the possibility that our + own code provides versions of these implementations, and in fact a + purpose of EA_HAVE_XXX_IMPL is to decide if we should be using + our own because the system doesn't provide one. + Implementation availability can be forcibly cancelled via EA_NO_HAVE_XXX_IMPL. + EA_NO_HAVE is useful for a build system or user to override the + defaults because it happens to know better. + + It's not practical to define EA_HAVE macros for every possible header, + declaration, and implementation, and so the user must simply know that + some headers, declarations, and implementations tend to require EA_HAVE + checking. Nearly every C Standard Library we've seen has a + header, a strlen declaration, and a linkable strlen implementation, + so there's no need to provide EA_HAVE support for this. On the other hand + it's commonly the case that the C Standard Library doesn't have a malloc.h + header or an inet_ntop declaration. + +---------------------------------------------------------------------------*/ + + +#ifndef INCLUDED_eahave_H +#define INCLUDED_eahave_H + + +#include + + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + +/* EA_HAVE_XXX_FEATURE */ + +#if !defined(EA_HAVE_EXTENSIONS_FEATURE) && !defined(EA_NO_HAVE_EXTENSIONS_FEATURE) + #define EA_HAVE_EXTENSIONS_FEATURE 1 +#endif + + +/* EA_HAVE_XXX_LIBRARY */ + +// Dinkumware +#if !defined(EA_HAVE_DINKUMWARE_CPP_LIBRARY) && !defined(EA_NO_HAVE_DINKUMWARE_CPP_LIBRARY) + #if defined(__cplusplus) + EA_DISABLE_ALL_VC_WARNINGS() + #include // Need to trigger the compilation of yvals.h without directly using because it might not exist. + EA_RESTORE_ALL_VC_WARNINGS() + #endif + + #if defined(__cplusplus) && defined(_CPPLIB_VER) /* If using the Dinkumware Standard library... */ + #define EA_HAVE_DINKUMWARE_CPP_LIBRARY 1 + #else + #define EA_NO_HAVE_DINKUMWARE_CPP_LIBRARY 1 + #endif +#endif + +// GCC libstdc++ +#if !defined(EA_HAVE_LIBSTDCPP_LIBRARY) && !defined(EA_NO_HAVE_LIBSTDCPP_LIBRARY) + #if defined(__GLIBCXX__) /* If using libstdc++ ... */ + #define EA_HAVE_LIBSTDCPP_LIBRARY 1 + #else + #define EA_NO_HAVE_LIBSTDCPP_LIBRARY 1 + #endif +#endif + +// Clang libc++ +#if !defined(EA_HAVE_LIBCPP_LIBRARY) && !defined(EA_NO_HAVE_LIBCPP_LIBRARY) + #if EA_HAS_INCLUDE_AVAILABLE + #if EA_HAS_INCLUDE(<__config>) + #define EA_HAVE_LIBCPP_LIBRARY 1 // We could also #include and check if defined(_LIBCPP_VERSION). 
+ #endif + #endif + + #if !defined(EA_HAVE_LIBCPP_LIBRARY) + #define EA_NO_HAVE_LIBCPP_LIBRARY 1 + #endif +#endif + + +/* EA_HAVE_XXX_H */ + +// #include +#if !defined(EA_HAVE_SYS_TYPES_H) && !defined(EA_NO_HAVE_SYS_TYPES_H) + #define EA_HAVE_SYS_TYPES_H 1 +#endif + +// #include (and not sys/io.h or asm/io.h) +#if !defined(EA_HAVE_IO_H) && !defined(EA_NO_HAVE_IO_H) + // Unix doesn't have Microsoft's but has the same functionality in and . + #if defined(EA_PLATFORM_MICROSOFT) + #define EA_HAVE_IO_H 1 + #else + #define EA_NO_HAVE_IO_H 1 + #endif +#endif + +// #include +#if !defined(EA_HAVE_INTTYPES_H) && !defined(EA_NO_HAVE_INTTYPES_H) + #if !defined(EA_PLATFORM_MICROSOFT) + #define EA_HAVE_INTTYPES_H 1 + #else + #define EA_NO_HAVE_INTTYPES_H 1 + #endif +#endif + +// #include +#if !defined(EA_HAVE_UNISTD_H) && !defined(EA_NO_HAVE_UNISTD_H) + #if defined(EA_PLATFORM_UNIX) + #define EA_HAVE_UNISTD_H 1 + #else + #define EA_NO_HAVE_UNISTD_H 1 + #endif +#endif + +// #include +#if !defined(EA_HAVE_SYS_TIME_H) && !defined(EA_NO_HAVE_SYS_TIME_H) + #if !defined(EA_PLATFORM_MICROSOFT) && !defined(_CPPLIB_VER) /* _CPPLIB_VER indicates Dinkumware. */ + #define EA_HAVE_SYS_TIME_H 1 /* defines struct timeval */ + #else + #define EA_NO_HAVE_SYS_TIME_H 1 + #endif +#endif + +// #include +#if !defined(EA_HAVE_SYS_PTRACE_H) && !defined(EA_NO_HAVE_SYS_PTRACE_H) + #if defined(EA_PLATFORM_UNIX) && !defined(__CYGWIN__) && (defined(EA_PLATFORM_DESKTOP) || defined(EA_PLATFORM_SERVER)) + #define EA_HAVE_SYS_PTRACE_H 1 /* declares the ptrace function */ + #else + #define EA_NO_HAVE_SYS_PTRACE_H 1 + #endif +#endif + +// #include +#if !defined(EA_HAVE_SYS_STAT_H) && !defined(EA_NO_HAVE_SYS_STAT_H) + #if (defined(EA_PLATFORM_UNIX) && !(defined(EA_PLATFORM_SONY) && defined(EA_PLATFORM_CONSOLE))) || defined(__APPLE__) || defined(EA_PLATFORM_ANDROID) + #define EA_HAVE_SYS_STAT_H 1 /* declares the stat struct and function */ + #else + #define EA_NO_HAVE_SYS_STAT_H 1 + #endif +#endif + +// #include +#if !defined(EA_HAVE_LOCALE_H) && !defined(EA_NO_HAVE_LOCALE_H) + #define EA_HAVE_LOCALE_H 1 +#endif + +// #include +#if !defined(EA_HAVE_SIGNAL_H) && !defined(EA_NO_HAVE_SIGNAL_H) + #if !defined(EA_PLATFORM_BSD) && !defined(EA_PLATFORM_SONY) && !defined(CS_UNDEFINED_STRING) + #define EA_HAVE_SIGNAL_H 1 + #else + #define EA_NO_HAVE_SIGNAL_H 1 + #endif +#endif + +// #include +#if !defined(EA_HAVE_SYS_SIGNAL_H) && !defined(EA_NO_HAVE_SYS_SIGNAL_H) + #if defined(EA_PLATFORM_BSD) || defined(EA_PLATFORM_SONY) + #define EA_HAVE_SYS_SIGNAL_H 1 + #else + #define EA_NO_HAVE_SYS_SIGNAL_H 1 + #endif +#endif + +// #include +#if !defined(EA_HAVE_PTHREAD_H) && !defined(EA_NO_HAVE_PTHREAD_H) + #if defined(EA_PLATFORM_UNIX) || defined(EA_PLATFORM_APPLE) || defined(EA_PLATFORM_POSIX) + #define EA_HAVE_PTHREAD_H 1 /* It can be had under Microsoft/Windows with the http://sourceware.org/pthreads-win32/ library */ + #else + #define EA_NO_HAVE_PTHREAD_H 1 + #endif +#endif + +// #include +#if !defined(EA_HAVE_WCHAR_H) && !defined(EA_NO_HAVE_WCHAR_H) + #if defined(EA_PLATFORM_DESKTOP) && defined(EA_PLATFORM_UNIX) && defined(EA_PLATFORM_SONY) && defined(EA_PLATFORM_APPLE) + #define EA_HAVE_WCHAR_H 1 + #else + #define EA_NO_HAVE_WCHAR_H 1 + #endif +#endif + +// #include +#if !defined(EA_HAVE_MALLOC_H) && !defined(EA_NO_HAVE_MALLOC_H) + #if defined(_MSC_VER) || defined(__MINGW32__) + #define EA_HAVE_MALLOC_H 1 + #else + #define EA_NO_HAVE_MALLOC_H 1 + #endif +#endif + +// #include +#if !defined(EA_HAVE_ALLOCA_H) && !defined(EA_NO_HAVE_ALLOCA_H) + #if 
!defined(EA_HAVE_MALLOC_H) && !defined(EA_PLATFORM_SONY) + #define EA_HAVE_ALLOCA_H 1 + #else + #define EA_NO_HAVE_ALLOCA_H 1 + #endif +#endif + +// #include +#if !defined(EA_HAVE_EXECINFO_H) && !defined(EA_NO_HAVE_EXECINFO_H) + #if (defined(EA_PLATFORM_LINUX) || defined(EA_PLATFORM_OSX)) && !defined(EA_PLATFORM_ANDROID) + #define EA_HAVE_EXECINFO_H 1 + #else + #define EA_NO_HAVE_EXECINFO_H 1 + #endif +#endif + +// #include (Unix semaphore support) +#if !defined(EA_HAVE_SEMAPHORE_H) && !defined(EA_NO_HAVE_SEMAPHORE_H) + #if defined(EA_PLATFORM_UNIX) + #define EA_HAVE_SEMAPHORE_H 1 + #else + #define EA_NO_HAVE_SEMAPHORE_H 1 + #endif +#endif + +// #include (Unix semaphore support) +#if !defined(EA_HAVE_DIRENT_H) && !defined(EA_NO_HAVE_DIRENT_H) + #if defined(EA_PLATFORM_UNIX) && !defined(EA_PLATFORM_CONSOLE) + #define EA_HAVE_DIRENT_H 1 + #else + #define EA_NO_HAVE_DIRENT_H 1 + #endif +#endif + +// #include , , , +#if !defined(EA_HAVE_CPP11_CONTAINERS) && !defined(EA_NO_HAVE_CPP11_CONTAINERS) + #if defined(EA_HAVE_DINKUMWARE_CPP_LIBRARY) && (_CPPLIB_VER >= 520) // Dinkumware. VS2010+ + #define EA_HAVE_CPP11_CONTAINERS 1 + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(EA_HAVE_LIBSTDCPP_LIBRARY) && defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4004) // Actually GCC 4.3 supports array and unordered_ + #define EA_HAVE_CPP11_CONTAINERS 1 + #elif defined(EA_HAVE_LIBCPP_LIBRARY) && (_LIBCPP_VERSION >= 1) + #define EA_HAVE_CPP11_CONTAINERS 1 + #else + #define EA_NO_HAVE_CPP11_CONTAINERS 1 + #endif +#endif + +// #include +#if !defined(EA_HAVE_CPP11_ATOMIC) && !defined(EA_NO_HAVE_CPP11_ATOMIC) + #if defined(EA_HAVE_DINKUMWARE_CPP_LIBRARY) && (_CPPLIB_VER >= 540) // Dinkumware. VS2012+ + #define EA_HAVE_CPP11_ATOMIC 1 + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(EA_HAVE_LIBSTDCPP_LIBRARY) && defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4007) + #define EA_HAVE_CPP11_ATOMIC 1 + #elif defined(EA_HAVE_LIBCPP_LIBRARY) && (_LIBCPP_VERSION >= 1) + #define EA_HAVE_CPP11_ATOMIC 1 + #else + #define EA_NO_HAVE_CPP11_ATOMIC 1 + #endif +#endif + +// #include +#if !defined(EA_HAVE_CPP11_CONDITION_VARIABLE) && !defined(EA_NO_HAVE_CPP11_CONDITION_VARIABLE) + #if defined(EA_HAVE_DINKUMWARE_CPP_LIBRARY) && (_CPPLIB_VER >= 540) // Dinkumware. VS2012+ + #define EA_HAVE_CPP11_CONDITION_VARIABLE 1 + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(EA_HAVE_LIBSTDCPP_LIBRARY) && defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4007) + #define EA_HAVE_CPP11_CONDITION_VARIABLE 1 + #elif defined(EA_HAVE_LIBCPP_LIBRARY) && (_LIBCPP_VERSION >= 1) + #define EA_HAVE_CPP11_CONDITION_VARIABLE 1 + #else + #define EA_NO_HAVE_CPP11_CONDITION_VARIABLE 1 + #endif +#endif + +// #include +#if !defined(EA_HAVE_CPP11_MUTEX) && !defined(EA_NO_HAVE_CPP11_MUTEX) + #if defined(EA_HAVE_DINKUMWARE_CPP_LIBRARY) && (_CPPLIB_VER >= 540) // Dinkumware. VS2012+ + #define EA_HAVE_CPP11_MUTEX 1 + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(EA_HAVE_LIBSTDCPP_LIBRARY) && defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4007) + #define EA_HAVE_CPP11_MUTEX 1 + #elif defined(EA_HAVE_LIBCPP_LIBRARY) && (_LIBCPP_VERSION >= 1) + #define EA_HAVE_CPP11_MUTEX 1 + #else + #define EA_NO_HAVE_CPP11_MUTEX 1 + #endif +#endif + +// #include +#if !defined(EA_HAVE_CPP11_THREAD) && !defined(EA_NO_HAVE_CPP11_THREAD) + #if defined(EA_HAVE_DINKUMWARE_CPP_LIBRARY) && (_CPPLIB_VER >= 540) // Dinkumware. 
VS2012+ + #define EA_HAVE_CPP11_THREAD 1 + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(EA_HAVE_LIBSTDCPP_LIBRARY) && defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4007) + #define EA_HAVE_CPP11_THREAD 1 + #elif defined(EA_HAVE_LIBCPP_LIBRARY) && (_LIBCPP_VERSION >= 1) + #define EA_HAVE_CPP11_THREAD 1 + #else + #define EA_NO_HAVE_CPP11_THREAD 1 + #endif +#endif + +// #include +#if !defined(EA_HAVE_CPP11_FUTURE) && !defined(EA_NO_HAVE_CPP11_FUTURE) + #if defined(EA_HAVE_DINKUMWARE_CPP_LIBRARY) && (_CPPLIB_VER >= 540) // Dinkumware. VS2012+ + #define EA_HAVE_CPP11_FUTURE 1 + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(EA_HAVE_LIBSTDCPP_LIBRARY) && defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4005) + #define EA_HAVE_CPP11_FUTURE 1 + #elif defined(EA_HAVE_LIBCPP_LIBRARY) && (_LIBCPP_VERSION >= 1) + #define EA_HAVE_CPP11_FUTURE 1 + #else + #define EA_NO_HAVE_CPP11_FUTURE 1 + #endif +#endif + + +// #include +#if !defined(EA_HAVE_CPP11_TYPE_TRAITS) && !defined(EA_NO_HAVE_CPP11_TYPE_TRAITS) + #if defined(EA_HAVE_DINKUMWARE_CPP_LIBRARY) && (_CPPLIB_VER >= 540) // Dinkumware. VS2012+ + #define EA_HAVE_CPP11_TYPE_TRAITS 1 + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(EA_HAVE_LIBSTDCPP_LIBRARY) && defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4007) // Prior versions of libstdc++ have incomplete support for C++11 type traits. + #define EA_HAVE_CPP11_TYPE_TRAITS 1 + #elif defined(EA_HAVE_LIBCPP_LIBRARY) && (_LIBCPP_VERSION >= 1) + #define EA_HAVE_CPP11_TYPE_TRAITS 1 + #else + #define EA_NO_HAVE_CPP11_TYPE_TRAITS 1 + #endif +#endif + +// #include +#if !defined(EA_HAVE_CPP11_TUPLES) && !defined(EA_NO_HAVE_CPP11_TUPLES) + #if defined(EA_HAVE_DINKUMWARE_CPP_LIBRARY) && (_CPPLIB_VER >= 520) // Dinkumware. VS2010+ + #define EA_HAVE_CPP11_TUPLES 1 + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(EA_HAVE_LIBSTDCPP_LIBRARY) && defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4003) + #define EA_HAVE_CPP11_TUPLES 1 + #elif defined(EA_HAVE_LIBCPP_LIBRARY) && (_LIBCPP_VERSION >= 1) + #define EA_HAVE_CPP11_TUPLES 1 + #else + #define EA_NO_HAVE_CPP11_TUPLES 1 + #endif +#endif + +// #include +#if !defined(EA_HAVE_CPP11_REGEX) && !defined(EA_NO_HAVE_CPP11_REGEX) + #if defined(EA_HAVE_DINKUMWARE_CPP_LIBRARY) && (_CPPLIB_VER >= 540) && (defined(_HAS_EXCEPTIONS) && _HAS_EXCEPTIONS) // Dinkumware. VS2012+ + #define EA_HAVE_CPP11_REGEX 1 + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(EA_HAVE_LIBSTDCPP_LIBRARY) && defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4003) + #define EA_HAVE_CPP11_REGEX 1 + #elif defined(EA_HAVE_LIBCPP_LIBRARY) && (_LIBCPP_VERSION >= 1) + #define EA_HAVE_CPP11_REGEX 1 + #else + #define EA_NO_HAVE_CPP11_REGEX 1 + #endif +#endif + +// #include +#if !defined(EA_HAVE_CPP11_RANDOM) && !defined(EA_NO_HAVE_CPP11_RANDOM) + #if defined(EA_HAVE_DINKUMWARE_CPP_LIBRARY) && (_CPPLIB_VER >= 520) // Dinkumware. VS2010+ + #define EA_HAVE_CPP11_RANDOM 1 + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(EA_HAVE_LIBSTDCPP_LIBRARY) && defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4005) + #define EA_HAVE_CPP11_RANDOM 1 + #elif defined(EA_HAVE_LIBCPP_LIBRARY) && (_LIBCPP_VERSION >= 1) + #define EA_HAVE_CPP11_RANDOM 1 + #else + #define EA_NO_HAVE_CPP11_RANDOM 1 + #endif +#endif + +// #include +#if !defined(EA_HAVE_CPP11_CHRONO) && !defined(EA_NO_HAVE_CPP11_CHRONO) + #if defined(EA_HAVE_DINKUMWARE_CPP_LIBRARY) && (_CPPLIB_VER >= 540) // Dinkumware. 
VS2012+ + #define EA_HAVE_CPP11_CHRONO 1 + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(EA_HAVE_LIBSTDCPP_LIBRARY) && defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4007) // chrono was broken in glibc prior to 4.7. + #define EA_HAVE_CPP11_CHRONO 1 + #elif defined(EA_HAVE_LIBCPP_LIBRARY) && (_LIBCPP_VERSION >= 1) + #define EA_HAVE_CPP11_CHRONO 1 + #else + #define EA_NO_HAVE_CPP11_CHRONO 1 + #endif +#endif + +// #include +#if !defined(EA_HAVE_CPP11_SCOPED_ALLOCATOR) && !defined(EA_NO_HAVE_CPP11_SCOPED_ALLOCATOR) + #if defined(EA_HAVE_DINKUMWARE_CPP_LIBRARY) && (_CPPLIB_VER >= 540) // Dinkumware. VS2012+ + #define EA_HAVE_CPP11_SCOPED_ALLOCATOR 1 + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(EA_HAVE_LIBSTDCPP_LIBRARY) && defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4007) + #define EA_HAVE_CPP11_SCOPED_ALLOCATOR 1 + #elif defined(EA_HAVE_LIBCPP_LIBRARY) && (_LIBCPP_VERSION >= 1) + #define EA_HAVE_CPP11_SCOPED_ALLOCATOR 1 + #else + #define EA_NO_HAVE_CPP11_SCOPED_ALLOCATOR 1 + #endif +#endif + +// #include +#if !defined(EA_HAVE_CPP11_INITIALIZER_LIST) && !defined(EA_NO_HAVE_CPP11_INITIALIZER_LIST) + #if defined(EA_HAVE_DINKUMWARE_CPP_LIBRARY) && (_CPPLIB_VER >= 520) && !defined(EA_COMPILER_NO_INITIALIZER_LISTS) // Dinkumware. VS2010+ + #define EA_HAVE_CPP11_INITIALIZER_LIST 1 + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(EA_HAVE_LIBSTDCPP_LIBRARY) && defined(EA_COMPILER_CLANG) && (EA_COMPILER_VERSION >= 301) && !defined(EA_COMPILER_NO_INITIALIZER_LISTS) && !defined(EA_PLATFORM_APPLE) + #define EA_HAVE_CPP11_INITIALIZER_LIST 1 + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(EA_HAVE_LIBCPP_LIBRARY) && defined(EA_COMPILER_CLANG) && (EA_COMPILER_VERSION >= 301) && !defined(EA_COMPILER_NO_INITIALIZER_LISTS) && !defined(EA_PLATFORM_APPLE) + #define EA_HAVE_CPP11_INITIALIZER_LIST 1 + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(EA_HAVE_LIBSTDCPP_LIBRARY) && defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4004) && !defined(EA_COMPILER_NO_INITIALIZER_LISTS) && !defined(EA_PLATFORM_APPLE) + #define EA_HAVE_CPP11_INITIALIZER_LIST 1 + #elif defined(EA_HAVE_LIBCPP_LIBRARY) && (_LIBCPP_VERSION >= 1) && !defined(EA_COMPILER_NO_INITIALIZER_LISTS) + #define EA_HAVE_CPP11_INITIALIZER_LIST 1 + #else + #define EA_NO_HAVE_CPP11_INITIALIZER_LIST 1 + #endif +#endif + +// #include +#if !defined(EA_HAVE_CPP11_SYSTEM_ERROR) && !defined(EA_NO_HAVE_CPP11_SYSTEM_ERROR) + #if defined(EA_HAVE_DINKUMWARE_CPP_LIBRARY) && (_CPPLIB_VER >= 520) && !(defined(_HAS_CPP0X) && _HAS_CPP0X) // Dinkumware. VS2010+ + #define EA_HAVE_CPP11_SYSTEM_ERROR 1 + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(EA_HAVE_LIBSTDCPP_LIBRARY) && defined(EA_COMPILER_CLANG) && (EA_COMPILER_VERSION >= 301) && !defined(EA_PLATFORM_APPLE) + #define EA_HAVE_CPP11_SYSTEM_ERROR 1 + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(EA_HAVE_LIBSTDCPP_LIBRARY) && defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4004) && !defined(EA_PLATFORM_APPLE) + #define EA_HAVE_CPP11_SYSTEM_ERROR 1 + #elif defined(EA_HAVE_LIBCPP_LIBRARY) && (_LIBCPP_VERSION >= 1) + #define EA_HAVE_CPP11_SYSTEM_ERROR 1 + #else + #define EA_NO_HAVE_CPP11_SYSTEM_ERROR 1 + #endif +#endif + +// #include +#if !defined(EA_HAVE_CPP11_CODECVT) && !defined(EA_NO_HAVE_CPP11_CODECVT) + #if defined(EA_HAVE_DINKUMWARE_CPP_LIBRARY) && (_CPPLIB_VER >= 520) // Dinkumware. VS2010+ + #define EA_HAVE_CPP11_CODECVT 1 + // Future versions of libc++ may support this header. 
However, at the moment there isn't + // a reliable way of detecting if this header is available. + //#elif defined(EA_COMPILER_CPP11_ENABLED) && defined(EA_HAVE_LIBSTDCPP_LIBRARY) && defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4008) + // #define EA_HAVE_CPP11_CODECVT 1 + #elif defined(EA_HAVE_LIBCPP_LIBRARY) && (_LIBCPP_VERSION >= 1) + #define EA_HAVE_CPP11_CODECVT 1 + #else + #define EA_NO_HAVE_CPP11_CODECVT 1 + #endif +#endif + +// #include +#if !defined(EA_HAVE_CPP11_TYPEINDEX) && !defined(EA_NO_HAVE_CPP11_TYPEINDEX) + #if defined(EA_HAVE_DINKUMWARE_CPP_LIBRARY) && (_CPPLIB_VER >= 520) // Dinkumware. VS2010+ + #define EA_HAVE_CPP11_TYPEINDEX 1 + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(EA_HAVE_LIBSTDCPP_LIBRARY) && defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4006) + #define EA_HAVE_CPP11_TYPEINDEX 1 + #elif defined(EA_HAVE_LIBCPP_LIBRARY) && (_LIBCPP_VERSION >= 1) + #define EA_HAVE_CPP11_TYPEINDEX 1 + #else + #define EA_NO_HAVE_CPP11_TYPEINDEX 1 + #endif +#endif + + + + +/* EA_HAVE_XXX_DECL */ + +#if !defined(EA_HAVE_mkstemps_DECL) && !defined(EA_NO_HAVE_mkstemps_DECL) + #if defined(EA_PLATFORM_APPLE) || defined(CS_UNDEFINED_STRING) + #define EA_HAVE_mkstemps_DECL 1 + #else + #define EA_NO_HAVE_mkstemps_DECL 1 + #endif +#endif + +#if !defined(EA_HAVE_gettimeofday_DECL) && !defined(EA_NO_HAVE_gettimeofday_DECL) + #if defined(EA_PLATFORM_POSIX) /* Posix means Linux, Unix, and Macintosh OSX, among others (including Linux-based mobile platforms). */ + #define EA_HAVE_gettimeofday_DECL 1 + #else + #define EA_NO_HAVE_gettimeofday_DECL 1 + #endif +#endif + +#if !defined(EA_HAVE_strcasecmp_DECL) && !defined(EA_NO_HAVE_strcasecmp_DECL) + #if !defined(EA_PLATFORM_MICROSOFT) + #define EA_HAVE_strcasecmp_DECL 1 /* This is found as stricmp when not found as strcasecmp */ + #define EA_HAVE_strncasecmp_DECL 1 + #else + #define EA_HAVE_stricmp_DECL 1 + #define EA_HAVE_strnicmp_DECL 1 + #endif +#endif + +#if !defined(EA_HAVE_mmap_DECL) && !defined(EA_NO_HAVE_mmap_DECL) + #if defined(EA_PLATFORM_POSIX) + #define EA_HAVE_mmap_DECL 1 /* mmap functionality varies significantly between systems. */ + #else + #define EA_NO_HAVE_mmap_DECL 1 + #endif +#endif + +#if !defined(EA_HAVE_fopen_DECL) && !defined(EA_NO_HAVE_fopen_DECL) + #define EA_HAVE_fopen_DECL 1 /* C FILE functionality such as fopen */ +#endif + +#if !defined(EA_HAVE_ISNAN) && !defined(EA_NO_HAVE_ISNAN) + #if defined(EA_PLATFORM_MICROSOFT) && !defined(EA_PLATFORM_MINGW) + #define EA_HAVE_ISNAN(x) _isnan(x) /* declared in */ + #define EA_HAVE_ISINF(x) !_finite(x) + #elif defined(EA_PLATFORM_APPLE) + #define EA_HAVE_ISNAN(x) std::isnan(x) /* declared in */ + #define EA_HAVE_ISINF(x) std::isinf(x) + #elif defined(EA_PLATFORM_ANDROID) + #define EA_HAVE_ISNAN(x) __builtin_isnan(x) /* There are a number of standard libraries for Android and it's hard to tell them apart, so just go with builtins */ + #define EA_HAVE_ISINF(x) __builtin_isinf(x) + #elif defined(__GNUC__) && defined(__CYGWIN__) + #define EA_HAVE_ISNAN(x) __isnand(x) /* declared nowhere, it seems. 
*/ + #define EA_HAVE_ISINF(x) __isinfd(x) + #else + #define EA_HAVE_ISNAN(x) std::isnan(x) /* declared in */ + #define EA_HAVE_ISINF(x) std::isinf(x) + #endif +#endif + +#if !defined(EA_HAVE_itoa_DECL) && !defined(EA_NO_HAVE_itoa_DECL) + #if defined(EA_COMPILER_MSVC) + #define EA_HAVE_itoa_DECL 1 + #else + #define EA_NO_HAVE_itoa_DECL 1 + #endif +#endif + +#if !defined(EA_HAVE_nanosleep_DECL) && !defined(EA_NO_HAVE_nanosleep_DECL) + #if (defined(EA_PLATFORM_UNIX) && !defined(EA_PLATFORM_SONY)) || defined(EA_PLATFORM_IPHONE) || defined(EA_PLATFORM_OSX) || defined(EA_PLATFORM_SONY) || defined(CS_UNDEFINED_STRING) + #define EA_HAVE_nanosleep_DECL 1 + #else + #define EA_NO_HAVE_nanosleep_DECL 1 + #endif +#endif + +#if !defined(EA_HAVE_utime_DECL) && !defined(EA_NO_HAVE_utime_DECL) + #if defined(EA_PLATFORM_MICROSOFT) + #define EA_HAVE_utime_DECL _utime + #elif EA_PLATFORM_UNIX + #define EA_HAVE_utime_DECL utime + #else + #define EA_NO_HAVE_utime_DECL 1 + #endif +#endif + +#if !defined(EA_HAVE_ftruncate_DECL) && !defined(EA_NO_HAVE_ftruncate_DECL) + #if !defined(__MINGW32__) + #define EA_HAVE_ftruncate_DECL 1 + #else + #define EA_NO_HAVE_ftruncate_DECL 1 + #endif +#endif + +#if !defined(EA_HAVE_localtime_DECL) && !defined(EA_NO_HAVE_localtime_DECL) + #define EA_HAVE_localtime_DECL 1 +#endif + +#if !defined(EA_HAVE_pthread_getattr_np_DECL) && !defined(EA_NO_HAVE_pthread_getattr_np_DECL) + #if defined(EA_PLATFORM_LINUX) + #define EA_HAVE_pthread_getattr_np_DECL 1 + #else + #define EA_NO_HAVE_pthread_getattr_np_DECL 1 + #endif +#endif + + + +/* EA_HAVE_XXX_IMPL*/ + +#if !defined(EA_HAVE_WCHAR_IMPL) && !defined(EA_NO_HAVE_WCHAR_IMPL) + #if defined(EA_PLATFORM_DESKTOP) + #define EA_HAVE_WCHAR_IMPL 1 /* Specifies if wchar_t string functions are provided, such as wcslen, wprintf, etc. Implies EA_HAVE_WCHAR_H */ + #else + #define EA_NO_HAVE_WCHAR_IMPL 1 + #endif +#endif + +#if !defined(EA_HAVE_getenv_IMPL) && !defined(EA_NO_HAVE_getenv_IMPL) + #if (defined(EA_PLATFORM_DESKTOP) || defined(EA_PLATFORM_UNIX)) && !defined(EA_PLATFORM_WINRT) + #define EA_HAVE_getenv_IMPL 1 + #else + #define EA_NO_HAVE_getenv_IMPL 1 + #endif +#endif + +#if !defined(EA_HAVE_setenv_IMPL) && !defined(EA_NO_HAVE_setenv_IMPL) + #if defined(EA_PLATFORM_UNIX) && defined(EA_PLATFORM_POSIX) + #define EA_HAVE_setenv_IMPL 1 + #else + #define EA_NO_HAVE_setenv_IMPL 1 + #endif +#endif + +#if !defined(EA_HAVE_unsetenv_IMPL) && !defined(EA_NO_HAVE_unsetenv_IMPL) + #if defined(EA_PLATFORM_UNIX) && defined(EA_PLATFORM_POSIX) + #define EA_HAVE_unsetenv_IMPL 1 + #else + #define EA_NO_HAVE_unsetenv_IMPL 1 + #endif +#endif + +#if !defined(EA_HAVE_putenv_IMPL) && !defined(EA_NO_HAVE_putenv_IMPL) + #if (defined(EA_PLATFORM_DESKTOP) || defined(EA_PLATFORM_UNIX)) && !defined(EA_PLATFORM_WINRT) + #define EA_HAVE_putenv_IMPL 1 /* With Microsoft compilers you may need to use _putenv, as they have deprecated putenv. 
*/ + #else + #define EA_NO_HAVE_putenv_IMPL 1 + #endif +#endif + +#if !defined(EA_HAVE_time_IMPL) && !defined(EA_NO_HAVE_time_IMPL) + #define EA_HAVE_time_IMPL 1 + #define EA_HAVE_clock_IMPL 1 +#endif + +// fopen() +#if !defined(EA_HAVE_fopen_IMPL) && !defined(EA_NO_HAVE_fopen_IMPL) + #define EA_HAVE_fopen_IMPL 1 /* C FILE functionality such as fopen */ +#endif + +// inet_ntop() +#if !defined(EA_HAVE_inet_ntop_IMPL) && !defined(EA_NO_HAVE_inet_ntop_IMPL) + #if (defined(EA_PLATFORM_UNIX) || defined(EA_PLATFORM_POSIX)) && !defined(EA_PLATFORM_SONY) && !defined(CS_UNDEFINED_STRING) + #define EA_HAVE_inet_ntop_IMPL 1 /* This doesn't identify if the platform SDK has some alternative function that does the same thing; */ + #define EA_HAVE_inet_pton_IMPL 1 /* it identifies strictly the inet_ntop and inet_pton functions. For example, Microsoft has InetNtop in */ + #else + #define EA_NO_HAVE_inet_ntop_IMPL 1 + #define EA_NO_HAVE_inet_pton_IMPL 1 + #endif +#endif + +// clock_gettime() +#if !defined(EA_HAVE_clock_gettime_IMPL) && !defined(EA_NO_HAVE_clock_gettime_IMPL) + #if defined(EA_PLATFORM_LINUX) || defined(__CYGWIN__) || (defined(_POSIX_TIMERS) && (_POSIX_TIMERS > 0)) || (defined(EA_PLATFORM_POSIX) && defined(_CPPLIB_VER) /*Dinkumware*/) + #define EA_HAVE_clock_gettime_IMPL 1 /* You need to link the 'rt' library to get this */ + #else + #define EA_NO_HAVE_clock_gettime_IMPL 1 + #endif +#endif + +#if !defined(EA_HAVE_getcwd_IMPL) && !defined(EA_NO_HAVE_getcwd_IMPL) + #if (defined(EA_PLATFORM_DESKTOP) || defined(EA_PLATFORM_UNIX)) && !defined(EA_PLATFORM_ANDROID) && !defined(EA_PLATFORM_WINRT) + #define EA_HAVE_getcwd_IMPL 1 /* With Microsoft compilers you may need to use _getcwd, as they have deprecated getcwd. And in any case it's present at */ + #else + #define EA_NO_HAVE_getcwd_IMPL 1 + #endif +#endif + +#if !defined(EA_HAVE_tmpnam_IMPL) && !defined(EA_NO_HAVE_tmpnam_IMPL) + #if (defined(EA_PLATFORM_DESKTOP) || defined(EA_PLATFORM_UNIX)) && !defined(EA_PLATFORM_ANDROID) + #define EA_HAVE_tmpnam_IMPL 1 + #else + #define EA_NO_HAVE_tmpnam_IMPL 1 + #endif +#endif + +// nullptr, the built-in C++11 type. +// This EA_HAVE is deprecated, as EA_COMPILER_NO_NULLPTR is more appropriate, given that nullptr is a compiler-level feature and not a library feature. +#if !defined(EA_HAVE_nullptr_IMPL) && !defined(EA_NO_HAVE_nullptr_IMPL) + #if defined(EA_COMPILER_NO_NULLPTR) + #define EA_NO_HAVE_nullptr_IMPL 1 + #else + #define EA_HAVE_nullptr_IMPL 1 + #endif +#endif + +// std::nullptr_t +// Note that implements a portable nullptr implementation, but this +// EA_HAVE specifically refers to std::nullptr_t from the standard libraries. +#if !defined(EA_HAVE_nullptr_t_IMPL) && !defined(EA_NO_HAVE_nullptr_t_IMPL) + #if defined(EA_COMPILER_CPP11_ENABLED) + // VS2010+ with its default Dinkumware standard library. + #if defined(_MSC_VER) && (_MSC_VER >= 1600) && defined(EA_HAVE_DINKUMWARE_CPP_LIBRARY) + #define EA_HAVE_nullptr_t_IMPL 1 + + #elif defined(EA_HAVE_LIBCPP_LIBRARY) // clang/llvm libc++ + #define EA_HAVE_nullptr_t_IMPL 1 + + #elif defined(EA_HAVE_LIBSTDCPP_LIBRARY) // GNU libstdc++ + // Unfortunately __GLIBCXX__ date values don't go strictly in version ordering. + #if (__GLIBCXX__ >= 20110325) && (__GLIBCXX__ != 20120702) && (__GLIBCXX__ != 20110428) + #define EA_HAVE_nullptr_t_IMPL 1 + #else + #define EA_NO_HAVE_nullptr_t_IMPL 1 + #endif + + // We simply assume that the standard library (e.g. Dinkumware) provides std::nullptr_t. 
+ #elif defined(__clang__) + #define EA_HAVE_nullptr_t_IMPL 1 + + // With GCC compiler >= 4.6, std::nullptr_t is always defined in , in practice. + #elif defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4006) + #define EA_HAVE_nullptr_t_IMPL 1 + + // The EDG compiler provides nullptr, but uses an older standard library that doesn't support std::nullptr_t. + #elif defined(__EDG_VERSION__) && (__EDG_VERSION__ >= 403) + #define EA_HAVE_nullptr_t_IMPL 1 + + #else + #define EA_NO_HAVE_nullptr_t_IMPL 1 + #endif + #else + #define EA_NO_HAVE_nullptr_t_IMPL 1 + #endif +#endif + +// std::terminate +#if !defined(EA_HAVE_std_terminate_IMPL) && !defined(EA_NO_HAVE_std_terminate_IMPL) + #if !defined(EA_PLATFORM_IPHONE) && !defined(EA_PLATFORM_ANDROID) + #define EA_HAVE_std_terminate_IMPL 1 /* iOS doesn't appear to provide an implementation for std::terminate under the armv6 target. */ + #else + #define EA_NO_HAVE_std_terminate_IMPL 1 + #endif +#endif + +// : std::begin, std::end, std::prev, std::next, std::move_iterator. +#if !defined(EA_HAVE_CPP11_ITERATOR_IMPL) && !defined(EA_NO_HAVE_CPP11_ITERATOR_IMPL) + #if defined(EA_HAVE_DINKUMWARE_CPP_LIBRARY) && (_CPPLIB_VER >= 520) && !(defined(_HAS_CPP0X) && _HAS_CPP0X) // Dinkumware. VS2010+ + #define EA_HAVE_CPP11_ITERATOR_IMPL 1 + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(EA_HAVE_LIBSTDCPP_LIBRARY) && defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4006) + #define EA_HAVE_CPP11_ITERATOR_IMPL 1 + #elif defined(EA_HAVE_LIBCPP_LIBRARY) && (_LIBCPP_VERSION >= 1) + #define EA_HAVE_CPP11_ITERATOR_IMPL 1 + #else + #define EA_NO_HAVE_CPP11_ITERATOR_IMPL 1 + #endif +#endif + +// : std::weak_ptr, std::shared_ptr, std::unique_ptr, std::bad_weak_ptr, std::owner_less +#if !defined(EA_HAVE_CPP11_SMART_POINTER_IMPL) && !defined(EA_NO_HAVE_CPP11_SMART_POINTER_IMPL) + #if defined(EA_HAVE_DINKUMWARE_CPP_LIBRARY) && (_CPPLIB_VER >= 520) && !(defined(_HAS_CPP0X) && _HAS_CPP0X) // Dinkumware. VS2010+ + #define EA_HAVE_CPP11_SMART_POINTER_IMPL 1 + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(EA_HAVE_LIBSTDCPP_LIBRARY) && defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4004) + #define EA_HAVE_CPP11_SMART_POINTER_IMPL 1 + #elif defined(EA_HAVE_LIBCPP_LIBRARY) && (_LIBCPP_VERSION >= 1) + #define EA_HAVE_CPP11_SMART_POINTER_IMPL 1 + #else + #define EA_NO_HAVE_CPP11_SMART_POINTER_IMPL 1 + #endif +#endif + +// : std::function, std::mem_fn, std::bad_function_call, std::is_bind_expression, std::is_placeholder, std::reference_wrapper, std::hash, std::bind, std::ref, std::cref. +#if !defined(EA_HAVE_CPP11_FUNCTIONAL_IMPL) && !defined(EA_NO_HAVE_CPP11_FUNCTIONAL_IMPL) + #if defined(EA_HAVE_DINKUMWARE_CPP_LIBRARY) && (_CPPLIB_VER >= 520) && !(defined(_HAS_CPP0X) && _HAS_CPP0X) // Dinkumware. VS2010+ + #define EA_HAVE_CPP11_FUNCTIONAL_IMPL 1 + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(EA_HAVE_LIBSTDCPP_LIBRARY) && defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4004) + #define EA_HAVE_CPP11_FUNCTIONAL_IMPL 1 + #elif defined(EA_HAVE_LIBCPP_LIBRARY) && (_LIBCPP_VERSION >= 1) + #define EA_HAVE_CPP11_FUNCTIONAL_IMPL 1 + #else + #define EA_NO_HAVE_CPP11_FUNCTIONAL_IMPL 1 + #endif +#endif + +// std::current_exception, std::rethrow_exception, std::exception_ptr, std::make_exception_ptr +#if !defined(EA_HAVE_CPP11_EXCEPTION_IMPL) && !defined(EA_NO_HAVE_CPP11_EXCEPTION_IMPL) + #if defined(EA_HAVE_DINKUMWARE_CPP_LIBRARY) && (_CPPLIB_VER >= 520) && !(defined(_HAS_CPP0X) && _HAS_CPP0X) // Dinkumware. 
VS2010+ + #define EA_HAVE_CPP11_EXCEPTION_IMPL 1 + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(EA_HAVE_LIBSTDCPP_LIBRARY) && defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4004) + #define EA_HAVE_CPP11_EXCEPTION_IMPL 1 + #elif defined(EA_HAVE_LIBCPP_LIBRARY) && (_LIBCPP_VERSION >= 1) + #define EA_HAVE_CPP11_EXCEPTION_IMPL 1 + #else + #define EA_NO_HAVE_CPP11_EXCEPTION_IMPL 1 + #endif +#endif + + + + +/* Implementations that all platforms seem to have: */ +/* + alloca + malloc + calloc + strtoll + strtoull + vsprintf + vsnprintf +*/ + +/* Implementations that we don't care about: */ +/* + bcopy -- Just use memmove or some customized equivalent. bcopy offers no practical benefit. + strlcpy -- So few platforms have this built-in that we get no benefit from using it. Use EA::StdC::Strlcpy instead. + strlcat -- " +*/ + + + +/*----------------------------------------------------------------------------- + EABASE_USER_HAVE_HEADER + + This allows the user to define a header file to be #included after the + eahave.h's contents are compiled. A primary use of this is to override + the contents of this header file. You can define the overhead header + file name in-code or define it globally as part of your build file. + + Example usage: + #define EABASE_USER_HAVE_HEADER "MyHaveOverrides.h" + #include +---------------------------------------------------------------------------*/ + +#ifdef EABASE_USER_HAVE_HEADER + #include EABASE_USER_HAVE_HEADER +#endif + + +#endif /* Header include guard */ + + + diff --git a/libkram/eastl/include/EABase/earesult.h b/libkram/eastl/include/EABase/earesult.h new file mode 100644 index 00000000..d08b3460 --- /dev/null +++ b/libkram/eastl/include/EABase/earesult.h @@ -0,0 +1,62 @@ +/*----------------------------------------------------------------------------- + * earesult.h + * + * Copyright (c) Electronic Arts Inc. All rights reserved. + *---------------------------------------------------------------------------*/ + + +#ifndef INCLUDED_earesult_H +#define INCLUDED_earesult_H + + +#include + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once /* Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. */ +#endif + + + +/* This result type is width-compatible with most systems. */ +typedef int32_t ea_result_type; + + +namespace EA +{ + typedef int32_t result_type; + + enum + { +#ifndef SUCCESS + // Deprecated + // Note: a public MS header has created a define of this name which causes a build error. Fortunately they + // define it to 0 which is compatible. + // see: WindowsSDK\8.1.51641-fb\installed\Include\um\RasError.h + SUCCESS = 0, +#endif + // Deprecated + FAILURE = -1, + + // These values are now the preferred constants + EA_SUCCESS = 0, + EA_FAILURE = -1, + }; +} + + +/* Macro to simplify testing for success. */ +#ifndef EA_SUCCEEDED + #define EA_SUCCEEDED(result) ((result) >= 0) +#endif + +/* Macro to simplfify testing for general failure. */ +#ifndef EA_FAILED + #define EA_FAILED(result) ((result) < 0) +#endif + + +#endif + + + + diff --git a/libkram/eastl/include/EABase/eastdarg.h b/libkram/eastl/include/EABase/eastdarg.h new file mode 100644 index 00000000..2c613eb8 --- /dev/null +++ b/libkram/eastl/include/EABase/eastdarg.h @@ -0,0 +1,99 @@ +/*----------------------------------------------------------------------------- + * eastdarg.h + * + * Copyright (c) Electronic Arts Inc. All rights reserved. 
+ *---------------------------------------------------------------------------*/ + + +#ifndef INCLUDED_eastdarg_H +#define INCLUDED_eastdarg_H + + +#include +#include + + +// VA_ARG_COUNT +// +// Returns the number of arguments passed to a macro's ... argument. +// This applies to macros only and not functions. +// +// Example usage: +// assert(VA_ARG_COUNT() == 0); +// assert(VA_ARG_COUNT(a) == 1); +// assert(VA_ARG_COUNT(a, b) == 2); +// assert(VA_ARG_COUNT(a, b, c) == 3); +// +#if !defined(VA_ARG_COUNT) + #define VA_ARG_COUNT(...) VA_ARG_COUNT_II((VA_ARG_COUNT_PREFIX_ ## __VA_ARGS__ ## _VA_ARG_COUNT_POSTFIX,32,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0)) + #define VA_ARG_COUNT_II(__args) VA_ARG_COUNT_I __args + #define VA_ARG_COUNT_PREFIX__VA_ARG_COUNT_POSTFIX ,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0 + #define VA_ARG_COUNT_I(_0,_1,_2,_3,_4,_5,_6,_7,_8,_9,_10,_11,_12,_13,_14,_15,_16,_17,_18,_19,_20,_21,_22,_23,_24,_25,_26,_27,_28,_29,_30,_31,N,...) N +#endif + + +// va_copy +// +// va_copy is required by C++11 +// C++11 and C99 require va_copy to be #defined and implemented. +// http://en.cppreference.com/w/cpp/utility/variadic/va_copy +// +// Example usage: +// void Func(char* p, ...){ +// va_list args, argsCopy; +// va_start(args, p); +// va_copy(argsCopy, args); +// (use args) +// (use argsCopy, which acts the same as args) +// va_end(args); +// va_end(argsCopy); +// } +// +#ifndef va_copy + #if defined(__va_copy) // GCC and others define this for non-C99 compatibility. + #define va_copy(dest, src) __va_copy((dest), (src)) + #else + // This may not work for some platforms, depending on their ABI. + // It works for Microsoft x86,x64, and PowerPC-based platforms. + #define va_copy(dest, src) memcpy(&(dest), &(src), sizeof(va_list)) + #endif +#endif + + + +// va_list_reference +// +// va_list_reference is not part of the C or C++ standards. +// It allows you to pass a va_list by reference to another +// function instead of by value. You cannot simply use va_list& +// as that won't work with many va_list implementations because +// they are implemented as arrays (which can't be passed by +// reference to a function without decaying to a pointer). +// +// Example usage: +// void Test(va_list_reference args){ +// printf("%d", va_arg(args, int)); +// } +// void Func(char* p, ...){ +// va_list args; +// va_start(args, p); +// Test(args); // Upon return args will be modified. +// va_end(args); +// } +#ifndef va_list_reference + #if defined(EA_PLATFORM_MICROSOFT) || (EA_PLATFORM_PTR_SIZE == 4) || (defined(EA_PLATFORM_APPLE) && defined(EA_PROCESSOR_ARM64)) || defined(CS_UNDEFINED_STRING) || (defined(EA_PLATFORM_ANDROID) && defined(EA_PROCESSOR_ARM64)) + // This is required for platform ABIs in which va_list is a struct or pointer. + #define va_list_reference va_list& + #else + // This is required for platform ABIs in which va_list is defined to be an array. + #define va_list_reference va_list + #endif +#endif + + + + +#endif /* Header include guard */ + + + diff --git a/libkram/eastl/include/EABase/eaunits.h b/libkram/eastl/include/EABase/eaunits.h new file mode 100644 index 00000000..22357234 --- /dev/null +++ b/libkram/eastl/include/EABase/eaunits.h @@ -0,0 +1,54 @@ +/*----------------------------------------------------------------------------- + * eaunits.h + * + * Copyright (c) Electronic Arts Inc. All rights reserved. 
+ *---------------------------------------------------------------------------*/
+
+
+#ifndef INCLUDED_eaunits_h
+#define INCLUDED_eaunits_h
+
+#include <EABase/eabase.h>
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+// Defining common SI unit macros.
+//
+// The mebibyte is a multiple of the unit byte for digital information. Technically a
+// megabyte (MB) is a power of ten, while a mebibyte (MiB) is a power of two,
+// appropriate for binary machines. Many Linux distributions use the unit, but it is
+// not widely acknowledged within the industry or media.
+// Reference: https://en.wikipedia.org/wiki/Mebibyte
+//
+// Examples:
+// auto size1 = EA_KILOBYTE(16);
+// auto size2 = EA_MEGABYTE(128);
+// auto size3 = EA_MEBIBYTE(8);
+// auto size4 = EA_GIBIBYTE(8);
+
+// define byte for completeness
+#define EA_BYTE(x) (x)
+
+// Decimal SI units
+#define EA_KILOBYTE(x) (size_t(x) * 1000)
+#define EA_MEGABYTE(x) (size_t(x) * 1000 * 1000)
+#define EA_GIGABYTE(x) (size_t(x) * 1000 * 1000 * 1000)
+#define EA_TERABYTE(x) (size_t(x) * 1000 * 1000 * 1000 * 1000)
+#define EA_PETABYTE(x) (size_t(x) * 1000 * 1000 * 1000 * 1000 * 1000)
+#define EA_EXABYTE(x) (size_t(x) * 1000 * 1000 * 1000 * 1000 * 1000 * 1000)
+
+// Binary SI units
+#define EA_KIBIBYTE(x) (size_t(x) * 1024)
+#define EA_MEBIBYTE(x) (size_t(x) * 1024 * 1024)
+#define EA_GIBIBYTE(x) (size_t(x) * 1024 * 1024 * 1024)
+#define EA_TEBIBYTE(x) (size_t(x) * 1024 * 1024 * 1024 * 1024)
+#define EA_PEBIBYTE(x) (size_t(x) * 1024 * 1024 * 1024 * 1024 * 1024)
+#define EA_EXBIBYTE(x) (size_t(x) * 1024 * 1024 * 1024 * 1024 * 1024 * 1024)
+
+#endif // INCLUDED_eaunits_h
+
+
+
+
diff --git a/libkram/eastl/include/EABase/int128.h b/libkram/eastl/include/EABase/int128.h
new file mode 100644
index 00000000..068d557a
--- /dev/null
+++ b/libkram/eastl/include/EABase/int128.h
@@ -0,0 +1,1268 @@
+/*-----------------------------------------------------------------------------
+ * eaint128_t.h
+ *
+ * Copyright (c) Electronic Arts Inc. All rights reserved.
+ *---------------------------------------------------------------------------*/ + + +#ifndef INCLUDED_int128_h +#define INCLUDED_int128_h + + +/////////////////////////////////////////////////////////////////////////////////////////////////////// +// EA_INT128_INTRINSIC_AVAILABLE +// +#if (EA_COMPILER_INTMAX_SIZE >= 16) && (defined(EA_COMPILER_GNUC) || defined(EA_COMPILER_CLANG)) + // __int128_t/__uint128_t is supported + #define EA_INT128_INTRINSIC_AVAILABLE 1 +#else + #define EA_INT128_INTRINSIC_AVAILABLE 0 +#endif + +/////////////////////////////////////////////////////////////////////////////////////////////////////// +// EA_INT128_ALIGNAS +// +#if EA_INT128_INTRINSIC_AVAILABLE && !defined(EA_COMPILER_NO_ALIGNAS) + #define EA_INT128_ALIGNAS alignas(unsigned __int128) +#else + #define EA_INT128_ALIGNAS +#endif + + +/////////////////////////////////////////////////////////////////////////////////////////////////////// +// EA_HAVE_INT128 +// +// Indicates that EABase implements 128-bit integer types +// +#define EA_HAVE_INT128 1 + + +/////////////////////////////////////////////////////////////////////////////////////////////////////// +// uint128_t_base +// +struct EA_INT128_ALIGNAS int128_t_base +{ + // Constructors / destructors + int128_t_base() = default; + int128_t_base(uint32_t nPart0, uint32_t nPart1, uint32_t nPart2, uint32_t nPart3); + int128_t_base(uint64_t nPart0, uint64_t nPart1); + int128_t_base(uint8_t value); + int128_t_base(uint16_t value); + int128_t_base(uint32_t value); + int128_t_base(uint64_t value); + int128_t_base(const int128_t_base& value) = default; + + // Assignment operator + int128_t_base& operator=(const int128_t_base& value) = default; + + // Explicit operators to convert back to basic types + EA_CONSTEXPR explicit operator bool() const; + EA_CONSTEXPR explicit operator char() const; + EA_CONSTEXPR explicit operator int() const; + EA_CONSTEXPR explicit operator long() const; + EA_CONSTEXPR explicit operator long long() const; + EA_CONSTEXPR explicit operator short() const; + EA_CONSTEXPR explicit operator signed char() const; + EA_CONSTEXPR explicit operator unsigned char() const; + EA_CONSTEXPR explicit operator unsigned int() const; + EA_CONSTEXPR explicit operator unsigned long long() const; + EA_CONSTEXPR explicit operator unsigned long() const; + EA_CONSTEXPR explicit operator unsigned short() const; +#if EA_WCHAR_UNIQUE + // EA_CONSTEXPR explicit operator char16_t() const; + // EA_CONSTEXPR explicit operator char32_t() const; + // EA_CONSTEXPR explicit operator wchar_t() const; +#endif + EA_CONSTEXPR explicit operator float() const; + EA_CONSTEXPR explicit operator double() const; + EA_CONSTEXPR explicit operator long double() const; +#if EA_INT128_INTRINSIC_AVAILABLE + EA_CONSTEXPR explicit operator __int128() const; + EA_CONSTEXPR explicit operator unsigned __int128() const; +#endif + + // Math operators + static void OperatorPlus (const int128_t_base& value1, const int128_t_base& value2, int128_t_base& result); + static void OperatorMinus(const int128_t_base& value1, const int128_t_base& value2, int128_t_base& result); + static void OperatorMul (const int128_t_base& value1, const int128_t_base& value2, int128_t_base& result); + + // Shift operators + static void OperatorShiftRight(const int128_t_base& value, int nShift, int128_t_base& result); + static void OperatorShiftLeft (const int128_t_base& value, int nShift, int128_t_base& result); + + // Unary arithmetic/logic operators + bool operator!() const; + + // Logical operators + static void 
OperatorXOR(const int128_t_base& value1, const int128_t_base& value2, int128_t_base& result); + static void OperatorOR (const int128_t_base& value1, const int128_t_base& value2, int128_t_base& result); + static void OperatorAND(const int128_t_base& value1, const int128_t_base& value2, int128_t_base& result); + + bool IsZero() const; + void SetZero(); + void TwosComplement(); + void InverseTwosComplement(); + + int GetBit(int nIndex) const; + void SetBit(int nIndex, int value); + +protected: + void DoubleToUint128(double value); + + EA_CONSTEXPR uint64_t Low() const + { + return mPart0; + } + + EA_CONSTEXPR uint64_t High() const + { + return mPart1; + } + +protected: + #ifdef EA_SYSTEM_BIG_ENDIAN + uint64_t mPart1; // Most significant byte. + uint64_t mPart0; // Least significant byte. + #else + uint64_t mPart0; // Most significant byte. + uint64_t mPart1; // Least significant byte. + #endif +}; + +/////////////////////////////////////////////////////////////////////////////////////////////////////// +// int128_t +// +// Implements signed 128 bit integer. +// +struct int128_t : public int128_t_base +{ + // Constructors / destructors + using int128_t_base::int128_t_base; + + // Assignment operator + using int128_t_base::operator=; + + // Unary arithmetic/logic operators + int128_t operator-() const; + int128_t& operator++(); + int128_t& operator--(); + int128_t operator++(int); + int128_t operator--(int); + int128_t operator~() const; + int128_t operator+() const; + + // Math operators + int128_t operator+ (const int128_t& other); + int128_t operator- (const int128_t& other); + int128_t operator* (const int128_t& other); + int128_t operator/ (const int128_t& other); + int128_t operator% (const int128_t& other); + int128_t& operator+=(const int128_t& other); + int128_t& operator-=(const int128_t& other); + int128_t& operator*=(const int128_t& other); + int128_t& operator/=(const int128_t& other); + int128_t& operator%=(const int128_t& other); + + // Shift operators + int128_t operator>> (int nShift) const; + int128_t operator<< (int nShift) const; + int128_t& operator>>=(int nShift); + int128_t& operator<<=(int nShift); + + // Logical operators + int128_t operator^ (const int128_t& other) const; + int128_t operator| (const int128_t& other) const; + int128_t operator& (const int128_t& other) const; + int128_t& operator^=(const int128_t& other); + int128_t& operator|=(const int128_t& other); + int128_t& operator&=(const int128_t& other); + + // Equality operators + bool operator==(const int128_t& other) const; + bool operator!=(const int128_t& other) const; + bool operator> (const int128_t& other) const; + bool operator>=(const int128_t& other) const; + bool operator< (const int128_t& other) const; + bool operator<=(const int128_t& other) const; + +protected: + int compare(const int128_t& other) const; + void Negate(); + void Modulus(const int128_t& divisor, int128_t& quotient, int128_t& remainder) const; + bool IsNegative() const; // Returns true for value < 0 + bool IsPositive() const; // Returns true for value >= 0 +}; + + +/////////////////////////////////////////////////////////////////////////////////////////////////////// +// uint128_t +// +// Implements unsigned 128 bit integer. 
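// A brief layout and usage note (an illustrative aside with a hypothetical
// snippet, not from the original header): int128_t_base stores the value as two
// 64-bit halves, and the constructors, Low()/High(), and GetBit() all treat
// mPart0 as the least-significant half and mPart1 as the most-significant half
// (the per-member comments in the little-endian branch above appear transposed).
// A minimal sketch of how the derived types are meant to be used:
//
//     uint128_t x(0xFFFFFFFFFFFFFFFFull, 0x0ull); // (low, high) => 2^64 - 1
//     uint128_t y((uint32_t)1);
//     uint128_t sum = x + y;                      // carries into the high half: { mPart0 = 0, mPart1 = 1 }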
+// +struct uint128_t : public int128_t_base +{ + // Constructors / destructors + using int128_t_base::int128_t_base; + + // Assignment operator + using int128_t_base::operator=; + + // Unary arithmetic/logic operators + uint128_t operator-() const; + uint128_t& operator++(); + uint128_t& operator--(); + uint128_t operator++(int); + uint128_t operator--(int); + uint128_t operator~() const; + uint128_t operator+() const; + + // Math operators + uint128_t operator+ (const uint128_t& other); + uint128_t operator- (const uint128_t& other); + uint128_t operator* (const uint128_t& other); + uint128_t operator/ (const uint128_t& other); + uint128_t operator% (const uint128_t& other); + uint128_t& operator+=(const uint128_t& other); + uint128_t& operator-=(const uint128_t& other); + uint128_t& operator*=(const uint128_t& other); + uint128_t& operator/=(const uint128_t& other); + uint128_t& operator%=(const uint128_t& other); + + // Shift operators + uint128_t operator>> (int nShift) const; + uint128_t operator<< (int nShift) const; + uint128_t& operator>>=(int nShift); + uint128_t& operator<<=(int nShift); + + // Logical operators + uint128_t operator^ (const uint128_t& other) const; + uint128_t operator| (const uint128_t& other) const; + uint128_t operator& (const uint128_t& other) const; + uint128_t& operator^=(const uint128_t& other); + uint128_t& operator|=(const uint128_t& other); + uint128_t& operator&=(const uint128_t& other); + + // Equality operators + bool operator==(const uint128_t& other) const; + bool operator!=(const uint128_t& other) const; + bool operator> (const uint128_t& other) const; + bool operator>=(const uint128_t& other) const; + bool operator< (const uint128_t& other) const; + bool operator<=(const uint128_t& other) const; + +protected: + int compare(const uint128_t& other) const; + void Negate(); + void Modulus(const uint128_t& divisor, uint128_t& quotient, uint128_t& remainder) const; + bool IsNegative() const; // Returns true for value < 0 + bool IsPositive() const; // Returns true for value >= 0 +}; + + + +/////////////////////////////////////////////////////////////////////////////////////////////////////// +// uint128_t_base implementation +/////////////////////////////////////////////////////////////////////////////////////////////////////// +EA_CONSTEXPR inline int128_t_base::operator bool() const { return mPart0 || mPart1; } +EA_CONSTEXPR inline int128_t_base::operator char() const { return static_cast(Low()); } +#if EA_WCHAR_UNIQUE +// EA_CONSTEXPR inline int128_t_base::operator char16_t() const { return static_cast(Low()); } +// EA_CONSTEXPR inline int128_t_base::operator char32_t() const { return static_cast(Low()); } +// EA_CONSTEXPR inline int128_t_base::operator wchar_t() const { return static_cast(Low()); } +#endif +EA_CONSTEXPR inline int128_t_base::operator int() const { return static_cast(Low()); } +EA_CONSTEXPR inline int128_t_base::operator long() const { return static_cast(Low()); } +EA_CONSTEXPR inline int128_t_base::operator long long() const { return static_cast(Low()); } +EA_CONSTEXPR inline int128_t_base::operator short() const { return static_cast(Low()); } +EA_CONSTEXPR inline int128_t_base::operator signed char() const { return static_cast(Low()); } +EA_CONSTEXPR inline int128_t_base::operator unsigned char() const { return static_cast(Low()); } +EA_CONSTEXPR inline int128_t_base::operator unsigned int() const { return static_cast(Low()); } +EA_CONSTEXPR inline int128_t_base::operator unsigned long long() const { return static_cast(Low()); } 
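// An explanatory aside with a hypothetical example: each of the narrowing
// conversions above and below is defined purely in terms of Low(), so the
// most-significant 64 bits are silently discarded, including by the
// floating-point conversions further down.
//
//     uint128_t big(0x0ull, 0x1ull);                   // (low, high) => 2^64
//     unsigned long long n = (unsigned long long)big;  // 0, not 2^64
//     double d = (double)big;                          // 0.0 for the same reason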
+EA_CONSTEXPR inline int128_t_base::operator unsigned long() const { return static_cast(Low()); } +EA_CONSTEXPR inline int128_t_base::operator unsigned short() const { return static_cast(Low()); } +EA_CONSTEXPR inline int128_t_base::operator float() const { return static_cast(Low()); } +EA_CONSTEXPR inline int128_t_base::operator double() const { return static_cast(Low()); } +EA_CONSTEXPR inline int128_t_base::operator long double() const { return static_cast(Low()); } +#if EA_INT128_INTRINSIC_AVAILABLE +EA_CONSTEXPR inline int128_t_base::operator __int128() const { return static_cast<__int128>(Low()); } +EA_CONSTEXPR inline int128_t_base::operator unsigned __int128() const { return static_cast(Low()); } +#endif + +inline void int128_t_base::SetBit(int nIndex, int value) +{ + // EA_ASSERT((nIndex >= 0) && (nIndex < 128)); + + const uint64_t nBitMask = ((uint64_t)1 << (nIndex % 64)); + + if(nIndex < 64) + { + if(value) + mPart0 = mPart0 | nBitMask; + else + mPart0 = mPart0 & ~nBitMask; + } + else if(nIndex < 128) + { + if(value) + mPart1 = mPart1 | nBitMask; + else + mPart1 = mPart1 & ~nBitMask; + } +} + +inline int int128_t_base::GetBit(int nIndex) const +{ + // EA_ASSERT((nIndex >= 0) && (nIndex < 128)); + + const uint64_t nBitMask = ((uint64_t)1 << (nIndex % 64)); + + if(nIndex < 64) + return ((mPart0 & nBitMask) ? 1 : 0); + else if(nIndex < 128) + return ((mPart1 & nBitMask) ? 1 : 0); + return 0; +} + +inline int128_t_base::int128_t_base(uint32_t nPart0, uint32_t nPart1, uint32_t nPart2, uint32_t nPart3) +{ + mPart1 = ((uint64_t)nPart3 << 32) + nPart2; + mPart0 = ((uint64_t)nPart1 << 32) + nPart0; +} + +inline int128_t_base::int128_t_base(uint64_t nPart0, uint64_t nPart1) +{ + mPart1 = nPart1; + mPart0 = nPart0; +} + +inline int128_t_base::int128_t_base(uint8_t value) +{ + mPart1 = 0; + mPart0 = value; +} + +inline int128_t_base::int128_t_base(uint16_t value) +{ + mPart1 = 0; + mPart0 = value; +} + +inline int128_t_base::int128_t_base(uint32_t value) +{ + mPart1 = 0; + mPart0 = value; +} + +inline int128_t_base::int128_t_base(uint64_t value) +{ + mPart1 = 0; + mPart0 = value; +} + +/////////////////////////////////////////////////////////////////////////////// +// OperatorPlus +// +// Returns: (value1 + value2) into result. +// The output 'result' *is* allowed to point to the same memory as one of the inputs. +// To consider: Fix 'defect' of this function whereby it doesn't implement overflow wraparound. +// +inline void int128_t_base::OperatorPlus(const int128_t_base& value1, const int128_t_base& value2, int128_t_base& result) +{ + uint64_t t = value1.mPart0 + value2.mPart0; + uint64_t nCarry = (t < value1.mPart0) && (t < value2.mPart0); + result.mPart0 = t; + result.mPart1 = value1.mPart1 + value2.mPart1 + nCarry; +} + +/////////////////////////////////////////////////////////////////////////////// +// OperatorMinus +// +// Returns: (value1 - value2) into result. +// The output 'result' *is* allowed to point to the same memory as one of the inputs. +// To consider: Fix 'defect' of this function whereby it doesn't implement overflow wraparound. +// +inline void int128_t_base::OperatorMinus(const int128_t_base& value1, const int128_t_base& value2, int128_t_base& result) +{ + uint64_t t = (value1.mPart0 - value2.mPart0); + uint64_t nCarry = (value1.mPart0 < value2.mPart0) ? 
1u : 0u; + result.mPart0 = t; + result.mPart1 = (value1.mPart1 - value2.mPart1) - nCarry; +} + +/////////////////////////////////////////////////////////////////////////////// +// OperatorMul +// +// 64 bit systems: +// This is how it would be able to work if we could get a 128 bit result from +// two 64 bit values. None of the 64 bit systems that we are currently working +// with have C language support for multiplying two 64 bit numbers and retrieving +// the 128 bit result. However, many 64 bit platforms have support at the asm +// level for doing such a thing. +// Part 1 Part 0 +// 0000000000000002 0000000000000001 +// x 0000000000000002 0000000000000001 +// ------------------------------------------- +// | 0000000000000002 0000000000000001 +// + 0000000000000004 | 0000000000000002 (0000000000000000) +// ------------------------------------------------------------------------- +// +inline void int128_t_base::OperatorMul(const int128_t_base& a, const int128_t_base& b, int128_t_base& result) +{ + // To consider: Use compiler or OS-provided custom functionality here, such as + // Windows UnsignedMultiply128 and GCC's built-in int128_t. + + #if defined(DISABLED_PLATFORM_WIN64) + // To do: Implement x86-64 asm here. + + #else + // Else we are stuck doing something less efficient. In this case we + // fall back to doing 32 bit multiplies as with 32 bit platforms. + result = (a.mPart0 & 0xffffffff) * (b.mPart0 & 0xffffffff); + int128_t v01 = (a.mPart0 & 0xffffffff) * ((b.mPart0 >> 32) & 0xffffffff); + int128_t v02 = (a.mPart0 & 0xffffffff) * (b.mPart1 & 0xffffffff); + int128_t v03 = (a.mPart0 & 0xffffffff) * ((b.mPart1 >> 32) & 0xffffffff); + + int128_t v10 = ((a.mPart0 >> 32) & 0xffffffff) * (b.mPart0 & 0xffffffff); + int128_t v11 = ((a.mPart0 >> 32) & 0xffffffff) * ((b.mPart0 >> 32) & 0xffffffff); + int128_t v12 = ((a.mPart0 >> 32) & 0xffffffff) * (b.mPart1 & 0xffffffff); + + int128_t v20 = (a.mPart1 & 0xffffffff) * (b.mPart0 & 0xffffffff); + int128_t v21 = (a.mPart1 & 0xffffffff) * ((b.mPart0 >> 32) & 0xffffffff); + + int128_t v30 = ((a.mPart1 >> 32) & 0xffffffff) * (b.mPart0 & 0xffffffff); + + // Do row addition, shifting as needed. + OperatorPlus(result, v01 << 32, result); + OperatorPlus(result, v02 << 64, result); + OperatorPlus(result, v03 << 96, result); + + OperatorPlus(result, v10 << 32, result); + OperatorPlus(result, v11 << 64, result); + OperatorPlus(result, v12 << 96, result); + + OperatorPlus(result, v20 << 64, result); + OperatorPlus(result, v21 << 96, result); + + OperatorPlus(result, v30 << 96, result); + #endif +} + +/////////////////////////////////////////////////////////////////////////////// +// OperatorShiftRight +// +// Returns: value >> nShift into result +// The output 'result' may *not* be the same as one the input. +// With rightward shifts of negative numbers, shift in zero from the left side. 
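// A small worked example (illustrative values, following the code below): for
// 0 < nShift < 64 the low half is rebuilt from both halves, while for
// nShift >= 64 the old high half simply becomes the new low half.
//
//     value = { mPart1 = 0x1, mPart0 = 0x1 }                        // 2^64 + 1
//     value >> 1   =>  { mPart1 = 0x0, mPart0 = 0x8000000000000000 } // 2^63
//     value >> 64  =>  { mPart1 = 0x0, mPart0 = 0x1 }                // 1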
+// +inline void int128_t_base::OperatorShiftRight(const int128_t_base& value, int nShift, int128_t_base& result) +{ + if(nShift >= 0) + { + if(nShift < 64) + { // 0 - 63 + result.mPart1 = (value.mPart1 >> nShift); + + if(nShift == 0) + result.mPart0 = (value.mPart0 >> nShift); + else + result.mPart0 = (value.mPart0 >> nShift) | (value.mPart1 << (64 - nShift)); + } + else + { // 64+ + result.mPart1 = 0; + result.mPart0 = (value.mPart1 >> (nShift - 64)); + } + } + else // (nShift < 0) + OperatorShiftLeft(value, -nShift, result); +} + + +/////////////////////////////////////////////////////////////////////////////// +// OperatorShiftRight +// +// Returns: value << nShift into result +// The output 'result' may *not* be the same as one the input. +// With rightward shifts of negative numbers, shift in zero from the left side. +// +inline void int128_t_base::OperatorShiftLeft(const int128_t_base& value, int nShift, int128_t_base& result) +{ + if(nShift >= 0) + { + if(nShift < 64) + { + if(nShift) // We need to have a special case because CPUs convert a shift by 64 to a no-op. + { + // 1 - 63 + result.mPart0 = (value.mPart0 << nShift); + result.mPart1 = (value.mPart1 << nShift) | (value.mPart0 >> (64 - nShift)); + } + else + { + result.mPart0 = value.mPart0; + result.mPart1 = value.mPart1; + } + } + else + { // 64+ + result.mPart0 = 0; + result.mPart1 = (value.mPart0 << (nShift - 64)); + } + } + else // (nShift < 0) + OperatorShiftRight(value, -nShift, result); +} + + +inline bool int128_t_base::operator!() const +{ + return (mPart0 == 0) && (mPart1 == 0); +} + + +/////////////////////////////////////////////////////////////////////////////// +// OperatorXOR +// +// Returns: value1 ^ value2 into result +// The output 'result' may be the same as one the input. +// +inline void int128_t_base::OperatorXOR(const int128_t_base& value1, const int128_t_base& value2, int128_t_base& result) +{ + result.mPart0 = (value1.mPart0 ^ value2.mPart0); + result.mPart1 = (value1.mPart1 ^ value2.mPart1); +} + + +/////////////////////////////////////////////////////////////////////////////// +// OperatorOR +// +// Returns: value1 | value2 into result +// The output 'result' may be the same as one the input. +// +inline void int128_t_base::OperatorOR(const int128_t_base& value1, const int128_t_base& value2, int128_t_base& result) +{ + result.mPart0 = (value1.mPart0 | value2.mPart0); + result.mPart1 = (value1.mPart1 | value2.mPart1); +} + + +/////////////////////////////////////////////////////////////////////////////// +// OperatorAND +// +// Returns: value1 & value2 into result +// The output 'result' may be the same as one the input. +// +inline void int128_t_base::OperatorAND(const int128_t_base& value1, const int128_t_base& value2, int128_t_base& result) +{ + result.mPart0 = (value1.mPart0 & value2.mPart0); + result.mPart1 = (value1.mPart1 & value2.mPart1); +} + + +inline bool int128_t_base::IsZero() const +{ + return (mPart0 == 0) && // Check mPart0 first as this will likely yield faster execution. 
+ (mPart1 == 0); +} + + +inline void int128_t_base::SetZero() +{ + mPart1 = 0; + mPart0 = 0; +} + + +inline void int128_t_base::TwosComplement() +{ + mPart1 = ~mPart1; + mPart0 = ~mPart0; + + // What we want to do, but isn't available at this level: + // operator++(); + // Alternative: + int128_t_base one((uint32_t)1); + OperatorPlus(*this, one, *this); +} + + +inline void int128_t_base::InverseTwosComplement() +{ + // What we want to do, but isn't available at this level: + // operator--(); + // Alternative: + int128_t_base one((uint32_t)1); + OperatorMinus(*this, one, *this); + + mPart1 = ~mPart1; + mPart0 = ~mPart0; +} + + +inline void int128_t_base::DoubleToUint128(double value) +{ + // Currently this function is limited to 64 bits of integer input. + // We need to make a better version of this function. Perhaps we should implement + // it via dissecting the IEEE floating point format (sign, exponent, matissa). + // EA_ASSERT(fabs(value) < 18446744073709551616.0); // Assert that the input is <= 64 bits of integer. + + mPart1 = 0; + mPart0 = (value >= 0 ? (uint64_t)value : (uint64_t)-value); +} + + + + + +/////////////////////////////////////////////////////////////////////////////////////////////////////// +// uint128_t implementation +/////////////////////////////////////////////////////////////////////////////////////////////////////// + +inline uint128_t uint128_t::operator^(const uint128_t& other) const +{ + uint128_t temp; + uint128_t::OperatorXOR(*this, other, temp); + return temp; +} + +inline uint128_t uint128_t::operator|(const uint128_t& other) const +{ + uint128_t temp; + uint128_t::OperatorOR(*this, other, temp); + return temp; +} + +inline uint128_t uint128_t::operator&(const uint128_t& other) const +{ + uint128_t temp; + uint128_t::OperatorAND(*this, other, temp); + return temp; +} + +inline uint128_t& uint128_t::operator^=(const uint128_t& value) +{ + OperatorXOR(*this, value, *this); + return *this; +} + +inline uint128_t& uint128_t::operator|=(const uint128_t& value) +{ + OperatorOR(*this, value, *this); + return *this; +} + +inline uint128_t& uint128_t::operator&=(const uint128_t& value) +{ + OperatorAND(*this, value, *this); + return *this; +} + +// With rightward shifts of negative numbers, shift in zero from the left side. +inline uint128_t uint128_t::operator>>(int nShift) const +{ + uint128_t temp; + OperatorShiftRight(*this, nShift, temp); + return temp; +} + +// With rightward shifts of negative numbers, shift in zero from the left side. 
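// A matching sketch for the leftward direction (hypothetical caller, using
// OperatorShiftLeft above): shifting by 64 or more moves the low half into the
// high half and zero-fills the low half.
//
//     uint128_t one((uint32_t)1);
//     uint128_t big  = one << 64;   // { mPart1 = 1, mPart0 = 0 }, i.e. 2^64
//     uint128_t back = big >> 64;   // equal to one again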
+inline uint128_t uint128_t::operator<<(int nShift) const +{ + uint128_t temp; + OperatorShiftLeft(*this, nShift, temp); + return temp; +} + +inline uint128_t& uint128_t::operator>>=(int nShift) +{ + uint128_t temp; + OperatorShiftRight(*this, nShift, temp); + *this = temp; + return *this; +} + +inline uint128_t& uint128_t::operator<<=(int nShift) +{ + uint128_t temp; + OperatorShiftLeft(*this, nShift, temp); + *this = temp; + return *this; +} + +inline uint128_t& uint128_t::operator+=(const uint128_t& value) +{ + OperatorPlus(*this, value, *this); + return *this; +} + +inline uint128_t& uint128_t::operator-=(const uint128_t& value) +{ + OperatorMinus(*this, value, *this); + return *this; +} + +inline uint128_t& uint128_t::operator*=(const uint128_t& value) +{ + *this = *this * value; + return *this; +} + +inline uint128_t& uint128_t::operator/=(const uint128_t& value) +{ + *this = *this / value; + return *this; +} + +inline uint128_t& uint128_t::operator%=(const uint128_t& value) +{ + *this = *this % value; + return *this; +} + +inline uint128_t uint128_t::operator+(const uint128_t& other) +{ + uint128_t temp; + uint128_t::OperatorPlus(*this, other, temp); + return temp; +} + +inline uint128_t uint128_t::operator-(const uint128_t& other) +{ + uint128_t temp; + uint128_t::OperatorMinus(*this, other, temp); + return temp; +} + +inline uint128_t uint128_t::operator*(const uint128_t& other) +{ + uint128_t returnValue; + int128_t_base::OperatorMul(*this, other, returnValue); + return returnValue; +} + +inline uint128_t uint128_t::operator/(const uint128_t& other) +{ + uint128_t remainder; + uint128_t quotient; + this->Modulus(other, quotient, remainder); + return quotient; +} + +inline uint128_t uint128_t::operator%(const uint128_t& other) +{ + uint128_t remainder; + uint128_t quotient; + this->Modulus(other, quotient, remainder); + return remainder; +} + +inline uint128_t uint128_t::operator+() const +{ + return *this; +} + +inline uint128_t uint128_t::operator~() const +{ + return uint128_t(~mPart0, ~mPart1); +} + +inline uint128_t& uint128_t::operator--() +{ + int128_t_base one((uint32_t)1); + OperatorMinus(*this, one, *this); + return *this; +} + +inline uint128_t uint128_t::operator--(int) +{ + uint128_t temp((uint32_t)1); + OperatorMinus(*this, temp, temp); + return temp; +} + +inline uint128_t uint128_t::operator++(int) +{ + uint128_t prev = *this; + uint128_t temp((uint32_t)1); + OperatorPlus(*this, temp, *this); + return prev; +} + +inline uint128_t& uint128_t::operator++() +{ + int128_t_base one((uint32_t)1); + OperatorPlus(*this, one, *this); + return *this; +} + +inline void uint128_t::Negate() +{ + TwosComplement(); +} + +inline uint128_t uint128_t::operator-() const +{ + uint128_t returnValue(*this); + returnValue.Negate(); + return returnValue; +} + +// This function forms the basis of all logical comparison functions. +// If value1 < value2, the return value is -1. +// If value1 == value2, the return value is 0. +// If value1 > value2, the return value is 1. +inline int uint128_t::compare(const uint128_t& other) const +{ + // Compare individual parts. At this point, the two numbers have the same sign. + if(mPart1 == other.mPart1) + { + if(mPart0 == other.mPart0) + return 0; + else if(mPart0 > other.mPart0) + return 1; + // return -1; //Just fall through to the end. 
+ } + else if(mPart1 > other.mPart1) + return 1; + return -1; +} + +EA_DISABLE_VC_WARNING(4723) // warning C4723: potential divide by 0 +inline void uint128_t::Modulus(const uint128_t& divisor, uint128_t& quotient, uint128_t& remainder) const +{ + uint128_t tempDividend(*this); + uint128_t tempDivisor(divisor); + + if(tempDivisor.IsZero()) + { + // Force a divide by zero exception. + // We know that tempDivisor.mPart0 is zero. + quotient.mPart0 /= tempDivisor.mPart0; + } + else if(tempDividend.IsZero()) + { + quotient = uint128_t((uint32_t)0); + remainder = uint128_t((uint32_t)0); + } + else + { + remainder.SetZero(); + + for(int i(0); i < 128; i++) + { + remainder += (uint32_t)tempDividend.GetBit(127 - i); + const bool bBit(remainder >= tempDivisor); + quotient.SetBit(127 - i, bBit); + + if(bBit) + remainder -= tempDivisor; + + if((i != 127) && !remainder.IsZero()) + remainder <<= 1; + } + } +} +EA_RESTORE_VC_WARNING() + +inline bool uint128_t::operator==(const uint128_t& other) const +{ + return (mPart0 == other.mPart0) && // Check mPart0 first as this will likely yield faster execution. + (mPart1 == other.mPart1); +} + +inline bool uint128_t::operator< (const uint128_t& other) const { return (compare(other) < 0); } +inline bool uint128_t::operator!=(const uint128_t& other) const { return !(*this == other); } +inline bool uint128_t::operator> (const uint128_t& other) const { return other < *this; } +inline bool uint128_t::operator>=(const uint128_t& other) const { return !(*this < other); } +inline bool uint128_t::operator<=(const uint128_t& other) const { return !(other < *this); } + +inline bool uint128_t::IsNegative() const +{ // True if value < 0 + return false; +} + +inline bool uint128_t::IsPositive() const +{ + // True of value >= 0 + return true; +} + + + + + + +/////////////////////////////////////////////////////////////////////////////////////////////////////// +// int128_t implementation +/////////////////////////////////////////////////////////////////////////////////////////////////////// + +inline void int128_t::Negate() +{ + if (IsPositive()) + TwosComplement(); + else + InverseTwosComplement(); +} + +inline int128_t int128_t::operator-() const +{ + int128_t returnValue(*this); + returnValue.Negate(); + return returnValue; +} + +inline int128_t& int128_t::operator++() +{ + int128_t_base one((uint32_t)1); + OperatorPlus(*this, one, *this); + return *this; +} + +inline int128_t& int128_t::operator--() +{ + int128_t_base one((uint32_t)1); + OperatorMinus(*this, one, *this); + return *this; +} + +inline int128_t int128_t::operator++(int) +{ + int128_t prev = *this; + int128_t temp((uint32_t)1); + OperatorPlus(*this, temp, *this); + return prev; +} + +inline int128_t int128_t::operator--(int) +{ + int128_t temp((uint32_t)1); + OperatorMinus(*this, temp, temp); + return temp; +} + +inline int128_t int128_t::operator+() const +{ + return *this; +} + +inline int128_t int128_t::operator~() const +{ + return int128_t(~mPart0, ~mPart1); +} + +inline int128_t int128_t::operator+(const int128_t& other) +{ + int128_t temp; + int128_t::OperatorPlus(*this, other, temp); + return temp; +} + +inline int128_t int128_t::operator-(const int128_t& other) +{ + int128_t temp; + int128_t::OperatorMinus(*this, other, temp); + return temp; +} + +// This function forms the basis of all logical comparison functions. +// If value1 < value2, the return value is -1. +// If value1 == value2, the return value is 0. +// If value1 > value2, the return value is 1. 
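+// Editor's note: the lines below are an illustrative sketch (not part of the
+// original source) showing how compare() drives the relational operators:
+//     int128_t a((uint32_t)2), b((uint32_t)3);
+//     a.compare(b);    // returns -1, so (a < b) is true and (a >= b) is false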
+inline int int128_t::compare(const int128_t& other) const +{ + // Cache some values. Positive means >= 0. Negative means < 0 and thus means '!positive'. + const bool bValue1IsPositive( IsPositive()); + const bool bValue2IsPositive(other.IsPositive()); + + // Do positive/negative tests. + if(bValue1IsPositive != bValue2IsPositive) + return bValue1IsPositive ? 1 : -1; + + // Compare individual parts. At this point, the two numbers have the same sign. + if(mPart1 == other.mPart1) + { + if(mPart0 == other.mPart0) + return 0; + else if(mPart0 > other.mPart0) + return 1; + // return -1; //Just fall through to the end. + } + else if(mPart1 > other.mPart1) + return 1; + return -1; +} + +inline bool int128_t::operator==(const int128_t& other) const +{ + return (mPart0 == other.mPart0) && // Check mPart0 first as this will likely yield faster execution. + (mPart1 == other.mPart1); +} + +inline bool int128_t::operator!=(const int128_t& other) const +{ + return (mPart0 != other.mPart0) || // Check mPart0 first as this will likely yield faster execution. + (mPart1 != other.mPart1); +} + +inline bool int128_t::operator>(const int128_t& other) const +{ + return (compare(other) > 0); +} + +inline bool int128_t::operator>=(const int128_t& other) const +{ + return (compare(other) >= 0); +} + +inline bool int128_t::operator<(const int128_t& other) const +{ + return (compare(other) < 0); +} + +inline bool int128_t::operator<=(const int128_t& other) const +{ + return (compare(other) <= 0); +} + +inline bool int128_t::IsNegative() const +{ // True if value < 0 + return ((mPart1 & UINT64_C(0x8000000000000000)) != 0); +} + +inline bool int128_t::IsPositive() const +{ // True of value >= 0 + return ((mPart1 & UINT64_C(0x8000000000000000)) == 0); +} + +inline int128_t int128_t::operator*(const int128_t& other) +{ + int128_t a(*this); + int128_t b(other); + int128_t returnValue; + + // Correctly handle negative values + bool bANegative(false); + bool bBNegative(false); + + if(a.IsNegative()) + { + bANegative = true; + a.Negate(); + } + + if(b.IsNegative()) + { + bBNegative = true; + b.Negate(); + } + + int128_t_base::OperatorMul(a, b, returnValue); + + // Do negation as needed. 
+ if(bANegative != bBNegative) + returnValue.Negate(); + + return returnValue; +} + +inline int128_t int128_t::operator/(const int128_t& other) +{ + int128_t remainder; + int128_t quotient; + this->Modulus(other, quotient, remainder); + return quotient; +} + +inline int128_t int128_t::operator<<(int nShift) const +{ + int128_t temp; + OperatorShiftLeft(*this, nShift, temp); + return temp; +} + +inline int128_t& int128_t::operator+=(const int128_t& value) +{ + OperatorPlus(*this, value, *this); + return *this; +} + +inline int128_t& int128_t::operator-=(const int128_t& value) +{ + OperatorMinus(*this, value, *this); + return *this; +} + +inline int128_t& int128_t::operator<<=(int nShift) +{ + int128_t temp; + OperatorShiftLeft(*this, nShift, temp); + *this = temp; + return *this; +} + +inline int128_t& int128_t::operator*=(const int128_t& value) +{ + *this = *this * value; + return *this; +} + +inline int128_t& int128_t::operator%=(const int128_t& value) +{ + *this = *this % value; + return *this; +} + +inline int128_t int128_t::operator%(const int128_t& other) +{ + int128_t remainder; + int128_t quotient; + this->Modulus(other, quotient, remainder); + return remainder; +} + +inline int128_t& int128_t::operator/=(const int128_t& value) +{ + *this = *this / value; + return *this; +} + +// With rightward shifts of negative numbers, shift in zero from the left side. +inline int128_t int128_t::operator>>(int nShift) const +{ + int128_t temp; + OperatorShiftRight(*this, nShift, temp); + return temp; +} + +inline int128_t& int128_t::operator>>=(int nShift) +{ + int128_t temp; + OperatorShiftRight(*this, nShift, temp); + *this = temp; + return *this; +} + +inline int128_t int128_t::operator^(const int128_t& other) const +{ + int128_t temp; + int128_t::OperatorXOR(*this, other, temp); + return temp; +} + +inline int128_t int128_t::operator|(const int128_t& other) const +{ + int128_t temp; + int128_t::OperatorOR(*this, other, temp); + return temp; +} + + +inline int128_t int128_t::operator&(const int128_t& other) const +{ + int128_t temp; + int128_t::OperatorAND(*this, other, temp); + return temp; +} + +inline int128_t& int128_t::operator^=(const int128_t& value) +{ + OperatorXOR(*this, value, *this); + return *this; +} + +inline int128_t& int128_t::operator|=(const int128_t& value) +{ + OperatorOR(*this, value, *this); + return *this; +} + +inline int128_t& int128_t::operator&=(const int128_t& value) +{ + OperatorAND(*this, value, *this); + return *this; +} + +EA_DISABLE_VC_WARNING(4723) // warning C4723: potential divide by 0 +inline void int128_t::Modulus(const int128_t& divisor, int128_t& quotient, int128_t& remainder) const +{ + int128_t tempDividend(*this); + int128_t tempDivisor(divisor); + + bool bDividendNegative = false; + bool bDivisorNegative = false; + + if(tempDividend.IsNegative()) + { + bDividendNegative = true; + tempDividend.Negate(); + } + if(tempDivisor.IsNegative()) + { + bDivisorNegative = true; + tempDivisor.Negate(); + } + + // Handle the special cases + if(tempDivisor.IsZero()) + { + // Force a divide by zero exception. + // We know that tempDivisor.mPart0 is zero. 
+ quotient.mPart0 /= tempDivisor.mPart0; + } + else if(tempDividend.IsZero()) + { + quotient = int128_t((uint32_t)0); + remainder = int128_t((uint32_t)0); + } + else + { + remainder.SetZero(); + + for(int i(0); i < 128; i++) + { + remainder += (uint32_t)tempDividend.GetBit(127 - i); + const bool bBit(remainder >= tempDivisor); + quotient.SetBit(127 - i, bBit); + + if(bBit) + remainder -= tempDivisor; + + if((i != 127) && !remainder.IsZero()) + remainder <<= 1; + } + } + + if((bDividendNegative && !bDivisorNegative) || (!bDividendNegative && bDivisorNegative)) + { + // Ensure the following formula applies for negative dividends + // dividend = divisor * quotient + remainder + quotient.Negate(); + } +} +EA_RESTORE_VC_WARNING() + + + + + + +/////////////////////////////////////////////////////////////////////////////////////////////////////// +// INT128_C / UINT128_C +// +// The C99 language defines macros for portably defining constants of +// sized numeric types. For example, there might be: +// #define UINT64_C(x) x##ULL +// Since our int128 data type is not a built-in type, we can't define a +// UINT128_C macro as something that pastes ULLL at the end of the digits. +// Instead we define it to create a temporary that is constructed from a +// string of the digits. This will work in most cases that suffix pasting +// would work. +// +/* EA_CONSTEXPR */ inline uint128_t UINT128_C(uint64_t nPart1, uint64_t nPart0) { return uint128_t(nPart0, nPart1); } +/* EA_CONSTEXPR */ inline int128_t INT128_C(int64_t nPart1, int64_t nPart0) { return int128_t(static_cast(nPart0), static_cast(nPart1)); } + + + + +#endif // INCLUDED_int128_h + diff --git a/libkram/eastl/include/EABase/nullptr.h b/libkram/eastl/include/EABase/nullptr.h new file mode 100644 index 00000000..d6629d50 --- /dev/null +++ b/libkram/eastl/include/EABase/nullptr.h @@ -0,0 +1,102 @@ +/*----------------------------------------------------------------------------- + * nullptr.h + * + * Copyright (c) Electronic Arts Inc. All rights reserved. + *---------------------------------------------------------------------------*/ + + +#include +#include + + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once /* Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. */ +#endif + + +#if defined(EA_COMPILER_CPP11_ENABLED) && !defined(EA_COMPILER_NO_NULLPTR) && !defined(EA_HAVE_nullptr_t_IMPL) + // The compiler supports nullptr, but the standard library doesn't implement a declaration for std::nullptr_t. So we provide one. + namespace std { typedef decltype(nullptr) nullptr_t; } +#endif + + + +#if defined(EA_COMPILER_NO_NULLPTR) // If the compiler lacks a native version... + + namespace std + { + class nullptr_t + { + public: + template // When tested a pointer, acts as 0. + operator T*() const + { return 0; } + + template // When tested as a member pointer, acts as 0. + operator T C::*() const + { return 0; } + + typedef void* (nullptr_t::*bool_)() const; + operator bool_() const // An rvalue of type std::nullptr_t can be converted to an rvalue of type bool; the resulting value is false. + { return false; } // We can't use operator bool(){ return false; } because bool is convertable to int which breaks other required functionality. + + // We can't enable this without generating warnings about nullptr being uninitialized after being used when created without "= {}". + //void* mSizeofVoidPtr; // sizeof(nullptr_t) == sizeof(void*). 
Needs to be public if nullptr_t is to be a POD. + + private: + void operator&() const; // Address cannot be taken. + }; + + inline nullptr_t nullptr_get() + { + nullptr_t n = { }; // std::nullptr exists. + return n; + } + + #if !defined(nullptr) // If somebody hasn't already defined nullptr in a custom way... + #define nullptr nullptr_get() + #endif + + } // namespace std + + + template + inline bool operator==(T* p, const std::nullptr_t) + { return p == 0; } + + template + inline bool operator==(const std::nullptr_t, T* p) + { return p == 0; } + + template + inline bool operator==(T U::* p, const std::nullptr_t) + { return p == 0; } + + template + inline bool operator==(const std::nullptr_t, T U::* p) + { return p == 0; } + + inline bool operator==(const std::nullptr_t, const std::nullptr_t) + { return true; } + + inline bool operator!=(const std::nullptr_t, const std::nullptr_t) + { return false; } + + inline bool operator<(const std::nullptr_t, const std::nullptr_t) + { return false; } + + inline bool operator>(const std::nullptr_t, const std::nullptr_t) + { return false; } + + inline bool operator<=(const std::nullptr_t, const std::nullptr_t) + { return true; } + + inline bool operator>=(const std::nullptr_t, const std::nullptr_t) + { return true; } + + + using std::nullptr_t; // exported to global namespace. + using std::nullptr_get; // exported to global namespace. + +#endif // EA_COMPILER_NO_NULLPTR + diff --git a/libkram/eastl/include/EABase/version.h b/libkram/eastl/include/EABase/version.h new file mode 100644 index 00000000..b6e1b665 --- /dev/null +++ b/libkram/eastl/include/EABase/version.h @@ -0,0 +1,36 @@ +/*----------------------------------------------------------------------------- + * version.h + * + * Copyright (c) Electronic Arts Inc. All rights reserved. + *---------------------------------------------------------------------------*/ + +#ifndef INCLUDED_EABASE_VERSION_H +#define INCLUDED_EABASE_VERSION_H + +/////////////////////////////////////////////////////////////////////////////// +// EABASE_VERSION +// +// We more or less follow the conventional EA packaging approach to versioning +// here. A primary distinction here is that minor versions are defined as two +// digit entities (e.g. .03") instead of minimal digit entities ".3"). The logic +// here is that the value is a counter and not a floating point fraction. +// Note that the major version doesn't have leading zeros. +// +// Example version strings: +// "0.91.00" // Major version 0, minor version 91, patch version 0. +// "1.00.00" // Major version 1, minor and patch version 0. +// "3.10.02" // Major version 3, minor version 10, patch version 02. +// "12.03.01" // Major version 12, minor version 03, patch version +// +// Example usage: +// printf("EABASE version: %s", EABASE_VERSION); +// printf("EABASE version: %d.%d.%d", EABASE_VERSION_N / 10000 % 100, EABASE_VERSION_N / 100 % 100, EABASE_VERSION_N % 100); +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EABASE_VERSION + #define EABASE_VERSION "2.09.12" + #define EABASE_VERSION_N 20912 +#endif + +#endif diff --git a/libkram/eastl/include/EASTL/algorithm.h b/libkram/eastl/include/EASTL/algorithm.h new file mode 100644 index 00000000..da35c2e2 --- /dev/null +++ b/libkram/eastl/include/EASTL/algorithm.h @@ -0,0 +1,4221 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+/////////////////////////////////////////////////////////////////////////////
+
+///////////////////////////////////////////////////////////////////////////////
+// This file implements some of the primary algorithms from the C++ STL
+// algorithm library. These versions are just like the STL versions and so
+// are redundant. They are provided solely for the purpose of projects that
+// either cannot use standard C++ STL or want algorithms that have guaranteed
+// identical behaviour across platforms.
+///////////////////////////////////////////////////////////////////////////////
+
+
+///////////////////////////////////////////////////////////////////////////////
+// Definitions
+//
+// You will notice that we are very particular about the templated typenames
+// we use here. You will notice that we follow the C++ standard closely in
+// these respects. Each of these typenames has a specific meaning;
+// this is why we don't just label templated arguments with just letters
+// such as T, U, V, A, B. Here we provide a quick reference for the typenames
+// we use. See the C++ standard, section 25-8 for more details.
+// --------------------------------------------------------------
+// typename                 Meaning
+// --------------------------------------------------------------
+// T                        The value type.
+// Compare                  A function which takes two arguments and returns the lesser of the two.
+// Predicate                A function which takes one argument and returns true if the argument meets some criteria.
+// BinaryPredicate          A function which takes two arguments and returns true if some criterion is met (e.g. they are equal).
+// StrictWeakOrdering       A BinaryPredicate that compares two objects, returning true if the first precedes the second. Like Compare but has additional requirements. Used for sorting routines.
+// Function                 A function which takes one argument and applies some operation to the target.
+// Size                     A count or size.
+// Generator                A function which takes no arguments and returns a value (which will usually be assigned to an object).
+// UnaryOperation           A function which takes one argument and returns a value (which will usually be assigned to a second object).
+// BinaryOperation          A function which takes two arguments and returns a value (which will usually be assigned to a third object).
+// InputIterator            An input iterator (iterator you read from) which allows reading each element only once and only in a forward direction.
+// ForwardIterator          An input iterator which is like InputIterator except it can be reset back to the beginning.
+// BidirectionalIterator    An input iterator which is like ForwardIterator except it can be read in a backward direction as well.
+// RandomAccessIterator     An input iterator which can be addressed like an array. It is a superset of all other input iterators.
+// OutputIterator           An output iterator (iterator you write to) which allows writing each element only once and only in a forward direction.
+//
+// Note that a function which takes an InputIterator will also work with a
+// ForwardIterator, BidirectionalIterator, or RandomAccessIterator.
+// The given iterator type is merely the -minimum- functionality the
+// iterator must support.
+///////////////////////////////////////////////////////////////////////////////
+
+
+///////////////////////////////////////////////////////////////////////////////
+// Optimizations
+//
+// There are a number of opportunities for optimizations that we take here
+// in this library. The most obvious kinds are those that substitute memcpy
+// in the place of a conventional loop for data types with which this is
+// possible. The algorithms here are optimized to a higher level than currently
+// available C++ STL algorithms from vendors such as Microsoft. This is especially
+// so for game programming on console devices, as we do things such as reduce
+// branching relative to other STL algorithm implementations. However, the
+// proper implementation of these algorithm optimizations is a fairly tricky
+// thing.
+//
+// The various things we look to take advantage of in order to implement
+// optimizations include:
+//    - Taking advantage of random access iterators.
+//    - Taking advantage of POD (plain old data) data types.
+//    - Taking advantage of type_traits in general.
+//    - Reducing branching and taking advantage of likely branch predictions.
+//    - Taking advantage of issues related to pointer and reference aliasing.
+//    - Improving cache coherency during memory accesses.
+//    - Making code more likely to be inlinable by the compiler.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+
+///////////////////////////////////////////////////////////////////////////////
+// Supported Algorithms
+//
+// Algorithms that we implement are listed here. Note that these items are not
+// all within this header file, as we split up the header files in order to
+// improve compilation performance. Items marked with '+' are extensions which
+// don't exist in the C++ standard.
+//
+// -------------------------------------------------------------------------------
+// Algorithm Notes
+// -------------------------------------------------------------------------------
+// adjacent_find
+// adjacent_find
+// all_of C++11
+// any_of C++11
+// none_of C++11
+// binary_search
+// binary_search
+// +binary_search_i
+// +binary_search_i
+// +change_heap Found in heap.h
+// +change_heap Found in heap.h
+// clamp
+// copy
+// copy_if C++11
+// copy_n C++11
+// copy_backward
+// count
+// count_if
+// equal
+// equal
+// equal_range
+// equal_range
+// fill
+// fill_n
+// find
+// find_end
+// find_end
+// find_first_of
+// find_first_of
+// +find_first_not_of
+// +find_first_not_of
+// +find_last_of
+// +find_last_of
+// +find_last_not_of
+// +find_last_not_of
+// find_if
+// find_if_not
+// for_each
+// generate
+// generate_n
+// +identical
+// +identical
+// iter_swap
+// lexicographical_compare
+// lexicographical_compare
+// lower_bound
+// lower_bound
+// make_heap Found in heap.h
+// make_heap Found in heap.h
+// min
+// min
+// max
+// max
+// +min_alt Exists to work around the problem of conflicts with min/max #defines on some systems.
+// +min_alt +// +max_alt +// +max_alt +// +median +// +median +// merge Found in sort.h +// merge Found in sort.h +// min_element +// min_element +// max_element +// max_element +// mismatch +// mismatch +// move +// move_backward +// nth_element Found in sort.h +// nth_element Found in sort.h +// partial_sort Found in sort.h +// partial_sort Found in sort.h +// push_heap Found in heap.h +// push_heap Found in heap.h +// pop_heap Found in heap.h +// pop_heap Found in heap.h +// random_shuffle +// remove +// remove_if +// remove_copy +// remove_copy_if +// +remove_heap Found in heap.h +// +remove_heap Found in heap.h +// replace +// replace_if +// replace_copy +// replace_copy_if +// reverse_copy +// reverse +// random_shuffle +// rotate +// rotate_copy +// search +// search +// search_n +// set_difference +// set_difference +// set_difference_2 +// set_difference_2 +// set_decomposition +// set_decomposition +// set_intersection +// set_intersection +// set_symmetric_difference +// set_symmetric_difference +// set_union +// set_union +// sort Found in sort.h +// sort Found in sort.h +// sort_heap Found in heap.h +// sort_heap Found in heap.h +// stable_sort Found in sort.h +// stable_sort Found in sort.h +// swap +// swap_ranges +// transform +// transform +// unique +// unique +// upper_bound +// upper_bound +// is_permutation +// is_permutation +// next_permutation +// next_permutation +// +// Algorithms from the C++ standard that we don't implement are listed here. +// Most of these items are absent because they aren't used very often. +// They also happen to be the more complicated than other algorithms. +// However, we can implement any of these functions for users that might +// need them. +// includes +// includes +// inplace_merge +// inplace_merge +// partial_sort_copy +// partial_sort_copy +// paritition +// prev_permutation +// prev_permutation +// search_n +// stable_partition +// unique_copy +// unique_copy +// +/////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ALGORITHM_H +#define EASTL_ALGORITHM_H + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +EA_DISABLE_ALL_VC_WARNINGS(); + + #if defined(EA_COMPILER_MSVC) && (defined(EA_PROCESSOR_X86) || defined(EA_PROCESSOR_X86_64)) + #include + #endif + + #include + #include // memcpy, memcmp, memmove + +EA_RESTORE_ALL_VC_WARNINGS(); + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// min/max workaround +// +// MSVC++ has #defines for min/max which collide with the min/max algorithm +// declarations. The following may still not completely resolve some kinds of +// problems with MSVC++ #defines, though it deals with most cases in production +// game code. +// +#if EASTL_NOMINMAX + #ifdef min + #undef min + #endif + #ifdef max + #undef max + #endif +#endif + + + + +namespace eastl +{ + /// min_element + /// + /// min_element finds the smallest element in the range [first, last). + /// It returns the first iterator i in [first, last) such that no other + /// iterator in [first, last) points to a value smaller than *i. + /// The return value is last if and only if [first, last) is an empty range. 
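+	/// Example usage (editor's illustrative sketch, not part of the original comment):
+	///     int a[] = { 3, 1, 2 };
+	///     int* p = eastl::min_element(a, a + 3);   // p points to the 1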
+ /// + /// Returns: The first iterator i in the range [first, last) such that + /// for any iterator j in the range [first, last) the following corresponding + /// condition holds: !(*j < *i). + /// + /// Complexity: Exactly 'max((last - first) - 1, 0)' applications of the + /// corresponding comparisons. + /// + template + ForwardIterator min_element(ForwardIterator first, ForwardIterator last) + { + if(first != last) + { + ForwardIterator currentMin = first; + + while(++first != last) + { + if(*first < *currentMin) + currentMin = first; + } + return currentMin; + } + return first; + } + + + /// min_element + /// + /// min_element finds the smallest element in the range [first, last). + /// It returns the first iterator i in [first, last) such that no other + /// iterator in [first, last) points to a value smaller than *i. + /// The return value is last if and only if [first, last) is an empty range. + /// + /// Returns: The first iterator i in the range [first, last) such that + /// for any iterator j in the range [first, last) the following corresponding + /// conditions hold: compare(*j, *i) == false. + /// + /// Complexity: Exactly 'max((last - first) - 1, 0)' applications of the + /// corresponding comparisons. + /// + template + ForwardIterator min_element(ForwardIterator first, ForwardIterator last, Compare compare) + { + if(first != last) + { + ForwardIterator currentMin = first; + + while(++first != last) + { + if(compare(*first, *currentMin)) + currentMin = first; + } + return currentMin; + } + return first; + } + + + /// max_element + /// + /// max_element finds the largest element in the range [first, last). + /// It returns the first iterator i in [first, last) such that no other + /// iterator in [first, last) points to a value greater than *i. + /// The return value is last if and only if [first, last) is an empty range. + /// + /// Returns: The first iterator i in the range [first, last) such that + /// for any iterator j in the range [first, last) the following corresponding + /// condition holds: !(*i < *j). + /// + /// Complexity: Exactly 'max((last - first) - 1, 0)' applications of the + /// corresponding comparisons. + /// + template + ForwardIterator max_element(ForwardIterator first, ForwardIterator last) + { + if(first != last) + { + ForwardIterator currentMax = first; + + while(++first != last) + { + if(*currentMax < *first) + currentMax = first; + } + return currentMax; + } + return first; + } + + + /// max_element + /// + /// max_element finds the largest element in the range [first, last). + /// It returns the first iterator i in [first, last) such that no other + /// iterator in [first, last) points to a value greater than *i. + /// The return value is last if and only if [first, last) is an empty range. + /// + /// Returns: The first iterator i in the range [first, last) such that + /// for any iterator j in the range [first, last) the following corresponding + /// condition holds: compare(*i, *j) == false. + /// + /// Complexity: Exactly 'max((last - first) - 1, 0)' applications of the + /// corresponding comparisons. 
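+	/// Example usage (editor's illustrative sketch, using eastl::less as the comparator):
+	///     int a[] = { 3, 1, 2 };
+	///     int* p = eastl::max_element(a, a + 3, eastl::less<int>());   // p points to the 3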
+ /// + template + ForwardIterator max_element(ForwardIterator first, ForwardIterator last, Compare compare) + { + if(first != last) + { + ForwardIterator currentMax = first; + + while(++first != last) + { + if(compare(*currentMax, *first)) + currentMax = first; + } + return currentMax; + } + return first; + } + + + #if EASTL_MINMAX_ENABLED + + /// min + /// + /// Min returns the lesser of its two arguments; it returns the first + /// argument if neither is less than the other. The two arguments are + /// compared with operator <. + /// + /// This min and our other min implementations are defined as returning: + /// b < a ? b : a + /// which for example may in practice result in something different than: + /// b <= a ? b : a + /// in the case where b is different from a (though they compare as equal). + /// We choose the specific ordering here because that's the ordering + /// done by other STL implementations. + /// + /// Some compilers (e.g. VS20003 - VS2013) generate poor code for the case of + /// scalars returned by reference, so we provide a specialization for those cases. + /// The specialization returns T by value instead of reference, which is + /// not that the Standard specifies. The Standard allows you to use + /// an expression like &max(x, y), which would be impossible in this case. + /// However, we have found no actual code that uses min or max like this and + /// this specialization causes no problems in practice. Microsoft has acknowledged + /// the problem and may fix it for a future VS version. + /// + template + inline EA_CONSTEXPR typename eastl::enable_if::value, T>::type + min(T a, T b) + { + return b < a ? b : a; + } + + template + inline EA_CONSTEXPR typename eastl::enable_if::value, const T&>::type + min(const T& a, const T& b) + { + return b < a ? b : a; + } + + inline EA_CONSTEXPR float min(float a, float b) { return b < a ? b : a; } + inline EA_CONSTEXPR double min(double a, double b) { return b < a ? b : a; } + inline EA_CONSTEXPR long double min(long double a, long double b) { return b < a ? b : a; } + + #endif // EASTL_MINMAX_ENABLED + + + /// min_alt + /// + /// This is an alternative version of min that avoids any possible + /// collisions with Microsoft #defines of min and max. + /// + /// See min(a, b) for detailed specifications. + /// + template + inline EA_CONSTEXPR typename eastl::enable_if::value, T>::type + min_alt(T a, T b) + { + return b < a ? b : a; + } + + template + inline typename eastl::enable_if::value, const T&>::type + min_alt(const T& a, const T& b) + { + return b < a ? b : a; + } + + inline EA_CONSTEXPR float min_alt(float a, float b) { return b < a ? b : a; } + inline EA_CONSTEXPR double min_alt(double a, double b) { return b < a ? b : a; } + inline EA_CONSTEXPR long double min_alt(long double a, long double b) { return b < a ? b : a; } + + + #if EASTL_MINMAX_ENABLED + + /// min + /// + /// Min returns the lesser of its two arguments; it returns the first + /// argument if neither is less than the other. The two arguments are + /// compared with the Compare function (or function object), which + /// takes two arguments and returns true if the first is less than + /// the second. + /// + /// See min(a, b) for detailed specifications. 
+ /// + /// Example usage: + /// struct A{ int a; }; + /// struct Struct{ bool operator()(const A& a1, const A& a2){ return a1.a < a2.a; } }; + /// + /// A a1, a2, a3; + /// a3 = min(a1, a2, Struct()); + /// + /// Example usage: + /// struct B{ int b; }; + /// inline bool Function(const B& b1, const B& b2){ return b1.b < b2.b; } + /// + /// B b1, b2, b3; + /// b3 = min(b1, b2, Function); + /// + template + inline const T& + min(const T& a, const T& b, Compare compare) + { + return compare(b, a) ? b : a; + } + + #endif // EASTL_MINMAX_ENABLED + + + /// min_alt + /// + /// This is an alternative version of min that avoids any possible + /// collisions with Microsoft #defines of min and max. + /// + /// See min(a, b) for detailed specifications. + /// + template + inline const T& + min_alt(const T& a, const T& b, Compare compare) + { + return compare(b, a) ? b : a; + } + + + #if EASTL_MINMAX_ENABLED + + /// max + /// + /// Max returns the greater of its two arguments; it returns the first + /// argument if neither is greater than the other. The two arguments are + /// compared with operator < (and not operator >). + /// + /// This min and our other min implementations are defined as returning: + /// a < b ? b : a + /// which for example may in practice result in something different than: + /// a <= b ? b : a + /// in the case where b is different from a (though they compare as equal). + /// We choose the specific ordering here because that's the ordering + /// done by other STL implementations. + /// + template + inline EA_CONSTEXPR typename eastl::enable_if::value, T>::type + max(T a, T b) + { + return a < b ? b : a; + } + + template + inline EA_CONSTEXPR typename eastl::enable_if::value, const T&>::type + max(const T& a, const T& b) + { + return a < b ? b : a; + } + + inline EA_CONSTEXPR float max(float a, float b) { return a < b ? b : a; } + inline EA_CONSTEXPR double max(double a, double b) { return a < b ? b : a; } + inline EA_CONSTEXPR long double max(long double a, long double b) { return a < b ? b : a; } + + #endif // EASTL_MINMAX_ENABLED + + + /// max_alt + /// + /// This is an alternative version of max that avoids any possible + /// collisions with Microsoft #defines of min and max. + /// + template + inline EA_CONSTEXPR typename eastl::enable_if::value, T>::type + max_alt(T a, T b) + { + return a < b ? b : a; + } + + template + inline EA_CONSTEXPR typename eastl::enable_if::value, const T&>::type + max_alt(const T& a, const T& b) + { + return a < b ? b : a; + } + + inline EA_CONSTEXPR float max_alt(float a, float b) { return a < b ? b : a; } + inline EA_CONSTEXPR double max_alt(double a, double b) { return a < b ? b : a; } + inline EA_CONSTEXPR long double max_alt(long double a, long double b) { return a < b ? b : a; } + + + #if EASTL_MINMAX_ENABLED + /// max + /// + /// Min returns the lesser of its two arguments; it returns the first + /// argument if neither is less than the other. The two arguments are + /// compared with the Compare function (or function object), which + /// takes two arguments and returns true if the first is less than + /// the second. + /// + template + inline const T& + max(const T& a, const T& b, Compare compare) + { + return compare(a, b) ? b : a; + } + #endif + + + /// max_alt + /// + /// This is an alternative version of max that avoids any possible + /// collisions with Microsoft #defines of min and max. + /// + template + inline const T& + max_alt(const T& a, const T& b, Compare compare) + { + return compare(a, b) ? 
b : a; + } + + + /// min(std::initializer_list) + /// + template + T min(std::initializer_list ilist) + { + return *eastl::min_element(ilist.begin(), ilist.end()); + } + + /// min(std::initializer_list, Compare) + /// + template + T min(std::initializer_list ilist, Compare compare) + { + return *eastl::min_element(ilist.begin(), ilist.end(), compare); + } + + + /// max(std::initializer_list) + /// + template + T max(std::initializer_list ilist) + { + return *eastl::max_element(ilist.begin(), ilist.end()); + } + + /// max(std::initializer_list, Compare) + /// + template + T max(std::initializer_list ilist, Compare compare) + { + return *eastl::max_element(ilist.begin(), ilist.end(), compare); + } + + + /// minmax_element + /// + /// Returns: make_pair(first, first) if [first, last) is empty, otherwise make_pair(m, M), + /// where m is the first iterator in [first,last) such that no iterator in the range + /// refers to a smaller element, and where M is the last iterator in [first,last) such + /// that no iterator in the range refers to a larger element. + /// + /// Complexity: At most max([(3/2)*(N - 1)], 0) applications of the corresponding predicate, + /// where N is distance(first, last). + /// + template + eastl::pair + minmax_element(ForwardIterator first, ForwardIterator last, Compare compare) + { + eastl::pair result(first, first); + + if(!(first == last) && !(++first == last)) + { + if(compare(*first, *result.first)) + { + result.second = result.first; + result.first = first; + } + else + result.second = first; + + while(++first != last) + { + ForwardIterator i = first; + + if(++first == last) + { + if(compare(*i, *result.first)) + result.first = i; + else if(!compare(*i, *result.second)) + result.second = i; + break; + } + else + { + if(compare(*first, *i)) + { + if(compare(*first, *result.first)) + result.first = first; + + if(!compare(*i, *result.second)) + result.second = i; + } + else + { + if(compare(*i, *result.first)) + result.first = i; + + if(!compare(*first, *result.second)) + result.second = first; + } + } + } + } + + return result; + } + + + template + eastl::pair + minmax_element(ForwardIterator first, ForwardIterator last) + { + typedef typename eastl::iterator_traits::value_type value_type; + + return eastl::minmax_element(first, last, eastl::less()); + } + + + + /// minmax + /// + /// Requires: Type T shall be LessThanComparable. + /// Returns: pair(b, a) if b is smaller than a, and pair(a, b) otherwise. + /// Remarks: Returns pair(a, b) when the arguments are equivalent. + /// Complexity: Exactly one comparison. + /// + + // The following optimization is a problem because it changes the return value in a way that would break + // users unless they used auto (e.g. auto result = minmax(17, 33); ) + // + // template + // inline EA_CONSTEXPR typename eastl::enable_if::value, eastl::pair >::type + // minmax(T a, T b) + // { + // return (b < a) ? eastl::make_pair(b, a) : eastl::make_pair(a, b); + // } + // + // template + // inline typename eastl::enable_if::value, eastl::pair >::type + // minmax(const T& a, const T& b) + // { + // return (b < a) ? eastl::make_pair(b, a) : eastl::make_pair(a, b); + // } + + // It turns out that the following conforming definition of minmax generates a warning when used with VC++ up + // to at least VS2012. The VS2012 version of minmax is a broken and non-conforming definition, and we don't + // want to do that. 
We could do it for scalars alone, though we'd have to decide if we are going to do that + // for all compilers, because it changes the return value from a pair of references to a pair of values. + template + inline eastl::pair + minmax(const T& a, const T& b) + { + return (b < a) ? eastl::make_pair(b, a) : eastl::make_pair(a, b); + } + + + template + eastl::pair + minmax(const T& a, const T& b, Compare compare) + { + return compare(b, a) ? eastl::make_pair(b, a) : eastl::make_pair(a, b); + } + + + + template + eastl::pair + minmax(std::initializer_list ilist) + { + typedef typename std::initializer_list::iterator iterator_type; + eastl::pair iteratorPair = eastl::minmax_element(ilist.begin(), ilist.end()); + return eastl::make_pair(*iteratorPair.first, *iteratorPair.second); + } + + template + eastl::pair + minmax(std::initializer_list ilist, Compare compare) + { + typedef typename std::initializer_list::iterator iterator_type; + eastl::pair iteratorPair = eastl::minmax_element(ilist.begin(), ilist.end(), compare); + return eastl::make_pair(*iteratorPair.first, *iteratorPair.second); + } + + template + inline T&& median_impl(T&& a, T&& b, T&& c) + { + if(a < b) + { + if(b < c) + return eastl::forward(b); + else if(a < c) + return eastl::forward(c); + else + return eastl::forward(a); + } + else if(a < c) + return eastl::forward(a); + else if(b < c) + return eastl::forward(c); + return eastl::forward(b); + } + + /// median + /// + /// median finds which element of three (a, b, d) is in-between the other two. + /// If two or more elements are equal, the first (e.g. a before b) is chosen. + /// + /// Complexity: Either two or three comparisons will be required, depending + /// on the values. + /// + template + inline const T& median(const T& a, const T& b, const T& c) + { + return median_impl(a, b, c); + } + + /// median + /// + /// median finds which element of three (a, b, d) is in-between the other two. + /// If two or more elements are equal, the first (e.g. a before b) is chosen. + /// + /// Complexity: Either two or three comparisons will be required, depending + /// on the values. + /// + template + inline T&& median(T&& a, T&& b, T&& c) + { + return eastl::forward(median_impl(eastl::forward(a), eastl::forward(b), eastl::forward(c))); + } + + + template + inline T&& median_impl(T&& a, T&& b, T&& c, Compare compare) + { + if(compare(a, b)) + { + if(compare(b, c)) + return eastl::forward(b); + else if(compare(a, c)) + return eastl::forward(c); + else + return eastl::forward(a); + } + else if(compare(a, c)) + return eastl::forward(a); + else if(compare(b, c)) + return eastl::forward(c); + return eastl::forward(b); + } + + + /// median + /// + /// median finds which element of three (a, b, d) is in-between the other two. + /// If two or more elements are equal, the first (e.g. a before b) is chosen. + /// + /// Complexity: Either two or three comparisons will be required, depending + /// on the values. + /// + template + inline const T& median(const T& a, const T& b, const T& c, Compare compare) + { + return median_impl(a, b, c, compare); + } + + /// median + /// + /// median finds which element of three (a, b, d) is in-between the other two. + /// If two or more elements are equal, the first (e.g. a before b) is chosen. + /// + /// Complexity: Either two or three comparisons will be required, depending + /// on the values. 
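+	/// Example usage (editor's illustrative sketch):
+	///     int m = eastl::median(3, 9, 5, eastl::less<int>());   // m == 5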
+ /// + template + inline T&& median(T&& a, T&& b, T&& c, Compare compare) + { + return eastl::forward(median_impl(eastl::forward(a), eastl::forward(b), eastl::forward(c), compare)); + } + + + + + /// all_of + /// + /// Returns: true if the unary predicate p returns true for all elements in the range [first, last) + /// + template + inline bool all_of(InputIterator first, InputIterator last, Predicate p) + { + for(; first != last; ++first) + { + if(!p(*first)) + return false; + } + return true; + } + + + /// any_of + /// + /// Returns: true if the unary predicate p returns true for any of the elements in the range [first, last) + /// + template + inline bool any_of(InputIterator first, InputIterator last, Predicate p) + { + for(; first != last; ++first) + { + if(p(*first)) + return true; + } + return false; + } + + + /// none_of + /// + /// Returns: true if the unary predicate p returns true for none of the elements in the range [first, last) + /// + template + inline bool none_of(InputIterator first, InputIterator last, Predicate p) + { + for(; first != last; ++first) + { + if(p(*first)) + return false; + } + return true; + } + + + /// adjacent_find + /// + /// Returns: The first iterator i such that both i and i + 1 are in the range + /// [first, last) for which the following corresponding conditions hold: *i == *(i + 1). + /// Returns last if no such iterator is found. + /// + /// Complexity: Exactly 'find(first, last, value) - first' applications of the corresponding predicate. + /// + template + inline ForwardIterator + adjacent_find(ForwardIterator first, ForwardIterator last) + { + if(first != last) + { + ForwardIterator i = first; + + for(++i; i != last; ++i) + { + if(*first == *i) + return first; + first = i; + } + } + return last; + } + + + + /// adjacent_find + /// + /// Returns: The first iterator i such that both i and i + 1 are in the range + /// [first, last) for which the following corresponding conditions hold: predicate(*i, *(i + 1)) != false. + /// Returns last if no such iterator is found. + /// + /// Complexity: Exactly 'find(first, last, value) - first' applications of the corresponding predicate. + /// + template + inline ForwardIterator + adjacent_find(ForwardIterator first, ForwardIterator last, BinaryPredicate predicate) + { + if(first != last) + { + ForwardIterator i = first; + + for(++i; i != last; ++i) + { + if(predicate(*first, *i)) + return first; + first = i; + } + } + return last; + } + + + /// shuffle + /// + /// New for C++11 + /// Randomizes a sequence of values via a user-supplied UniformRandomNumberGenerator. + /// The difference between this and the original random_shuffle function is that this uses the more + /// advanced and flexible UniformRandomNumberGenerator interface as opposed to the more + /// limited RandomNumberGenerator interface of random_shuffle. + /// + /// Effects: Shuffles the elements in the range [first, last) with uniform distribution. + /// + /// Complexity: Exactly '(last - first) - 1' swaps. + /// + /// Example usage: + /// struct Rand{ eastl_size_t operator()(eastl_size_t n) { return (eastl_size_t)(rand() % n); } }; // Note: The C rand function is poor and slow. + /// Rand randInstance; + /// shuffle(pArrayBegin, pArrayEnd, randInstance); + /// + // See the C++11 Standard, 26.5.1.3, Uniform random number generator requirements. 
+ // Also http://en.cppreference.com/w/cpp/numeric/random/uniform_int_distribution + + template + void shuffle(RandomAccessIterator first, RandomAccessIterator last, UniformRandomNumberGenerator&& urng) + { + if(first != last) + { + typedef typename eastl::iterator_traits::difference_type difference_type; + typedef typename eastl::make_unsigned::type unsigned_difference_type; + typedef typename eastl::uniform_int_distribution uniform_int_distribution; + typedef typename uniform_int_distribution::param_type uniform_int_distribution_param_type; + + uniform_int_distribution uid; + + for(RandomAccessIterator i = first + 1; i != last; ++i) + iter_swap(i, first + uid(urng, uniform_int_distribution_param_type(0, i - first))); + } + } + + + /// random_shuffle + /// + /// Randomizes a sequence of values. + /// + /// Effects: Shuffles the elements in the range [first, last) with uniform distribution. + /// + /// Complexity: Exactly '(last - first) - 1' swaps. + /// + /// Example usage: + /// eastl_size_t Rand(eastl_size_t n) { return (eastl_size_t)(rand() % n); } // Note: The C rand function is poor and slow. + /// pointer_to_unary_function randInstance(Rand); + /// random_shuffle(pArrayBegin, pArrayEnd, randInstance); + /// + /// Example usage: + /// struct Rand{ eastl_size_t operator()(eastl_size_t n) { return (eastl_size_t)(rand() % n); } }; // Note: The C rand function is poor and slow. + /// Rand randInstance; + /// random_shuffle(pArrayBegin, pArrayEnd, randInstance); + /// + template + inline void random_shuffle(RandomAccessIterator first, RandomAccessIterator last, RandomNumberGenerator&& rng) + { + typedef typename eastl::iterator_traits::difference_type difference_type; + + // We must do 'rand((i - first) + 1)' here and cannot do 'rand(last - first)', + // as it turns out that the latter results in unequal distribution probabilities. + // http://www.cigital.com/papers/download/developer_gambling.php + + for(RandomAccessIterator i = first + 1; i < last; ++i) + iter_swap(i, first + (difference_type)rng((eastl_size_t)((i - first) + 1))); + } + + + /// random_shuffle + /// + /// Randomizes a sequence of values. + /// + /// Effects: Shuffles the elements in the range [first, last) with uniform distribution. + /// + /// Complexity: Exactly '(last - first) - 1' swaps. + /// + /// Example usage: + /// random_shuffle(pArrayBegin, pArrayEnd); + /// + /// *** Disabled until we decide if we want to get into the business of writing random number generators. *** + /// + /// template + /// inline void random_shuffle(RandomAccessIterator first, RandomAccessIterator last) + /// { + /// for(RandomAccessIterator i = first + 1; i < last; ++i) + /// iter_swap(i, first + SomeRangedRandomNumberGenerator((i - first) + 1)); + /// } + + + + + + + /// move_n + /// + /// Same as move(InputIterator, InputIterator, OutputIterator) except based on count instead of iterator range. + /// + template + inline OutputIterator + move_n_impl(InputIterator first, Size n, OutputIterator result, EASTL_ITC_NS::input_iterator_tag) + { + for(; n > 0; --n) + *result++ = eastl::move(*first++); + return result; + } + + template + inline OutputIterator + move_n_impl(RandomAccessIterator first, Size n, OutputIterator result, EASTL_ITC_NS::random_access_iterator_tag) + { + return eastl::move(first, first + n, result); // Take advantage of the optimizations present in the move algorithm. 
+ } + + + template + inline OutputIterator + move_n(InputIterator first, Size n, OutputIterator result) + { + typedef typename eastl::iterator_traits::iterator_category IC; + return eastl::move_n_impl(first, n, result, IC()); + } + + + + /// copy_n + /// + /// Same as copy(InputIterator, InputIterator, OutputIterator) except based on count instead of iterator range. + /// Effects: Copies exactly count values from the range beginning at first to the range beginning at result, if count > 0. Does nothing otherwise. + /// Returns: Iterator in the destination range, pointing past the last element copied if count>0 or first otherwise. + /// Complexity: Exactly count assignments, if count > 0. + /// + template + inline OutputIterator + copy_n_impl(InputIterator first, Size n, OutputIterator result, EASTL_ITC_NS::input_iterator_tag) + { + for(; n > 0; --n) + *result++ = *first++; + return result; + } + + template + inline OutputIterator + copy_n_impl(RandomAccessIterator first, Size n, OutputIterator result, EASTL_ITC_NS::random_access_iterator_tag) + { + return eastl::copy(first, first + n, result); // Take advantage of the optimizations present in the copy algorithm. + } + + + template + inline OutputIterator + copy_n(InputIterator first, Size n, OutputIterator result) + { + typedef typename eastl::iterator_traits::iterator_category IC; + return eastl::copy_n_impl(first, n, result, IC()); + } + + + /// copy_if + /// + /// Effects: Assigns to the result iterator only if the predicate is true. + /// + template + inline OutputIterator + copy_if(InputIterator first, InputIterator last, OutputIterator result, Predicate predicate) + { + // This implementation's performance could be improved by taking a more complicated approach like with the copy algorithm. + for(; first != last; ++first) + { + if(predicate(*first)) + *result++ = *first; + } + + return result; + } + + + + + // Implementation moving copying both trivial and non-trivial data via a lesser iterator than random-access. + template + struct move_and_copy_backward_helper + { + template + static BidirectionalIterator2 move_or_copy_backward(BidirectionalIterator1 first, BidirectionalIterator1 last, BidirectionalIterator2 resultEnd) + { + while(first != last) + *--resultEnd = *--last; + return resultEnd; // resultEnd now points to the beginning of the destination sequence instead of the end. + } + }; + + // Specialization for moving non-trivial data via a lesser iterator than random-access. + template + struct move_and_copy_backward_helper + { + template + static BidirectionalIterator2 move_or_copy_backward(BidirectionalIterator1 first, BidirectionalIterator1 last, BidirectionalIterator2 resultEnd) + { + while(first != last) + *--resultEnd = eastl::move(*--last); + return resultEnd; // resultEnd now points to the beginning of the destination sequence instead of the end. + } + }; + + // Specialization for moving non-trivial data via a random-access iterator. It's theoretically faster because the compiler can see the count when its a compile-time const. + template<> + struct move_and_copy_backward_helper + { + template + static BidirectionalIterator2 move_or_copy_backward(BidirectionalIterator1 first, BidirectionalIterator1 last, BidirectionalIterator2 resultEnd) + { + typedef typename eastl::iterator_traits::difference_type difference_type; + + for(difference_type n = (last - first); n > 0; --n) + *--resultEnd = eastl::move(*--last); + return resultEnd; // resultEnd now points to the beginning of the destination sequence instead of the end. 
+ } + }; + + // Specialization for copying non-trivial data via a random-access iterator. It's theoretically faster because the compiler can see the count when its a compile-time const. + // This specialization converts the random access BidirectionalIterator1 last-first to an integral type. There's simple way for us to take advantage of a random access output iterator, + // as the range is specified by the input instead of the output, and distance(first, last) for a non-random-access iterator is potentially slow. + template <> + struct move_and_copy_backward_helper + { + template + static BidirectionalIterator2 move_or_copy_backward(BidirectionalIterator1 first, BidirectionalIterator1 last, BidirectionalIterator2 resultEnd) + { + typedef typename eastl::iterator_traits::difference_type difference_type; + + for(difference_type n = (last - first); n > 0; --n) + *--resultEnd = *--last; + return resultEnd; // resultEnd now points to the beginning of the destination sequence instead of the end. + } + }; + + // Specialization for when we can use memmove/memcpy. See the notes above for what conditions allow this. + template + struct move_and_copy_backward_helper + { + template + static T* move_or_copy_backward(const T* first, const T* last, T* resultEnd) + { + return (T*)memmove(resultEnd - (last - first), first, (size_t)((uintptr_t)last - (uintptr_t)first)); + // We could use memcpy here if there's no range overlap, but memcpy is rarely much faster than memmove. + } + }; + + template + inline BidirectionalIterator2 move_and_copy_backward_chooser(BidirectionalIterator1 first, BidirectionalIterator1 last, BidirectionalIterator2 resultEnd) + { + typedef typename eastl::iterator_traits::iterator_category IIC; + typedef typename eastl::iterator_traits::iterator_category OIC; + typedef typename eastl::iterator_traits::value_type value_type_input; + typedef typename eastl::iterator_traits::value_type value_type_output; + + const bool canBeMemmoved = eastl::is_trivially_copyable::value && + eastl::is_same::value && + (eastl::is_pointer::value || eastl::is_same::value) && + (eastl::is_pointer::value || eastl::is_same::value); + + return eastl::move_and_copy_backward_helper::move_or_copy_backward(first, last, resultEnd); // Need to chose based on the input iterator tag and not the output iterator tag, because containers accept input ranges of iterator types different than self. + } + + + // We have a second layer of unwrap_iterator calls because the original iterator might be something like move_iterator > (i.e. doubly-wrapped). + template + inline BidirectionalIterator2 move_and_copy_backward_unwrapper(BidirectionalIterator1 first, BidirectionalIterator1 last, BidirectionalIterator2 resultEnd) + { + return BidirectionalIterator2(eastl::move_and_copy_backward_chooser(eastl::unwrap_iterator(first), eastl::unwrap_iterator(last), eastl::unwrap_iterator(resultEnd))); // Have to convert to BidirectionalIterator2 because result.base() could be a T* + } + + + /// move_backward + /// + /// The elements are moved in reverse order (the last element is moved first), but their relative order is preserved. + /// After this operation the elements in the moved-from range will still contain valid values of the + /// appropriate type, but not necessarily the same values as before the move. + /// Returns the beginning of the result range. + /// Note: When moving between containers, the dest range must be valid; this function doesn't resize containers. 
+ /// Note: If result is within [first, last), move must be used instead of move_backward. + /// + /// Example usage: + /// eastl::move_backward(myArray.begin(), myArray.end(), myDestArray.end()); + /// + /// Reference implementation: + /// template + /// BidirectionalIterator2 move_backward(BidirectionalIterator1 first, BidirectionalIterator1 last, BidirectionalIterator2 resultEnd) + /// { + /// while(last != first) + /// *--resultEnd = eastl::move(*--last); + /// return resultEnd; + /// } + /// + template + inline BidirectionalIterator2 move_backward(BidirectionalIterator1 first, BidirectionalIterator1 last, BidirectionalIterator2 resultEnd) + { + return eastl::move_and_copy_backward_unwrapper(eastl::unwrap_iterator(first), eastl::unwrap_iterator(last), resultEnd); + } + + + /// copy_backward + /// + /// copies memory in the range of [first, last) to the range *ending* with result. + /// + /// Effects: Copies elements in the range [first, last) into the range + /// [result - (last - first), result) starting from last 1 and proceeding to first. + /// For each positive integer n <= (last - first), performs *(result n) = *(last - n). + /// + /// Requires: result shall not be in the range [first, last). + /// + /// Returns: result - (last - first). That is, returns the beginning of the result range. + /// + /// Complexity: Exactly 'last - first' assignments. + /// + template + inline BidirectionalIterator2 copy_backward(BidirectionalIterator1 first, BidirectionalIterator1 last, BidirectionalIterator2 resultEnd) + { + const bool isMove = eastl::is_move_iterator::value; EA_UNUSED(isMove); + + return eastl::move_and_copy_backward_unwrapper(eastl::unwrap_iterator(first), eastl::unwrap_iterator(last), resultEnd); + } + + + /// count + /// + /// Counts the number of items in the range of [first, last) which equal the input value. + /// + /// Effects: Returns the number of iterators i in the range [first, last) for which the + /// following corresponding conditions hold: *i == value. + /// + /// Complexity: At most 'last - first' applications of the corresponding predicate. + /// + /// Note: The predicate version of count is count_if and not another variation of count. + /// This is because both versions would have three parameters and there could be ambiguity. + /// + template + inline typename eastl::iterator_traits::difference_type + count(InputIterator first, InputIterator last, const T& value) + { + typename eastl::iterator_traits::difference_type result = 0; + + for(; first != last; ++first) + { + if(*first == value) + ++result; + } + return result; + } + + + // C++ doesn't define a count with predicate, as it can effectively be synthesized via count_if + // with an appropriate predicate. However, it's often simpler to just have count with a predicate. + template + inline typename eastl::iterator_traits::difference_type + count(InputIterator first, InputIterator last, const T& value, Predicate predicate) + { + typename eastl::iterator_traits::difference_type result = 0; + + for(; first != last; ++first) + { + if(predicate(*first, value)) + ++result; + } + return result; + } + + + /// count_if + /// + /// Counts the number of items in the range of [first, last) which match + /// the input value as defined by the input predicate function. + /// + /// Effects: Returns the number of iterators i in the range [first, last) for which the + /// following corresponding conditions hold: predicate(*i) != false. + /// + /// Complexity: At most 'last - first' applications of the corresponding predicate. 
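+ /// Example usage (an illustrative sketch; 'intArray' is a hypothetical eastl::vector<int>):
+ ///     auto evenCount = eastl::count_if(intArray.begin(), intArray.end(),
+ ///                                      [](int x) { return (x % 2) == 0; });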
+ /// + /// Note: The non-predicate version of count_if is count and not another variation of count_if. + /// This is because both versions would have three parameters and there could be ambiguity. + /// + template + inline typename eastl::iterator_traits::difference_type + count_if(InputIterator first, InputIterator last, Predicate predicate) + { + typename eastl::iterator_traits::difference_type result = 0; + + for(; first != last; ++first) + { + if(predicate(*first)) + ++result; + } + return result; + } + + + /// find + /// + /// finds the value within the unsorted range of [first, last). + /// + /// Returns: The first iterator i in the range [first, last) for which + /// the following corresponding conditions hold: *i == value. + /// Returns last if no such iterator is found. + /// + /// Complexity: At most 'last - first' applications of the corresponding predicate. + /// This is a linear search and not a binary one. + /// + /// Note: The predicate version of find is find_if and not another variation of find. + /// This is because both versions would have three parameters and there could be ambiguity. + /// + template + inline InputIterator + find(InputIterator first, InputIterator last, const T& value) + { + while((first != last) && !(*first == value)) // Note that we always express value comparisons in terms of < or ==. + ++first; + return first; + } + + + // C++ doesn't define a find with predicate, as it can effectively be synthesized via find_if + // with an appropriate predicate. However, it's often simpler to just have find with a predicate. + template + inline InputIterator + find(InputIterator first, InputIterator last, const T& value, Predicate predicate) + { + while((first != last) && !predicate(*first, value)) + ++first; + return first; + } + + + + /// find_if + /// + /// finds the value within the unsorted range of [first, last). + /// + /// Returns: The first iterator i in the range [first, last) for which + /// the following corresponding conditions hold: pred(*i) != false. + /// Returns last if no such iterator is found. + /// If the sequence of elements to search for (i.e. first2 - last2) is empty, + /// the find always fails and last1 will be returned. + /// + /// Complexity: At most 'last - first' applications of the corresponding predicate. + /// + /// Note: The non-predicate version of find_if is find and not another variation of find_if. + /// This is because both versions would have three parameters and there could be ambiguity. + /// + template + inline InputIterator + find_if(InputIterator first, InputIterator last, Predicate predicate) + { + while((first != last) && !predicate(*first)) + ++first; + return first; + } + + + + /// find_if_not + /// + /// find_if_not works the same as find_if except it tests for if the predicate + /// returns false for the elements instead of true. + /// + template + inline InputIterator + find_if_not(InputIterator first, InputIterator last, Predicate predicate) + { + for(; first != last; ++first) + { + if(!predicate(*first)) + return first; + } + return last; + } + + + + + /// find_first_of + /// + /// find_first_of is similar to find in that it performs linear search through + /// a range of ForwardIterators. The difference is that while find searches + /// for one particular value, find_first_of searches for any of several values. + /// Specifically, find_first_of searches for the first occurrance in the + /// range [first1, last1) of any of the elements in [first2, last2). 
+ /// This function is thus similar to the strpbrk standard C string function. + /// If the sequence of elements to search for (i.e. first2-last2) is empty, + /// the find always fails and last1 will be returned. + /// + /// Effects: Finds an element that matches one of a set of values. + /// + /// Returns: The first iterator i in the range [first1, last1) such that for some + /// integer j in the range [first2, last2) the following conditions hold: *i == *j. + /// Returns last1 if no such iterator is found. + /// + /// Complexity: At most '(last1 - first1) * (last2 - first2)' applications of the + /// corresponding predicate. + /// + template + ForwardIterator1 + find_first_of(ForwardIterator1 first1, ForwardIterator1 last1, + ForwardIterator2 first2, ForwardIterator2 last2) + { + for(; first1 != last1; ++first1) + { + for(ForwardIterator2 i = first2; i != last2; ++i) + { + if(*first1 == *i) + return first1; + } + } + return last1; + } + + + /// find_first_of + /// + /// find_first_of is similar to find in that it performs linear search through + /// a range of ForwardIterators. The difference is that while find searches + /// for one particular value, find_first_of searches for any of several values. + /// Specifically, find_first_of searches for the first occurrance in the + /// range [first1, last1) of any of the elements in [first2, last2). + /// This function is thus similar to the strpbrk standard C string function. + /// + /// Effects: Finds an element that matches one of a set of values. + /// + /// Returns: The first iterator i in the range [first1, last1) such that for some + /// integer j in the range [first2, last2) the following conditions hold: pred(*i, *j) != false. + /// Returns last1 if no such iterator is found. + /// + /// Complexity: At most '(last1 - first1) * (last2 - first2)' applications of the + /// corresponding predicate. + /// + template + ForwardIterator1 + find_first_of(ForwardIterator1 first1, ForwardIterator1 last1, + ForwardIterator2 first2, ForwardIterator2 last2, + BinaryPredicate predicate) + { + for(; first1 != last1; ++first1) + { + for(ForwardIterator2 i = first2; i != last2; ++i) + { + if(predicate(*first1, *i)) + return first1; + } + } + return last1; + } + + + /// find_first_not_of + /// + /// Searches through first range for the first element that does not belong the second input range. + /// This is very much like the C++ string find_first_not_of function. + /// + /// Returns: The first iterator i in the range [first1, last1) such that for some + /// integer j in the range [first2, last2) the following conditions hold: !(*i == *j). + /// Returns last1 if no such iterator is found. + /// + /// Complexity: At most '(last1 - first1) * (last2 - first2)' applications of the + /// corresponding predicate. + /// + template + ForwardIterator1 + find_first_not_of(ForwardIterator1 first1, ForwardIterator1 last1, + ForwardIterator2 first2, ForwardIterator2 last2) + { + for(; first1 != last1; ++first1) + { + if(eastl::find(first2, last2, *first1) == last2) + break; + } + + return first1; + } + + + + /// find_first_not_of + /// + /// Searches through first range for the first element that does not belong the second input range. + /// This is very much like the C++ string find_first_not_of function. + /// + /// Returns: The first iterator i in the range [first1, last1) such that for some + /// integer j in the range [first2, last2) the following conditions hold: pred(*i, *j) == false. + /// Returns last1 if no such iterator is found. 
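+ /// Example usage (an illustrative sketch; 'text' and 'whitespace' are hypothetical eastl::string objects):
+ ///     eastl::string::iterator it = eastl::find_first_not_of(text.begin(), text.end(),
+ ///                                                            whitespace.begin(), whitespace.end(),
+ ///                                                            eastl::equal_to<char>());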
+ /// + /// Complexity: At most '(last1 - first1) * (last2 - first2)' applications of the + /// corresponding predicate. + /// + template + inline ForwardIterator1 + find_first_not_of(ForwardIterator1 first1, ForwardIterator1 last1, + ForwardIterator2 first2, ForwardIterator2 last2, + BinaryPredicate predicate) + { + typedef typename eastl::iterator_traits::value_type value_type; + + for(; first1 != last1; ++first1) + { + if(eastl::find_if(first2, last2, eastl::bind1st(predicate, *first1)) == last2) + break; + } + + return first1; + } + + + template + inline BidirectionalIterator1 + find_last_of(BidirectionalIterator1 first1, BidirectionalIterator1 last1, + ForwardIterator2 first2, ForwardIterator2 last2) + { + if((first1 != last1) && (first2 != last2)) + { + BidirectionalIterator1 it1(last1); + + while((--it1 != first1) && (eastl::find(first2, last2, *it1) == last2)) + ; // Do nothing + + if((it1 != first1) || (eastl::find(first2, last2, *it1) != last2)) + return it1; + } + + return last1; + } + + + template + BidirectionalIterator1 + find_last_of(BidirectionalIterator1 first1, BidirectionalIterator1 last1, + ForwardIterator2 first2, ForwardIterator2 last2, + BinaryPredicate predicate) + { + typedef typename eastl::iterator_traits::value_type value_type; + + if((first1 != last1) && (first2 != last2)) + { + BidirectionalIterator1 it1(last1); + + while((--it1 != first1) && (eastl::find_if(first2, last2, eastl::bind1st(predicate, *it1)) == last2)) + ; // Do nothing + + if((it1 != first1) || (eastl::find_if(first2, last2, eastl::bind1st(predicate, *it1)) != last2)) + return it1; + } + + return last1; + } + + + template + inline BidirectionalIterator1 + find_last_not_of(BidirectionalIterator1 first1, BidirectionalIterator1 last1, + ForwardIterator2 first2, ForwardIterator2 last2) + { + if((first1 != last1) && (first2 != last2)) + { + BidirectionalIterator1 it1(last1); + + while((--it1 != first1) && (eastl::find(first2, last2, *it1) != last2)) + ; // Do nothing + + if((it1 != first1) || (eastl::find( first2, last2, *it1) == last2)) + return it1; + } + + return last1; + } + + + template + inline BidirectionalIterator1 + find_last_not_of(BidirectionalIterator1 first1, BidirectionalIterator1 last1, + ForwardIterator2 first2, ForwardIterator2 last2, + BinaryPredicate predicate) + { + typedef typename eastl::iterator_traits::value_type value_type; + + if((first1 != last1) && (first2 != last2)) + { + BidirectionalIterator1 it1(last1); + + while((--it1 != first1) && (eastl::find_if(first2, last2, eastl::bind1st(predicate, *it1)) != last2)) + ; // Do nothing + + if((it1 != first1) || (eastl::find_if(first2, last2, eastl::bind1st(predicate, *it1))) != last2) + return it1; + } + + return last1; + } + + + + + /// for_each + /// + /// Calls the Function function for each value in the range [first, last). + /// Function takes a single parameter: the current value. + /// + /// Effects: Applies function to the result of dereferencing every iterator in + /// the range [first, last), starting from first and proceeding to last 1. + /// + /// Returns: function. + /// + /// Complexity: Applies function exactly 'last - first' times. + /// + /// Note: If function returns a result, the result is ignored. + /// + template + inline Function + for_each(InputIterator first, InputIterator last, Function function) + { + for(; first != last; ++first) + function(*first); + return function; + } + + /// for_each_n + /// + /// Calls the Function function for each value in the range [first, first + n). 
+ /// Function takes a single parameter: the current value. + /// + /// Effects: Applies function to the result of dereferencing every iterator in + /// the range [first, first + n), starting from first and proceeding to last 1. + /// + /// Returns: first + n. + /// + /// Complexity: Applies function exactly 'first + n' times. + /// + /// Note: + //// * If function returns a result, the result is ignored. + //// * If n < 0, behaviour is undefined. + /// + template + EA_CPP14_CONSTEXPR inline InputIterator + for_each_n(InputIterator first, Size n, Function function) + { + for (Size i = 0; i < n; ++first, i++) + function(*first); + return first; + } + + + /// generate + /// + /// Iterates the range of [first, last) and assigns to each element the + /// result of the function generator. Generator is a function which takes + /// no arguments. + /// + /// Complexity: Exactly 'last - first' invocations of generator and assignments. + /// + template + inline void + generate(ForwardIterator first, ForwardIterator last, Generator generator) + { + for(; first != last; ++first) // We cannot call generate_n(first, last-first, generator) + *first = generator(); // because the 'last-first' might not be supported by the + } // given iterator. + + + /// generate_n + /// + /// Iterates an interator n times and assigns the result of generator + /// to each succeeding element. Generator is a function which takes + /// no arguments. + /// + /// Complexity: Exactly n invocations of generator and assignments. + /// + template + inline OutputIterator + generate_n(OutputIterator first, Size n, Generator generator) + { + for(; n > 0; --n, ++first) + *first = generator(); + return first; + } + + + /// transform + /// + /// Iterates the input range of [first, last) and the output iterator result + /// and assigns the result of unaryOperation(input) to result. + /// + /// Effects: Assigns through every iterator i in the range [result, result + (last1 - first1)) + /// a new corresponding value equal to unaryOperation(*(first1 + (i - result)). + /// + /// Requires: op shall not have any side effects. + /// + /// Returns: result + (last1 - first1). That is, returns the end of the output range. + /// + /// Complexity: Exactly 'last1 - first1' applications of unaryOperation. + /// + /// Note: result may be equal to first. + /// + template + inline OutputIterator + transform(InputIterator first, InputIterator last, OutputIterator result, UnaryOperation unaryOperation) + { + for(; first != last; ++first, ++result) + *result = unaryOperation(*first); + return result; + } + + + /// transform + /// + /// Iterates the input range of [first, last) and the output iterator result + /// and assigns the result of binaryOperation(input1, input2) to result. + /// + /// Effects: Assigns through every iterator i in the range [result, result + (last1 - first1)) + /// a new corresponding value equal to binaryOperation(*(first1 + (i - result), *(first2 + (i - result))). + /// + /// Requires: binaryOperation shall not have any side effects. + /// + /// Returns: result + (last1 - first1). That is, returns the end of the output range. + /// + /// Complexity: Exactly 'last1 - first1' applications of binaryOperation. + /// + /// Note: result may be equal to first1 or first2. 
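+ /// Example usage (an illustrative sketch; 'a', 'b', and 'sums' are hypothetical eastl::vector<int>
+ /// objects of equal size):
+ ///     eastl::transform(a.begin(), a.end(), b.begin(), sums.begin(), eastl::plus<int>());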
+ /// + template + inline OutputIterator + transform(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, OutputIterator result, BinaryOperation binaryOperation) + { + for(; first1 != last1; ++first1, ++first2, ++result) + *result = binaryOperation(*first1, *first2); + return result; + } + + + /// equal + /// + /// Returns: true if for every iterator i in the range [first1, last1) the + /// following corresponding conditions hold: predicate(*i, *(first2 + (i - first1))) != false. + /// Otherwise, returns false. + /// + /// Complexity: At most last1 first1 applications of the corresponding predicate. + /// + /// To consider: Make specializations of this for scalar types and random access + /// iterators that uses memcmp or some trick memory comparison function. + /// We should verify that such a thing results in an improvement. + /// + template + EA_CPP14_CONSTEXPR inline bool equal(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2) + { + for(; first1 != last1; ++first1, ++first2) + { + if(!(*first1 == *first2)) // Note that we always express value comparisons in terms of < or ==. + return false; + } + return true; + } + + /* Enable the following if there was shown to be some benefit. A glance and Microsoft VC++ memcmp + shows that it is not optimized in any way, much less one that would benefit us here. + + inline bool equal(const bool* first1, const bool* last1, const bool* first2) + { return (memcmp(first1, first2, (size_t)((uintptr_t)last1 - (uintptr_t)first1)) == 0); } + + inline bool equal(const char* first1, const char* last1, const char* first2) + { return (memcmp(first1, first2, (size_t)((uintptr_t)last1 - (uintptr_t)first1)) == 0); } + + inline bool equal(const unsigned char* first1, const unsigned char* last1, const unsigned char* first2) + { return (memcmp(first1, first2, (size_t)((uintptr_t)last1 - (uintptr_t)first1)) == 0); } + + inline bool equal(const signed char* first1, const signed char* last1, const signed char* first2) + { return (memcmp(first1, first2, (size_t)((uintptr_t)last1 - (uintptr_t)first1)) == 0); } + + #ifndef EA_WCHAR_T_NON_NATIVE + inline bool equal(const wchar_t* first1, const wchar_t* last1, const wchar_t* first2) + { return (memcmp(first1, first2, (size_t)((uintptr_t)last1 - (uintptr_t)first1)) == 0); } + #endif + + inline bool equal(const int16_t* first1, const int16_t* last1, const int16_t* first2) + { return (memcmp(first1, first2, (size_t)((uintptr_t)last1 - (uintptr_t)first1)) == 0); } + + inline bool equal(const uint16_t* first1, const uint16_t* last1, const uint16_t* first2) + { return (memcmp(first1, first2, (size_t)((uintptr_t)last1 - (uintptr_t)first1)) == 0); } + + inline bool equal(const int32_t* first1, const int32_t* last1, const int32_t* first2) + { return (memcmp(first1, first2, (size_t)((uintptr_t)last1 - (uintptr_t)first1)) == 0); } + + inline bool equal(const uint32_t* first1, const uint32_t* last1, const uint32_t* first2) + { return (memcmp(first1, first2, (size_t)((uintptr_t)last1 - (uintptr_t)first1)) == 0); } + + inline bool equal(const int64_t* first1, const int64_t* last1, const int64_t* first2) + { return (memcmp(first1, first2, (size_t)((uintptr_t)last1 - (uintptr_t)first1)) == 0); } + + inline bool equal(const uint64_t* first1, const uint64_t* last1, const uint64_t* first2) + { return (memcmp(first1, first2, (size_t)((uintptr_t)last1 - (uintptr_t)first1)) == 0); } + */ + + + + /// equal + /// + /// Returns: true if for every iterator i in the range [first1, last1) the + /// following 
corresponding conditions hold: pred(*i, *(first2 + (i first1))) != false. + /// Otherwise, returns false. + /// + /// Complexity: At most last1 first1 applications of the corresponding predicate. + /// + template + inline bool + equal(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, BinaryPredicate predicate) + { + for(; first1 != last1; ++first1, ++first2) + { + if(!predicate(*first1, *first2)) + return false; + } + return true; + } + + + + /// identical + /// + /// Returns true if the two input ranges are equivalent. + /// There is a subtle difference between this algorithm and + /// the 'equal' algorithm. The equal algorithm assumes the + /// two ranges are of equal length. This algorithm efficiently + /// compares two ranges for both length equality and for + /// element equality. There is no other standard algorithm + /// that can do this. + /// + /// Returns: true if the sequence of elements defined by the range + /// [first1, last1) is of the same length as the sequence of + /// elements defined by the range of [first2, last2) and if + /// the elements in these ranges are equal as per the + /// equal algorithm. + /// + /// Complexity: At most 'min((last1 - first1), (last2 - first2))' applications + /// of the corresponding comparison. + /// + template + bool identical(InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, InputIterator2 last2) + { + while((first1 != last1) && (first2 != last2) && (*first1 == *first2)) + { + ++first1; + ++first2; + } + return (first1 == last1) && (first2 == last2); + } + + + /// identical + /// + template + bool identical(InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, InputIterator2 last2, BinaryPredicate predicate) + { + while((first1 != last1) && (first2 != last2) && predicate(*first1, *first2)) + { + ++first1; + ++first2; + } + return (first1 == last1) && (first2 == last2); + } + + + + /// lexicographical_compare + /// + /// Returns: true if the sequence of elements defined by the range + /// [first1, last1) is lexicographically less than the sequence of + /// elements defined by the range [first2, last2). Returns false otherwise. + /// + /// Complexity: At most 'min((last1 - first1), (last2 - first2))' applications + /// of the corresponding comparison. + /// + /// Note: If two sequences have the same number of elements and their + /// corresponding elements are equivalent, then neither sequence is + /// lexicographically less than the other. If one sequence is a prefix + /// of the other, then the shorter sequence is lexicographically less + /// than the longer sequence. Otherwise, the lexicographical comparison + /// of the sequences yields the same result as the comparison of the first + /// corresponding pair of elements that are not equivalent. + /// + template + inline bool + lexicographical_compare(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2) + { + for(; (first1 != last1) && (first2 != last2); ++first1, ++first2) + { + if(*first1 < *first2) + return true; + if(*first2 < *first1) + return false; + } + return (first1 == last1) && (first2 != last2); + } + + inline bool // Specialization for const char*. + lexicographical_compare(const char* first1, const char* last1, const char* first2, const char* last2) + { + const ptrdiff_t n1(last1 - first1), n2(last2 - first2); + const int result = memcmp(first1, first2, (size_t)eastl::min_alt(n1, n2)); + return result ? (result < 0) : (n1 < n2); + } + + inline bool // Specialization for char*. 
+ lexicographical_compare(char* first1, char* last1, char* first2, char* last2) + { + const ptrdiff_t n1(last1 - first1), n2(last2 - first2); + const int result = memcmp(first1, first2, (size_t)eastl::min_alt(n1, n2)); + return result ? (result < 0) : (n1 < n2); + } + + inline bool // Specialization for const unsigned char*. + lexicographical_compare(const unsigned char* first1, const unsigned char* last1, const unsigned char* first2, const unsigned char* last2) + { + const ptrdiff_t n1(last1 - first1), n2(last2 - first2); + const int result = memcmp(first1, first2, (size_t)eastl::min_alt(n1, n2)); + return result ? (result < 0) : (n1 < n2); + } + + inline bool // Specialization for unsigned char*. + lexicographical_compare(unsigned char* first1, unsigned char* last1, unsigned char* first2, unsigned char* last2) + { + const ptrdiff_t n1(last1 - first1), n2(last2 - first2); + const int result = memcmp(first1, first2, (size_t)eastl::min_alt(n1, n2)); + return result ? (result < 0) : (n1 < n2); + } + + inline bool // Specialization for const signed char*. + lexicographical_compare(const signed char* first1, const signed char* last1, const signed char* first2, const signed char* last2) + { + const ptrdiff_t n1(last1 - first1), n2(last2 - first2); + const int result = memcmp(first1, first2, (size_t)eastl::min_alt(n1, n2)); + return result ? (result < 0) : (n1 < n2); + } + + inline bool // Specialization for signed char*. + lexicographical_compare(signed char* first1, signed char* last1, signed char* first2, signed char* last2) + { + const ptrdiff_t n1(last1 - first1), n2(last2 - first2); + const int result = memcmp(first1, first2, (size_t)eastl::min_alt(n1, n2)); + return result ? (result < 0) : (n1 < n2); + } + + #if defined(_MSC_VER) // If using the VC++ compiler (and thus bool is known to be a single byte)... + //Not sure if this is a good idea. + //inline bool // Specialization for const bool*. + //lexicographical_compare(const bool* first1, const bool* last1, const bool* first2, const bool* last2) + //{ + // const ptrdiff_t n1(last1 - first1), n2(last2 - first2); + // const int result = memcmp(first1, first2, (size_t)eastl::min_alt(n1, n2)); + // return result ? (result < 0) : (n1 < n2); + //} + // + //inline bool // Specialization for bool*. + //lexicographical_compare(bool* first1, bool* last1, bool* first2, bool* last2) + //{ + // const ptrdiff_t n1(last1 - first1), n2(last2 - first2); + // const int result = memcmp(first1, first2, (size_t)eastl::min_alt(n1, n2)); + // return result ? (result < 0) : (n1 < n2); + //} + #endif + + + + /// lexicographical_compare + /// + /// Returns: true if the sequence of elements defined by the range + /// [first1, last1) is lexicographically less than the sequence of + /// elements defined by the range [first2, last2). Returns false otherwise. + /// + /// Complexity: At most 'min((last1 -first1), (last2 - first2))' applications + /// of the corresponding comparison. + /// + /// Note: If two sequences have the same number of elements and their + /// corresponding elements are equivalent, then neither sequence is + /// lexicographically less than the other. If one sequence is a prefix + /// of the other, then the shorter sequence is lexicographically less + /// than the longer sequence. Otherwise, the lexicographical comparison + /// of the sequences yields the same result as the comparison of the first + /// corresponding pair of elements that are not equivalent. + /// + /// Note: False is always returned if range 1 is exhausted before range 2. 
+ /// The result of this is that you can't do a successful reverse compare + /// (e.g. use greater<> as the comparison instead of less<>) unless the + /// two sequences are of identical length. What you want to do is reverse + /// the order of the arguments in order to get the desired effect. + /// + template + inline bool + lexicographical_compare(InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, InputIterator2 last2, Compare compare) + { + for(; (first1 != last1) && (first2 != last2); ++first1, ++first2) + { + if(compare(*first1, *first2)) + return true; + if(compare(*first2, *first1)) + return false; + } + return (first1 == last1) && (first2 != last2); + } + + + /// mismatch + /// + /// Finds the first position where the two ranges [first1, last1) and + /// [first2, first2 + (last1 - first1)) differ. The two versions of + /// mismatch use different tests for whether elements differ. + /// + /// Returns: A pair of iterators i and j such that j == first2 + (i - first1) + /// and i is the first iterator in the range [first1, last1) for which the + /// following corresponding condition holds: !(*i == *(first2 + (i - first1))). + /// Returns the pair last1 and first2 + (last1 - first1) if such an iterator + /// i is not found. + /// + /// Complexity: At most last1 first1 applications of the corresponding predicate. + /// + template + inline eastl::pair + mismatch(InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2) // , InputIterator2 last2) + { + while((first1 != last1) && (*first1 == *first2)) // && (first2 != last2) <- C++ standard mismatch function doesn't check first2/last2. + { + ++first1; + ++first2; + } + + return eastl::pair(first1, first2); + } + + + /// mismatch + /// + /// Finds the first position where the two ranges [first1, last1) and + /// [first2, first2 + (last1 - first1)) differ. The two versions of + /// mismatch use different tests for whether elements differ. + /// + /// Returns: A pair of iterators i and j such that j == first2 + (i - first1) + /// and i is the first iterator in the range [first1, last1) for which the + /// following corresponding condition holds: pred(*i, *(first2 + (i - first1))) == false. + /// Returns the pair last1 and first2 + (last1 - first1) if such an iterator + /// i is not found. + /// + /// Complexity: At most last1 first1 applications of the corresponding predicate. + /// + template + inline eastl::pair + mismatch(InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, // InputIterator2 last2, + BinaryPredicate predicate) + { + while((first1 != last1) && predicate(*first1, *first2)) // && (first2 != last2) <- C++ standard mismatch function doesn't check first2/last2. + { + ++first1; + ++first2; + } + + return eastl::pair(first1, first2); + } + + + /// lower_bound + /// + /// Finds the position of the first element in a sorted range that has a value + /// greater than or equivalent to a specified value. + /// + /// Effects: Finds the first position into which value can be inserted without + /// violating the ordering. + /// + /// Returns: The furthermost iterator i in the range [first, last) such that + /// for any iterator j in the range [first, i) the following corresponding + /// condition holds: *j < value. + /// + /// Complexity: At most 'log(last - first) + 1' comparisons. + /// + /// Optimizations: We have no need to specialize this implementation for random + /// access iterators (e.g. contiguous array), as the code below will already + /// take advantage of them. 
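+ /// Example usage (an illustrative sketch; 'sortedInts' is a hypothetical eastl::vector<int> sorted ascending):
+ ///     eastl::vector<int>::iterator it = eastl::lower_bound(sortedInts.begin(), sortedInts.end(), 42);
+ ///     // 42 is present in sortedInts if and only if ((it != sortedInts.end()) && !(42 < *it)).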
+ /// + template + ForwardIterator + lower_bound(ForwardIterator first, ForwardIterator last, const T& value) + { + typedef typename eastl::iterator_traits::difference_type DifferenceType; + + DifferenceType d = eastl::distance(first, last); // This will be efficient for a random access iterator such as an array. + + while(d > 0) + { + ForwardIterator i = first; + DifferenceType d2 = d >> 1; // We use '>>1' here instead of '/2' because MSVC++ for some reason generates significantly worse code for '/2'. Go figure. + + eastl::advance(i, d2); // This will be efficient for a random access iterator such as an array. + + if(*i < value) + { + // Disabled because std::lower_bound doesn't specify (23.3.3.3, p3) this can be done: EASTL_VALIDATE_COMPARE(!(value < *i)); // Validate that the compare function is sane. + first = ++i; + d -= d2 + 1; + } + else + d = d2; + } + return first; + } + + + /// lower_bound + /// + /// Finds the position of the first element in a sorted range that has a value + /// greater than or equivalent to a specified value. The input Compare function + /// takes two arguments and returns true if the first argument is less than + /// the second argument. + /// + /// Effects: Finds the first position into which value can be inserted without + /// violating the ordering. + /// + /// Returns: The furthermost iterator i in the range [first, last) such that + /// for any iterator j in the range [first, i) the following corresponding + /// condition holds: compare(*j, value) != false. + /// + /// Complexity: At most 'log(last - first) + 1' comparisons. + /// + /// Optimizations: We have no need to specialize this implementation for random + /// access iterators (e.g. contiguous array), as the code below will already + /// take advantage of them. + /// + template + ForwardIterator + lower_bound(ForwardIterator first, ForwardIterator last, const T& value, Compare compare) + { + typedef typename eastl::iterator_traits::difference_type DifferenceType; + + DifferenceType d = eastl::distance(first, last); // This will be efficient for a random access iterator such as an array. + + while(d > 0) + { + ForwardIterator i = first; + DifferenceType d2 = d >> 1; // We use '>>1' here instead of '/2' because MSVC++ for some reason generates significantly worse code for '/2'. Go figure. + + eastl::advance(i, d2); // This will be efficient for a random access iterator such as an array. + + if(compare(*i, value)) + { + // Disabled because std::lower_bound doesn't specify (23.3.3.1, p3) this can be done: EASTL_VALIDATE_COMPARE(!compare(value, *i)); // Validate that the compare function is sane. + first = ++i; + d -= d2 + 1; + } + else + d = d2; + } + return first; + } + + + + /// upper_bound + /// + /// Finds the position of the first element in a sorted range that has a + /// value that is greater than a specified value. + /// + /// Effects: Finds the furthermost position into which value can be inserted + /// without violating the ordering. + /// + /// Returns: The furthermost iterator i in the range [first, last) such that + /// for any iterator j in the range [first, i) the following corresponding + /// condition holds: !(value < *j). + /// + /// Complexity: At most 'log(last - first) + 1' comparisons. 
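+ /// Example usage (an illustrative sketch; 'sortedInts' is a hypothetical eastl::vector<int> sorted ascending):
+ ///     eastl::vector<int>::iterator it = eastl::upper_bound(sortedInts.begin(), sortedInts.end(), 42);
+ ///     // The range [eastl::lower_bound(sortedInts.begin(), sortedInts.end(), 42), it)
+ ///     // bounds the run of elements equivalent to 42.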
+ /// + template + ForwardIterator + upper_bound(ForwardIterator first, ForwardIterator last, const T& value) + { + typedef typename eastl::iterator_traits::difference_type DifferenceType; + + DifferenceType len = eastl::distance(first, last); + + while(len > 0) + { + ForwardIterator i = first; + DifferenceType len2 = len >> 1; // We use '>>1' here instead of '/2' because MSVC++ for some reason generates significantly worse code for '/2'. Go figure. + + eastl::advance(i, len2); + + if(!(value < *i)) // Note that we always express value comparisons in terms of < or ==. + { + first = ++i; + len -= len2 + 1; + } + else + { + // Disabled because std::upper_bound doesn't specify (23.3.3.2, p3) this can be done: EASTL_VALIDATE_COMPARE(!(*i < value)); // Validate that the compare function is sane. + len = len2; + } + } + return first; + } + + + /// upper_bound + /// + /// Finds the position of the first element in a sorted range that has a + /// value that is greater than a specified value. The input Compare function + /// takes two arguments and returns true if the first argument is less than + /// the second argument. + /// + /// Effects: Finds the furthermost position into which value can be inserted + /// without violating the ordering. + /// + /// Returns: The furthermost iterator i in the range [first, last) such that + /// for any iterator j in the range [first, i) the following corresponding + /// condition holds: compare(value, *j) == false. + /// + /// Complexity: At most 'log(last - first) + 1' comparisons. + /// + template + ForwardIterator + upper_bound(ForwardIterator first, ForwardIterator last, const T& value, Compare compare) + { + typedef typename eastl::iterator_traits::difference_type DifferenceType; + + DifferenceType len = eastl::distance(first, last); + + while(len > 0) + { + ForwardIterator i = first; + DifferenceType len2 = len >> 1; // We use '>>1' here instead of '/2' because MSVC++ for some reason generates significantly worse code for '/2'. Go figure. + + eastl::advance(i, len2); + + if(!compare(value, *i)) + { + first = ++i; + len -= len2 + 1; + } + else + { + // Disabled because std::upper_bound doesn't specify (23.3.3.2, p3) this can be done: EASTL_VALIDATE_COMPARE(!compare(*i, value)); // Validate that the compare function is sane. + len = len2; + } + } + return first; + } + + + /// equal_range + /// + /// Effects: Finds the largest subrange [i, j) such that the value can be inserted + /// at any iterator k in it without violating the ordering. k satisfies the + /// corresponding conditions: !(*k < value) && !(value < *k). + /// + /// Complexity: At most '2 * log(last - first) + 1' comparisons. + /// + template + pair + equal_range(ForwardIterator first, ForwardIterator last, const T& value) + { + typedef pair ResultType; + typedef typename eastl::iterator_traits::difference_type DifferenceType; + + DifferenceType d = eastl::distance(first, last); + + while(d > 0) + { + ForwardIterator i(first); + DifferenceType d2 = d >> 1; // We use '>>1' here instead of '/2' because MSVC++ for some reason generates significantly worse code for '/2'. Go figure. + + eastl::advance(i, d2); + + if(*i < value) + { + EASTL_VALIDATE_COMPARE(!(value < *i)); // Validate that the compare function is sane. + first = ++i; + d -= d2 + 1; + } + else if(value < *i) + { + EASTL_VALIDATE_COMPARE(!(*i < value)); // Validate that the compare function is sane. 
+ d = d2; + last = i; + } + else + { + ForwardIterator j(i); + + return ResultType(eastl::lower_bound(first, i, value), + eastl::upper_bound(++j, last, value)); + } + } + return ResultType(first, first); + } + + + /// equal_range + /// + /// Effects: Finds the largest subrange [i, j) such that the value can be inserted + /// at any iterator k in it without violating the ordering. k satisfies the + /// corresponding conditions: compare(*k, value) == false && compare(value, *k) == false. + /// + /// Complexity: At most '2 * log(last - first) + 1' comparisons. + /// + template + pair + equal_range(ForwardIterator first, ForwardIterator last, const T& value, Compare compare) + { + typedef pair ResultType; + typedef typename eastl::iterator_traits::difference_type DifferenceType; + + DifferenceType d = eastl::distance(first, last); + + while(d > 0) + { + ForwardIterator i(first); + DifferenceType d2 = d >> 1; // We use '>>1' here instead of '/2' because MSVC++ for some reason generates significantly worse code for '/2'. Go figure. + + eastl::advance(i, d2); + + if(compare(*i, value)) + { + EASTL_VALIDATE_COMPARE(!compare(value, *i)); // Validate that the compare function is sane. + first = ++i; + d -= d2 + 1; + } + else if(compare(value, *i)) + { + EASTL_VALIDATE_COMPARE(!compare(*i, value)); // Validate that the compare function is sane. + d = d2; + last = i; + } + else + { + ForwardIterator j(i); + + return ResultType(eastl::lower_bound(first, i, value, compare), + eastl::upper_bound(++j, last, value, compare)); + } + } + return ResultType(first, first); + } + + + /// replace + /// + /// Effects: Substitutes elements referred by the iterator i in the range [first, last) + /// with new_value, when the following corresponding conditions hold: *i == old_value. + /// + /// Complexity: Exactly 'last - first' applications of the corresponding predicate. + /// + /// Note: The predicate version of replace is replace_if and not another variation of replace. + /// This is because both versions would have the same parameter count and there could be ambiguity. + /// + template + inline void + replace(ForwardIterator first, ForwardIterator last, const T& old_value, const T& new_value) + { + for(; first != last; ++first) + { + if(*first == old_value) + *first = new_value; + } + } + + + /// replace_if + /// + /// Effects: Substitutes elements referred by the iterator i in the range [first, last) + /// with new_value, when the following corresponding conditions hold: predicate(*i) != false. + /// + /// Complexity: Exactly 'last - first' applications of the corresponding predicate. + /// + /// Note: The predicate version of replace_if is replace and not another variation of replace_if. + /// This is because both versions would have the same parameter count and there could be ambiguity. + /// + template + inline void + replace_if(ForwardIterator first, ForwardIterator last, Predicate predicate, const T& new_value) + { + for(; first != last; ++first) + { + if(predicate(*first)) + *first = new_value; + } + } + + + /// remove_copy + /// + /// Effects: Copies all the elements referred to by the iterator i in the range + /// [first, last) for which the following corresponding condition does not hold: + /// *i == value. + /// + /// Requires: The ranges [first, last) and [result, result + (last - first)) shall not overlap. + /// + /// Returns: The end of the resulting range. + /// + /// Complexity: Exactly 'last - first' applications of the corresponding predicate. 
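+ /// Example usage (an illustrative sketch; 'src' and 'dst' are hypothetical eastl::vector<int>
+ /// objects, with 'dst' already resized to src.size() so the destination range is valid):
+ ///     eastl::vector<int>::iterator dstEnd =
+ ///         eastl::remove_copy(src.begin(), src.end(), dst.begin(), 4); // Copies everything except 4s.
+ ///     dst.erase(dstEnd, dst.end()); // Trim the unused tail.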
+ /// + template + inline OutputIterator + remove_copy(InputIterator first, InputIterator last, OutputIterator result, const T& value) + { + for(; first != last; ++first) + { + if(!(*first == value)) // Note that we always express value comparisons in terms of < or ==. + { + *result = eastl::move(*first); + ++result; + } + } + return result; + } + + + /// remove_copy_if + /// + /// Effects: Copies all the elements referred to by the iterator i in the range + /// [first, last) for which the following corresponding condition does not hold: + /// predicate(*i) != false. + /// + /// Requires: The ranges [first, last) and [result, result + (last - first)) shall not overlap. + /// + /// Returns: The end of the resulting range. + /// + /// Complexity: Exactly 'last - first' applications of the corresponding predicate. + /// + template + inline OutputIterator + remove_copy_if(InputIterator first, InputIterator last, OutputIterator result, Predicate predicate) + { + for(; first != last; ++first) + { + if(!predicate(*first)) + { + *result = eastl::move(*first); + ++result; + } + } + return result; + } + + + /// remove + /// + /// Effects: Eliminates all the elements referred to by iterator i in the + /// range [first, last) for which the following corresponding condition + /// holds: *i == value. + /// + /// Returns: The end of the resulting range. + /// + /// Complexity: Exactly 'last - first' applications of the corresponding predicate. + /// + /// Note: The predicate version of remove is remove_if and not another variation of remove. + /// This is because both versions would have the same parameter count and there could be ambiguity. + /// + /// Note: Since this function moves the element to the back of the heap and + /// doesn't actually remove it from the given container, the user must call + /// the container erase function if the user wants to erase the element + /// from the container. + /// + /// Example usage: + /// vector intArray; + /// ... + /// intArray.erase(remove(intArray.begin(), intArray.end(), 4), intArray.end()); // Erase all elements of value 4. + /// + template + inline ForwardIterator + remove(ForwardIterator first, ForwardIterator last, const T& value) + { + first = eastl::find(first, last, value); + if(first != last) + { + ForwardIterator i(first); + return eastl::remove_copy(++i, last, first, value); + } + return first; + } + + + /// remove_if + /// + /// Effects: Eliminates all the elements referred to by iterator i in the + /// range [first, last) for which the following corresponding condition + /// holds: predicate(*i) != false. + /// + /// Returns: The end of the resulting range. + /// + /// Complexity: Exactly 'last - first' applications of the corresponding predicate. + /// + /// Note: The predicate version of remove_if is remove and not another variation of remove_if. + /// This is because both versions would have the same parameter count and there could be ambiguity. + /// + /// Note: Since this function moves the element to the back of the heap and + /// doesn't actually remove it from the given container, the user must call + /// the container erase function if the user wants to erase the element + /// from the container. + /// + /// Example usage: + /// vector intArray; + /// ... + /// intArray.erase(remove(intArray.begin(), intArray.end(), bind2nd(less(), (int)3)), intArray.end()); // Erase all elements less than 3. 
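+ /// Equivalent example with a lambda predicate (an illustrative sketch):
+ ///     intArray.erase(eastl::remove_if(intArray.begin(), intArray.end(),
+ ///                                     [](int x) { return x < 3; }),
+ ///                    intArray.end());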
+ /// + template + inline ForwardIterator + remove_if(ForwardIterator first, ForwardIterator last, Predicate predicate) + { + first = eastl::find_if(first, last, predicate); + if(first != last) + { + ForwardIterator i(first); + return eastl::remove_copy_if(++i, last, first, predicate); + } + return first; + } + + + /// replace_copy + /// + /// Effects: Assigns to every iterator i in the range [result, result + (last - first)) + /// either new_value or *(first + (i - result)) depending on whether the following + /// corresponding conditions hold: *(first + (i - result)) == old_value. + /// + /// Requires: The ranges [first, last) and [result, result + (last - first)) shall not overlap. + /// + /// Returns: result + (last - first). + /// + /// Complexity: Exactly 'last - first' applications of the corresponding predicate. + /// + /// Note: The predicate version of replace_copy is replace_copy_if and not another variation of replace_copy. + /// This is because both versions would have the same parameter count and there could be ambiguity. + /// + template + inline OutputIterator + replace_copy(InputIterator first, InputIterator last, OutputIterator result, const T& old_value, const T& new_value) + { + for(; first != last; ++first, ++result) + *result = (*first == old_value) ? new_value : *first; + return result; + } + + + /// replace_copy_if + /// + /// Effects: Assigns to every iterator i in the range [result, result + (last - first)) + /// either new_value or *(first + (i - result)) depending on whether the following + /// corresponding conditions hold: predicate(*(first + (i - result))) != false. + /// + /// Requires: The ranges [first, last) and [result, result+(lastfirst)) shall not overlap. + /// + /// Returns: result + (last - first). + /// + /// Complexity: Exactly 'last - first' applications of the corresponding predicate. + /// + /// Note: The predicate version of replace_copy_if is replace_copy and not another variation of replace_copy_if. + /// This is because both versions would have the same parameter count and there could be ambiguity. + /// + template + inline OutputIterator + replace_copy_if(InputIterator first, InputIterator last, OutputIterator result, Predicate predicate, const T& new_value) + { + for(; first != last; ++first, ++result) + *result = predicate(*first) ? new_value : *first; + return result; + } + + + + + // reverse + // + // We provide helper functions which allow reverse to be implemented more + // efficiently for some types of iterators and types. + // + template + inline void reverse_impl(BidirectionalIterator first, BidirectionalIterator last, EASTL_ITC_NS::bidirectional_iterator_tag) + { + for(; (first != last) && (first != --last); ++first) // We are not allowed to use operator <, <=, >, >= with a + eastl::iter_swap(first, last); // generic (bidirectional or otherwise) iterator. + } + + template + inline void reverse_impl(RandomAccessIterator first, RandomAccessIterator last, EASTL_ITC_NS::random_access_iterator_tag) + { + if(first != last) + { + for(; first < --last; ++first) // With a random access iterator, we can use operator < to more efficiently implement + eastl::iter_swap(first, last); // this algorithm. A generic iterator doesn't necessarily have an operator < defined. + } + } + + /// reverse + /// + /// Reverses the values within the range [first, last). + /// + /// Effects: For each nonnegative integer i <= (last - first) / 2, + /// applies swap to all pairs of iterators first + i, (last i) - 1. 
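+ /// Example usage (an illustrative sketch; 'intArray' is a hypothetical eastl::vector<int>):
+ ///     eastl::reverse(intArray.begin(), intArray.end());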
+ /// + /// Complexity: Exactly '(last - first) / 2' swaps. + /// + template + inline void reverse(BidirectionalIterator first, BidirectionalIterator last) + { + typedef typename eastl::iterator_traits::iterator_category IC; + eastl::reverse_impl(first, last, IC()); + } + + + + /// reverse_copy + /// + /// Copies the range [first, last) in reverse order to the result. + /// + /// Effects: Copies the range [first, last) to the range + /// [result, result + (last - first)) such that for any nonnegative + /// integer i < (last - first) the following assignment takes place: + /// *(result + (last - first) - i) = *(first + i) + /// + /// Requires: The ranges [first, last) and [result, result + (last - first)) + /// shall not overlap. + /// + /// Returns: result + (last - first). That is, returns the end of the output range. + /// + /// Complexity: Exactly 'last - first' assignments. + /// + template + inline OutputIterator + reverse_copy(BidirectionalIterator first, BidirectionalIterator last, OutputIterator result) + { + for(; first != last; ++result) + *result = *--last; + return result; + } + + + + /// search + /// + /// Search finds a subsequence within the range [first1, last1) that is identical to [first2, last2) + /// when compared element-by-element. It returns an iterator pointing to the beginning of that + /// subsequence, or else last1 if no such subsequence exists. As such, it is very much like + /// the C strstr function, with the primary difference being that strstr uses 0-terminated strings + /// whereas search uses an end iterator to specify the end of a string. + /// + /// Returns: The first iterator i in the range [first1, last1 - (last2 - first2)) such that for + /// any nonnegative integer n less than 'last2 - first2' the following corresponding condition holds: + /// *(i + n) == *(first2 + n). Returns last1 if no such iterator is found. + /// + /// Complexity: At most (last1 first1) * (last2 first2) applications of the corresponding predicate. + /// + template + ForwardIterator1 + search(ForwardIterator1 first1, ForwardIterator1 last1, + ForwardIterator2 first2, ForwardIterator2 last2) + { + if(first2 != last2) // If there is anything to search for... + { + // We need to make a special case for a pattern of one element, + // as the logic below prevents one element patterns from working. + ForwardIterator2 temp2(first2); + ++temp2; + + if(temp2 != last2) // If what we are searching for has a length > 1... + { + ForwardIterator1 cur1(first1); + ForwardIterator2 p2; + + while(first1 != last1) + { + // The following loop is the equivalent of eastl::find(first1, last1, *first2) + while((first1 != last1) && !(*first1 == *first2)) + ++first1; + + if(first1 != last1) + { + p2 = temp2; + cur1 = first1; + + if(++cur1 != last1) + { + while(*cur1 == *p2) + { + if(++p2 == last2) + return first1; + + if(++cur1 == last1) + return last1; + } + + ++first1; + continue; + } + } + return last1; + } + + // Fall through to the end. + } + else + return eastl::find(first1, last1, *first2); + } + + return first1; + + + #if 0 + /* Another implementation which is a little more simpler but executes a little slower on average. 
+ typedef typename eastl::iterator_traits::difference_type difference_type_1; + typedef typename eastl::iterator_traits::difference_type difference_type_2; + + const difference_type_2 d2 = eastl::distance(first2, last2); + + for(difference_type_1 d1 = eastl::distance(first1, last1); d1 >= d2; ++first1, --d1) + { + ForwardIterator1 temp1 = first1; + + for(ForwardIterator2 temp2 = first2; ; ++temp1, ++temp2) + { + if(temp2 == last2) + return first1; + if(!(*temp1 == *temp2)) + break; + } + } + + return last1; + */ + #endif + } + + + /// search + /// + /// Search finds a subsequence within the range [first1, last1) that is identical to [first2, last2) + /// when compared element-by-element. It returns an iterator pointing to the beginning of that + /// subsequence, or else last1 if no such subsequence exists. As such, it is very much like + /// the C strstr function, with the only difference being that strstr uses 0-terminated strings + /// whereas search uses an end iterator to specify the end of a string. + /// + /// Returns: The first iterator i in the range [first1, last1 - (last2 - first2)) such that for + /// any nonnegative integer n less than 'last2 - first2' the following corresponding condition holds: + /// predicate(*(i + n), *(first2 + n)) != false. Returns last1 if no such iterator is found. + /// + /// Complexity: At most (last1 first1) * (last2 first2) applications of the corresponding predicate. + /// + template + ForwardIterator1 + search(ForwardIterator1 first1, ForwardIterator1 last1, + ForwardIterator2 first2, ForwardIterator2 last2, + BinaryPredicate predicate) + { + typedef typename eastl::iterator_traits::difference_type difference_type_1; + typedef typename eastl::iterator_traits::difference_type difference_type_2; + + difference_type_2 d2 = eastl::distance(first2, last2); + + if(d2 != 0) + { + ForwardIterator1 i(first1); + eastl::advance(i, d2); + + for(difference_type_1 d1 = eastl::distance(first1, last1); d1 >= d2; --d1) + { + if(eastl::equal(first1, i, first2, predicate)) + return first1; + if(d1 > d2) // To do: Find a way to make the algorithm more elegant. + { + ++first1; + ++i; + } + } + return last1; + } + return first1; // Just like with strstr, we return first1 if the match string is empty. + } + + + + // search_n helper functions + // + template + ForwardIterator // Generic implementation. + search_n_impl(ForwardIterator first, ForwardIterator last, Size count, const T& value, EASTL_ITC_NS::forward_iterator_tag) + { + if(count <= 0) + return first; + + Size d1 = (Size)eastl::distance(first, last); // Should d1 be of type Size, ptrdiff_t, or iterator_traits::difference_type? + // The problem with using iterator_traits::difference_type is that + if(count > d1) // ForwardIterator may not be a true iterator but instead something like a pointer. + return last; + + for(; d1 >= count; ++first, --d1) + { + ForwardIterator i(first); + + for(Size n = 0; n < count; ++n, ++i, --d1) + { + if(!(*i == value)) // Note that we always express value comparisons in terms of < or ==. + goto not_found; + } + return first; + + not_found: + first = i; + } + return last; + } + + template inline + RandomAccessIterator // Random access iterator implementation. Much faster than generic implementation. 
+ search_n_impl(RandomAccessIterator first, RandomAccessIterator last, Size count, const T& value, EASTL_ITC_NS::random_access_iterator_tag) + { + if(count <= 0) + return first; + else if(count == 1) + return eastl::find(first, last, value); + else if(last > first) + { + RandomAccessIterator lookAhead; + RandomAccessIterator backTrack; + + Size skipOffset = (count - 1); + Size tailSize = (Size)(last - first); + Size remainder; + Size prevRemainder; + + for(lookAhead = first + skipOffset; tailSize >= count; lookAhead += count) + { + tailSize -= count; + + if(*lookAhead == value) + { + remainder = skipOffset; + + for(backTrack = lookAhead - 1; *backTrack == value; --backTrack) + { + if(--remainder == 0) + return (lookAhead - skipOffset); // success + } + + if(remainder <= tailSize) + { + prevRemainder = remainder; + + while(*(++lookAhead) == value) + { + if(--remainder == 0) + return (backTrack + 1); // success + } + tailSize -= (prevRemainder - remainder); + } + else + return last; // failure + } + + // lookAhead here is always pointing to the element of the last mismatch. + } + } + + return last; // failure + } + + + /// search_n + /// + /// Returns: The first iterator i in the range [first, last count) such that + /// for any nonnegative integer n less than count the following corresponding + /// conditions hold: *(i + n) == value, pred(*(i + n),value) != false. + /// Returns last if no such iterator is found. + /// + /// Complexity: At most '(last1 - first1) * count' applications of the corresponding predicate. + /// + template + ForwardIterator + search_n(ForwardIterator first, ForwardIterator last, Size count, const T& value) + { + typedef typename eastl::iterator_traits::iterator_category IC; + return eastl::search_n_impl(first, last, count, value, IC()); + } + + + /// binary_search + /// + /// Returns: true if there is an iterator i in the range [first last) that + /// satisfies the corresponding conditions: !(*i < value) && !(value < *i). + /// + /// Complexity: At most 'log(last - first) + 2' comparisons. + /// + /// Note: The reason binary_search returns bool instead of an iterator is + /// that search_n, lower_bound, or equal_range already return an iterator. + /// However, there are arguments that binary_search should return an iterator. + /// Note that we provide binary_search_i (STL extension) to return an iterator. + /// + /// To use search_n to find an item, do this: + /// iterator i = search_n(begin, end, 1, value); + /// To use lower_bound to find an item, do this: + /// iterator i = lower_bound(begin, end, value); + /// if((i != last) && !(value < *i)) + /// + /// It turns out that the above lower_bound method is as fast as binary_search + /// would be if it returned an iterator. + /// + template + inline bool + binary_search(ForwardIterator first, ForwardIterator last, const T& value) + { + // To do: This can be made slightly faster by not using lower_bound. + ForwardIterator i(eastl::lower_bound(first, last, value)); + return ((i != last) && !(value < *i)); // Note that we always express value comparisons in terms of < or ==. + } + + + /// binary_search + /// + /// Returns: true if there is an iterator i in the range [first last) that + /// satisfies the corresponding conditions: compare(*i, value) == false && + /// compare(value, *i) == false. + /// + /// Complexity: At most 'log(last - first) + 2' comparisons. + /// + /// Note: See comments above regarding the bool return value of binary_search. 
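+ /// Example usage (an illustrative sketch; 'sortedInts' is a hypothetical eastl::vector<int> sorted
+ /// ascending by the same comparison passed to binary_search):
+ ///     const bool found = eastl::binary_search(sortedInts.begin(), sortedInts.end(), 42, eastl::less<int>());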
+ /// + template + inline bool + binary_search(ForwardIterator first, ForwardIterator last, const T& value, Compare compare) + { + // To do: This can be made slightly faster by not using lower_bound. + ForwardIterator i(eastl::lower_bound(first, last, value, compare)); + return ((i != last) && !compare(value, *i)); + } + + + /// binary_search_i + /// + /// Returns: iterator if there is an iterator i in the range [first last) that + /// satisfies the corresponding conditions: !(*i < value) && !(value < *i). + /// Returns last if the value is not found. + /// + /// Complexity: At most 'log(last - first) + 2' comparisons. + /// + template + inline ForwardIterator + binary_search_i(ForwardIterator first, ForwardIterator last, const T& value) + { + // To do: This can be made slightly faster by not using lower_bound. + ForwardIterator i(eastl::lower_bound(first, last, value)); + if((i != last) && !(value < *i)) // Note that we always express value comparisons in terms of < or ==. + return i; + return last; + } + + + /// binary_search_i + /// + /// Returns: iterator if there is an iterator i in the range [first last) that + /// satisfies the corresponding conditions: !(*i < value) && !(value < *i). + /// Returns last if the value is not found. + /// + /// Complexity: At most 'log(last - first) + 2' comparisons. + /// + template + inline ForwardIterator + binary_search_i(ForwardIterator first, ForwardIterator last, const T& value, Compare compare) + { + // To do: This can be made slightly faster by not using lower_bound. + ForwardIterator i(eastl::lower_bound(first, last, value, compare)); + if((i != last) && !compare(value, *i)) + return i; + return last; + } + + + /// unique + /// + /// Given a sorted range, this function removes duplicated items. + /// Note that if you have a container then you will probably want + /// to call erase on the container with the return value if your + /// goal is to remove the duplicated items from the container. + /// + /// Effects: Eliminates all but the first element from every consecutive + /// group of equal elements referred to by the iterator i in the range + /// [first, last) for which the following corresponding condition holds: + /// *i == *(i - 1). + /// + /// Returns: The end of the resulting range. + /// + /// Complexity: If the range (last - first) is not empty, exactly (last - first) + /// applications of the corresponding predicate, otherwise no applications of the predicate. + /// + /// Example usage: + /// vector intArray; + /// ... + /// intArray.erase(unique(intArray.begin(), intArray.end()), intArray.end()); + /// + template + ForwardIterator unique(ForwardIterator first, ForwardIterator last) + { + first = eastl::adjacent_find(first, last); + + if(first != last) // We expect that there are duplicated items, else the user wouldn't be calling this function. + { + ForwardIterator dest(first); + + for(++first; first != last; ++first) + { + if(!(*dest == *first)) // Note that we always express value comparisons in terms of < or ==. + *++dest = *first; + } + return ++dest; + } + return last; + } + + + /// unique + /// + /// Given a sorted range, this function removes duplicated items. + /// Note that if you have a container then you will probably want + /// to call erase on the container with the return value if your + /// goal is to remove the duplicated items from the container. 
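+	/// Example usage with a predicate (illustrative sketch, not part of the original
+	/// EASTL sources; assumes <EASTL/vector.h> is included):
+	///     eastl::vector<int> v = { 1, 1, 2, 2, 2, 3 };   // consecutive equal groups
+	///     auto sameParity = [](int a, int b) { return (a & 1) == (b & 1); };
+	///     v.erase(eastl::unique(v.begin(), v.end(), sameParity), v.end());
+	///     // v == { 1, 2, 3 }: consecutive elements the predicate deems equal are collapsed.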
+ /// + /// Effects: Eliminates all but the first element from every consecutive + /// group of equal elements referred to by the iterator i in the range + /// [first, last) for which the following corresponding condition holds: + /// predicate(*i, *(i - 1)) != false. + /// + /// Returns: The end of the resulting range. + /// + /// Complexity: If the range (last - first) is not empty, exactly (last - first) + /// applications of the corresponding predicate, otherwise no applications of the predicate. + /// + template + ForwardIterator unique(ForwardIterator first, ForwardIterator last, BinaryPredicate predicate) + { + first = eastl::adjacent_find(first, last, predicate); + + if(first != last) // We expect that there are duplicated items, else the user wouldn't be calling this function. + { + ForwardIterator dest(first); + + for(++first; first != last; ++first) + { + if(!predicate(*dest, *first)) + *++dest = *first; + } + return ++dest; + } + return last; + } + + + + // find_end + // + // We provide two versions here, one for a bidirectional iterators and one for + // regular forward iterators. Given that we are searching backward, it's a bit + // more efficient if we can use backwards iteration to implement our search, + // though this requires an iterator that can be reversed. + // + template + ForwardIterator1 + find_end_impl(ForwardIterator1 first1, ForwardIterator1 last1, + ForwardIterator2 first2, ForwardIterator2 last2, + EASTL_ITC_NS::forward_iterator_tag, EASTL_ITC_NS::forward_iterator_tag) + { + if(first2 != last2) // We have to do this check because the search algorithm below will return first1 (and not last1) if the first2/last2 range is empty. + { + for(ForwardIterator1 result(last1); ; ) + { + const ForwardIterator1 resultNext(eastl::search(first1, last1, first2, last2)); + + if(resultNext != last1) // If another sequence was found... + { + first1 = result = resultNext; + ++first1; + } + else + return result; + } + } + return last1; + } + + template + BidirectionalIterator1 + find_end_impl(BidirectionalIterator1 first1, BidirectionalIterator1 last1, + BidirectionalIterator2 first2, BidirectionalIterator2 last2, + EASTL_ITC_NS::bidirectional_iterator_tag, EASTL_ITC_NS::bidirectional_iterator_tag) + { + typedef eastl::reverse_iterator reverse_iterator1; + typedef eastl::reverse_iterator reverse_iterator2; + + reverse_iterator1 rresult(eastl::search(reverse_iterator1(last1), reverse_iterator1(first1), + reverse_iterator2(last2), reverse_iterator2(first2))); + if(rresult.base() != first1) // If we found something... + { + BidirectionalIterator1 result(rresult.base()); + + eastl::advance(result, -eastl::distance(first2, last2)); // We have an opportunity to optimize this, as the + return result; // search function already calculates this distance. + } + return last1; + } + + /// find_end + /// + /// Finds the last occurrence of the second sequence in the first sequence. + /// As such, this function is much like the C string function strrstr and it + /// is also the same as a reversed version of 'search'. It is called find_end + /// instead of the possibly more consistent search_end simply because the C++ + /// standard algorithms have such naming. + /// + /// Returns an iterator between first1 and last1 if the sequence is found. + /// returns last1 (the end of the first seqence) if the sequence is not found. 
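+	/// Example usage (illustrative sketch, not part of the original EASTL sources;
+	/// assumes <EASTL/vector.h> is included):
+	///     eastl::vector<int> haystack = { 1, 2, 3, 1, 2, 3, 4 };
+	///     eastl::vector<int> needle   = { 1, 2, 3 };
+	///     auto it = eastl::find_end(haystack.begin(), haystack.end(), needle.begin(), needle.end());
+	///     // it points at the last occurrence (index 3); haystack.end() is returned if there is none.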
+ /// + template + inline ForwardIterator1 + find_end(ForwardIterator1 first1, ForwardIterator1 last1, + ForwardIterator2 first2, ForwardIterator2 last2) + { + typedef typename eastl::iterator_traits::iterator_category IC1; + typedef typename eastl::iterator_traits::iterator_category IC2; + + return eastl::find_end_impl(first1, last1, first2, last2, IC1(), IC2()); + } + + + + + // To consider: Fold the predicate and non-predicate versions of + // this algorithm into a single function. + template + ForwardIterator1 + find_end_impl(ForwardIterator1 first1, ForwardIterator1 last1, + ForwardIterator2 first2, ForwardIterator2 last2, + BinaryPredicate predicate, + EASTL_ITC_NS::forward_iterator_tag, EASTL_ITC_NS::forward_iterator_tag) + { + if(first2 != last2) // We have to do this check because the search algorithm below will return first1 (and not last1) if the first2/last2 range is empty. + { + for(ForwardIterator1 result = last1; ; ) + { + const ForwardIterator1 resultNext(eastl::search(first1, last1, first2, last2, predicate)); + + if(resultNext != last1) // If another sequence was found... + { + first1 = result = resultNext; + ++first1; + } + else + return result; + } + } + return last1; + } + + template + BidirectionalIterator1 + find_end_impl(BidirectionalIterator1 first1, BidirectionalIterator1 last1, + BidirectionalIterator2 first2, BidirectionalIterator2 last2, + BinaryPredicate predicate, + EASTL_ITC_NS::bidirectional_iterator_tag, EASTL_ITC_NS::bidirectional_iterator_tag) + { + typedef eastl::reverse_iterator reverse_iterator1; + typedef eastl::reverse_iterator reverse_iterator2; + + reverse_iterator1 rresult(eastl::search + (reverse_iterator1(last1), reverse_iterator1(first1), + reverse_iterator2(last2), reverse_iterator2(first2), + predicate)); + if(rresult.base() != first1) // If we found something... + { + BidirectionalIterator1 result(rresult.base()); + eastl::advance(result, -eastl::distance(first2, last2)); + return result; + } + return last1; + } + + + /// find_end + /// + /// Effects: Finds a subsequence of equal values in a sequence. + /// + /// Returns: The last iterator i in the range [first1, last1 - (last2 - first2)) + /// such that for any nonnegative integer n < (last2 - first2), the following + /// corresponding conditions hold: pred(*(i+n),*(first2+n)) != false. Returns + /// last1 if no such iterator is found. + /// + /// Complexity: At most (last2 - first2) * (last1 - first1 - (last2 - first2) + 1) + /// applications of the corresponding predicate. + /// + template + inline ForwardIterator1 + find_end(ForwardIterator1 first1, ForwardIterator1 last1, + ForwardIterator2 first2, ForwardIterator2 last2, + BinaryPredicate predicate) + { + typedef typename eastl::iterator_traits::iterator_category IC1; + typedef typename eastl::iterator_traits::iterator_category IC2; + + return eastl::find_end_impl + (first1, last1, first2, last2, predicate, IC1(), IC2()); + } + + + /// set_difference + /// + /// set_difference iterates over both input ranges and copies elements present + /// in the first range but not the second to the output range. + /// + /// Effects: Copies the elements of the range [first1, last1) which are not + /// present in the range [first2, last2) to the range beginning at result. + /// The elements in the constructed range are sorted. + /// + /// Requires: The input ranges must be sorted. + /// Requires: The output range shall not overlap with either of the original ranges. + /// + /// Returns: The end of the output range. 
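+	/// Example usage (illustrative sketch, not part of the original EASTL sources;
+	/// assumes <EASTL/vector.h> and <EASTL/iterator.h> are included):
+	///     eastl::vector<int> a = { 1, 2, 3, 4, 5 };   // both inputs must be sorted
+	///     eastl::vector<int> b = { 2, 4 };
+	///     eastl::vector<int> out;
+	///     eastl::set_difference(a.begin(), a.end(), b.begin(), b.end(), eastl::back_inserter(out));
+	///     // out == { 1, 3, 5 }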
+ /// + /// Complexity: At most (2 * ((last1 - first1) + (last2 - first2)) - 1) comparisons. + /// + template + OutputIterator set_difference(InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, InputIterator2 last2, + OutputIterator result) + { + while((first1 != last1) && (first2 != last2)) + { + if(*first1 < *first2) + { + *result = *first1; + ++first1; + ++result; + } + else if(*first2 < *first1) + ++first2; + else + { + ++first1; + ++first2; + } + } + + return eastl::copy(first1, last1, result); + } + + + template + OutputIterator set_difference(InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, InputIterator2 last2, + OutputIterator result, Compare compare) + { + while((first1 != last1) && (first2 != last2)) + { + if(compare(*first1, *first2)) + { + EASTL_VALIDATE_COMPARE(!compare(*first2, *first1)); // Validate that the compare function is sane. + *result = *first1; + ++first1; + ++result; + } + else if(compare(*first2, *first1)) + { + EASTL_VALIDATE_COMPARE(!compare(*first1, *first2)); // Validate that the compare function is sane. + ++first2; + } + else + { + ++first1; + ++first2; + } + } + + return eastl::copy(first1, last1, result); + } + + + /// set_difference_2 + /// + /// set_difference_2 iterates over both input ranges and copies elements present + /// in the first range but not the second to the first output range and copies + /// elements present in the second range but not in the first to the second output + /// range. + /// + /// Effects: Copies the elements of the range [first1, last1) which are not + /// present in the range [first2, last2) to the first output range beginning at + /// result1 AND copies the element of range [first2, last2) which are not present + /// in the range [first1, last) to the second output range beginning at result2. + /// The elements in the constructed range are sorted. + /// + /// Requires: The input ranges must be sorted. + /// Requires: The output ranges shall not overlap with either of the original ranges. + /// + /// Returns: Nothing. + /// + /// Complexity: At most (2 * ((last1 - first1) + (last2 - first2)) - 1) comparisons. + /// + template + void set_difference_2(InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, InputIterator2 last2, + OutputIterator result1, OutputIterator result2, Compare compare) + { + while ((first1 != last1) && (first2 != last2)) + { + if (compare(*first1, *first2)) + { + EASTL_VALIDATE_COMPARE(!compare(*first2, *first1)); // Validate that the compare function is sane. + *result1++ = *first1++; + } + else if (compare(*first2, *first1)) + { + EASTL_VALIDATE_COMPARE(!compare(*first1, *first2)); // Validate that the compare function is sane. + *result2++ = *first2++; + } + else + { + ++first1; + ++first2; + } + } + + eastl::copy(first2, last2, result2); + eastl::copy(first1, last1, result1); + } + + /// set_difference_2 + /// + /// set_difference_2 with the default comparison object is eastl::less<>. + /// + template + void set_difference_2(InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, InputIterator2 last2, + OutputIterator result1, OutputIterator result2) + { + eastl::set_difference_2(first1, last1, first2, last2, result1, result2, eastl::less<>{}); + } + + + /// set_symmetric_difference + /// + /// set_difference iterates over both input ranges and copies elements present + /// in the either range but not the other to the output range. 
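+	/// Example usage (illustrative sketch, not part of the original EASTL sources;
+	/// assumes <EASTL/vector.h> and <EASTL/iterator.h> are included):
+	///     eastl::vector<int> a = { 1, 2, 3, 4 };   // both inputs must be sorted
+	///     eastl::vector<int> b = { 3, 4, 5, 6 };
+	///     eastl::vector<int> out;
+	///     eastl::set_symmetric_difference(a.begin(), a.end(), b.begin(), b.end(), eastl::back_inserter(out));
+	///     // out == { 1, 2, 5, 6 }: elements present in exactly one of the two ranges.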
+ /// + /// Effects: Copies the elements of the range [first1, last1) which are not + /// present in the range [first2, last2), and the elements of the range [first2, last2) + /// which are not present in the range [first1, last1) to the range beginning at result. + /// The elements in the constructed range are sorted. + /// + /// Requires: The input ranges must be sorted. + /// Requires: The resulting range shall not overlap with either of the original ranges. + /// + /// Returns: The end of the constructed range. + /// + /// Complexity: At most (2 * ((last1 - first1) + (last2 - first2)) - 1) comparisons. + /// + template + OutputIterator set_symmetric_difference(InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, InputIterator2 last2, + OutputIterator result) + { + while((first1 != last1) && (first2 != last2)) + { + if(*first1 < *first2) + { + *result = *first1; + ++first1; + ++result; + } + else if(*first2 < *first1) + { + *result = *first2; + ++first2; + ++result; + } + else + { + ++first1; + ++first2; + } + } + + return eastl::copy(first2, last2, eastl::copy(first1, last1, result)); + } + + + template + OutputIterator set_symmetric_difference(InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, InputIterator2 last2, + OutputIterator result, Compare compare) + { + while((first1 != last1) && (first2 != last2)) + { + if(compare(*first1, *first2)) + { + EASTL_VALIDATE_COMPARE(!compare(*first2, *first1)); // Validate that the compare function is sane. + *result = *first1; + ++first1; + ++result; + } + else if(compare(*first2, *first1)) + { + EASTL_VALIDATE_COMPARE(!compare(*first1, *first2)); // Validate that the compare function is sane. + *result = *first2; + ++first2; + ++result; + } + else + { + ++first1; + ++first2; + } + } + + return eastl::copy(first2, last2, eastl::copy(first1, last1, result)); + } + + + /// set_intersection + /// + /// set_intersection over both ranges and copies elements present in + /// both ranges to the output range. + /// + /// Effects: Constructs a sorted intersection of the elements from the + /// two ranges; that is, the set of elements that are present in both of the ranges. + /// + /// Requires: The input ranges must be sorted. + /// Requires: The resulting range shall not overlap with either of the original ranges. + /// + /// Returns: The end of the constructed range. + /// + /// Complexity: At most 2 * ((last1 - first1) + (last2 - first2)) - 1) comparisons. + /// + /// Note: The copying operation is stable; if an element is present in both ranges, + /// the one from the first range is copied. + /// + template + OutputIterator set_intersection(InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, InputIterator2 last2, + OutputIterator result) + { + while((first1 != last1) && (first2 != last2)) + { + if(*first1 < *first2) + ++first1; + else if(*first2 < *first1) + ++first2; + else + { + *result = *first1; + ++first1; + ++first2; + ++result; + } + } + + return result; + } + + + template + OutputIterator set_intersection(InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, InputIterator2 last2, + OutputIterator result, Compare compare) + { + while((first1 != last1) && (first2 != last2)) + { + if(compare(*first1, *first2)) + { + EASTL_VALIDATE_COMPARE(!compare(*first2, *first1)); // Validate that the compare function is sane. + ++first1; + } + else if(compare(*first2, *first1)) + { + EASTL_VALIDATE_COMPARE(!compare(*first1, *first2)); // Validate that the compare function is sane. 
+ ++first2; + } + else + { + *result = *first1; + ++first1; + ++first2; + ++result; + } + } + + return result; + } + + + + /// set_union + /// + /// set_union iterates over both ranges and copies elements present in + /// both ranges to the output range. + /// + /// Effects: Constructs a sorted union of the elements from the two ranges; + /// that is, the set of elements that are present in one or both of the ranges. + /// + /// Requires: The input ranges must be sorted. + /// Requires: The resulting range shall not overlap with either of the original ranges. + /// + /// Returns: The end of the constructed range. + /// + /// Complexity: At most (2 * ((last1 - first1) + (last2 - first2)) - 1) comparisons. + /// + /// Note: The copying operation is stable; if an element is present in both ranges, + /// the one from the first range is copied. + /// + template + OutputIterator set_union(InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, InputIterator2 last2, + OutputIterator result) + { + while((first1 != last1) && (first2 != last2)) + { + if(*first1 < *first2) + { + *result = *first1; + ++first1; + } + else if(*first2 < *first1) + { + *result = *first2; + ++first2; + } + else + { + *result = *first1; + ++first1; + ++first2; + } + ++result; + } + + return eastl::copy(first2, last2, eastl::copy(first1, last1, result)); + } + + + template + OutputIterator set_union(InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, InputIterator2 last2, + OutputIterator result, Compare compare) + { + while((first1 != last1) && (first2 != last2)) + { + if(compare(*first1, *first2)) + { + EASTL_VALIDATE_COMPARE(!compare(*first2, *first1)); // Validate that the compare function is sane. + *result = *first1; + ++first1; + } + else if(compare(*first2, *first1)) + { + EASTL_VALIDATE_COMPARE(!compare(*first1, *first2)); // Validate that the compare function is sane. + *result = *first2; + ++first2; + } + else + { + *result = *first1; + ++first1; + ++first2; + } + ++result; + } + + return eastl::copy(first2, last2, eastl::copy(first1, last1, result)); + } + + + /// set_decomposition + /// + /// set_decomposition iterates over both ranges and copies elements to one of the three + /// categories of output ranges. + /// + /// Effects: Constructs three sorted containers of the elements from the two ranges. + /// * OutputIterator1 is elements only in Container1. + /// * OutputIterator2 is elements only in Container2. + /// * OutputIterator3 is elements that are in both Container1 and Container2. + /// + /// Requires: The input ranges must be sorted. + /// Requires: The resulting ranges shall not overlap with either of the original ranges. + /// + /// Returns: The end of the constructed range of elements in both Container1 and Container2. + /// + /// Complexity: At most (2 * ((last1 - first1) + (last2 - first2)) - 1) comparisons. + /// + template + OutputIterator3 set_decomposition(InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, InputIterator2 last2, + OutputIterator1 result1, OutputIterator2 result2, OutputIterator3 result3, Compare compare) + { + while ((first1 != last1) && (first2 != last2)) + { + if (compare(*first1, *first2)) + { + EASTL_VALIDATE_COMPARE(!compare(*first2, *first1)); // Validate that the compare function is sane. + *result1++ = *first1++; + } + else if (compare(*first2, *first1)) + { + EASTL_VALIDATE_COMPARE(!compare(*first1, *first2)); // Validate that the compare function is sane. 
+ *result2++ = *first2++; + } + else + { + *result3++ = *first1++; + ++first2; + } + } + + eastl::copy(first1, last1, result1); + eastl::copy(first2, last2, result2); + + return result3; + } + + /// set_decomposition + /// + /// set_decomposition with the default comparison object is eastl::less<>. + /// + template + OutputIterator3 set_decomposition(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, + OutputIterator1 result1, OutputIterator2 result2, OutputIterator3 result3) + { + return eastl::set_decomposition(first1, last1, first2, last2, result1, result2, result3, eastl::less<>{}); + } + + + /// is_permutation + /// + template + bool is_permutation(ForwardIterator1 first1, ForwardIterator1 last1, ForwardIterator2 first2) + { + typedef typename eastl::iterator_traits::difference_type difference_type; + + // Skip past any equivalent initial elements. + while((first1 != last1) && (*first1 == *first2)) + { + ++first1; + ++first2; + } + + if(first1 != last1) + { + const difference_type first1Size = eastl::distance(first1, last1); + ForwardIterator2 last2 = first2; + eastl::advance(last2, first1Size); + + for(ForwardIterator1 i = first1; i != last1; ++i) + { + if(i == eastl::find(first1, i, *i)) + { + const difference_type c = eastl::count(first2, last2, *i); + + if((c == 0) || (c != eastl::count(i, last1, *i))) + return false; + } + } + } + + return true; + } + + /// is_permutation + /// + template + bool is_permutation(ForwardIterator1 first1, ForwardIterator1 last1, ForwardIterator2 first2, BinaryPredicate predicate) + { + typedef typename eastl::iterator_traits::difference_type difference_type; + + // Skip past any equivalent initial elements. + while((first1 != last1) && predicate(*first1, *first2)) + { + ++first1; + ++first2; + } + + if(first1 != last1) + { + const difference_type first1Size = eastl::distance(first1, last1); + ForwardIterator2 last2 = first2; + eastl::advance(last2, first1Size); + + for(ForwardIterator1 i = first1; i != last1; ++i) + { + if(i == eastl::find(first1, i, *i, predicate)) + { + const difference_type c = eastl::count(first2, last2, *i, predicate); + + if((c == 0) || (c != eastl::count(i, last1, *i, predicate))) + return false; + } + } + } + + return true; + } + + + /// next_permutation + /// + /// mutates the range [first, last) to the next permutation. Returns true if the + /// new range is not the final permutation (sorted like the starting permutation). + /// Permutations start with a sorted range, and false is returned when next_permutation + /// results in the initial sorted range, or if the range has <= 1 element. + /// Note that elements are compared by operator < (as usual) and that elements deemed + /// equal via this are not rearranged. + /// + /// http://marknelson.us/2002/03/01/next-permutation/ + /// Basically we start with an ordered range and reverse it's order one specifically + /// chosen swap and reverse at a time. It happens that this require going through every + /// permutation of the range. We use the same variable names as the document above. 
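+	/// Example usage with a comparison object (illustrative sketch, not part of the
+	/// original EASTL sources; assumes <EASTL/vector.h>, <EASTL/sort.h> and
+	/// <EASTL/functional.h> are included):
+	///     eastl::vector<int> v = { 1, 2, 3 };
+	///     eastl::sort(v.begin(), v.end(), eastl::greater<int>());   // start from { 3, 2, 1 }
+	///     do {
+	///         // visits { 3, 2, 1 }, { 3, 1, 2 }, { 2, 3, 1 }, { 2, 1, 3 }, { 1, 3, 2 }, { 1, 2, 3 }
+	///     } while(eastl::next_permutation(v.begin(), v.end(), eastl::greater<int>()));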
+ /// + /// To consider: Significantly improved permutation/combination functionality: + /// http://home.roadrunner.com/~hinnant/combinations.html + /// + /// Example usage: + /// vector intArray; + /// // + /// sort(intArray.begin(), intArray.end()); + /// do { + /// // + /// } while(next_permutation(intArray.begin(), intArray.end())); + /// + + template + bool next_permutation(BidirectionalIterator first, BidirectionalIterator last, Compare compare) + { + if(first != last) // If there is anything in the range... + { + BidirectionalIterator i = last; + + if(first != --i) // If the range has more than one item... + { + for(;;) + { + BidirectionalIterator ii(i), j; + + if(compare(*--i, *ii)) // Find two consecutive values where the first is less than the second. + { + j = last; + while(!compare(*i, *--j)) // Find the final value that's greater than the first (it may be equal to the second). + {} + eastl::iter_swap(i, j); // Swap the first and the final. + eastl::reverse(ii, last); // Reverse the ranget from second to last. + return true; + } + + if(i == first) // There are no two consecutive values where the first is less than the second, meaning the range is in reverse order. The reverse ordered range is always the last permutation. + { + eastl::reverse(first, last); + break; // We are done. + } + } + } + } + + return false; + } + + template + bool next_permutation(BidirectionalIterator first, BidirectionalIterator last) + { + typedef typename eastl::iterator_traits::value_type value_type; + + return eastl::next_permutation(first, last, eastl::less()); + } + + + + /// rotate + /// + /// Effects: For each non-negative integer i < (last - first), places the element from the + /// position first + i into position first + (i + (last - middle)) % (last - first). + /// + /// Returns: first + (last - middle). That is, returns where first went to. + /// + /// Remarks: This is a left rotate. + /// + /// Requires: [first,middle) and [middle,last) shall be valid ranges. ForwardIterator shall + /// satisfy the requirements of ValueSwappable (17.6.3.2). The type of *first shall satisfy + /// the requirements of MoveConstructible (Table 20) and the requirements of MoveAssignable. + /// + /// Complexity: At most last - first swaps. + /// + /// Note: While rotate works on ForwardIterators (e.g. slist) and BidirectionalIterators (e.g. list), + /// you can get much better performance (O(1) instead of O(n)) with slist and list rotation by + /// doing splice operations on those lists instead of calling this rotate function. + /// + /// http://www.cs.bell-labs.com/cm/cs/pearls/s02b.pdf / http://books.google.com/books?id=kse_7qbWbjsC&pg=PA14&lpg=PA14&dq=Programming+Pearls+flipping+hands + /// http://books.google.com/books?id=tjOlkl7ecVQC&pg=PA189&lpg=PA189&dq=stepanov+Elements+of+Programming+rotate + /// http://stackoverflow.com/questions/21160875/why-is-stdrotate-so-fast + /// + /// Strategy: + /// - We handle the special case of (middle == first) and (middle == last) no-ops + /// up front in the main rotate entry point. + /// - There's a basic ForwardIterator implementation (rotate_general_impl) which is + /// a fallback implementation that's not as fast as others but works for all cases. + /// - There's a slightly better BidirectionalIterator implementation. + /// - We have specialized versions for rotating elements that are is_trivially_move_assignable. + /// These versions will use memmove for when we have a RandomAccessIterator. 
+ /// - We have a specialized version for rotating by only a single position, as that allows us + /// (with any iterator type) to avoid a lot of logic involved with algorithms like "flipping hands" + /// and achieve near optimal O(n) behavior. it turns out that rotate-by-one is a common use + /// case in practice. + /// + namespace Internal + { + template + ForwardIterator rotate_general_impl(ForwardIterator first, ForwardIterator middle, ForwardIterator last) + { + using eastl::swap; + + ForwardIterator current = middle; + + do { + swap(*first++, *current++); + + if(first == middle) + middle = current; + } while(current != last); + + ForwardIterator result = first; + current = middle; + + while(current != last) + { + swap(*first++, *current++); + + if(first == middle) + middle = current; + else if(current == last) + current = middle; + } + + return result; // result points to first + (last - middle). + } + + + template + ForwardIterator move_rotate_left_by_one(ForwardIterator first, ForwardIterator last) + { + typedef typename eastl::iterator_traits::value_type value_type; + + value_type temp(eastl::move(*first)); + ForwardIterator result = eastl::move(eastl::next(first), last, first); // Note that while our template type is BidirectionalIterator, if the actual + *result = eastl::move(temp); // iterator is a RandomAccessIterator then this move will be a memmove for trivial types. + + return result; // result points to the final element in the range. + } + + + template + BidirectionalIterator move_rotate_right_by_one(BidirectionalIterator first, BidirectionalIterator last) + { + typedef typename eastl::iterator_traits::value_type value_type; + + BidirectionalIterator beforeLast = eastl::prev(last); + value_type temp(eastl::move(*beforeLast)); + BidirectionalIterator result = eastl::move_backward(first, beforeLast, last); // Note that while our template type is BidirectionalIterator, if the actual + *first = eastl::move(temp); // iterator is a RandomAccessIterator then this move will be a memmove for trivial types. + + return result; // result points to the first element in the range. + } + + template + struct rotate_helper + { + template + static ForwardIterator rotate_impl(ForwardIterator first, ForwardIterator middle, ForwardIterator last) + { return Internal::rotate_general_impl(first, middle, last); } + }; + + template <> + struct rotate_helper + { + template + static ForwardIterator rotate_impl(ForwardIterator first, ForwardIterator middle, ForwardIterator last) + { + if(eastl::next(first) == middle) // If moving trivial types by a single element, memcpy is fast for that case. + return Internal::move_rotate_left_by_one(first, last); + return Internal::rotate_general_impl(first, middle, last); + } + }; + + template <> + struct rotate_helper + { + template + static BidirectionalIterator rotate_impl(BidirectionalIterator first, BidirectionalIterator middle, BidirectionalIterator last) + { return Internal::rotate_general_impl(first, middle, last); } // rotate_general_impl outperforms the flipping hands algorithm. + + /* + // Simplest "flipping hands" implementation. Disabled because it's slower on average than rotate_general_impl. + template + static BidirectionalIterator rotate_impl(BidirectionalIterator first, BidirectionalIterator middle, BidirectionalIterator last) + { + eastl::reverse(first, middle); + eastl::reverse(middle, last); + eastl::reverse(first, last); + return first + (last - middle); // This can be slow for large ranges because operator + and - are O(n). 
+ } + + // Smarter "flipping hands" implementation, but still disabled because benchmarks are showing it to be slower than rotate_general_impl. + template + static BidirectionalIterator rotate_impl(BidirectionalIterator first, BidirectionalIterator middle, BidirectionalIterator last) + { + // This is the "flipping hands" algorithm. + eastl::reverse_impl(first, middle, EASTL_ITC_NS::bidirectional_iterator_tag()); // Reverse the left side. + eastl::reverse_impl(middle, last, EASTL_ITC_NS::bidirectional_iterator_tag()); // Reverse the right side. + + // Reverse the entire range. + while((first != middle) && (middle != last)) + { + eastl::iter_swap(first, --last); + ++first; + } + + if(first == middle) // Finish reversing the entire range. + { + eastl::reverse_impl(middle, last, bidirectional_iterator_tag()); + return last; + } + else + { + eastl::reverse_impl(first, middle, bidirectional_iterator_tag()); + return first; + } + } + */ + }; + + template <> + struct rotate_helper + { + template + static BidirectionalIterator rotate_impl(BidirectionalIterator first, BidirectionalIterator middle, BidirectionalIterator last) + { + if(eastl::next(first) == middle) // If moving trivial types by a single element, memcpy is fast for that case. + return Internal::move_rotate_left_by_one(first, last); + if(eastl::next(middle) == last) + return Internal::move_rotate_right_by_one(first, last); + return Internal::rotate_general_impl(first, middle, last); + } + }; + + template + inline Integer greatest_common_divisor(Integer x, Integer y) + { + do { + Integer t = (x % y); + x = y; + y = t; + } while(y); + + return x; + } + + template <> + struct rotate_helper + { + // This is the juggling algorithm, using move operations. + // In practice this implementation is about 25% faster than rotate_general_impl. We may want to + // consider sticking with just rotate_general_impl and avoid the code generation of this function. + template + static RandomAccessIterator rotate_impl(RandomAccessIterator first, RandomAccessIterator middle, RandomAccessIterator last) + { + typedef typename iterator_traits::difference_type difference_type; + typedef typename iterator_traits::value_type value_type; + + const difference_type m1 = (middle - first); + const difference_type m2 = (last - middle); + const difference_type g = Internal::greatest_common_divisor(m1, m2); + value_type temp; + + for(RandomAccessIterator p = first + g; p != first;) + { + temp = eastl::move(*--p); + RandomAccessIterator p1 = p; + RandomAccessIterator p2 = p + m1; + do + { + *p1 = eastl::move(*p2); + p1 = p2; + const difference_type d = (last - p2); + + if(m1 < d) + p2 += m1; + else + p2 = first + (m1 - d); + } while(p2 != p); + + *p1 = eastl::move(temp); + } + + return first + m2; + } + }; + + template <> + struct rotate_helper + { + // Experiments were done which tested the performance of using an intermediate buffer + // to do memcpy's to as opposed to executing a swapping algorithm. It turns out this is + // actually slower than even rotate_general_impl, partly because the average case involves + // memcpy'ing a quarter of the element range twice. Experiments were done with various kinds + // of PODs with various element counts. + + template + static RandomAccessIterator rotate_impl(RandomAccessIterator first, RandomAccessIterator middle, RandomAccessIterator last) + { + if(eastl::next(first) == middle) // If moving trivial types by a single element, memcpy is fast for that case. 
+ return Internal::move_rotate_left_by_one(first, last); + if(eastl::next(middle) == last) + return Internal::move_rotate_right_by_one(first, last); + if((last - first) < 32) // For small ranges rotate_general_impl is faster. + return Internal::rotate_general_impl(first, middle, last); + return Internal::rotate_helper::rotate_impl(first, middle, last); + } + }; + + } // namespace Internal + + + template + ForwardIterator rotate(ForwardIterator first, ForwardIterator middle, ForwardIterator last) + { + if(middle != first) + { + if(middle != last) + { + typedef typename eastl::iterator_traits::iterator_category IC; + typedef typename eastl::iterator_traits::value_type value_type; + + return Internal::rotate_helper::value || // This is the best way of telling if we can move types via memmove, but without a conforming C++11 compiler it usually returns false. + eastl::is_pod::value || // This is a more conservative way of telling if we can move types via memmove, and most compilers support it, but it doesn't have as full of coverage as is_trivially_move_assignable. + eastl::is_scalar::value> // This is the most conservative means and works with all compilers, but works only for scalars. + ::rotate_impl(first, middle, last); + } + + return first; + } + + return last; + } + + + + /// rotate_copy + /// + /// Similar to rotate except writes the output to the OutputIterator and + /// returns an OutputIterator to the element past the last element copied + /// (i.e. result + (last - first)) + /// + template + OutputIterator rotate_copy(ForwardIterator first, ForwardIterator middle, ForwardIterator last, OutputIterator result) + { + return eastl::copy(first, middle, eastl::copy(middle, last, result)); + } + + + + /// clamp + /// + /// Returns a reference to a clamped value within the range of [lo, hi]. + /// + /// http://en.cppreference.com/w/cpp/algorithm/clamp + /// + template + EA_CONSTEXPR const T& clamp(const T& v, const T& lo, const T& hi, Compare comp) + { + // code collapsed to a single line due to constexpr requirements + return [&] { EASTL_ASSERT(!comp(hi, lo)); }(), + comp(v, lo) ? lo : comp(hi, v) ? hi : v; + } + + template + EA_CONSTEXPR const T& clamp(const T& v, const T& lo, const T& hi) + { + return eastl::clamp(v, lo, hi, eastl::less<>()); + } + + + +} // namespace eastl + + +#endif // Header include guard + + + + + + + + + + + + + + + diff --git a/libkram/eastl/include/EASTL/allocator.h b/libkram/eastl/include/EASTL/allocator.h new file mode 100644 index 00000000..ad20e4d8 --- /dev/null +++ b/libkram/eastl/include/EASTL/allocator.h @@ -0,0 +1,395 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ALLOCATOR_H +#define EASTL_ALLOCATOR_H + + +#include +#include +#include + + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + + + +namespace eastl +{ + + /// alloc_flags + /// + /// Defines allocation flags. + /// + enum alloc_flags + { + MEM_TEMP = 0, // Low memory, not necessarily actually temporary. + MEM_PERM = 1 // High memory, for things that won't be unloaded. 
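+		// Illustrative note (not part of the original EASTL sources): these values are
+		// intended to be passed as the 'flags' argument of allocator::allocate, e.g.
+		//     pAllocator->allocate(256, MEM_PERM);
+		// In the non-DLL build the default allocator forwards the flags to the
+		// application-supplied operator new[] overloads, which decide what they mean.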
+ }; + + + /// allocator + /// + /// In this allocator class, note that it is not templated on any type and + /// instead it simply allocates blocks of memory much like the C malloc and + /// free functions. It can be thought of as similar to C++ std::allocator. + /// The flags parameter has meaning that is specific to the allocation + /// + /// C++11's std::allocator (20.6.9) doesn't have a move constructor or assignment + /// operator. This is possibly because std::allocators are associated with types + /// instead of as instances. The potential non-equivalance of C++ std::allocator + /// instances has been a source of some acknowledged design problems. + /// We don't implement support for move construction or assignment in eastl::allocator, + /// but users can define their own allocators which do have move functions and + /// the eastl containers are compatible with such allocators (i.e. nothing unexpected + /// will happen). + /// + class EASTL_API allocator + { + public: + EASTL_ALLOCATOR_EXPLICIT allocator(const char* pName = EASTL_NAME_VAL(EASTL_ALLOCATOR_DEFAULT_NAME)); + allocator(const allocator& x); + allocator(const allocator& x, const char* pName); + + allocator& operator=(const allocator& x); + + void* allocate(size_t n, int flags = 0); + void* allocate(size_t n, size_t alignment, size_t offset, int flags = 0); + void deallocate(void* p, size_t n); + + const char* get_name() const; + void set_name(const char* pName); + + protected: + #if EASTL_NAME_ENABLED + const char* mpName; // Debug name, used to track memory. + #endif + }; + + bool operator==(const allocator& a, const allocator& b); + bool operator!=(const allocator& a, const allocator& b); + + + + /// dummy_allocator + /// + /// Defines an allocator which does nothing. It returns NULL from allocate calls. + /// + class EASTL_API dummy_allocator + { + public: + EASTL_ALLOCATOR_EXPLICIT dummy_allocator(const char* = NULL) { } + dummy_allocator(const dummy_allocator&) { } + dummy_allocator(const dummy_allocator&, const char*) { } + + dummy_allocator& operator=(const dummy_allocator&) { return *this; } + + void* allocate(size_t, int = 0) { return NULL; } + void* allocate(size_t, size_t, size_t, int = 0) { return NULL; } + void deallocate(void*, size_t) { } + + const char* get_name() const { return ""; } + void set_name(const char*) { } + }; + + inline bool operator==(const dummy_allocator&, const dummy_allocator&) { return true; } + inline bool operator!=(const dummy_allocator&, const dummy_allocator&) { return false; } + + + + /// Defines a static default allocator which is constant across all types. + /// This is different from get_default_allocator, which is is bound at + /// compile-time and expected to differ per allocator type. + /// Currently this Default Allocator applies only to CoreAllocatorAdapter. + /// To consider: This naming of this function is too similar to get_default_allocator + /// and instead should be named something like GetStaticDefaultAllocator. + EASTL_API allocator* GetDefaultAllocator(); + EASTL_API allocator* SetDefaultAllocator(allocator* pAllocator); + + + /// get_default_allocator + /// + /// This templated function allows the user to implement a default allocator + /// retrieval function that any part of EASTL can use. EASTL containers take + /// an Allocator parameter which identifies an Allocator class to use. 
But + /// different kinds of allocators have different mechanisms for retrieving + /// a default allocator instance, and some don't even intrinsically support + /// such functionality. The user can override this get_default_allocator + /// function in order to provide the glue between EASTL and whatever their + /// system's default allocator happens to be. + /// + /// Example usage: + /// MyAllocatorType* gpSystemAllocator; + /// + /// MyAllocatorType* get_default_allocator(const MyAllocatorType*) + /// { return gpSystemAllocator; } + /// + template + Allocator* get_default_allocator(const Allocator*); + + EASTLAllocatorType* get_default_allocator(const EASTLAllocatorType*); + + + /// default_allocfreemethod + /// + /// Implements a default allocfreemethod which uses the default global allocator. + /// This version supports only default alignment. + /// + void* default_allocfreemethod(size_t n, void* pBuffer, void* /*pContext*/); + + + /// allocate_memory + /// + /// This is a memory allocation dispatching function. + /// To do: Make aligned and unaligned specializations. + /// Note that to do this we will need to use a class with a static + /// function instead of a standalone function like below. + /// + template + void* allocate_memory(Allocator& a, size_t n, size_t alignment, size_t alignmentOffset); + + +} // namespace eastl + + + + + + +#ifndef EASTL_USER_DEFINED_ALLOCATOR // If the user hasn't declared that he has defined a different allocator implementation elsewhere... + + EA_DISABLE_ALL_VC_WARNINGS() + #include + EA_RESTORE_ALL_VC_WARNINGS() + + #if !EASTL_DLL // If building a regular library and not building EASTL as a DLL... + // It is expected that the application define the following + // versions of operator new for the application. Either that or the + // user needs to override the implementation of the allocator class. + void* operator new[](size_t size, const char* pName, int flags, unsigned debugFlags, const char* file, int line); + void* operator new[](size_t size, size_t alignment, size_t alignmentOffset, const char* pName, int flags, unsigned debugFlags, const char* file, int line); + #endif + + namespace eastl + { + inline allocator::allocator(const char* EASTL_NAME(pName)) + { + #if EASTL_NAME_ENABLED + mpName = pName ? pName : EASTL_ALLOCATOR_DEFAULT_NAME; + #endif + } + + + inline allocator::allocator(const allocator& EASTL_NAME(alloc)) + { + #if EASTL_NAME_ENABLED + mpName = alloc.mpName; + #endif + } + + + inline allocator::allocator(const allocator&, const char* EASTL_NAME(pName)) + { + #if EASTL_NAME_ENABLED + mpName = pName ? 
pName : EASTL_ALLOCATOR_DEFAULT_NAME; + #endif + } + + + inline allocator& allocator::operator=(const allocator& EASTL_NAME(alloc)) + { + #if EASTL_NAME_ENABLED + mpName = alloc.mpName; + #endif + return *this; + } + + + inline const char* allocator::get_name() const + { + #if EASTL_NAME_ENABLED + return mpName; + #else + return EASTL_ALLOCATOR_DEFAULT_NAME; + #endif + } + + + inline void allocator::set_name(const char* EASTL_NAME(pName)) + { + #if EASTL_NAME_ENABLED + mpName = pName; + #endif + } + + + inline void* allocator::allocate(size_t n, int flags) + { + #if EASTL_NAME_ENABLED + #define pName mpName + #else + #define pName EASTL_ALLOCATOR_DEFAULT_NAME + #endif + + #if EASTL_DLL + return allocate(n, EASTL_SYSTEM_ALLOCATOR_MIN_ALIGNMENT, 0, flags); + #elif (EASTL_DEBUGPARAMS_LEVEL <= 0) + return ::new((char*)0, flags, 0, (char*)0, 0) char[n]; + #elif (EASTL_DEBUGPARAMS_LEVEL == 1) + return ::new( pName, flags, 0, (char*)0, 0) char[n]; + #else + return ::new( pName, flags, 0, __FILE__, __LINE__) char[n]; + #endif + } + + + inline void* allocator::allocate(size_t n, size_t alignment, size_t offset, int flags) + { + #if EASTL_DLL + // We currently have no support for implementing flags when + // using the C runtime library operator new function. The user + // can use SetDefaultAllocator to override the default allocator. + EA_UNUSED(offset); EA_UNUSED(flags); + + size_t adjustedAlignment = (alignment > EA_PLATFORM_PTR_SIZE) ? alignment : EA_PLATFORM_PTR_SIZE; + + void* p = new char[n + adjustedAlignment + EA_PLATFORM_PTR_SIZE]; + void* pPlusPointerSize = (void*)((uintptr_t)p + EA_PLATFORM_PTR_SIZE); + void* pAligned = (void*)(((uintptr_t)pPlusPointerSize + adjustedAlignment - 1) & ~(adjustedAlignment - 1)); + + void** pStoredPtr = (void**)pAligned - 1; + EASTL_ASSERT(pStoredPtr >= p); + *(pStoredPtr) = p; + + EASTL_ASSERT(((size_t)pAligned & ~(alignment - 1)) == (size_t)pAligned); + + return pAligned; + #elif (EASTL_DEBUGPARAMS_LEVEL <= 0) + return ::new(alignment, offset, (char*)0, flags, 0, (char*)0, 0) char[n]; + #elif (EASTL_DEBUGPARAMS_LEVEL == 1) + return ::new(alignment, offset, pName, flags, 0, (char*)0, 0) char[n]; + #else + return ::new(alignment, offset, pName, flags, 0, __FILE__, __LINE__) char[n]; + #endif + + #undef pName // See above for the definition of this. + } + + + inline void allocator::deallocate(void* p, size_t) + { + #if EASTL_DLL + if (p != nullptr) + { + void* pOriginalAllocation = *((void**)p - 1); + delete[](char*)pOriginalAllocation; + } + #else + delete[](char*)p; + #endif + } + + + inline bool operator==(const allocator&, const allocator&) + { + return true; // All allocators are considered equal, as they merely use global new/delete. + } + + + inline bool operator!=(const allocator&, const allocator&) + { + return false; // All allocators are considered equal, as they merely use global new/delete. + } + + + } // namespace eastl + + +#endif // EASTL_USER_DEFINED_ALLOCATOR + + + +namespace eastl +{ + + template + inline Allocator* get_default_allocator(const Allocator*) + { + return NULL; // By default we return NULL; the user must make specialization of this function in order to provide their own implementation. + } + + + inline EASTLAllocatorType* get_default_allocator(const EASTLAllocatorType*) + { + return EASTLAllocatorDefault(); // For the built-in allocator EASTLAllocatorType, we happen to already have a function for returning the default allocator instance, so we provide it. 
+ } + + + inline void* default_allocfreemethod(size_t n, void* pBuffer, void* /*pContext*/) + { + EASTLAllocatorType* const pAllocator = EASTLAllocatorDefault(); + + if(pBuffer) // If freeing... + { + EASTLFree(*pAllocator, pBuffer, n); + return NULL; // The return value is meaningless for the free. + } + else // allocating + return EASTLAlloc(*pAllocator, n); + } + + + /// allocate_memory + /// + /// This is a memory allocation dispatching function. + /// To do: Make aligned and unaligned specializations. + /// Note that to do this we will need to use a class with a static + /// function instead of a standalone function like below. + /// + template + inline void* allocate_memory(Allocator& a, size_t n, size_t alignment, size_t alignmentOffset) + { + void *result; + if (alignment <= EASTL_ALLOCATOR_MIN_ALIGNMENT) + { + result = EASTLAlloc(a, n); + // Ensure the result is correctly aligned. An assertion likely indicates a mismatch between EASTL_ALLOCATOR_MIN_ALIGNMENT and the minimum alignment + // of EASTLAlloc. If there is a mismatch it may be necessary to define EASTL_ALLOCATOR_MIN_ALIGNMENT to be the minimum alignment of EASTLAlloc, or + // to increase the alignment of EASTLAlloc to match EASTL_ALLOCATOR_MIN_ALIGNMENT. + EASTL_ASSERT((reinterpret_cast(result)& ~(alignment - 1)) == reinterpret_cast(result)); + } + else + { + result = EASTLAllocAligned(a, n, alignment, alignmentOffset); + // Ensure the result is correctly aligned. An assertion here may indicate a bug in the allocator. + auto resultMinusOffset = (char*)result - alignmentOffset; + EA_UNUSED(resultMinusOffset); + EASTL_ASSERT((reinterpret_cast(resultMinusOffset)& ~(alignment - 1)) == reinterpret_cast(resultMinusOffset)); + } + return result; + } + +} + + +#endif // Header include guard + + + + + + + + + + + + + + + + diff --git a/libkram/eastl/include/EASTL/allocator_malloc.h b/libkram/eastl/include/EASTL/allocator_malloc.h new file mode 100644 index 00000000..a13d1165 --- /dev/null +++ b/libkram/eastl/include/EASTL/allocator_malloc.h @@ -0,0 +1,130 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ALLOCATOR_MALLOC_H +#define EASTL_ALLOCATOR_MALLOC_H + + +#include +#include +#include + + +// EASTL_ALIGNED_MALLOC_AVAILABLE +// +// Identifies if the standard library provides a built-in aligned version of malloc. +// Defined as 0 or 1, depending on the standard library or platform availability. +// None of the viable C functions provides for an aligned malloc with offset, so we +// don't consider that supported in any case. +// +// Options for aligned allocations: +// C11 aligned_alloc http://linux.die.net/man/3/aligned_alloc +// glibc memalign http://linux.die.net/man/3/posix_memalign +// Posix posix_memalign http://pubs.opengroup.org/onlinepubs/000095399/functions/posix_memalign.html +// VC++ _aligned_malloc http://msdn.microsoft.com/en-us/library/8z34s9c6%28VS.80%29.aspx This is not suitable, since it has a limitation that you need to free via _aligned_free. +// +#if !defined EASTL_ALIGNED_MALLOC_AVAILABLE + #if defined(EA_PLATFORM_POSIX) // && !defined(EA_PLATFORM_APPLE) + // memalign is more consistently available than posix_memalign, though its location isn't consistent across + // platforms and compiler libraries. 
Typically it's declared in one of three headers: stdlib.h, malloc.h, or malloc/malloc.h + #include // memalign, posix_memalign. + #define EASTL_ALIGNED_MALLOC_AVAILABLE 1 + + #if EA_HAS_INCLUDE_AVAILABLE + #if EA_HAS_INCLUDE() + #include + #elif EA_HAS_INCLUDE() + #include + #endif + #elif defined(EA_PLATFORM_BSD) + #include + #elif defined(EA_COMPILER_CLANG) + #if __has_include() + #include + #elif __has_include() + #include + #endif + #else + #include + #endif + #else + #define EASTL_ALIGNED_MALLOC_AVAILABLE 0 + #endif +#endif + + +namespace eastl +{ + + /////////////////////////////////////////////////////////////////////////////// + // allocator_malloc + // + // Implements an EASTL allocator that uses malloc/free as opposed to + // new/delete or PPMalloc Malloc/Free. + // + // Example usage: + // vector intVector; + // + class allocator_malloc + { + public: + allocator_malloc(const char* = NULL) + { } + + allocator_malloc(const allocator_malloc&) + { } + + allocator_malloc(const allocator_malloc&, const char*) + { } + + allocator_malloc& operator=(const allocator_malloc&) + { return *this; } + + bool operator==(const allocator_malloc&) + { return true; } + + bool operator!=(const allocator_malloc&) + { return false; } + + void* allocate(size_t n, int /*flags*/ = 0) + { return malloc(n); } + + void* allocate(size_t n, size_t alignment, size_t alignmentOffset, int /*flags*/ = 0) + { + #if EASTL_ALIGNED_MALLOC_AVAILABLE + if((alignmentOffset % alignment) == 0) // We check for (offset % alignmnent == 0) instead of (offset == 0) because any block which is aligned on e.g. 64 also is aligned at an offset of 64 by definition. + return memalign(alignment, n); // memalign is more consistently available than posix_memalign. + #else + if((alignment <= EASTL_SYSTEM_ALLOCATOR_MIN_ALIGNMENT) && ((alignmentOffset % alignment) == 0)) + return malloc(n); + #endif + return NULL; + } + + void deallocate(void* p, size_t /*n*/) + { free(p); } + + const char* get_name() const + { return "allocator_malloc"; } + + void set_name(const char*) + { } + }; + + +} // namespace eastl + + + +#endif // Header include guard + + + + + + + + + diff --git a/libkram/eastl/include/EASTL/any.h b/libkram/eastl/include/EASTL/any.h new file mode 100644 index 00000000..c2ef6388 --- /dev/null +++ b/libkram/eastl/include/EASTL/any.h @@ -0,0 +1,652 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + + +/////////////////////////////////////////////////////////////////////////////// +// This file implements the eastl::any which is part of the C++ standard STL +// library specification. +// +// eastl::any is a type-safe container for single values of any type. Our +// implementation makes use of the "small local buffer" optimization to avoid +// unnecessary dynamic memory allocation if the specified type is eligible to +// be stored in its local buffer. The user type must satisfy the size +// requirements and must be no-throw move-constructible to qualify for the local +// buffer optimization. +// +// To consider: Implement a fixed_any variant to allow users to customize +// the size of the "small local buffer" optimization. 
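+// Example usage (illustrative sketch, not part of the original EASTL sources; the
+// eastl::string lines assume <EASTL/string.h> is included):
+//     eastl::any a = 42;                                       // small type: stored in the local buffer
+//     int i = eastl::any_cast<int>(a);                         // i == 42
+//     a = eastl::string("hello");                              // rebinds the held type
+//     eastl::string* p = eastl::any_cast<eastl::string>(&a);   // returns nullptr if the type doesn't match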
+// +// http://en.cppreference.com/w/cpp/utility/any +/////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ANY_H +#define EASTL_ANY_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + +#include +#include +#if EASTL_RTTI_ENABLED + #include +#endif +#if EASTL_EXCEPTIONS_ENABLED + #include +#endif + + +namespace eastl +{ + /////////////////////////////////////////////////////////////////////////////// + // bad_any_cast + // + // The type thrown by any_cast on failure. + // + // http://en.cppreference.com/w/cpp/utility/any/bad_any_cast + // + #if EASTL_EXCEPTIONS_ENABLED + struct bad_cast : std::exception + { + const char* what() const EA_NOEXCEPT EA_OVERRIDE + { return "bad cast"; } + }; + + struct bad_any_cast : public bad_cast + { + const char* what() const EA_NOEXCEPT EA_OVERRIDE + { return "bad_any_cast"; } + }; + #endif + + namespace Internal + { + // utility to switch between exceptions and asserts + inline void DoBadAnyCast() + { + #if EASTL_EXCEPTIONS_ENABLED + throw bad_any_cast(); + #else + EASTL_ASSERT_MSG(false, "bad_any_cast\n"); + + // NOTE(rparolin): CRASH! + // You crashed here because you requested a type that was not contained in the object. + // We choose to intentionally crash here instead of returning invalid data to the calling + // code which could cause hard to track down bugs. + *((volatile int*)0) = 0xDEADC0DE; + #endif + } + + template + void* DefaultConstruct(Args&&... args) + { + auto* pMem = EASTLAllocatorDefault()->allocate(sizeof(T), alignof(T), 0); + + return ::new(pMem) T(eastl::forward(args)...); + } + + template + void DefaultDestroy(T* p) + { + p->~T(); + + EASTLAllocatorDefault()->deallocate(static_cast(p), sizeof(T)); + } + } + + + /////////////////////////////////////////////////////////////////////////////// + // 20.7.3, class any + // + class any + { + ////////////////////////////////////////////////////////////////////////////////////////// + // storage_operation + // + // operations supported by the storage handler + // + enum class storage_operation + { + GET, + DESTROY, + COPY, + MOVE, + TYPE_INFO + }; + + + ////////////////////////////////////////////////////////////////////////////////////////// + // storage + // + // the underlying storage type which enables the switching between objects stored in + // the heap and objects stored within the any type. 
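+		// Illustrative note (not part of the original EASTL sources): the internal buffer
+		// below is sized to hold 4 pointers (e.g. 32 bytes on a 64-bit platform), so a type
+		// that fits, is suitably aligned and is nothrow-move-constructible is stored in
+		// place; anything else is heap-allocated through the default EASTL allocator.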
+ // + union storage + { + typedef aligned_storage_t<4 * sizeof(void*), alignment_of::value> internal_storage_t; + + void* external_storage = nullptr; + internal_storage_t internal_storage; + }; + + + ////////////////////////////////////////////////////////////////////////////////////////// + // use_internal_storage + // + // determines when the "local buffer optimization" is used + // + template + using use_internal_storage = bool_constant + < + is_nothrow_move_constructible::value + && (sizeof(T) <= sizeof(storage)) && + (alignment_of::value % alignment_of::value == 0) + >; + + + ////////////////////////////////////////////////////////////////////////////////////////// + // non-member friend functions + // + template friend const ValueType* any_cast(const any* pAny) EA_NOEXCEPT; + template friend ValueType* any_cast(any* pAny) EA_NOEXCEPT; + template friend ValueType any_cast(const any& operand); + template friend ValueType any_cast(any& operand); + template friend ValueType any_cast(any&& operand); + + //Adding Unsafe any cast operations + template friend const ValueType* unsafe_any_cast(const any* pAny) EA_NOEXCEPT; + template friend ValueType* unsafe_any_cast(any* pAny) EA_NOEXCEPT; + + + ////////////////////////////////////////////////////////////////////////////////////////// + // internal storage handler + // + template + struct storage_handler_internal + { + template + static void construct(storage& s, V&& v) + { + ::new(&s.internal_storage) T(eastl::forward(v)); + } + + template + static void construct_inplace(storage& s, Args... args) + { + ::new(&s.internal_storage) T(eastl::forward(args)...); + } + + template + static void construct_inplace(storage& s, std::initializer_list il, Args&&... args) + { + ::new(&s.internal_storage) NT(il, eastl::forward(args)...); + } + + static inline void destroy(any& refAny) + { + T& t = *static_cast(static_cast(&refAny.m_storage.internal_storage)); + EA_UNUSED(t); + t.~T(); + + refAny.m_handler = nullptr; + } + + static void* handler_func(storage_operation op, const any* pThis, any* pOther) + { + switch (op) + { + case storage_operation::GET: + { + EASTL_ASSERT(pThis); + return (void*)(&pThis->m_storage.internal_storage); + } + break; + + case storage_operation::DESTROY: + { + EASTL_ASSERT(pThis); + destroy(const_cast(*pThis)); + } + break; + + case storage_operation::COPY: + { + EASTL_ASSERT(pThis); + EASTL_ASSERT(pOther); + construct(pOther->m_storage, *(T*)(&pThis->m_storage.internal_storage)); + } + break; + + case storage_operation::MOVE: + { + EASTL_ASSERT(pThis); + EASTL_ASSERT(pOther); + construct(pOther->m_storage, eastl::move(*(T*)(&pThis->m_storage.internal_storage))); + destroy(const_cast(*pThis)); + } + break; + + case storage_operation::TYPE_INFO: + { + #if EASTL_RTTI_ENABLED + return (void*)&typeid(T); + #endif + } + break; + + default: + { + EASTL_ASSERT_MSG(false, "unknown storage operation\n"); + } + break; + }; + + return nullptr; + } + }; + + + + ////////////////////////////////////////////////////////////////////////////////////////// + // external storage handler + // + template + struct storage_handler_external + { + template + static inline void construct(storage& s, V&& v) + { + s.external_storage = Internal::DefaultConstruct(eastl::forward(v)); + } + + template + static inline void construct_inplace(storage& s, Args... args) + { + s.external_storage = Internal::DefaultConstruct(eastl::forward(args)...); + } + + template + static inline void construct_inplace(storage& s, std::initializer_list il, Args&&... 
args) + { + s.external_storage = Internal::DefaultConstruct(il, eastl::forward(args)...); + } + + static inline void destroy(any& refAny) + { + Internal::DefaultDestroy(static_cast(refAny.m_storage.external_storage)); + + refAny.m_handler = nullptr; + } + + static void* handler_func(storage_operation op, const any* pThis, any* pOther) + { + switch (op) + { + case storage_operation::GET: + { + EASTL_ASSERT(pThis); + EASTL_ASSERT(pThis->m_storage.external_storage); + return static_cast(pThis->m_storage.external_storage); + } + break; + + case storage_operation::DESTROY: + { + EASTL_ASSERT(pThis); + destroy(*const_cast(pThis)); + } + break; + + case storage_operation::COPY: + { + EASTL_ASSERT(pThis); + EASTL_ASSERT(pOther); + construct(pOther->m_storage, *static_cast(pThis->m_storage.external_storage)); + } + break; + + case storage_operation::MOVE: + { + EASTL_ASSERT(pThis); + EASTL_ASSERT(pOther); + construct(pOther->m_storage, eastl::move(*(T*)(pThis->m_storage.external_storage))); + destroy(const_cast(*pThis)); + } + break; + + case storage_operation::TYPE_INFO: + { + #if EASTL_RTTI_ENABLED + return (void*)&typeid(T); + #endif + } + break; + + default: + { + EASTL_ASSERT_MSG(false, "unknown storage operation\n"); + } + break; + }; + + return nullptr; + } + }; + + + ////////////////////////////////////////////////////////////////////////////////////////// + // storage_handler_ptr + // + // defines the function signature of the storage handler that both the internal and + // external storage handlers must implement to retrieve the underlying type of the any + // object. + // + using storage_handler_ptr = void* (*)(storage_operation, const any*, any*); + + + ////////////////////////////////////////////////////////////////////////////////////////// + // storage_handler + // + // based on the specified type T we select the appropriate underlying storage handler + // based on the 'use_internal_storage' trait. + // + template + using storage_handler = typename conditional::value, + storage_handler_internal, + storage_handler_external>::type; + + + ////////////////////////////////////////////////////////////////////////////////////////// + // data layout + // + storage m_storage; + storage_handler_ptr m_handler; + + public: + #ifndef EA_COMPILER_GNUC + // TODO(rparolin): renable constexpr for GCC + EA_CONSTEXPR + #endif + any() EA_NOEXCEPT + : m_storage(), m_handler(nullptr) {} + + any(const any& other) : m_handler(nullptr) + { + if (other.m_handler) + { + // NOTE(rparolin): You can not simply copy the underlying + // storage because it could hold a pointer to an object on the + // heap which breaks the copy semantics of the language. + other.m_handler(storage_operation::COPY, &other, this); + m_handler = other.m_handler; + } + } + + any(any&& other) EA_NOEXCEPT : m_handler(nullptr) + { + if(other.m_handler) + { + // NOTE(rparolin): You can not simply move the underlying + // storage because because the storage class has effectively + // type erased user type so we have to defer to the handler + // function to get the type back and pass on the move request. 
+ m_handler = eastl::move(other.m_handler); + other.m_handler(storage_operation::MOVE, &other, this); + } + } + + ~any() { reset(); } + + template + any(ValueType&& value, + typename eastl::enable_if::type, any>::value>::type* = 0) + { + typedef decay_t DecayedValueType; + static_assert(is_copy_constructible::value, "ValueType must be copy-constructible"); + storage_handler::construct(m_storage, eastl::forward(value)); + m_handler = &storage_handler::handler_func; + } + + template + explicit any(in_place_type_t, Args&&... args) + { + typedef storage_handler> StorageHandlerT; + static_assert(eastl::is_constructible::value, "T must be constructible with Args..."); + + StorageHandlerT::construct_inplace(m_storage, eastl::forward(args)...); + m_handler = &StorageHandlerT::handler_func; + } + + template + explicit any(in_place_type_t, + std::initializer_list il, + Args&&... args, + typename eastl::enable_if&, Args...>::value, + void>::type* = 0) + { + typedef storage_handler> StorageHandlerT; + + StorageHandlerT::construct_inplace(m_storage, il, eastl::forward(args)...); + m_handler = &StorageHandlerT::handler_func; + } + + // 20.7.3.2, assignments + template + any& operator=(ValueType&& value) + { + static_assert(is_copy_constructible>::value, "ValueType must be copy-constructible"); + any(eastl::forward(value)).swap(*this); + return *this; + } + + any& operator=(const any& other) + { + any(other).swap(*this); + return *this; + } + + any& operator=(any&& other) EA_NOEXCEPT + { + any(eastl::move(other)).swap(*this); + return *this; + } + + // 20.7.3.3, modifiers + #if EASTL_VARIADIC_TEMPLATES_ENABLED + template + void emplace(Args&&... args) + { + typedef storage_handler> StorageHandlerT; + static_assert(eastl::is_constructible::value, "T must be constructible with Args..."); + + reset(); + StorageHandlerT::construct_inplace(m_storage, eastl::forward(args)...); + m_handler = &StorageHandlerT::handler_func; + } + + template + typename eastl::enable_if&, Args...>::value, void>::type + emplace(std::initializer_list il, Args&&... 
args) + { + typedef storage_handler> StorageHandlerT; + + reset(); + StorageHandlerT::construct_inplace(m_storage, il, eastl::forward(args)...); + m_handler = &StorageHandlerT::handler_func; + } + #endif + + void reset() EA_NOEXCEPT + { + if(m_handler) + m_handler(storage_operation::DESTROY, this, nullptr); + } + + void swap(any& other) EA_NOEXCEPT + { + if(this == &other) + return; + + if(m_handler && other.m_handler) + { + any tmp; + tmp.m_handler = other.m_handler; + other.m_handler(storage_operation::MOVE, &other, &tmp); + + other.m_handler = m_handler; + m_handler(storage_operation::MOVE, this, &other); + + m_handler = tmp.m_handler; + tmp.m_handler(storage_operation::MOVE, &tmp, this); + } + else if (m_handler == nullptr && other.m_handler) + { + eastl::swap(m_handler, other.m_handler); + m_handler(storage_operation::MOVE, &other, this); + } + else if(m_handler && other.m_handler == nullptr) + { + eastl::swap(m_handler, other.m_handler); + other.m_handler(storage_operation::MOVE, this, &other); + } + //else if (m_handler == nullptr && other.m_handler == nullptr) + //{ + // // nothing to swap + //} + } + + // 20.7.3.4, observers + bool has_value() const EA_NOEXCEPT { return m_handler != nullptr; } + + #if EASTL_RTTI_ENABLED + inline const std::type_info& type() const EA_NOEXCEPT + { + if(m_handler) + { + auto* pTypeInfo = m_handler(storage_operation::TYPE_INFO, this, nullptr); + return *static_cast(pTypeInfo); + } + else + { + return typeid(void); + } + } + #endif + }; + + + + ////////////////////////////////////////////////////////////////////////////////////////// + // 20.7.4, non-member functions + // + inline void swap(any& rhs, any& lhs) EA_NOEXCEPT { rhs.swap(lhs); } + + + ////////////////////////////////////////////////////////////////////////////////////////// + // 20.7.4, The non-member any_cast functions provide type-safe access to the contained object. + // + template + inline ValueType any_cast(const any& operand) + { + static_assert(eastl::is_reference::value || eastl::is_copy_constructible::value, + "ValueType must be a reference or copy constructible"); + + auto* p = any_cast::type>::type>(&operand); + + if(p == nullptr) + Internal::DoBadAnyCast(); + + return *p; + } + + template + inline ValueType any_cast(any& operand) + { + static_assert(eastl::is_reference::value || eastl::is_copy_constructible::value, + "ValueType must be a reference or copy constructible"); + + auto* p = any_cast::type>(&operand); + + if(p == nullptr) + Internal::DoBadAnyCast(); + + return *p; + } + + template + inline ValueType any_cast(any&& operand) + { + static_assert(eastl::is_reference::value || eastl::is_copy_constructible::value, + "ValueType must be a reference or copy constructible"); + + auto* p = any_cast::type>(&operand); + + if (p == nullptr) + Internal::DoBadAnyCast(); + + return *p; + } + + // NOTE(rparolin): The runtime type check was commented out because in DLL builds the templated function pointer + // value will be different -- completely breaking the validation mechanism. Due to the fact that eastl::any uses + // type erasure we can't refresh (on copy/move) the cached function pointer to the internal handler function because + // we don't statically know the type. + template + inline const ValueType* any_cast(const any* pAny) EA_NOEXCEPT + { + return (pAny && pAny->m_handler EASTL_IF_NOT_DLL(== &any::storage_handler>::handler_func) + #if EASTL_RTTI_ENABLED + && pAny->type() == typeid(typename remove_reference::type) + #endif + ) ? 
+ static_cast(pAny->m_handler(any::storage_operation::GET, pAny, nullptr)) : + nullptr; + } + + template + inline ValueType* any_cast(any* pAny) EA_NOEXCEPT + { + return (pAny && pAny->m_handler EASTL_IF_NOT_DLL(== &any::storage_handler>::handler_func) + #if EASTL_RTTI_ENABLED + && pAny->type() == typeid(typename remove_reference::type) + #endif + ) ? + static_cast(pAny->m_handler(any::storage_operation::GET, pAny, nullptr)) : + nullptr; + } + + //Unsafe operations - use with caution + template + inline const ValueType* unsafe_any_cast(const any* pAny) EA_NOEXCEPT + { + return unsafe_any_cast(const_cast(pAny)); + } + + template + inline ValueType* unsafe_any_cast(any* pAny) EA_NOEXCEPT + { + return static_cast(pAny->m_handler(any::storage_operation::GET, pAny, nullptr)); + } + + ////////////////////////////////////////////////////////////////////////////////////////// + // make_any + // + #if EASTL_VARIADIC_TEMPLATES_ENABLED + template + inline any make_any(Args&&... args) + { + return any(eastl::in_place, eastl::forward(args)...); + } + + template + inline any make_any(std::initializer_list il, Args&&... args) + { + return any(eastl::in_place, il, eastl::forward(args)...); + } + #endif + +} // namespace eastl + +#endif // EASTL_ANY_H diff --git a/libkram/eastl/include/EASTL/array.h b/libkram/eastl/include/EASTL/array.h new file mode 100644 index 00000000..590aa94b --- /dev/null +++ b/libkram/eastl/include/EASTL/array.h @@ -0,0 +1,530 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + + +/////////////////////////////////////////////////////////////////////////////// +// Implements a templated array class as per the C++ standard TR1 (technical +// report 1, which is a list of proposed C++ library amendments). +// The primary distinctions between this array and TR1 array are: +// - array::size_type is defined as eastl_size_t instead of size_t in order +// to save memory and run faster on 64 bit systems. +/////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ARRAY_H +#define EASTL_ARRAY_H + + +#include +#include +#include +#include +#include + +#if EASTL_EXCEPTIONS_ENABLED + EA_DISABLE_ALL_VC_WARNINGS() + #include // std::out_of_range, std::length_error. + EA_RESTORE_ALL_VC_WARNINGS() +#endif + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + + + +namespace eastl +{ + + /////////////////////////////////////////////////////////////////////// + /// array + /// + /// Implements a templated array class as per the C++ standard TR1. + /// This class allows you to use a built-in C style array like an STL vector. + /// It does not let you change its size, as it is just like a C built-in array. + /// Our implementation here strives to remove function call nesting, as that + /// makes it hard for us to profile debug builds due to function call overhead. + /// Note that this is intentionally a struct with public data, as per the + /// C++ standard update proposal requirements. + /// + /// Example usage: + /// array a = { { 0, 1, 2, 3, 4 } }; // Strict compilers such as GCC require the double brackets. 
+ /// a[2] = 4; + /// for(array::iterator i = a.begin(); i < a.end(); ++i) + /// *i = 0; + /// + template + struct array + { + public: + typedef array this_type; + typedef T value_type; + typedef value_type& reference; + typedef const value_type& const_reference; + typedef value_type* iterator; + typedef const value_type* const_iterator; + typedef eastl::reverse_iterator reverse_iterator; + typedef eastl::reverse_iterator const_reverse_iterator; + typedef eastl_size_t size_type; // See config.h for the definition of eastl_size_t, which defaults to size_t. + typedef ptrdiff_t difference_type; + + public: + enum + { + count = N + }; + + // Note that the member data is intentionally public. + // This allows for aggregate initialization of the + // object (e.g. array a = { 0, 3, 2, 4 }; ) + value_type mValue[N ? N : 1]; + + public: + // We intentionally provide no constructor, destructor, or assignment operator. + + void fill(const value_type& value); + + // Unlike the swap function for other containers, array::swap takes linear time, + // may exit via an exception, and does not cause iterators to become associated with the other container. + void swap(this_type& x) EA_NOEXCEPT_IF(eastl::is_nothrow_swappable::value); + + EA_CPP14_CONSTEXPR iterator begin() EA_NOEXCEPT; + EA_CPP14_CONSTEXPR const_iterator begin() const EA_NOEXCEPT; + EA_CPP14_CONSTEXPR const_iterator cbegin() const EA_NOEXCEPT; + + EA_CPP14_CONSTEXPR iterator end() EA_NOEXCEPT; + EA_CPP14_CONSTEXPR const_iterator end() const EA_NOEXCEPT; + EA_CPP14_CONSTEXPR const_iterator cend() const EA_NOEXCEPT; + + EA_CPP14_CONSTEXPR reverse_iterator rbegin() EA_NOEXCEPT; + EA_CPP14_CONSTEXPR const_reverse_iterator rbegin() const EA_NOEXCEPT; + EA_CPP14_CONSTEXPR const_reverse_iterator crbegin() const EA_NOEXCEPT; + + EA_CPP14_CONSTEXPR reverse_iterator rend() EA_NOEXCEPT; + EA_CPP14_CONSTEXPR const_reverse_iterator rend() const EA_NOEXCEPT; + EA_CPP14_CONSTEXPR const_reverse_iterator crend() const EA_NOEXCEPT; + + EA_CPP14_CONSTEXPR bool empty() const EA_NOEXCEPT; + EA_CPP14_CONSTEXPR size_type size() const EA_NOEXCEPT; + EA_CPP14_CONSTEXPR size_type max_size() const EA_NOEXCEPT; + + EA_CPP14_CONSTEXPR T* data() EA_NOEXCEPT; + EA_CPP14_CONSTEXPR const T* data() const EA_NOEXCEPT; + + EA_CPP14_CONSTEXPR reference operator[](size_type i); + EA_CPP14_CONSTEXPR const_reference operator[](size_type i) const; + EA_CPP14_CONSTEXPR const_reference at(size_type i) const; + EA_CPP14_CONSTEXPR reference at(size_type i); + + EA_CPP14_CONSTEXPR reference front(); + EA_CPP14_CONSTEXPR const_reference front() const; + + EA_CPP14_CONSTEXPR reference back(); + EA_CPP14_CONSTEXPR const_reference back() const; + + bool validate() const; + int validate_iterator(const_iterator i) const; + + }; // class array + + + /////////////////////////////////////////////////////////////////////////// + // template deduction guides + /////////////////////////////////////////////////////////////////////////// + #ifdef __cpp_deduction_guides + template array(T, U...) 
-> array; + #endif + + + /////////////////////////////////////////////////////////////////////// + // array + /////////////////////////////////////////////////////////////////////// + + + template + inline void array::fill(const value_type& value) + { + eastl::fill_n(&mValue[0], N, value); + } + + + template + inline void array::swap(this_type& x) EA_NOEXCEPT_IF(eastl::is_nothrow_swappable::value) + { + eastl::swap_ranges(&mValue[0], &mValue[N], &x.mValue[0]); + } + + + template + EA_CPP14_CONSTEXPR inline typename array::iterator + array::begin() EA_NOEXCEPT + { + return &mValue[0]; + } + + + template + EA_CPP14_CONSTEXPR inline typename array::const_iterator + array::begin() const EA_NOEXCEPT + { + return &mValue[0]; + } + + + template + EA_CPP14_CONSTEXPR inline typename array::const_iterator + array::cbegin() const EA_NOEXCEPT + { + return &mValue[0]; + } + + + template + EA_CPP14_CONSTEXPR inline typename array::iterator + array::end() EA_NOEXCEPT + { + return &mValue[N]; + } + + + template + EA_CPP14_CONSTEXPR inline typename array::const_iterator + array::end() const EA_NOEXCEPT + { + return &mValue[N]; + } + + + template + EA_CPP14_CONSTEXPR inline typename array::const_iterator + array::cend() const EA_NOEXCEPT + { + return &mValue[N]; + } + + + template + EA_CPP14_CONSTEXPR inline typename array::reverse_iterator + array::rbegin() EA_NOEXCEPT + { + return reverse_iterator(&mValue[N]); + } + + + template + EA_CPP14_CONSTEXPR inline typename array::const_reverse_iterator + array::rbegin() const EA_NOEXCEPT + { + return const_reverse_iterator(&mValue[N]); + } + + + template + EA_CPP14_CONSTEXPR inline typename array::const_reverse_iterator + array::crbegin() const EA_NOEXCEPT + { + return const_reverse_iterator(&mValue[N]); + } + + + template + EA_CPP14_CONSTEXPR inline typename array::reverse_iterator + array::rend() EA_NOEXCEPT + { + return reverse_iterator(&mValue[0]); + } + + + template + EA_CPP14_CONSTEXPR inline typename array::const_reverse_iterator + array::rend() const EA_NOEXCEPT + { + return const_reverse_iterator(static_cast(&mValue[0])); + } + + + template + EA_CPP14_CONSTEXPR inline typename array::const_reverse_iterator + array::crend() const EA_NOEXCEPT + { + return const_reverse_iterator(static_cast(&mValue[0])); + } + + + template + EA_CPP14_CONSTEXPR inline typename array::size_type + array::size() const EA_NOEXCEPT + { + return (size_type)N; + } + + + template + EA_CPP14_CONSTEXPR inline typename array::size_type + array::max_size() const EA_NOEXCEPT + { + return (size_type)N; + } + + + template + EA_CPP14_CONSTEXPR inline bool array::empty() const EA_NOEXCEPT + { + return (N == 0); + } + + + template + EA_CPP14_CONSTEXPR inline typename array::reference + array::operator[](size_type i) + { + #if EASTL_ASSERT_ENABLED + if(EASTL_UNLIKELY(i >= N)) + EASTL_FAIL_MSG("array::operator[] -- out of range"); + #endif + + EA_ANALYSIS_ASSUME(i < N); + return mValue[i]; + } + + + template + EA_CPP14_CONSTEXPR inline typename array::const_reference + array::operator[](size_type i) const + { + #if EASTL_ASSERT_ENABLED + if(EASTL_UNLIKELY(i >= N)) + EASTL_FAIL_MSG("array::operator[] -- out of range"); + + #endif + + EA_ANALYSIS_ASSUME(i < N); + return mValue[i]; + } + + + template + EA_CPP14_CONSTEXPR inline typename array::reference + array::front() + { + #if EASTL_ASSERT_ENABLED + if(EASTL_UNLIKELY(empty())) // We don't allow the user to reference an empty container. 
+ EASTL_FAIL_MSG("array::front -- empty array"); + #endif + + return mValue[0]; + } + + + template + EA_CPP14_CONSTEXPR inline typename array::const_reference + array::front() const + { + #if EASTL_ASSERT_ENABLED + if(EASTL_UNLIKELY(empty())) // We don't allow the user to reference an empty container. + EASTL_FAIL_MSG("array::front -- empty array"); + #endif + + return mValue[0]; + } + + + template + EA_CPP14_CONSTEXPR inline typename array::reference + array::back() + { + #if EASTL_ASSERT_ENABLED + if(EASTL_UNLIKELY(empty())) // We don't allow the user to reference an empty container. + EASTL_FAIL_MSG("array::back -- empty array"); + #endif + + return mValue[N - 1]; + } + + + template + EA_CPP14_CONSTEXPR inline typename array::const_reference + array::back() const + { + #if EASTL_ASSERT_ENABLED + if(EASTL_UNLIKELY(empty())) // We don't allow the user to reference an empty container. + EASTL_FAIL_MSG("array::back -- empty array"); + #endif + + return mValue[N - 1]; + } + + + template + EA_CPP14_CONSTEXPR inline T* array::data() EA_NOEXCEPT + { + return mValue; + } + + + template + EA_CPP14_CONSTEXPR inline const T* array::data() const EA_NOEXCEPT + { + return mValue; + } + + + template + EA_CPP14_CONSTEXPR inline typename array::const_reference array::at(size_type i) const + { + #if EASTL_EXCEPTIONS_ENABLED + if(EASTL_UNLIKELY(i >= N)) + throw std::out_of_range("array::at -- out of range"); + #elif EASTL_ASSERT_ENABLED + if(EASTL_UNLIKELY(i >= N)) + EASTL_FAIL_MSG("array::at -- out of range"); + #endif + + EA_ANALYSIS_ASSUME(i < N); + return static_cast(mValue[i]); + } + + + template + EA_CPP14_CONSTEXPR inline typename array::reference array::at(size_type i) + { + #if EASTL_EXCEPTIONS_ENABLED + if(EASTL_UNLIKELY(i >= N)) + throw std::out_of_range("array::at -- out of range"); + #elif EASTL_ASSERT_ENABLED + if(EASTL_UNLIKELY(i >= N)) + EASTL_FAIL_MSG("array::at -- out of range"); + #endif + + EA_ANALYSIS_ASSUME(i < N); + return static_cast(mValue[i]); + } + + + template + inline bool array::validate() const + { + return true; // There is nothing to do. 
+ } + + + template + inline int array::validate_iterator(const_iterator i) const + { + if(i >= mValue) + { + if(i < (mValue + N)) + return (isf_valid | isf_current | isf_can_dereference); + + if(i <= (mValue + N)) + return (isf_valid | isf_current); + } + + return isf_none; + } + + + + /////////////////////////////////////////////////////////////////////// + // global operators + /////////////////////////////////////////////////////////////////////// + + template + EA_CPP14_CONSTEXPR inline bool operator==(const array& a, const array& b) + { + return eastl::equal(&a.mValue[0], &a.mValue[N], &b.mValue[0]); + } + + + template + EA_CPP14_CONSTEXPR inline bool operator<(const array& a, const array& b) + { + return eastl::lexicographical_compare(&a.mValue[0], &a.mValue[N], &b.mValue[0], &b.mValue[N]); + } + + + template + EA_CPP14_CONSTEXPR inline bool operator!=(const array& a, const array& b) + { + return !eastl::equal(&a.mValue[0], &a.mValue[N], &b.mValue[0]); + } + + + template + EA_CPP14_CONSTEXPR inline bool operator>(const array& a, const array& b) + { + return eastl::lexicographical_compare(&b.mValue[0], &b.mValue[N], &a.mValue[0], &a.mValue[N]); + } + + + template + EA_CPP14_CONSTEXPR inline bool operator<=(const array& a, const array& b) + { + return !eastl::lexicographical_compare(&b.mValue[0], &b.mValue[N], &a.mValue[0], &a.mValue[N]); + } + + + template + EA_CPP14_CONSTEXPR inline bool operator>=(const array& a, const array& b) + { + return !eastl::lexicographical_compare(&a.mValue[0], &a.mValue[N], &b.mValue[0], &b.mValue[N]); + } + + + template + inline void swap(array& a, array& b) + { + eastl::swap_ranges(&a.mValue[0], &a.mValue[N], &b.mValue[0]); + } + + + /////////////////////////////////////////////////////////////////////// + // to_array + /////////////////////////////////////////////////////////////////////// + namespace internal + { + template + EA_CONSTEXPR auto to_array(T (&a)[N], index_sequence) + { + return eastl::array, N>{{a[I]...}}; + } + + template + EA_CONSTEXPR auto to_array(T (&&a)[N], index_sequence) + { + return eastl::array, N>{{eastl::move(a[I])...}}; + } + } + + template + EA_CONSTEXPR eastl::array, N> to_array(T (&a)[N]) + { + static_assert(eastl::is_constructible_v, "element type T must be copy-initializable"); + static_assert(!eastl::is_array_v, "passing multidimensional arrays to to_array is ill-formed"); + return internal::to_array(a, eastl::make_index_sequence{}); + } + + template + EA_CONSTEXPR eastl::array, N> to_array(T (&&a)[N]) + { + static_assert(eastl::is_move_constructible_v, "element type T must be move-constructible"); + static_assert(!eastl::is_array_v, "passing multidimensional arrays to to_array is ill-formed"); + return internal::to_array(eastl::move(a), eastl::make_index_sequence{}); + } + + +} // namespace eastl + + +#endif // Header include guard + + + + + + + + + + diff --git a/libkram/eastl/include/EASTL/atomic.h b/libkram/eastl/include/EASTL/atomic.h new file mode 100644 index 00000000..27117e9c --- /dev/null +++ b/libkram/eastl/include/EASTL/atomic.h @@ -0,0 +1,1772 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_H +#define EASTL_ATOMIC_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// Below is the documentation of the API of the eastl::atomic library. +// This includes class and free functions. +// Anything marked with a '+' in front of the name is an extension to the std API. +// + + +///////////////////////////////////////////////////////////////////////////////// +// +// eastl::atomic memory_order API +// +// See below for full explanations on the memory orders and their guarantees. +// +// - eastl::memory_order_relaxed +// - eastl::memory_order_acquire +// - eastl::memory_order_release +// - eastl::memory_order_acq_rel +// - eastl::memory_order_seq_cst +// - +eastl::memory_order_read_depends +// + + +///////////////////////////////////////////////////////////////////////////////// +// +// eastl::atomic class API +// +// All jargon and prerequisite knowledge is explained below. +// +// Unless otherwise specified all orders except read_depends is a valid order +// on the given operation. +// Unless otherwise specified all operations are valid on all types T. +// If no order is provided, seq_cst memory ordering is used for the operation. +// +// - atomic() : Value-initializes the underlying object as T{}. +// +// - atomic(T) : Initializes the underlying object with a copy of T. +// +// - T operator=(T) : Atomically assigns T as store(T, seq_cst). +// +// - is_lock_free() : true if the operations are lockfree. Always true for eastl. +// +// - store(T, order) : Atomically stores T affecting memory according to order. +// : Valid orders are relaxed, release, and seq_cst. +// +// - T load(order) : Atomically loads T affecting memory according to order. +// : Valid orders are relaxed, acquire, and seq_cst. +// : If T is a pointer type, read_depends is another valid order. +// +// - operator T() : Atomically loads T as load(T, seq_cst). +// +// - T exchange(T, order) : Atomically performs a RMW that replaces the current value with T. +// : Memory is affected according to order. +// : Returns the previous value stored before the RMW operation. +// +// - bool compare_exchange_weak(T&, T, successOrder, failOrder) +// : Atomically compares the value stored with that of T& and if equal replaces it with T. +// : This is a RMW operation. +// : If the comparison fails, loads the observed value into T&. This is a load operation. +// : Memory is affected in the RMW operation according to successOrder. +// : Memory is affected in the load operation according to failOrder. +// : failOrder cannot be a stronger order than successOrder. +// : Returns true or false if the comparison succeeded and T was stored into the atomic object. +// : +// : The weak variant may fail even if the observed value of the atomic object equals T&. +// : This can yield performance gains on platforms with ld/str exclusive pair instructions especially +// : when the compare_exchange operation is done in a loop. +// : Only the bool return value can be used to determine if the operation was successful. +// +// - bool compare_exchange_weak(T&, T, order) +// : Same as the above except that order is used for both the RMW and the load operation. +// : If order == acq_rel then the order of the load operation equals acquire. +// : If order == release then the order of the load operation equals relaxed. 
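+//
+//      Example (illustrative sketch only; 'gCounter' and 'IncrementClamped' are hypothetical names,
+//      not part of the API): a typical compare_exchange_weak retry loop. On failure the observed
+//      value is reloaded into the first argument, so the desired value is recomputed each iteration.
+//
+//          eastl::atomic<int> gCounter{0};
+//
+//          // Saturating increment: never advance gCounter past maxValue.
+//          void IncrementClamped(int maxValue)
+//          {
+//              int observed = gCounter.load(eastl::memory_order_relaxed);
+//              do
+//              {
+//                  if (observed >= maxValue)
+//                      return; // already at the cap; nothing to store
+//              } while (!gCounter.compare_exchange_weak(observed, observed + 1, eastl::memory_order_relaxed));
+//          }
+//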
+// +// - bool compare_exchange_strong(T&, T, successOrder, failOrder) +// - bool compare_exchange_strong(T&, T, order) +// : This operation is the same as the above weak variants +// : expect that it will not fail spuriously if the value stored equals T&. +// +// The below operations are only valid for Integral types. +// +// - T fetch_add(T, order) +// : Atomically performs a RMW that increments the value stored with T. +// : Returns the previous value stored before the RMW operation. +// - T fetch_sub(T, order) +// : Atomically performs a RMW that decrements the value stored with T. +// : Returns the previous value stored before the RMW operation. +// - T fetch_and(T, order) +// : Atomically performs a RMW that bit-wise and's the value stored with T. +// : Returns the previous value stored before the RMW operation. +// - T fetch_or(T, order) +// : Atomically performs a RMW that bit-wise or's the value stored with T. +// : Returns the previous value stored before the RMW operation. +// - T fetch_xor(T, order) +// : Atomically performs a RMW that bit-wise xor's the value stored with T. +// : Returns the previous value stored before the RMW operation. +// +// - +T add_fetch(T, order) +// : Atomically performs a RMW that increments the value stored with T. +// : Returns the new updated value after the operation. +// - +T sub_fetch(T, order) +// : Atomically performs a RMW that decrements the value stored with T. +// : Returns the new updated value after the operation. +// - +T and_fetch(T, order) +// : Atomically performs a RMW that bit-wise and's the value stored with T. +// : Returns the new updated value after the operation. +// - +T or_fetch(T, order) +// : Atomically performs a RMW that bit-wise or's the value stored with T. +// : Returns the new updated value after the operation. +// - +T xor_fetch(T, order) +// : Atomically performs a RMW that bit-wise xor's the value stored with T. +// : Returns the new updated value after the operation. +// +// - T operator++/--() +// : Atomically increments or decrements the atomic value by one. +// : Returns the previous value stored before the RMW operation. +// : Memory is affected according to seq_cst ordering. +// +// - T ++/--operator() +// : Atomically increments or decrements the atomic value by one. +// : Returns the new updated value after the RMW operation. +// : Memory is affected according to seq_cst ordering. +// +// - T operator+=/-=/&=/|=/^=(T) +// : Atomically adds, subtracts, bitwise and/or/xor the atomic object with T. +// : Returns the new updated value after the operation. +// : Memory is affected according to seq_cst ordering. +// +// +// The below operations are only valid for Pointer types +// +// - T* fetch_add(ptrdiff_t val, order) +// : Atomically performs a RMW that increments the value store with sizeof(T) * val +// : Returns the previous value stored before the RMW operation. +// - T* fetch_sub(ptrdiff_t val, order) +// : Atomically performs a RMW that decrements the value store with sizeof(T) * val +// : Returns the previous value stored before the RMW operation. +// +// - +T* add_fetch(ptrdiff_t val, order) +// : Atomically performs a RMW that increments the value store with sizeof(T) * val +// : Returns the new updated value after the operation. +// - +T* sub_fetch(ptrdiff_t val, order) +// : Atomically performs a RMW that decrements the value store with sizeof(T) * val +// : Returns the new updated value after the operation. 
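+//
+//      Example (illustrative sketch only; 'gSlots', 'gCursor' and 'ClaimSlot' are hypothetical names,
+//      and bounds checking is omitted): pointer arithmetic through fetch_add.
+//
+//          static uint32_t gSlots[64];
+//          eastl::atomic<uint32_t*> gCursor{&gSlots[0]};
+//
+//          uint32_t* ClaimSlot()
+//          {
+//              // fetch_add on a pointer advances the stored value by sizeof(uint32_t) * 1 and
+//              // returns the previous pointer, i.e. the slot this caller now owns.
+//              return gCursor.fetch_add(1, eastl::memory_order_relaxed);
+//          }
+//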
+// +// - T* operator++/--() +// : Atomically increments or decrements the atomic value by sizeof(T) * 1. +// : Returns the previous value stored before the RMW operation. +// : Memory is affected according to seq_cst ordering. +// +// - T* ++/--operator() +// : Atomically increments or decrements the atomic value by sizeof(T) * 1. +// : Returns the new updated value after the RMW operation. +// : Memory is affected according to seq_cst ordering. +// +// +// - +EASTL_ATOMIC_HAS_[len]BIT Macro Definitions +// These macros provide the ability to compile-time switch on the availability of support for the specific +// bit width of an atomic object. +// Example: +// +// #if defined(EASTL_ATOMIC_HAS_128BIT) +// #endif +// +// Indicates the support for 128-bit atomic operations on an eastl::atomic object. +// + + +///////////////////////////////////////////////////////////////////////////////// +// +// eastl::atomic_flag class API +// +// Unless otherwise specified all orders except read_depends is a valid order +// on the given operation. +// +// - atomic_flag() : Initializes the flag to false. +// +// - clear(order) +// : Atomically stores the value false to the flag. +// : Valid orders are relaxed, release, and seq_cst. +// +// - bool test_and_set(order) +// : Atomically exchanges flag with true and returns the previous value that was held. +// +// - bool test(order) +// : Atomically loads the flag value. +// : Valid orders are relaxed, acquire, and seq_cst. +// + + +///////////////////////////////////////////////////////////////////////////////// +// +// eastl::atomic standalone free function API +// +// All class methods have a standalone free function that takes a pointer to the +// atomic object as the first argument. These functions just call the correct method +// on the atomic object for the given operation. +// These functions come in two variants, a non-explicit and an explicit variant +// that take on the form atomic_op() and atomic_op_explicit() respectively. +// The non-explicit variants take no order arguments and thus are all seq_cst. +// The explicit variants take an order argument. +// Only the standalone functions that do not have a class method equivalent pair will be +// documented here which includes all new extensions to the std API. +// +// - +compiler_barrier() +// : Read-Write Compiler Barrier. +// - +compiler_barrier_data_dependency(const T&) +// : Read-Write Compiler Barrier. +// : Applies a fake input dependency on const T& so the compiler believes said variable is used. +// : Useful for example when writing benchmark or testing code with local variables that must not get dead-store eliminated. +// - +cpu_pause() +// : Prevents speculative memory order violations in spin-wait loops. +// : Allows giving up core resources, execution units, to other threads while in spin-wait loops. +// - atomic_thread_fence(order) +// : Read docs below. +// - atomic_signal_fence(order) +// : Prevents reordering with a signal handler. +// - +atomic_load_cond(const eastl::atomic*, Predicate) +// : continuously loads the atomic object until Predicate is true +// : will properly ensure the spin-wait loop is optimal +// : very useful when needing to spin-wait for some condition to be true which is common is many lock-free algorithms +// : Memory is affected according to seq_cst ordering. 
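+//
+//      Example (illustrative sketch of atomic_load_cond above; 'gReady' is a hypothetical flag and the
+//      predicate is assumed to receive the loaded value):
+//
+//          eastl::atomic<bool> gReady{false};
+//
+//          // Consumer: spin-wait until a producer stores true, letting the implementation
+//          // handle the cpu_pause()/backoff behaviour inside the loop.
+//          eastl::atomic_load_cond(&gReady, [](bool value) { return value; });
+//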
+// - +atomic_load_cond_explicit(const eastl::atomic*, Predicate, Order) +// : Same as above but takes an order for how memory is affected +// + + +///////////////////////////////////////////////////////////////////////////////// +// +// Deviations from the standard. This does not include new features added: +// +// 1. +// Description: Atomics are always lock free +// Reasoning : We don't want people to fall into performance traps where implicit locking +// is done. If your user defined type is large enough to not support atomic +// instructions then your user code should do the locking. +// +// 2. +// Description: Atomic objects can not be volatile +// Reasoning : Volatile objects do not make sense in the context of eastl::atomic. +// Use the given memory orders to get the ordering you need. +// Atomic objects have to become visible on the bus. See below for details. +// +// 3. +// Description: Consume memory order is not supported +// Reasoning : See below for the reasoning. +// +// 4. +// Description: ATOMIC_INIT() macros and the ATOMIC_LOCK_FREE macros are not implemented +// Reasoning : Use the is_lock_free() method instead of the macros. +// ATOMIC_INIT() macros aren't needed since the default constructor value initializes. +// +// 5. +// Description: compare_exchange failure memory order cannot be stronger than success memory order +// Reasoning : Besides the argument that it ideologically does not make sense that a failure +// of the atomic operation shouldn't have a stricter ordering guarantee than the +// success of it; if that is required then just make the whole operation stronger. +// This ability was added and allowed in C++17 only which makes supporting multiple +// C++ versions harder when using the compiler provided intrinsics since their behaviour +// is reliant on the C++ version being compiled. Also makes it harder to reason about code +// using these atomic ops since C++ versions vary the behaviour. We have also noticed +// that versions of compilers that say they support C++17 do not properly adhere to this +// new requirement in their intrinsics. Thus we will not support this. +// +// 6. +// Description: All memory orders are distinct types instead of enum values +// Reasoning : This will not affect how the API is used in user code. +// It allows us to statically assert on invalid memory orders since they are compile-time types +// instead of potentially runtime enum values. +// Allows for more efficient code gen without the use of switch statements or if-else conditionals +// on the memory order enum values on compilers that do not provide intrinsics that take in a +// memory order, such as MSVC, especially in debug and debug-opt builds. +// + + +///////////////////////////////////////////////////////////////////////////////// +// +// ******** DISCLAIMER ******** +// +// This documentation is not meant to provide rigorous proofs on the memory models +// of specific architectures or the C++ memory model introduced in C++11. It is not +// meant to provide formal mathematical definitions and logic that shows that a given +// implementation adheres to the C++ memory model. This isn't meant to be some infallible +// oracle on memory models, barriers, observers, and architecture implementation details. +// What I do hope a reader gets out of this is the following. An understanding of the C++ +// memory model and how that relates to implementations on various architectures. 
Various +// phenomena and ways that compilers and architectures can steer away from a sequentially +// consistent system. To provide examples on how to use this library with common patterns +// that will be seen in many code bases. Lastly I would like to provide insight and +// further readings into the lesser known topics that aren't shared outside people +// who live in this space and why certain things are done the way they are +// such as cumulativity of memory barriers as one example. Sometimes specifying barriers +// as LDLD/LDST/STST/STLD doesn't actually cut it, and finer grain semantics are needed +// to describe cumulativity of memory barriers. +// +// ******** Layout of the Documentation ******** +// +// This document will first go through a variety of different hardware architectures with examples of the various kinds of +// reordering that is allowed by these architectures. We will use the memory barriers provided by the hardware to "fix" these +// examples. +// Then we will introduce the C++ memory model and revisit the examples using the platform agnostic abstract memory model to "fix" +// them. +// The hope here is that we get a sense of the various types of architectures and weak memory consistency provided by them and thus +// an appreciation for the design of the C++ abstract memory model. +// +// ******** REFERENCES ******** +// [1] Dekker's mutual exclusion algorithm made RW-safe +// [2] Handling Memory Ordering in Multithreaded Applications with Oracle Solaris +// [3] Evaluating the Cost of Atomic Operations on Modern Architectures +// [4] A Tutorial Introduction to the ARM and POWER Relaxed Memory Models +// [5] Memory Barriers: a Hardware View for Software Hackers +// [6] Memory Model = Instruction Reordering + Store Atomicity +// [7] ArMOR: Defending Against Memory Consistency Model Mismatches in Heterogeneous Architectures +// [8] Weak Memory Models: Balancing Definitional Simplicity and Implementation Flexibility +// [9] Repairing Sequential Consistency in C/C++11 +// [10] A high-level operational semantics for hardware weak memory models +// [11] x86-TSO: A Rigorous and Usable Programmer's Model for x86 Multiprocessors +// [12] Simplifying ARM Concurrency: Multicopy-Atomic Axiomatic and Operational Models for ARMv8 +// [13] Mixed-size Concurrency: ARM, POWER, C/C++11, and SC +// [14] P0668R4: Revising the C++ memory model +// [15] Constructing a Weak Memory Model +// [16] The Superfluous Load Queue +// [17] P0190R1: Proposal for New memory_order_consume Definition +// +// ******** What does it mean to be Atomic? ******** +// +// The word atomic has been overloaded and can mean a lot of different things depending on the context, +// so let's digest it. +// +// The first attribute for something to be atomic is that concurrent stores and loads +// must not tear or shear. This means if two threads write 0x01 and 0x02 at the same time +// then the only values that should ever be observed is 0x01 or 0x02. We can only see +// the whole write of 0x01 or 0x02, not 0x03 as an example. Many algorithms rely on +// this property; only very few such a Dekker's algorithm for mutual exclusion don't. +// Well actually a recent paper, [1], showed that Dekker's isn't safe without atomic +// loads and stores so this property is pretty fundamental and also hard to prove that +// your algorithm is safe without this property on loads and stores. +// +// We need to ensure the compiler emits a single load instruction. 
+// If we are doing 64-bit loads on a 32-bit platform, we need to ensure the load is one +// instruction instead of 2 32-bit loads into two registers. +// Another example is if we have this struct, struct { int32_t i; int32_t k; }, even on +// a 64-bit system we have to ensure the compiler does one 64-bit load and not two +// 32-bit loads for each individual member. +// +// We also need to ensure the correct instruction is emitted. A general load instruction +// to do a 64-bit load on a 32-bit platform may perform a 64-bit load but it may not +// be atomic, it may be turned into two 32-bit loads behind the scenes in the cpu. +// For example on ARMv7 we would have to use ldrexd not ldrd for 64-bit loads +// on a 32-bit ARMv7 core. +// +// An operation may be considered atomic if multiple sub-operations are done as one +// transactional unit. This is commonly known as a Read-Modify-Write, RMW, operation. +// Take a simple add operation; it is actually a load from memory into a register, +// a modification of said register and then a store back to memory. If two threads +// concurrently execute this add operation on the same memory location; any interleaving +// of the 3 sub-operations is possible. It is possible that if the initial value is 0, +// the result may be 1 because each thread executed in lockstep both loading 0, adding 1 +// and then storing 1. A RMW operation may be considered atomic if the whole sequence of +// sub-operations are serialized as one transactional unit. +// +// Atomicity may also refer to the order in which memory operations are observed and the +// dependencies between memory operations to different memory locations. As a quick example +// into the very thing we will be deep diving into that is not very intuitive. If I do, [STORE(A, 2); STORE(B, 1);], +// in one thread and another thread does, [r0 = LOAD(B); r1 = LOAD(A);]; if r0 == 1, thus we observed +// the store to B, will we observe r1 == 2. Our intuition tells us that well A was stored +// first and then B, so if I read the new value of B then I must also read the new value +// of A since the store to A happened before B so if I can see B then I must be able to +// see everything before B which includes A. +// This highlights the ordering of memory operations and why memory barriers and memory +// models are so heavily attached to atomic operations because one could classify something +// is atomic if the dependency highlighted in the above example is allowed to be maintained. +// +// This is what people mean when you hear that volatile does NOT mean atomicity of the operation. +// Usually people imply a lot of implicit assumptions when they mark a variable as volatile. +// All volatile gives us is the ability to tell the compiler it may not assume anything +// about the state of that memory location. This means the compiler must always emit a load +// or store instruction, cannot perform constant folding, dead-store elimination, or +// do any sort of code movement on volatile variables. +// +// ******** Preliminary Basics ******** +// +// It is expected that the reader understands what a cache is, how it is organized and how data +// is chunked into cachelines. It is helpful if the reader understands basic cache coherency +// protocols such as MSI or MESI. +// It is expected the reader understands alignment, especially natural alignment +// of the processor and why alignment is important for data access. 
+// The reader should have some understanding of how a processor executes instructions, +// basics of what Out-of-Order execution means and basics of what speculative execution means. +// It is expected that the reader has an understanding of threading, multi-threaded programming +// and the use of concurrency primitives such as mutexes. +// Memory Barrier, Barrier, Memory Fence and Fence are all interchangeable synonyms. +// +// Independent memory operations can be performed or observed, depending on your perspective, +// in any order as long as the local cpu thinks its execution is happening in program order. +// This can be a problem for inter-cpu communications and thus we need some way to enforce +// that the compiler does not reorder instructions and that the cpu also does not reorder +// instructions. This is what a barrier is, it is an enforcement of ordering on memory instructions, +// so as the name suggests a barrier. Barriers can be one-sided or both-sided which means +// the barrier enforces a partial order above or below or on both sides of said barrier. +// +// Processors will use tricks such as out-of-order execution, memory instruction buffering and +// combining, speculative loads and speculative execution, branch prediction and many types of caching even +// in various interconnects from the cpu to the memory itself. One key thing to note is that cpus +// do not physically reorder the instruction stream. Instructions are dispatched and retired +// in-order but executed out-of-order. Memory barriers will prevent these tricks from happening +// by controlling the interaction of multiple cpus. +// +// Compilers will morph your code and physically move instructions around as long as the program +// has the same observed behaviour. This is becoming increasingly true with more optimization techniques +// such as Link Time Optimization becoming the norm where once people assumed compilers couldn't assume +// something outside the given TU and now because they have the whole program view they know everything. +// This means the compiler does indeed alter the instruction stream +// and compiler barriers are a way to tell them to not move any memory instructions across the barrier. +// This does not prevent a compiler from doing optimizations such as constant folding, merging of +// overlapping loads, or even dead store elimination. Compiler barriers are also very cheap and +// have zero impact on anything that the compiler knows isn't visible in memory such as local variables +// whose addresses do not escape the function even if their address is taken. You can think of it +// in terms of a sequence point as used with "volatile" qualified variables to denote a place in code where +// things must be stable and the compiler doesn't cache any variables in registers or do any reordering. +// +// Memory Barriers come in many flavours that instill a partial or full ordering on memory operations. +// Some memory operations themselves have implicit ordering guarantees already, for example +// Total-Store Order, TSO, architectures like x86 guarantee that a store operation cannot be reordered with a +// previous store operation thus a memory barrier that only orders stores is not needed +// on this architecture other than ensuring the compiler doesn't do any shenanigans. +// Considering we have 4 permutations of memory operations; a common way to describe an ordering +// is via Load-Load/LDLD, Load-Store/LDST, Store-Store/STST or Store-Load/STLD notation. 
You read this +// notation as follows; STLD memory barrier means a load cannot be reordered with a previous store. +// For example, on TSO architecture we can say all stores provide a STST memory barrier, +// since a store cannot be reordered with a previous store. +// +// Memory Barriers in itself are not a magic bullet, they come with caveats that must be known. +// Each cpu architecture also has its own flavours and guarantees provided by said memory barriers. +// There is no guarantee that memory instructions specified before a memory barrier will complete, +// be written to memory or fully propagated throughout the rest of the system, when the memory barrier +// instruction completes. The memory barrier creates a point in that local cpus queue of memory instructions +// whereby they must not cross. There is no guarantee that using a memory barrier on one cpu will have +// any effect at all on another remote cpu's observed view of memory. This also implies that executing +// a memory barrier does not hinder, incur, stall or enforce any other cpus to serialize with each other cpu. +// In order for a remote cpu to observe the correct effects it must also use a matching memory barrier. +// This means code communicating in 2 threads through memory must both be employing the use of memory barriers. +// For example, a store memory barrier that only orders stores, STST, in one thread must be paired with a load memory barrier +// that only orders loads, LDLD, in the other thread trying to observe those stores in the correct order. +// +// ******** Memory Types && Devices ******** +// +// eastl::atomic and accompanying memory barriers ONLY ORDER MEMORY to cpu-to-cpu communication through whatever the +// processor designates as normal cacheable memory. It does not order memory to devices. It does not provide any DMA ordering guarantees. +// It does not order memory with other memory types such as Write Combining. It strictly orders memory only to shared memory that is used +// to communicate between cpus only. +// +// ******** Sequentially Consistent Machine ******** +// +// The most intuitive as well as the model people naturally expect a concurrent system to have is Sequential Consistency. +// You may have or definitely have heard this term if you dealt with any type of distributed system. Lamport's definition +// articulates this consistency model the best. +// Leslie Lamport: "the result of any execution is the same as if the operations of all the processors were executed in some +// sequential order, and the operations of each individual processor appear in this sequence in the order +// specified by its program". +// +// A Sequentially Consistent machine is modelled as follows: +// +// ------------ ------------ +// | Thread 0 | ... | Thread N | +// ------------ ------------ +// | | | | +// | | | | +// ---------------------------------------- +// | | +// | Shared Memory | +// | | +// ---------------------------------------- +// +// This is a sequentially consistent machine. Each thread is executing instructions in program order which does loads and stores +// that are serialized in some order to the shared memory. This means all communication is done through the shared memory with one cpu +// doing one access at a time. This system has a couple key properties. +// +// 1. There is no local cpu memory reordering. 
Each cpu executes instructions in program order and all loads and stores must complete, +// be visible in the shared memory or be visible in a register before starting the next instruction. +// 2. Each memory operation becomes visible to all cpus at the same time. If a store hits the shared memory, then all subsequent loads +// from every other cpu will always see the latest store. +// +// A Sequentially Consistent machine has, Single-Copy Store Atomicity: All stores must become visible to all cores in the system at the same time. +// +// ******** Adding Caches ******** +// +// Caches by nature implicitly add the potential for memory reordering. A centralized shared snoopy bus that we all learned in school +// makes it easy to implement sequential consistency with caches. Writes and reads are all serialized in a total order via the cache bus transaction +// ordering. Every modern day bus is not inorder, and most certainly not a shared centralized bus. Cache coherency guarantees that all memory operations +// will be propagated eventually to all parties, but it doesn't guarantee in what order or in what time frame. Once you add +// caches, various levels of caching and various interconnects between remote cpus, you inevitably run into the issue where +// some cpus observe the effects of a store before other cpus. Obviously we have weakly-ordered and strongly-ordered cpus with +// caches so why is that? The short answer is, where is the onus put, is it on the programmer or the hardware. Does the hardware +// have dependency tracking, is it able to determine when a memory order violation occurs such as rolling back its speculative execution +// and also how far along the chain of interconnects does the hardware wait before it determines that the memory operation has +// been acknowledged or is considered to satisfy its memory ordering guarantees. Again this is a very high level view of the system +// as a whole, but the takeaway is yes; caches do add the potential for reordering but other supporting hardware determines whether +// that is observable by the programmer. There is also some debate whether weakly-ordered processors are actually more performant +// than strongly-ordered cpus eluding to the fact that the hardware has a better picture of what is a violation versus the programmer +// having to emit far more barriers on weakly-ordered architectures in multi-threaded code which may actually not be needed because the +// hardware didn't commit a violation but it may have and we as the programmer cannot rely on may haves. +// +// ******** Store Buffers ******** +// +// Obviously having all stores serialize results in unnecessary stalls. Store buffers alleviate this issue. +// Store buffers are simple fixed size structures that sit between the cpu and the memory hierarchy. This allows +// each cpu to record its write in the store buffer and then move onto the next instruction. The store buffer will +// eventually be flushed to the resulting memory hierarchy in FIFO order. How and when this flushing occurs is irrelevant to the +// understanding of a store buffer. A read from an address will grab the most recent write to the same address in the store buffer. +// +// The introduction of a store buffer is our first dive into weaker memory consistency. The addition of this hardware turns the consistency model weaker, +// into one that is commonly known as TSO, Total-Store Order. 
This is the exact model used by x86 cpus and we will see what this means +// and what new effects are observed with the addition of the store buffer. Below is a diagram of how the machine may now look. +// This type of store buffer is known as a FIFO store buffer, FIFO write buffer, or Load/Store Queue in some literature. This type of +// store buffer introduces STLD reordering but still prevents STST reordering. We will take a look at another type of store buffer later. +// Even with this store buffer, stores to the same address can still be merged so that only the latest store is written to the cache assuming +// no other intermediary stores happen. x86 cpus do write merging even for consecutive stores, i.e. storing to A and A+1 can be merged into one two-byte store. +// +// ------------ ------------ +// | Thread 0 | ... | Thread N | +// ------------ ------------ +// | | | | +// | | | | +// | Store | | Store | +// | Buffer | | Buffer | +// | | | | +// ---------------------------------------- +// | | +// | Shared Memory | +// | | +// ---------------------------------------- +// +// ---- Store-Buffering / Dekker's Example ---- +// This is a very common litmus test that showcases the introduction of STLD reordering. It is called Store-Buffering example because it is the only weaker +// behaviour observed under TSO and also called Dekker's Example as it famously breaks Dekker's mutual exclusion algorithm. +// +// --------------------------- +// Initial State: +// x = 0; y = 0; +// --------------------------- +// Thread 0 | Thread 1 +// --------------------------- +// STORE(x, 1) | STORE(y, 1) +// r0 = LOAD(y) | r1 = LOAD(x) +// --------------------------- +// Observed: r0 = 0 && r1 = 0 +// --------------------------- +// +// We would normally assume that any interleaving of the two threads cannot possibly end up with both loads reading 0. We assume that the observed outcome +// of r0 = 0 && r1 = 0 to be impossible, clearly that is not the case. Let's start by understanding the example with no reordering possible. Both threads +// run and their first instruction is to write the value 1 into either x or y, the next instruction then loads from the opposite variable. This means no +// matter the interleaving, one of the loads always executes after the other thread's store to that variable. +// We could observe r0 = 1 && r1 = 1 if both threads execute in lockstep. +// We could observe r0 = 0 && r1 = 1 if thread 0 executes and then thread 1 executes. +// We could observe r0 = 1 && r1 = 0 if thread 1 executes and then thread 0 executes. +// Since the stores always execute before that load in the other thread, one thread must always at least observe a store, so let's see why store buffers break this. +// +// What will happen is that STORE(x, 1) is stored to the store buffer but not made globally visible yet. +// STORE(y, 1) is written to the store buffer and also is not made globally visible yet. +// Both loads now read the initial state of x and y which is 0. We got the r0 = 0 && r1 = 0 outcome and just observed a Store-Load reordering. +// It has appeared as if the loads have been reordered with the previous stores and thus executed before the stores. +// Notice even if we execute the instructions in order, a series of other hardware side effects made it appear as if the instructions have been reordered. +// We can solve this by placing a Store-Load barrier after the store and before the load as follows. 
+// +// --------------------------- +// Thread 0 | Thread 1 +// --------------------------- +// STORE(x, 1) | STORE(y, 1) +// STLD BARRIER | STLD BARRIER +// r0 = LOAD(y) | r1 = LOAD(x) +// --------------------------- +// +// This STLD barrier effectively will flush the store buffer into the memory hierarchy ensuring all stores in the buffer are visible to all other cpus at the same time +// before executing the load instruction. Again nothing prevents a potential hardware from speculatively executing the load even with the STLD barrier, the hardware will have to do +// a proper rollback if it detected a memory order violation otherwise it can continue on with its speculative load. The barrier just delimits a stability point. +// +// Most hardware does not provide granular barrier semantics such as STLD. Most provide a write memory barrier which only orders stores, STST, a read memory barrier +// which only orders loads, LDLD, and then a full memory barrier which is all 4 permutations. So on x86 we will have to use the mfence, memory fence, instruction +// which is a full memory barrier to get our desired STLD requirements. +// +// TSO also has the property that we call, Multi-Copy Store Atomicity. This means a cpu sees its own stores before they become visible to other cpus, +// by forwarding them from the store buffer, but a store becomes visible to all other cpus at the same time when flushed from the store buffer. +// +// +// Let's look at a non-FIFO store buffer now as seen in ARM cpus as an example and we will use a standard Message Passing example to see how it manifests in even weaker consistency. +// A store buffer on ARM as an example allows write merging even with adjacent stores, is not a FIFO queue, any stores in the small hardware hash table may be ejected at any point +// due to a collision eviction or the availability of cachelines in the cache hierarchy meaning that stores may bypass the buffer entirely if that cacheline is already owned by that cpu. +// There is no guarantee that stores will be completed in order as in the FIFO case. +// +// --------------------------- +// Initial State: +// x = 0; y = 0; +// --------------------------- +// Thread 0 | Thread 1 +// --------------------------- +// STORE(x, 1) | while(LOAD(y) == 0); +// STORE(y, 1) | r0 = LOAD(x) +// --------------------------- +// Observed: r0 = 0 +// --------------------------- +// +// This is a classic Message Passing example that is very commonly used in production code. We store some values and then set a flag, STORE(y, 1) in this case. +// The other thread waits until the flag is observed and then reads the value out of x. If we observed the flag then we should obviously see all stores before the flag was set. +// Given our familiarity with TSO consistency above we know this definitely works on TSO and it is impossible to observe the load of x returning 0 under that consistency model. +// Let's see how this breaks with a non-FIFO store buffer. +// +// Thread 0 executes the STORE(x, 1) but the cacheline for x is not in thread 0's cache so we write to the store buffer and wait for the cacheline. +// Thread 1 executes the LOAD(y) and it also does not have y in its cacheline so it waits before completing the load. +// Thread 0 moves on to STORE(y, 1). It owns this cacheline, hypothetically, so it may bypass the store buffer and store directly to the cache. +// Thread 0 receives a message that Thread 1 needs y's cacheline, so it transfers the now modified cacheline to Thread 1. 
+// Thread 1 completes the load with the updated value of y = 1 and branches out of the while loop since we saw the new value of y. +// Thread 1 executes LOAD(x) which will return 0 since Thread 0 still hasn't flushed its store buffer waiting for x's cacheline. +// Thread 0 receives x's cacheline and now flushes x = 1 to the cache. Thread 1 will also have invalidated its cacheline for x that it brought in via the previous load. +// +// We have now fallen victim to STST reordering, allowing Thread 1 to observe a load of x returning 0. Not only does this store buffer allow STLD reordering due to the nature of +// buffering stores, but it also allows another reordering; that of Store-Store reordering. It was observed as if Thread 0 executed STORE(y, 1) before STORE(x, 1) which completely +// broke our simple message passing scenario. +// +// --------------------------- +// Thread 0 | Thread 1 +// --------------------------- +// STORE(x, 1) | while(LOAD(y) == 0); +// STST BARRIER | +// STORE(y, 1) | r0 = LOAD(x) +// --------------------------- +// +// The STST memory barrier effectively ensures that the cpu will flush its store buffer before executing any subsequent stores. That is not entirely true, the cpu is still allowed +// to continue and execute stores to the store buffer as long as it doesn't flush them to the cache before the previous stores are flushed to the cache. If nothing becomes +// globally visible out of order then we are good. +// The example above will change how the processor executes due to the STST memory barrier. Thread 0 will execute STORE(y, 1), write to the store buffer and mark all current entries. Even though it owns the cacheline +// it cannot write the store to the cache until all marked entries, which are all the previous stores, are flushed to the cache. We have now fixed the message passing code by adding +// a STST or write memory barrier and thus it is no longer possible to observe the load of x returning 0. +// +// ******** Invalidation Queues ******** +// +// Due to the cache coherency protocol in play, a write to a cacheline will have to send invalidation messages to all other cpus that may have that cacheline as well. +// Immediately executing and responding to invalidation messages can cause quite a stall especially if the cache is busy at the moment with other requests. +// The longer we wait to invalidate the cacheline, the longer the remote cpu doing the write is stalled waiting on us. We don't like this very much. +// Invalidation Queues are just that, we queue up the action of actually invalidating the cacheline but immediately respond to the request saying we did it anyway. +// Now the remote cpu thinks we invalidated said cacheline but actually it may very well still be in our cache ready to be read from. We just got weaker again, let's +// see how this manifests in code by starting from the end of our previous example. +// +// --------------------------- +// Initial State: +// x = 0; y = 0; +// --------------------------- +// Thread 0 | Thread 1 +// --------------------------- +// STORE(x, 1) | while(LOAD(y) == 0); +// STST BARRIER | +// STORE(y, 1) | r0 = LOAD(x) +// --------------------------- +// Observed: r0 = 0 +// --------------------------- +// +// Thread 1 receives the invalidate x's cacheline message and queues it because it is busy. +// Thread 1 receives the invalidate y's cacheline message, but we don't have that cacheline so acknowledge immediately. +// Thread 1 executes LOAD(y), loads in y's cacheline and branches out of the loop. 
+// Thread 1 executes LOAD(x), and loads from the cache the old value of x because the invalidation message is still sitting in the invalidation queue. +// +// We have just again observed the load of x returning 0 but from a different type of reordering now on the reader side. +// This is a form of LDLD, Load-Load, reordering as it appears as if LOAD(x) was executed before LOAD(y). This can be fixed as follows. +// +// --------------------------- +// Thread 0 | Thread 1 +// --------------------------- +// STORE(x, 1) | while(LOAD(y) == 0); +// STST BARRIER | LDLD BARRIER +// STORE(y, 1) | r0 = LOAD(x) +// --------------------------- +// +// The LDLD memory barrier essentially marks all entries currently in the invalidation queue. Any subsequent load must wait until all the marked entries have been +// processed. This ensures once we observe y = 1, we process all entries that came before y and that way we observe all the stores that happened before y. +// The insertion of the read memory barrier creates the required memory barrier pairing as discussed above and ensures that now our code executes as expected. +// +// It must be made clear that these are not the only hardware structure additions or ways that can relax STST, STLD and LDLD orderings. These are merely +// 2 structures that are common and ones that I choose to use as examples of how hardware can reduce ordering guarantees. Knowing how the hardware does this +// isn't always entirely clear but having a model that tells us what operations can be reordered is all we need to be able to reason about our code when executing on that hardware. +// +// ******** Load Buffering ******** +// +// The analog of the Store Buffering example, this litmus test has two threads read from two different locations and then write to the other locations. +// The outcome of having LDST reordering is allowed and observable on many processors such as ARM. +// +// --------------------------- +// Initial State: +// x = 0; y = 0; +// --------------------------- +// Thread 0 | Thread 1 +// --------------------------- +// r0 = LOAD(x) | r1 = LOAD(y) +// STORE(y, 1) | STORE(x, 1) +// --------------------------- +// Observed: r0 = 1 && r1 = 1 +// --------------------------- +// +// This is possible because the processor does not have to wait for the other cpu's cacheline to arrive before storing into the cache. +// Assume Thread 0 owns y's cacheline and Thread 1 owns x's cacheline. +// The processor may execute the load and thus buffer the load waiting for the cacheline to arrive. +// The processor may continue onto the store and since each cpu owns their respective cacheline, store the result into the cache. +// The cpus now receive the cachelines for x and y with the now modified value. +// We have just observed the loads returning 1 and thus observed LDST reordering. +// +// To forbid such outcome it suffices to add any full memory barrier to both threads or a local Read-After-Write/Read-To-Write dependency or a control dependency. 
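+//
+// Expressed against the C++/eastl::atomic interface, the "any full memory barrier" option is a seq_cst fence between the load and the
+// store in each thread. A minimal sketch, shown with std::atomic for familiarity (eastl::atomic and eastl::atomic_thread_fence mirror
+// this interface); with these fences the outcome r0 = 1 && r1 = 1 is forbidden:
+//
+// #include <atomic>
+//
+// std::atomic<int> x{0};
+// std::atomic<int> y{0};
+//
+// void thread0(int& r0)
+// {
+//     r0 = x.load(std::memory_order_relaxed);
+//     std::atomic_thread_fence(std::memory_order_seq_cst);  // the full memory barrier option from above
+//     y.store(1, std::memory_order_relaxed);
+// }
+//
+// void thread1(int& r1)
+// {
+//     r1 = y.load(std::memory_order_relaxed);
+//     std::atomic_thread_fence(std::memory_order_seq_cst);
+//     x.store(1, std::memory_order_relaxed);
+// }
+//
+// The pseudo-code tables below show the same outcome being forbidden with a plain control dependency and with an artificial
+// address dependency instead.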
+//
+// -------------------------------
+// Thread 0                 | Thread 1
+// -------------------------------
+// r0 = LOAD(x)             | r1 = LOAD(y)
+// if (r0 == 1)             | if (r1 == 1)
+//   STORE(y, 1)            |   STORE(x, 1)
+// -------------------------------
+//
+// -----------------------------------------------------
+// Thread 0                 | Thread 1
+// -----------------------------------------------------
+// r0 = LOAD(x)             | r1 = LOAD(y)
+// STORE(&(y + r0 - r0), 1) | STORE(&(x + r1 - r1), 1)
+// -----------------------------------------------------
+//
+// Both fixes above ensure that neither write can be committed, made globally visible, until the read preceding it in program source
+// code order has been fully satisfied.
+//
+// ******** Compiler Barriers ********
+//
+// Compiler barriers are both-sided barriers that prevent loads and stores from moving down past the compiler barrier and
+// loads and stores from moving up above the compiler barrier. Here we will see the various ways our code may be subject
+// to compiler optimizations and why compiler barriers are needed. Note as stated above, compiler barriers may not
+// prevent all compiler optimizations or transformations. Compiler barriers are usually implemented by reloading all
+// variables that are currently cached in registers and flushing all stores in registers back to memory.
+// This list isn't exhaustive but will hopefully outline what compiler barriers protect against and what they don't.
+//
+// The compiler may reorder loads.
+// LOAD A; LOAD B; -> LOAD B; LOAD A;
+// LOAD A; operation on A; LOAD B; operation on B; -> LOAD A; LOAD B; operation on A; operation on B;
+//
+// Insert a compiler barrier in between the two loads to guarantee that they are kept in order.
+// LOAD A; COMPILER_BARRIER; LOAD B;
+// LOAD A; operation on A; COMPILER_BARRIER; LOAD B; operation on B;
+//
+// The same goes for stores.
+// STORE(A, 1); STORE(B, 1); -> STORE(B, 1); STORE(A, 1);
+// operations and STORE result into A; operations and STORE result into B; -> all operations; STORE result into B; STORE result into A;
+//
+// Insert a compiler barrier in between the two stores to guarantee that they are kept in order.
+// The compiler is still free to merge multiple stores to A before the barrier into one final store.
+// It is also not required that the store to B after the barrier be written to memory; it may be cached in a register for some
+// indeterminate amount of time, as an example.
+// STORE(A, 1); COMPILER_BARRIER; STORE(B, 1);
+//
+// The compiler is allowed to merge overlapping loads and stores.
+// Inserting a compiler barrier here will not prevent the compiler from doing this optimization as doing one wider load/store is
+// technically still abiding by the guarantee that the loads/stores are not reordered with each other.
+// LOAD A[0]; LOAD A[1]; -> A single wider LOAD instruction
+// STORE(A[0], 1); STORE(A[1], 2); -> A single wider STORE instruction
+//
+// Compilers do not have to reload the values pointers point to. This is especially common on RISC architectures with lots
+// of general purpose registers, or with compiler optimizations such as inlining or Link-Time Optimization.
+// int i = *ptr; Do bunch of operations; if (*ptr) { do more; }
+// It is entirely possible the compiler may remove the last if statement because it can keep *ptr in a register
+// and it may infer from the operations done on i that i is never 0.
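+//
+// COMPILER_BARRIER in these examples is just a placeholder. For reference, on the major compilers a compiler-only barrier is
+// commonly spelled roughly as follows; this is illustrative and not necessarily what EASTL uses internally:
+//
+// #if defined(_MSC_VER)
+//     #include <intrin.h>
+//     #define COMPILER_BARRIER() _ReadWriteBarrier()            // MSVC: compiler fence only, no instruction emitted
+// #else
+//     #define COMPILER_BARRIER() asm volatile("" ::: "memory")  // GCC/Clang: "memory" clobber, no instruction emitted
+// #endif
+//
+// With that placeholder in mind, the fixed version of the pointer reload example reads: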
+// +// int i = *ptr; Do bunch of operations; COMPILER_BARRIER; if (*ptr) { do more; } +// Inserting a compiler barrier at that location will cause the compiler to have reload *ptr thus keeping the if statement assuming +// no other optimizations take place, such as the compiler knowing that *ptr is always greater than 0. +// +// The compiler is within its rights to also merge and reload loads as much as it pleases. +// +// while (int tmp = LOAD(A)) +// process_tmp(tmp) +// +// Will be merged and transformed to +// +// if (int tmp = LOAD(A)) +// for (;;) process_tmp(tmp) +// +// Inserting a compiler barrier will ensure that LOAD(A) is always reloaded and thus the unwanted transformation is avoided. +// +// while (int tmp = LOAD(A)) +// { +// process_tmp(tmp) +// COMPILER_BARRIER +// } +// +// Under heavy register pressure scenarios, say the loop body was larger, the compiler may reload A as follows. +// Compiler barriers cannot prevent this from happening, even if we put it after process_tmp as above; +// the compiler still kept those loads above the barrier so it satisfied its contract even though it reloaded +// from A more than once. +// +// while (int tmp = LOAD(A)) +// process_tmp(LOAD(A)) +// +// In the above transformation it is possible that another cpu stores 0 into A. When we reload A for process_tmp, we pass 0 +// to process_tmp() which it would actually never expect to observe. Because if we observed 0, the while loop condition +// would never be satisfied. If the compiler under register pressure instead stored and loaded tmp from its stack slot, that is fine +// because we are just storing and loading the original observed value from A. Obviously that is slower than just reloading from +// A again so an optimizing compiler may not do the stack slot store. This is an unwanted transformation which eastl::atomic prevents +// even on relaxed loads. +// +// The compiler is allowed to do dead-store elimination if it knows that value has already been stored, or that only the last store +// needs to be stored. The compiler does not assume or know that these variables are shared variables. +// +// STORE(A, 1); STORE(A, 1); +// OPERATIONS; -> OPERATIONS; +// STORE(A, 1); +// +// The compiler is well within its rights to omit the second store to A. Assuming we are doing some fancy lockfree communication +// with another cpu and the last store is meant to ensure the ending value is 1 even if another cpu changed A in between; that +// assumption will not be satisfied. A compiler barrier will not prevent the last store from being dead-store removed. +// +// STORE(A, 1); +// OPERATIONS; +// STORE(A, 2); +// +// Assuming these stores are meant to denote some state changes to communicate with a remote cpu. The compiler is allowed to +// transform this as follows without a compiler barrier. Insert a compiler barrier between the two stores to prevent the transformation. +// Something like this will also require memory barriers, but that is not the point of this section. +// +// STORE(A, 2); +// OPERATIONS; +// +// The compiler is also allowed to invent stores as it may please. +// First on many RISC architectures storing an immediate value either involves loading the immediate from the .data section +// or combing a variety of load upper immediate and add or or immediate instructions to get our constant in a register and then +// doing a single 32-bit store instruction from said register. 
Some ISAs have 16-bit store-immediate instructions, so a store
+// may be broken into two 16-bit immediate stores, causing shearing. To reduce instruction dependencies it may also decide
+// to do two add immediates and then two 16-bit stores, again causing shearing.
+//
+// lui $t0, 1             # t0 == 0x00010000
+// ori $t0, $t0, 8        # t0 == 0x00010008
+// strw $t0, 0($a1)       # store t0 into address at a1
+// ->
+// ori $t0, $zero, 8      # t0 == 0x00000008
+// ori $t1, $zero, 1      # t1 == 0x00000001
+// strhw $t0, 0($a1)      # store t0 into the lower half at a1
+// strhw $t1, 2($a1)      # store t1 into the upper half at a1 + 2
+//
+// The above shows a potential transformation that a compiler barrier cannot solve for us.
+//
+// A compiler may also introduce stores to save on branching. Let's see.
+//
+// if (a)
+//   STORE(X, 10);
+// else
+//   STORE(X, 20);
+//
+// STORE(X, 20);
+// if (a)
+//   STORE(X, 10);
+//
+// This is a very common optimization as it saves a potentially more expensive branch instruction but breaks multi-threaded code.
+// This is also another case where a compiler barrier doesn't give us the granularity we need.
+// The branches may even be completely removed with the compiler instead choosing to use conditional move operations, which would
+// actually be compliant since only one store would be done and no extra store would have been added.
+//
+// You are now probably thinking that compiler barriers are useful and are definitely needed to tell the compiler to calm down
+// and to guarantee that the hardware guarantees remain valid because the code we wrote is the code that was actually emitted.
+// But there are definitely lots of caveats where compiler barriers do not at all provide the guarantees we still need.
+// This is where eastl::atomic comes into play, and under the relaxed memory ordering section it will be explained
+// what the standard guarantees and how we achieve those guarantees, like ensuring the compiler never does dead-store elimination or reloads.
+//
+// ******** Control Dependencies ********
+//
+// Control dependencies are implicit local cpu ordering of memory instructions due to branching instructions, specifically
+// only conditional branches. The problem is that compilers do not understand control dependencies, and control dependencies
+// are incredibly hard to understand. This is meant to make the reader aware they exist and to never use them
+// because they shouldn't be needed at all with eastl::atomic. Also, control dependencies are categorized as LDLD or LDST;
+// store control dependencies inherently do not make sense since the conditional branch loads and compares two values.
+//
+// A LDLD control dependency is an anti-pattern since it is not guaranteed that any architecture will detect the memory-order violation.
+// r0 = LOAD(A);
+// if (r0)
+//   r1 = LOAD(B)
+//
+// Given that sequence of instructions, it is entirely possible that a cpu attempts to speculatively predict and load the value of B
+// before the branch instruction has finished executing. It is entirely allowed that the cpu loads from B, assuming B is in cache and A
+// is not in cache, before A. Even if the cpu's prediction turned out to be correct, it is allowed to keep the speculatively loaded B
+// rather than reload it; it simply got lucky.
+//
+// This is also what the x86 pause instruction inserted into spin wait loops is meant to solve.
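+//
+// As a concrete illustration, a typical spin-wait on x86 uses the _mm_pause intrinsic from <immintrin.h> (other architectures have
+// equivalents, such as ARM's yield hint). A sketch of the idiom with made-up names:
+//
+// #include <immintrin.h>
+// #include <atomic>
+//
+// void spinWaitUntilSet(const std::atomic<int>& flag)
+// {
+//     while (flag.load(std::memory_order_acquire) == 0)
+//         _mm_pause();   // hint to the cpu: do not speculate ahead with more loads of flag
+// }
+//
+// The pseudo-code below walks through what that pause hint buys us at the pipeline level.
+//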
+// LOOP: +// r0 = LOAD(A); +// if (!r0) pause; goto LOOP; +// +// In the above spin loop, after a couple of iterations the processor will fill the pipeline with speculated cmp and load instructions. +// x86 will catch a memory order violation if it sees that an external store was done to A and thus must flush the entire +// pipeline of all the speculated load A. Pause instruction tells the cpu to not do speculative loads so that the pipeline is not +// filled with all said speculative load instructions. This ensures we do not incur the costly pipeline flushes from memory order +// violations which are likely to occur in tight spin wait loops. This also allows other threads on the same physical core to use the +// core's resources better since our speculative nature won't be hogging it all. +// +// A LDST control dependency is a true dependency in which the cpu cannot make a store visible to the system and other cpus until it +// knows its prediction is correct. Thus a LDST ordering is guaranteed and can be always relied upon as in the following example. +// +// r0 = LOAD(A); +// if (r0) +// STORE(B, 1); +// +// The fun part comes in with how does the compiler actually break all of this. +// First is that if the compiler can ensure that the value of A in the LDST example is always not zero, then it is always within its +// rights to completely remove the if statement which would lend us with no control dependency. +// +// Things get more fun when we deal with conditionals with else and else if statements where the compiler might be able to employ +// invariant code motion optimizations. Take this example. +// +// r0 = LOAD(A); +// r1 = LOAD(B); +// if (r0) +// STORE(B, 1); +// /* MORE CODE */ +// else if (r1) +// STORE(B, 1); +// /* MORE CODE */ +// else +// STORE(B, 1); +// /* MORE CODE */ +// +// If we were trying to be smart and entirely rely on the control dependency to ensure order, ya well just don't the compiler +// is always smarter. The compiler is well within its rights to move all the STORE(B, 1) up and above all the conditionals breaking +// our reliance on the LDST control dependency. +// +// Things can get even more complicated especially in C++ when values may come from constexpr, inline, inline constexpr, static const, etc, +// variables and thus the compiler will do all sorts of transformations to reduce, remove, augment and change all your conditional code since +// it knows the values of the expressions or even parts of it at compile time. Even more aggressive optimizations like LTO might break code that was being cautious. +// Even adding simple short circuiting logic or your classic likely/unlikely macros can alter conditionals in ways you didn't expect. +// In short know enough about control dependencies to know not to ever use them. +// +// ******** Multi-Copy Store Atomicity && Barrier Cumulativity ******** +// +// Single-Copy Store Atomicity: All stores must become visible to all cores in the system at the same time. +// +// Multi-Copy Store Atomicity : This means a cpu sees its own stores before they become visible to other cpus, by forwarding them from the store buffer, +// but a store becomes visible to all other cpus at the same time when flushed from the store buffer. +// +// Non-Atomic Store Atomicity : A store becomes visible to different cpus at different times. +// +// Those are the above variations of Store Atomicity. Most processors have Non-Atomic Store Atomicity and thus you must program to that lowest common denominator. 
+// We can use barriers, with some caveats, to restore Multi-Copy Store Atomicity to a Non-Atomic system though we need to define a new granular definition for +// memory barriers to define this behaviour. Simple LDLD/LDST/STST/STLD definition is not enough to categorize memory barriers at this level. Let's start off +// with a simple example that breaks under a Non-Atomic Store Atomicity system and what potential hardware features allow this behaviour to be observed. +// +// NOTE: For all the below examples we assume no compile reordering and that the processor also executes the instructions with no local reorderings to make the examples simpler, +// to only show off the effects of Multi-Copy Store Atomicity. This is why we don't add any address dependencies, or mark explicit LDLD/LDST memory barriers. +// Thus you may assume all LDLD and LDST pairs have an address dependency between them, so that they are not reordered by the compiler or the local cpu. +// +// --------------------------------------------------------------------------------------------------------- +// Write-To-Read Causality, WRC, Litmus Test +// --------------------------------------------------------------------------------------------------------- +// Initial State: +// X = 0; Y = 0; +// --------------------------------------------------------------------------------------------------------- +// Thread 0 | Thread 1 | Thread 2 +// --------------------------------------------------------------------------------------------------------- +// STORE(X, 1) | r0 = LOAD(X) | r1 = LOAD(Y) +// | STORE(Y, r0) | r2 = LOAD(X) +// --------------------------------------------------------------------------------------------------------- +// Observed: r0 = 1 && r1 = 1 && r2 = 0 +// --------------------------------------------------------------------------------------------------------- +// +// Let's go over this example in detail and whether the outcome shown above can be observed. In this example Thread 0 stores 1 into X. If Thread 1 observes the write to X, +// it stores the observed value into Y. Thread 2 loads from Y then X. This means if the load from Y returns 1, then we intuitively know the global store order +// was 1 to X and then 1 to Y. So is it possible then that the load from X in Thread 2 can return 0 in that case? Under a Multi-Copy Store Atomicity system, that would be +// impossible because once 1 was stored to X all cpus see that store so if Thread 2 saw the store to Y which can only happen after the store to X was observed, then +// Thread 2 must also have observed the store to X and return 1. As you may well have figured out, it is possible under a Non-Atomic Store Atomicity system to still +// observe the load from X returning 0 even if the above load from Y returned 1 in Thread 2. This completely breaks our intuition of causality. Let's now understand what hardware may cause this. +// +// This is possible on cpus that have Simultaneous Multi-Threading, SMT or HyperThreading in Intel parlance, which share resources such as store buffers or L1 cache. +// We are accustomed to the x86 way of SMT where each logical core shares Execution Units on the physical core but each logical core has their own statically partitioned +// cache and store buffer that is not visible to the other cpus. 
It is possible on cpus like ARMv7 or POWER, POWER9 supports 4 and even 8 threads per physical core, so +// to save on die space though yet enable this large number of threads per physical core it is common for these logical cores to all use the same store buffer or L1 cache +// per physical core on these processors. Let's take the above example and rerun it with this knowledge to get the observed behaviour outlined above. +// +// Assume Thread 0, Thread 1, and Thread 2 run on cpu 0, cpu 1, and cpu 2 respectively. Assume that cpu 0 and cpu 1 are two logical cores on the same physical core so this processor +// has an SMT value of 2. Thread 0 will store 1 into X. This store may be in the store buffer or in the L1 cache that cpu 1 also shares with cpu 0, thus cpu 1 has early access to cpu 0's stores. +// Thread 1 loads X which it observed as 1 early and then stores 1 into Y. Thread 2 may see the load from Y returning 1 but now the load from X returning 0 all because cpu 1 got early +// access to cpu 0 store due to sharing a L1 cache or store buffer. +// We will come back on how to fix this example with the proper memory barriers for the Non-Atomic Store Atomicity systems, but we need to detour first. +// +// We need to take a deeper dive into memory barriers to understand how to restore Multi-Copy Store Atomicity from a Non-Atomic Store Atomicity system. +// Let's start with a motivating example and we will be using the POWER architecture throughout this example because it encompasses all the possible observable behaviour. +// ARMv7 technically allows Non-Atomic Store Atomicity behaviour but no consumer ARMv7 chip actually observes this behaviour. +// ARMv8 reworked its model to specifically say it is a Multi-Copy Store Atomicity system. +// POWER is one of the last few popular consumer architectures that are guaranteed to have Non-Atomic Store Atomicity observable behaviour, thus we will be using it for the following examples. +// +// To preface, POWER has two types of memory barriers called lwsync and sync. The following table lists the guarantees provided by TSO, x86, and the lwsync instruction. +// The table gives a hint as to why using our previous definition of LDLD/LDST/STST/STLD isn't granular enough to categorize memory barrier instructions. +// +// TSO: | POWER lwsync memory barrier: +// LDLD : YES | LDLD : YES +// LDST : YES | LDST : YES +// STST : YES | STST : YES +// STLD : NO | STLD : NO +// A cumulative : YES | A cumulative : YES +// B cumulative : YES | B cumulative : YES +// IRIW : YES | IRIW : NO +// +// The TSO memory model provided by x86 seems to be exactly the same as POWER if we add lwsync memory barrier instructions in between each of the memory instructions. +// This provides us the exact same ordering guarantees as the TSO memory model. If we just looked at the 4 permutations of reorderings we would be inclined to assume that +// TSO has the exact same ordering as sprinkling lwsync in our code in between every pair of memory instructions. That is not the case because memory barrier causality and cumulativity differ in subtle ways. +// In this case they differ by the implicit guarantees from the TSO memory model versus those provided by the POWER lwsync memory barrier. +// So the lwsync memory barrier prevents reordering with instructions that have causality but does not prevent reordering with instructions that are completely independent. +// Let's dive into these concepts a bit more. 
+// +// Non-Atomic Store Atomicity architectures are prone to behaviours such as the non-causal outcome of the WRC test above. Architectures such as POWER defines memory barriers to enforce +// ordering with respect to memory accesses in remote cpus other than the cpu actually issuing the memory barrier. This is known as memory barrier cumulativity. +// How does the memory barrier issued on my cpu affect the view of memory accesses done by remote cpuss. +// +// Cumulative memory barriers are defined as follows - Take your time this part is very non-trivial: +// A-Cumulative: We denote group A as the set of memory instructions in this cpu or other cpus that are ordered before the memory barrier in this cpu. +// A-Cumulativity requires that memory instructions from any cpu that have performed prior to a memory load before the memory barrier on this cpu are also members of group A. +// B-Cumulative: We denote group B as the set of memory instructions in this cpu or other cpus that are ordered after the memory barrier in this cpu. +// B-Cumulativity requires that memory instructions from any cpu that perform after a load and including the load in that cpu that returns the value of a store in group B are +// also members of group B. +// IRIW : enforces a global ordering even for memory instructions that have no causality. The memory instructions are completely independent. +// +// --------------------------------------------------------------------------------------------------------- +// WRC Litmus Test +// --------------------------------------------------------------------------------------------------------- +// Thread 0 | Thread 1 | Thread 2 +// --------------------------------------------------------------------------------------------------------- +// {i} : STORE(X, 1) | {ii} : r0 = LOAD(X) | {v} : r1 = LOAD(Y) +// | {iii} : lwsync | +// | {iv} : STORE(Y, r0) | {vi} : r2 = LOAD(X) +// --------------------------------------------------------------------------------------------------------- +// Outcome: r0 = 1 && r1 = 1 && r2 = 1 +// +// Group A of {iii} : {i} && {ii} +// +// Group B of {iii} : {iv} && {v} && {vi} +// --------------------------------------------------------------------------------------------------------- +// +// Using the WRC test again and inserting a POWER lwsync, don't concern yourself with why the memory barrier was inserted at that spot right now, we now see the distinctions of group A and group B. +// It demonstrates the A and B Cumulative nature of the lwsync instruction, {iii}. First group A, initially consists of {ii} and group B initially consists of {iv} from the local cpu that issued the lwsync. +// Since {ii} reads from {i} and assume {i} happens before {ii}, by definition of A-Cumulativity {i} is included in group A. +// Similarly {v} reads from {iv} and assume {iv} happens before {v}, then {v} is included in group B by definition of B-Cumulativity. +// {vi} is also included in group B since it happens after {v} by definition of B-Cumulativity. +// +// WRC litmus test represents a scenario where only a A-Cumulative memory barrier is needed. The lwsync not only provides the needed local LDST memory barrier for the local thread but also ensures +// that any write Thread 1 has read from before the memory barrier is kept in order with any write Thread 1 does after the memory barrier as far as any other thread observes. 
+// In other words it ensures that any write that has propagated to Thread 1 before the memory barrier is propagated to any other thread before the second store after the memory barrier in Thread 1 +// can propagate to other threads in the system. This is exactly the definition of A-Cumulativity and what we need to ensure that causality is maintained in the WRC Litmus Test example. +// With that lwsync in place it is now impossible to observe r0 = 1 && r1 = 1 && r2 = 0. The lwsync has restored causal ordering. Let's look at an example that requires B-Cumulativity. +// +// --------------------------------------------------------------------------------------------------------- +// Example 2 from POWER manual +// --------------------------------------------------------------------------------------------------------- +// Initial State: +// X = 0; Y = 0; Z = 0 +// --------------------------------------------------------------------------------------------------------- +// Thread 0 | Thread 1 | Thread 2 +// --------------------------------------------------------------------------------------------------------- +// STORE(X, 1) | r0 = LOAD(Y) | r1 = LOAD(Z) +// STORE(Y, 1) | STORE(Z, r0) | r2 = LOAD(X) +// --------------------------------------------------------------------------------------------------------- +// Observed: r0 = 1 && r1 = 1 && r2 = 0 +// --------------------------------------------------------------------------------------------------------- +// +// This example is very similar to WRC except that we kinda extended the Message Passing through an additional shared variable instead. +// Think of this as Thread 0 writing some data into X, setting flag Y, Thread 1 waiting for flag Y then writing flag Z, and finally Thread 2 waiting for flag Z before reading the data. +// Take a minute to digest the above example and think about where a memory barrier, lwsync, should be placed. Don't peek at the solution below. +// +// --------------------------------------------------------------------------------------------------------- +// Example 2 from POWER manual +// --------------------------------------------------------------------------------------------------------- +// Thread 0 | Thread 1 | Thread 2 +// --------------------------------------------------------------------------------------------------------- +// STORE(X, 1) | r0 = LOAD(Y) | r1 = LOAD(Z) +// lwsync | | +// STORE(Y, 1) | STORE(Z, r0) | r2 = LOAD(X) +// --------------------------------------------------------------------------------------------------------- +// +// First the lwsync provides the needed local STST memory barrier for the local thread, thus the lwsync here ensures that the store to X propagates to Thread 1 before the store to Y. +// B-Cumulativity applied to all operations after the memory barrier ensure that the store to X is +// kept in order with respect to the store to Z as far as all other threads participating in the dependency chain are concerned. This is the exact definition of B-Cumulativity. +// With this one lwsync the outcome outlined above is impossible to observe. If r0 = 1 && r1 = 1 then r2 must be properly observed to be 1. +// +// We know that lwsync only provides A-Cumulativity and B-Cumulativity. Now we will look at examples that have no causality constraints thus we need to grab heavier memory barriers +// that ensures in short we will say makes a store become visible to all processors, even those not on the dependency chains. Let's get to the first example. 
+// +// --------------------------------------------------------------------------------------------------------- +// Independent Reads of Independent Writes, IRIW, coined by Doug Lea +// --------------------------------------------------------------------------------------------------------- +// Initial State: +// X = 0; Y = 0; +// --------------------------------------------------------------------------------------------------------- +// Thread 0 | Thread 1 | Thread 2 | Thread 3 +// --------------------------------------------------------------------------------------------------------- +// STORE(X, 1) | r0 = LOAD(X) | STORE(Y, 1) | r2 = LOAD(Y) +// | r1 = LOAD(Y) | | r3 = LOAD(X) +// --------------------------------------------------------------------------------------------------------- +// Observed: r0 = 1 && r1 = 0 && r2 = 1 && r3 = 0 +// --------------------------------------------------------------------------------------------------------- +// +// The IRIW example above clearly shows that writes can be propagated to different cpus in completely different orders. +// Thread 1 sees the store to X but not the store to Y while Thread 3 sees the store to Y but not the store to X, the complete opposite. +// Also to the keen eye you may have noticed this example is a slight modification of the Store Buffer example so try to guess where the memory barriers would go. +// +// --------------------------------------------------------------------------------------------------------- +// Independent Reads of Independent Writes, IRIW, coined by Doug Lea +// --------------------------------------------------------------------------------------------------------- +// Thread 0 | Thread 1 | Thread 2 | Thread 3 +// --------------------------------------------------------------------------------------------------------- +// STORE(X, 1) | r0 = LOAD(X) | STORE(Y, 1) | r2 = LOAD(Y) +// | sync | | sync +// | r1 = LOAD(Y) | | r3 = LOAD(X) +// --------------------------------------------------------------------------------------------------------- +// +// To ensure that the above observation is forbidden we need to add a full sync memory barrier on both the reading threads. Think of sync as restoring sequential consistency. +// The sync memory barrier ensures that any writes that Thread 1 has read from before the memory barrier are fully propagated to all threads before the reads are satisfied after the memory barrier. +// The same can be said for Thread 3. This is why the sync memory barrier is needed because there is no partial causal ordering here or anything that can be considered for our A and B Cumulativity definitions. +// We must ensure that all writes have been propagated to all cpus before proceeding. This gives way to the difference between sync and lwsync with regards to visibility of writes and cumulativity. +// sync guarantees that all program-order previous stores must have been propagated to all other cpus before the memory instructions after the memory barrier. +// lwsync does not ensure that stores before the memory barrier have actually propagated to any other cpu before memory instructions after the memory barrier, but it will keep stores before and after the +// lwsync in order as far as other cpus are concerned that are within the dependency chain. +// +// Fun fact while ARMv7 claims to be Non-Atomic Store Atomicity no mainstream ARM implementation that I have seen has shown cases of Non-Atomic Store Atomicity. 
+// It's allowed by the ARMv7 memory model and thus you have to program to that. ARMv8 changes this and states that it has Multi-Copy Store Atomicity. +// +// ******** Release-Acquire Semantics ******** +// +// The most useful and common cases where Release-Acquire Semantics are used in every day code is in message passing and mutexes. Let's get onto some examples and the C++ definition of Release-Acquire. +// +// ACQUIRE: +// An Acquire operation is a one-way memory barrier whereby all loads and stores after the acquire operation cannot move up and above the acquire operation. +// Loads and stores before the acquire operation can move down past the acquire operation. An acquire operation should always be paired with a Release operation on the SAME atomic object. +// +// RELEASE: +// A Release operation is a one-way memory barrier whereby all loads and stores before the release operation cannot move down and below the release operation. +// Loads and stores after the release operation can move up and above the release operation. A release operation should always be paired with an Acquire operation on the SAME atomic object. +// +// Release-Acquire pair does not create a full memory barrier but it guarantees that all memory instructions before a Release operation on an atomic object M are visible after an Acquire +// operation on that same atomic object M. Thus these semantics usually are enough to preclude the need for any other memory barriers. +// The synchronization is established only between the threads Releasing and Acquiring the same atomic object M. +// +// --------------------------------------------------- +// Critical Section +// --------------------------------------------------- +// Thread 0 | Thread 1 +// --------------------------------------------------- +// mtx.lock() - Acquire | mtx.lock() - Acquire +// STORE(X, 1) | r0 = LOAD(X) +// mtx.unlock() - Release | mtx.unlock() - Release +// --------------------------------------------------- +// +// A mutex only requires Release-Acquire semantics to protect the critical section. We do not care if operations above the lock leak into the critical section or that operations below the unlock leak into the +// critical section because they are outside the protected region of the lock()/unlock() pair. Release-Acquire semantics does guarantee that everything inside the critical section cannot leak out. +// Thus all accesses of all previous critical sections for the mutex are guaranteed to have completed and be visible when the mutex is handed off to the next party due to the Release-Acquire chaining. +// This also means that mutexes do not provide or restore Multi-Copy Store Atomicity to any memory instructions outside the mutex, like the IRIW example since it does not emit full memory barriers. +// +// ------------------------------------------------------ +// Message Passing +// ------------------------------------------------------ +// Thread 0 | Thread 1 +// ------------------------------------------------------ +// STORE(DATA, 1) | while (!LOAD_ACQUIRE(FLAG)) +// | +// STORE_RELEASE(FLAG, 1) | r0 = LOAD(DATA) +// ------------------------------------------------------ +// +// This is a common message passing idiom that also shows the use of Release-Acquire semantics. It should be obvious by the definitions outlined above why this works. +// An Acquire operation attached to a load needs to provide a LDLD and LDST memory barrier according to our definition of acquire. 
This is provided by default on x86 TSO thus no memory barrier is emitted. +// A Release operation attached to a store needs to provide a STST and LDST memory barrier according to our definition of release. This is provided by default on x86 TSO thus no memory barrier is emitted. +// +// A couple of things of note here. One is that by attaching the semantics of a memory model directly to the memory instruction/operation itself we can take advantage of the fact the some processors +// already provide guarantees between memory instructions and thus we do not have to emit memory barriers. Another thing of note is that the memory model is directly attached to the operation, +// so you must do the Release-Acquire pairing on the SAME object which in this case is the FLAG variable. Doing an Acquire or Release on a separate object has no guarantee to observe an Acquire or Release on a different object. +// This better encapsulates the meaning of the code and also allows the processor to potentially do more optimizations since a stand alone memory barrier will order all memory instructions of a given type before and after the barrier. +// Where as the memory ordering attached to the load or store tells the processor that it only has to order memory instructions in relation to that specific load or store with the given memory order. +// +// +// --------------------------------------------------------------------------------------------------------- +// Release Attached to a Store VS. Standalone Fence +// --------------------------------------------------------------------------------------------------------- +// STORE(DATA, 1) | STORE(DATA, 1) +// | ATOMIC_THREAD_FENCE_RELEASE() +// STORE_RELEASE(FLAG, 1) | STORE_RELAXED(FLAG, 1) +// STORE_RELAXED(VAR, 2) | STORE_RELAXED(VAR, 2) +// --------------------------------------------------------------------------------------------------------- +// ARMv8 Assembly +// --------------------------------------------------------------------------------------------------------- +// str 1, DATA | str 1, DATA +// | dmb ish +// stlr 1, FLAG | str 1, FLAG +// str 2, VAR | str 2, VAR +// --------------------------------------------------------------------------------------------------------- +// +// In the above example the release is attached to the FLAG variable, thus synchronization only needs to be guaranteed for that atomic variable. +// It is entirely possible for the VAR relaxed store to be reordered above the release store. +// In the fence version, since the fence is standalone, there is no notion where the release is meant to be attached to thus the fence must prevent all subsequent relaxed stores +// from being reordered above the fence. The fence provides a stronger guarantee whereby now the VAR relaxed store cannot be moved up and above the release operation. +// Also notice the ARMv8 assembly is different, the release fence must use the stronger dmb ish barrier instead of the dedicated release store instruction. +// We dive more into fences provided by eastl::atomic below. +// +// Release-Acquire semantics also have the property that it must chain through multiple dependencies which is where our knowledge from the previous section comes into play. +// Everything on the Release-Acquire dependency chain must be visible to the next hop in the chain. 
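+//
+// Written against the C++/eastl::atomic interface, such a chained hand-off looks roughly like this; std::atomic is shown for
+// familiarity and the names are made up. The litmus tables below then recast the earlier POWER examples in the pseudo-code
+// notation used so far.
+//
+// #include <atomic>
+// #include <cassert>
+//
+// std::atomic<int> x{0}, y{0}, z{0};
+//
+// void thread0()
+// {
+//     x.store(1, std::memory_order_relaxed);
+//     y.store(1, std::memory_order_release);            // releases the store to x to whoever acquires y
+// }
+//
+// void thread1()
+// {
+//     while (y.load(std::memory_order_acquire) == 0) {} // acquire y; the store to x is now visible here
+//     z.store(1, std::memory_order_release);            // pass the chain along; x is released again via z
+// }
+//
+// void thread2()
+// {
+//     while (z.load(std::memory_order_acquire) == 0) {} // acquire z; the whole chain is now visible here
+//     assert(x.load(std::memory_order_relaxed) == 1);   // must hold; the Release-Acquire chain made x visible
+// }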
+// +// --------------------------------------------------------------------------------------------------------- +// Example 2 from POWER manual +// --------------------------------------------------------------------------------------------------------- +// Thread 0 | Thread 1 | Thread 2 +// --------------------------------------------------------------------------------------------------------- +// STORE(X, 1) | r0 = LOAD_ACQUIRE(Y) | r1 = LOAD_ACQUIRE(Z) +// STORE_RELEASE(Y, 1) | STORE_RELEASE(Z, r0) | r2 = LOAD(X) +// --------------------------------------------------------------------------------------------------------- +// +// --------------------------------------------------------------------------------------------------------- +// Write-To-Read Causality, WRC, Litmus Test +// --------------------------------------------------------------------------------------------------------- +// Thread 0 | Thread 1 | Thread 2 +// --------------------------------------------------------------------------------------------------------- +// STORE(X, 1) | r0 = LOAD(X) | r1 = LOAD_ACQUIRE(Y) +// | STORE_RELEASE(Y, r0) | r2 = LOAD(X) +// --------------------------------------------------------------------------------------------------------- +// +// You may notice both of these examples from the previous section. We replaced the standalone POWER memory barrier instructions with Release-Acquire semantics attached directly to the operations where we want causality preserved. +// We have transformed those examples to use the eastl::atomic memory model. +// Take a moment to digest these examples in relation to the definition of Release-Acquire semantics. +// +// The Acquire chain can be satisfied by reading the value from the store release or any later stored headed by that release operation. The following examples will make this clearer. +// +// ------------------------------------------------------ +// Release Sequence Headed +// ------------------------------------------------------ +// Initial State: +// DATA = 0; FLAG = 0; +// ------------------------------------------------------ +// Thread 0 | Thread 1 +// ------------------------------------------------------ +// STORE(DATA, 1) | r0 = LOAD_ACQUIRE(FLAG) +// | +// STORE_RELEASE(FLAG, 1) | r1 = LOAD(DATA) +// STORE_RELAXED(FLAG, 3) | +// ------------------------------------------------------ +// Observed: r0 = 3 && r1 = 0 +// ------------------------------------------------------ +// +// In the above example we may read the value 3 from FLAG which was not the release store, but it was headed by that release store. Thus we observed a later store and therefore it is still valid to then observe r1 = 1. +// The stores to FLAG from the STORE_RELEASE up to but not including the next STORE_RELEASE operation make up the release sequence headed by the first release store operation. Any store on that sequence can be used to enforce +// causality on the load acquire. +// +// ******** Consume is currently not useful ******** +// +// Consume is a weaker form of an acquire barrier and creates the Release-Consume barrier pairing. +// Consume states that a load operation on an atomic object M cannot allow any loads or stores dependent on the value loaded by the operation to be reordered before the operation. +// To understand consume we must first understand dependent loads. +// You might encounter this being called a data dependency or an address dependency in some literature. 
+// +// -------------------------------------------------------------- +// Address Dependency +// -------------------------------------------------------------- +// Initial State: +// DATA = 0; PTR = nullptr; +// -------------------------------------------------------------- +// Thread 0 | Thread 1 +// -------------------------------------------------------------- +// STORE(DATA, 1) | r0 = LOAD(PTR) - typeof(r0) = int* +// | +// STORE(PTR, &DATA) | r1 = LOAD(r0) - typeof(r1) = int +// -------------------------------------------------------------- +// +// There is a clear dependency here where we cannot load from *int until we actually read the int* from memory. +// Now it is possible for Thread 1's load from *ptr to be observed before the store to DATA, therefore it can lead to r0 = &DATA && r1 = 0. +// While this is a failure of causality, it is allowed by some cpus such as the DEC Alpha and I believe Blackfin as well. +// Thus a data dependency memory barrier must be inserted between the data dependent loads in Thread 1. Note that this would equate to a nop on any processor other than the DEC Alpha. +// +// This can occur for a variety of hardware reasons. We learned about invalidation queues. It is possible that the invalidation for DATA gets buffered in Thread 1. DEC Alpha allows the Thread 1 +// load from PTR to continue without marking the entries in its invalidation queue. Thus the subsequent load is allowed to return the old cached value of DATA instead of waiting for the +// marked entries in the invalidation queue to be processed. It is a design decision of the processor not to do proper dependency tracking here and instead relying on the programmer to insert memory barriers. +// +// This data dependent ordering guarantee is useful because in places where we were using an Acquire memory barrier we can reduce it to this Consume memory barrier without any hardware barriers actually emitted on every modern processor. +// Let's take the above example, translate it to Acquire and Consume memory barriers and then translate it to the ARMv7 assembly and see the difference. +// +// --------------------------------------------------------------- --------------------------------------------------------------- +// Address Dependency - Release-Acquire Address Dependency - Release-Acquire - ARMv7 Assembly +// --------------------------------------------------------------- --------------------------------------------------------------- +// Thread 0 | Thread 1 Thread 0 | Thread 1 +// --------------------------------------------------------------- --------------------------------------------------------------- +// STORE(DATA, 1) | r0 = LOAD_ACQUIRE(PTR) STORE(DATA, 1) | r0 = LOAD(PTR) +// | dmb ish | dmb ish +// STORE_RELEASE(PTR, &DATA) | r1 = LOAD(r0) STORE(PTR, &DATA) | r1 = LOAD(r0) +// --------------------------------------------------------------- --------------------------------------------------------------- +// +// To get Release-Acquire semantics on ARMv7 we need to emit dmb ish; memory barriers. 
+// +// --------------------------------------------------------------- --------------------------------------------------------------- +// Address Dependency - Release-Consume Address Dependency - Release-Consume - ARMv7 Assembly +// --------------------------------------------------------------- --------------------------------------------------------------- +// Thread 0 | Thread 1 Thread 0 | Thread 1 +// --------------------------------------------------------------- --------------------------------------------------------------- +// STORE(DATA, 1) | r0 = LOAD_CONSUME(PTR) STORE(DATA, 1) | r0 = LOAD(PTR) +// | dmb ish | +// STORE_RELEASE(PTR, &DATA) | r1 = LOAD(r0) STORE(PTR, &DATA) | r1 = LOAD(r0) +// --------------------------------------------------------------- --------------------------------------------------------------- +// +// Data Dependencies can not only be created by read-after-write/RAW on registers, but also by RAW on memory locations too. Let's look at some more elaborate examples. +// +// --------------------------------------------------------------- --------------------------------------------------------------- +// Address Dependency on Registers - Release-Consume - ARMv7 Address Dependency on Memory - Release-Consume - ARMv7 +// --------------------------------------------------------------- --------------------------------------------------------------- +// Thread 0 | Thread 1 Thread 0 | Thread 1 +// --------------------------------------------------------------- --------------------------------------------------------------- +// STORE(DATA, 1) | r0 = LOAD(PTR) STORE(DATA, 1) | r0 = LOAD(PTR) +// | r1 = r0 + 0 | STORE(TEMP, r0) +// dmb ish | r2 = r1 - 0 dmb ish | r1 = LOAD(TEMP) +// STORE(PTR, &DATA) | r3 = LOAD(r2) STORE(PTR, &DATA) | r2 = LOAD(r1) +// --------------------------------------------------------------- --------------------------------------------------------------- +// +// The above shows a more elaborate example of how data dependent dependencies flow through RAW chains either through memory or through registers. +// +// Notice by identifying that this is a data dependent operation and asking for a consume ordering, we can completely eliminate the memory barrier on Thread 1 since we know ARMv7 does not reorder data dependent loads. Neat. +// Unfortunately every major compiler upgrades a consume to an acquire ordering, because the consume ordering in the standard has a stronger guarantee and requires the compiler to do complicated dependency tracking. +// Dependency chains in source code must be mapped to dependency chains at the machine instruction level until a std::kill_dependency in the source code. +// +// ---------------------------------------------------------------- +// Non-Address Dependency && Multiple Chains +// ---------------------------------------------------------------- +// Initial State: +// std::atomic FLAG; int DATA[1] = 0; +// ---------------------------------------------------------------- +// Thread 0 | Thread 1 +// ---------------------------------------------------------------- +// STORE(DATA[0], 1) | int f = LOAD_CONSUME(FLAG) +// | int x = f +// | if (x) return Func(x); +// | +// STORE_RELEASE(FLAG, 1) | Func(int y) return DATA[y - y] +// ---------------------------------------------------------------- +// +// This example is really concise but there is a lot going on. Let's digest it. 
+// First is that the standard allows consume ordering even on what we will call not true machine level dependencies like a ptr load and then a load from that ptr as shown in the previous examples. +// Here the dependency is between two ints, and the dependency chain on Thread 1 is as follows. f -> x -> y -> DATA[y - y]. The standard requires that source code dependencies on the loaded value +// from consume flow thru assignments and even thru function calls. Also notice we added a dependency on the dereference of DATA with the value loaded from consume which while it does nothing actually abides by the standard +// by enforcing a source code data dependent load on the consume operation. You may see this referred to as artificial data dependencies in other texts. +// If we assume the compiler is able to track all these dependencies, the question is how do we enforce these dependencies at the machine instruction level. Let's go back to our ptr dependent load example. +// +// ---------------------------------------------------------------- +// addi r0, pc, offset; +// ldr r1, 0(r0); +// ldr r2, 0(r1); +// ---------------------------------------------------------------- +// +// The above pseudo assembly does a pc relative calculation to find the address of ptr. We then load ptr and then continue the dependency chain by loading the int from the loaded ptr. +// Thus r0 has type of int**, which we use to load r1 an int* which we use to load our final value of r2 which is the int. +// The key observation here is that most instructions provided by most architectures only allow moving from a base register + offset into a destination register. +// This allows for trivial capturing of data dependent loads through pointers. But how do we capture the data dependency of DATA[y - y]. We would need something like this. +// +// ---------------------------------------------------------------- +// sub r1, r0, r0; // Assume r0 holds y from the Consume Operation +// add r3, r1, r2; // Assume r2 holds the address of DATA[0] +// ldr r4, 0(r3); +// ---------------------------------------------------------------- +// +// We cannot use two registers as both arguments to the load instruction. Thus to accomplish this you noticed we had to add indirect data dependencies through registers to compute the final address from the consume +// load of y and then load from the final computed address. The compiler would have to recognize all these dependencies and enforce that they be maintained in the generated assembly. +// The compiler must ensure the entire syntactic, source code, data-dependency chain is enforced in the generated assembly, no matter how long such chain may be. +// Because of this and other issues, every major compiler unilaterally promotes consume to an acquire operation across the board. Read reference [15] for more information. +// This completely removes the actual usefulness of consume for the pointer dependent case which is used quite heavily in concurrent read heavy data structures where updates are published via pointer swaps. +// +// ******** read_depends use case - Release-ReadDepends Semantics ******** +// +// eastl::atomic provides a weaker read_depends operation that only encapsulates the pointer dependency case above. Loading from a pointer and then loading the value from the loaded pointer. +// The read_depends operation can be used on loads from only an eastl::atomic type. The return pointer of the load must and can only be used to then further load values. And that is it. 
+// If you are unsure, upgrade this load to an acquire operation.
+//
+// MyStruct* ptr = gAtomicPtr.load(memory_order_read_depends);
+// int a = ptr->a;
+// int b = ptr->b;
+// return a + b;
+//
+// The loads from ptr after the gAtomicPtr load ensure that the correct values of a and b are observed. This pairs with a Release operation on the writer side by releasing gAtomicPtr.
+//
+//
+// As said above the returned pointer from a .load(memory_order_read_depends) can only be used to then further load values.
+// Dereferencing(*) and Arrow Dereferencing(->) are valid operations on return values from .load(memory_order_read_depends).
+//
+// MyStruct* ptr = gAtomicPtr.load(memory_order_read_depends);
+// int a = ptr->a; - VALID
+// int a = *ptr; - VALID
+//
+// Since dereferencing is just indexing via some offset from some base address, this also means addition and subtraction of constants is ok.
+//
+// int* ptr = gAtomicPtr.load(memory_order_read_depends);
+// int a = *(ptr + 1) - VALID
+// int a = *(ptr - 1) - VALID
+//
+// Casts also work correctly since casting is just offsetting a pointer depending on the inheritance hierarchy or if using intrusive containers.
+//
+// ReadDependsIntrusive** intrusivePtr = gAtomicPtr.load(memory_order_read_depends);
+// ReadDependsIntrusive* ptr = ((ReadDependsIntrusive*)(((char*)intrusivePtr) - offsetof(ReadDependsIntrusive, next)));
+//
+// Base* basePtr = gAtomicPtr.load(memory_order_read_depends);
+// Derived* derivedPtr = static_cast<Derived*>(basePtr);
+//
+// Both of the above castings from the result of the load are valid for this memory order.
+//
+// You can reinterpret_cast the returned pointer value to a uintptr_t to set bits, clear bits, or xor bits but the pointer must be casted back before doing anything else.
+//
+// int* ptr = gAtomicPtr.load(memory_order_read_depends);
+// ptr = reinterpret_cast<int*>(reinterpret_cast<uintptr_t>(ptr) & ~3);
+//
+// Do not use the results of any equality or relational operator (==, !=, >, <, >=, <=) in the computation of offsets before dereferencing.
+// As we learned above in the Control Dependencies section, CPUs will not order Load-Load Control Dependencies. Relational and equality operators are often compiled using branches.
+// They don't have to be compiled to branches; conditional instructions could be used. Or some architectures provide comparison instructions such as set less than which do not need
+// branches when using the result of the relational operator in arithmetic statements. Then again short circuiting may need to introduce branches since C++ guarantees the
+// rest of the expression must not be evaluated.
+// The following odd code is forbidden.
+//
+// int* ptr = gAtomicPtr.load(memory_order_read_depends);
+// int* ptr2 = ptr + (ptr >= 0);
+// int a = *ptr2;
+//
+// Only equality comparisons against nullptr are allowed. This is because the compiler cannot assume that the address of the loaded value is some known address and substitute our loaded value.
+// int* ptr = gAtomicPtr.load(memory_order_read_depends);
+// if (ptr == nullptr); - VALID
+// if (ptr != nullptr); - VALID
+//
+// Thus the above sentence that states:
+// The return pointer of the load must and can only be used to then further load values. And that is it.
+// must be respected by the programmer. This memory order is an optimization added for efficient read heavy pointer swapping data structures. If you are unsure, use memory_order_acquire.
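+//
+// Putting the rules above together, a minimal sketch of the intended pattern (MyStruct and gAtomicPtr are the
+// illustrative names already used above; the writer publishes with a release store, the reader only loads through
+// the returned pointer):
+//
+// struct MyStruct { int a; int b; };
+// static eastl::atomic<MyStruct*> gAtomicPtr = nullptr;
+//
+// // Writer thread: fully construct the object, then publish it.
+// MyStruct* p = new MyStruct{1, 2};
+// gAtomicPtr.store(p, memory_order_release);
+//
+// // Reader thread: read_depends load, then only data-dependent loads through the pointer.
+// MyStruct* ptr = gAtomicPtr.load(memory_order_read_depends);
+// if (ptr != nullptr)               // nullptr equality comparison is allowed
+// {
+//     int sum = ptr->a + ptr->b;    // ordered after the gAtomicPtr load via the data dependency
+// }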
+// +// ******** Relaxed && eastl::atomic guarantees ******** +// +// We saw various ways that compiler barriers do not help us and that we need something more granular to make sure accesses are not mangled by the compiler to be considered atomic. +// Ensuring these guarantees like preventing dead-store elimination or the splitting of stores into smaller sub stores is where the C/C++11 +// standard comes into play to define what it means to operate on an atomic object. +// These basic guarantees are provided via new compiler intrinsics on gcc/clang that provide explicit indication to the compiler. +// Or on msvc by casting the underlying atomic T to a volatile T*, providing stronger compiler guarantees than the standard requires. +// Essentially volatile turns off all possible optimizations on that variable access and ensures all volatile variables cannot be +// reordered across sequence points. Again we are not using volatile here to guarantee atomicity, we are using it in its very intended purpose +// to tell the compiler it cannot assume anything about the contents of that variable. Now let's dive into the base guarantees of eastl::atomic. +// +// The standard defines the following for all operations on an atomic object M. +// +// Write-Write Coherence: +// If an operation A modifies an atomic object M(store), happens before an operation B that modifies M(store), then A shall be earlier than B in the modification order of M. +// +// Read-Read Coherence: +// If a value computation A on an atomic object M(load), happens before a value computation B on M(load), and A takes its value from a side effect X on M(from a previous store to M), then the value +// computed by B shall either be the value stored by X or some later side effect Y on M, where Y follows X in the modification order of M. +// +// Read-Write Coherence: +// If a value computation A on an atomic object M(load), happens before an operation B that modifies M(store), then A shall take its value from a side effect X on M, where X precedes B in the modification +// order of M. +// +// Write-Read Coherence: +// If a side effect X on an atomic object M(store), happens before a value computation B on M(load), then the evaluation of B must take its value from X or from some side effect Y that follows X in the +// modification order of M. +// +// What does all this mean. This is just a pedantic way of saying that the preceding coherence requirements disallow compiler reordering of atomic operations to a single atomic object. +// This means all operations must be emitted by the compiler. Stores cannot be dead-store eliminated even if they are the only stores. +// Loads cannot have common subexpression elimination performed on them even if they are the only loads. +// Loads and Stores to the same atomic object cannot be reordered by the compiler. +// Compiler cannot introduce extra loads or stores to the atomic object. +// Compiler also cannot reload from an atomic object, it must save and store to a stack slot. +// Essentially this provides all the necessary guarantees needed when treating an object as atomic from the compilers point of view. +// +// ******** Same Address LoadLoad Reordering ******** +// +// It is expected that same address operations cannot and are not reordered with each other. It is expected that operations to the same address have sequential consistency because +// they are to the same address. 
If you picture a cpu executing instructions, how is it possible to reorder instructions to the same address and yet keep program behaviour the same?
+// Same Address LoadLoad Reordering is one weakening that can be done while keeping observed program behaviour the same for a single-threaded program.
+// More formally, A and B are two memory instructions onto the same address P, where A is program ordered before B. If A and B are both loads then their order need not be maintained.
+// If B is a store then it cannot retire the store before instruction A completes. If A is a store and B is a load, then B must get its value forwarded from the store buffer or observe a later store
+// from the cache. Thus Same Address LDST, STST, STLD cannot be reordered but Same Address LDLD can be reordered.
+// Intel Itanium and SPARC RMO cpus allow and do Same Address LoadLoad Reordering.
+// Let's look at an example.
+//
+// ---------------------------
+// Same Address LoadLoad
+// ---------------------------
+// Initial State:
+// x = 0;
+// ---------------------------
+// Thread 0    | Thread 1
+// ---------------------------
+// STORE(x, 1) | r0 = LOAD(x)
+//             | r1 = LOAD(x)
+// ---------------------------
+// Observed: r0 = 1 && r1 = 0
+// ---------------------------
+//
+// Notice in the above example it has appeared as if the two loads from the same address have been reordered. If we first observed the new store of 1, then the next load should not observe a value in the past.
+// Many programmers expect same address sequential consistency: all accesses to a single address appear to execute in a sequential order.
+// Notice this violates the Read-Read Coherence for all atomic objects defined by the std and thus provided by eastl::atomic.
+//
+// All operations on eastl::atomic, regardless of the memory ordering of the operation, provide Same Address Sequential Consistency since they must abide by the coherence rules above.
+//
+// ******** eastl::atomic_thread_fence ********
+//
+// eastl::atomic_thread_fence(relaxed) : Provides no ordering guarantees
+// eastl::atomic_thread_fence(acquire) : Prevents all prior loads from being reordered with all later loads and stores, LDLD && LDST memory barrier
+// eastl::atomic_thread_fence(release) : Prevents all prior loads and stores from being reordered with all later stores, STST && LDST memory barrier
+// eastl::atomic_thread_fence(acq_rel) : Union of acquire and release, LDLD && STST && LDST memory barrier
+// eastl::atomic_thread_fence(seq_cst) : Full memory barrier that provides a single total order
+//
+// See Reference [9] and Fence-Fence, Atomic-Fence, Fence-Atomic Synchronization, Atomics Order and Consistency in the C++ std.
+//
+// ******** Atomic && Fence Synchronization ********
+//
+// ---------------------------
+// Fence-Fence Synchronization
+// ---------------------------
+// A release fence A synchronizes-with an acquire fence B if there exist operations X and Y on the same atomic object M, such that fence A is sequenced-before operation X and X modifies M,
+// operation Y is sequenced-before B and Y reads the value written by X.
+// In this case all non-atomic and relaxed atomic stores that are sequenced-before fence A will happen-before all non-atomic and relaxed atomic loads after fence B.
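+//
+// Written out with eastl::atomic, a minimal sketch of Fence-Fence synchronization looks as follows (gFlag and
+// gData are illustrative names; the relaxed flag store and load are the X and Y operations the fences pair through):
+//
+// static int gData = 0;
+// static eastl::atomic<int> gFlag = 0;
+//
+// // Thread 0 - writer
+// gData = 42;
+// atomic_thread_fence(memory_order_release);
+// gFlag.store(1, memory_order_relaxed);          // X: modifies gFlag
+//
+// // Thread 1 - reader
+// if (gFlag.load(memory_order_relaxed) == 1)     // Y: reads the value written by X
+// {
+//     atomic_thread_fence(memory_order_acquire);
+//     int r = gData;                             // guaranteed to observe 42
+// }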
+// +// ---------------------------- +// Atomic-Fence Synchronization +// ---------------------------- +// An atomic release operation A on atomic object M synchronizes-with an acquire fence B if there exists some atomic operation X on atomic object M, such that X is sequenced-before B and reads +// the value written by A. +// In this case all non-atomic and relaxed atomic stores that are sequenced-before atomic release operation A will happen-before all non-atomic and relaxed atomic loads after fence B. +// +// ---------------------------- +// Fence-Atomic Synchronization +// ---------------------------- +// A release fence A synchronizes-with an atomic acquire operation B on an atomic object M if there exists an atomic operation X such that A is sequenced-before X, X modifies M and B reads the +// value written by X. +// In this case all non-atomic and relaxed atomic stores that are sequenced-before fence A will happen-before all non-atomic and relaxed atomic loads after atomic acquire operation B. +// +// This can be used to add synchronization to a series of several relaxed atomic operations, as in the following trivial example. +// +// ---------------------------------------------------------------------------------------- +// Initial State: +// x = 0; +// eastl::atomic y = 0; +// z = 0; +// eastl::atomic w = 0; +// ---------------------------------------------------------------------------------------- +// Thread 0 | Thread 1 +// ---------------------------------------------------------------------------------------- +// x = 2 | r0 = y.load(memory_order_relaxed); +// z = 2 | r1 = w.load(memory_order_relaxed); +// atomic_thread_fence(memory_order_release); | atomic_thread_fence(memory_order_acquire); +// y.store(1, memory_order_relaxed); | r2 = x +// w.store(1, memory_order_relaxed); | r3 = z +// ---------------------------------------------------------------------------------------- +// Observed: r0 = 1 && r1 = 1 && r2 = 0 && r3 = 0 +// ---------------------------------------------------------------------------------------- +// +// ******** Atomic vs Standalone Fence ******** +// +// A sequentially consistent fence is stronger than a sequentially consistent operation because it is not tied to a specific atomic object. +// An atomic fence must provide synchronization with ANY atomic object whereas the ordering on the atomic object itself must only provide +// that ordering on that SAME atomic object. Thus this can provide cheaper guarantees on architectures with dependency tracking hardware. +// Let's look at a concrete example that will make this all clear. 
+// +// ---------------------------------------------------------------------------------------- +// Initial State: +// eastl::atomic y = 0; +// eastl::atomic z = 0; +// ---------------------------------------------------------------------------------------- +// Thread 0 | Thread 1 +// ---------------------------------------------------------------------------------------- +// z.store(2, memory_order_relaxed); | r0 = y.load(memory_order_relaxed); +// atomic_thread_fence(memory_order_seq_cst); | atomic_thread_fence(memory_order_seq_cst); +// y.store(1, memory_order_relaxed); | r1 = z.load(memory_order_relaxed); +// ---------------------------------------------------------------------------------------- +// Observed: r0 = 1 && r1 = 0 +// ---------------------------------------------------------------------------------------- +// +// Here the two sequentially consistent fences synchronize-with each other thus ensuring that if we observe r0 = 1 then we also observe that r1 = 2. +// In the above example if we observe r0 = 1 it is impossible to observe r1 = 0. +// +// ---------------------------------------------------------------------------------------- +// Initial State: +// eastl::atomic x = 0; +// eastl::atomic y = 0; +// eastl::atomic z = 0; +// ---------------------------------------------------------------------------------------- +// Thread 0 | Thread 1 +// ---------------------------------------------------------------------------------------- +// z.store(2, memory_order_relaxed); | r0 = y.load(memory_order_relaxed); +// x.fetch_add(1, memory_order_seq_cst); | x.fetch_add(1, memory_order_seq_cst); +// y.store(1, memory_order_relaxed); | r1 = z.load(memory_order_relaxed); +// ---------------------------------------------------------------------------------------- +// Observed: r0 = 1 && r1 = 0 +// ---------------------------------------------------------------------------------------- +// +// Here the two fetch_add sequentially consistent operations on x synchronize-with each other ensuring that if we observe r0 = 1 then we cannot observer r1 = 0; +// The thing to take note here is that we synchronized on the SAME atomic object, that being the atomic object x. +// Note that replacing the x.fetch_add() in Thread 1 with a sequentially consistent operation on another atomic object or a sequentially consistent fence can lead to +// observing r1 = 0 even if we observe r0 = 1. For example the following code may fail. 
+// +// ---------------------------------------------------------------------------------------- +// Initial State: +// eastl::atomic x = 0; +// eastl::atomic y = 0; +// eastl::atomic z = 0; +// ---------------------------------------------------------------------------------------- +// Thread 0 | Thread 1 +// ---------------------------------------------------------------------------------------- +// z.store(2, memory_order_relaxed); | r0 = y.load(memory_order_relaxed); +// | x.fetch_add(1, memory_order_seq_cst); +// y.fetch_add(1, memory_order_seq_cst); | r1 = z.load(memory_order_relaxed); +// ---------------------------------------------------------------------------------------- +// Observed: r0 = 1 && r1 = 0 +// ---------------------------------------------------------------------------------------- +// +// ---------------------------------------------------------------------------------------- +// Initial State: +// eastl::atomic x = 0; +// eastl::atomic y = 0; +// eastl::atomic z = 0; +// ---------------------------------------------------------------------------------------- +// Thread 0 | Thread 1 +// ---------------------------------------------------------------------------------------- +// z.store(2, memory_order_relaxed); | r0 = y.load(memory_order_relaxed); +// x.fetch_add(1, memory_order_seq_cst); | atomic_thread_fence(memory_order_seq_cst); +// y.store(1, memory_order_relaxed); | r1 = z.load(memory_order_relaxed); +// ---------------------------------------------------------------------------------------- +// Observed: r0 = 1 && r1 = 0 +// ---------------------------------------------------------------------------------------- +// +// In this example it is entirely possible that we observe r0 = 1 && r1 = 0 even though we have source code causality and sequentially consistent operations. +// Observability is tied to the atomic object on which the operation was performed and the thread fence doesn't synchronize-with the fetch_add because +// there is no load above the fence that reads the value from the fetch_add. +// +// ******** Sequential Consistency Semantics ******** +// +// See section, Order and consistency, in the C++ std and Reference [9]. +// +// A load with memory_order_seq_cst performs an acquire operation +// A store with memory_order_seq_cst performs a release operation +// A RMW with memory_order_seq_cst performs both an acquire and a release operation +// +// All memory_order_seq_cst operations exhibit the below single total order in which all threads observe all modifications in the same order +// +// Paraphrasing, there is a single total order on all memory_order_seq_cst operations, S, such that each sequentially consistent operation B that loads a value from +// atomic object M observes either the result of the last sequentially consistent modification A on M, or some modification on M that isn't memory_order_seq_cst. +// For atomic modifications A and B on an atomic object M, B occurs after A in the total order of M if: +// there is a memory_order_seq_cst fence X whereby A is sequenced before X, and X precedes B, +// there is a memory_order_seq_cst fence Y whereby Y is sequenced before B, and A precedes Y, +// there are memory_order_seq_cst fences X and Y such that A is sequenced before X, Y is sequenced before B, and X precedes Y. +// +// Let's look at some examples using memory_order_seq_cst. 
+// +// ------------------------------------------------------------ +// Store-Buffer +// ------------------------------------------------------------ +// Initial State: +// x = 0; y = 0; +// ------------------------------------------------------------ +// Thread 0 | Thread 1 +// ------------------------------------------------------------ +// STORE_RELAXED(x, 1) | STORE_RELAXED(y, 1) +// ATOMIC_THREAD_FENCE(SEQ_CST) | ATOMIC_THREAD_FENCE(SEQ_CST) +// r0 = LOAD_RELAXED(y) | r1 = LOAD_RELAXED(x) +// ------------------------------------------------------------ +// Observed: r0 = 0 && r1 = 0 +// ------------------------------------------------------------ +// +// ------------------------------------------------------------ +// Store-Buffer +// ------------------------------------------------------------ +// Initial State: +// x = 0; y = 0; +// ------------------------------------------------------------ +// Thread 0 | Thread 1 +// ------------------------------------------------------------ +// STORE_SEQ_CST(x, 1) | STORE_SEQ_CST(y, 1) +// r0 = LOAD_SEQ_CST(y) | r1 = LOAD_SEQ_CST(x) +// ------------------------------------------------------------ +// Observed: r0 = 0 && r1 = 0 +// ------------------------------------------------------------ +// +// Both solutions above are correct to ensure that the end results cannot lead to both r0 and r1 returning 0. Notice that the second one requires memory_order_seq_cst on both +// operations to ensure they are in the total order, S, for all memory_order_seq_cst operations. The other example uses the stronger guarantee provided by a sequentially consistent fence. +// +// ------------------------------------------------------------------------------------------------ +// Read-To-Write Causality +// ------------------------------------------------------------------------------------------------ +// Initial State: +// x = 0; y = 0; +// ------------------------------------------------------------------------------------------------ +// Thread 0 | Thread 1 | Thread 2 +// ------------------------------------------------------------------------------------------------ +// STORE_SEQ_CST(x, 1) | r0 = LOAD_RELAXED(x) | STORE_RELAXED(y, 1) +// | ATOMIC_THREAD_FENCE(SEQ_CST) | ATOMIC_THREAD_FENCE(SEQ_CST) +// | r1 = LOAD_RELAXED(y) | r2 = LOAD_RELAXED(x) +// ------------------------------------------------------------------------------------------------ +// Observed: r0 = 1 && r1 = 0 && r2 = 0 +// ------------------------------------------------------------------------------------------------ +// +// You'll notice this example is an in between example of the Store-Buffer and IRIW examples we have seen earlier. The store in Thread 0 needs to be sequentially consistent so it synchronizes with the +// thread fence in Thread 1. C++20 due to Reference [9], increased the strength of sequentially consistent fences has been increased to allow for the following. 
+//
+// ------------------------------------------------------------------------------------------------
+// Read-To-Write Causality - C++20
+// ------------------------------------------------------------------------------------------------
+// Initial State:
+// x = 0; y = 0;
+// ------------------------------------------------------------------------------------------------
+// Thread 0            | Thread 1                        | Thread 2
+// ------------------------------------------------------------------------------------------------
+// STORE_RELAXED(x, 1) | r0 = LOAD_RELAXED(x)            | STORE_RELAXED(y, 1)
+//                     | ATOMIC_THREAD_FENCE(SEQ_CST)    | ATOMIC_THREAD_FENCE(SEQ_CST)
+//                     | r1 = LOAD_RELAXED(y)            | r2 = LOAD_RELAXED(x)
+// ------------------------------------------------------------------------------------------------
+// Observed: r0 = 1 && r1 = 0 && r2 = 0
+// ------------------------------------------------------------------------------------------------
+//
+// Notice we were able to turn the store in Thread 0 into a relaxed store and still guarantee that either r1 or r2 returns 1.
+// Note that all implementations of the C++11 standard for every architecture already allow the C++20 behaviour.
+// The C++20 standard memory model was updated to recognize that all current implementations already provide this stronger guarantee.
+//
+// ******** False Sharing ********
+//
+// As we know, operations work at the granularity of a cacheline. A RMW operation obviously must have some help from the cache to ensure the entire operation
+// is seen as one whole unit. Conceptually we can think of this as the cpu's cache taking a lock on the cacheline, the cpu doing the read-modify-write operation on the
+// locked cacheline, and then releasing the lock on the cacheline. This means during that time any other cpu needing that cacheline must wait for the lock to be released.
+//
+// If we have two atomic objects doing RMW operations and they are within the same cacheline, they are unintentionally contending and serializing with each other even
+// though they are two completely separate objects. This is the phenomenon commonly called false sharing.
+// You can cacheline align your structure or the eastl::atomic object to prevent false sharing.
+//
+// ******** union of eastl::atomic ********
+//
+// union { eastl::atomic<uint8_t> atomic8; eastl::atomic<uint32_t> atomic32; };
+//
+// We know that operations operate at the granularity of a processor's cacheline size, so we may expect that storing to and loading
+// from different width atomic variables at the same address would not cause weird observable behaviour, but it may.
+// Store Buffers allow smaller stores to replace parts of larger loads that are forwarded from a store buffer.
+// This means if there are 2 bytes of modified data in the store buffer that overlap with a 4 byte load, the 2 bytes will be forwarded
+// from the store buffer. This is even documented behaviour of the x86 store buffer in the x86 architecture manual.
+// This behaviour can cause processors to observe values that have never and will never be visible on the bus to other processors.
+// The use of a union with eastl::atomic is not wrong but your code must be able to withstand these effects.
+//
+// Assume everything starts out initially as zero.
+// +// ------------------------------------------------------------------------------------------------------- +// Thread 0 | Thread 1 | Thread 2 +// -------------------------------------------------------------------------------------------------------- +// cmpxchg 0 -> 0x11111111 | cmpxchg 0x11111111 -> 0x22222222 | mov byte 0x33; mov 4 bytes into register; +// --------------------------------------------------------------------------------------------------------- +// +// After all operations complete, the value in memory at that location is, 0x22222233. +// It is possible that the 4 byte load in thread 2 actually returns 0x11111133. +// Now 0x11111133 is an observed value that no other cpu could observe because it was never globally visible on the data bus. +// +// If the value in memory is 0x22222233 then the first cmpxchg succeeded, then the second cmpxchg succeeded and finally our +// byte to memory was stored, yet our load returned 0x11111133. This is because store buffer contents can be forwarded to overlapping loads. +// It is possible that the byte store got put in the store buffer. Our load happened after the first cmpxchg with the byte forwarded. +// This behaviour is fine as long as your algorithm is able to cope with this kind of store buffer forwarding effects. +// +// Reference [13] is a great read on more about this topic of mixed-size concurrency. +// + + +///////////////////////////////////////////////////////////////////////////////// + + +#include +#include +#include +#include + + +#endif /* EASTL_ATOMIC_H */ diff --git a/libkram/eastl/include/EASTL/bitset.h b/libkram/eastl/include/EASTL/bitset.h new file mode 100644 index 00000000..d9261050 --- /dev/null +++ b/libkram/eastl/include/EASTL/bitset.h @@ -0,0 +1,2232 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// This file implements a bitset much like the C++ std::bitset class. +// The primary distinctions between this list and std::bitset are: +// - bitset is more efficient than some other std::bitset implementations, +// notably the bitset that comes with Microsoft and other 1st party platforms. +// - bitset is savvy to an environment that doesn't have exception handling, +// as is sometimes the case with console or embedded environments. +// - bitset is savvy to environments in which 'unsigned long' is not the +// most efficient integral data type. std::bitset implementations use +// unsigned long, even if it is an inefficient integer type. +// - bitset removes as much function calls as practical, in order to allow +// debug builds to run closer in speed and code footprint to release builds. +// - bitset doesn't support string functionality. We can add this if +// it is deemed useful. +// +/////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_BITSET_H +#define EASTL_BITSET_H + + +#include +#include + +EA_DISABLE_ALL_VC_WARNINGS(); + +#include +#include + +EA_RESTORE_ALL_VC_WARNINGS(); + +#if EASTL_EXCEPTIONS_ENABLED + EA_DISABLE_ALL_VC_WARNINGS(); + + #include // std::out_of_range, std::length_error. + + EA_RESTORE_ALL_VC_WARNINGS(); +#endif + +EA_DISABLE_VC_WARNING(4127); // Conditional expression is constant + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. 
VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + + + +namespace eastl +{ + // To consider: Enable this for backwards compatibility with any user code that might be using BitsetWordType: + // #define BitsetWordType EASTL_BITSET_WORD_TYPE_DEFAULT + + + /// BITSET_WORD_COUNT + /// + /// Defines the number of words we use, based on the number of bits. + /// nBitCount refers to the number of bits in a bitset. + /// WordType refers to the type of integer word which stores bitet data. By default it is BitsetWordType. + /// + #if !defined(__GNUC__) || (__GNUC__ >= 3) // GCC 2.x can't handle the simpler declaration below. + #define BITSET_WORD_COUNT(nBitCount, WordType) (nBitCount == 0 ? 1 : ((nBitCount - 1) / (8 * sizeof(WordType)) + 1)) + #else + #define BITSET_WORD_COUNT(nBitCount, WordType) ((nBitCount - 1) / (8 * sizeof(WordType)) + 1) + #endif + + + /// EASTL_DISABLE_BITSET_ARRAYBOUNDS_WARNING + /// Before GCC 4.7 the '-Warray-bounds' buggy and was very likely to issue false positives for loops that are + /// difficult to evaluate. + /// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=45978 + /// + #if defined(__GNUC__) && (EA_COMPILER_VERSION > 4007) && defined(EA_PLATFORM_ANDROID) // Earlier than GCC 4.7 + #define EASTL_DISABLE_BITSET_ARRAYBOUNDS_WARNING 1 + #else + #define EASTL_DISABLE_BITSET_ARRAYBOUNDS_WARNING 0 + #endif + + + + /// BitsetBase + /// + /// This is a default implementation that works for any number of words. + /// + template // Templated on the number of words used to hold the bitset and the word type. + struct BitsetBase + { + typedef WordType word_type; + typedef BitsetBase this_type; + #if EASTL_BITSET_SIZE_T + typedef size_t size_type; + #else + typedef eastl_size_t size_type; + #endif + + enum { + kBitsPerWord = (8 * sizeof(word_type)), + kBitsPerWordMask = (kBitsPerWord - 1), + kBitsPerWordShift = ((kBitsPerWord == 8) ? 3 : ((kBitsPerWord == 16) ? 4 : ((kBitsPerWord == 32) ? 5 : (((kBitsPerWord == 64) ? 6 : 7))))) + }; + + public: + word_type mWord[NW]; + + public: + BitsetBase(); + BitsetBase(uint32_t value); // This exists only for compatibility with std::bitset, which has a 'long' constructor. + //BitsetBase(uint64_t value); // Disabled because it causes conflicts with the 32 bit version with existing user code. Use from_uint64 to init from a uint64_t instead. + + void operator&=(const this_type& x); + void operator|=(const this_type& x); + void operator^=(const this_type& x); + + void operator<<=(size_type n); + void operator>>=(size_type n); + + void flip(); + void set(); + void set(size_type i, bool value); + void reset(); + + bool operator==(const this_type& x) const; + + bool any() const; + size_type count() const; + + void from_uint32(uint32_t value); + void from_uint64(uint64_t value); + + unsigned long to_ulong() const; + uint32_t to_uint32() const; + uint64_t to_uint64() const; + + word_type& DoGetWord(size_type i); + word_type DoGetWord(size_type i) const; + + size_type DoFindFirst() const; + size_type DoFindNext(size_type last_find) const; + + size_type DoFindLast() const; // Returns NW * kBitsPerWord (the bit count) if no bits are set. + size_type DoFindPrev(size_type last_find) const; // Returns NW * kBitsPerWord (the bit count) if no bits are set. + + }; // class BitsetBase + + + + /// BitsetBase<1, WordType> + /// + /// This is a specialization for a bitset that fits within one word. 
+ /// + template + struct BitsetBase<1, WordType> + { + typedef WordType word_type; + typedef BitsetBase<1, WordType> this_type; + #if EASTL_BITSET_SIZE_T + typedef size_t size_type; + #else + typedef eastl_size_t size_type; + #endif + + enum { + kBitsPerWord = (8 * sizeof(word_type)), + kBitsPerWordMask = (kBitsPerWord - 1), + kBitsPerWordShift = ((kBitsPerWord == 8) ? 3 : ((kBitsPerWord == 16) ? 4 : ((kBitsPerWord == 32) ? 5 : (((kBitsPerWord == 64) ? 6 : 7))))) + }; + + public: + word_type mWord[1]; // Defined as an array of 1 so that bitset can treat this BitsetBase like others. + + public: + BitsetBase(); + BitsetBase(uint32_t value); + //BitsetBase(uint64_t value); // Disabled because it causes conflicts with the 32 bit version with existing user code. Use from_uint64 instead. + + void operator&=(const this_type& x); + void operator|=(const this_type& x); + void operator^=(const this_type& x); + + void operator<<=(size_type n); + void operator>>=(size_type n); + + void flip(); + void set(); + void set(size_type i, bool value); + void reset(); + + bool operator==(const this_type& x) const; + + bool any() const; + size_type count() const; + + void from_uint32(uint32_t value); + void from_uint64(uint64_t value); + + unsigned long to_ulong() const; + uint32_t to_uint32() const; + uint64_t to_uint64() const; + + word_type& DoGetWord(size_type); + word_type DoGetWord(size_type) const; + + size_type DoFindFirst() const; + size_type DoFindNext(size_type last_find) const; + + size_type DoFindLast() const; // Returns 1 * kBitsPerWord (the bit count) if no bits are set. + size_type DoFindPrev(size_type last_find) const; // Returns 1 * kBitsPerWord (the bit count) if no bits are set. + + }; // BitsetBase<1, WordType> + + + + /// BitsetBase<2, WordType> + /// + /// This is a specialization for a bitset that fits within two words. + /// The difference here is that we avoid branching (ifs and loops). + /// + template + struct BitsetBase<2, WordType> + { + typedef WordType word_type; + typedef BitsetBase<2, WordType> this_type; + #if EASTL_BITSET_SIZE_T + typedef size_t size_type; + #else + typedef eastl_size_t size_type; + #endif + + enum { + kBitsPerWord = (8 * sizeof(word_type)), + kBitsPerWordMask = (kBitsPerWord - 1), + kBitsPerWordShift = ((kBitsPerWord == 8) ? 3 : ((kBitsPerWord == 16) ? 4 : ((kBitsPerWord == 32) ? 5 : (((kBitsPerWord == 64) ? 6 : 7))))) + }; + + public: + word_type mWord[2]; + + public: + BitsetBase(); + BitsetBase(uint32_t value); + //BitsetBase(uint64_t value); // Disabled because it causes conflicts with the 32 bit version with existing user code. Use from_uint64 instead. + + void operator&=(const this_type& x); + void operator|=(const this_type& x); + void operator^=(const this_type& x); + + void operator<<=(size_type n); + void operator>>=(size_type n); + + void flip(); + void set(); + void set(size_type i, bool value); + void reset(); + + bool operator==(const this_type& x) const; + + bool any() const; + size_type count() const; + + void from_uint32(uint32_t value); + void from_uint64(uint64_t value); + + unsigned long to_ulong() const; + uint32_t to_uint32() const; + uint64_t to_uint64() const; + + word_type& DoGetWord(size_type); + word_type DoGetWord(size_type) const; + + size_type DoFindFirst() const; + size_type DoFindNext(size_type last_find) const; + + size_type DoFindLast() const; // Returns 2 * kBitsPerWord (the bit count) if no bits are set. + size_type DoFindPrev(size_type last_find) const; // Returns 2 * kBitsPerWord (the bit count) if no bits are set. 
+ + }; // BitsetBase<2, WordType> + + + + + /// bitset + /// + /// Implements a bitset much like the C++ std::bitset. + /// + /// As of this writing we don't implement a specialization of bitset<0>, + /// as it is deemed an academic exercise that nobody would actually + /// use and it would increase code space and provide little practical + /// benefit. Note that this doesn't mean bitset<0> isn't supported; + /// it means that our version of it isn't as efficient as it would be + /// if a specialization was made for it. + /// + /// - N can be any unsigned (non-zero) value, though memory usage is + /// linear with respect to N, so large values of N use large amounts of memory. + /// - WordType must be one of [uint16_t, uint32_t, uint64_t, uint128_t] + /// and the compiler must support the type. By default the WordType is + /// the largest native register type that the target platform supports. + /// + template + class bitset : private BitsetBase + { + public: + typedef BitsetBase base_type; + typedef bitset this_type; + typedef WordType word_type; + typedef typename base_type::size_type size_type; + + enum + { + kBitsPerWord = (8 * sizeof(word_type)), + kBitsPerWordMask = (kBitsPerWord - 1), + kBitsPerWordShift = ((kBitsPerWord == 8) ? 3 : ((kBitsPerWord == 16) ? 4 : ((kBitsPerWord == 32) ? 5 : (((kBitsPerWord == 64) ? 6 : 7))))), + kSize = N, // The number of bits the bitset holds + kWordSize = sizeof(word_type), // The size of individual words the bitset uses to hold the bits. + kWordCount = BITSET_WORD_COUNT(N, WordType) // The number of words the bitset uses to hold the bits. sizeof(bitset) == kWordSize * kWordCount. + }; + + using base_type::mWord; + using base_type::DoGetWord; + using base_type::DoFindFirst; + using base_type::DoFindNext; + using base_type::DoFindLast; + using base_type::DoFindPrev; + using base_type::to_ulong; + using base_type::to_uint32; + using base_type::to_uint64; + using base_type::count; + using base_type::any; + + public: + /// reference + /// + /// A reference is a reference to a specific bit in the bitset. + /// The C++ standard specifies that this be a nested class, + /// though it is not clear if a non-nested reference implementation + /// would be non-conforming. + /// + class reference + { + protected: + friend class bitset; + + word_type* mpBitWord; + size_type mnBitIndex; + + reference(){} // The C++ standard specifies that this is private. + + public: + reference(const bitset& x, size_type i); + + reference& operator=(bool value); + reference& operator=(const reference& x); + + bool operator~() const; + operator bool() const // Defined inline because CodeWarrior fails to be able to compile it outside. + { return (*mpBitWord & (static_cast(1) << (mnBitIndex & kBitsPerWordMask))) != 0; } + + reference& flip(); + }; + + public: + friend class reference; + + bitset(); + bitset(uint32_t value); + //bitset(uint64_t value); // Disabled because it causes conflicts with the 32 bit version with existing user code. Use from_uint64 instead. + + // We don't define copy constructor and operator= because + // the compiler-generated versions will suffice. 
+ + this_type& operator&=(const this_type& x); + this_type& operator|=(const this_type& x); + this_type& operator^=(const this_type& x); + + this_type& operator<<=(size_type n); + this_type& operator>>=(size_type n); + + this_type& set(); + this_type& set(size_type i, bool value = true); + + this_type& reset(); + this_type& reset(size_type i); + + this_type& flip(); + this_type& flip(size_type i); + this_type operator~() const; + + reference operator[](size_type i); + bool operator[](size_type i) const; + + const word_type* data() const; + word_type* data(); + + void from_uint32(uint32_t value); + void from_uint64(uint64_t value); + + //unsigned long to_ulong() const; // We inherit this from the base class. + //uint32_t to_uint32() const; + //uint64_t to_uint64() const; + + //size_type count() const; // We inherit this from the base class. + size_type size() const; + + bool operator==(const this_type& x) const; + bool operator!=(const this_type& x) const; + + bool test(size_type i) const; + //bool any() const; // We inherit this from the base class. + bool all() const; + bool none() const; + + this_type operator<<(size_type n) const; + this_type operator>>(size_type n) const; + + // Finds the index of the first "on" bit, returns kSize if none are set. + size_type find_first() const; + + // Finds the index of the next "on" bit after last_find, returns kSize if none are set. + size_type find_next(size_type last_find) const; + + // Finds the index of the last "on" bit, returns kSize if none are set. + size_type find_last() const; + + // Finds the index of the last "on" bit before last_find, returns kSize if none are set. + size_type find_prev(size_type last_find) const; + + }; // bitset + + + + + + + + /// BitsetCountBits + /// + /// This is a fast trick way to count bits without branches nor memory accesses. + /// + inline uint32_t BitsetCountBits(uint64_t x) + { + // GCC 3.x's implementation of UINT64_C is broken and fails to deal with + // the code below correctly. So we make a workaround for it. Earlier and + // later versions of GCC don't have this bug. 
+ + #if defined(__GNUC__) && (__GNUC__ == 3) + x = x - ((x >> 1) & 0x5555555555555555ULL); + x = (x & 0x3333333333333333ULL) + ((x >> 2) & 0x3333333333333333ULL); + x = (x + (x >> 4)) & 0x0F0F0F0F0F0F0F0FULL; + return (uint32_t)((x * 0x0101010101010101ULL) >> 56); + #else + x = x - ((x >> 1) & UINT64_C(0x5555555555555555)); + x = (x & UINT64_C(0x3333333333333333)) + ((x >> 2) & UINT64_C(0x3333333333333333)); + x = (x + (x >> 4)) & UINT64_C(0x0F0F0F0F0F0F0F0F); + return (uint32_t)((x * UINT64_C(0x0101010101010101)) >> 56); + #endif + } + + inline uint32_t BitsetCountBits(uint32_t x) + { + x = x - ((x >> 1) & 0x55555555); + x = (x & 0x33333333) + ((x >> 2) & 0x33333333); + x = (x + (x >> 4)) & 0x0F0F0F0F; + return (uint32_t)((x * 0x01010101) >> 24); + } + + inline uint32_t BitsetCountBits(uint16_t x) + { + return BitsetCountBits((uint32_t)x); + } + + inline uint32_t BitsetCountBits(uint8_t x) + { + return BitsetCountBits((uint32_t)x); + } + + + // const static char kBitsPerUint16[16] = { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4 }; + #define EASTL_BITSET_COUNT_STRING "\0\1\1\2\1\2\2\3\1\2\2\3\2\3\3\4" + + + inline uint32_t GetFirstBit(uint8_t x) + { + if(x) + { + uint32_t n = 1; + + if((x & 0x0000000F) == 0) { n += 4; x >>= 4; } + if((x & 0x00000003) == 0) { n += 2; x >>= 2; } + + return (uint32_t)(n - (x & 1)); + } + + return 8; + } + + inline uint32_t GetFirstBit(uint16_t x) // To do: Update this to use VC++ _BitScanForward, _BitScanForward64; GCC __builtin_ctz, __builtin_ctzl. VC++ __lzcnt16, __lzcnt, __lzcnt64 requires recent CPUs (2013+) and probably can't be used. http://en.wikipedia.org/wiki/Haswell_%28microarchitecture%29#New_features + { + if(x) + { + uint32_t n = 1; + + if((x & 0x000000FF) == 0) { n += 8; x >>= 8; } + if((x & 0x0000000F) == 0) { n += 4; x >>= 4; } + if((x & 0x00000003) == 0) { n += 2; x >>= 2; } + + return (uint32_t)(n - (x & 1)); + } + + return 16; + } + + inline uint32_t GetFirstBit(uint32_t x) + { + if(x) + { + uint32_t n = 1; + + if((x & 0x0000FFFF) == 0) { n += 16; x >>= 16; } + if((x & 0x000000FF) == 0) { n += 8; x >>= 8; } + if((x & 0x0000000F) == 0) { n += 4; x >>= 4; } + if((x & 0x00000003) == 0) { n += 2; x >>= 2; } + + return (n - (x & 1)); + } + + return 32; + } + + inline uint32_t GetFirstBit(uint64_t x) + { + if(x) + { + uint32_t n = 1; + + if((x & 0xFFFFFFFF) == 0) { n += 32; x >>= 32; } + if((x & 0x0000FFFF) == 0) { n += 16; x >>= 16; } + if((x & 0x000000FF) == 0) { n += 8; x >>= 8; } + if((x & 0x0000000F) == 0) { n += 4; x >>= 4; } + if((x & 0x00000003) == 0) { n += 2; x >>= 2; } + + return (n - ((uint32_t)x & 1)); + } + + return 64; + } + + + #if EASTL_INT128_SUPPORTED + inline uint32_t GetFirstBit(eastl_uint128_t x) + { + if(x) + { + uint32_t n = 1; + + if((x & UINT64_C(0xFFFFFFFFFFFFFFFF)) == 0) { n += 64; x >>= 64; } + if((x & 0xFFFFFFFF) == 0) { n += 32; x >>= 32; } + if((x & 0x0000FFFF) == 0) { n += 16; x >>= 16; } + if((x & 0x000000FF) == 0) { n += 8; x >>= 8; } + if((x & 0x0000000F) == 0) { n += 4; x >>= 4; } + if((x & 0x00000003) == 0) { n += 2; x >>= 2; } + + return (n - ((uint32_t)x & 1)); + } + + return 128; + } + #endif + + inline uint32_t GetLastBit(uint8_t x) + { + if(x) + { + uint32_t n = 0; + + if(x & 0xFFF0) { n += 4; x >>= 4; } + if(x & 0xFFFC) { n += 2; x >>= 2; } + if(x & 0xFFFE) { n += 1; } + + return n; + } + + return 8; + } + + inline uint32_t GetLastBit(uint16_t x) + { + if(x) + { + uint32_t n = 0; + + if(x & 0xFF00) { n += 8; x >>= 8; } + if(x & 0xFFF0) { n += 4; x >>= 4; } + if(x & 0xFFFC) { n += 2; x >>= 2; } + if(x 
& 0xFFFE) { n += 1; } + + return n; + } + + return 16; + } + + inline uint32_t GetLastBit(uint32_t x) + { + if(x) + { + uint32_t n = 0; + + if(x & 0xFFFF0000) { n += 16; x >>= 16; } + if(x & 0xFFFFFF00) { n += 8; x >>= 8; } + if(x & 0xFFFFFFF0) { n += 4; x >>= 4; } + if(x & 0xFFFFFFFC) { n += 2; x >>= 2; } + if(x & 0xFFFFFFFE) { n += 1; } + + return n; + } + + return 32; + } + + inline uint32_t GetLastBit(uint64_t x) + { + if(x) + { + uint32_t n = 0; + + if(x & UINT64_C(0xFFFFFFFF00000000)) { n += 32; x >>= 32; } + if(x & 0xFFFF0000) { n += 16; x >>= 16; } + if(x & 0xFFFFFF00) { n += 8; x >>= 8; } + if(x & 0xFFFFFFF0) { n += 4; x >>= 4; } + if(x & 0xFFFFFFFC) { n += 2; x >>= 2; } + if(x & 0xFFFFFFFE) { n += 1; } + + return n; + } + + return 64; + } + + #if EASTL_INT128_SUPPORTED + inline uint32_t GetLastBit(eastl_uint128_t x) + { + if(x) + { + uint32_t n = 0; + + eastl_uint128_t mask(UINT64_C(0xFFFFFFFF00000000)); // There doesn't seem to exist compiler support for INT128_C() by any compiler. EAStdC's int128_t supports it though. + mask <<= 64; + + if(x & mask) { n += 64; x >>= 64; } + if(x & UINT64_C(0xFFFFFFFF00000000)) { n += 32; x >>= 32; } + if(x & UINT64_C(0x00000000FFFF0000)) { n += 16; x >>= 16; } + if(x & UINT64_C(0x00000000FFFFFF00)) { n += 8; x >>= 8; } + if(x & UINT64_C(0x00000000FFFFFFF0)) { n += 4; x >>= 4; } + if(x & UINT64_C(0x00000000FFFFFFFC)) { n += 2; x >>= 2; } + if(x & UINT64_C(0x00000000FFFFFFFE)) { n += 1; } + + return n; + } + + return 128; + } + #endif + + + + + /////////////////////////////////////////////////////////////////////////// + // BitsetBase + // + // We tried two forms of array access here: + // for(word_type *pWord(mWord), *pWordEnd(mWord + NW); pWord < pWordEnd; ++pWord) + // *pWord = ... + // and + // for(size_t i = 0; i < NW; i++) + // mWord[i] = ... + // + // For our tests (~NW < 16), the latter (using []) access resulted in faster code. + /////////////////////////////////////////////////////////////////////////// + + template + inline BitsetBase::BitsetBase() + { + reset(); + } + + + template + inline BitsetBase::BitsetBase(uint32_t value) + { + // This implementation assumes that sizeof(value) <= sizeof(word_type). + //EASTL_CT_ASSERT(sizeof(value) <= sizeof(word_type)); Disabled because we now have support for uint8_t and uint16_t word types. It would be nice to have a runtime assert that tested this. + + reset(); + mWord[0] = static_cast(value); + } + + + /* + template + inline BitsetBase::BitsetBase(uint64_t value) + { + reset(); + + #if(EA_PLATFORM_WORD_SIZE == 4) + mWord[0] = static_cast(value); + + EASTL_CT_ASSERT(NW > 2); // We can assume this because we have specializations of BitsetBase for <1> and <2>. + //if(NW > 1) // NW is a template constant, but it would be a little messy to take advantage of it's const-ness. 
+ mWord[1] = static_cast(value >> 32); + #else + mWord[0] = static_cast(value); + #endif + } + */ + + + template + inline void BitsetBase::operator&=(const this_type& x) + { + for(size_t i = 0; i < NW; i++) + mWord[i] &= x.mWord[i]; + } + + + template + inline void BitsetBase::operator|=(const this_type& x) + { + for(size_t i = 0; i < NW; i++) + mWord[i] |= x.mWord[i]; + } + + + template + inline void BitsetBase::operator^=(const this_type& x) + { + for(size_t i = 0; i < NW; i++) + mWord[i] ^= x.mWord[i]; + } + + + template + inline void BitsetBase::operator<<=(size_type n) + { + const size_type nWordShift = (size_type)(n >> kBitsPerWordShift); + + if(nWordShift) + { + for(int i = (int)(NW - 1); i >= 0; --i) + mWord[i] = (nWordShift <= (size_type)i) ? mWord[i - nWordShift] : (word_type)0; + } + + if(n &= kBitsPerWordMask) + { + for(size_t i = (NW - 1); i > 0; --i) + mWord[i] = (word_type)((mWord[i] << n) | (mWord[i - 1] >> (kBitsPerWord - n))); + mWord[0] <<= n; + } + + // We let the parent class turn off any upper bits. + } + + + template + inline void BitsetBase::operator>>=(size_type n) + { + const size_type nWordShift = (size_type)(n >> kBitsPerWordShift); + + if(nWordShift) + { + for(size_t i = 0; i < NW; ++i) + mWord[i] = ((nWordShift < (NW - i)) ? mWord[i + nWordShift] : (word_type)0); + } + + if(n &= kBitsPerWordMask) + { + for(size_t i = 0; i < (NW - 1); ++i) + mWord[i] = (word_type)((mWord[i] >> n) | (mWord[i + 1] << (kBitsPerWord - n))); + mWord[NW - 1] >>= n; + } + } + + + template + inline void BitsetBase::flip() + { + for(size_t i = 0; i < NW; i++) + mWord[i] = ~mWord[i]; + // We let the parent class turn off any upper bits. + } + + + template + inline void BitsetBase::set() + { + for(size_t i = 0; i < NW; i++) + mWord[i] = static_cast(~static_cast(0)); + // We let the parent class turn off any upper bits. + } + + + template + inline void BitsetBase::set(size_type i, bool value) + { + if(value) + mWord[i >> kBitsPerWordShift] |= (static_cast(1) << (i & kBitsPerWordMask)); + else + mWord[i >> kBitsPerWordShift] &= ~(static_cast(1) << (i & kBitsPerWordMask)); + } + + + template + inline void BitsetBase::reset() + { + if(NW > 16) // This is a constant expression and should be optimized away. + { + // This will be fastest if compiler intrinsic function optimizations are enabled. + memset(mWord, 0, sizeof(mWord)); + } + else + { + for(size_t i = 0; i < NW; i++) + mWord[i] = 0; + } + } + + + template + inline bool BitsetBase::operator==(const this_type& x) const + { + for(size_t i = 0; i < NW; i++) + { + if(mWord[i] != x.mWord[i]) + return false; + } + return true; + } + + + template + inline bool BitsetBase::any() const + { + for(size_t i = 0; i < NW; i++) + { + if(mWord[i]) + return true; + } + return false; + } + + + template + inline typename BitsetBase::size_type + BitsetBase::count() const + { + size_type n = 0; + + for(size_t i = 0; i < NW; i++) + { + #if defined(__GNUC__) && (((__GNUC__ * 100) + __GNUC_MINOR__) >= 304) && !defined(EA_PLATFORM_ANDROID) // GCC 3.4 or later + #if(EA_PLATFORM_WORD_SIZE == 4) + n += (size_type)__builtin_popcountl(mWord[i]); + #else + n += (size_type)__builtin_popcountll(mWord[i]); + #endif + #elif defined(__GNUC__) && (__GNUC__ < 3) + n += BitsetCountBits(mWord[i]); // GCC 2.x compiler inexplicably blows up on the code below. 
+ #else + // todo: use __popcnt16, __popcnt, __popcnt64 for msvc builds + // https://msdn.microsoft.com/en-us/library/bb385231(v=vs.140).aspx + for(word_type w = mWord[i]; w; w >>= 4) + n += EASTL_BITSET_COUNT_STRING[w & 0xF]; + + // Version which seems to run slower in benchmarks: + // n += BitsetCountBits(mWord[i]); + #endif + + } + return n; + } + + + template + inline void BitsetBase::from_uint32(uint32_t value) + { + reset(); + mWord[0] = static_cast(value); + } + + + template + inline void BitsetBase::from_uint64(uint64_t value) + { + reset(); + + #if(EA_PLATFORM_WORD_SIZE == 4) + mWord[0] = static_cast(value); + + EASTL_CT_ASSERT(NW > 2); // We can assume this because we have specializations of BitsetBase for <1> and <2>. + //if(NW > 1) // NW is a template constant, but it would be a little messy to take advantage of it's const-ness. + mWord[1] = static_cast(value >> 32); + #else + mWord[0] = static_cast(value); + #endif + } + + + template + inline unsigned long BitsetBase::to_ulong() const + { + #if EASTL_EXCEPTIONS_ENABLED + for(size_t i = 1; i < NW; ++i) + { + if(mWord[i]) + throw std::overflow_error("BitsetBase::to_ulong"); + } + #endif + return (unsigned long)mWord[0]; // Todo: We need to deal with the case whereby sizeof(word_type) < sizeof(unsigned long) + } + + + template + inline uint32_t BitsetBase::to_uint32() const + { + #if EASTL_EXCEPTIONS_ENABLED + // Verify that high words or bits are not set and thus that to_uint32 doesn't lose information. + for(size_t i = 1; i < NW; ++i) + { + if(mWord[i]) + throw std::overflow_error("BitsetBase::to_uint32"); + } + + #if(EA_PLATFORM_WORD_SIZE > 4) // if we have 64 bit words... + if(mWord[0] >> 32) + throw std::overflow_error("BitsetBase::to_uint32"); + #endif + #endif + + return (uint32_t)mWord[0]; + } + + + template + inline uint64_t BitsetBase::to_uint64() const + { + #if EASTL_EXCEPTIONS_ENABLED + // Verify that high words are not set and thus that to_uint64 doesn't lose information. + + EASTL_CT_ASSERT(NW > 2); // We can assume this because we have specializations of BitsetBase for <1> and <2>. + for(size_t i = 2; i < NW; ++i) + { + if(mWord[i]) + throw std::overflow_error("BitsetBase::to_uint64"); + } + #endif + + #if(EA_PLATFORM_WORD_SIZE == 4) + EASTL_CT_ASSERT(NW > 2); // We can assume this because we have specializations of BitsetBase for <1> and <2>. + return (mWord[1] << 32) | mWord[0]; + #else + return (uint64_t)mWord[0]; + #endif + } + + + template + inline typename BitsetBase::word_type& + BitsetBase::DoGetWord(size_type i) + { + return mWord[i >> kBitsPerWordShift]; + } + + + template + inline typename BitsetBase::word_type + BitsetBase::DoGetWord(size_type i) const + { + return mWord[i >> kBitsPerWordShift]; + } + + + template + inline typename BitsetBase::size_type + BitsetBase::DoFindFirst() const + { + for(size_type word_index = 0; word_index < NW; ++word_index) + { + const size_type fbiw = GetFirstBit(mWord[word_index]); + + if(fbiw != kBitsPerWord) + return (word_index * kBitsPerWord) + fbiw; + } + + return (size_type)NW * kBitsPerWord; + } + + +#if EASTL_DISABLE_BITSET_ARRAYBOUNDS_WARNING +EA_DISABLE_GCC_WARNING(-Warray-bounds) +#endif + + template + inline typename BitsetBase::size_type + BitsetBase::DoFindNext(size_type last_find) const + { + // Start looking from the next bit. + ++last_find; + + // Set initial state based on last find. 
+ size_type word_index = static_cast(last_find >> kBitsPerWordShift); + size_type bit_index = static_cast(last_find & kBitsPerWordMask); + + // To do: There probably is a more elegant way to write looping below. + if(word_index < NW) + { + // Mask off previous bits of the word so our search becomes a "find first". + word_type this_word = mWord[word_index] & (~static_cast(0) << bit_index); + + for(;;) + { + const size_type fbiw = GetFirstBit(this_word); + + if(fbiw != kBitsPerWord) + return (word_index * kBitsPerWord) + fbiw; + + if(++word_index < NW) + this_word = mWord[word_index]; + else + break; + } + } + + return (size_type)NW * kBitsPerWord; + } + +#if EASTL_DISABLE_BITSET_ARRAYBOUNDS_WARNING +EA_RESTORE_GCC_WARNING() +#endif + + + + template + inline typename BitsetBase::size_type + BitsetBase::DoFindLast() const + { + for(size_type word_index = (size_type)NW; word_index > 0; --word_index) + { + const size_type lbiw = GetLastBit(mWord[word_index - 1]); + + if(lbiw != kBitsPerWord) + return ((word_index - 1) * kBitsPerWord) + lbiw; + } + + return (size_type)NW * kBitsPerWord; + } + + + template + inline typename BitsetBase::size_type + BitsetBase::DoFindPrev(size_type last_find) const + { + if(last_find > 0) + { + // Set initial state based on last find. + size_type word_index = static_cast(last_find >> kBitsPerWordShift); + size_type bit_index = static_cast(last_find & kBitsPerWordMask); + + // Mask off subsequent bits of the word so our search becomes a "find last". + word_type mask = (~static_cast(0) >> (kBitsPerWord - 1 - bit_index)) >> 1; // We do two shifts here because many CPUs ignore requests to shift 32 bit integers by 32 bits, which could be the case above. + word_type this_word = mWord[word_index] & mask; + + for(;;) + { + const size_type lbiw = GetLastBit(this_word); + + if(lbiw != kBitsPerWord) + return (word_index * kBitsPerWord) + lbiw; + + if(word_index > 0) + this_word = mWord[--word_index]; + else + break; + } + } + + return (size_type)NW * kBitsPerWord; + } + + + + /////////////////////////////////////////////////////////////////////////// + // BitsetBase<1, WordType> + /////////////////////////////////////////////////////////////////////////// + + template + inline BitsetBase<1, WordType>::BitsetBase() + { + mWord[0] = 0; + } + + + template + inline BitsetBase<1, WordType>::BitsetBase(uint32_t value) + { + // This implementation assumes that sizeof(value) <= sizeof(word_type). + //EASTL_CT_ASSERT(sizeof(value) <= sizeof(word_type)); Disabled because we now have support for uint8_t and uint16_t word types. It would be nice to have a runtime assert that tested this. + + mWord[0] = static_cast(value); + } + + + /* + template + inline BitsetBase<1, WordType>::BitsetBase(uint64_t value) + { + #if(EA_PLATFORM_WORD_SIZE == 4) + EASTL_ASSERT(value <= 0xffffffff); + mWord[0] = static_cast(value); // This potentially loses data, but that's what the user is requesting. + #else + mWord[0] = static_cast(value); + #endif + } + */ + + + template + inline void BitsetBase<1, WordType>::operator&=(const this_type& x) + { + mWord[0] &= x.mWord[0]; + } + + + template + inline void BitsetBase<1, WordType>::operator|=(const this_type& x) + { + mWord[0] |= x.mWord[0]; + } + + + template + inline void BitsetBase<1, WordType>::operator^=(const this_type& x) + { + mWord[0] ^= x.mWord[0]; + } + + + template + inline void BitsetBase<1, WordType>::operator<<=(size_type n) + { + mWord[0] <<= n; + // We let the parent class turn off any upper bits. 
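+ // Note: bitset::operator<<= forwards here only when n < N, and N fits in a single word
+ // for this specialization, so the shift amount stays below kBitsPerWord.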
+ } + + + template + inline void BitsetBase<1, WordType>::operator>>=(size_type n) + { + mWord[0] >>= n; + } + + + template + inline void BitsetBase<1, WordType>::flip() + { + mWord[0] = ~mWord[0]; + // We let the parent class turn off any upper bits. + } + + + template + inline void BitsetBase<1, WordType>::set() + { + mWord[0] = static_cast(~static_cast(0)); + // We let the parent class turn off any upper bits. + } + + + template + inline void BitsetBase<1, WordType>::set(size_type i, bool value) + { + if(value) + mWord[0] |= (static_cast(1) << i); + else + mWord[0] &= ~(static_cast(1) << i); + } + + + template + inline void BitsetBase<1, WordType>::reset() + { + mWord[0] = 0; + } + + + template + inline bool BitsetBase<1, WordType>::operator==(const this_type& x) const + { + return mWord[0] == x.mWord[0]; + } + + + template + inline bool BitsetBase<1, WordType>::any() const + { + return mWord[0] != 0; + } + + + template + inline typename BitsetBase<1, WordType>::size_type + BitsetBase<1, WordType>::count() const + { + #if defined(__GNUC__) && (((__GNUC__ * 100) + __GNUC_MINOR__) >= 304) && !defined(EA_PLATFORM_ANDROID) // GCC 3.4 or later + #if(EA_PLATFORM_WORD_SIZE == 4) + return (size_type)__builtin_popcountl(mWord[0]); + #else + return (size_type)__builtin_popcountll(mWord[0]); + #endif + #elif defined(__GNUC__) && (__GNUC__ < 3) + return BitsetCountBits(mWord[0]); // GCC 2.x compiler inexplicably blows up on the code below. + #else + size_type n = 0; + for(word_type w = mWord[0]; w; w >>= 4) + n += EASTL_BITSET_COUNT_STRING[w & 0xF]; + return n; + #endif + } + + + template + inline void BitsetBase<1, WordType>::from_uint32(uint32_t value) + { + mWord[0] = static_cast(value); + } + + + template + inline void BitsetBase<1, WordType>::from_uint64(uint64_t value) + { + #if(EA_PLATFORM_WORD_SIZE == 4) + EASTL_ASSERT(value <= 0xffffffff); + mWord[0] = static_cast(value); // This potentially loses data, but that's what the user is requesting. + #else + mWord[0] = static_cast(value); + #endif + } + + + template + inline unsigned long BitsetBase<1, WordType>::to_ulong() const + { + #if EASTL_EXCEPTIONS_ENABLED + #if((EA_PLATFORM_WORD_SIZE > 4) && defined(EA_PLATFORM_MICROSOFT)) // If we are using 64 bit words but ulong is less than 64 bits... Microsoft platforms alone use a 32 bit long under 64 bit platforms. + // Verify that high bits are not set and thus that to_ulong doesn't lose information. + if(mWord[0] >> 32) + throw std::overflow_error("BitsetBase::to_ulong"); + #endif + #endif + + return static_cast(mWord[0]); + } + + + template + inline uint32_t BitsetBase<1, WordType>::to_uint32() const + { + #if EASTL_EXCEPTIONS_ENABLED + #if(EA_PLATFORM_WORD_SIZE > 4) // If we are using 64 bit words... + // Verify that high bits are not set and thus that to_uint32 doesn't lose information. + if(mWord[0] >> 32) + throw std::overflow_error("BitsetBase::to_uint32"); + #endif + #endif + + return static_cast(mWord[0]); + } + + + template + inline uint64_t BitsetBase<1, WordType>::to_uint64() const + { + // This implementation is the same regardless of the word size, and there is no possibility of overflow_error. 
+ return static_cast(mWord[0]); + } + + + template + inline typename BitsetBase<1, WordType>::word_type& + BitsetBase<1, WordType>::DoGetWord(size_type) + { + return mWord[0]; + } + + + template + inline typename BitsetBase<1, WordType>::word_type + BitsetBase<1, WordType>::DoGetWord(size_type) const + { + return mWord[0]; + } + + + template + inline typename BitsetBase<1, WordType>::size_type + BitsetBase<1, WordType>::DoFindFirst() const + { + return GetFirstBit(mWord[0]); + } + + + template + inline typename BitsetBase<1, WordType>::size_type + BitsetBase<1, WordType>::DoFindNext(size_type last_find) const + { + if(++last_find < kBitsPerWord) + { + // Mask off previous bits of word so our search becomes a "find first". + const word_type this_word = mWord[0] & ((~static_cast(0)) << last_find); + + return GetFirstBit(this_word); + } + + return kBitsPerWord; + } + + + template + inline typename BitsetBase<1, WordType>::size_type + BitsetBase<1, WordType>::DoFindLast() const + { + return GetLastBit(mWord[0]); + } + + + template + inline typename BitsetBase<1, WordType>::size_type + BitsetBase<1, WordType>::DoFindPrev(size_type last_find) const + { + if(last_find > 0) + { + // Mask off previous bits of word so our search becomes a "find first". + const word_type this_word = mWord[0] & ((~static_cast(0)) >> (kBitsPerWord - last_find)); + + return GetLastBit(this_word); + } + + return kBitsPerWord; + } + + + + + /////////////////////////////////////////////////////////////////////////// + // BitsetBase<2, WordType> + /////////////////////////////////////////////////////////////////////////// + + template + inline BitsetBase<2, WordType>::BitsetBase() + { + mWord[0] = 0; + mWord[1] = 0; + } + + + template + inline BitsetBase<2, WordType>::BitsetBase(uint32_t value) + { + // This implementation assumes that sizeof(value) <= sizeof(word_type). + //EASTL_CT_ASSERT(sizeof(value) <= sizeof(word_type)); Disabled because we now have support for uint8_t and uint16_t word types. It would be nice to have a runtime assert that tested this. + + mWord[0] = static_cast(value); + mWord[1] = 0; + } + + + /* + template + inline BitsetBase<2, WordType>::BitsetBase(uint64_t value) + { + #if(EA_PLATFORM_WORD_SIZE == 4) + mWord[0] = static_cast(value); + mWord[1] = static_cast(value >> 32); + #else + mWord[0] = static_cast(value); + mWord[1] = 0; + #endif + } + */ + + + template + inline void BitsetBase<2, WordType>::operator&=(const this_type& x) + { + mWord[0] &= x.mWord[0]; + mWord[1] &= x.mWord[1]; + } + + + template + inline void BitsetBase<2, WordType>::operator|=(const this_type& x) + { + mWord[0] |= x.mWord[0]; + mWord[1] |= x.mWord[1]; + } + + + template + inline void BitsetBase<2, WordType>::operator^=(const this_type& x) + { + mWord[0] ^= x.mWord[0]; + mWord[1] ^= x.mWord[1]; + } + + + template + inline void BitsetBase<2, WordType>::operator<<=(size_type n) + { + if(n) // to avoid a shift by kBitsPerWord, which is undefined + { + if(EASTL_UNLIKELY(n >= kBitsPerWord)) // parent expected to handle high bits and n >= 64 + { + mWord[1] = mWord[0]; + mWord[0] = 0; + n -= kBitsPerWord; + } + + mWord[1] = (mWord[1] << n) | (mWord[0] >> (kBitsPerWord - n)); // Intentionally use | instead of +. + mWord[0] <<= n; + // We let the parent class turn off any upper bits. 
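+ // e.g. with 8-bit words and n == 3: mWord[1] keeps its own bits shifted left by 3 and
+ // picks up the top 3 bits of mWord[0] in its low positions.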
+ } + } + + + template + inline void BitsetBase<2, WordType>::operator>>=(size_type n) + { + if(n) // to avoid a shift by kBitsPerWord, which is undefined + { + if(EASTL_UNLIKELY(n >= kBitsPerWord)) // parent expected to handle n >= 64 + { + mWord[0] = mWord[1]; + mWord[1] = 0; + n -= kBitsPerWord; + } + + mWord[0] = (mWord[0] >> n) | (mWord[1] << (kBitsPerWord - n)); // Intentionally use | instead of +. + mWord[1] >>= n; + } + } + + + template + inline void BitsetBase<2, WordType>::flip() + { + mWord[0] = ~mWord[0]; + mWord[1] = ~mWord[1]; + // We let the parent class turn off any upper bits. + } + + + template + inline void BitsetBase<2, WordType>::set() + { + mWord[0] = ~static_cast(0); + mWord[1] = ~static_cast(0); + // We let the parent class turn off any upper bits. + } + + + template + inline void BitsetBase<2, WordType>::set(size_type i, bool value) + { + if(value) + mWord[i >> kBitsPerWordShift] |= (static_cast(1) << (i & kBitsPerWordMask)); + else + mWord[i >> kBitsPerWordShift] &= ~(static_cast(1) << (i & kBitsPerWordMask)); + } + + + template + inline void BitsetBase<2, WordType>::reset() + { + mWord[0] = 0; + mWord[1] = 0; + } + + + template + inline bool BitsetBase<2, WordType>::operator==(const this_type& x) const + { + return (mWord[0] == x.mWord[0]) && (mWord[1] == x.mWord[1]); + } + + + template + inline bool BitsetBase<2, WordType>::any() const + { + // Or with two branches: { return (mWord[0] != 0) || (mWord[1] != 0); } + return (mWord[0] | mWord[1]) != 0; + } + + template + inline typename BitsetBase<2, WordType>::size_type + BitsetBase<2, WordType>::count() const + { + #if defined(__GNUC__) && (((__GNUC__ * 100) + __GNUC_MINOR__) >= 304) // GCC 3.4 or later + #if(EA_PLATFORM_WORD_SIZE == 4) + return (size_type)__builtin_popcountl(mWord[0]) + (size_type)__builtin_popcountl(mWord[1]); + #else + return (size_type)__builtin_popcountll(mWord[0]) + (size_type)__builtin_popcountll(mWord[1]); + #endif + + #else + return BitsetCountBits(mWord[0]) + BitsetCountBits(mWord[1]); + #endif + } + + + template + inline void BitsetBase<2, WordType>::from_uint32(uint32_t value) + { + mWord[0] = static_cast(value); + mWord[1] = 0; + } + + + template + inline void BitsetBase<2, WordType>::from_uint64(uint64_t value) + { + #if(EA_PLATFORM_WORD_SIZE == 4) + mWord[0] = static_cast(value); + mWord[1] = static_cast(value >> 32); + #else + mWord[0] = static_cast(value); + mWord[1] = 0; + #endif + } + + + template + inline unsigned long BitsetBase<2, WordType>::to_ulong() const + { + #if EASTL_EXCEPTIONS_ENABLED + if(mWord[1]) + throw std::overflow_error("BitsetBase::to_ulong"); + #endif + return (unsigned long)mWord[0]; // Todo: We need to deal with the case whereby sizeof(word_type) < sizeof(unsigned long) + } + + + template + inline uint32_t BitsetBase<2, WordType>::to_uint32() const + { + #if EASTL_EXCEPTIONS_ENABLED + // Verify that high words or bits are not set and thus that to_uint32 doesn't lose information. + + #if(EA_PLATFORM_WORD_SIZE == 4) + if(mWord[1]) + throw std::overflow_error("BitsetBase::to_uint32"); + #else + if(mWord[1] || (mWord[0] >> 32)) + throw std::overflow_error("BitsetBase::to_uint32"); + #endif + #endif + + return (uint32_t)mWord[0]; + } + + + template + inline uint64_t BitsetBase<2, WordType>::to_uint64() const + { + #if(EA_PLATFORM_WORD_SIZE == 4) + // There can't possibly be an overflow_error here. 
+ + return ((uint64_t)mWord[1] << 32) | mWord[0]; + #else + #if EASTL_EXCEPTIONS_ENABLED + if(mWord[1]) + throw std::overflow_error("BitsetBase::to_uint64"); + #endif + + return (uint64_t)mWord[0]; + #endif + } + + + template + inline typename BitsetBase<2, WordType>::word_type& + BitsetBase<2, WordType>::DoGetWord(size_type i) + { + return mWord[i >> kBitsPerWordShift]; + } + + + template + inline typename BitsetBase<2, WordType>::word_type + BitsetBase<2, WordType>::DoGetWord(size_type i) const + { + return mWord[i >> kBitsPerWordShift]; + } + + + template + inline typename BitsetBase<2, WordType>::size_type + BitsetBase<2, WordType>::DoFindFirst() const + { + size_type fbiw = GetFirstBit(mWord[0]); + + if(fbiw != kBitsPerWord) + return fbiw; + + fbiw = GetFirstBit(mWord[1]); + + if(fbiw != kBitsPerWord) + return kBitsPerWord + fbiw; + + return 2 * kBitsPerWord; + } + + + template + inline typename BitsetBase<2, WordType>::size_type + BitsetBase<2, WordType>::DoFindNext(size_type last_find) const + { + // If the last find was in the first word, we must check it and then possibly the second. + if(++last_find < (size_type)kBitsPerWord) + { + // Mask off previous bits of word so our search becomes a "find first". + word_type this_word = mWord[0] & ((~static_cast(0)) << last_find); + + // Step through words. + size_type fbiw = GetFirstBit(this_word); + + if(fbiw != kBitsPerWord) + return fbiw; + + fbiw = GetFirstBit(mWord[1]); + + if(fbiw != kBitsPerWord) + return kBitsPerWord + fbiw; + } + else if(last_find < (size_type)(2 * kBitsPerWord)) + { + // The last find was in the second word, remove the bit count of the first word from the find. + last_find -= kBitsPerWord; + + // Mask off previous bits of word so our search becomes a "find first". + word_type this_word = mWord[1] & ((~static_cast(0)) << last_find); + + const size_type fbiw = GetFirstBit(this_word); + + if(fbiw != kBitsPerWord) + return kBitsPerWord + fbiw; + } + + return 2 * kBitsPerWord; + } + + + template + inline typename BitsetBase<2, WordType>::size_type + BitsetBase<2, WordType>::DoFindLast() const + { + size_type lbiw = GetLastBit(mWord[1]); + + if(lbiw != kBitsPerWord) + return kBitsPerWord + lbiw; + + lbiw = GetLastBit(mWord[0]); + + if(lbiw != kBitsPerWord) + return lbiw; + + return 2 * kBitsPerWord; + } + + + template + inline typename BitsetBase<2, WordType>::size_type + BitsetBase<2, WordType>::DoFindPrev(size_type last_find) const + { + // If the last find was in the second word, we must check it and then possibly the first. + if(last_find > (size_type)kBitsPerWord) + { + // This has the same effect as last_find %= kBitsPerWord in our case. + last_find -= kBitsPerWord; + + // Mask off previous bits of word so our search becomes a "find first". + word_type this_word = mWord[1] & ((~static_cast(0)) >> (kBitsPerWord - last_find)); + + // Step through words. + size_type lbiw = GetLastBit(this_word); + + if(lbiw != kBitsPerWord) + return kBitsPerWord + lbiw; + + lbiw = GetLastBit(mWord[0]); + + if(lbiw != kBitsPerWord) + return lbiw; + } + else if(last_find != 0) + { + // Mask off previous bits of word so our search becomes a "find first". 
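+ // (The mask below keeps only bits 0..last_find-1 of mWord[0], so GetLastBit effectively
+ // performs a 'find last' restricted to positions before last_find.)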
+ word_type this_word = mWord[0] & ((~static_cast(0)) >> (kBitsPerWord - last_find)); + + const size_type lbiw = GetLastBit(this_word); + + if(lbiw != kBitsPerWord) + return lbiw; + } + + return 2 * kBitsPerWord; + } + + + + /////////////////////////////////////////////////////////////////////////// + // bitset::reference + /////////////////////////////////////////////////////////////////////////// + + template + inline bitset::reference::reference(const bitset& x, size_type i) + : mpBitWord(&const_cast(x).DoGetWord(i)), + mnBitIndex(i & kBitsPerWordMask) + { // We have an issue here because the above is casting away the const-ness of the source bitset. + // Empty + } + + + template + inline typename bitset::reference& + bitset::reference::operator=(bool value) + { + if(value) + *mpBitWord |= (static_cast(1) << (mnBitIndex & kBitsPerWordMask)); + else + *mpBitWord &= ~(static_cast(1) << (mnBitIndex & kBitsPerWordMask)); + return *this; + } + + + template + inline typename bitset::reference& + bitset::reference::operator=(const reference& x) + { + if(*x.mpBitWord & (static_cast(1) << (x.mnBitIndex & kBitsPerWordMask))) + *mpBitWord |= (static_cast(1) << (mnBitIndex & kBitsPerWordMask)); + else + *mpBitWord &= ~(static_cast(1) << (mnBitIndex & kBitsPerWordMask)); + return *this; + } + + + template + inline bool bitset::reference::operator~() const + { + return (*mpBitWord & (static_cast(1) << (mnBitIndex & kBitsPerWordMask))) == 0; + } + + + //Defined inline in the class because Metrowerks fails to be able to compile it here. + //template + //inline bitset::reference::operator bool() const + //{ + // return (*mpBitWord & (static_cast(1) << (mnBitIndex & kBitsPerWordMask))) != 0; + //} + + + template + inline typename bitset::reference& + bitset::reference::flip() + { + *mpBitWord ^= static_cast(1) << (mnBitIndex & kBitsPerWordMask); + return *this; + } + + + + + /////////////////////////////////////////////////////////////////////////// + // bitset + /////////////////////////////////////////////////////////////////////////// + + template + inline bitset::bitset() + : base_type() + { + // Empty. The base class will set all bits to zero. + } + + EA_DISABLE_VC_WARNING(6313) + template + inline bitset::bitset(uint32_t value) + : base_type(value) + { + if((N & kBitsPerWordMask) || (N == 0)) // If there are any high bits to clear... (If we didn't have this check, then the code below would do the wrong thing when N == 32. + mWord[kWordCount - 1] &= ~(static_cast(~static_cast(0)) << (N & kBitsPerWordMask)); // This clears any high unused bits. + } + EA_RESTORE_VC_WARNING() + + /* + template + inline bitset::bitset(uint64_t value) + : base_type(value) + { + if((N & kBitsPerWordMask) || (N == 0)) // If there are any high bits to clear... + mWord[kWordCount - 1] &= ~(~static_cast(0) << (N & kBitsPerWordMask)); // This clears any high unused bits. 
+ } + */ + + + template + inline typename bitset::this_type& + bitset::operator&=(const this_type& x) + { + base_type::operator&=(x); + return *this; + } + + + template + inline typename bitset::this_type& + bitset::operator|=(const this_type& x) + { + base_type::operator|=(x); + return *this; + } + + + template + inline typename bitset::this_type& + bitset::operator^=(const this_type& x) + { + base_type::operator^=(x); + return *this; + } + + + template + inline typename bitset::this_type& + bitset::operator<<=(size_type n) + { + if(EASTL_LIKELY((intptr_t)n < (intptr_t)N)) + { + EA_DISABLE_VC_WARNING(6313) + base_type::operator<<=(n); + if((N & kBitsPerWordMask) || (N == 0)) // If there are any high bits to clear... (If we didn't have this check, then the code below would do the wrong thing when N == 32. + mWord[kWordCount - 1] &= ~(static_cast(~static_cast(0)) << (N & kBitsPerWordMask)); // This clears any high unused bits. We need to do this so that shift operations proceed correctly. + EA_RESTORE_VC_WARNING() + } + else + base_type::reset(); + return *this; + } + + + template + inline typename bitset::this_type& + bitset::operator>>=(size_type n) + { + if(EASTL_LIKELY(n < N)) + base_type::operator>>=(n); + else + base_type::reset(); + return *this; + } + + + template + inline typename bitset::this_type& + bitset::set() + { + base_type::set(); // This sets all bits. + if((N & kBitsPerWordMask) || (N == 0)) // If there are any high bits to clear... (If we didn't have this check, then the code below would do the wrong thing when N == 32. + mWord[kWordCount - 1] &= ~(static_cast(~static_cast(0)) << (N & kBitsPerWordMask)); // This clears any high unused bits. We need to do this so that shift operations proceed correctly. + return *this; + } + + + template + inline typename bitset::this_type& + bitset::set(size_type i, bool value) + { + if(i < N) + base_type::set(i, value); + else + { + #if EASTL_ASSERT_ENABLED + if(EASTL_UNLIKELY(!(i < N))) + EASTL_FAIL_MSG("bitset::set -- out of range"); + #endif + + #if EASTL_EXCEPTIONS_ENABLED + throw std::out_of_range("bitset::set"); + #endif + } + + return *this; + } + + + template + inline typename bitset::this_type& + bitset::reset() + { + base_type::reset(); + return *this; + } + + + template + inline typename bitset::this_type& + bitset::reset(size_type i) + { + if(EASTL_LIKELY(i < N)) + DoGetWord(i) &= ~(static_cast(1) << (i & kBitsPerWordMask)); + else + { + #if EASTL_ASSERT_ENABLED + if(EASTL_UNLIKELY(!(i < N))) + EASTL_FAIL_MSG("bitset::reset -- out of range"); + #endif + + #if EASTL_EXCEPTIONS_ENABLED + throw std::out_of_range("bitset::reset"); + #endif + } + + return *this; + } + + + template + inline typename bitset::this_type& + bitset::flip() + { + EA_DISABLE_VC_WARNING(6313) + base_type::flip(); + if((N & kBitsPerWordMask) || (N == 0)) // If there are any high bits to clear... (If we didn't have this check, then the code below would do the wrong thing when N == 32. + mWord[kWordCount - 1] &= ~(static_cast(~static_cast(0)) << (N & kBitsPerWordMask)); // This clears any high unused bits. We need to do this so that shift operations proceed correctly. 
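+ // e.g. N == 70 with 64-bit words: N & kBitsPerWordMask == 6, so ~(~0 << 6) keeps only
+ // the low 6 valid bits of the last word and clears the 58 unused ones.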
+ return *this; + EA_RESTORE_VC_WARNING() + } + + + template + inline typename bitset::this_type& + bitset::flip(size_type i) + { + if(EASTL_LIKELY(i < N)) + DoGetWord(i) ^= (static_cast(1) << (i & kBitsPerWordMask)); + else + { + #if EASTL_ASSERT_ENABLED + if(EASTL_UNLIKELY(!(i < N))) + EASTL_FAIL_MSG("bitset::flip -- out of range"); + #endif + + #if EASTL_EXCEPTIONS_ENABLED + throw std::out_of_range("bitset::flip"); + #endif + } + return *this; + } + + + template + inline typename bitset::this_type + bitset::operator~() const + { + return this_type(*this).flip(); + } + + + template + inline typename bitset::reference + bitset::operator[](size_type i) + { + #if EASTL_ASSERT_ENABLED + if(EASTL_UNLIKELY(!(i < N))) + EASTL_FAIL_MSG("bitset::operator[] -- out of range"); + #endif + + return reference(*this, i); + } + + + template + inline bool bitset::operator[](size_type i) const + { + #if EASTL_ASSERT_ENABLED + if(EASTL_UNLIKELY(!(i < N))) + EASTL_FAIL_MSG("bitset::operator[] -- out of range"); + #endif + + return (DoGetWord(i) & (static_cast(1) << (i & kBitsPerWordMask))) != 0; + } + + + template + inline const typename bitset::word_type* bitset::data() const + { + return base_type::mWord; + } + + + template + inline typename bitset::word_type* bitset::data() + { + return base_type::mWord; + } + + + template + inline void bitset::from_uint32(uint32_t value) + { + base_type::from_uint32(value); + + if((N & kBitsPerWordMask) || (N == 0)) // If there are any high bits to clear... (If we didn't have this check, then the code below would do the wrong thing when N == 32. + mWord[kWordCount - 1] &= ~(static_cast(~static_cast(0)) << (N & kBitsPerWordMask)); // This clears any high unused bits. We need to do this so that shift operations proceed correctly. + } + + + template + inline void bitset::from_uint64(uint64_t value) + { + base_type::from_uint64(value); + + if((N & kBitsPerWordMask) || (N == 0)) // If there are any high bits to clear... (If we didn't have this check, then the code below would do the wrong thing when N == 32. + mWord[kWordCount - 1] &= ~(static_cast(~static_cast(0)) << (N & kBitsPerWordMask)); // This clears any high unused bits. We need to do this so that shift operations proceed correctly. 
+ } + + + // template + // inline unsigned long bitset::to_ulong() const + // { + // return base_type::to_ulong(); + // } + + + // template + // inline uint32_t bitset::to_uint32() const + // { + // return base_type::to_uint32(); + // } + + + // template + // inline uint64_t bitset::to_uint64() const + // { + // return base_type::to_uint64(); + // } + + + // template + // inline typename bitset::size_type + // bitset::count() const + // { + // return base_type::count(); + // } + + + template + inline typename bitset::size_type + bitset::size() const + { + return (size_type)N; + } + + + template + inline bool bitset::operator==(const this_type& x) const + { + return base_type::operator==(x); + } + + + template + inline bool bitset::operator!=(const this_type& x) const + { + return !base_type::operator==(x); + } + + + template + inline bool bitset::test(size_type i) const + { + if(EASTL_UNLIKELY(i < N)) + return (DoGetWord(i) & (static_cast(1) << (i & kBitsPerWordMask))) != 0; + + #if EASTL_ASSERT_ENABLED + EASTL_FAIL_MSG("bitset::test -- out of range"); + #endif + + #if EASTL_EXCEPTIONS_ENABLED + throw std::out_of_range("bitset::test"); + #else + return false; + #endif + } + + + // template + // inline bool bitset::any() const + // { + // return base_type::any(); + // } + + + template + inline bool bitset::all() const + { + return count() == size(); + } + + + template + inline bool bitset::none() const + { + return !base_type::any(); + } + + + template + inline typename bitset::this_type + bitset::operator<<(size_type n) const + { + return this_type(*this).operator<<=(n); + } + + + template + inline typename bitset::this_type + bitset::operator>>(size_type n) const + { + return this_type(*this).operator>>=(n); + } + + + template + inline typename bitset::size_type + bitset::find_first() const + { + const size_type i = base_type::DoFindFirst(); + + if(i < kSize) + return i; + // Else i could be the base type bit count, so we clamp it to our size. + + return kSize; + } + + + template + inline typename bitset::size_type + bitset::find_next(size_type last_find) const + { + const size_type i = base_type::DoFindNext(last_find); + + if(i < kSize) + return i; + // Else i could be the base type bit count, so we clamp it to our size. + + return kSize; + } + + + template + inline typename bitset::size_type + bitset::find_last() const + { + const size_type i = base_type::DoFindLast(); + + if(i < kSize) + return i; + // Else i could be the base type bit count, so we clamp it to our size. + + return kSize; + } + + + template + inline typename bitset::size_type + bitset::find_prev(size_type last_find) const + { + const size_type i = base_type::DoFindPrev(last_find); + + if(i < kSize) + return i; + // Else i could be the base type bit count, so we clamp it to our size. + + return kSize; + } + + + + /////////////////////////////////////////////////////////////////////////// + // global operators + /////////////////////////////////////////////////////////////////////////// + + template + inline bitset operator&(const bitset& a, const bitset& b) + { + // We get betting inlining when we don't declare temporary variables. 
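+ // (i.e. the copy of 'a' is constructed directly in the return expression rather than as a named local.)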
+ return bitset(a).operator&=(b); + } + + + template + inline bitset operator|(const bitset& a, const bitset& b) + { + return bitset(a).operator|=(b); + } + + + template + inline bitset operator^(const bitset& a, const bitset& b) + { + return bitset(a).operator^=(b); + } + + +} // namespace eastl + + +EA_RESTORE_VC_WARNING(); + +#endif // Header include guard diff --git a/libkram/eastl/include/EASTL/bitvector.h b/libkram/eastl/include/EASTL/bitvector.h new file mode 100644 index 00000000..ade67823 --- /dev/null +++ b/libkram/eastl/include/EASTL/bitvector.h @@ -0,0 +1,1474 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// Implements a bit vector, which is essentially a vector of bool but which +// uses bits instead of bytes. It is thus similar to the original std::vector. +/////////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// Note: This code is not yet complete: it isn't tested and doesn't yet +// support containers other than vector. +/////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_BITVECTOR_H +#define EASTL_BITVECTOR_H + + +#include +#include +#include +#include + +EA_DISABLE_VC_WARNING(4480); // nonstandard extension used: specifying underlying type for enum + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + + + +namespace eastl +{ + + /// EASTL_BITVECTOR_DEFAULT_NAME + /// + /// Defines a default container name in the absence of a user-provided name. + /// + #ifndef EASTL_BITVECTOR_DEFAULT_NAME + #define EASTL_BITVECTOR_DEFAULT_NAME EASTL_DEFAULT_NAME_PREFIX " bitvector" // Unless the user overrides something, this is "EASTL bitvector". + #endif + + /// EASTL_BITVECTOR_DEFAULT_ALLOCATOR + /// + #ifndef EASTL_BITVECTOR_DEFAULT_ALLOCATOR + #define EASTL_BITVECTOR_DEFAULT_ALLOCATOR allocator_type(EASTL_BITVECTOR_DEFAULT_NAME) + #endif + + + + /// BitvectorWordType + /// Defines the integral data type used by bitvector. + typedef EASTL_BITSET_WORD_TYPE_DEFAULT BitvectorWordType; + + + template + class bitvector_const_iterator; + + + template + class bitvector_reference + { + public: + typedef eastl_size_t size_type; + bitvector_reference(Element* ptr, eastl_size_t i); + + bitvector_reference& operator=(bool value); + bitvector_reference& operator=(const bitvector_reference& rhs); + + operator bool() const // Defined here because some compilers fail otherwise. + { return (*mpBitWord & (Element(1) << mnBitIndex)) != 0; } + + protected: + friend class bitvector_const_iterator; + + Element* mpBitWord; + size_type mnBitIndex; + + bitvector_reference() {} + void CopyFrom(const bitvector_reference& rhs); + }; + + + + template + class bitvector_const_iterator + { + public: + typedef EASTL_ITC_NS::random_access_iterator_tag iterator_category; + typedef bitvector_const_iterator this_type; + typedef bool value_type; + typedef bitvector_reference reference_type; + typedef ptrdiff_t difference_type; + typedef Element element_type; + typedef element_type* pointer; // This is wrong. It needs to be someting that acts as a pointer to a bit. 
+ typedef element_type& reference; // This is not right. It needs to be someting that acts as a pointer to a bit. + typedef eastl_size_t size_type; + + protected: + reference_type mReference; + + enum + { + kBitCount = (8 * sizeof(Element)) + }; + + public: + bool operator*() const; + bool operator[](difference_type n) const; + + bitvector_const_iterator(); + bitvector_const_iterator(const element_type* p, eastl_size_t i); + bitvector_const_iterator(const reference_type& referenceType); + + bitvector_const_iterator& operator++(); + bitvector_const_iterator operator++(int); + bitvector_const_iterator& operator--(); + bitvector_const_iterator operator--(int); + + bitvector_const_iterator& operator+=(difference_type dist); + bitvector_const_iterator& operator-=(difference_type dist); + bitvector_const_iterator operator+ (difference_type dist) const; + bitvector_const_iterator operator- (difference_type dist) const; + + difference_type operator-(const this_type& rhs) const; + + bitvector_const_iterator& operator= (const this_type& rhs); + + bool operator==(const this_type& rhs) const; + bool operator!=(const this_type& rhs) const; + + bool operator< (const this_type& rhs) const; + bool operator<=(const this_type& rhs) const; + bool operator> (const this_type& rhs) const; + bool operator>=(const this_type& rhs) const; + + int validate(const element_type* pStart, const element_type* pEnd, eastl_size_t nExtraBits) const; + + protected: + template + friend class bitvector; + + reference_type& get_reference_type() { return mReference; } + }; + + + + template + class bitvector_iterator : public bitvector_const_iterator + { + public: + typedef EASTL_ITC_NS::random_access_iterator_tag iterator_category; + typedef bitvector_iterator this_type; + typedef bitvector_const_iterator base_type; + typedef bool value_type; + typedef bitvector_reference reference_type; + typedef ptrdiff_t difference_type; + typedef Element element_type; + typedef element_type* pointer; // This is wrong. It needs to be someting that acts as a pointer to a bit. + typedef element_type& reference; // This is not right. It needs to be someting that acts as a pointer to a bit. + + public: + reference_type operator*() const; + reference_type operator[](difference_type n) const; + + bitvector_iterator(); + bitvector_iterator(element_type* p, eastl_size_t i); + bitvector_iterator(reference_type& referenceType); + + bitvector_iterator& operator++() { base_type::operator++(); return *this; } + bitvector_iterator& operator--() { base_type::operator--(); return *this; } + bitvector_iterator operator++(int); + bitvector_iterator operator--(int); + + bitvector_iterator& operator+=(difference_type dist) { base_type::operator+=(dist); return *this; } + bitvector_iterator& operator-=(difference_type dist) { base_type::operator-=(dist); return *this; } + bitvector_iterator operator+ (difference_type dist) const; + bitvector_iterator operator- (difference_type dist) const; + + // We need this here because we are overloading operator-, so for some reason the + // other overload of the function can't be found unless it's explicitly specified. + difference_type operator-(const base_type& rhs) const { return base_type::operator-(rhs); } + }; + + + + /// bitvector + /// + /// Implements an array of bits treated as boolean values. + /// bitvector is similar to vector but uses bits instead of bytes and + /// allows the user to use other containers such as deque instead of vector. 
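+ /// For example, with the default vector-backed container, usage is roughly:
+ /// bitvector<> bv; bv.push_back(true); bv.set(10, true); bool b = bv.test(10, false);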
+ /// bitvector is different from bitset in that bitset is less flexible but + /// uses less memory and has higher performance. + /// + /// To consider: Rename the Element template parameter to WordType, for + /// consistency with bitset. + /// + template > + class bitvector + { + public: + typedef bitvector this_type; + typedef bool value_type; + typedef bitvector_reference reference; + typedef bool const_reference; + typedef bitvector_iterator iterator; + typedef bitvector_const_iterator const_iterator; + typedef eastl::reverse_iterator reverse_iterator; + typedef eastl::reverse_iterator const_reverse_iterator; + typedef Allocator allocator_type; + typedef Element element_type; + typedef Container container_type; + typedef eastl_size_t size_type; + typedef ptrdiff_t difference_type; + + #if defined(_MSC_VER) && (_MSC_VER >= 1400) && (_MSC_VER <= 1600) && !EASTL_STD_CPP_ONLY // _MSC_VER of 1400 means VS2005, 1600 means VS2010. VS2012 generates errors with usage of enum:size_type. + enum : size_type { // Use Microsoft enum language extension, allowing for smaller debug symbols than using a static const. Users have been affected by this. + npos = container_type::npos, + kMaxSize = container_type::kMaxSize + }; + #else + static const size_type npos = container_type::npos; /// 'npos' means non-valid position or simply non-position. + static const size_type kMaxSize = container_type::kMaxSize; /// -1 is reserved for 'npos'. It also happens to be slightly beneficial that kMaxSize is a value less than -1, as it helps us deal with potential integer wraparound issues. + #endif + + enum + { + kBitCount = 8 * sizeof(Element) + }; + + protected: + container_type mContainer; + size_type mFreeBitCount; // Unused bits in the last word of mContainer. + + public: + bitvector(); + explicit bitvector(const allocator_type& allocator); + explicit bitvector(size_type n, const allocator_type& allocator = EASTL_BITVECTOR_DEFAULT_ALLOCATOR); + bitvector(size_type n, value_type value, const allocator_type& allocator = EASTL_BITVECTOR_DEFAULT_ALLOCATOR); + bitvector(const bitvector& copy); + + template + bitvector(InputIterator first, InputIterator last); + + bitvector& operator=(const bitvector& x); + void swap(this_type& x); + + template + void assign(InputIterator first, InputIterator last); + + iterator begin() EA_NOEXCEPT; + const_iterator begin() const EA_NOEXCEPT; + const_iterator cbegin() const EA_NOEXCEPT; + + iterator end() EA_NOEXCEPT; + const_iterator end() const EA_NOEXCEPT; + const_iterator cend() const EA_NOEXCEPT; + + reverse_iterator rbegin() EA_NOEXCEPT; + const_reverse_iterator rbegin() const EA_NOEXCEPT; + const_reverse_iterator crbegin() const EA_NOEXCEPT; + + reverse_iterator rend() EA_NOEXCEPT; + const_reverse_iterator rend() const EA_NOEXCEPT; + const_reverse_iterator crend() const EA_NOEXCEPT; + + bool empty() const EA_NOEXCEPT; + size_type size() const EA_NOEXCEPT; + size_type capacity() const EA_NOEXCEPT; + + void resize(size_type n, value_type value); + void resize(size_type n); + void reserve(size_type n); + void set_capacity(size_type n = npos); // Revises the capacity to the user-specified value. Resizes the container to match the capacity if the requested capacity n is less than the current size. If n == npos then the capacity is reallocated (if necessary) such that capacity == size. 
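+ // Note: size(), capacity(), resize(), reserve() and set_capacity() are all measured in
+ // bits; the underlying container allocates whole Element words of kBitCount bits each.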
+ + void push_back(); + void push_back(value_type value); + void pop_back(); + + reference front(); + const_reference front() const; + reference back(); + const_reference back() const; + + bool test(size_type n, bool defaultValue) const; // Returns true if the bit index is < size() and set. Returns defaultValue if the bit is >= size(). + void set(size_type n, bool value); // Resizes the container to accomodate n if necessary. + + reference at(size_type n); // throws an out_of_range exception if n is invalid. + const_reference at(size_type n) const; + + reference operator[](size_type n); // behavior is undefined if n is invalid. + const_reference operator[](size_type n) const; + + /* + Work in progress: + template iterator find_first(); // Finds the lowest "on" bit. + template iterator find_next(const_iterator it); // Finds the next lowest "on" bit after it. + template iterator find_last(); // Finds the index of the last "on" bit, returns size if none are set. + template iterator find_prev(const_iterator it); // Finds the index of the last "on" bit before last_find, returns size if none are set. + + template const_iterator find_first() const; // Finds the lowest "on" bit. + template const_iterator find_next(const_iterator it) const; // Finds the next lowest "on" bit after it. + template const_iterator find_last() const; // Finds the index of the last "on" bit, returns size if none are set. + template const_iterator find_prev(const_iterator it) const; // Finds the index of the last "on" bit before last_find, returns size if none are set. + */ + + element_type* data() EA_NOEXCEPT; + const element_type* data() const EA_NOEXCEPT; + + iterator insert(const_iterator position, value_type value); + void insert(const_iterator position, size_type n, value_type value); + + // template Not yet implemented. See below for disabled definition. + // void insert(const_iterator position, InputIterator first, InputIterator last); + + iterator erase(const_iterator position); + iterator erase(const_iterator first, const_iterator last); + + reverse_iterator erase(const_reverse_iterator position); + reverse_iterator erase(const_reverse_iterator first, const_reverse_iterator last); + + void clear(); + void reset_lose_memory(); // This is a unilateral reset to an initially empty state. No destructors are called, no deallocation occurs. 
+ + container_type& get_container(); + const container_type& get_container() const; + + bool validate() const; + int validate_iterator(const_iterator i) const; + }; + + + + + /////////////////////////////////////////////////////////////////////// + // bitvector_reference + /////////////////////////////////////////////////////////////////////// + + template + bitvector_reference::bitvector_reference(Element* p, eastl_size_t i) + : mpBitWord(p), + mnBitIndex(i) + { + } + + + template + bitvector_reference& + bitvector_reference::operator=(bool value) + { + const Element mask = (Element)(Element(1) << mnBitIndex); + + if(value) + *mpBitWord |= mask; + else + *mpBitWord &= ~mask; + + return *this; + } + + + template + bitvector_reference& + bitvector_reference::operator=(const bitvector_reference& rhs) + { + return (*this = (bool)rhs); + } + + + template + void bitvector_reference::CopyFrom(const bitvector_reference& rhs) + { + mpBitWord = rhs.mpBitWord; + mnBitIndex = rhs.mnBitIndex; + } + + + + + /////////////////////////////////////////////////////////////////////// + // bitvector_const_iterator + /////////////////////////////////////////////////////////////////////// + + template + bitvector_const_iterator::bitvector_const_iterator() + : mReference(0, 0) + { + } + + + template + bitvector_const_iterator::bitvector_const_iterator(const Element* p, eastl_size_t i) + : mReference(const_cast(p), i) // const_cast is safe here because we never let mReference leak and we don't modify it. + { + } + + + template + bitvector_const_iterator::bitvector_const_iterator(const reference_type& reference) + : mReference(reference) + { + } + + + template + bitvector_const_iterator& + bitvector_const_iterator::operator++() + { + ++mReference.mnBitIndex; + + if(mReference.mnBitIndex == kBitCount) + { + ++mReference.mpBitWord; + mReference.mnBitIndex = 0; + } + + return *this; + } + + + template + bitvector_const_iterator& + bitvector_const_iterator::operator--() + { + if(mReference.mnBitIndex == 0) + { + --mReference.mpBitWord; + mReference.mnBitIndex = kBitCount; + } + + --mReference.mnBitIndex; + return *this; + } + + + template + bitvector_const_iterator + bitvector_const_iterator::operator++(int) + { + bitvector_const_iterator copy(*this); + ++*this; + return copy; + } + + + template + bitvector_const_iterator + bitvector_const_iterator::operator--(int) + { + bitvector_const_iterator copy(*this); + --*this; + return copy; + } + + + template + bitvector_const_iterator& + bitvector_const_iterator::operator+=(difference_type n) + { + n += mReference.mnBitIndex; + + if(n >= difference_type(0)) + { + mReference.mpBitWord += n / kBitCount; + mReference.mnBitIndex = (size_type)(n % kBitCount); + } + else + { + // backwards is tricky + // figure out how many full words backwards we need to move + // n = [-1..-32] => 1 + // n = [-33..-64] => 2 + const size_type backwards = (size_type)(-n + kBitCount - 1); + mReference.mpBitWord -= backwards / kBitCount; + + // -1 => 31; backwards = 32; 31 - (backwards % 32) = 31 + // -2 => 30; backwards = 33; 31 - (backwards % 32) = 30 + // -3 => 29; backwards = 34 + // .. 
+ // -32 => 0; backwards = 63; 31 - (backwards % 32) = 0 + // -33 => 31; backwards = 64; 31 - (backwards % 32) = 31 + mReference.mnBitIndex = (kBitCount - 1) - (backwards % kBitCount); + } + + return *this; + } + + + template + bitvector_const_iterator& + bitvector_const_iterator::operator-=(difference_type n) + { + return (*this += -n); + } + + + template + bitvector_const_iterator + bitvector_const_iterator::operator+(difference_type n) const + { + bitvector_const_iterator copy(*this); + copy += n; + return copy; + } + + + template + bitvector_const_iterator + bitvector_const_iterator::operator-(difference_type n) const + { + bitvector_const_iterator copy(*this); + copy -= n; + return copy; + } + + + template + typename bitvector_const_iterator::difference_type + bitvector_const_iterator::operator-(const this_type& rhs) const + { + return ((mReference.mpBitWord - rhs.mReference.mpBitWord) * kBitCount) + mReference.mnBitIndex - rhs.mReference.mnBitIndex; + } + + + template + bool bitvector_const_iterator::operator==(const this_type& rhs) const + { + return (mReference.mpBitWord == rhs.mReference.mpBitWord) && (mReference.mnBitIndex == rhs.mReference.mnBitIndex); + } + + + template + bool bitvector_const_iterator::operator!=(const this_type& rhs) const + { + return !(*this == rhs); + } + + + template + bool bitvector_const_iterator::operator<(const this_type& rhs) const + { + return (mReference.mpBitWord < rhs.mReference.mpBitWord) || + ((mReference.mpBitWord == rhs.mReference.mpBitWord) && (mReference.mnBitIndex < rhs.mReference.mnBitIndex)); + } + + + template + bool bitvector_const_iterator::operator<=(const this_type& rhs) const + { + return (mReference.mpBitWord < rhs.mReference.mpBitWord) || + ((mReference.mpBitWord == rhs.mReference.mpBitWord) && (mReference.mnBitIndex <= rhs.mReference.mnBitIndex)); + } + + + template + bool bitvector_const_iterator::operator>(const this_type& rhs) const + { + return !(*this <= rhs); + } + + + template + bool bitvector_const_iterator::operator>=(const this_type& rhs) const + { + return !(*this < rhs); + } + + + template + bool bitvector_const_iterator::operator*() const + { + return mReference; + } + + + template + bool bitvector_const_iterator::operator[](difference_type n) const + { + return *(*this + n); + } + + + template + bitvector_const_iterator& bitvector_const_iterator::operator= (const this_type& rhs) + { + mReference.CopyFrom(rhs.mReference); + return *this; + } + + + template + int bitvector_const_iterator::validate(const Element* pStart, const Element* pEnd, eastl_size_t nExtraBits) const + { + const Element* const pCurrent = mReference.mpBitWord; + + if(pCurrent >= pStart) + { + if(nExtraBits == 0) + { + if(pCurrent == pEnd && mReference) + return eastl::isf_valid | eastl::isf_current; + else if(pCurrent < pEnd) + return eastl::isf_valid | eastl::isf_current | eastl::isf_can_dereference; + } + else if(pCurrent == (pEnd - 1)) + { + const size_type bit = mReference.mnBitIndex; + const size_type lastbit = kBitCount - nExtraBits; + + if(bit == lastbit) + return eastl::isf_valid | eastl::isf_current; + else if(bit < lastbit) + return eastl::isf_valid | eastl::isf_current | eastl::isf_can_dereference; + } + else if(pCurrent < pEnd) + { + return eastl::isf_valid | eastl::isf_current | eastl::isf_can_dereference; + } + } + + return eastl::isf_none; + } + + + + /////////////////////////////////////////////////////////////////////// + // bitvector_iterator + /////////////////////////////////////////////////////////////////////// + + template + 
bitvector_iterator::bitvector_iterator() + : base_type() + { + } + + template + bitvector_iterator::bitvector_iterator(Element* p, eastl_size_t i) + : base_type(p, i) + { + } + + + template + bitvector_iterator::bitvector_iterator(reference_type& reference) + : base_type(reference) + { + } + + + template + typename bitvector_iterator::reference_type + bitvector_iterator::operator*() const + { + return base_type::mReference; + } + + + template + typename bitvector_iterator::reference_type + bitvector_iterator::operator[](difference_type n) const + { + return *(*this + n); + } + + + template + void MoveBits(bitvector_iterator start, + bitvector_iterator end, + bitvector_iterator dest) + { + // Slow implemenation; could optimize by moving a word at a time. + if(dest <= start) + { + while(start != end) + { + *dest = *start; + ++dest; + ++start; + } + } + else + { + // Need to move backwards + dest += (end - start); + + while(start != end) + { + --dest; + --end; + *dest = *end; + } + } + } + + + template + bitvector_iterator + bitvector_iterator::operator++(int) + { + bitvector_iterator copy(*this); + ++*this; + return copy; + } + + + template + bitvector_iterator + bitvector_iterator::operator--(int) + { + bitvector_iterator copy(*this); + --*this; + return copy; + } + + + template + bitvector_iterator + bitvector_iterator::operator+(difference_type n) const + { + bitvector_iterator copy(*this); + copy += n; + return copy; + } + + + template + bitvector_iterator + bitvector_iterator::operator-(difference_type n) const + { + bitvector_iterator copy(*this); + copy -= n; + return copy; + } + + + + + /////////////////////////////////////////////////////////////////////// + // bitvector + /////////////////////////////////////////////////////////////////////// + + template + template + void bitvector::assign(InputIterator first, InputIterator last) + { + // To consider: We can maybe specialize this on bitvector_iterator to do a fast bitwise copy. + // We can also specialize for random access iterators to figure out the size & reserve first. 
+ + clear(); + + while(first != last) + { + push_back(*first); + ++first; + } + } + + + template + typename bitvector::iterator + bitvector::begin() EA_NOEXCEPT + { + return iterator(mContainer.begin(), 0); + } + + + template + typename bitvector::const_iterator + bitvector::begin() const EA_NOEXCEPT + { + return const_iterator(mContainer.begin(), 0); + } + + + template + typename bitvector::const_iterator + bitvector::cbegin() const EA_NOEXCEPT + { + return const_iterator(mContainer.begin(), 0); + } + + + template + typename bitvector::iterator + bitvector::end() EA_NOEXCEPT + { + return iterator(mContainer.end(), 0) - mFreeBitCount; + } + + + template + typename bitvector::const_iterator + bitvector::end() const EA_NOEXCEPT + { + return const_iterator(mContainer.end(), 0) - mFreeBitCount; + } + + + template + typename bitvector::const_iterator + bitvector::cend() const EA_NOEXCEPT + { + return const_iterator(mContainer.end(), 0) - mFreeBitCount; + } + + + template + bool bitvector::empty() const EA_NOEXCEPT + { + return mContainer.empty(); + } + + + template + typename bitvector::size_type + bitvector::size() const EA_NOEXCEPT + { + return (mContainer.size() * kBitCount) - mFreeBitCount; + } + + + template + typename bitvector::size_type + bitvector::capacity() const EA_NOEXCEPT + { + return mContainer.capacity() * kBitCount; + } + + + template + void bitvector::set_capacity(size_type n) + { + if(n == npos) + mContainer.set_capacity(npos); + else + mContainer.set_capacity((n + kBitCount - 1) / kBitCount); + } + + + template + typename bitvector::reverse_iterator + bitvector::rbegin() EA_NOEXCEPT + { + return reverse_iterator(end()); + } + + + template + typename bitvector::const_reverse_iterator + bitvector::rbegin() const EA_NOEXCEPT + { + return const_reverse_iterator(end()); + } + + + template + typename bitvector::const_reverse_iterator + bitvector::crbegin() const EA_NOEXCEPT + { + return const_reverse_iterator(end()); + } + + + template + typename bitvector::reverse_iterator + bitvector::rend() EA_NOEXCEPT + { + return reverse_iterator(begin()); + } + + + template + typename bitvector::const_reverse_iterator + bitvector::rend() const EA_NOEXCEPT + { + return const_reverse_iterator(begin()); + } + + + template + typename bitvector::const_reverse_iterator + bitvector::crend() const EA_NOEXCEPT + { + return const_reverse_iterator(begin()); + } + + + template + typename bitvector::reference + bitvector::front() + { + EASTL_ASSERT(!empty()); + return reference(&mContainer[0], 0); + } + + + template + typename bitvector::const_reference + bitvector::front() const + { + EASTL_ASSERT(!empty()); + + // To consider: make a better solution to this than const_cast. 
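+ // (The temporary reference converts straight to bool for the const_reference return
+ // value, so the const_cast below cannot be used to modify the container.)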
+ return reference(const_cast(&mContainer[0]), 0); + } + + + template + typename bitvector::reference + bitvector::back() + { + EASTL_ASSERT(!empty()); + return *(--end()); + } + + + template + typename bitvector::const_reference + bitvector::back() const + { + EASTL_ASSERT(!empty()); + return *(--end()); + } + + + template + void bitvector::push_back() + { + if(!mFreeBitCount) + { + mContainer.push_back(); + mFreeBitCount = kBitCount; + } + + --mFreeBitCount; + } + + + template + void bitvector::push_back(value_type value) + { + push_back(); + *--end() = value; + } + + + template + void bitvector::pop_back() + { + EASTL_ASSERT(!empty()); + + if(++mFreeBitCount == kBitCount) + { + mContainer.pop_back(); + mFreeBitCount = 0; + } + } + + + template + void bitvector::reserve(size_type n) + { + const size_type wordCount = (n + kBitCount - 1) / kBitCount; + mContainer.reserve(wordCount); + } + + + template + void bitvector::resize(size_type n) + { + const size_type wordCount = (n + kBitCount - 1) / kBitCount; + const size_type extra = (wordCount * kBitCount) - n; + + mContainer.resize(wordCount); + mFreeBitCount = extra; + } + + + template + void bitvector::resize(size_type n, value_type value) + { + const size_type s = size(); + if(n < s) + resize(n); + + // Fill up to the end of a word + size_type newbits = n - s; + + while(mFreeBitCount && newbits) + { + push_back(value); + --newbits; + } + + // Fill the rest a word at a time + if(newbits) + { + element_type element(0); + if(value) + element = ~element; + + const size_type words = (n + kBitCount - 1) / kBitCount; + const size_type extra = words * kBitCount - n; + mContainer.resize(words, element); + mFreeBitCount = extra; + } + } + + + template + bool bitvector::test(size_type n, bool defaultValue) const + { + if(n < size()) + return *(begin() + (difference_type)n); + + return defaultValue; + } + + + template + void bitvector::set(size_type n, bool value) + { + if(EASTL_UNLIKELY(n >= size())) + resize(n + 1); + + *(begin() + (difference_type)n) = value; + } + + + template + typename bitvector::reference + bitvector::at(size_type n) + { + // The difference between at and operator[] is that at signals + // if the requested position is out of range by throwing an + // out_of_range exception. 
+ + #if EASTL_EXCEPTIONS_ENABLED + if(EASTL_UNLIKELY(n >= size())) + throw std::out_of_range("bitvector::at -- out of range"); + #elif EASTL_ASSERT_ENABLED + if(EASTL_UNLIKELY(n >= size())) + EASTL_FAIL_MSG("bitvector::at -- out of range"); + #endif + + return *(begin() + (difference_type)n); + } + + + template + typename bitvector::const_reference + bitvector::at(size_type n) const + { + #if EASTL_EXCEPTIONS_ENABLED + if(EASTL_UNLIKELY(n >= size())) + throw std::out_of_range("bitvector::at -- out of range"); + #elif EASTL_ASSERT_ENABLED + if(EASTL_UNLIKELY(n >= size())) + EASTL_FAIL_MSG("bitvector::at -- out of range"); + #endif + + return *(begin() + (difference_type)n); + } + + + template + typename bitvector::reference + bitvector::operator[](size_type n) + { + return *(begin() + (difference_type)n); + } + + + template + typename bitvector::const_reference + bitvector::operator[](size_type n) const + { + return *(begin() + (difference_type)n); + } + + +/* + template + template + typename bitvector::iterator + bitvector::find_first() + { + return begin(); + } + + template iterator find_next(const_iterator it); + template iterator find_last(); + template iterator find_prev(const_iterator it); + + template const_iterator find_first() const; + template const_iterator find_next(const_iterator it) const; + template const_iterator find_last() const; + template const_iterator find_prev(const_iterator it) const; +*/ + + + + + template + inline typename bitvector::container_type& + bitvector::get_container() + { + return mContainer; + } + + + template + inline const typename bitvector::container_type& + bitvector::get_container() const + { + return mContainer; + } + + + template + bool bitvector::validate() const + { + if(!mContainer.validate()) + return false; + + if((unsigned)mFreeBitCount >= kBitCount) + return false; + + return true; + } + + + template + int bitvector::validate_iterator(const_iterator i) const + { + return i.validate(mContainer.begin(), mContainer.end(), mFreeBitCount); + } + + + template + typename bitvector::element_type* + bitvector::data() EA_NOEXCEPT + { + return mContainer.data(); + } + + + template + const typename bitvector::element_type* + bitvector::data() const EA_NOEXCEPT + { + return mContainer.data(); + } + + + template + typename bitvector::iterator + bitvector::insert(const_iterator position, value_type value) + { + iterator iPosition(position.get_reference_type()); // This is just a non-const version of position. + + #if EASTL_ASSERT_ENABLED + if(EASTL_UNLIKELY(validate_iterator(iPosition) & eastl::isf_valid) == 0) + EASTL_FAIL_MSG("bitvector::insert -- invalid iterator"); + #endif + + // Save because we might reallocate + const typename iterator::difference_type n = iPosition - begin(); + push_back(); + iPosition = begin() + n; + + MoveBits(iPosition, --end(), ++iterator(iPosition)); + *iPosition = value; + + return iPosition; + } + + + template + void bitvector::insert(const_iterator position, size_type n, value_type value) + { + iterator iPosition(position.get_reference_type()); // This is just a non-const version of position. + + #if EASTL_ASSERT_ENABLED + if(EASTL_UNLIKELY(validate_iterator(iPosition) & eastl::isf_valid) == 0) + EASTL_FAIL_MSG("bitvector::insert -- invalid iterator"); + #endif + + // Save because we might reallocate. 
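+ // (iPosition wraps a raw pointer into mContainer's storage, so it is recomputed from
+ // the saved bit offset after the resize below.)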
+ const typename iterator::difference_type p = iPosition - begin(); + resize(size() + n); + iPosition = begin() + p; + + iterator insert_end = iPosition + n; + MoveBits(iPosition, end() - n, insert_end); + + // To do: Optimize this to word-at-a-time for large inserts + while(iPosition != insert_end) + { + *iPosition = value; + ++iPosition; + } + } + + + /* + The following is a placeholder for a future implementation. It turns out that a correct implementation of + insert(pos, first, last) is a non-trivial exercise that would take a few hours to implement and test. + The reasons why involve primarily the problem of handling the case where insertion source comes from + within the container itself, and the case that first and last (note they are templated) might not refer + to iterators might refer to a value/count pair. The C++ Standard requires you to handle this case and + I (Paul Pedriana) believe that it applies even for a bitvector, given that bool is an integral type. + So you have to set up a compile-time type traits function chooser. See vector, for example. + + template + template + void bitvector::insert(const_iterator position, InputIterator first, InputIterator last) + { + iterator iPosition(position.get_reference_type()); // This is just a non-const version of position. + + // This implementation is probably broken due to not handling insertion into self. + // To do: Make a more efficient version of this. + difference_type distance = (iPosition - begin()); + + while(first != last) + { + insert(iPosition, *first); + iPosition = begin() + ++distance; + ++first; + } + } + */ + + + template + typename bitvector::iterator + bitvector::erase(const_iterator position) + { + iterator iPosition(position.get_reference_type()); // This is just a non-const version of position. + + #if EASTL_ASSERT_ENABLED + if(EASTL_UNLIKELY(validate_iterator(iPosition) & eastl::isf_can_dereference) == 0) + EASTL_FAIL_MSG("bitvector::erase -- invalid iterator"); + #endif + + MoveBits(++iterator(iPosition), end(), iPosition); + resize(size() - 1); + + // Verify that no reallocation occurred. + EASTL_ASSERT(validate_iterator(iPosition) & eastl::isf_valid); + return iPosition; + } + + + template + typename bitvector::iterator + bitvector::erase(const_iterator first, const_iterator last) + { + iterator iFirst(first.get_reference_type()); // This is just a non-const version of first. + iterator iLast(last.get_reference_type()); // This is just a non-const version of last. + + #if EASTL_ASSERT_ENABLED + if(EASTL_UNLIKELY(validate_iterator(iLast) & eastl::isf_valid) == 0) + EASTL_FAIL_MSG("bitvector::erase -- invalid iterator"); + #endif + + if(!(iFirst == iLast)) + { + #if EASTL_ASSERT_ENABLED + if(EASTL_UNLIKELY(validate_iterator(iFirst) & eastl::isf_can_dereference) == 0) + EASTL_FAIL_MSG("bitvector::erase -- invalid iterator"); + #endif + + const size_type eraseCount = (size_type)(iLast - iFirst); + MoveBits(iLast, end(), iFirst); + resize(size() - eraseCount); + + // Verify that no reallocation occurred. 
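// Editorial note (not part of this patch): the resize() above only shrinks the
// container, so the underlying word storage is not reallocated and iFirst is
// expected to remain valid; the assertion below documents that assumption.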
+ #if EASTL_ASSERT_ENABLED + if(EASTL_UNLIKELY(validate_iterator(iFirst) & eastl::isf_valid) == 0) + EASTL_FAIL_MSG("bitvector::erase -- invalid iterator"); + #endif + } + + return iFirst; + } + + + template + typename bitvector::reverse_iterator + bitvector::erase(const_reverse_iterator position) + { + return reverse_iterator(erase((++position).base())); + } + + + template + typename bitvector::reverse_iterator + bitvector::erase(const_reverse_iterator first, const_reverse_iterator last) + { + // Version which erases in order from first to last. + // difference_type i(first.base() - last.base()); + // while(i--) + // first = erase(first); + // return first; + + // Version which erases in order from last to first, but is slightly more efficient: + return reverse_iterator(erase(last.base(), first.base())); + } + + + template + void bitvector::swap(this_type& rhs) + { + mContainer.swap(rhs.mContainer); + eastl::swap(mFreeBitCount, rhs.mFreeBitCount); + } + + + template + void bitvector::reset_lose_memory() + { + mContainer.reset_lose_memory(); // intentional memory leak. + mFreeBitCount = 0; + } + + + template + void bitvector::clear() + { + mContainer.clear(); + mFreeBitCount = 0; + } + + + template + bitvector& + bitvector::operator=(const bitvector& rhs) + { + // The following is OK if (&rhs == this) + mContainer = rhs.mContainer; + mFreeBitCount = rhs.mFreeBitCount; + + return *this; + } + + + template + bitvector::bitvector() + : mContainer(), + mFreeBitCount(0) + { + } + + + template + bitvector::bitvector(const allocator_type& allocator) + : mContainer(allocator), + mFreeBitCount(0) + { + } + + + template + bitvector::bitvector(size_type n, const allocator_type& allocator) + : mContainer((n + kBitCount - 1) / kBitCount, allocator) + { + mFreeBitCount = kBitCount - (n % kBitCount); + + if(mFreeBitCount == kBitCount) + mFreeBitCount = 0; + } + + + template + bitvector::bitvector(size_type n, value_type value, const allocator_type& allocator) + : mContainer((n + kBitCount - 1) / kBitCount, value ? ~element_type(0) : element_type(0), allocator) + { + mFreeBitCount = kBitCount - (n % kBitCount); + + if(mFreeBitCount == kBitCount) + mFreeBitCount = 0; + } + + + template + bitvector::bitvector(const bitvector& copy) + : mContainer(copy.mContainer), + mFreeBitCount(copy.mFreeBitCount) + { + } + + + template + template + bitvector::bitvector(InputIterator first, InputIterator last) + : mContainer(), + mFreeBitCount(0) + { + assign(first, last); + } + + + + /////////////////////////////////////////////////////////////////////// + // global operators + /////////////////////////////////////////////////////////////////////// + + template + inline bool operator==(const bitvector& a, + const bitvector& b) + { + // To do: Replace this with a smart compare implementation. This is much slower than it needs to be. + return ((a.size() == b.size()) && eastl::equal(a.begin(), a.end(), b.begin())); + } + + + template + inline bool operator!=(const bitvector& a, + const bitvector& b) + { + return !operator==(a, b); + } + + + template + inline bool operator<(const bitvector& a, + const bitvector& b) + { + // To do: Replace this with a smart compare implementation. This is much slower than it needs to be. 
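// Editorial note (not part of this patch): the comparison below is bit-wise
// lexicographical starting at index 0: the first position at which the two
// bitvectors differ decides the result (false < true), and a proper prefix
// compares less than the longer sequence.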
+ return eastl::lexicographical_compare(a.begin(), a.end(), b.begin(), b.end()); + } + + + template + inline bool operator>(const bitvector& a, + const bitvector& b) + { + return b < a; + } + + + template + inline bool operator<=(const bitvector& a, + const bitvector& b) + { + return !(b < a); + } + + + template + inline bool operator>=(const bitvector& a, + const bitvector& b) + { + return !(a < b); + } + + template + inline void swap(bitvector& a, + bitvector& b) + { + a.swap(b); + } + + +} // namespace eastl + + +EA_RESTORE_VC_WARNING(); + +#endif // Header include guard diff --git a/libkram/eastl/include/EASTL/bonus/adaptors.h b/libkram/eastl/include/EASTL/bonus/adaptors.h new file mode 100644 index 00000000..423cacdd --- /dev/null +++ b/libkram/eastl/include/EASTL/bonus/adaptors.h @@ -0,0 +1,88 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ADAPTORS_H +#define EASTL_ADAPTORS_H + + +#include +#include +#include +#include + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + +EA_DISABLE_VC_WARNING(4512 4626) +#if defined(_MSC_VER) && (_MSC_VER >= 1900) // VS2015+ + EA_DISABLE_VC_WARNING(5027) // move assignment operator was implicitly defined as deleted +#endif + + +namespace eastl +{ + /// reverse + /// + /// This adaptor allows reverse iteration of a container in ranged base for-loops. + /// + /// for (auto& i : reverse(c)) { ... } + /// + template + struct reverse_wrapper + { + template + reverse_wrapper(C&& c) + : mContainer(eastl::forward(c)) + { + /** + * NOTE: + * + * Due to reference collapsing rules of universal references Container type is either + * + * const C& if the input is a const lvalue + * C& if the input is a non-const lvalue + * C if the input is an rvalue + * const C if the input is a const rvalue thus the object will have to be copied and the copy-ctor will be called + * + * + * Thus we either move the whole container into this object or take a reference to the lvalue avoiding the copy. + * The static_assert below ensures this. 
+ */ + static_assert(eastl::is_same_v, "Reference collapsed deduced type must be the same as the deduced Container type!"); + } + + Container mContainer; + }; + + template + auto begin(const reverse_wrapper& w) -> decltype(eastl::rbegin(w.mContainer)) + { + return eastl::rbegin(w.mContainer); + } + + template + auto end(const reverse_wrapper& w) -> decltype(eastl::rend(w.mContainer)) + { + return eastl::rend(w.mContainer); + } + + template + reverse_wrapper reverse(Container&& c) + { + return reverse_wrapper(eastl::forward(c)); + } + +} // namespace eastl + +#if defined(_MSC_VER) && (_MSC_VER >= 1900) // VS2015+ + EA_RESTORE_VC_WARNING() +#endif +EA_RESTORE_VC_WARNING() + +#endif // Header include guard diff --git a/libkram/eastl/include/EASTL/bonus/call_traits.h b/libkram/eastl/include/EASTL/bonus/call_traits.h new file mode 100644 index 00000000..0995d051 --- /dev/null +++ b/libkram/eastl/include/EASTL/bonus/call_traits.h @@ -0,0 +1,117 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// The design for call_traits here is very similar to that found in template +// metaprogramming libraries such as Boost, GCC, and Metrowerks, given that +// these libraries have established this interface as a defacto standard for +// solving this problem. Also, these are described in various books on the +// topic of template metaprogramming, such as "Modern C++ Design". +// +// See http://www.boost.org/libs/utility/call_traits.htm or search for +// call_traits in Google for a description of call_traits. +/////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_CALL_TRAITS_H +#define EASTL_CALL_TRAITS_H + + +#include +#include + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. 
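// Editorial note (illustration only, not part of this patch): a rough sketch of
// what call_traits<T>::param_type resolves to for a few representative types,
// per the specializations below; the exact "small enough to pass by value"
// cut-off is an implementation detail, and some_class stands in for any
// hypothetical non-trivial class type.
//
//     call_traits<int>::param_type         -> const int         (by value)
//     call_traits<some_class>::param_type  -> const some_class&
//     call_traits<int&>::param_type        -> int&
//     call_traits<int[4]>::param_type      -> const int* const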
+#endif + + + +namespace eastl +{ + + + template + struct ct_imp2 { typedef const T& param_type; }; + + template + struct ct_imp2 { typedef const T param_type; }; + + template + struct ct_imp { typedef const T& param_type; }; + + template + struct ct_imp { typedef typename ct_imp2::param_type param_type; }; + + template + struct ct_imp { typedef T const param_type; }; + + + + template + struct call_traits + { + public: + typedef T value_type; + typedef T& reference; + typedef const T& const_reference; + typedef typename ct_imp::value, is_arithmetic::value>::param_type param_type; + }; + + + template + struct call_traits + { + typedef T& value_type; + typedef T& reference; + typedef const T& const_reference; + typedef T& param_type; + }; + + + template + struct call_traits + { + private: + typedef T array_type[N]; + + public: + typedef const T* value_type; + typedef array_type& reference; + typedef const array_type& const_reference; + typedef const T* const param_type; + }; + + + template + struct call_traits + { + private: + typedef const T array_type[N]; + + public: + typedef const T* value_type; + typedef array_type& reference; + typedef const array_type& const_reference; + typedef const T* const param_type; + }; + + +} // namespace eastl + + +#endif // Header include guard + + + + + + + + + + + + + + diff --git a/libkram/eastl/include/EASTL/bonus/compressed_pair.h b/libkram/eastl/include/EASTL/bonus/compressed_pair.h new file mode 100644 index 00000000..379642ba --- /dev/null +++ b/libkram/eastl/include/EASTL/bonus/compressed_pair.h @@ -0,0 +1,460 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// The compressed pair class is very similar to std::pair, but if either of the +// template arguments are empty classes, then the "empty base-class optimization" +// is applied to compress the size of the pair. +// +// The design for compressed_pair here is very similar to that found in template +// metaprogramming libraries such as Boost, GCC, and Metrowerks, given that +// these libraries have established this interface as a defacto standard for +// solving this problem. Also, these are described in various books on the +// topic of template metaprogramming, such as "Modern C++ Design". 
+// +// template +// class compressed_pair +// { +// public: +// typedef T1 first_type; +// typedef T2 second_type; +// typedef typename call_traits::param_type first_param_type; +// typedef typename call_traits::param_type second_param_type; +// typedef typename call_traits::reference first_reference; +// typedef typename call_traits::reference second_reference; +// typedef typename call_traits::const_reference first_const_reference; +// typedef typename call_traits::const_reference second_const_reference; +// +// compressed_pair() : base() {} +// compressed_pair(first_param_type x, second_param_type y); +// explicit compressed_pair(first_param_type x); +// explicit compressed_pair(second_param_type y); +// +// compressed_pair& operator=(const compressed_pair&); +// +// first_reference first(); +// first_const_reference first() const; +// +// second_reference second(); +// second_const_reference second() const; +// +// void swap(compressed_pair& y); +// }; +// +// The two members of the pair can be accessed using the member functions first() +// and second(). Note that not all member functions can be instantiated for all +// template parameter types. In particular compressed_pair can be instantiated for +// reference and array types, however in these cases the range of constructors that +// can be used are limited. If types T1 and T2 are the same type, then there is +// only one version of the single-argument constructor, and this constructor +// initialises both values in the pair to the passed value. +// +// Note that compressed_pair can not be instantiated if either of the template +// arguments is a union type, unless there is compiler support for is_union, +// or if is_union is specialised for the union type. +/////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_COMPRESSED_PAIR_H +#define EASTL_COMPRESSED_PAIR_H + + +#include +#include +#include + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. 
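// Editorial note (illustration only, not part of this patch): a minimal sketch
// of the empty-base optimization described above, using a hypothetical
// stateless functor; exact sizes depend on the ABI, but the empty member is
// folded into a base class rather than stored.
//
//     struct empty_deleter { void operator()(void*) const {} };
//
//     eastl::compressed_pair<int*, empty_deleter> p;
//     p.first() = nullptr;   // members are reached via first() / second()
//     // sizeof(p) is typically sizeof(int*), whereas a plain struct holding
//     // both an int* and an empty_deleter would usually be larger.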
+#endif + +#if defined(_MSC_VER) && (_MSC_VER >= 1900) // VS2015 or later + EA_DISABLE_VC_WARNING(4626 5027) // warning C4626: 'eastl::compressed_pair_imp': assignment operator was implicitly defined as deleted because a base class assignment operator is inaccessible or deleted +#endif + +namespace eastl +{ + + template + class compressed_pair; + + + template + struct compressed_pair_switch; + + template + struct compressed_pair_switch{ static const int value = 0; }; + + template + struct compressed_pair_switch { static const int value = 1; }; + + template + struct compressed_pair_switch { static const int value = 2; }; + + template + struct compressed_pair_switch { static const int value = 3; }; + + template + struct compressed_pair_switch { static const int value = 4; }; + + template + struct compressed_pair_switch { static const int value = 5; }; + + template + class compressed_pair_imp; + + + + template + inline void cp_swap(T& t1, T& t2) + { + T tTemp = t1; + t1 = t2; + t2 = tTemp; + } + + + // Derive from neither + template + class compressed_pair_imp + { + public: + typedef T1 first_type; + typedef T2 second_type; + typedef typename call_traits::param_type first_param_type; + typedef typename call_traits::param_type second_param_type; + typedef typename call_traits::reference first_reference; + typedef typename call_traits::reference second_reference; + typedef typename call_traits::const_reference first_const_reference; + typedef typename call_traits::const_reference second_const_reference; + + compressed_pair_imp() {} + + compressed_pair_imp(first_param_type x, second_param_type y) + : mFirst(x), mSecond(y) {} + + compressed_pair_imp(first_param_type x) + : mFirst(x) {} + + compressed_pair_imp(second_param_type y) + : mSecond(y) {} + + first_reference first() { return mFirst; } + first_const_reference first() const { return mFirst; } + + second_reference second() { return mSecond; } + second_const_reference second() const { return mSecond; } + + void swap(compressed_pair& y) + { + cp_swap(mFirst, y.first()); + cp_swap(mSecond, y.second()); + } + + private: + first_type mFirst; + second_type mSecond; + }; + + + // Derive from T1 + template + class compressed_pair_imp : private T1 + { + public: + typedef T1 first_type; + typedef T2 second_type; + typedef typename call_traits::param_type first_param_type; + typedef typename call_traits::param_type second_param_type; + typedef typename call_traits::reference first_reference; + typedef typename call_traits::reference second_reference; + typedef typename call_traits::const_reference first_const_reference; + typedef typename call_traits::const_reference second_const_reference; + + compressed_pair_imp() {} + + compressed_pair_imp(first_param_type x, second_param_type y) + : first_type(x), mSecond(y) {} + + compressed_pair_imp(first_param_type x) + : first_type(x) {} + + compressed_pair_imp(second_param_type y) + : mSecond(y) {} + + first_reference first() { return *this; } + first_const_reference first() const { return *this; } + + second_reference second() { return mSecond; } + second_const_reference second() const { return mSecond; } + + void swap(compressed_pair& y) + { + // No need to swap empty base class + cp_swap(mSecond, y.second()); + } + + private: + second_type mSecond; + }; + + + + // Derive from T2 + template + class compressed_pair_imp : private T2 + { + public: + typedef T1 first_type; + typedef T2 second_type; + typedef typename call_traits::param_type first_param_type; + typedef typename call_traits::param_type 
second_param_type; + typedef typename call_traits::reference first_reference; + typedef typename call_traits::reference second_reference; + typedef typename call_traits::const_reference first_const_reference; + typedef typename call_traits::const_reference second_const_reference; + + compressed_pair_imp() {} + + compressed_pair_imp(first_param_type x, second_param_type y) + : second_type(y), mFirst(x) {} + + compressed_pair_imp(first_param_type x) + : mFirst(x) {} + + compressed_pair_imp(second_param_type y) + : second_type(y) {} + + first_reference first() { return mFirst; } + first_const_reference first() const { return mFirst; } + + second_reference second() { return *this; } + second_const_reference second() const { return *this; } + + void swap(compressed_pair& y) + { + // No need to swap empty base class + cp_swap(mFirst, y.first()); + } + + private: + first_type mFirst; + }; + + + + // Derive from T1 and T2 + template + class compressed_pair_imp : private T1, private T2 + { + public: + typedef T1 first_type; + typedef T2 second_type; + typedef typename call_traits::param_type first_param_type; + typedef typename call_traits::param_type second_param_type; + typedef typename call_traits::reference first_reference; + typedef typename call_traits::reference second_reference; + typedef typename call_traits::const_reference first_const_reference; + typedef typename call_traits::const_reference second_const_reference; + + compressed_pair_imp() {} + + compressed_pair_imp(first_param_type x, second_param_type y) + : first_type(x), second_type(y) {} + + compressed_pair_imp(first_param_type x) + : first_type(x) {} + + compressed_pair_imp(second_param_type y) + : second_type(y) {} + + first_reference first() { return *this; } + first_const_reference first() const { return *this; } + + second_reference second() { return *this; } + second_const_reference second() const { return *this; } + + // No need to swap empty bases + void swap(compressed_pair&) + { } + }; + + + // T1 == T2, T1 and T2 are both empty + // Note does not actually store an instance of T2 at all; + // but reuses T1 base class for both first() and second(). 
+ template + class compressed_pair_imp : private T1 + { + public: + typedef T1 first_type; + typedef T2 second_type; + typedef typename call_traits::param_type first_param_type; + typedef typename call_traits::param_type second_param_type; + typedef typename call_traits::reference first_reference; + typedef typename call_traits::reference second_reference; + typedef typename call_traits::const_reference first_const_reference; + typedef typename call_traits::const_reference second_const_reference; + + compressed_pair_imp() {} + + compressed_pair_imp(first_param_type x, second_param_type) + : first_type(x) {} + + compressed_pair_imp(first_param_type x) + : first_type(x) {} + + first_reference first() { return *this; } + first_const_reference first() const { return *this; } + + second_reference second() { return *this; } + second_const_reference second() const { return *this; } + + void swap(compressed_pair&) { } + }; + + + // T1 == T2 and are not empty + template + class compressed_pair_imp + { + public: + typedef T1 first_type; + typedef T2 second_type; + typedef typename call_traits::param_type first_param_type; + typedef typename call_traits::param_type second_param_type; + typedef typename call_traits::reference first_reference; + typedef typename call_traits::reference second_reference; + typedef typename call_traits::const_reference first_const_reference; + typedef typename call_traits::const_reference second_const_reference; + + compressed_pair_imp() {} + + compressed_pair_imp(first_param_type x, second_param_type y) + : mFirst(x), mSecond(y) {} + + compressed_pair_imp(first_param_type x) + : mFirst(x), mSecond(x) {} + + first_reference first() { return mFirst; } + first_const_reference first() const { return mFirst; } + + second_reference second() { return mSecond; } + second_const_reference second() const { return mSecond; } + + void swap(compressed_pair& y) + { + cp_swap(mFirst, y.first()); + cp_swap(mSecond, y.second()); + } + + private: + first_type mFirst; + second_type mSecond; + }; + + + + template + class compressed_pair + : private compressed_pair_imp::type, typename remove_cv::type>::value, + is_empty::value, + is_empty::value>::value> + { + private: + typedef compressed_pair_imp::type, typename remove_cv::type>::value, + is_empty::value, + is_empty::value>::value> base; + public: + typedef T1 first_type; + typedef T2 second_type; + typedef typename call_traits::param_type first_param_type; + typedef typename call_traits::param_type second_param_type; + typedef typename call_traits::reference first_reference; + typedef typename call_traits::reference second_reference; + typedef typename call_traits::const_reference first_const_reference; + typedef typename call_traits::const_reference second_const_reference; + + compressed_pair() : base() {} + compressed_pair(first_param_type x, second_param_type y) : base(x, y) {} + explicit compressed_pair(first_param_type x) : base(x) {} + explicit compressed_pair(second_param_type y) : base(y) {} + + first_reference first() { return base::first(); } + first_const_reference first() const { return base::first(); } + + second_reference second() { return base::second(); } + second_const_reference second() const { return base::second(); } + + void swap(compressed_pair& y) { base::swap(y); } + }; + + + // Partial specialisation for case where T1 == T2: + template + class compressed_pair + : private compressed_pair_imp::type, typename remove_cv::type>::value, + is_empty::value, + is_empty::value>::value> + { + private: + typedef 
compressed_pair_imp::type, typename remove_cv::type>::value, + is_empty::value, + is_empty::value>::value> base; + public: + typedef T first_type; + typedef T second_type; + typedef typename call_traits::param_type first_param_type; + typedef typename call_traits::param_type second_param_type; + typedef typename call_traits::reference first_reference; + typedef typename call_traits::reference second_reference; + typedef typename call_traits::const_reference first_const_reference; + typedef typename call_traits::const_reference second_const_reference; + + compressed_pair() : base() {} + compressed_pair(first_param_type x, second_param_type y) : base(x, y) {} + explicit compressed_pair(first_param_type x) : base(x) {} + + first_reference first() { return base::first(); } + first_const_reference first() const { return base::first(); } + + second_reference second() { return base::second(); } + second_const_reference second() const { return base::second(); } + + void swap(compressed_pair& y) { base::swap(y); } + }; + + + template + inline void swap(compressed_pair& x, compressed_pair& y) + { + x.swap(y); + } + + +} // namespace eastl + +#if defined(_MSC_VER) && (_MSC_VER >= 1900) // VS2015 or later + EA_RESTORE_VC_WARNING() +#endif + +#endif // Header include guard + + + diff --git a/libkram/eastl/include/EASTL/bonus/fixed_ring_buffer.h b/libkram/eastl/include/EASTL/bonus/fixed_ring_buffer.h new file mode 100644 index 00000000..2bb54e47 --- /dev/null +++ b/libkram/eastl/include/EASTL/bonus/fixed_ring_buffer.h @@ -0,0 +1,50 @@ +/////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_FIXED_RING_BUFFER_H +#define EASTL_FIXED_RING_BUFFER_H + +#include +#include +#include + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + +namespace eastl +{ + + /// fixed_ring_buffer + /// + /// This is a convenience template alias for creating a fixed-sized + /// ring_buffer using eastl::fixed_vector as its storage container. This has + /// been tricky for users to get correct due to the constructor requirements + /// of eastl::ring_buffer leaking the implementation detail of the sentinel + /// value being used internally. In addition, it was not obvious what the + /// correct allocator_type template parameter should be used for containers + /// providing both a default allocator type and an overflow allocator type. + /// + /// We are over-allocating the fixed_vector container to accommodate the + /// ring_buffer sentinel to prevent that implementation detail leaking into + /// user code. 
+ /// + /// Example usage: + /// + /// fixed_ring_buffer rb = {0, 1, 2, 3, 4, 5, 6, 7}; + /// or + /// fixed_ring_buffer rb(8); // capacity doesn't need to respect sentinel + /// rb.push_back(0); + /// + /// +#if !defined(EA_COMPILER_NO_TEMPLATE_ALIASES) + template + using fixed_ring_buffer = + ring_buffer, typename fixed_vector::overflow_allocator_type>; +#endif + +} // namespace eastl + +#endif // Header include guard + diff --git a/libkram/eastl/include/EASTL/bonus/fixed_tuple_vector.h b/libkram/eastl/include/EASTL/bonus/fixed_tuple_vector.h new file mode 100644 index 00000000..e9ce0ec0 --- /dev/null +++ b/libkram/eastl/include/EASTL/bonus/fixed_tuple_vector.h @@ -0,0 +1,210 @@ +/////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_FIXEDTUPLEVECTOR_H +#define EASTL_FIXEDTUPLEVECTOR_H + +#include +#include + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + +namespace eastl +{ + + /// EASTL_FIXED_TUPLE_VECTOR_DEFAULT_NAME + /// + /// Defines a default container name in the absence of a user-provided name. + /// In the case of fixed-size containers, the allocator name always refers + /// to overflow allocations. + /// + #ifndef EASTL_FIXED_TUPLE_VECTOR_DEFAULT_NAME + #define EASTL_FIXED_TUPLE_VECTOR_DEFAULT_NAME EASTL_DEFAULT_NAME_PREFIX " fixed_tuple_vector" // Unless the user overrides something, this is "EASTL fixed_vector". + #endif + + + /// EASTL_FIXED_TUPLE_VECTOR_DEFAULT_ALLOCATOR + /// + #ifndef EASTL_FIXED_TUPLE_VECTOR_DEFAULT_ALLOCATOR + #define EASTL_FIXED_TUPLE_VECTOR_DEFAULT_ALLOCATOR overflow_allocator_type(EASTL_FIXED_TUPLE_VECTOR_DEFAULT_NAME) + #endif + +// External interface of fixed_tuple_vector +template +class fixed_tuple_vector : public TupleVecInternal::TupleVecImpl::GetTotalAllocationSize(nodeCount, 0), 1, + TupleVecInternal::TupleRecurser::GetTotalAlignment(), 0, + bEnableOverflow, EASTLAllocatorType>, make_index_sequence, Ts...> +{ +public: + typedef fixed_vector_allocator< + TupleVecInternal::TupleRecurser::GetTotalAllocationSize(nodeCount, 0), 1, + TupleVecInternal::TupleRecurser::GetTotalAlignment(), 0, + bEnableOverflow, EASTLAllocatorType> fixed_allocator_type; + typedef aligned_buffer aligned_buffer_type; + typedef fixed_tuple_vector this_type; + typedef EASTLAllocatorType overflow_allocator_type; + + typedef TupleVecInternal::TupleVecImpl, Ts...> base_type; + typedef typename base_type::size_type size_type; + +private: + aligned_buffer_type mBuffer; + +public: + fixed_tuple_vector() + : base_type(fixed_allocator_type(mBuffer.buffer), mBuffer.buffer, nodeCount, fixed_allocator_type::kNodeSize) + { } + + fixed_tuple_vector(const overflow_allocator_type& allocator) + : base_type(fixed_allocator_type(mBuffer.buffer, allocator), mBuffer.buffer, nodeCount, fixed_allocator_type::kNodeSize) + { } + + fixed_tuple_vector(this_type&& x) + : base_type(fixed_allocator_type(mBuffer.buffer), mBuffer.buffer, nodeCount, fixed_allocator_type::kNodeSize) + { + base_type::get_allocator().copy_overflow_allocator(x.get_allocator()); + base_type::DoInitFromIterator(make_move_iterator(x.begin()), make_move_iterator(x.end())); + x.clear(); + } + + fixed_tuple_vector(this_type&& x, const overflow_allocator_type& allocator) + : 
base_type(fixed_allocator_type(mBuffer.buffer, allocator), mBuffer.buffer, nodeCount, fixed_allocator_type::kNodeSize) + { + base_type::DoInitFromIterator(make_move_iterator(x.begin()), make_move_iterator(x.end())); + x.clear(); + } + + fixed_tuple_vector(const this_type& x) + : base_type(fixed_allocator_type(mBuffer.buffer), mBuffer.buffer, nodeCount, fixed_allocator_type::kNodeSize) + { + base_type::get_allocator().copy_overflow_allocator(x.get_allocator()); + base_type::DoInitFromIterator(x.begin(), x.end()); + } + + fixed_tuple_vector(const this_type& x, const overflow_allocator_type& allocator) + : base_type(fixed_allocator_type(mBuffer.buffer, allocator), mBuffer.buffer, nodeCount, fixed_allocator_type::kNodeSize) + { + base_type::DoInitFromIterator(x.begin(), x.end()); + } + + template + fixed_tuple_vector(move_iterator begin, move_iterator end, const overflow_allocator_type& allocator = EASTL_FIXED_TUPLE_VECTOR_DEFAULT_ALLOCATOR) + : base_type(fixed_allocator_type(mBuffer.buffer, allocator), mBuffer.buffer, nodeCount, fixed_allocator_type::kNodeSize) + { + base_type::DoInitFromIterator(begin, end); + } + + template + fixed_tuple_vector(Iterator begin, Iterator end, const overflow_allocator_type& allocator = EASTL_FIXED_TUPLE_VECTOR_DEFAULT_ALLOCATOR) + : base_type(fixed_allocator_type(mBuffer.buffer, allocator), mBuffer.buffer, nodeCount, fixed_allocator_type::kNodeSize) + { + base_type::DoInitFromIterator(begin, end); + } + + fixed_tuple_vector(size_type n, const overflow_allocator_type& allocator = EASTL_FIXED_TUPLE_VECTOR_DEFAULT_ALLOCATOR) + : base_type(fixed_allocator_type(mBuffer.buffer, allocator), mBuffer.buffer, nodeCount, fixed_allocator_type::kNodeSize) + { + base_type::DoInitDefaultFill(n); + } + + fixed_tuple_vector(size_type n, const Ts&... args) + : base_type(fixed_allocator_type(mBuffer.buffer), mBuffer.buffer, nodeCount, fixed_allocator_type::kNodeSize) + { + base_type::DoInitFillArgs(n, args...); + } + + fixed_tuple_vector(size_type n, const Ts&... 
args, const overflow_allocator_type& allocator) + : base_type(fixed_allocator_type(mBuffer.buffer, allocator), mBuffer.buffer, nodeCount, fixed_allocator_type::kNodeSize) + { + base_type::DoInitFillArgs(n, args...); + } + + fixed_tuple_vector(size_type n, + typename base_type::const_reference_tuple tup, + const overflow_allocator_type& allocator = EASTL_FIXED_TUPLE_VECTOR_DEFAULT_ALLOCATOR) + : base_type(fixed_allocator_type(mBuffer.buffer, allocator), mBuffer.buffer, nodeCount, fixed_allocator_type::kNodeSize) + { + base_type::DoInitFillTuple(n, tup); + } + + fixed_tuple_vector(const typename base_type::value_tuple* first, const typename base_type::value_tuple* last, + const overflow_allocator_type& allocator = EASTL_FIXED_TUPLE_VECTOR_DEFAULT_ALLOCATOR) + : base_type(fixed_allocator_type(mBuffer.buffer, allocator), mBuffer.buffer, nodeCount, fixed_allocator_type::kNodeSize) + { + base_type::DoInitFromTupleArray(first, last); + } + + fixed_tuple_vector(std::initializer_list iList, + const overflow_allocator_type& allocator = EASTL_FIXED_TUPLE_VECTOR_DEFAULT_ALLOCATOR) + : base_type(fixed_allocator_type(mBuffer.buffer, allocator), mBuffer.buffer, nodeCount, fixed_allocator_type::kNodeSize) + { + base_type::DoInitFromTupleArray(iList.begin(), iList.end()); + } + + this_type& operator=(const this_type& other) + { + base_type::operator=(other); + return *this; + } + + this_type& operator=(this_type&& other) + { + base_type::clear(); + // OK to call DoInitFromIterator in a non-ctor scenario because clear() reset everything, more-or-less + base_type::DoInitFromIterator(make_move_iterator(other.begin()), make_move_iterator(other.end())); + other.clear(); + return *this; + } + + this_type& operator=(std::initializer_list iList) + { + base_type::operator=(iList); + return *this; + } + + void swap(this_type& x) + { + // If both containers are using the heap instead of local memory + // then we can do a fast pointer swap instead of content swap. + if ((has_overflowed() && x.has_overflowed()) && (get_overflow_allocator() == x.get_overflow_allocator())) + { + base_type::swap(x); + } + else + { + // Fixed containers use a special swap that can deal with excessively large buffers. + eastl::fixed_swap(*this, x); + } + } + + // Returns the max fixed size, which is the user-supplied nodeCount parameter. + size_type max_size() const { return nodeCount; } + // Returns true if the fixed space has been fully allocated. Note that if overflow is enabled, + // the container size can be greater than nodeCount but full() could return true because the + // fixed space may have a recently freed slot. + bool full() const { return (base_type::mNumElements >= nodeCount) || ((void*)base_type::mpData != (void*)mBuffer.buffer); } + // Returns true if the allocations spilled over into the overflow allocator. Meaningful + // only if overflow is enabled. + bool has_overflowed() const { return ((void*)base_type::mpData != (void*)mBuffer.buffer); } + // Returns the value of the bEnableOverflow template parameter. 
+ bool can_overflow() const { return bEnableOverflow; } + + const overflow_allocator_type& get_overflow_allocator() const { return base_type::get_allocator().get_overflow_allocator(); } +}; + + +template +inline void swap(fixed_tuple_vector& a, + fixed_tuple_vector& b) +{ + a.swap(b); +} + + +} // namespace eastl + +#endif // EASTL_TUPLEVECTOR_H diff --git a/libkram/eastl/include/EASTL/bonus/intrusive_sdlist.h b/libkram/eastl/include/EASTL/bonus/intrusive_sdlist.h new file mode 100644 index 00000000..1b126d43 --- /dev/null +++ b/libkram/eastl/include/EASTL/bonus/intrusive_sdlist.h @@ -0,0 +1,694 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// intrusive_sdlist is a special kind of intrusive list which we say is +// "singly-doubly" linked. Instead of having a typical intrusive list node +// which looks like this: +// +// struct intrusive_sdlist_node { +// intrusive_sdlist_node *mpNext; +// intrusive_sdlist_node *mpPrev; +// }; +// +// We instead have one that looks like this: +// +// struct intrusive_sdlist_node { +// intrusive_sdlist_node* mpNext; +// intrusive_sdlist_node** mppPrevNext; +// }; +// +// This may seem to be suboptimal, but it has one specific advantage: it allows +// the intrusive_sdlist class to be the size of only one pointer instead of two. +// This may seem like a minor optimization, but some users have wanted to create +// thousands of empty instances of these. +// This is because while an intrusive_list class looks like this: +// +// class intrusive_list { +// intrusive_list_node mBaseNode; +// }; +// +// an intrusive_sdlist class looks like this: +// +// class intrusive_sdlist { +// intrusive_sdlist_node* mpNext; +// }; +// +// So here we make a list of plusses and minuses of intrusive sdlists +// compared to intrusive_lists and intrusive_slists: +// +// | list | slist | sdlist +// --------------------------------------------------------- +// min size | 8 | 4 | 4 +// node size | 8 | 4 | 8 +// anonymous erase | yes | no | yes +// reverse iteration | yes | no | no +// +/////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_INTRUSIVE_SDLIST_H +#define EASTL_INTRUSIVE_SDLIST_H + + +#include +#include +#include + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + + + +namespace eastl +{ + + + /// intrusive_sdlist_node + /// + struct intrusive_sdlist_node + { + intrusive_sdlist_node* mpNext; + intrusive_sdlist_node** mppPrevNext; + }; + + + /// IntrusiveSDListIterator + /// + template + struct IntrusiveSDListIterator + { + typedef IntrusiveSDListIterator this_type; + typedef IntrusiveSDListIterator iterator; + typedef IntrusiveSDListIterator const_iterator; + typedef eastl_size_t size_type; // See config.h for the definition of eastl_size_t, which defaults to size_t. 
+ typedef ptrdiff_t difference_type; + typedef T value_type; + typedef T node_type; + typedef Pointer pointer; + typedef Reference reference; + typedef EASTL_ITC_NS::forward_iterator_tag iterator_category; + + public: + pointer mpNode; + + public: + IntrusiveSDListIterator(); + explicit IntrusiveSDListIterator(pointer pNode); // Note that you can also construct an iterator from T via this, since value_type == node_type. + IntrusiveSDListIterator(const iterator& x); + + reference operator*() const; + pointer operator->() const; + + this_type& operator++(); + this_type operator++(int); + + }; // struct IntrusiveSDListIterator + + + + + /// intrusive_sdlist_base + /// + /// Provides a template-less base class for intrusive_sdlist. + /// + class intrusive_sdlist_base + { + public: + typedef eastl_size_t size_type; // See config.h for the definition of eastl_size_t, which defaults to size_t. + typedef ptrdiff_t difference_type; + + protected: + intrusive_sdlist_node* mpNext; + + public: + intrusive_sdlist_base(); + + bool empty() const; ///< Returns true if the container is empty. + size_type size() const; ///< Returns the number of elements in the list; O(n). + + void clear(); ///< Clears the list; O(1). No deallocation occurs. + void pop_front(); ///< Removes an element from the front of the list; O(1). The element must be present, but is not deallocated. + void reverse(); ///< Reverses a list so that front and back are swapped; O(n). + + //bool validate() const; ///< Scans a list for linkage inconsistencies; O(n) time, O(1) space. Returns false if errors are detected, such as loops or branching. + + }; // class intrusive_sdlist_base + + + + /// intrusive_sdlist + /// + template + class intrusive_sdlist : public intrusive_sdlist_base + { + public: + typedef intrusive_sdlist this_type; + typedef intrusive_sdlist_base base_type; + typedef T node_type; + typedef T value_type; + typedef typename base_type::size_type size_type; + typedef typename base_type::difference_type difference_type; + typedef T& reference; + typedef const T& const_reference; + typedef T* pointer; + typedef const T* const_pointer; + typedef IntrusiveSDListIterator iterator; + typedef IntrusiveSDListIterator const_iterator; + typedef eastl::reverse_iterator reverse_iterator; + typedef eastl::reverse_iterator const_reverse_iterator; + + public: + intrusive_sdlist(); ///< Creates an empty list. + intrusive_sdlist(const this_type& x); ///< Creates an empty list; ignores the argument. + this_type& operator=(const this_type& x); ///< Clears the list; ignores the argument. + + iterator begin(); ///< Returns an iterator pointing to the first element in the list. + const_iterator begin() const; ///< Returns a const_iterator pointing to the first element in the list. + const_iterator cbegin() const; ///< Returns a const_iterator pointing to the first element in the list. + + iterator end(); ///< Returns an iterator pointing one-after the last element in the list. + const_iterator end() const; ///< Returns a const_iterator pointing one-after the last element in the list. + const_iterator cend() const; ///< Returns a const_iterator pointing one-after the last element in the list. + + reference front(); ///< Returns a reference to the first element. The list must be empty. + const_reference front() const; ///< Returns a const reference to the first element. The list must be empty. + + void push_front(value_type& value); ///< Adds an element to the front of the list; O(1). The element is not copied. 
The element must not be in any other list. + void push_back(value_type& value); ///< Adds an element to the back of the list; O(N). The element is not copied. The element must not be in any other list. + void pop_back(); ///< Removes an element from the back of the list; O(N). The element must be present, but is not deallocated. + + bool contains(const value_type& value) const; ///< Returns true if the given element is in the list; O(n). Equivalent to (locate(x) != end()). + + iterator locate(value_type& value); ///< Converts a reference to an object in the list back to an iterator, or returns end() if it is not part of the list. O(n) + const_iterator locate(const value_type& value) const; ///< Converts a const reference to an object in the list back to a const iterator, or returns end() if it is not part of the list. O(n) + + iterator insert(iterator position, value_type& value); ///< Inserts an element before the element pointed to by the iterator. O(1) + iterator erase(iterator position); ///< Erases the element pointed to by the iterator. O(1) + iterator erase(iterator first, iterator last); ///< Erases elements within the iterator range [first, last). O(1). + void swap(intrusive_sdlist& x); ///< Swaps the contents of two intrusive lists; O(1). + + static void remove(value_type& value); ///< Erases an element from a list; O(1). Note that this is static so you don't need to know which list the element, although it must be in some list. + + void splice(iterator position, value_type& value); ///< Moves the given element into this list before the element pointed to by position; O(1). + ///< Required: x must be in some list or have first/next pointers that point it itself. + + void splice(iterator position, this_type& x); ///< Moves the contents of a list into this list before the element pointed to by position; O(1). + ///< Required: &x != this (same as std::list). + + void splice(iterator position, this_type& x, iterator xPosition); ///< Moves the given element pointed to i within the list x into the current list before + ///< the element pointed to by position; O(1). + + void splice(iterator position, this_type& x, iterator first, iterator last); ///< Moves the range of elements [first, last) from list x into the current list before + ///< the element pointed to by position; O(1). + ///< Required: position must not be in [first, last). (same as std::list). 
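// Editorial note (illustration only, not part of this patch): a minimal usage
// sketch, assuming a caller-defined node type; the container never allocates
// or copies nodes, it only links objects that the caller owns (see the
// push_front/remove notes above). Packet is a hypothetical example type.
//
//     struct Packet : public eastl::intrusive_sdlist_node { int id; };
//
//     eastl::intrusive_sdlist<Packet> list;
//     Packet p0, p1;
//     list.push_front(p0);                          // O(1), p0 is not copied
//     list.push_front(p1);
//     eastl::intrusive_sdlist<Packet>::remove(p1);  // O(1), static: no list needed
//     // p0 and p1 must outlive their membership in any list.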
+ bool validate() const; + int validate_iterator(const_iterator i) const; + + }; // intrusive_sdlist + + + + + /////////////////////////////////////////////////////////////////////// + // IntrusiveSDListIterator functions + /////////////////////////////////////////////////////////////////////// + + template + inline IntrusiveSDListIterator::IntrusiveSDListIterator() + { + #if EASTL_DEBUG + mpNode = NULL; + #endif + } + + template + inline IntrusiveSDListIterator::IntrusiveSDListIterator(pointer pNode) + : mpNode(pNode) + { + } + + template + inline IntrusiveSDListIterator::IntrusiveSDListIterator(const iterator& x) + : mpNode(x.mpNode) + { + } + + template + inline typename IntrusiveSDListIterator::reference + IntrusiveSDListIterator::operator*() const + { + return *mpNode; + } + + template + inline typename IntrusiveSDListIterator::pointer + IntrusiveSDListIterator::operator->() const + { + return mpNode; + } + + template + inline typename IntrusiveSDListIterator::this_type& + IntrusiveSDListIterator::operator++() + { + mpNode = static_cast(mpNode->mpNext); + return *this; + } + + template + inline typename IntrusiveSDListIterator::this_type + IntrusiveSDListIterator::operator++(int) + { + this_type temp = *this; + mpNode = static_cast(mpNode->mpNext); + return temp; + } + + // The C++ defect report #179 requires that we support comparisons between const and non-const iterators. + // Thus we provide additional template paremeters here to support this. The defect report does not + // require us to support comparisons between reverse_iterators and const_reverse_iterators. + template + inline bool operator==(const IntrusiveSDListIterator& a, + const IntrusiveSDListIterator& b) + { + return a.mpNode == b.mpNode; + } + + + template + inline bool operator!=(const IntrusiveSDListIterator& a, + const IntrusiveSDListIterator& b) + { + return a.mpNode != b.mpNode; + } + + + // We provide a version of operator!= for the case where the iterators are of the + // same type. This helps prevent ambiguity errors in the presence of rel_ops. + template + inline bool operator!=(const IntrusiveSDListIterator& a, + const IntrusiveSDListIterator& b) + { + return a.mpNode != b.mpNode; + } + + + + /////////////////////////////////////////////////////////////////////// + // intrusive_sdlist_base + /////////////////////////////////////////////////////////////////////// + + inline intrusive_sdlist_base::intrusive_sdlist_base() + { mpNext = NULL; } + + + inline bool intrusive_sdlist_base::empty() const + { return mpNext == NULL; } + + + inline intrusive_sdlist_base::size_type intrusive_sdlist_base::size() const + { + size_type n = 0; + for(const intrusive_sdlist_node* pCurrent = mpNext; pCurrent; pCurrent = pCurrent->mpNext) + n++; + return n; + } + + + inline void intrusive_sdlist_base::clear() + { mpNext = NULL; } // Note that we don't do anything with the list nodes. + + + inline void intrusive_sdlist_base::pop_front() + { + // To consider: Set mpNext's pointers to NULL in debug builds. + mpNext = mpNext->mpNext; + mpNext->mppPrevNext = &mpNext; + } + + + + /////////////////////////////////////////////////////////////////////// + // intrusive_sdlist + /////////////////////////////////////////////////////////////////////// + + template + inline intrusive_sdlist::intrusive_sdlist() + { + } + + + template + inline intrusive_sdlist::intrusive_sdlist(const this_type& /*x*/) + : intrusive_sdlist_base() + { + // We intentionally ignore argument x. 
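// Editorial note (not part of this patch): copying is intentionally a no-op:
// an intrusive container does not own its nodes, so duplicating the link
// pointers would leave two lists threading through the same caller-owned
// objects. The newly constructed list therefore simply starts out empty.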
+ } + + + template + inline typename intrusive_sdlist::this_type& intrusive_sdlist::operator=(const this_type& /*x*/) + { + return *this; // We intentionally ignore argument x. + } + + + template + inline typename intrusive_sdlist::iterator intrusive_sdlist::begin() + { return iterator(static_cast(mpNext)); } + + + template + inline typename intrusive_sdlist::const_iterator intrusive_sdlist::begin() const + { return const_iterator(static_cast(const_cast(mpNext))); } + + + template + inline typename intrusive_sdlist::const_iterator intrusive_sdlist::cbegin() const + { return const_iterator(static_cast(const_cast(mpNext))); } + + + template + inline typename intrusive_sdlist::iterator intrusive_sdlist::end() + { return iterator(static_cast(NULL)); } + + + template + inline typename intrusive_sdlist::const_iterator intrusive_sdlist::end() const + { return const_iterator(static_cast(NULL)); } + + + template + inline typename intrusive_sdlist::const_iterator intrusive_sdlist::cend() const + { return const_iterator(static_cast(NULL)); } + + + template + inline typename intrusive_sdlist::reference intrusive_sdlist::front() + { return *static_cast(mpNext); } + + + template + inline typename intrusive_sdlist::const_reference intrusive_sdlist::front() const + { return *static_cast(mpNext); } + + + template + inline void intrusive_sdlist::push_front(value_type& value) + { + value.mpNext = mpNext; + value.mppPrevNext = &mpNext; + if(mpNext) + mpNext->mppPrevNext = &value.mpNext; + mpNext = &value; + } + + + template + inline void intrusive_sdlist::push_back(value_type& value) + { + intrusive_sdlist_node* pNext = mpNext; + intrusive_sdlist_node** ppPrevNext = &mpNext; + + while(pNext) + { + ppPrevNext = &pNext->mpNext; + pNext = pNext->mpNext; + } + + *ppPrevNext = &value; + value.mppPrevNext = ppPrevNext; + value.mpNext = NULL; + } + + + template + inline void intrusive_sdlist::pop_back() + { + node_type* pCurrent = static_cast(mpNext); + + while(pCurrent->mpNext) + pCurrent = static_cast(pCurrent->mpNext); + + *pCurrent->mppPrevNext = NULL; + } + + template + inline bool intrusive_sdlist::contains(const value_type& value) const + { + const intrusive_sdlist_node* pCurrent; + + for(pCurrent = mpNext; pCurrent; pCurrent = pCurrent->mpNext) + { + if(pCurrent == &value) + break; + } + + return (pCurrent != NULL); + } + + + template + inline typename intrusive_sdlist::iterator intrusive_sdlist::locate(value_type& value) + { + intrusive_sdlist_node* pCurrent; + + for(pCurrent = static_cast(mpNext); pCurrent; pCurrent = pCurrent->mpNext) + { + if(pCurrent == &value) + break; + } + + return iterator(static_cast(pCurrent)); + } + + + template + inline typename intrusive_sdlist::const_iterator intrusive_sdlist::locate(const T& value) const + { + const intrusive_sdlist_node* pCurrent; + + for(pCurrent = static_cast(mpNext); pCurrent; pCurrent = pCurrent->mpNext) + { + if(pCurrent == &value) + break; + } + + return const_iterator(static_cast(const_cast(pCurrent))); + } + + + template + inline typename intrusive_sdlist::iterator + intrusive_sdlist::insert(iterator position, value_type& value) + { + value.mppPrevNext = position.mpNode->mppPrevNext; + value.mpNext = position.mpNode; + *value.mppPrevNext = &value; + position.mpNode->mppPrevNext = &value.mpNext; + + return iterator(&value); + } + + + template + inline typename intrusive_sdlist::iterator + intrusive_sdlist::erase(iterator position) + { + *position.mpNode->mppPrevNext = position.mpNode->mpNext; + position.mpNode->mpNext->mppPrevNext = 
position.mpNode->mppPrevNext; + + return iterator(position.mpNode); + } + + + template + inline typename intrusive_sdlist::iterator + intrusive_sdlist::erase(iterator first, iterator last) + { + if(first.mpNode) // If not erasing the end... + { + *first.mpNode->mppPrevNext = last.mpNode; + + if(last.mpNode) // If not erasing to the end... + last.mpNode->mppPrevNext = first.mpNode->mppPrevNext; + } + + return last; + } + + + template + inline void intrusive_sdlist::remove(value_type& value) + { + *value.mppPrevNext = value.mpNext; + if(value.mpNext) + value.mpNext->mppPrevNext = value.mppPrevNext; + } + + + template + void intrusive_sdlist::swap(intrusive_sdlist& x) + { + // swap anchors + intrusive_sdlist_node* const temp(mpNext); + mpNext = x.mpNext; + x.mpNext = temp; + + if(x.mpNext) + x.mpNext->mppPrevNext = &mpNext; + + if(mpNext) + mpNext->mppPrevNext = &x.mpNext; + } + + + + + + // To do: Complete these splice functions. Might want to look at intrusive_sdlist for help. + + template + void intrusive_sdlist::splice(iterator /*position*/, value_type& /*value*/) + { + EASTL_ASSERT(false); // If you need this working, ask Paul Pedriana or submit a working version for inclusion. + } + + + template + void intrusive_sdlist::splice(iterator /*position*/, intrusive_sdlist& /*x*/) + { + EASTL_ASSERT(false); // If you need this working, ask Paul Pedriana or submit a working version for inclusion. + } + + + template + void intrusive_sdlist::splice(iterator /*position*/, intrusive_sdlist& /*x*/, iterator /*xPosition*/) + { + EASTL_ASSERT(false); // If you need this working, ask Paul Pedriana or submit a working version for inclusion. + } + + + template + void intrusive_sdlist::splice(iterator /*position*/, intrusive_sdlist& /*x*/, iterator /*first*/, iterator /*last*/) + { + EASTL_ASSERT(false); // If you need this working, ask Paul Pedriana or submit a working version for inclusion. + } + + + template + inline bool intrusive_sdlist::validate() const + { + return true; // To do. + } + + + template + inline int intrusive_sdlist::validate_iterator(const_iterator i) const + { + // To do: Come up with a more efficient mechanism of doing this. + + for(const_iterator temp = begin(), tempEnd = end(); temp != tempEnd; ++temp) + { + if(temp == i) + return (isf_valid | isf_current | isf_can_dereference); + } + + if(i == end()) + return (isf_valid | isf_current); + + return isf_none; + } + + + + + /////////////////////////////////////////////////////////////////////// + // global operators + /////////////////////////////////////////////////////////////////////// + + template + bool operator==(const intrusive_sdlist& a, const intrusive_sdlist& b) + { + // If we store an mSize member for intrusive_sdlist, we want to take advantage of it here. 
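// Editorial note (not part of this patch): with no cached size, the two lists
// are walked in lock-step below; equality is O(n), and the size check is
// implicit in whether both iterators reach their end at the same time.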
+ typename intrusive_sdlist::const_iterator ia = a.begin(); + typename intrusive_sdlist::const_iterator ib = b.begin(); + typename intrusive_sdlist::const_iterator enda = a.end(); + typename intrusive_sdlist::const_iterator endb = b.end(); + + while((ia != enda) && (ib != endb) && (*ia == *ib)) + { + ++ia; + ++ib; + } + return (ia == enda) && (ib == endb); + } + + template + bool operator<(const intrusive_sdlist& a, const intrusive_sdlist& b) + { + return eastl::lexicographical_compare(a.begin(), a.end(), b.begin(), b.end()); + } + + template + bool operator!=(const intrusive_sdlist& a, const intrusive_sdlist& b) + { + return !(a == b); + } + + template + bool operator>(const intrusive_sdlist& a, const intrusive_sdlist& b) + { + return b < a; + } + + template + bool operator<=(const intrusive_sdlist& a, const intrusive_sdlist& b) + { + return !(b < a); + } + + template + bool operator>=(const intrusive_sdlist& a, const intrusive_sdlist& b) + { + return !(a < b); + } + + template + void swap(intrusive_sdlist& a, intrusive_sdlist& b) + { + a.swap(b); + } + + +} // namespace eastl + + +#endif // Header include guard + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/libkram/eastl/include/EASTL/bonus/intrusive_slist.h b/libkram/eastl/include/EASTL/bonus/intrusive_slist.h new file mode 100644 index 00000000..28d445d9 --- /dev/null +++ b/libkram/eastl/include/EASTL/bonus/intrusive_slist.h @@ -0,0 +1,321 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + + +/////////////////////////////////////////////////////////////////////////////// +// *** Note *** +// This implementation is incomplete. +/////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_INTRUSIVE_SLIST_H +#define EASTL_INTRUSIVE_SLIST_H + + +#include +#include +#include + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + + + +namespace eastl +{ + + /// intrusive_slist_node + /// + struct intrusive_slist_node + { + intrusive_slist_node* mpNext; + }; + + + /// IntrusiveSListIterator + /// + template + struct IntrusiveSListIterator + { + typedef IntrusiveSListIterator this_type; + typedef IntrusiveSListIterator iterator; + typedef IntrusiveSListIterator const_iterator; + typedef eastl_size_t size_type; // See config.h for the definition of eastl_size_t, which defaults to size_t. + typedef ptrdiff_t difference_type; + typedef T value_type; + typedef T node_type; + typedef Pointer pointer; + typedef Reference reference; + typedef EASTL_ITC_NS::forward_iterator_tag iterator_category; + + public: + node_type* mpNode; + + public: + IntrusiveSListIterator(); + explicit IntrusiveSListIterator(pointer pNode); // Note that you can also construct an iterator from T via this, since value_type == node_type. + IntrusiveSListIterator(const iterator& x); + + reference operator*() const; + pointer operator->() const; + + this_type& operator++(); + this_type operator++(int); + + }; // struct IntrusiveSListIterator + + + + /// intrusive_slist_base + /// + /// Provides a template-less base class for intrusive_slist. + /// + class intrusive_slist_base + { + public: + typedef eastl_size_t size_type; // See config.h for the definition of eastl_size_t, which defaults to size_t. 
+ typedef ptrdiff_t difference_type; + + protected: + intrusive_slist_node* mpNext; + + public: + intrusive_slist_base(); + + bool empty() const; ///< Returns true if the container is empty. + size_type size() const; ///< Returns the number of elements in the list; O(n). + + void clear(); ///< Clears the list; O(1). No deallocation occurs. + void pop_front(); ///< Removes an element from the front of the list; O(1). The element must be present, but is not deallocated. + void reverse(); ///< Reverses a list so that front and back are swapped; O(n). + + //bool validate() const; ///< Scans a list for linkage inconsistencies; O(n) time, O(1) space. Returns false if errors are detected, such as loops or branching. + + }; // class intrusive_slist_base + + + + /// intrusive_slist + /// + template + class intrusive_slist : public intrusive_slist_base + { + public: + typedef intrusive_slist this_type; + typedef intrusive_slist_base base_type; + typedef T node_type; + typedef T value_type; + typedef typename base_type::size_type size_type; + typedef typename base_type::difference_type difference_type; + typedef T& reference; + typedef const T& const_reference; + typedef T* pointer; + typedef const T* const_pointer; + typedef IntrusiveSListIterator iterator; + typedef IntrusiveSListIterator const_iterator; + + public: + intrusive_slist(); ///< Creates an empty list. + //intrusive_slist(const this_type& x); ///< Creates an empty list; ignores the argument. To consider: Is this a useful function? + //this_type& operator=(const this_type& x); ///< Clears the list; ignores the argument. To consider: Is this a useful function? + + iterator begin(); ///< Returns an iterator pointing to the first element in the list. O(1). + const_iterator begin() const; ///< Returns a const_iterator pointing to the first element in the list. O(1). + const_iterator cbegin() const; ///< Returns a const_iterator pointing to the first element in the list. O(1). + iterator end(); ///< Returns an iterator pointing one-after the last element in the list. O(1). + const_iterator end() const; ///< Returns a const_iterator pointing one-after the last element in the list. O(1). + const_iterator cend() const; ///< Returns a const_iterator pointing one-after the last element in the list. O(1). + iterator before_begin(); ///< Returns iterator to position before begin. O(1). + const_iterator before_begin() const; ///< Returns iterator to previous position. O(1). + const_iterator cbefore_begin() const; ///< Returns iterator to previous position. O(1). + + iterator previous(const_iterator position); ///< Returns iterator to previous position. O(n). + const_iterator previous(const_iterator position) const; ///< Returns iterator to previous position. O(n). + + reference front(); ///< Returns a reference to the first element. The list must be empty. + const_reference front() const; ///< Returns a const reference to the first element. The list must be empty. + + void push_front(value_type& value); ///< Adds an element to the front of the list; O(1). The element is not copied. The element must not be in any other list. + void pop_front(); ///< Removes an element from the back of the list; O(n). The element must be present, but is not deallocated. + + bool contains(const value_type& value) const; ///< Returns true if the given element is in the list; O(n). Equivalent to (locate(x) != end()). 
+ + iterator locate(value_type& value); ///< Converts a reference to an object in the list back to an iterator, or returns end() if it is not part of the list. O(n) + const_iterator locate(const value_type& value) const; ///< Converts a const reference to an object in the list back to a const iterator, or returns end() if it is not part of the list. O(n) + + iterator insert(iterator position, value_type& value); ///< Inserts an element before the element pointed to by the iterator. O(n) + iterator insert_after(iterator position, value_type& value); ///< Inserts an element after the element pointed to by the iterator. O(1) + + iterator erase(iterator position); ///< Erases the element pointed to by the iterator. O(n) + iterator erase_after(iterator position); ///< Erases the element after the element pointed to by the iterator. O(1) + + iterator erase(iterator first, iterator last); ///< Erases elements within the iterator range [first, last). O(n). + iterator erase_after(iterator before_first, iterator last); ///< Erases elements within the iterator range [before_first, last). O(1). + + void swap(this_type& x); ///< Swaps the contents of two intrusive lists; O(1). + + + void splice(iterator position, value_type& value); ///< Moves the given element into this list before the element pointed to by position; O(n). + ///< Required: x must be in some list or have first/next pointers that point it itself. + + void splice(iterator position, this_type& x); ///< Moves the contents of a list into this list before the element pointed to by position; O(n). + ///< Required: &x != this (same as std::list). + + void splice(iterator position, this_type& x, iterator xPosition); ///< Moves the given element pointed to i within the list x into the current list before + ///< the element pointed to by position; O(n). + + void splice(iterator position, this_type& x, iterator first, iterator last); ///< Moves the range of elements [first, last) from list x into the current list before + ///< the element pointed to by position; O(n). + ///< Required: position must not be in [first, last). (same as std::list). + + void splice_after(iterator position, value_type& value); ///< Moves the given element into this list after the element pointed to by position; O(1). + ///< Required: x must be in some list or have first/next pointers that point it itself. + + void splice_after(iterator position, this_type& x); ///< Moves the contents of a list into this list after the element pointed to by position; O(n). + ///< Required: &x != this (same as std::list). + + void splice_after(iterator position, this_type& x, iterator xPrevious); ///< Moves the element after xPrevious to be after position. O(1). + ///< Required: &x != this (same as std::list). + + void splice_after(iterator position, this_type& x, iterator before_first, iterator before_last); ///< Moves the elements in the range of [before_first+1, before_last+1) to be after position. O(1). 
+ + bool validate() const; + int validate_iterator(const_iterator i) const; + + }; // intrusive_slist + + + + + /////////////////////////////////////////////////////////////////////// + // IntrusiveSListIterator + /////////////////////////////////////////////////////////////////////// + + template + inline IntrusiveSListIterator::IntrusiveSListIterator() + { + #if EASTL_DEBUG + mpNode = NULL; + #endif + } + + template + inline IntrusiveSListIterator::IntrusiveSListIterator(pointer pNode) + : mpNode(pNode) + { + } + + template + inline IntrusiveSListIterator::IntrusiveSListIterator(const iterator& x) + : mpNode(x.mpNode) + { + } + + + /////////////////////////////////////////////////////////////////////// + // intrusive_slist_base + /////////////////////////////////////////////////////////////////////// + + // To do. + + + /////////////////////////////////////////////////////////////////////// + // intrusive_slist + /////////////////////////////////////////////////////////////////////// + + // To do. + + + /////////////////////////////////////////////////////////////////////// + // global operators + /////////////////////////////////////////////////////////////////////// + + + + /////////////////////////////////////////////////////////////////////// + // global operators + /////////////////////////////////////////////////////////////////////// + + template + bool operator==(const intrusive_slist& a, const intrusive_slist& b) + { + // If we store an mSize member for intrusive_slist, we want to take advantage of it here. + typename intrusive_slist::const_iterator ia = a.begin(); + typename intrusive_slist::const_iterator ib = b.begin(); + typename intrusive_slist::const_iterator enda = a.end(); + typename intrusive_slist::const_iterator endb = b.end(); + + while((ia != enda) && (ib != endb) && (*ia == *ib)) + { + ++ia; + ++ib; + } + return (ia == enda) && (ib == endb); + } + + template + bool operator<(const intrusive_slist& a, const intrusive_slist& b) + { + return eastl::lexicographical_compare(a.begin(), a.end(), b.begin(), b.end()); + } + + template + bool operator!=(const intrusive_slist& a, const intrusive_slist& b) + { + return !(a == b); + } + + template + bool operator>(const intrusive_slist& a, const intrusive_slist& b) + { + return b < a; + } + + template + bool operator<=(const intrusive_slist& a, const intrusive_slist& b) + { + return !(b < a); + } + + template + bool operator>=(const intrusive_slist& a, const intrusive_slist& b) + { + return !(a < b); + } + + template + void swap(intrusive_slist& a, intrusive_slist& b) + { + a.swap(b); + } + +} // namespace eastl + + +#endif // Header include guard + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/libkram/eastl/include/EASTL/bonus/list_map.h b/libkram/eastl/include/EASTL/bonus/list_map.h new file mode 100644 index 00000000..8a080d6d --- /dev/null +++ b/libkram/eastl/include/EASTL/bonus/list_map.h @@ -0,0 +1,932 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_LIST_MAP_H +#define EASTL_LIST_MAP_H + + +#include + + +namespace eastl +{ + + /// EASTL_MAP_DEFAULT_NAME + /// + /// Defines a default container name in the absence of a user-provided name. + /// + #ifndef EASTL_LIST_MAP_DEFAULT_NAME + #define EASTL_LIST_MAP_DEFAULT_NAME EASTL_DEFAULT_NAME_PREFIX " list_map" // Unless the user overrides something, this is "EASTL list_map". 
+ #endif + + /// EASTL_MAP_DEFAULT_ALLOCATOR + /// + #ifndef EASTL_LIST_MAP_DEFAULT_ALLOCATOR + #define EASTL_LIST_MAP_DEFAULT_ALLOCATOR allocator_type(EASTL_LIST_MAP_DEFAULT_NAME) + #endif + + + /// list_map_data_base + /// + /// We define a list_map_data_base separately from list_map_data (below), because it + /// allows us to have non-templated operations, and it makes it so that the + /// list_map anchor node doesn't carry a T with it, which would waste space and + /// possibly lead to surprising the user due to extra Ts existing that the user + /// didn't explicitly create. The downside to all of this is that it makes debug + /// viewing of an list_map harder, given that the node pointers are of type + /// list_map_data_base and not list_map_data. + /// + struct list_map_data_base + { + list_map_data_base* mpNext; + list_map_data_base* mpPrev; + }; + + + /// list_map_data + /// + template + struct list_map_data : public list_map_data_base + { + typedef Value value_type; + + list_map_data(const value_type& value); + + value_type mValue; // This is a pair of key/value. + }; + + + /// list_map_iterator + /// + template + struct list_map_iterator + { + typedef list_map_iterator this_type; + typedef list_map_iterator iterator; + typedef list_map_iterator const_iterator; + typedef eastl_size_t size_type; // See config.h for the definition of eastl_size_t, which defaults to size_t. + typedef ptrdiff_t difference_type; + typedef T value_type; + typedef list_map_data_base base_node_type; + typedef list_map_data node_type; + typedef Pointer pointer; + typedef Reference reference; + typedef EASTL_ITC_NS::bidirectional_iterator_tag iterator_category; + + public: + node_type* mpNode; + + public: + list_map_iterator(); + list_map_iterator(const base_node_type* pNode); + list_map_iterator(const iterator& x); + + reference operator*() const; + pointer operator->() const; + + this_type& operator++(); + this_type operator++(int); + + this_type& operator--(); + this_type operator--(int); + + }; // list_map_iterator + + + /// use_value_first + /// + /// operator()(x) simply returns x.mValue.first. Used in list_map. + /// This is similar to eastl::use_first, however it assumes that the input type is an object + /// whose mValue is an eastl::pair, and the first value in the pair is the desired return. + /// + template + struct use_value_first + { + typedef Object argument_type; + typedef typename Object::value_type::first_type result_type; + + const result_type& operator()(const Object& x) const + { return x.mValue.first; } + }; + + + /// list_map + /// + /// Implements a map like container, which also provides functionality similar to a list. + /// + /// Note: Like a map, keys must still be unique. As such, push_back() and push_front() operations + /// return a bool indicating success, or failure if the entry's key is already in use. + /// + /// list_map is designed to improve performance for situations commonly implemented as: + /// A map, which must be iterated over to find the oldest entry, or purge expired entries. + /// A list, which must be iterated over to remove a player's record when they sign off. + /// + /// list_map requires a little more memory per node than either a list or map alone, + /// and many of list_map's functions have a higher operational cost (CPU time) than their + /// counterparts in list and map. However, as the node count increases, list_map quickly outperforms + /// either a list or a map when find [by-index] and front/back type operations are required. 
+ /// + /// In essence, list_map avoids O(n) iterations at the expense of additional costs to quick (O(1) and O(log n) operations: + /// push_front(), push_back(), pop_front() and pop_back() have O(log n) operation time, similar to map::insert(), rather than O(1) time like a list, + /// however, front() and back() maintain O(1) operation time. + /// + /// As a canonical example, consider a large backlog of player group invites, which are removed when either: + /// The invitation times out - in main loop: while( !listMap.empty() && listMap.front().IsExpired() ) { listMap.pop_front(); } + /// The player rejects the outstanding invitation - on rejection: iter = listMap.find(playerId); if (iter != listMap.end()) { listMap.erase(iter); } + /// + /// For a similar example, consider a high volume pending request container which must: + /// Time out old requests (similar to invites timing out above) + /// Remove requests once they've been handled (similar to rejecting invites above) + /// + /// For such usage patterns, the performance benefits of list_map become dramatic with + /// common O(n) operations once the node count rises to hundreds or more. + /// + /// When high performance is a priority, Containers with thousands of nodes or more + /// can quickly result in unacceptable performance when executing even infrequenty O(n) operations. + /// + /// In order to maintain strong performance, avoid iterating over list_map whenever possible. + /// + /////////////////////////////////////////////////////////////////////// + /// find_as + /// In order to support the ability to have a tree of strings but + /// be able to do efficiently lookups via char pointers (i.e. so they + /// aren't converted to string objects), we provide the find_as + /// function. This function allows you to do a find with a key of a + /// type other than the tree's key type. See the find_as function + /// for more documentation on this. + /// + /////////////////////////////////////////////////////////////////////// + /// Pool allocation + /// If you want to make a custom memory pool for a list_map container, your pool + /// needs to contain items of type list_map::node_type. So if you have a memory + /// pool that has a constructor that takes the size of pool items and the + /// count of pool items, you would do this (assuming that MemoryPool implements + /// the Allocator interface): + /// typedef list_map, MemoryPool> WidgetMap; // Delare your WidgetMap type. + /// MemoryPool myPool(sizeof(WidgetMap::node_type), 100); // Make a pool of 100 Widget nodes. + /// WidgetMap myMap(&myPool); // Create a map that uses the pool. + /// + template , typename Allocator = EASTLAllocatorType> + class list_map + : protected rbtree >, Compare, Allocator, eastl::use_value_first > >, true, true> + { + public: + typedef rbtree >, Compare, Allocator, + eastl::use_value_first > >, true, true> base_type; + typedef list_map this_type; + typedef typename base_type::size_type size_type; + typedef typename base_type::key_type key_type; + typedef T mapped_type; + typedef typename eastl::pair value_type; // This is intentionally different from base_type::value_type + typedef value_type& reference; + typedef const value_type& const_reference; + typedef typename base_type::node_type node_type; // Despite the internal and external values being different, we're keeping the node type the same as the base + // in order to allow for pool allocation. See EASTL/map.h for more information. 
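For reference, a minimal usage sketch of the list_map interface declared in this header, following the invite-expiry pattern described in the comments above. The Invite struct, the player ids, and PruneInvites are hypothetical names used only for illustration, and the sketch assumes the application supplies the usual EASTL allocator hooks (the named operator new[] overloads):

    #include <EASTL/bonus/list_map.h>
    #include <stdint.h>

    struct Invite { uint64_t expiresAt; };   // hypothetical payload, for illustration only

    void PruneInvites(uint64_t now)
    {
        // Keyed by player id; iteration order is insertion order, so the
        // oldest entry is always at front().
        eastl::list_map<uint32_t, Invite> invites;

        invites.push_back(1u, Invite{now + 30});   // returns false if the key already exists
        invites.push_back(2u, Invite{now + 60});

        // Expire the oldest invites without walking the whole container.
        while (!invites.empty() && invites.front().second.expiresAt <= now)
            invites.pop_front();

        // Player 2 rejects the invite: O(log n) find, O(1) unlink from the list.
        if (invites.find(2u) != invites.end())
            invites.erase(2u);
    }
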
+ typedef typename eastl::list_map_iterator iterator; // This is intentionally different from base_type::iterator + typedef typename eastl::list_map_iterator const_iterator; // This is intentionally different from base_type::const_iterator + typedef eastl::reverse_iterator reverse_iterator; + typedef eastl::reverse_iterator const_reverse_iterator; + typedef typename base_type::allocator_type allocator_type; + typedef typename eastl::pair insert_return_type; // This is intentionally removed, as list_map doesn't support insert() functions, in favor of list like push_back and push_front + typedef typename eastl::use_first extract_key; // This is intentionally different from base_type::extract_key + + using base_type::get_allocator; + using base_type::set_allocator; + using base_type::key_comp; + using base_type::empty; + using base_type::size; + + protected: + typedef typename eastl::list_map_data > internal_value_type; + + protected: + // internal base node, acting as the sentinel for list like behaviors + list_map_data_base mNode; + + public: + list_map(const allocator_type& allocator = EASTL_LIST_MAP_DEFAULT_ALLOCATOR); + list_map(const Compare& compare, const allocator_type& allocator = EASTL_MAP_DEFAULT_ALLOCATOR); + + // To do: Implement the following: + + //list_map(const this_type& x); + //list_map(this_type&& x); + //list_map(this_type&& x, const allocator_type& allocator); + //list_map(std::initializer_list ilist, const Compare& compare = Compare(), const allocator_type& allocator = EASTL_LIST_MAP_DEFAULT_ALLOCATOR); + + //template + //list_map(Iterator itBegin, Iterator itEnd); + + //this_type& operator=(const this_type& x); + //this_type& operator=(std::initializer_list ilist); + //this_type& operator=(this_type&& x); + + //void swap(this_type& x); + + public: + // iterators + iterator begin() EA_NOEXCEPT; + const_iterator begin() const EA_NOEXCEPT; + const_iterator cbegin() const EA_NOEXCEPT; + + iterator end() EA_NOEXCEPT; + const_iterator end() const EA_NOEXCEPT; + const_iterator cend() const EA_NOEXCEPT; + + reverse_iterator rbegin() EA_NOEXCEPT; + const_reverse_iterator rbegin() const EA_NOEXCEPT; + const_reverse_iterator crbegin() const EA_NOEXCEPT; + + reverse_iterator rend() EA_NOEXCEPT; + const_reverse_iterator rend() const EA_NOEXCEPT; + const_reverse_iterator crend() const EA_NOEXCEPT; + + public: + // List like methods + reference front(); + const_reference front() const; + + reference back(); + const_reference back() const; + + // push_front and push_back which takes in a key/value pair + bool push_front(const value_type& value); + bool push_back(const value_type& value); + + // push_front and push_back which take key and value separately, for convenience + bool push_front(const key_type& key, const mapped_type& value); + bool push_back(const key_type& key, const mapped_type& value); + + void pop_front(); + void pop_back(); + + public: + // Map like methods + iterator find(const key_type& key); + const_iterator find(const key_type& key) const; + + template + iterator find_as(const U& u, Compare2 compare2); + template + const_iterator find_as(const U& u, Compare2 compare2) const; + + size_type count(const key_type& key) const; + size_type erase(const key_type& key); + + public: + // Shared methods which are common to list and map + iterator erase(const_iterator position); + reverse_iterator erase(const_reverse_iterator position); + + void clear(); + void reset_lose_memory(); + + bool validate() const; + int validate_iterator(const_iterator i) const; + + public: + // 
list like functionality which is in consideration for implementation: + // iterator insert(const_iterator position, const value_type& value); + // void remove(const mapped_type& x); + + public: + // list like functionality which may be implemented, but is discouraged from implementation: + // due to the liklihood that they would require O(n) time to execute. + // template + // void remove_if(Predicate); + // void reverse(); + // void sort(); + // template + // void sort(Compare compare); + + public: + // map like functionality which list_map does not support, due to abmiguity with list like functionality: + #if !defined(EA_COMPILER_NO_DELETED_FUNCTIONS) + template + list_map(InputIterator first, InputIterator last, const Compare& compare, const allocator_type& allocator = EASTL_RBTREE_DEFAULT_ALLOCATOR) = delete; + + insert_return_type insert(const value_type& value) = delete; + iterator insert(const_iterator position, const value_type& value) = delete; + + template + void insert(InputIterator first, InputIterator last) = delete; + + insert_return_type insert(const key_type& key) = delete; + + iterator erase(const_iterator first, const_iterator last) = delete; + reverse_iterator erase(reverse_iterator first, reverse_iterator last) = delete; + + void erase(const key_type* first, const key_type* last) = delete; + + iterator lower_bound(const key_type& key) = delete; + const_iterator lower_bound(const key_type& key) const = delete; + + iterator upper_bound(const key_type& key) = delete; + const_iterator upper_bound(const key_type& key) const = delete; + + eastl::pair equal_range(const key_type& key) = delete; + eastl::pair equal_range(const key_type& key) const = delete; + + mapped_type& operator[](const key_type& key) = delete; // Of map, multimap, set, and multimap, only map has operator[]. + #endif + + public: + // list like functionality which list_map does not support, due to ambiguity with map like functionality: + #if 0 + reference push_front() = delete; + void* push_front_uninitialized() = delete; + + reference push_back() = delete; + void* push_back_uninitialized() = delete; + + iterator insert(const_iterator position) = delete; + + void insert(const_iterator position, size_type n, const value_type& value) = delete; + + template + void insert(const_iterator position, InputIterator first, InputIterator last) = delete; + + iterator erase(const_iterator first, const_iterator last) = delete; + reverse_iterator erase(const_reverse_iterator first, const_reverse_iterator last) = delete; + + void splice(const_iterator position, this_type& x) = delete + void splice(const_iterator position, this_type& x, const_iterator i) = delete; + void splice(const_iterator position, this_type& x, const_iterator first, const_iterator last) = delete; + + void merge(this_type& x) = delete; + + template + void merge(this_type& x, Compare compare) = delete; + + void unique() = delete; // Uniqueness is enforced by map functionality + + template + void unique(BinaryPredicate) = delete; // Uniqueness is enforced by map functionality + #endif + + }; // list_map + + + /////////////////////////////////////////////////////////////////////// + // list_map_data + /////////////////////////////////////////////////////////////////////// + + template + inline list_map_data::list_map_data(const Value& value) + : mValue(value) + { + mpNext = NULL; // GCC 4.8 is generating warnings about referencing these values in list_map::push_front unless we + mpPrev = NULL; // initialize them here. 
The compiler seems to be mistaken, as our code isn't actually using them unintialized. + } + + + /////////////////////////////////////////////////////////////////////// + // list_map_iterator + /////////////////////////////////////////////////////////////////////// + + template + inline list_map_iterator::list_map_iterator() + : mpNode(NULL) + { + // Empty + } + + + template + inline list_map_iterator::list_map_iterator(const base_node_type* pNode) + : mpNode(static_cast(const_cast(pNode))) + { + // Empty + } + + + template + inline list_map_iterator::list_map_iterator(const iterator& x) + : mpNode(const_cast(x.mpNode)) + { + // Empty + } + + + template + inline typename list_map_iterator::reference + list_map_iterator::operator*() const + { + return mpNode->mValue; + } + + + template + inline typename list_map_iterator::pointer + list_map_iterator::operator->() const + { + return &mpNode->mValue; + } + + + template + inline typename list_map_iterator::this_type& + list_map_iterator::operator++() + { + mpNode = static_cast(mpNode->mpNext); + return *this; + } + + + template + inline typename list_map_iterator::this_type + list_map_iterator::operator++(int) + { + this_type temp(*this); + mpNode = static_cast(mpNode->mpNext); + return temp; + } + + + template + inline typename list_map_iterator::this_type& + list_map_iterator::operator--() + { + mpNode = static_cast(mpNode->mpPrev); + return *this; + } + + + template + inline typename list_map_iterator::this_type + list_map_iterator::operator--(int) + { + this_type temp(*this); + mpNode = static_cast(mpNode->mpPrev); + return temp; + } + + + // We provide additional template paremeters here to support comparisons between const and non-const iterators. + // See C++ defect report #179, or EASTL/list.h for more information. + template + inline bool operator==(const list_map_iterator& a, + const list_map_iterator& b) + { + return a.mpNode == b.mpNode; + } + + + template + inline bool operator!=(const list_map_iterator& a, + const list_map_iterator& b) + { + return a.mpNode != b.mpNode; + } + + + // We provide a version of operator!= for the case where the iterators are of the + // same type. This helps prevent ambiguity errors in the presence of rel_ops. 
+ template + inline bool operator!=(const list_map_iterator& a, + const list_map_iterator& b) + { + return a.mpNode != b.mpNode; + } + + + /////////////////////////////////////////////////////////////////////// + // list_map + /////////////////////////////////////////////////////////////////////// + + template + inline list_map::list_map(const allocator_type& allocator) + : base_type(allocator) + { + mNode.mpNext = &mNode; + mNode.mpPrev = &mNode; + } + + template + inline list_map::list_map(const Compare& compare, const allocator_type& allocator) + : base_type(compare, allocator) + { + mNode.mpNext = &mNode; + mNode.mpPrev = &mNode; + } + + template + inline typename list_map::iterator + list_map::begin() EA_NOEXCEPT + { + return iterator(mNode.mpNext); + } + + template + inline typename list_map::const_iterator + list_map::begin() const EA_NOEXCEPT + { + return const_iterator(mNode.mpNext); + } + + template + inline typename list_map::const_iterator + list_map::cbegin() const EA_NOEXCEPT + { + return const_iterator(mNode.mpNext); + } + + template + inline typename list_map::iterator + list_map::end() EA_NOEXCEPT + { + return iterator(&mNode); + } + + template + inline typename list_map::const_iterator + list_map::end() const EA_NOEXCEPT + { + return const_iterator(&mNode); + } + + template + inline typename list_map::const_iterator + list_map::cend() const EA_NOEXCEPT + { + return const_iterator(&mNode); + } + + template + inline typename list_map::reverse_iterator + list_map::rbegin() EA_NOEXCEPT + { + return reverse_iterator(&mNode); + } + + template + inline typename list_map::const_reverse_iterator + list_map::rbegin() const EA_NOEXCEPT + { + return const_reverse_iterator(&mNode); + } + + template + inline typename list_map::const_reverse_iterator + list_map::crbegin() const EA_NOEXCEPT + { + return const_reverse_iterator(&mNode); + } + + template + inline typename list_map::reverse_iterator + list_map::rend() EA_NOEXCEPT + { + return reverse_iterator(mNode.mpNext); + } + + template + inline typename list_map::const_reverse_iterator + list_map::rend() const EA_NOEXCEPT + { + return const_reverse_iterator(mNode.mpNext); + } + + template + inline typename list_map::const_reverse_iterator + list_map::crend() const EA_NOEXCEPT + { + return const_reverse_iterator(mNode.mpNext); + } + + template + inline typename list_map::reference + list_map::front() + { + #if EASTL_ASSERT_ENABLED && EASTL_EMPTY_REFERENCE_ASSERT_ENABLED + if (EASTL_UNLIKELY(static_cast(mNode.mpNext) == &mNode)) + EASTL_FAIL_MSG("list_map::front -- empty container"); + #else + // We allow the user to reference an empty container. + #endif + + return static_cast(mNode.mpNext)->mValue; + } + + template + inline typename list_map::const_reference + list_map::front() const + { + #if EASTL_ASSERT_ENABLED && EASTL_EMPTY_REFERENCE_ASSERT_ENABLED + if (EASTL_UNLIKELY(static_cast(mNode.mpNext) == &mNode)) + EASTL_FAIL_MSG("list_map::front -- empty container"); + #else + // We allow the user to reference an empty container. + #endif + + return static_cast(mNode.mpNext)->mValue; + } + + template + inline typename list_map::reference + list_map::back() + { + #if EASTL_ASSERT_ENABLED && EASTL_EMPTY_REFERENCE_ASSERT_ENABLED + if (EASTL_UNLIKELY(static_cast(mNode.mpNext) == &mNode)) + EASTL_FAIL_MSG("list_map::back -- empty container"); + #else + // We allow the user to reference an empty container. 
+ #endif + + return static_cast(mNode.mpPrev)->mValue; + } + + template + inline typename list_map::const_reference + list_map::back() const + { + #if EASTL_ASSERT_ENABLED && EASTL_EMPTY_REFERENCE_ASSERT_ENABLED + if (EASTL_UNLIKELY(static_cast(mNode.mpNext) == &mNode)) + EASTL_FAIL_MSG("list_map::back -- empty container"); + #else + // We allow the user to reference an empty container. + #endif + + return static_cast(mNode.mpPrev)->mValue; + } + + template + bool list_map::push_front(const value_type& value) + { + internal_value_type tempValue(value); + typename base_type::insert_return_type baseReturn = base_type::insert(tempValue); + + // Did the insert succeed? + if (baseReturn.second) + { + internal_value_type* pNode = &(*baseReturn.first); + + pNode->mpNext = mNode.mpNext; + pNode->mpPrev = &mNode; + + mNode.mpNext->mpPrev = pNode; + mNode.mpNext = pNode; + + return true; + } + else + { + return false; + } + } + + template + bool list_map::push_back(const value_type& value) + { + internal_value_type tempValue(value); + typename base_type::insert_return_type baseReturn = base_type::insert(tempValue); + + // Did the insert succeed? + if (baseReturn.second) + { + internal_value_type* pNode = &(*baseReturn.first); + + pNode->mpPrev = mNode.mpPrev; + pNode->mpNext = &mNode; + + mNode.mpPrev->mpNext = pNode; + mNode.mpPrev = pNode; + + return true; + } + else + { + return false; + } + } + + template + bool list_map::push_front(const key_type& key, const mapped_type& value) + { + return push_front(eastl::make_pair(key, value)); + } + + template + bool list_map::push_back(const key_type& key, const mapped_type& value) + { + return push_back(eastl::make_pair(key, value)); + } + + template + void list_map::pop_front() + { + #if EASTL_ASSERT_ENABLED + if (EASTL_UNLIKELY(empty())) + EASTL_FAIL_MSG("list_map::pop_front -- empty container"); + #endif + + erase(static_cast(mNode.mpNext)->mValue.first); + } + + template + void list_map::pop_back() + { + #if EASTL_ASSERT_ENABLED + if (EASTL_UNLIKELY(empty())) + EASTL_FAIL_MSG("list_map::pop_back -- empty container"); + #endif + + erase(static_cast(mNode.mpPrev)->mValue.first); + } + + template + inline typename list_map::iterator + list_map::find(const key_type& key) + { + typename base_type::iterator baseIter = base_type::find(key); + if (baseIter != base_type::end()) + { + return iterator(&(*baseIter)); + } + else + { + return end(); + } + } + + template + inline typename list_map::const_iterator + list_map::find(const key_type& key) const + { + typename base_type::const_iterator baseIter = base_type::find(key); + if (baseIter != base_type::end()) + { + return const_iterator(&(*baseIter)); + } + else + { + return end(); + } + } + + template + template + inline typename list_map::iterator + list_map::find_as(const U& u, Compare2 compare2) + { + typename base_type::iterator baseIter = base_type::find_as(u, compare2); + if (baseIter != base_type::end()) + { + return iterator(&(*baseIter)); + } + else + { + return end(); + } + } + + template + template + inline typename list_map::const_iterator + list_map::find_as(const U& u, Compare2 compare2) const + { + typename base_type::const_iterator baseIter = base_type::find_as(u, compare2); + if (baseIter != base_type::end()) + { + return const_iterator(&(*baseIter)); + } + else + { + return end(); + } + } + + template + inline typename list_map::size_type + list_map::count(const key_type& key) const + { + const typename base_type::const_iterator it = base_type::find(key); + return (it != base_type::end()) ? 
1 : 0; + } + + template + inline typename list_map::size_type + list_map::erase(const key_type& key) + { + typename base_type::iterator baseIter = base_type::find(key); + if (baseIter != base_type::end()) + { + internal_value_type* node = &(*baseIter); + + node->mpNext->mpPrev = node->mpPrev; + node->mpPrev->mpNext = node->mpNext; + + base_type::erase(baseIter); + + return 1; + } + return 0; + } + + template + inline typename list_map::iterator + list_map::erase(const_iterator position) + { + iterator posIter(position.mpNode); // Convert from const. + iterator eraseIter(posIter++); + erase(eraseIter->first); + return posIter; + } + + template + inline typename list_map::reverse_iterator + list_map::erase(const_reverse_iterator position) + { + return reverse_iterator(erase((++position).base())); + } + + template + void list_map::clear() + { + base_type::clear(); + + mNode.mpNext = &mNode; + mNode.mpPrev = &mNode; + } + + template + void list_map::reset_lose_memory() + { + base_type::reset_lose_memory(); + + mNode.mpNext = &mNode; + mNode.mpPrev = &mNode; + } + + template + bool list_map::validate() const + { + if (!base_type::validate()) + { + return false; + } + + size_type nodeCount(0); + list_map_data_base* node = mNode.mpNext; + while (node != &mNode) + { + internal_value_type* data = static_cast(node); + if (base_type::find(data->mValue.first) == base_type::end()) + { + return false; + } + node = node->mpNext; + ++nodeCount; + } + if (nodeCount != size()) + { + return false; + } + nodeCount = 0; + node = mNode.mpPrev; + while (node != &mNode) + { + internal_value_type* data = static_cast(node); + if (base_type::find(data->mValue.first) == base_type::end()) + { + return false; + } + node = node->mpPrev; + ++nodeCount; + } + if (nodeCount != size()) + { + return false; + } + + return true; + } + + template + int list_map::validate_iterator(const_iterator iter) const + { + for (const_iterator temp = begin(), tempEnd = end(); temp != tempEnd; ++temp) + { + if (temp == iter) + { + return (isf_valid | isf_current | isf_can_dereference); + } + } + + if (iter == end()) + return (isf_valid | isf_current); + + return isf_none; + } + + +} // namespace eastl + + +#endif // Header include guard + + + + diff --git a/libkram/eastl/include/EASTL/bonus/lru_cache.h b/libkram/eastl/include/EASTL/bonus/lru_cache.h new file mode 100644 index 00000000..46d053dc --- /dev/null +++ b/libkram/eastl/include/EASTL/bonus/lru_cache.h @@ -0,0 +1,424 @@ +/////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +/////////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// lru_cache is a container that simplifies caching of objects in a map. +// Basically, you give the container a key, like a string, and the data you want. +// The container provides callback mechanisms to generate data if it's missing +// as well as delete data when it's purged from the cache. This container +// uses a least recently used method: whatever the oldest item is will be +// replaced with a new entry. +// +// Algorithmically, the container is a combination of a map and a list. +// The list stores the age of the entries by moving the entry to the head +// of the list on each access, either by a call to get() or to touch(). +// The map is just the map as one would expect. 
+// +// This is useful for caching off data that is expensive to generate, +// for example text to speech wave files that are dynamically generated, +// but that will need to be reused, as is the case in narration of menu +// entries as a user scrolls through the entries. +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_LRUCACHE_H +#define EASTL_LRUCACHE_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) +#pragma once +#endif + +#include +#include +#include + +namespace eastl +{ + /// EASTL_LRUCACHE_DEFAULT_NAME + /// + /// Defines a default container name in the absence of a user-provided name. + /// + #ifndef EASTL_LRUCACHE_DEFAULT_NAME + #define EASTL_LRUCACHE_DEFAULT_NAME EASTL_DEFAULT_NAME_PREFIX " lru_cache" // Unless the user overrides something, this is "EASTL lru_cache". + #endif + + + /// EASTL_LRUCACHE_DEFAULT_ALLOCATOR + /// + #ifndef EASTL_LRUCACHE_DEFAULT_ALLOCATOR + #define EASTL_LRUCACHE_DEFAULT_ALLOCATOR allocator_type(EASTL_LRUCACHE_DEFAULT_NAME) + #endif + + /// lru_cache + /// + /// Implements a caching map based off of a key and data. + /// LRUList parameter is any container that guarantees the validity of its iterator even after a modification (e.g. list) + /// LRUMap is any mapping container that can map a key to some data. By default, we use unordered_set, but it might be better + /// to use hash_map or some other structure depending on your key/data combination. For example, you may want to swap the + /// map backing if using strings as keys or if the data objects are small. In any case, unordered_set is a good default and should + /// work well enough since the purpose of this class is to cache results of expensive, order of milliseconds, operations + /// + /// Algorithmic Performance (default data structures): + /// touch() -> O(1) + /// insert() / update(), get() / operator[] -> equivalent to unordered_set (O(1) on average, O(n) worst) + /// size() -> O(1) + /// + /// All accesses to a given key (insert, update, get) will push that key to most recently used. + /// If the data objects are shared between threads, it would be best to use a smartptr to manage the lifetime of the data. + /// as it could be removed from the cache while in use by another thread. + template , + typename map_type = eastl::unordered_map, + eastl::hash, + eastl::equal_to, + Allocator>> + class lru_cache + { + public: + using key_type = Key; + using value_type = Value; + using allocator_type = Allocator; + using size_type = eastl_size_t; + using list_iterator = typename list_type::iterator; + using map_iterator = typename map_type::iterator; + using data_container_type = eastl::pair; + using iterator = typename map_type::iterator; + using const_iterator = typename map_type::const_iterator; + using this_type = lru_cache; + using create_callback_type = eastl::function; + using delete_callback_type = eastl::function; + + /// lru_cache constructor + /// + /// Creates a Key / Value map that only stores size Value objects until it deletes them. + /// For complex objects or operations, the creator and deletor callbacks can be used. + /// This works just like a regular map object: on access, the Value will be created if it doesn't exist, returned otherwise. 
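As an illustration of the constructor and callback hooks documented above, a minimal sketch using integer keys and eastl::string values; CacheDemo and the lambdas are hypothetical stand-ins for whatever generation/cleanup the caller actually needs, and the usual EASTL allocator hooks are assumed:

    #include <EASTL/bonus/lru_cache.h>
    #include <EASTL/string.h>

    void CacheDemo()
    {
        // Cache at most 4 generated strings, keyed by an integer id.
        eastl::lru_cache<int, eastl::string> cache(4);

        // Optional hooks: build a value on a miss, observe values as they are evicted.
        cache.setCreateCallback([](int) { return eastl::string("generated"); });
        cache.setDeleteCallback([](eastl::string) { /* release external resources here */ });

        cache.insert(1, "one");              // false if the key already exists
        eastl::string& two = cache[2];       // miss: created via the callback, now most recent
        cache.touch(1);                      // mark key 1 as most recently used
        (void)two;

        if (cache.contains(2))
            cache.erase(2);                  // the deletor runs for the erased value

        cache.resize(2);                     // evicts oldest entries if size exceeds the new capacity
    }
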
+ explicit lru_cache(size_type size, + const allocator_type& allocator = EASTL_LRUCACHE_DEFAULT_ALLOCATOR, + create_callback_type creator = nullptr, + delete_callback_type deletor = nullptr) + : m_list(allocator) + , m_map(allocator) + , m_capacity(size) + , m_create_callback(creator) + , m_delete_callback(deletor) + { + } + + /// lru_cache destructor + /// + /// Iterates across every entry in the map and calls the deletor before calling the standard destructors + ~lru_cache() + { + // Destruct everything we have cached + for (auto& iter : m_map) + { + if (m_delete_callback) + m_delete_callback(iter.second.first); + } + } + + lru_cache(std::initializer_list> il) + : lru_cache(il.size()) + { + for(auto& p : il) + insert_or_assign(p.first, p.second); + } + + // TODO(rparolin): Why do we prevent copies? And what about moves? + lru_cache(const this_type&) = delete; + this_type &operator=(const this_type&) = delete; + + /// insert + /// + /// insert key k with value v. + /// If key already exists, no change is made and the return value is false. + /// If the key doesn't exist, the data is added to the map and the return value is true. + bool insert(const key_type& k, const value_type& v) + { + if (m_map.find(k) == m_map.end()) + { + make_space(); + + m_list.push_front(k); + m_map[k] = data_container_type(v, m_list.begin()); + + return true; + } + else + { + return false; + } + } + + /// emplace + /// + /// Places a new object in place k created with args + /// If the key already exists, it is replaced. + template + void emplace(const key_type& k, Args&&... args) + { + make_space(); + + m_list.push_front(k); + m_map.emplace(k, data_container_type(eastl::forward(args)..., m_list.begin())); + } + + /// insert_or_assign + /// + /// Same as add, but replaces the data at key k, if it exists, with the new entry v + /// Note that the deletor for the old v will be called before it's replaced with the new value of v + void insert_or_assign(const key_type& k, const value_type& v) + { + auto iter = m_map.find(k); + + if (m_map.find(k) != m_map.end()) + { + assign(iter, v); + } + else + { + insert(k, v); + } + } + + /// contains + /// + /// Returns true if key k exists in the cache + bool contains(const key_type& k) const + { + return m_map.find(k) != m_map.end(); + } + + /// at + /// + /// Retrives the data for key k, not valid if k does not exist + eastl::optional at(const key_type& k) + { + auto iter = m_map.find(k); + + if (iter != m_map.end()) + { + return iter->second.first; + } + else + { + return eastl::nullopt; + } + } + + /// get + /// + /// Retrives the data for key k. If no data exists, it will be created by calling the + /// creator. + value_type& get(const key_type& k) + { + auto iter = m_map.find(k); + + // The entry exists in the cache + if (iter != m_map.end()) + { + touch(k); + return iter->second.first; + } + else // The entry doesn't exist in the cache, so create one + { + // Add the entry to the map + insert(k, m_create_callback ? m_create_callback(k) : value_type()); + + // return the new data + return m_map[k].first; + } + } + + /// Equivalent to get(k) + value_type& operator[](const key_type& k) { return get(k); } + + /// erase + /// + /// erases key k from the cache. + /// If k does not exist, returns false. If k exists, returns true. 
+ bool erase(const key_type& k) + { + auto iter = m_map.find(k); + + if (iter != m_map.end()) + { + m_list.erase(iter->second.second); + + // Delete the actual entry + map_erase(iter); + + return true; + } + + return false; + } + + /// erase_oldest + /// + /// Removes the oldest entry from the cache. + void erase_oldest() + { + auto key = m_list.back(); + m_list.pop_back(); + + // Delete the actual entry + auto iter = m_map.find(key); + map_erase(iter); + } + + /// touch + /// + /// Touches key k, marking it as most recently used. + /// If k does not exist, returns false. If the touch was successful, returns true. + bool touch(const key_type& k) + { + auto iter = m_map.find(k); + + if (iter != m_map.end()) + { + touch(iter); + return true; + } + + return false; + } + + /// touch + /// + /// Touches key at iterator iter, moving it to most recently used position + void touch(iterator& iter) + { + auto listRef = iter->second.second; + + m_list.erase(listRef); + m_list.push_front(iter->first); + iter->second.second = m_list.begin(); + } + + /// assign + /// + /// Updates key k with data v. + /// If key k does not exist, returns false and no changes are made. + /// If key k exists, existing data has its deletor called and key k's data is replaced with new v data + bool assign(const key_type& k, const value_type& v) + { + auto iter = m_map.find(k); + + if (iter != m_map.end()) + { + assign(iter, v); + return true; + } + + return false; + } + + /// assign + /// + /// Updates data at spot iter with data v. + void assign(iterator& iter, const value_type& v) + { + if (m_delete_callback) + m_delete_callback(iter->second.first); + touch(iter); + iter->second.first = v; + } + + // standard container functions + iterator begin() EA_NOEXCEPT { return m_map.begin(); } + iterator end() EA_NOEXCEPT { return m_map.end(); } + iterator rbegin() EA_NOEXCEPT { return m_map.rbegin(); } + iterator rend() EA_NOEXCEPT { return m_map.rend(); } + const_iterator begin() const EA_NOEXCEPT { return m_map.begin(); } + const_iterator cbegin() const EA_NOEXCEPT { return m_map.cbegin(); } + const_iterator crbegin() const EA_NOEXCEPT { return m_map.crbegin(); } + const_iterator end() const EA_NOEXCEPT { return m_map.end(); } + const_iterator cend() const EA_NOEXCEPT { return m_map.cend(); } + const_iterator crend() const EA_NOEXCEPT { return m_map.crend(); } + + bool empty() const EA_NOEXCEPT { return m_map.empty(); } + size_type size() const EA_NOEXCEPT { return m_map.size(); } + size_type capacity() const EA_NOEXCEPT { return m_capacity; } + + void clear() EA_NOEXCEPT + { + // Since we have a delete callback, we want to reuse the trim function by cheating the max + // size to clear all the entries to avoid duplicating code. + auto old_max = m_capacity; + + m_capacity = 0; + trim(); + m_capacity = old_max; + } + + /// resize + /// + /// Resizes the cache. Can be used to either expand or contract the cache. + /// In the case of a contraction, the oldest entries will be evicted with their respective + /// deletors called before completing. 
+ void resize(size_type newSize) + { + m_capacity = newSize; + trim(); + } + + void setCreateCallback(create_callback_type callback) { m_create_callback = callback; } + void setDeleteCallback(delete_callback_type callback) { m_delete_callback = callback; } + + // EASTL extensions + const allocator_type& get_allocator() const EA_NOEXCEPT { return m_map.get_allocator(); } + allocator_type& get_allocator() EA_NOEXCEPT { return m_map.get_allocator(); } + void set_allocator(const allocator_type& allocator) { m_map.set_allocator(allocator); m_list.set_allocator(allocator); } + + /// Does not reset the callbacks + void reset_lose_memory() EA_NOEXCEPT { m_map.reset_lose_memory(); m_list.reset_lose_memory(); } + + private: + inline void map_erase(map_iterator pos) + { + if (m_delete_callback) + m_delete_callback(pos->second.first); + m_map.erase(pos); + } + + bool trim() + { + if (size() <= m_capacity) + { + return false; // No trim necessary + } + + // We need to trim + do + { + erase_oldest(); + } while (m_list.size() > m_capacity); + + return true; + } + + void make_space() + { + if (size() == m_capacity) + { + erase_oldest(); + } + } + + private: + list_type m_list; + map_type m_map; + size_type m_capacity; + create_callback_type m_create_callback; + delete_callback_type m_delete_callback; + }; +} + + + +#endif diff --git a/libkram/eastl/include/EASTL/bonus/ring_buffer.h b/libkram/eastl/include/EASTL/bonus/ring_buffer.h new file mode 100644 index 00000000..fcd8fd2c --- /dev/null +++ b/libkram/eastl/include/EASTL/bonus/ring_buffer.h @@ -0,0 +1,1581 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// A ring buffer is a FIFO (first-in, first-out) container which acts +// much like a queue. The difference is that a ring buffer is implemented +// via chasing pointers around a given container instead of like queue +// adds to the writes to the end of the container are reads from the begin. +// The benefit of a ring buffer is that memory allocations don't occur +// and new elements are neither added nor removed from the container. +// Elements in the container are simply assigned values in circles around +// the container. +/////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_RING_BUFFER_H +#define EASTL_RING_BUFFER_H + + +#include +#include +#include +#include +#include + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + + + +namespace eastl +{ + /// EASTL_RING_BUFFER_DEFAULT_NAME + /// + /// Defines a default container name in the absence of a user-provided name. + /// + #ifndef EASTL_RING_BUFFER_DEFAULT_NAME + #define EASTL_RING_BUFFER_DEFAULT_NAME EASTL_DEFAULT_NAME_PREFIX " ring_buffer" // Unless the user overrides something, this is "EASTL ring_buffer". + #endif + + /// EASTL_RING_BUFFER_DEFAULT_ALLOCATOR + /// + #ifndef EASTL_RING_BUFFER_DEFAULT_ALLOCATOR + #define EASTL_RING_BUFFER_DEFAULT_ALLOCATOR allocator_type(EASTL_RING_BUFFER_DEFAULT_NAME) + #endif + + + /// ring_buffer_iterator + /// + /// We force this iterator to act like a random access iterator even if + /// the underlying container doesn't support random access iteration. 
+ /// Any BidirectionalIterator can be a RandomAccessIterator; it just + /// might be inefficient in some cases. + /// + template + struct ring_buffer_iterator + { + public: + typedef ring_buffer_iterator this_type; + typedef T value_type; + typedef Pointer pointer; + typedef Reference reference; + typedef typename Container::size_type size_type; + typedef typename Container::difference_type difference_type; + typedef typename Container::iterator container_iterator; + typedef typename Container::const_iterator container_const_iterator; + typedef ring_buffer_iterator iterator; + typedef ring_buffer_iterator const_iterator; + typedef EASTL_ITC_NS::random_access_iterator_tag iterator_category; + + public: + Container* mpContainer; + container_iterator mContainerIterator; + + public: + ring_buffer_iterator(); + ring_buffer_iterator(Container* pContainer, const container_iterator& containerIterator); + ring_buffer_iterator(const iterator& x); + + ring_buffer_iterator& operator=(const iterator& x); + + reference operator*() const; + pointer operator->() const; + + this_type& operator++(); + this_type operator++(int); + + this_type& operator--(); + this_type operator--(int); + + this_type& operator+=(difference_type n); + this_type& operator-=(difference_type n); + + this_type operator+(difference_type n) const; + this_type operator-(difference_type n) const; + + protected: + void increment(difference_type n, EASTL_ITC_NS::input_iterator_tag); + void increment(difference_type n, EASTL_ITC_NS::random_access_iterator_tag); + + }; // struct ring_buffer_iterator + + + + /// ring_buffer + /// + /// Implements a ring buffer via a given container type, which would + /// typically be a vector or array, though any container which supports + /// bidirectional iteration would work. + /// + /// A ring buffer is a FIFO (first-in, first-out) container which acts + /// much like a queue. The difference is that a ring buffer is implemented + /// via chasing pointers around a container and moving the read and write + /// positions forward (and possibly wrapping around) as the container is + /// read and written via pop_front and push_back. + /// + /// The benefit of a ring buffer is that memory allocations don't occur + /// and new elements are neither added nor removed from the container. + /// Elements in the container are simply assigned values in circles around + /// the container. + /// + /// ring_buffer is different from other containers -- including adapter + /// containers -- in how iteration is done. Iteration of a ring buffer + /// starts at the current begin position, proceeds to the end of the underlying + /// container, and continues at the begin of the underlying container until + /// the ring buffer's current end position. Thus a ring_buffer does + /// indeed have a begin and an end, though the values of begin and end + /// chase each other around the container. An empty ring_buffer is one + /// in which end == begin, and a full ring_buffer is one in which + /// end + 1 == begin. + /// + /// Example of a ring buffer layout, where + indicates queued items: + /// ++++++++++--------------------------------+++++++++ + /// ^ ^ + /// end begin + /// + /// Empty ring buffer: + /// --------------------------------------------------- + /// ^ + /// begin / end + /// + /// Full ring buffer. 
Note that one item is necessarily unused; it is + /// analagous to a '\0' at the end of a C string: + /// +++++++++++++++++++++++++++++++++++++++++-+++++++++ + /// ^^ + /// end begin + /// + /// A push_back operation on a ring buffer assigns the new value to end. + /// If there is no more space in the buffer, this will result in begin + /// being overwritten and the begin position being moved foward one position. + /// The user can use the full() function to detect this condition. + /// Note that elements in a ring buffer are not created or destroyed as + /// their are added and removed; they are merely assigned. Only on + /// container construction and destruction are any elements created and + /// destroyed. + /// + /// The ring buffer can be used in either direction. By this we mean that + /// you can use push_back to add items and pop_front to remove them; or you can + /// use push_front to add items and pop_back to remove them. You aren't + /// limited to these operations; you can push or pop from either side + /// arbitrarily and you can insert or erase anywhere in the container. + /// + /// The ring buffer requires the user to specify a Container type, which + /// by default is vector. However, any container with bidirectional iterators + /// will work, such as list, deque, string or any of the fixed_* versions + /// of these containers, such as fixed_string. Since ring buffer works via copying + /// elements instead of allocating and freeing nodes, inserting in the middle + /// of a ring buffer based on list (instead of vector) is no more efficient. + /// + /// To use the ring buffer, its container must be resized to the desired + /// ring buffer size. Changing the size of a ring buffer may cause ring + /// buffer iterators to invalidate. + /// + /// An alternative to using a ring buffer is to use a list with a user-created + /// node pool and custom allocator. There are various tradeoffs that result from this. + /// + /// Example usage: + /// ring_buffer< int, list > rb(100); + /// rb.push_back(1); + /// + /// Example usage: + /// // Example of creating an on-screen debug log that shows 16 + /// // strings at a time and scrolls older strings away. + /// + /// // Create ring buffer of 16 strings. + /// ring_buffer< string, vector > debugLogText(16); + /// + /// // Reserve 128 chars for each line. This can make it so that no + /// // runtime memory allocations occur. + /// for(vector::iterator it = debugLogText.get_container().begin(), + /// itEnd = debugLogText.get_container().end(); it != itEnd; ++it) + /// { + /// (*it).reserve(128); + /// } + /// + /// // Add a new string, using push_front() and front() instead of + /// // push_front(str) in order to avoid creating a temporary str. 
+ /// debugLogText.push_front(); + /// debugLogText.front() = "Player fired weapon"; + /// + template , typename Allocator = typename Container::allocator_type> + class ring_buffer + { + public: + typedef ring_buffer this_type; + typedef Container container_type; + typedef Allocator allocator_type; + + typedef typename Container::value_type value_type; + typedef typename Container::reference reference; + typedef typename Container::const_reference const_reference; + typedef typename Container::size_type size_type; + typedef typename Container::difference_type difference_type; + typedef typename Container::iterator container_iterator; + typedef typename Container::const_iterator container_const_iterator; + typedef ring_buffer_iterator iterator; + typedef ring_buffer_iterator const_iterator; + typedef eastl::reverse_iterator reverse_iterator; + typedef eastl::reverse_iterator const_reverse_iterator; + + public: // We declare public so that global comparison operators can be implemented without adding an inline level and without tripping up GCC 2.x friend declaration failures. GCC (through at least v4.0) is poor at inlining and performance wins over correctness. + Container c; // We follow the naming convention established for stack, queue, priority_queue and name this 'c'. This variable must always have a size of at least 1, as even an empty ring_buffer has an unused terminating element. + + protected: + container_iterator mBegin; // We keep track of where our begin and end are by using Container iterators. + container_iterator mEnd; + size_type mSize; + + public: + // There currently isn't a ring_buffer constructor that specifies an initial size, unlike other containers. + explicit ring_buffer(size_type cap = 0); // Construct with an initial capacity (but size of 0). + explicit ring_buffer(size_type cap, const allocator_type& allocator); + explicit ring_buffer(const Container& x); + explicit ring_buffer(const allocator_type& allocator); + ring_buffer(const this_type& x); + ring_buffer(this_type&& x); + ring_buffer(this_type&& x, const allocator_type& allocator); + ring_buffer(std::initializer_list ilist, const allocator_type& allocator = EASTL_RING_BUFFER_DEFAULT_ALLOCATOR); // This function sets the capacity to be equal to the size of the initializer list. + + // No destructor necessary. Default will do. + + this_type& operator=(const this_type& x); + this_type& operator=(std::initializer_list ilist); + this_type& operator=(this_type&& x); + + template + void assign(InputIterator first, InputIterator last); + + void swap(this_type& x); + + iterator begin() EA_NOEXCEPT; + const_iterator begin() const EA_NOEXCEPT; + const_iterator cbegin() const EA_NOEXCEPT; + + iterator end() EA_NOEXCEPT; + const_iterator end() const EA_NOEXCEPT; + const_iterator cend() const EA_NOEXCEPT; + + reverse_iterator rbegin() EA_NOEXCEPT; + const_reverse_iterator rbegin() const EA_NOEXCEPT; + const_reverse_iterator crbegin() const EA_NOEXCEPT; + + reverse_iterator rend() EA_NOEXCEPT; + const_reverse_iterator rend() const EA_NOEXCEPT; + const_reverse_iterator crend() const EA_NOEXCEPT; + + bool empty() const EA_NOEXCEPT; + bool full() const EA_NOEXCEPT; + size_type size() const EA_NOEXCEPT; + size_type capacity() const EA_NOEXCEPT; + + void resize(size_type n); + void set_capacity(size_type n); // Sets the capacity to the given value, including values less than the current capacity. Adjusts the size downward if n < size, by throwing out the oldest elements in the buffer. 
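To make the push/pop and wrap-around behaviour described in the class comment concrete, a minimal sketch against the interface declared here; RecentSum and the sample values are hypothetical, and the usual EASTL allocator hooks are assumed:

    #include <EASTL/bonus/ring_buffer.h>
    #include <EASTL/vector.h>

    int RecentSum()
    {
        // Keep only the 8 most recent samples; no allocation occurs after construction.
        eastl::ring_buffer<int, eastl::vector<int>> samples(8);

        for (int i = 0; i < 20; ++i)
        {
            if (samples.full())
                samples.pop_front();   // drop the oldest explicitly; a push_back on a full
                                       // buffer would otherwise overwrite it and advance begin
            samples.push_back(i);
        }

        // front()/back() are the oldest and newest queued values (12 and 19 here).
        int range = samples.back() - samples.front();

        // Iteration runs from the current begin to the current end, wrapping
        // around the underlying vector as needed.
        int sum = 0;
        for (int v : samples)
            sum += v;

        return sum + range;
    }
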
+ void reserve(size_type n); // Reserve a given capacity. Doesn't decrease the capacity; it only increases it (for compatibility with other containers' behavior). + + reference front(); + const_reference front() const; + + reference back(); + const_reference back() const; + + void push_back(const value_type& value); + reference push_back(); + + void push_front(const value_type& value); + reference push_front(); + + void pop_back(); + void pop_front(); + + reference operator[](size_type n); + const_reference operator[](size_type n) const; + + // To consider: + // size_type read(value_type* pDestination, size_type nCount); + // size_type read(iterator** pPosition1, iterator** pPosition2, size_type& nCount1, size_type& nCount2); + + /* To do: + template + void emplace_front(Args&&... args); + + template + void emplace_back(Args&&... args); + + template + iterator emplace(const_iterator position, Args&&... args); + */ + + iterator insert(const_iterator position, const value_type& value); + void insert(const_iterator position, size_type n, const value_type& value); + void insert(const_iterator position, std::initializer_list ilist); + + template + void insert(const_iterator position, InputIterator first, InputIterator last); + + iterator erase(const_iterator position); + iterator erase(const_iterator first, const_iterator last); + reverse_iterator erase(const_reverse_iterator position); + reverse_iterator erase(const_reverse_iterator first, const_reverse_iterator last); + + void clear(); + + container_type& get_container(); + const container_type& get_container() const; + + bool validate() const; + int validate_iterator(const_iterator i) const; + + protected: + //size_type DoGetSize(EASTL_ITC_NS::input_iterator_tag) const; + //size_type DoGetSize(EASTL_ITC_NS::random_access_iterator_tag) const; + + }; // class ring_buffer + + + + + /////////////////////////////////////////////////////////////////////// + // ring_buffer_iterator + /////////////////////////////////////////////////////////////////////// + + template + ring_buffer_iterator::ring_buffer_iterator() + : mpContainer(NULL), mContainerIterator() + { + } + + + template + ring_buffer_iterator::ring_buffer_iterator(Container* pContainer, const container_iterator& containerIterator) + : mpContainer(pContainer), mContainerIterator(containerIterator) + { + } + + + template + ring_buffer_iterator::ring_buffer_iterator(const iterator& x) + : mpContainer(x.mpContainer), mContainerIterator(x.mContainerIterator) + { + } + + + template + ring_buffer_iterator& + ring_buffer_iterator::operator=(const iterator& x) + { + mpContainer = x.mpContainer; + mContainerIterator = x.mContainerIterator; + return *this; + } + + template + typename ring_buffer_iterator::reference + ring_buffer_iterator::operator*() const + { + return *mContainerIterator; + } + + + template + typename ring_buffer_iterator::pointer + ring_buffer_iterator::operator->() const + { + return &*mContainerIterator; + } + + + template + typename ring_buffer_iterator::this_type& + ring_buffer_iterator::operator++() + { + if(EASTL_UNLIKELY(++mContainerIterator == mpContainer->end())) + mContainerIterator = mpContainer->begin(); + return *this; + } + + + template + typename ring_buffer_iterator::this_type + ring_buffer_iterator::operator++(int) + { + const this_type temp(*this); + if(EASTL_UNLIKELY(++mContainerIterator == mpContainer->end())) + mContainerIterator = mpContainer->begin(); + return temp; + } + + + template + typename ring_buffer_iterator::this_type& + 
ring_buffer_iterator::operator--() + { + if(EASTL_UNLIKELY(mContainerIterator == mpContainer->begin())) + mContainerIterator = mpContainer->end(); + --mContainerIterator; + return *this; + } + + + template + typename ring_buffer_iterator::this_type + ring_buffer_iterator::operator--(int) + { + const this_type temp(*this); + if(EASTL_UNLIKELY(mContainerIterator == mpContainer->begin())) + mContainerIterator = mpContainer->end(); + --mContainerIterator; + return temp; + } + + + template + typename ring_buffer_iterator::this_type& + ring_buffer_iterator::operator+=(difference_type n) + { + typedef typename eastl::iterator_traits::iterator_category IC; + increment(n, IC()); + return *this; + } + + + template + typename ring_buffer_iterator::this_type& + ring_buffer_iterator::operator-=(difference_type n) + { + typedef typename eastl::iterator_traits::iterator_category IC; + increment(-n, IC()); + return *this; + } + + + template + typename ring_buffer_iterator::this_type + ring_buffer_iterator::operator+(difference_type n) const + { + return this_type(*this).operator+=(n); + } + + + template + typename ring_buffer_iterator::this_type + ring_buffer_iterator::operator-(difference_type n) const + { + return this_type(*this).operator+=(-n); + } + + + template + void ring_buffer_iterator::increment(difference_type n, EASTL_ITC_NS::input_iterator_tag) + { + // n cannot be negative, as input iterators don't support reverse iteration. + while(n-- > 0) + operator++(); + } + + + template + void ring_buffer_iterator::increment(difference_type n, EASTL_ITC_NS::random_access_iterator_tag) + { + // We make the assumption here that the user is incrementing from a valid + // starting position to a valid ending position. Thus *this + n yields a + // valid iterator, including if n happens to be a negative value. + + if(n >= 0) + { + const difference_type d = mpContainer->end() - mContainerIterator; + + if(n < d) + mContainerIterator += n; + else + mContainerIterator = mpContainer->begin() + (n - d); + } + else + { + // Recall that n and d here will be negative and so the logic here works as intended. + const difference_type d = mpContainer->begin() - mContainerIterator; + + if(n >= d) + mContainerIterator += n; + else + mContainerIterator = mpContainer->end() + (n - d); + } + } + + + // Random access iterators must support operator + and operator -. + // You can only add an integer to an iterator, and you cannot add two iterators. + template + inline ring_buffer_iterator + operator+(ptrdiff_t n, const ring_buffer_iterator& x) + { + return x + n; // Implement (n + x) in terms of (x + n). + } + + + // You can only add an integer to an iterator, but you can subtract two iterators. + template + inline typename ring_buffer_iterator::difference_type + operator-(const ring_buffer_iterator& a, + const ring_buffer_iterator& b) + { + typedef typename ring_buffer_iterator::difference_type difference_type; + + // To do: If container_iterator is a random access iterator, then do a simple calculation. + // Otherwise, we have little choice but to iterate from a to b and count as we go. + // See the ring_buffer::size function for an implementation of this. + + // Iteration implementation: + difference_type d = 0; + + for(ring_buffer_iterator temp(b); temp != a; ++temp) + ++d; + + return d; + } + + + // The C++ defect report #179 requires that we support comparisons between const and non-const iterators. + // Thus we provide additional template paremeters here to support this. 
The defect report does not + // require us to support comparisons between reverse_iterators and const_reverse_iterators. + template + inline bool operator==(const ring_buffer_iterator& a, + const ring_buffer_iterator& b) + { + // Perhaps we should compare the container pointer as well. + // However, for valid iterators this shouldn't be necessary. + return a.mContainerIterator == b.mContainerIterator; + } + + + template + inline bool operator!=(const ring_buffer_iterator& a, + const ring_buffer_iterator& b) + { + // Perhaps we should compare the container pointer as well. + // However, for valid iterators this shouldn't be necessary. + return !(a.mContainerIterator == b.mContainerIterator); + } + + + // We provide a version of operator!= for the case where the iterators are of the + // same type. This helps prevent ambiguity errors in the presence of rel_ops. + template + inline bool operator!=(const ring_buffer_iterator& a, + const ring_buffer_iterator& b) + { + return !(a.mContainerIterator == b.mContainerIterator); + } + + + + + /////////////////////////////////////////////////////////////////////// + // ring_buffer + /////////////////////////////////////////////////////////////////////// + + template + ring_buffer::ring_buffer(size_type cap) + : c() // Default construction with default allocator for the container. + { + // To do: This code needs to be amended to deal with possible exceptions + // that could occur during the resize call below. + + // We add one because the element at mEnd is necessarily unused. + c.resize(cap + 1); // Possibly we could construct 'c' with size, but c may not have such a ctor, though we rely on it having a resize function. + mBegin = c.begin(); + mEnd = mBegin; + mSize = 0; + } + + + template + ring_buffer::ring_buffer(size_type cap, const allocator_type& allocator) + : c(allocator) + { + // To do: This code needs to be amended to deal with possible exceptions + // that could occur during the resize call below. + + // We add one because the element at mEnd is necessarily unused. + c.resize(cap + 1); // Possibly we could construct 'c' with size, but c may not have such a ctor, though we rely on it having a resize function. + mBegin = c.begin(); + mEnd = mBegin; + mSize = 0; + } + + + template + ring_buffer::ring_buffer(const Container& x) + : c(x) // This copies elements from x, but unless the user is doing some tricks, the only thing that matters is that c.size() == x.size(). + { + // To do: This code needs to be amended to deal with possible exceptions + // that could occur during the resize call below. + if(c.empty()) + c.resize(1); + mBegin = c.begin(); + mEnd = mBegin; + mSize = 0; + } + + + template + ring_buffer::ring_buffer(const allocator_type& allocator) + : c(allocator) + { + // To do: This code needs to be amended to deal with possible exceptions + // that could occur during the resize call below. + + // We add one because the element at mEnd is necessarily unused. + c.resize(1); // Possibly we could construct 'c' with size, but c may not have such a ctor, though we rely on it having a resize function. + mBegin = c.begin(); + mEnd = mBegin; + mSize = 0; + } + + + template + ring_buffer::ring_buffer(const this_type& x) + : c(x.c) + { + mBegin = c.begin(); + mEnd = mBegin; + mSize = x.mSize; + + eastl::advance(mBegin, eastl::distance(const_cast(x).c.begin(), x.mBegin)); // We can do a simple distance algorithm here, as there will be no wraparound. 
+ eastl::advance(mEnd, eastl::distance(const_cast(x).c.begin(), x.mEnd)); + } + + template + ring_buffer::ring_buffer(this_type&& x) + : c() // Default construction with default allocator for the container. + { + c.resize(1); // Possibly we could construct 'c' with size, but c may not have such a ctor, though we rely on it having a resize function. + mBegin = c.begin(); + mEnd = mBegin; + mSize = 0; + + swap(x); // We are leaving x in an unusual state by swapping default-initialized members with it, as it won't be usable and can be only destructible. + } + + template + ring_buffer::ring_buffer(this_type&& x, const allocator_type& allocator) + : c(allocator) + { + c.resize(1); // Possibly we could construct 'c' with size, but c may not have such a ctor, though we rely on it having a resize function. + mBegin = c.begin(); + mEnd = mBegin; + mSize = 0; + + if(c.get_allocator() == x.c.get_allocator()) + swap(x); // We are leaving x in an unusual state by swapping default-initialized members with it, as it won't be usable and can be only destructible. + else + operator=(x); + } + + + template + ring_buffer::ring_buffer(std::initializer_list ilist, const allocator_type& allocator) + : c(allocator) + { + c.resize((eastl_size_t)ilist.size() + 1); + mBegin = c.begin(); + mEnd = mBegin; + mSize = 0; + + assign(ilist.begin(), ilist.end()); + } + + + template + typename ring_buffer::this_type& + ring_buffer::operator=(const this_type& x) + { + if(&x != this) + { + c = x.c; + + mBegin = c.begin(); + mEnd = mBegin; + mSize = x.mSize; + + eastl::advance(mBegin, eastl::distance(const_cast(x).c.begin(), x.mBegin)); // We can do a simple distance algorithm here, as there will be no wraparound. + eastl::advance(mEnd, eastl::distance(const_cast(x).c.begin(), x.mEnd)); + } + + return *this; + } + + + template + typename ring_buffer::this_type& + ring_buffer::operator=(this_type&& x) + { + swap(x); + return *this; + } + + + template + typename ring_buffer::this_type& + ring_buffer::operator=(std::initializer_list ilist) + { + assign(ilist.begin(), ilist.end()); + return *this; + } + + + template + template + void ring_buffer::assign(InputIterator first, InputIterator last) + { + // To consider: We can make specializations of this for pointer-based + // iterators to PODs and turn the action into a memcpy. + clear(); + + for(; first != last; ++first) + push_back(*first); + } + + + template + void ring_buffer::swap(this_type& x) + { + if(&x != this) + { + const difference_type dBegin = eastl::distance(c.begin(), mBegin); // We can do a simple distance algorithm here, as there will be no wraparound. + const difference_type dEnd = eastl::distance(c.begin(), mEnd); + + const difference_type dxBegin = eastl::distance(x.c.begin(), x.mBegin); + const difference_type dxEnd = eastl::distance(x.c.begin(), x.mEnd); + + eastl::swap(c, x.c); + eastl::swap(mSize, x.mSize); + + mBegin = c.begin(); + eastl::advance(mBegin, dxBegin); // We can do a simple advance algorithm here, as there will be no wraparound. + + mEnd = c.begin(); + eastl::advance(mEnd, dxEnd); + + x.mBegin = x.c.begin(); + eastl::advance(x.mBegin, dBegin); + + x.mEnd = x.c.begin(); + eastl::advance(x.mEnd, dEnd); + } + } + + + template + typename ring_buffer::iterator + ring_buffer::begin() EA_NOEXCEPT + { + return iterator(&c, mBegin); + } + + + template + typename ring_buffer::const_iterator + ring_buffer::begin() const EA_NOEXCEPT + { + return const_iterator(const_cast(&c), mBegin); // We trust that the const_iterator will respect const-ness. 
+ } + + + template + typename ring_buffer::const_iterator + ring_buffer::cbegin() const EA_NOEXCEPT + { + return const_iterator(const_cast(&c), mBegin); // We trust that the const_iterator will respect const-ness. + } + + + template + typename ring_buffer::iterator + ring_buffer::end() EA_NOEXCEPT + { + return iterator(&c, mEnd); + } + + + template + typename ring_buffer::const_iterator + ring_buffer::end() const EA_NOEXCEPT + { + return const_iterator(const_cast(&c), mEnd); // We trust that the const_iterator will respect const-ness. + } + + + template + typename ring_buffer::const_iterator + ring_buffer::cend() const EA_NOEXCEPT + { + return const_iterator(const_cast(&c), mEnd); // We trust that the const_iterator will respect const-ness. + } + + + template + typename ring_buffer::reverse_iterator + ring_buffer::rbegin() EA_NOEXCEPT + { + return reverse_iterator(iterator(&c, mEnd)); + } + + + template + typename ring_buffer::const_reverse_iterator + ring_buffer::rbegin() const EA_NOEXCEPT + { + return const_reverse_iterator(const_iterator(const_cast(&c), mEnd)); + } + + + template + typename ring_buffer::const_reverse_iterator + ring_buffer::crbegin() const EA_NOEXCEPT + { + return const_reverse_iterator(const_iterator(const_cast(&c), mEnd)); + } + + + template + typename ring_buffer::reverse_iterator + ring_buffer::rend() EA_NOEXCEPT + { + return reverse_iterator(iterator(&c, mBegin)); + } + + + template + typename ring_buffer::const_reverse_iterator + ring_buffer::rend() const EA_NOEXCEPT + { + return const_reverse_iterator(const_iterator(const_cast(&c), mBegin)); + } + + + template + typename ring_buffer::const_reverse_iterator + ring_buffer::crend() const EA_NOEXCEPT + { + return const_reverse_iterator(const_iterator(const_cast(&c), mBegin)); + } + + + template + bool ring_buffer::empty() const EA_NOEXCEPT + { + return mBegin == mEnd; + } + + + template + bool ring_buffer::full() const EA_NOEXCEPT + { + // Implementation that relies on c.size() being a fast operation: + // return mSize == (c.size() - 1); // (c.size() - 1) == capacity(); we are attempting to reduce function calls. + + // Version that has constant speed guarantees, but is still pretty fast. + const_iterator afterEnd(end()); + ++afterEnd; + return afterEnd.mContainerIterator == mBegin; + } + + + template + typename ring_buffer::size_type + ring_buffer::size() const EA_NOEXCEPT + { + return mSize; + + // Alternatives: + // return eastl::distance(begin(), end()); + // return end() - begin(); // This is more direct than using distance(). + //typedef typename eastl::iterator_traits::iterator_category IC; + //return DoGetSize(IC()); // This is more direct than using iterator math. + } + + + /* + template + typename ring_buffer::size_type + ring_buffer::DoGetSize(EASTL_ITC_NS::input_iterator_tag) const + { + // We could alternatively just use eastl::distance() here, but we happen to + // know that such code would boil down to what we have here, and we might + // as well remove function calls where possible. 
+ difference_type d = 0; + + for(const_iterator temp(begin()), tempEnd(end()); temp != tempEnd; ++temp) + ++d; + + return (size_type)d; + } + */ + + /* + template + typename ring_buffer::size_type + ring_buffer::DoGetSize(EASTL_ITC_NS::random_access_iterator_tag) const + { + // A simpler but less efficient implementation fo this function would be: + // return eastl::distance(mBegin, mEnd); + // + // The calculation of distance here takes advantage of the fact that random + // access iterators' distances can be calculated by simple pointer calculation. + // Thus the code below boils down to a few subtractions when using a vector, + // string, or array as the Container type. + // + const difference_type dBegin = eastl::distance(const_cast(c).begin(), mBegin); // const_cast here solves a little compiler + const difference_type dEnd = eastl::distance(const_cast(c).begin(), mEnd); // argument matching problem. + + if(dEnd >= dBegin) + return dEnd - dBegin; + + return c.size() - (dBegin - dEnd); + } + */ + + + namespace Internal + { + /////////////////////////////////////////////////////////////// + // has_overflow_allocator + // + // returns true_type when the specified container type is an + // eastl::fixed_* container and therefore has an overflow + // allocator type. + // + template + struct has_overflow_allocator : false_type {}; + + template + struct has_overflow_allocator().get_overflow_allocator())>> : true_type {}; + + + /////////////////////////////////////////////////////////////// + // GetFixedContainerCtorAllocator + // + // eastl::fixed_* containers are only constructible via their + // overflow allocator type. This helper select the appropriate + // allocator from the specified container. + // + template ()()> + struct GetFixedContainerCtorAllocator + { + auto& operator()(Container& c) { return c.get_overflow_allocator(); } + }; + + template + struct GetFixedContainerCtorAllocator + { + auto& operator()(Container& c) { return c.get_allocator(); } + }; + } // namespace Internal + + + /////////////////////////////////////////////////////////////// + // ContainerTemporary + // + // Helper type which prevents utilizing excessive stack space + // when creating temporaries when swapping/copying the underlying + // ring_buffer container type. + // + template = EASTL_MAX_STACK_USAGE)> + struct ContainerTemporary + { + Container mContainer; + + ContainerTemporary(Container& parentContainer) + : mContainer(Internal::GetFixedContainerCtorAllocator{}(parentContainer)) + { + } + + Container& get() { return mContainer; } + }; + + template + struct ContainerTemporary + { + typename Container::allocator_type* mAllocator; + Container* mContainer; + + ContainerTemporary(Container& parentContainer) + : mAllocator(&parentContainer.get_allocator()) + , mContainer(new (mAllocator->allocate(sizeof(Container))) Container) + { + } + + ~ContainerTemporary() + { + mContainer->~Container(); + mAllocator->deallocate(mContainer, sizeof(Container)); + } + + Container& get() { return *mContainer; } + }; + + + template + void ring_buffer::resize(size_type n) + { + // Note that if n > size(), we just move the end position out to + // the begin + n, with the data being the old end and the new end + // being stale values from the past. This is by design, as the concept + // of arbitrarily resizing a ring buffer like this is currently deemed + // to be vague in what it intends to do. We can only assume that the + // user knows what he is doing and will deal with the stale values. 
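+		// Editor's sketch (an assumption for illustration, not original source): growing the size
+		// exposes whatever values were last stored in the underlying container.
+		//     ring_buffer<int> rb(4);
+		//     rb.push_back(1); rb.push_back(2);   // size() == 2
+		//     rb.resize(4);                       // size() == 4; rb[2] and rb[3] hold stale values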
+ EASTL_ASSERT(c.size() >= 1); + const size_type cap = (c.size() - 1); + + mSize = n; + + if(n > cap) // If we need to grow in capacity... + { + // Given that a growing operation will always result in memory allocation, + // we currently implement this function via the usage of a temp container. + // This makes for a simple implementation, but in some cases it is less + // efficient. In particular, if the container is a node-based container like + // a (linked) list, this function would be faster if we simply added nodes + // to ourself. We would do this by inserting the nodes to be after end() + // and adjusting the begin() position if it was after end(). + + // To do: This code needs to be amended to deal with possible exceptions + // that could occur during the resize call below. + + ContainerTemporary cTemp(c); + cTemp.get().resize(n + 1); + eastl::copy(begin(), end(), cTemp.get().begin()); + eastl::swap(c, cTemp.get()); + + mBegin = c.begin(); + mEnd = mBegin; + eastl::advance(mEnd, n); // We can do a simple advance algorithm on this because we know that mEnd will not wrap around. + } + else // We could do a check here for n != size(), but that would be costly and people don't usually resize things to their same size. + { + mEnd = mBegin; + + // eastl::advance(mEnd, n); // We *cannot* use this because there may be wraparound involved. + + // To consider: Possibly we should implement some more detailed logic to optimize the code here. + // We'd need to do different behaviour dending on whether the container iterator type is a + // random access iterator or otherwise. + + while(n--) + { + if(EASTL_UNLIKELY(++mEnd == c.end())) + mEnd = c.begin(); + } + } + } + + + template + typename ring_buffer::size_type + ring_buffer::capacity() const EA_NOEXCEPT + { + EASTL_ASSERT(c.size() >= 1); // This is required because even an empty ring_buffer has one unused termination element, somewhat like a \0 at the end of a C string. + + return (c.size() - 1); // Need to subtract one because the position at mEnd is unused. + } + + + template + void ring_buffer::set_capacity(size_type n) + { + const size_type capacity = (c.size() - 1); + + if(n != capacity) // If we need to change capacity... + { + ContainerTemporary cTemp(c); + cTemp.get().resize(n + 1); + + iterator itCopyBegin = begin(); + + if(n < mSize) // If we are shrinking the capacity, to less than our size... + { + eastl::advance(itCopyBegin, mSize - n); + mSize = n; + } + + eastl::copy(itCopyBegin, end(), cTemp.get().begin()); // The begin-end range may in fact be larger than n, in which case values will be overwritten. + eastl::swap(c, cTemp.get()); + + mBegin = c.begin(); + mEnd = mBegin; + eastl::advance(mEnd, mSize); // We can do a simple advance algorithm on this because we know that mEnd will not wrap around. + } + } + + + template + void ring_buffer::reserve(size_type n) + { + // We follow the pattern of vector and only do something if n > capacity. + EASTL_ASSERT(c.size() >= 1); + + if(n > (c.size() - 1)) // If we need to grow in capacity... // (c.size() - 1) == capacity(); we are attempting to reduce function calls. + { + ContainerTemporary cTemp(c); + cTemp.get().resize(n + 1); + eastl::copy(begin(), end(), cTemp.get().begin()); + eastl::swap(c, cTemp.get()); + + mBegin = c.begin(); + mEnd = mBegin; + eastl::advance(mEnd, mSize); // We can do a simple advance algorithm on this because we know that mEnd will not wrap around. 
+ } + } + + + template + typename ring_buffer::reference + ring_buffer::front() + { + return *mBegin; + } + + + template + typename ring_buffer::const_reference + ring_buffer::front() const + { + return *mBegin; + } + + + template + typename ring_buffer::reference + ring_buffer::back() + { + // return *(end() - 1); // Can't use this because not all iterators support operator-. + + iterator temp(end()); // To do: Find a way to construct this temporary in the return statement. + return *(--temp); // We can do it by making all our containers' iterators support operator-. + } + + + template + typename ring_buffer::const_reference + ring_buffer::back() const + { + // return *(end() - 1); // Can't use this because not all iterators support operator-. + + const_iterator temp(end()); // To do: Find a way to construct this temporary in the return statement. + return *(--temp); // We can do it by making all our containers' iterators support operator-. + } + + + /// A push_back operation on a ring buffer assigns the new value to end. + /// If there is no more space in the buffer, this will result in begin + /// being overwritten and the begin position being moved foward one position. + template + void ring_buffer::push_back(const value_type& value) + { + *mEnd = value; + + if(++mEnd == c.end()) + mEnd = c.begin(); + + if(mEnd == mBegin) + { + if(++mBegin == c.end()) + mBegin = c.begin(); + } + else + ++mSize; + } + + + /// A push_back operation on a ring buffer assigns the new value to end. + /// If there is no more space in the buffer, this will result in begin + /// being overwritten and the begin position being moved foward one position. + template + typename ring_buffer::reference + ring_buffer::push_back() + { + // We don't do the following assignment, as the value at mEnd is already constructed; + // it is merely possibly not default-constructed. However, the spirit of push_back + // is that the user intends to do an assignment or data modification after the + // push_back call. The user can always execute *back() = value_type() if he wants. + //*mEnd = value_type(); + + if(++mEnd == c.end()) + mEnd = c.begin(); + + if(mEnd == mBegin) + { + if(++mBegin == c.end()) + mBegin = c.begin(); + } + else + ++mSize; + + return back(); + } + + + template + void ring_buffer::pop_back() + { + EASTL_ASSERT(mEnd != mBegin); // We assume that size() > 0 and thus that there is something to pop. + + if(EASTL_UNLIKELY(mEnd == c.begin())) + mEnd = c.end(); + --mEnd; + --mSize; + } + + + template + void ring_buffer::push_front(const value_type& value) + { + if(EASTL_UNLIKELY(mBegin == c.begin())) + mBegin = c.end(); + + if(--mBegin == mEnd) + { + if(EASTL_UNLIKELY(mEnd == c.begin())) + mEnd = c.end(); + --mEnd; + } + else + ++mSize; + + *mBegin = value; + } + + + template + typename ring_buffer::reference + ring_buffer::push_front() + { + if(EASTL_UNLIKELY(mBegin == c.begin())) + mBegin = c.end(); + + if(--mBegin == mEnd) + { + if(EASTL_UNLIKELY(mEnd == c.begin())) + mEnd = c.end(); + --mEnd; + } + else + ++mSize; + + // See comments above in push_back for why we don't execute this: + // *mBegin = value_type(); + + return *mBegin; // Same as return front(); + } + + + template + void ring_buffer::pop_front() + { + EASTL_ASSERT(mBegin != mEnd); // We assume that mEnd > mBegin and thus that there is something to pop. 
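+		// Editor's usage note (an assumption, not from the original header): popping from an empty
+		// ring_buffer is only caught by the assert above, so callers are expected to guard it, e.g.
+		//     if(!rb.empty())
+		//         rb.pop_front();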
+ + if(++mBegin == c.end()) + mBegin = c.begin(); + --mSize; + } + + + template + typename ring_buffer::reference + ring_buffer::operator[](size_type n) + { + // return *(begin() + n); // Can't use this because not all iterators support operator+. + + // This should compile to code that is nearly as efficient as that above. + // The primary difference is the possible generation of a temporary in this case. + iterator temp(begin()); + eastl::advance(temp, n); + return *(temp.mContainerIterator); + } + + + template + typename ring_buffer::const_reference + ring_buffer::operator[](size_type n) const + { + // return *(begin() + n); // Can't use this because not all iterators support operator+. + + // This should compile to code that is nearly as efficient as that above. + // The primary difference is the possible generation of a temporary in this case. + const_iterator temp(begin()); + eastl::advance(temp, n); + return *(temp.mContainerIterator); + } + + + template + typename ring_buffer::iterator + ring_buffer::insert(const_iterator position, const value_type& value) + { + // To consider: It would be faster if we could tell that position was in the first + // half of the container and instead of moving things after the position back, + // we could move things before the position forward. + + iterator afterEnd(end()); + iterator beforeEnd(afterEnd); + + ++afterEnd; + + if(afterEnd.mContainerIterator == mBegin) // If we are at full capacity... + --beforeEnd; + else + push_back(); + + iterator itPosition(position.mpContainer, position.mContainerIterator); // We merely copy from const_iterator to iterator. + eastl::copy_backward(itPosition, beforeEnd, end()); + *itPosition = value; + + return itPosition; + } + + + template + void ring_buffer::insert(const_iterator position, size_type n, const value_type& value) + { + // To do: This can be improved with a smarter version. However, + // this is a little tricky because we need to deal with the case + // whereby n is greater than the size of the container itself. + while(n--) + insert(position, value); + } + + + template + void ring_buffer::insert(const_iterator position, std::initializer_list ilist) + { + insert(position, ilist.begin(), ilist.end()); + } + + + template + template + void ring_buffer::insert(const_iterator position, InputIterator first, InputIterator last) + { + // To do: This can possibly be improved with a smarter version. + // However, this can be tricky if distance(first, last) is greater + // than the size of the container itself. + for(; first != last; ++first, ++position) + insert(position, *first); + } + + + template + typename ring_buffer::iterator + ring_buffer::erase(const_iterator position) + { + iterator itPosition(position.mpContainer, position.mContainerIterator); // We merely copy from const_iterator to iterator. + iterator iNext(itPosition); + + eastl::copy(++iNext, end(), itPosition); + pop_back(); + + return itPosition; + } + + + template + typename ring_buffer::iterator + ring_buffer::erase(const_iterator first, const_iterator last) + { + iterator itFirst(first.mpContainer, first.mContainerIterator); // We merely copy from const_iterator to iterator. + iterator itLast(last.mpContainer, last.mContainerIterator); + + typename iterator::difference_type d = eastl::distance(itFirst, itLast); + + eastl::copy(itLast, end(), itFirst); + + while(d--) // To do: improve this implementation. 
+ pop_back(); + + return itFirst; + } + + + template + typename ring_buffer::reverse_iterator + ring_buffer::erase(const_reverse_iterator position) + { + return reverse_iterator(erase((++position).base())); + } + + + template + typename ring_buffer::reverse_iterator + ring_buffer::erase(const_reverse_iterator first, const_reverse_iterator last) + { + // Version which erases in order from first to last. + // difference_type i(first.base() - last.base()); + // while(i--) + // first = erase(first); + // return first; + + // Version which erases in order from last to first, but is slightly more efficient: + return reverse_iterator(erase((++last).base(), (++first).base())); + } + + + template + void ring_buffer::clear() + { + // Don't clear the container; we use its valid data for our elements. + mBegin = c.begin(); + mEnd = c.begin(); + mSize = 0; + } + + + template + typename ring_buffer::container_type& + ring_buffer::get_container() + { + return c; + } + + + template + const typename ring_buffer::container_type& + ring_buffer::get_container() const + { + return c; + } + + + template + inline bool ring_buffer::validate() const + { + if(!c.validate()) // This requires that the container implement the validate function. That pretty much + return false; // means that the container is an EASTL container and not a std STL container. + + if(c.empty()) // c must always have a size of at least 1, as even an empty ring_buffer has an unused terminating element. + return false; + + if(size() > capacity()) + return false; + + if((validate_iterator(begin()) & (isf_valid | isf_current)) != (isf_valid | isf_current)) + return false; + + if((validate_iterator(end()) & (isf_valid | isf_current)) != (isf_valid | isf_current)) + return false; + + // Verify that the size calculation is consistent. + size_type n = 0; + for(const_iterator i(begin()), iEnd(end()); i != iEnd; ++i) + ++n; + if(n != mSize) + return false; + + return true; + } + + + template + inline int ring_buffer::validate_iterator(const_iterator i) const + { + // To do: Replace this with a more efficient implementation if possible. 
+ + for(const_iterator temp = begin(), tempEnd = end(); temp != tempEnd; ++temp) + { + if(temp == i) + return (isf_valid | isf_current | isf_can_dereference); + } + + if(i == end()) + return (isf_valid | isf_current); + + return isf_none; + } + + + + /////////////////////////////////////////////////////////////////////// + // global operators + /////////////////////////////////////////////////////////////////////// + + template + inline bool operator==(const ring_buffer& a, const ring_buffer& b) + { + return (a.size() == b.size()) && (a.c == b.c); + } + + + template + inline bool operator<(const ring_buffer& a, const ring_buffer& b) + { + const typename ring_buffer::size_type sizeA = a.size(); + const typename ring_buffer::size_type sizeB = b.size(); + + if(sizeA == sizeB) + return (a.c < b.c); + return sizeA < sizeB; + } + + + template + inline bool operator!=(const ring_buffer& a, const ring_buffer& b) + { + return !(a == b); + } + + + template + inline bool operator>(const ring_buffer& a, const ring_buffer& b) + { + return (b < a); + } + + + template + inline bool operator<=(const ring_buffer& a, const ring_buffer& b) + { + return !(b < a); + } + + + template + inline bool operator>=(const ring_buffer& a, const ring_buffer& b) + { + return !(a < b); + } + + + template + inline void swap(ring_buffer& a, ring_buffer& b) + { + a.swap(b); + } + + +} // namespace eastl + + +#endif // Header include guard + + + + + + + + + + + + + + + + + + + diff --git a/libkram/eastl/include/EASTL/bonus/sort_extra.h b/libkram/eastl/include/EASTL/bonus/sort_extra.h new file mode 100644 index 00000000..5f9a0c46 --- /dev/null +++ b/libkram/eastl/include/EASTL/bonus/sort_extra.h @@ -0,0 +1,204 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + +////////////////////////////////////////////////////////////////////////////// +// This file implements additional sort algorithms beyond the basic set. +// Included here are: +// selection_sort -- Unstable. +// shaker_sort -- Stable. +// bucket_sort -- Stable. +// +////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_SORT_EXTRA_H +#define EASTL_SORT_EXTRA_H + + +#include +#include +#include +#include +#include +#include // For backwards compatibility due to sorts moved from here to sort.h. +#include + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + + + +namespace eastl +{ + /// selection_sort + /// + /// Implements the SelectionSort algorithm. + /// + template + void selection_sort(ForwardIterator first, ForwardIterator last, StrictWeakOrdering compare) + { + ForwardIterator iCurrent, iMin; + + for(; first != last; ++first) + { + iCurrent = first; + iMin = iCurrent; + + for(++iCurrent; iCurrent != last; ++iCurrent) + { + if(compare(*iCurrent, *iMin)) + { + EASTL_VALIDATE_COMPARE(!compare(*iMin, *iCurrent)); // Validate that the compare function is sane. 
+ iMin = iCurrent; + } + } + + if(first != iMin) + eastl::iter_swap(first, iMin); + } + } // selection_sort + + template + inline void selection_sort(ForwardIterator first, ForwardIterator last) + { + typedef eastl::less::value_type> Less; + + eastl::selection_sort(first, last, Less()); + } + + + + /// shaker_sort + /// + /// Implements the ShakerSort algorithm, which is a sorting algorithm which + /// improves on bubble_sort by sweeping both from left to right and right + /// to left, resulting in less iteration. + /// + template + void shaker_sort(BidirectionalIterator first, BidirectionalIterator last, StrictWeakOrdering compare) + { + if(first != last) + { + BidirectionalIterator iCurrent, iNext, iLastModified; + + --last; + + while(first != last) + { + iLastModified = first; + + for(iCurrent = first; iCurrent != last; iCurrent = iNext) + { + iNext = iCurrent; + ++iNext; + + if(compare(*iNext, *iCurrent)) + { + EASTL_VALIDATE_COMPARE(!compare(*iCurrent, *iNext)); // Validate that the compare function is sane. + iLastModified = iCurrent; + eastl::iter_swap(iCurrent, iNext); + } + } + + last = iLastModified; + + if(first != last) + { + for(iCurrent = last; iCurrent != first; iCurrent = iNext) + { + iNext = iCurrent; + --iNext; + + if(compare(*iCurrent, *iNext)) + { + EASTL_VALIDATE_COMPARE(!compare(*iNext, *iCurrent)); // Validate that the compare function is sane. + iLastModified = iCurrent; + eastl::iter_swap(iNext, iCurrent); + } + } + first = iLastModified; + } + } + } + } // shaker_sort + + template + inline void shaker_sort(BidirectionalIterator first, BidirectionalIterator last) + { + typedef eastl::less::value_type> Less; + + eastl::shaker_sort(first, last, Less()); + } + + + + /// bucket_sort + /// + /// Implements the BucketSort algorithm. + /// + /// Example usage: + /// const size_t kElementRange = 32; + /// vector intArray(1000); + /// + /// for(int i = 0; i < 1000; i++) + /// intArray[i] = rand() % kElementRange; + /// + /// vector< vector > bucketArray(kElementRange); + /// bucket_sort(intArray.begin(), intArray.end(), bucketArray, eastl::hash_use_self()); + /// + template + struct hash_use_self + { + T operator()(const T& x) const + { return x; } + }; + + // Requires buckeyArray to be an array of arrays with a size equal to the range of values + // returned by the hash function. The hash function is required to return a unique value + // for each uniquely sorted element. Usually the way this is done is the elements are + // integers of a limited range (e.g. 0-64) and the hash function returns the element value + // itself. If you had a case where all elements were always even numbers (e.g. 0-128), + // you could use a custom hash function that returns (element value / 2). + // + // The user is required to provide an empty bucketArray to this function. This function returns + // with the bucketArray non-empty. This function doesn't clear the bucketArray because that takes + // time and the user might not need it to be cleared, at least at that time. 
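+	// Editor's sketch (hypothetical, not part of the original header) of such a custom hash
+	// functor for the even-numbers case described above (inputs 0..128, all even):
+	//     struct hash_halve { eastl_size_t operator()(int x) const { return (eastl_size_t)(x / 2); } };
+	//     vector< vector<int> > buckets(65);   // hash values 0..64
+	//     bucket_sort(intArray.begin(), intArray.end(), buckets, hash_halve());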
+ // + template + void bucket_sort(ForwardIterator first, ForwardIterator last, ContainerArray& bucketArray, HashFunction hash /*= hash_use_self*/) + { + for(ForwardIterator iInput = first; iInput != last; ++iInput) + bucketArray[hash(*iInput)].push_back(*iInput); + + for(typename ContainerArray::const_iterator iBucket = bucketArray.begin(); iBucket != bucketArray.end(); ++iBucket) + first = eastl::copy((*iBucket).begin(), (*iBucket).end(), first); + } + + + +} // namespace eastl + + +#endif // Header include guard + + + + + + + + + + + + + + + + + + + + diff --git a/libkram/eastl/include/EASTL/bonus/tuple_vector.h b/libkram/eastl/include/EASTL/bonus/tuple_vector.h new file mode 100644 index 00000000..7123c57f --- /dev/null +++ b/libkram/eastl/include/EASTL/bonus/tuple_vector.h @@ -0,0 +1,1592 @@ +/////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +/////////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// tuple_vector is a data container that is designed to abstract and simplify +// the handling of a "structure of arrays" layout of data in memory. In +// particular, it mimics the interface of vector, including functionality to do +// inserts, erases, push_backs, and random-access. It also provides a +// RandomAccessIterator and corresponding functionality, making it compatible +// with most STL (and STL-esque) algorithms such as ranged-for loops, find_if, +// remove_if, or sort. + +// When used or applied properly, this container can improve performance of +// some algorithms through cache-coherent data accesses or allowing for +// sensible SIMD programming, while keeping the structure of a single +// container, to permit a developer to continue to use existing algorithms in +// STL and the like. +// +// Consult doc/Bonus/tuple_vector_readme.md for more information. +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_TUPLEVECTOR_H +#define EASTL_TUPLEVECTOR_H + +#include +#include +#include +#include +#include +#include + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + +EA_DISABLE_VC_WARNING(4244) // warning C4244: 'conversion from '___' to '___', possible loss of data +EA_DISABLE_VC_WARNING(4623) // warning C4623: default constructor was implicitly defined as deleted +EA_DISABLE_VC_WARNING(4625) // warning C4625: copy constructor was implicitly defined as deleted +EA_DISABLE_VC_WARNING(4510) // warning C4510: default constructor could not be generated + +namespace eastl +{ + /// EASTL_TUPLE_VECTOR_DEFAULT_NAME + /// + /// Defines a default container name in the absence of a user-provided name. + /// + #ifndef EASTL_TUPLE_VECTOR_DEFAULT_NAME + #define EASTL_TUPLE_VECTOR_DEFAULT_NAME EASTL_DEFAULT_NAME_PREFIX " tuple-vector" // Unless the user overrides something, this is "EASTL tuple-vector". 
+ #endif + + + /// EASTL_TUPLE_VECTOR_DEFAULT_ALLOCATOR + /// + #ifndef EASTL_TUPLE_VECTOR_DEFAULT_ALLOCATOR + #define EASTL_TUPLE_VECTOR_DEFAULT_ALLOCATOR allocator_type(EASTL_TUPLE_VECTOR_DEFAULT_NAME) + #endif + +namespace TupleVecInternal +{ + +// forward declarations +template +struct tuplevec_element; + +template +using tuplevec_element_t = typename tuplevec_element::type; + +template +struct TupleTypes {}; + +template +class TupleVecImpl; + +template +struct TupleRecurser; + +template +struct TupleIndexRecurser; + +template +struct TupleVecLeaf; + +template +struct TupleVecIter; + +// tuplevec_element helper to be able to isolate a type given an index +template +struct tuplevec_element +{ + static_assert(I != I, "tuplevec_element index out of range"); +}; + +template +struct tuplevec_element<0, T, Ts...> +{ + tuplevec_element() = delete; // tuplevec_element should only be used for compile-time assistance, and never be instantiated + typedef T type; +}; + +template +struct tuplevec_element +{ + typedef tuplevec_element_t type; +}; + +// attempt to isolate index given a type +template +struct tuplevec_index +{ +}; + +template +struct tuplevec_index> +{ + typedef void DuplicateTypeCheck; + tuplevec_index() = delete; // tuplevec_index should only be used for compile-time assistance, and never be instantiated + static const eastl_size_t index = 0; +}; + +template +struct tuplevec_index> +{ + typedef int DuplicateTypeCheck; + static_assert(is_void>::DuplicateTypeCheck>::value, "duplicate type T in tuple_vector::get(); unique types must be provided in declaration, or only use get()"); + + static const eastl_size_t index = 0; +}; + +template +struct tuplevec_index> +{ + typedef typename tuplevec_index>::DuplicateTypeCheck DuplicateTypeCheck; + static const eastl_size_t index = tuplevec_index>::index + 1; +}; + +template +struct tuplevec_index> : public tuplevec_index> +{ +}; + + +// helper to calculate the layout of the allocations for the tuple of types (esp. to take alignment into account) +template <> +struct TupleRecurser<> +{ + typedef eastl_size_t size_type; + + // This class should never be instantiated. This is just a helper for working with static functions when anonymous functions don't work + // and provide some other utilities + TupleRecurser() = delete; + + static EA_CONSTEXPR size_type GetTotalAlignment() + { + return 0; + } + + static EA_CONSTEXPR size_type GetTotalAllocationSize(size_type capacity, size_type offset) + { + EA_UNUSED(capacity); + return offset; + } + + template + static pair DoAllocate(TupleVecImpl &vec, void** ppNewLeaf, size_type capacity, size_type offset) + { + EA_UNUSED(ppNewLeaf); + + // If n is zero, then we allocate no memory and just return NULL. + // This is fine, as our default ctor initializes with NULL pointers. + size_type alignment = TupleRecurser::GetTotalAlignment(); + void* ptr = capacity ? 
allocate_memory(vec.get_allocator(), offset, alignment, 0) : nullptr; + + #if EASTL_ASSERT_ENABLED + if (EASTL_UNLIKELY((size_t)ptr & (alignment - 1)) != 0) + { + EASTL_FAIL_MSG("tuple_vector::DoAllocate -- memory not alignment at requested alignment"); + } + #endif + + return make_pair(ptr, offset); + } + + template + static void SetNewData(TupleVecImplType &vec, void* pData, size_type capacity, size_type offset) + { + EA_UNUSED(vec); + EA_UNUSED(pData); + EA_UNUSED(capacity); + EA_UNUSED(offset); + } +}; + +template +struct TupleRecurser : TupleRecurser +{ + typedef eastl_size_t size_type; + + static EA_CONSTEXPR size_type GetTotalAlignment() + { + return max(static_cast(alignof(T)), TupleRecurser::GetTotalAlignment()); + } + + static EA_CONSTEXPR size_type GetTotalAllocationSize(size_type capacity, size_type offset) + { + return TupleRecurser::GetTotalAllocationSize(capacity, CalculateAllocationSize(offset, capacity)); + } + + template + static pair DoAllocate(TupleVecImpl &vec, void** ppNewLeaf, size_type capacity, size_type offset) + { + size_type allocationOffset = CalculatAllocationOffset(offset); + size_type allocationSize = CalculateAllocationSize(offset, capacity); + pair allocation = TupleRecurser::template DoAllocate( + vec, ppNewLeaf, capacity, allocationSize); + ppNewLeaf[I] = (void*)((uintptr_t)(allocation.first) + allocationOffset); + return allocation; + } + + template + static void SetNewData(TupleVecImplType &vec, void* pData, size_type capacity, size_type offset) + { + size_type allocationOffset = CalculatAllocationOffset(offset); + size_type allocationSize = CalculateAllocationSize(offset, capacity); + vec.TupleVecLeaf::mpData = (T*)((uintptr_t)pData + allocationOffset); + TupleRecurser::template SetNewData(vec, pData, capacity, allocationSize); + } + +private: + static EA_CONSTEXPR size_type CalculateAllocationSize(size_type offset, size_type capacity) + { + return CalculatAllocationOffset(offset) + sizeof(T) * capacity; + } + + static EA_CONSTEXPR size_type CalculatAllocationOffset(size_type offset) { return (offset + alignof(T) - 1) & (~alignof(T) + 1); } +}; + +template +struct TupleVecLeaf +{ + typedef eastl_size_t size_type; + + void DoUninitializedMoveAndDestruct(const size_type begin, const size_type end, T* pDest) + { + T* pBegin = mpData + begin; + T* pEnd = mpData + end; + eastl::uninitialized_move_ptr_if_noexcept(pBegin, pEnd, pDest); + eastl::destruct(pBegin, pEnd); + } + + void DoInsertAndFill(size_type pos, size_type n, size_type numElements, const T& arg) + { + T* pDest = mpData + pos; + T* pDataEnd = mpData + numElements; + const T temp = arg; + const size_type nExtra = (numElements - pos); + if (n < nExtra) // If the inserted values are entirely within initialized memory (i.e. are before mpEnd)... + { + eastl::uninitialized_move_ptr(pDataEnd - n, pDataEnd, pDataEnd); + eastl::move_backward(pDest, pDataEnd - n, pDataEnd); // We need move_backward because of potential overlap issues. 
+ eastl::fill(pDest, pDest + n, temp); + } + else + { + eastl::uninitialized_fill_n_ptr(pDataEnd, n - nExtra, temp); + eastl::uninitialized_move_ptr(pDest, pDataEnd, pDataEnd + n - nExtra); + eastl::fill(pDest, pDataEnd, temp); + } + } + + void DoInsertRange(T* pSrcBegin, T* pSrcEnd, T* pDestBegin, size_type numDataElements) + { + size_type pos = pDestBegin - mpData; + size_type n = pSrcEnd - pSrcBegin; + T* pDataEnd = mpData + numDataElements; + const size_type nExtra = numDataElements - pos; + if (n < nExtra) // If the inserted values are entirely within initialized memory (i.e. are before mpEnd)... + { + eastl::uninitialized_move_ptr(pDataEnd - n, pDataEnd, pDataEnd); + eastl::move_backward(pDestBegin, pDataEnd - n, pDataEnd); // We need move_backward because of potential overlap issues. + eastl::copy(pSrcBegin, pSrcEnd, pDestBegin); + } + else + { + eastl::uninitialized_copy(pSrcEnd - (n - nExtra), pSrcEnd, pDataEnd); + eastl::uninitialized_move_ptr(pDestBegin, pDataEnd, pDataEnd + n - nExtra); + eastl::copy(pSrcBegin, pSrcEnd - (n - nExtra), pDestBegin); + } + } + + void DoInsertValue(size_type pos, size_type numElements, T&& arg) + { + T* pDest = mpData + pos; + T* pDataEnd = mpData + numElements; + + eastl::uninitialized_move_ptr(pDataEnd - 1, pDataEnd, pDataEnd); + eastl::move_backward(pDest, pDataEnd - 1, pDataEnd); // We need move_backward because of potential overlap issues. + eastl::destruct(pDest); + ::new (pDest) T(eastl::forward(arg)); + } + + T* mpData = nullptr; +}; + +// swallow allows for parameter pack expansion of arguments as means of expanding operations performed +// if a void function is used for operation expansion, it should be wrapped in (..., 0) so that the compiler +// thinks it has a parameter to pass into the function +template +void swallow(Ts&&...) { } + +inline bool variadicAnd(bool cond) { return cond; } + +inline bool variadicAnd(bool cond, bool conds...) { return cond && variadicAnd(conds); } + +// Helper struct to check for strict compatibility between two iterators, whilst still allowing for +// conversion between TupleVecImpl::iterator and TupleVecImpl::const_iterator. +template +struct TupleVecIterCompatibleImpl : public false_type { }; + +template<> +struct TupleVecIterCompatibleImpl, TupleTypes<>> : public true_type { }; + +template +struct TupleVecIterCompatibleImpl, TupleTypes> : public integral_constant, TupleTypes>::value && + is_same::type, typename remove_const::type>::value > +{ }; + +template +struct TupleVecIterCompatible; + +template +struct TupleVecIterCompatible, TupleTypes> : + public TupleVecIterCompatibleImpl, TupleTypes> +{ }; + +// The Iterator operates by storing a persistent index internally, +// and resolving the tuple of pointers to the various parts of the original tupleVec when dereferenced. 
+// While resolving the tuple is a non-zero operation, it consistently generated better code than the alternative of +// storing - and harmoniously updating on each modification - a full tuple of pointers to the tupleVec's data +template +struct TupleVecIter, Ts...> + : public iterator, eastl_size_t, tuple, tuple> +{ +private: + typedef TupleVecIter, Ts...> this_type; + typedef eastl_size_t size_type; + + typedef iterator, eastl_size_t, tuple, tuple> iter_type; + + template + friend struct TupleVecIter; + + template + friend class TupleVecImpl; + + template + friend class move_iterator; +public: + typedef typename iter_type::iterator_category iterator_category; + typedef typename iter_type::value_type value_type; + typedef typename iter_type::difference_type difference_type; + typedef typename iter_type::pointer pointer; + typedef typename iter_type::reference reference; + + TupleVecIter() = default; + + template + TupleVecIter(VecImplType* tupleVec, size_type index) + : mIndex(index) + , mpData{(void*)tupleVec->TupleVecLeaf::mpData...} + { } + + template , TupleTypes>::value, bool>::type> + TupleVecIter(const TupleVecIter& other) + : mIndex(other.mIndex) + , mpData{other.mpData[Indices]...} + { + } + + bool operator==(const TupleVecIter& other) const { return mIndex == other.mIndex && mpData[0] == other.mpData[0]; } + bool operator!=(const TupleVecIter& other) const { return mIndex != other.mIndex || mpData[0] != other.mpData[0]; } + reference operator*() const { return MakeReference(); } + + this_type& operator++() { ++mIndex; return *this; } + this_type operator++(int) + { + this_type temp = *this; + ++mIndex; + return temp; + } + + this_type& operator--() { --mIndex; return *this; } + this_type operator--(int) + { + this_type temp = *this; + --mIndex; + return temp; + } + + this_type& operator+=(difference_type n) { mIndex += n; return *this; } + this_type operator+(difference_type n) const + { + this_type temp = *this; + return temp += n; + } + friend this_type operator+(difference_type n, const this_type& rhs) + { + this_type temp = rhs; + return temp += n; + } + + this_type& operator-=(difference_type n) { mIndex -= n; return *this; } + this_type operator-(difference_type n) const + { + this_type temp = *this; + return temp -= n; + } + friend this_type operator-(difference_type n, const this_type& rhs) + { + this_type temp = rhs; + return temp -= n; + } + + difference_type operator-(const this_type& rhs) const { return mIndex - rhs.mIndex; } + bool operator<(const this_type& rhs) const { return mIndex < rhs.mIndex; } + bool operator>(const this_type& rhs) const { return mIndex > rhs.mIndex; } + bool operator>=(const this_type& rhs) const { return mIndex >= rhs.mIndex; } + bool operator<=(const this_type& rhs) const { return mIndex <= rhs.mIndex; } + + reference operator[](const size_type n) const + { + return *(*this + n); + } + +private: + + value_type MakeValue() const + { + return value_type(((Ts*)mpData[Indices])[mIndex]...); + } + + reference MakeReference() const + { + return reference(((Ts*)mpData[Indices])[mIndex]...); + } + + pointer MakePointer() const + { + return pointer(&((Ts*)mpData[Indices])[mIndex]...); + } + + size_type mIndex = 0; + const void* mpData[sizeof...(Ts)]; +}; + +// TupleVecImpl +template +class TupleVecImpl, Ts...> : public TupleVecLeaf... 
+{ + typedef Allocator allocator_type; + typedef index_sequence index_sequence_type; + typedef TupleVecImpl this_type; + typedef TupleVecImpl const_this_type; + +public: + typedef TupleVecInternal::TupleVecIter iterator; + typedef TupleVecInternal::TupleVecIter const_iterator; + typedef eastl::reverse_iterator reverse_iterator; + typedef eastl::reverse_iterator const_reverse_iterator; + typedef eastl_size_t size_type; + typedef eastl::tuple value_tuple; + typedef eastl::tuple reference_tuple; + typedef eastl::tuple const_reference_tuple; + typedef eastl::tuple ptr_tuple; + typedef eastl::tuple const_ptr_tuple; + typedef eastl::tuple rvalue_tuple; + + TupleVecImpl() + : mDataSizeAndAllocator(0, EASTL_TUPLE_VECTOR_DEFAULT_ALLOCATOR) + {} + + TupleVecImpl(const allocator_type& allocator) + : mDataSizeAndAllocator(0, allocator) + {} + + TupleVecImpl(this_type&& x) + : mDataSizeAndAllocator(0, eastl::move(x.get_allocator())) + { + swap(x); + } + + TupleVecImpl(this_type&& x, const Allocator& allocator) + : mDataSizeAndAllocator(0, allocator) + { + if (get_allocator() == x.get_allocator()) // If allocators are equivalent, then we can safely swap member-by-member + { + swap(x); + } + else + { + this_type temp(eastl::move(*this)); + temp.swap(x); + } + } + + TupleVecImpl(const this_type& x) + : mDataSizeAndAllocator(0, x.get_allocator()) + { + DoInitFromIterator(x.begin(), x.end()); + } + + template + TupleVecImpl(const TupleVecImpl& x, const Allocator& allocator) + : mDataSizeAndAllocator(0, allocator) + { + DoInitFromIterator(x.begin(), x.end()); + } + + template + TupleVecImpl(move_iterator begin, move_iterator end, const allocator_type& allocator = EASTL_TUPLE_VECTOR_DEFAULT_ALLOCATOR) + : mDataSizeAndAllocator(0, allocator) + { + DoInitFromIterator(begin, end); + } + + TupleVecImpl(const_iterator begin, const_iterator end, const allocator_type& allocator = EASTL_TUPLE_VECTOR_DEFAULT_ALLOCATOR) + : mDataSizeAndAllocator(0, allocator ) + { + DoInitFromIterator(begin, end); + } + + TupleVecImpl(size_type n, const allocator_type& allocator = EASTL_TUPLE_VECTOR_DEFAULT_ALLOCATOR) + : mDataSizeAndAllocator(0, allocator) + { + DoInitDefaultFill(n); + } + + TupleVecImpl(size_type n, const Ts&... args) + : mDataSizeAndAllocator(0, EASTL_TUPLE_VECTOR_DEFAULT_ALLOCATOR) + { + DoInitFillArgs(n, args...); + } + + TupleVecImpl(size_type n, const Ts&... 
args, const allocator_type& allocator) + : mDataSizeAndAllocator(0, allocator) + { + DoInitFillArgs(n, args...); + } + + TupleVecImpl(size_type n, const_reference_tuple tup, const allocator_type& allocator = EASTL_TUPLE_VECTOR_DEFAULT_ALLOCATOR) + : mDataSizeAndAllocator(0, allocator) + { + DoInitFillTuple(n, tup); + } + + TupleVecImpl(const value_tuple* first, const value_tuple* last, const allocator_type& allocator = EASTL_TUPLE_VECTOR_DEFAULT_ALLOCATOR) + : mDataSizeAndAllocator(0, allocator) + { + DoInitFromTupleArray(first, last); + } + + TupleVecImpl(std::initializer_list iList, const allocator_type& allocator = EASTL_TUPLE_VECTOR_DEFAULT_ALLOCATOR) + : mDataSizeAndAllocator(0, allocator) + { + DoInitFromTupleArray(iList.begin(), iList.end()); + } + +protected: + // ctor to provide a pre-allocated field of data that the container will own, specifically for fixed_tuple_vector + TupleVecImpl(const allocator_type& allocator, void* pData, size_type capacity, size_type dataSize) + : mpData(pData), mNumCapacity(capacity), mDataSizeAndAllocator(dataSize, allocator) + { + TupleRecurser::template SetNewData(*this, mpData, mNumCapacity, 0); + } + +public: + ~TupleVecImpl() + { + swallow((eastl::destruct(TupleVecLeaf::mpData, TupleVecLeaf::mpData + mNumElements), 0)...); + if (mpData) + EASTLFree(get_allocator(), mpData, internalDataSize()); + } + + void assign(size_type n, const Ts&... args) + { + if (n > mNumCapacity) + { + this_type temp(n, args..., get_allocator()); // We have little choice but to reallocate with new memory. + swap(temp); + } + else if (n > mNumElements) // If n > mNumElements ... + { + size_type oldNumElements = mNumElements; + swallow((eastl::fill(TupleVecLeaf::mpData, TupleVecLeaf::mpData + oldNumElements, args), 0)...); + swallow((eastl::uninitialized_fill_ptr(TupleVecLeaf::mpData + oldNumElements, + TupleVecLeaf::mpData + n, args), 0)...); + mNumElements = n; + } + else // else 0 <= n <= mNumElements + { + swallow((eastl::fill(TupleVecLeaf::mpData, TupleVecLeaf::mpData + n, args), 0)...); + erase(begin() + n, end()); + } + } + + void assign(const_iterator first, const_iterator last) + { +#if EASTL_ASSERT_ENABLED + if (EASTL_UNLIKELY(!validate_iterator_pair(first, last))) + EASTL_FAIL_MSG("tuple_vector::assign -- invalid iterator pair"); +#endif + size_type newNumElements = last - first; + if (newNumElements > mNumCapacity) + { + this_type temp(first, last, get_allocator()); + swap(temp); + } + else + { + const void* ppOtherData[sizeof...(Ts)] = {first.mpData[Indices]...}; + size_type firstIdx = first.mIndex; + size_type lastIdx = last.mIndex; + if (newNumElements > mNumElements) // If n > mNumElements ... 
+ { + size_type oldNumElements = mNumElements; + swallow((eastl::copy((Ts*)(ppOtherData[Indices]) + firstIdx, + (Ts*)(ppOtherData[Indices]) + firstIdx + oldNumElements, + TupleVecLeaf::mpData), 0)...); + swallow((eastl::uninitialized_copy_ptr((Ts*)(ppOtherData[Indices]) + firstIdx + oldNumElements, + (Ts*)(ppOtherData[Indices]) + lastIdx, + TupleVecLeaf::mpData + oldNumElements), 0)...); + mNumElements = newNumElements; + } + else // else 0 <= n <= mNumElements + { + swallow((eastl::copy((Ts*)(ppOtherData[Indices]) + firstIdx, (Ts*)(ppOtherData[Indices]) + lastIdx, + TupleVecLeaf::mpData), 0)...); + erase(begin() + newNumElements, end()); + } + } + } + + void assign(const value_tuple* first, const value_tuple* last) + { +#if EASTL_ASSERT_ENABLED + if (EASTL_UNLIKELY(first > last || first == nullptr || last == nullptr)) + EASTL_FAIL_MSG("tuple_vector::assign from tuple array -- invalid ptrs"); +#endif + size_type newNumElements = last - first; + if (newNumElements > mNumCapacity) + { + this_type temp(first, last, get_allocator()); + swap(temp); + } + else + { + if (newNumElements > mNumElements) // If n > mNumElements ... + { + size_type oldNumElements = mNumElements; + + DoCopyFromTupleArray(begin(), begin() + oldNumElements, first); + DoUninitializedCopyFromTupleArray(begin() + oldNumElements, begin() + newNumElements, first + oldNumElements); + mNumElements = newNumElements; + } + else // else 0 <= n <= mNumElements + { + DoCopyFromTupleArray(begin(), begin() + newNumElements, first); + erase(begin() + newNumElements, end()); + } + } + } + + reference_tuple push_back() + { + size_type oldNumElements = mNumElements; + size_type newNumElements = oldNumElements + 1; + size_type oldNumCapacity = mNumCapacity; + mNumElements = newNumElements; + DoGrow(oldNumElements, oldNumCapacity, newNumElements); + swallow(::new(TupleVecLeaf::mpData + oldNumElements) Ts()...); + return back(); + } + + void push_back(const Ts&... args) + { + size_type oldNumElements = mNumElements; + size_type newNumElements = oldNumElements + 1; + size_type oldNumCapacity = mNumCapacity; + mNumElements = newNumElements; + DoGrow(oldNumElements, oldNumCapacity, newNumElements); + swallow(::new(TupleVecLeaf::mpData + oldNumElements) Ts(args)...); + } + + void push_back_uninitialized() + { + size_type oldNumElements = mNumElements; + size_type newNumElements = oldNumElements + 1; + size_type oldNumCapacity = mNumCapacity; + mNumElements = newNumElements; + DoGrow(oldNumElements, oldNumCapacity, newNumElements); + } + + reference_tuple emplace_back(Ts&&... args) + { + size_type oldNumElements = mNumElements; + size_type newNumElements = oldNumElements + 1; + size_type oldNumCapacity = mNumCapacity; + mNumElements = newNumElements; + DoGrow(oldNumElements, oldNumCapacity, newNumElements); + swallow(::new(TupleVecLeaf::mpData + oldNumElements) Ts(eastl::forward(args))...); + return back(); + } + + iterator emplace(const_iterator pos, Ts&&... 
args) + { +#if EASTL_ASSERT_ENABLED + if (EASTL_UNLIKELY(validate_iterator(pos) == isf_none)) + EASTL_FAIL_MSG("tuple_vector::emplace -- invalid iterator"); +#endif + size_type firstIdx = pos - cbegin(); + size_type oldNumElements = mNumElements; + size_type newNumElements = mNumElements + 1; + size_type oldNumCapacity = mNumCapacity; + mNumElements = newNumElements; + if (newNumElements > oldNumCapacity || firstIdx != oldNumElements) + { + if (newNumElements > oldNumCapacity) + { + const size_type newCapacity = eastl::max(GetNewCapacity(oldNumCapacity), newNumElements); + + void* ppNewLeaf[sizeof...(Ts)]; + pair allocation = TupleRecurser::template DoAllocate( + *this, ppNewLeaf, newCapacity, 0); + + swallow((TupleVecLeaf::DoUninitializedMoveAndDestruct( + 0, firstIdx, (Ts*)ppNewLeaf[Indices]), 0)...); + swallow((TupleVecLeaf::DoUninitializedMoveAndDestruct( + firstIdx, oldNumElements, (Ts*)ppNewLeaf[Indices] + firstIdx + 1), 0)...); + swallow(::new ((Ts*)ppNewLeaf[Indices] + firstIdx) Ts(eastl::forward(args))...); + swallow(TupleVecLeaf::mpData = (Ts*)ppNewLeaf[Indices]...); + + EASTLFree(get_allocator(), mpData, internalDataSize()); + mpData = allocation.first; + mNumCapacity = newCapacity; + internalDataSize() = allocation.second; + } + else + { + swallow((TupleVecLeaf::DoInsertValue(firstIdx, oldNumElements, eastl::forward(args)), 0)...); + } + } + else + { + swallow(::new (TupleVecLeaf::mpData + oldNumElements) Ts(eastl::forward(args))...); + } + return begin() + firstIdx; + } + + iterator insert(const_iterator pos, size_type n, const Ts&... args) + { +#if EASTL_ASSERT_ENABLED + if (EASTL_UNLIKELY(validate_iterator(pos) == isf_none)) + EASTL_FAIL_MSG("tuple_vector::insert -- invalid iterator"); +#endif + size_type firstIdx = pos - cbegin(); + size_type lastIdx = firstIdx + n; + size_type oldNumElements = mNumElements; + size_type newNumElements = mNumElements + n; + size_type oldNumCapacity = mNumCapacity; + mNumElements = newNumElements; + if (newNumElements > oldNumCapacity || firstIdx != oldNumElements) + { + if (newNumElements > oldNumCapacity) + { + const size_type newCapacity = eastl::max(GetNewCapacity(oldNumCapacity), newNumElements); + + void* ppNewLeaf[sizeof...(Ts)]; + pair allocation = TupleRecurser::template DoAllocate( + *this, ppNewLeaf, newCapacity, 0); + + swallow((TupleVecLeaf::DoUninitializedMoveAndDestruct( + 0, firstIdx, (Ts*)ppNewLeaf[Indices]), 0)...); + swallow((TupleVecLeaf::DoUninitializedMoveAndDestruct( + firstIdx, oldNumElements, (Ts*)ppNewLeaf[Indices] + lastIdx), 0)...); + swallow((eastl::uninitialized_fill_ptr((Ts*)ppNewLeaf[Indices] + firstIdx, (Ts*)ppNewLeaf[Indices] + lastIdx, args), 0)...); + swallow(TupleVecLeaf::mpData = (Ts*)ppNewLeaf[Indices]...); + + EASTLFree(get_allocator(), mpData, internalDataSize()); + mpData = allocation.first; + mNumCapacity = newCapacity; + internalDataSize() = allocation.second; + } + else + { + swallow((TupleVecLeaf::DoInsertAndFill(firstIdx, n, oldNumElements, args), 0)...); + } + } + else + { + swallow((eastl::uninitialized_fill_ptr(TupleVecLeaf::mpData + oldNumElements, + TupleVecLeaf::mpData + newNumElements, args), 0)...); + } + return begin() + firstIdx; + } + + iterator insert(const_iterator pos, const_iterator first, const_iterator last) + { +#if EASTL_ASSERT_ENABLED + if (EASTL_UNLIKELY(validate_iterator(pos) == isf_none)) + EASTL_FAIL_MSG("tuple_vector::insert -- invalid iterator"); + if (EASTL_UNLIKELY(!validate_iterator_pair(first, last))) + EASTL_FAIL_MSG("tuple_vector::insert -- invalid iterator pair"); 
+#endif + size_type posIdx = pos - cbegin(); + size_type firstIdx = first.mIndex; + size_type lastIdx = last.mIndex; + size_type numToInsert = last - first; + size_type oldNumElements = mNumElements; + size_type newNumElements = oldNumElements + numToInsert; + size_type oldNumCapacity = mNumCapacity; + mNumElements = newNumElements; + const void* ppOtherData[sizeof...(Ts)] = {first.mpData[Indices]...}; + if (newNumElements > oldNumCapacity || posIdx != oldNumElements) + { + if (newNumElements > oldNumCapacity) + { + const size_type newCapacity = eastl::max(GetNewCapacity(oldNumCapacity), newNumElements); + + void* ppNewLeaf[sizeof...(Ts)]; + pair allocation = TupleRecurser::template DoAllocate( + *this, ppNewLeaf, newCapacity, 0); + + swallow((TupleVecLeaf::DoUninitializedMoveAndDestruct( + 0, posIdx, (Ts*)ppNewLeaf[Indices]), 0)...); + swallow((TupleVecLeaf::DoUninitializedMoveAndDestruct( + posIdx, oldNumElements, (Ts*)ppNewLeaf[Indices] + posIdx + numToInsert), 0)...); + swallow((eastl::uninitialized_copy_ptr((Ts*)(ppOtherData[Indices]) + firstIdx, + (Ts*)(ppOtherData[Indices]) + lastIdx, + (Ts*)ppNewLeaf[Indices] + posIdx), 0)...); + swallow(TupleVecLeaf::mpData = (Ts*)ppNewLeaf[Indices]...); + + EASTLFree(get_allocator(), mpData, internalDataSize()); + mpData = allocation.first; + mNumCapacity = newCapacity; + internalDataSize() = allocation.second; + } + else + { + swallow((TupleVecLeaf::DoInsertRange( + (Ts*)(ppOtherData[Indices]) + firstIdx, (Ts*)(ppOtherData[Indices]) + lastIdx, + TupleVecLeaf::mpData + posIdx, oldNumElements), 0)...); + } + } + else + { + swallow((eastl::uninitialized_copy_ptr((Ts*)(ppOtherData[Indices]) + firstIdx, + (Ts*)(ppOtherData[Indices]) + lastIdx, + TupleVecLeaf::mpData + posIdx), 0)...); + } + return begin() + posIdx; + } + + iterator insert(const_iterator pos, const value_tuple* first, const value_tuple* last) + { +#if EASTL_ASSERT_ENABLED + if (EASTL_UNLIKELY(validate_iterator(pos) == isf_none)) + EASTL_FAIL_MSG("tuple_vector::insert -- invalid iterator"); + if (EASTL_UNLIKELY(first > last || first == nullptr || last == nullptr)) + EASTL_FAIL_MSG("tuple_vector::insert -- invalid source pointers"); +#endif + size_type posIdx = pos - cbegin(); + size_type numToInsert = last - first; + size_type oldNumElements = mNumElements; + size_type newNumElements = oldNumElements + numToInsert; + size_type oldNumCapacity = mNumCapacity; + mNumElements = newNumElements; + if (newNumElements > oldNumCapacity || posIdx != oldNumElements) + { + if (newNumElements > oldNumCapacity) + { + const size_type newCapacity = eastl::max(GetNewCapacity(oldNumCapacity), newNumElements); + + void* ppNewLeaf[sizeof...(Ts)]; + pair allocation = TupleRecurser::template DoAllocate( + *this, ppNewLeaf, newCapacity, 0); + + swallow((TupleVecLeaf::DoUninitializedMoveAndDestruct( + 0, posIdx, (Ts*)ppNewLeaf[Indices]), 0)...); + swallow((TupleVecLeaf::DoUninitializedMoveAndDestruct( + posIdx, oldNumElements, (Ts*)ppNewLeaf[Indices] + posIdx + numToInsert), 0)...); + + swallow(TupleVecLeaf::mpData = (Ts*)ppNewLeaf[Indices]...); + + // Do this after mpData is updated so that we can use new iterators + DoUninitializedCopyFromTupleArray(begin() + posIdx, begin() + posIdx + numToInsert, first); + + EASTLFree(get_allocator(), mpData, internalDataSize()); + mpData = allocation.first; + mNumCapacity = newCapacity; + internalDataSize() = allocation.second; + } + else + { + const size_type nExtra = oldNumElements - posIdx; + void* ppDataEnd[sizeof...(Ts)] = { (void*)(TupleVecLeaf::mpData + 
oldNumElements)... }; + void* ppDataBegin[sizeof...(Ts)] = { (void*)(TupleVecLeaf::mpData + posIdx)... }; + if (numToInsert < nExtra) // If the inserted values are entirely within initialized memory (i.e. are before mpEnd)... + { + swallow((eastl::uninitialized_move_ptr((Ts*)ppDataEnd[Indices] - numToInsert, + (Ts*)ppDataEnd[Indices], (Ts*)ppDataEnd[Indices]), 0)...); + // We need move_backward because of potential overlap issues. + swallow((eastl::move_backward((Ts*)ppDataBegin[Indices], + (Ts*)ppDataEnd[Indices] - numToInsert, (Ts*)ppDataEnd[Indices]), 0)...); + + DoCopyFromTupleArray(pos, pos + numToInsert, first); + } + else + { + size_type numToInitialize = numToInsert - nExtra; + swallow((eastl::uninitialized_move_ptr((Ts*)ppDataBegin[Indices], + (Ts*)ppDataEnd[Indices], (Ts*)ppDataEnd[Indices] + numToInitialize), 0)...); + + DoCopyFromTupleArray(pos, begin() + oldNumElements, first); + DoUninitializedCopyFromTupleArray(begin() + oldNumElements, pos + numToInsert, first + nExtra); + } + } + } + else + { + DoUninitializedCopyFromTupleArray(pos, pos + numToInsert, first); + } + return begin() + posIdx; + } + + iterator erase(const_iterator first, const_iterator last) + { +#if EASTL_ASSERT_ENABLED + if (EASTL_UNLIKELY(validate_iterator(first) == isf_none || validate_iterator(last) == isf_none)) + EASTL_FAIL_MSG("tuple_vector::erase -- invalid iterator"); + if (EASTL_UNLIKELY(!validate_iterator_pair(first, last))) + EASTL_FAIL_MSG("tuple_vector::erase -- invalid iterator pair"); +#endif + if (first != last) + { + size_type firstIdx = first - cbegin(); + size_type lastIdx = last - cbegin(); + size_type oldNumElements = mNumElements; + size_type newNumElements = oldNumElements - (lastIdx - firstIdx); + mNumElements = newNumElements; + swallow((eastl::move(TupleVecLeaf::mpData + lastIdx, + TupleVecLeaf::mpData + oldNumElements, + TupleVecLeaf::mpData + firstIdx), 0)...); + swallow((eastl::destruct(TupleVecLeaf::mpData + newNumElements, + TupleVecLeaf::mpData + oldNumElements), 0)...); + } + return begin() + first.mIndex; + } + + iterator erase_unsorted(const_iterator pos) + { +#if EASTL_ASSERT_ENABLED + if (EASTL_UNLIKELY(validate_iterator(pos) == isf_none)) + EASTL_FAIL_MSG("tuple_vector::erase_unsorted -- invalid iterator"); +#endif + size_type oldNumElements = mNumElements; + size_type newNumElements = oldNumElements - 1; + mNumElements = newNumElements; + swallow((eastl::move(TupleVecLeaf::mpData + newNumElements, + TupleVecLeaf::mpData + oldNumElements, + TupleVecLeaf::mpData + (pos - begin())), 0)...); + swallow((eastl::destruct(TupleVecLeaf::mpData + newNumElements, + TupleVecLeaf::mpData + oldNumElements), 0)...); + return begin() + pos.mIndex; + } + + void resize(size_type n) + { + size_type oldNumElements = mNumElements; + size_type oldNumCapacity = mNumCapacity; + mNumElements = n; + if (n > oldNumElements) + { + if (n > oldNumCapacity) + { + DoReallocate(oldNumElements, eastl::max(GetNewCapacity(oldNumCapacity), n)); + } + swallow((eastl::uninitialized_default_fill_n(TupleVecLeaf::mpData + oldNumElements, n - oldNumElements), 0)...); + } + else + { + swallow((eastl::destruct(TupleVecLeaf::mpData + n, + TupleVecLeaf::mpData + oldNumElements), 0)...); + } + } + + void resize(size_type n, const Ts&... 
args) + { + size_type oldNumElements = mNumElements; + size_type oldNumCapacity = mNumCapacity; + mNumElements = n; + if (n > oldNumElements) + { + if (n > oldNumCapacity) + { + DoReallocate(oldNumElements, eastl::max(GetNewCapacity(oldNumCapacity), n)); + } + swallow((eastl::uninitialized_fill_ptr(TupleVecLeaf::mpData + oldNumElements, + TupleVecLeaf::mpData + n, args), 0)...); + } + else + { + swallow((eastl::destruct(TupleVecLeaf::mpData + n, + TupleVecLeaf::mpData + oldNumElements), 0)...); + } + } + + void reserve(size_type n) + { + DoConditionalReallocate(mNumElements, mNumCapacity, n); + } + + void shrink_to_fit() + { + this_type temp(move_iterator(begin()), move_iterator(end()), get_allocator()); + swap(temp); + } + + void clear() EA_NOEXCEPT + { + size_type oldNumElements = mNumElements; + mNumElements = 0; + swallow((eastl::destruct(TupleVecLeaf::mpData, TupleVecLeaf::mpData + oldNumElements), 0)...); + } + + void pop_back() + { +#if EASTL_ASSERT_ENABLED + if (EASTL_UNLIKELY(mNumElements <= 0)) + EASTL_FAIL_MSG("tuple_vector::pop_back -- container is empty"); +#endif + size_type oldNumElements = mNumElements--; + swallow((eastl::destruct(TupleVecLeaf::mpData + oldNumElements - 1, + TupleVecLeaf::mpData + oldNumElements), 0)...); + } + + void swap(this_type& x) + { + swallow((eastl::swap(TupleVecLeaf::mpData, x.TupleVecLeaf::mpData), 0)...); + eastl::swap(mpData, x.mpData); + eastl::swap(mNumElements, x.mNumElements); + eastl::swap(mNumCapacity, x.mNumCapacity); + eastl::swap(get_allocator(), x.get_allocator()); + eastl::swap(internalDataSize(), x.internalDataSize()); + } + + void assign(size_type n, const_reference_tuple tup) { assign(n, eastl::get(tup)...); } + void assign(std::initializer_list iList) { assign(iList.begin(), iList.end()); } + + void push_back(Ts&&... args) { emplace_back(eastl::forward(args)...); } + void push_back(const_reference_tuple tup) { push_back(eastl::get(tup)...); } + void push_back(rvalue_tuple tup) { emplace_back(eastl::forward(eastl::get(tup))...); } + + void emplace_back(rvalue_tuple tup) { emplace_back(eastl::forward(eastl::get(tup))...); } + void emplace(const_iterator pos, rvalue_tuple tup) { emplace(pos, eastl::forward(eastl::get(tup))...); } + + iterator insert(const_iterator pos, const Ts&... args) { return insert(pos, 1, args...); } + iterator insert(const_iterator pos, Ts&&... 
args) { return emplace(pos, eastl::forward(args)...); } + iterator insert(const_iterator pos, rvalue_tuple tup) { return emplace(pos, eastl::forward(eastl::get(tup))...); } + iterator insert(const_iterator pos, const_reference_tuple tup) { return insert(pos, eastl::get(tup)...); } + iterator insert(const_iterator pos, size_type n, const_reference_tuple tup) { return insert(pos, n, eastl::get(tup)...); } + iterator insert(const_iterator pos, std::initializer_list iList) { return insert(pos, iList.begin(), iList.end()); } + + iterator erase(const_iterator pos) { return erase(pos, pos + 1); } + reverse_iterator erase(const_reverse_iterator pos) { return reverse_iterator(erase((pos + 1).base(), (pos).base())); } + reverse_iterator erase(const_reverse_iterator first, const_reverse_iterator last) { return reverse_iterator(erase((last).base(), (first).base())); } + reverse_iterator erase_unsorted(const_reverse_iterator pos) { return reverse_iterator(erase_unsorted((pos + 1).base())); } + + void resize(size_type n, const_reference_tuple tup) { resize(n, eastl::get(tup)...); } + + bool empty() const EA_NOEXCEPT { return mNumElements == 0; } + size_type size() const EA_NOEXCEPT { return mNumElements; } + size_type capacity() const EA_NOEXCEPT { return mNumCapacity; } + + iterator begin() EA_NOEXCEPT { return iterator(this, 0); } + const_iterator begin() const EA_NOEXCEPT { return const_iterator((const_this_type*)(this), 0); } + const_iterator cbegin() const EA_NOEXCEPT { return const_iterator((const_this_type*)(this), 0); } + + iterator end() EA_NOEXCEPT { return iterator(this, size()); } + const_iterator end() const EA_NOEXCEPT { return const_iterator((const_this_type*)(this), size()); } + const_iterator cend() const EA_NOEXCEPT { return const_iterator((const_this_type*)(this), size()); } + + reverse_iterator rbegin() EA_NOEXCEPT { return reverse_iterator(end()); } + const_reverse_iterator rbegin() const EA_NOEXCEPT { return const_reverse_iterator(end()); } + const_reverse_iterator crbegin() const EA_NOEXCEPT { return const_reverse_iterator(end()); } + + reverse_iterator rend() EA_NOEXCEPT { return reverse_iterator(begin()); } + const_reverse_iterator rend() const EA_NOEXCEPT { return const_reverse_iterator(begin()); } + const_reverse_iterator crend() const EA_NOEXCEPT { return const_reverse_iterator(begin()); } + + ptr_tuple data() EA_NOEXCEPT { return ptr_tuple(TupleVecLeaf::mpData...); } + const_ptr_tuple data() const EA_NOEXCEPT { return const_ptr_tuple(TupleVecLeaf::mpData...); } + + reference_tuple at(size_type n) + { +#if EASTL_EXCEPTIONS_ENABLED + if (EASTL_UNLIKELY(n >= mNumElements)) + throw std::out_of_range("tuple_vector::at -- out of range"); +#elif EASTL_ASSERT_ENABLED + if (EASTL_UNLIKELY(n >= mNumElements)) + EASTL_FAIL_MSG("tuple_vector::at -- out of range"); +#endif + return reference_tuple(*(TupleVecLeaf::mpData + n)...); + } + + const_reference_tuple at(size_type n) const + { +#if EASTL_EXCEPTIONS_ENABLED + if (EASTL_UNLIKELY(n >= mNumElements)) + throw std::out_of_range("tuple_vector::at -- out of range"); +#elif EASTL_ASSERT_ENABLED + if (EASTL_UNLIKELY(n >= mNumElements)) + EASTL_FAIL_MSG("tuple_vector::at -- out of range"); +#endif + return const_reference_tuple(*(TupleVecLeaf::mpData + n)...); + } + + reference_tuple operator[](size_type n) { return at(n); } + const_reference_tuple operator[](size_type n) const { return at(n); } + + reference_tuple front() + { + #if EASTL_ASSERT_ENABLED && EASTL_EMPTY_REFERENCE_ASSERT_ENABLED + if (EASTL_UNLIKELY(mNumElements == 0)) // We 
don't allow the user to reference an empty container. + EASTL_FAIL_MSG("tuple_vector::front -- empty vector"); + #else + // We allow the user to reference an empty container. + #endif + + return at(0); + } + + const_reference_tuple front() const + { + #if EASTL_ASSERT_ENABLED && EASTL_EMPTY_REFERENCE_ASSERT_ENABLED + if (EASTL_UNLIKELY(mNumElements == 0)) // We don't allow the user to reference an empty container. + EASTL_FAIL_MSG("tuple_vector::front -- empty vector"); + #else + // We allow the user to reference an empty container. + #endif + + return at(0); + } + + reference_tuple back() + { + #if EASTL_ASSERT_ENABLED && EASTL_EMPTY_REFERENCE_ASSERT_ENABLED + if (EASTL_UNLIKELY(mNumElements == 0)) // We don't allow the user to reference an empty container. + EASTL_FAIL_MSG("tuple_vector::back -- empty vector"); + #else + // We allow the user to reference an empty container. + #endif + + return at(size() - 1); + } + + const_reference_tuple back() const + { + #if EASTL_ASSERT_ENABLED && EASTL_EMPTY_REFERENCE_ASSERT_ENABLED + if (EASTL_UNLIKELY(mNumElements == 0)) // We don't allow the user to reference an empty container. + EASTL_FAIL_MSG("tuple_vector::back -- empty vector"); + #else + // We allow the user to reference an empty container. + #endif + + return at(size() - 1); + } + + template + tuplevec_element_t* get() + { + typedef tuplevec_element_t Element; + return TupleVecLeaf::mpData; + } + template + const tuplevec_element_t* get() const + { + typedef tuplevec_element_t Element; + return TupleVecLeaf::mpData; + } + + template + T* get() + { + typedef tuplevec_index> Index; + return TupleVecLeaf::mpData; + } + template + const T* get() const + { + typedef tuplevec_index> Index; + return TupleVecLeaf::mpData; + } + + this_type& operator=(const this_type& other) + { + if (this != &other) + { + clear(); + assign(other.begin(), other.end()); + } + return *this; + } + + this_type& operator=(this_type&& other) + { + if (this != &other) + { + swap(other); + } + return *this; + } + + this_type& operator=(std::initializer_list iList) + { + assign(iList.begin(), iList.end()); + return *this; + } + + bool validate() const EA_NOEXCEPT + { + if (mNumElements > mNumCapacity) + return false; + if (!(variadicAnd(mpData <= TupleVecLeaf::mpData...))) + return false; + void* pDataEnd = (void*)((uintptr_t)mpData + internalDataSize()); + if (!(variadicAnd(pDataEnd >= TupleVecLeaf::mpData...))) + return false; + return true; + } + + int validate_iterator(const_iterator iter) const EA_NOEXCEPT + { + if (!(variadicAnd(iter.mpData[Indices] == TupleVecLeaf::mpData...))) + return isf_none; + if (iter.mIndex < mNumElements) + return (isf_valid | isf_current | isf_can_dereference); + if (iter.mIndex <= mNumElements) + return (isf_valid | isf_current); + return isf_none; + } + + static bool validate_iterator_pair(const_iterator first, const_iterator last) EA_NOEXCEPT + { + return (first.mIndex <= last.mIndex) && variadicAnd(first.mpData[Indices] == last.mpData[Indices]...); + } + + template ::value, bool>::type> + int validate_iterator(Iterator iter) const EA_NOEXCEPT { return validate_iterator(unwrap_iterator(iter)); } + + template ::value, bool>::type> + static bool validate_iterator_pair(Iterator first, Iterator last) EA_NOEXCEPT { return validate_iterator_pair(unwrap_iterator(first), unwrap_iterator(last)); } + + allocator_type& get_allocator() EA_NOEXCEPT { return mDataSizeAndAllocator.second(); } + const allocator_type& get_allocator() const EA_NOEXCEPT { return mDataSizeAndAllocator.second(); } + + void 
set_allocator(const allocator_type& alloc) { mDataSizeAndAllocator.second() = alloc; } + +protected: + + void* mpData = nullptr; + size_type mNumElements = 0; + size_type mNumCapacity = 0; + + compressed_pair mDataSizeAndAllocator; + + size_type& internalDataSize() EA_NOEXCEPT { return mDataSizeAndAllocator.first(); } + size_type const& internalDataSize() const EA_NOEXCEPT { return mDataSizeAndAllocator.first(); } + + friend struct TupleRecurser<>; + template + friend struct TupleRecurser; + + template + void DoInitFromIterator(move_iterator begin, move_iterator end) + { +#if EASTL_ASSERT_ENABLED + if (EASTL_UNLIKELY(!validate_iterator_pair(begin, end))) + EASTL_FAIL_MSG("tuple_vector::erase -- invalid iterator pair"); +#endif + size_type newNumElements = (size_type)(end - begin); + const void* ppOtherData[sizeof...(Ts)] = { begin.base().mpData[Indices]... }; + size_type beginIdx = begin.base().mIndex; + size_type endIdx = end.base().mIndex; + DoConditionalReallocate(0, mNumCapacity, newNumElements); + mNumElements = newNumElements; + swallow((eastl::uninitialized_move_ptr(eastl::move_iterator((Ts*)(ppOtherData[Indices]) + beginIdx), + eastl::move_iterator((Ts*)(ppOtherData[Indices]) + endIdx), + TupleVecLeaf::mpData), 0)...); + } + + void DoInitFromIterator(const_iterator begin, const_iterator end) + { +#if EASTL_ASSERT_ENABLED + if (EASTL_UNLIKELY(!validate_iterator_pair(begin, end))) + EASTL_FAIL_MSG("tuple_vector::erase -- invalid iterator pair"); +#endif + size_type newNumElements = (size_type)(end - begin); + const void* ppOtherData[sizeof...(Ts)] = { begin.mpData[Indices]... }; + size_type beginIdx = begin.mIndex; + size_type endIdx = end.mIndex; + DoConditionalReallocate(0, mNumCapacity, newNumElements); + mNumElements = newNumElements; + swallow((eastl::uninitialized_copy_ptr((Ts*)(ppOtherData[Indices]) + beginIdx, + (Ts*)(ppOtherData[Indices]) + endIdx, + TupleVecLeaf::mpData), 0)...); + } + + void DoInitFillTuple(size_type n, const_reference_tuple tup) { DoInitFillArgs(n, eastl::get(tup)...); } + + void DoInitFillArgs(size_type n, const Ts&... 
args) + { + DoConditionalReallocate(0, mNumCapacity, n); + mNumElements = n; + swallow((eastl::uninitialized_fill_ptr(TupleVecLeaf::mpData, TupleVecLeaf::mpData + n, args), 0)...); + } + + void DoInitDefaultFill(size_type n) + { + DoConditionalReallocate(0, mNumCapacity, n); + mNumElements = n; + swallow((eastl::uninitialized_default_fill_n(TupleVecLeaf::mpData, n), 0)...); + } + + void DoInitFromTupleArray(const value_tuple* first, const value_tuple* last) + { +#if EASTL_ASSERT_ENABLED + if (EASTL_UNLIKELY(first > last || first == nullptr || last == nullptr)) + EASTL_FAIL_MSG("tuple_vector::ctor from tuple array -- invalid ptrs"); +#endif + size_type newNumElements = last - first; + DoConditionalReallocate(0, mNumCapacity, newNumElements); + mNumElements = newNumElements; + DoUninitializedCopyFromTupleArray(begin(), end(), first); + } + + void DoCopyFromTupleArray(iterator destPos, iterator destEnd, const value_tuple* srcTuple) + { + // assign to constructed region + while (destPos < destEnd) + { + *destPos = *srcTuple; + ++destPos; + ++srcTuple; + } + } + + void DoUninitializedCopyFromTupleArray(iterator destPos, iterator destEnd, const value_tuple* srcTuple) + { + // placement-new/copy-ctor to unconstructed regions + while (destPos < destEnd) + { + swallow(::new(eastl::get(destPos.MakePointer())) Ts(eastl::get(*srcTuple))...); + ++destPos; + ++srcTuple; + } + } + + // Try to grow the size of the container "naturally" given the number of elements being used + void DoGrow(size_type oldNumElements, size_type oldNumCapacity, size_type requiredCapacity) + { + if (requiredCapacity > oldNumCapacity) + DoReallocate(oldNumElements, GetNewCapacity(requiredCapacity)); + } + + // Reallocate to the newCapacity (IFF it's actually larger, though) + void DoConditionalReallocate(size_type oldNumElements, size_type oldNumCapacity, size_type requiredCapacity) + { + if (requiredCapacity > oldNumCapacity) + DoReallocate(oldNumElements, requiredCapacity); + } + + void DoReallocate(size_type oldNumElements, size_type requiredCapacity) + { + void* ppNewLeaf[sizeof...(Ts)]; + pair allocation = TupleRecurser::template DoAllocate( + *this, ppNewLeaf, requiredCapacity, 0); + swallow((TupleVecLeaf::DoUninitializedMoveAndDestruct(0, oldNumElements, (Ts*)ppNewLeaf[Indices]), 0)...); + swallow(TupleVecLeaf::mpData = (Ts*)ppNewLeaf[Indices]...); + + EASTLFree(get_allocator(), mpData, internalDataSize()); + mpData = allocation.first; + mNumCapacity = requiredCapacity; + internalDataSize() = allocation.second; + } + + size_type GetNewCapacity(size_type oldNumCapacity) + { + return (oldNumCapacity > 0) ? (2 * oldNumCapacity) : 1; + } +}; + +} // namespace TupleVecInternal + +// Move_iterator specialization for TupleVecIter. +// An rvalue reference of a move_iterator would normaly be "tuple &&" whereas +// what we actually want is "tuple". This specialization gives us that. +template +class move_iterator, Ts...>> +{ +public: + typedef TupleVecInternal::TupleVecIter, Ts...> iterator_type; + typedef iterator_type wrapped_iterator_type; // This is not in the C++ Standard; it's used by use to identify it as + // a wrapping iterator type. 
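	// For a tuple_vector<int, eastl::string>, dereferencing this move_iterator
	// yields a tuple of rvalue references (tuple<int&&, eastl::string&&>), so
	// each component can be moved out of its own backing array element-wise.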
+ typedef iterator_traits traits_type; + typedef typename traits_type::iterator_category iterator_category; + typedef typename traits_type::value_type value_type; + typedef typename traits_type::difference_type difference_type; + typedef typename traits_type::pointer pointer; + typedef tuple reference; + typedef move_iterator this_type; + +protected: + iterator_type mIterator; + +public: + move_iterator() : mIterator() {} + explicit move_iterator(iterator_type mi) : mIterator(mi) {} + + template + move_iterator(const move_iterator& mi) : mIterator(mi.base()) {} + + iterator_type base() const { return mIterator; } + reference operator*() const { return eastl::move(MakeReference()); } + pointer operator->() const { return mIterator; } + + this_type& operator++() { ++mIterator; return *this; } + this_type operator++(int) { + this_type tempMoveIterator = *this; + ++mIterator; + return tempMoveIterator; + } + + this_type& operator--() { --mIterator; return *this; } + this_type operator--(int) + { + this_type tempMoveIterator = *this; + --mIterator; + return tempMoveIterator; + } + + this_type operator+(difference_type n) const { return move_iterator(mIterator + n); } + this_type& operator+=(difference_type n) + { + mIterator += n; + return *this; + } + + this_type operator-(difference_type n) const { return move_iterator(mIterator - n); } + this_type& operator-=(difference_type n) + { + mIterator -= n; + return *this; + } + + difference_type operator-(const this_type& rhs) const { return mIterator - rhs.mIterator; } + bool operator<(const this_type& rhs) const { return mIterator < rhs.mIterator; } + bool operator>(const this_type& rhs) const { return mIterator > rhs.mIterator; } + bool operator>=(const this_type& rhs) const { return mIterator >= rhs.mIterator; } + bool operator<=(const this_type& rhs) const { return mIterator <= rhs.mIterator; } + + reference operator[](difference_type n) const { return *(*this + n); } + +private: + reference MakeReference() const + { + return reference(eastl::move(((Ts*)mIterator.mpData[Indices])[mIterator.mIndex])...); + } +}; + +template +inline bool operator==(const TupleVecInternal::TupleVecImpl& a, + const TupleVecInternal::TupleVecImpl& b) +{ + return ((a.size() == b.size()) && eastl::equal(a.begin(), a.end(), b.begin())); +} + +template +inline bool operator!=(const TupleVecInternal::TupleVecImpl& a, + const TupleVecInternal::TupleVecImpl& b) +{ + return ((a.size() != b.size()) || !eastl::equal(a.begin(), a.end(), b.begin())); +} + +template +inline bool operator<(const TupleVecInternal::TupleVecImpl& a, + const TupleVecInternal::TupleVecImpl& b) +{ + return eastl::lexicographical_compare(a.begin(), a.end(), b.begin(), b.end()); +} + +template +inline bool operator>(const TupleVecInternal::TupleVecImpl& a, + const TupleVecInternal::TupleVecImpl& b) +{ + return b < a; +} + +template +inline bool operator<=(const TupleVecInternal::TupleVecImpl& a, + const TupleVecInternal::TupleVecImpl& b) +{ + return !(b < a); +} + +template +inline bool operator>=(const TupleVecInternal::TupleVecImpl& a, + const TupleVecInternal::TupleVecImpl& b) +{ + return !(a < b); +} + +template +inline void swap(TupleVecInternal::TupleVecImpl& a, + TupleVecInternal::TupleVecImpl& b) +{ + a.swap(b); +} + +// A customization of swap is made for r-values of tuples-of-references - +// normally, swapping rvalues doesn't make sense, but in this case, we do want to +// swap the contents of what the tuple-of-references are referring to +// +// This is required due to TupleVecIter returning 
a value-type for its dereferencing, +// as opposed to an actual real reference of some sort +template +inline +typename enable_if...>::value>::type +swap(tuple&& a, tuple&& b) +{ + a.swap(b); +} + +template +inline +typename enable_if...>::value>::type +swap(tuple&& a, tuple&& b) = delete; + + +// External interface of tuple_vector +template +class tuple_vector : public TupleVecInternal::TupleVecImpl, Ts...> +{ + typedef tuple_vector this_type; + typedef TupleVecInternal::TupleVecImpl, Ts...> base_type; + using base_type::base_type; + +public: + this_type& operator=(std::initializer_list iList) + { + base_type::operator=(iList); + return *this; + } +}; + +// Variant of tuple_vector that allows a user-defined allocator type (can't mix default template params with variadics) +template +class tuple_vector_alloc + : public TupleVecInternal::TupleVecImpl, Ts...> +{ + typedef tuple_vector_alloc this_type; + typedef TupleVecInternal::TupleVecImpl, Ts...> base_type; + using base_type::base_type; + +public: + + this_type& operator=(std::initializer_list iList) + { + base_type::operator=(iList); + return *this; + } +}; + +} // namespace eastl + +EA_RESTORE_VC_WARNING() +EA_RESTORE_VC_WARNING() +EA_RESTORE_VC_WARNING() +EA_RESTORE_VC_WARNING() + +#endif // EASTL_TUPLEVECTOR_H diff --git a/libkram/eastl/include/EASTL/chrono.h b/libkram/eastl/include/EASTL/chrono.h new file mode 100644 index 00000000..453ab0f4 --- /dev/null +++ b/libkram/eastl/include/EASTL/chrono.h @@ -0,0 +1,744 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + + +/////////////////////////////////////////////////////////////////////////////// +// This file implements the eastl::chrono specification which is part of the +// standard STL date and time library. eastl::chrono implements all the +// mechanisms required to capture and manipulate times retrieved from the +// provided clocks. It implements the all of the features to allow type safe +// durations to be used in code. +/////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_CHRONO_H +#define EASTL_CHRONO_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + +#include +#include +#include +#include + + +// TODO: move to platform specific cpp or header file +#if defined EA_PLATFORM_MICROSOFT + EA_DISABLE_ALL_VC_WARNINGS() + + #ifndef WIN32_LEAN_AND_MEAN + #define WIN32_LEAN_AND_MEAN + #endif + + #undef NOMINMAX + #define NOMINMAX + + #include + + #ifdef min + #undef min + #endif + #ifdef max + #undef max + #endif + + EA_RESTORE_ALL_VC_WARNINGS() +#endif + +#if defined(EA_PLATFORM_MICROSOFT) && !defined(EA_PLATFORM_MINGW) + // Nothing to do +#elif defined(EA_PLATFORM_SONY) + #include + #include +#elif defined(EA_PLATFORM_APPLE) + #include +#elif defined(EA_PLATFORM_POSIX) || defined(EA_PLATFORM_MINGW) || defined(EA_PLATFORM_ANDROID) + // Posix means Linux, Unix, and Macintosh OSX, among others (including Linux-based mobile platforms). 
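As a quick orientation for the tuple_vector interface added in the hunk above: each element type is stored in its own parallel array, and components are reached either through get<I>() or through the reference tuples returned by operator[]. A minimal sketch follows; the element types, values, and header path are illustrative assumptions, not part of the patch.

    #include <EASTL/tuple_vector.h>

    void tuple_vector_sketch()
    {
        eastl::tuple_vector<int, float> tv;   // one int array and one float array (SoA layout)
        tv.push_back(1, 2.0f);
        tv.push_back(3, 4.0f);

        int*   ids     = tv.get<0>();         // contiguous array of the first component
        float* weights = tv.get<1>();         // contiguous array of the second component

        for (eastl_size_t i = 0; i < tv.size(); ++i)
            weights[i] += float(ids[i]);

        eastl::get<1>(tv[0]) = 9.0f;          // operator[] returns a tuple of references
    }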
+ #if defined(EA_PLATFORM_MINGW) + #include + #endif + #include + #if (defined(CLOCK_REALTIME) || defined(CLOCK_MONOTONIC)) + #include + #else + #include + #include + #endif +#endif + + +namespace eastl +{ +namespace chrono +{ + /////////////////////////////////////////////////////////////////////////////// + // treat_as_floating_point + /////////////////////////////////////////////////////////////////////////////// + template + struct treat_as_floating_point : is_floating_point {}; + + + /////////////////////////////////////////////////////////////////////////////// + // 20.12.4, duration_values + /////////////////////////////////////////////////////////////////////////////// + template + struct duration_values + { + public: + EASTL_FORCE_INLINE static EA_CONSTEXPR Rep zero() { return Rep(0); } + EASTL_FORCE_INLINE static EA_CONSTEXPR Rep max() { return eastl::numeric_limits::max(); } + EASTL_FORCE_INLINE static EA_CONSTEXPR Rep min() { return eastl::numeric_limits::lowest(); } + }; + + + /////////////////////////////////////////////////////////////////////////////// + // duration fwd_decl + /////////////////////////////////////////////////////////////////////////////// + template > + class duration; + + + namespace Internal + { + /////////////////////////////////////////////////////////////////////////////// + // IsRatio + /////////////////////////////////////////////////////////////////////////////// + template struct IsRatio : eastl::false_type {}; + template struct IsRatio> : eastl::true_type {}; + template struct IsRatio> : eastl::true_type {}; + template struct IsRatio> : eastl::true_type {}; + template struct IsRatio> : eastl::true_type {}; + + + /////////////////////////////////////////////////////////////////////////////// + // IsDuration + /////////////////////////////////////////////////////////////////////////////// + template struct IsDuration : eastl::false_type{}; + template struct IsDuration> : eastl::true_type{}; + template struct IsDuration> : eastl::true_type{}; + template struct IsDuration> : eastl::true_type{}; + template struct IsDuration> : eastl::true_type{}; + + + /////////////////////////////////////////////////////////////////////////////// + // RatioGCD + /////////////////////////////////////////////////////////////////////////////// + template + struct RatioGCD + { + static_assert(IsRatio::value, "Period1 is not a eastl::ratio type"); + static_assert(IsRatio::value, "Period2 is not a eastl::ratio type"); + + typedef ratio::value, + eastl::Internal::lcm::value> type; + }; + }; + + + /////////////////////////////////////////////////////////////////////////////// + // 20.12.5.7, duration_cast + /////////////////////////////////////////////////////////////////////////////// + namespace Internal + { + template ::type, + typename CommonRep = typename eastl::decay::type>::type, + bool = CommonPeriod::num == 1, + bool = CommonPeriod::den == 1> + struct DurationCastImpl; + + template + struct DurationCastImpl + { + inline static ToDuration DoCast(const FromDuration& fd) + { + return ToDuration(static_cast(fd.count())); + } + }; + + template + struct DurationCastImpl + { + inline static ToDuration DoCast(const FromDuration& d) + { + return ToDuration(static_cast(static_cast(d.count()) * + static_cast(CommonPeriod::num))); + } + }; + + template + struct DurationCastImpl + { + inline static ToDuration DoCast(const FromDuration& d) + { + return ToDuration(static_cast(static_cast(d.count()) / + static_cast(CommonPeriod::den))); + } + }; + + template + struct 
DurationCastImpl + { + inline static ToDuration DoCast(const FromDuration& d) + { + return ToDuration(static_cast(static_cast(d.count()) * + static_cast(CommonPeriod::num) / + static_cast(CommonPeriod::den))); + } + }; + }; // namespace Internal + + + /////////////////////////////////////////////////////////////////////////////// + // duration_cast + /////////////////////////////////////////////////////////////////////////////// + template + inline typename eastl::enable_if::value, ToDuration>::type + duration_cast(const duration& d) + { + typedef typename duration::this_type FromDuration; + return Internal::DurationCastImpl::DoCast(d); + } + + + /////////////////////////////////////////////////////////////////////////////// + // duration + /////////////////////////////////////////////////////////////////////////////// + template + class duration + { + Rep mRep; + + public: + typedef Rep rep; + typedef Period period; + typedef duration this_type; + + #if defined(EA_COMPILER_NO_DEFAULTED_FUNCTIONS) + EA_CONSTEXPR duration() + : mRep() {} + + duration(const duration& other) + : mRep(Rep(other.mRep)) {} + + duration& operator=(const duration& other) + { mRep = other.mRep; return *this; } + #else + EA_CONSTEXPR duration() = default; + duration(const duration&) = default; + duration& operator=(const duration&) = default; + #endif + + + /////////////////////////////////////////////////////////////////////////////// + // conversion constructors + /////////////////////////////////////////////////////////////////////////////// + template + inline EA_CONSTEXPR explicit duration( + const Rep2& rep2, + typename eastl::enable_if::value && + (treat_as_floating_point::value || + !treat_as_floating_point::value)>::type** = 0) + : mRep(static_cast(rep2)) {} + + + template + EA_CONSTEXPR duration(const duration& d2, + typename eastl::enable_if::value || + (eastl::ratio_divide::type::den == 1 && + !treat_as_floating_point::value), + void>::type** = 0) + : mRep(duration_cast(d2).count()) {} + + /////////////////////////////////////////////////////////////////////////////// + // returns the count of ticks + /////////////////////////////////////////////////////////////////////////////// + EA_CONSTEXPR Rep count() const { return mRep; } + + /////////////////////////////////////////////////////////////////////////////// + // static accessors of special duration values + /////////////////////////////////////////////////////////////////////////////// + EA_CONSTEXPR inline static duration zero() { return duration(duration_values::zero()); } + EA_CONSTEXPR inline static duration min() { return duration(duration_values::min()); } + EA_CONSTEXPR inline static duration max() { return duration(duration_values::max()); } + + /////////////////////////////////////////////////////////////////////////////// + // const arithmetic operations + /////////////////////////////////////////////////////////////////////////////// + EA_CONSTEXPR inline duration operator+() const { return *this; } + EA_CONSTEXPR inline duration operator-() const { return duration(0-mRep); } + + /////////////////////////////////////////////////////////////////////////////// + // arithmetic operations + /////////////////////////////////////////////////////////////////////////////// + inline duration operator++(int) { return duration(mRep++); } + inline duration operator--(int) { return duration(mRep--); } + inline duration& operator++() { ++mRep; return *this; } + inline duration& operator--() { --mRep; return *this; } + inline duration& operator+=(const 
duration& d) { mRep += d.count(); return *this; } + inline duration& operator-=(const duration& d) { mRep -= d.count(); return *this; } + inline duration& operator*=(const Rep& rhs) { mRep *= rhs; return *this; } + inline duration& operator/=(const Rep& rhs) { mRep /= rhs; return *this; } + inline duration& operator%=(const Rep& rhs) { mRep %= rhs; return *this; } + inline duration& operator%=(const duration& d) { mRep %= d.count(); return *this; } + }; + + + /////////////////////////////////////////////////////////////////////////////// + // 20.12.5.5, arithmetic operations with durations as arguments + /////////////////////////////////////////////////////////////////////////////// + template + typename eastl::common_type, duration>::type EASTL_FORCE_INLINE + operator+(const duration& lhs, const duration& rhs) + { + typedef typename eastl::common_type, duration>::type common_duration_t; + return common_duration_t(common_duration_t(lhs).count() + common_duration_t(rhs).count()); + } + + template + typename eastl::common_type, duration>::type EASTL_FORCE_INLINE + operator-(const duration& lhs, const duration& rhs) + { + typedef typename eastl::common_type, duration>::type common_duration_t; + return common_duration_t(common_duration_t(lhs).count() - common_duration_t(rhs).count()); + } + + template + duration::type, Period1> EASTL_FORCE_INLINE + operator*(const duration& lhs, const Rep2& rhs) + { + typedef typename duration, Period1>::type common_duration_t; + return common_duration_t(common_duration_t(lhs).count() * rhs); + } + + template + duration::type, Period2> EASTL_FORCE_INLINE + operator*(const Rep1& lhs, const duration& rhs) + { + typedef duration::type, Period2> common_duration_t; + return common_duration_t(lhs * common_duration_t(rhs).count()); + } + + template + duration::type, Period1> EASTL_FORCE_INLINE + operator/(const duration& lhs, const Rep2& rhs) + { + typedef duration::type, Period1> common_duration_t; + return common_duration_t(common_duration_t(lhs).count() / rhs); + } + + template + typename eastl::common_type, duration>::type EASTL_FORCE_INLINE + operator/(const duration& lhs, const duration& rhs) + { + typedef typename eastl::common_type, duration>::type common_duration_t; + return common_duration_t(common_duration_t(lhs).count() / common_duration_t(rhs).count()); + } + + template + duration::type, Period1> EASTL_FORCE_INLINE + operator%(const duration& lhs, const Rep2& rhs) + { + typedef duration::type, Period1> common_duration_t; + return common_duration_t(common_duration_t(lhs).count() % rhs); + } + + template + typename eastl::common_type, duration>::type EASTL_FORCE_INLINE + operator%(const duration& lhs, const duration& rhs) + { + typedef typename eastl::common_type, duration>::type common_duration_t; + return common_duration_t(common_duration_t(lhs).count() % common_duration_t(rhs).count()); + } + + + /////////////////////////////////////////////////////////////////////////////// + // 20.12.5.6, compares two durations + /////////////////////////////////////////////////////////////////////////////// + template + EASTL_FORCE_INLINE bool operator==(const duration& lhs, + const duration& rhs) + { + typedef typename eastl::common_type, duration>::type common_duration_t; + return common_duration_t(lhs).count() == common_duration_t(rhs).count(); + } + + template + EASTL_FORCE_INLINE bool operator<(const duration& lhs, + const duration& rhs) + { + typedef typename eastl::common_type, duration>::type common_duration_t; + return common_duration_t(lhs).count() < 
common_duration_t(rhs).count(); + } + + template + EASTL_FORCE_INLINE bool operator!=(const duration& lhs, + const duration& rhs) + { + return !(lhs == rhs); + } + + template + EASTL_FORCE_INLINE bool operator<=(const duration& lhs, + const duration& rhs) + { + return !(rhs < lhs); + } + + template + EASTL_FORCE_INLINE bool operator>(const duration& lhs, + const duration& rhs) + { + return rhs < lhs; + } + + template + EASTL_FORCE_INLINE bool operator>=(const duration& lhs, + const duration& rhs) + { + return !(lhs < rhs); + } + + + /////////////////////////////////////////////////////////////////////////////// + // standard duration units + /////////////////////////////////////////////////////////////////////////////// + typedef duration nanoseconds; + typedef duration microseconds; + typedef duration milliseconds; + typedef duration seconds; + typedef duration> minutes; + typedef duration> hours; + + + /////////////////////////////////////////////////////////////////////////////// + // 20.12.6, time_point + /////////////////////////////////////////////////////////////////////////////// + template + class time_point + { + Duration mDuration; + + public: + typedef Clock clock; + typedef Duration duration; + typedef typename Duration::rep rep; + typedef typename Duration::period period; + + inline EA_CONSTEXPR time_point() : mDuration(Duration::zero()) {} + EA_CONSTEXPR explicit time_point(const Duration& other) : mDuration(other) {} + + template + inline EA_CONSTEXPR time_point( + const time_point& t, + typename eastl::enable_if::value>::type** = 0) + : mDuration(t.time_since_epoch()) {} + + EA_CONSTEXPR Duration time_since_epoch() const { return mDuration; } + + time_point& operator+=(const Duration& d) { mDuration += d; return *this; } + time_point& operator-=(const Duration& d) { mDuration -= d; return *this; } + + static EA_CONSTEXPR time_point min() { return time_point(Duration::min()); } + static EA_CONSTEXPR time_point max() { return time_point(Duration::max()); } + }; + + + /////////////////////////////////////////////////////////////////////////////// + // 20.12.6.5, time_point arithmetic + /////////////////////////////////////////////////////////////////////////////// + template + inline EA_CONSTEXPR time_point>::type> + operator+(const time_point& lhs, const duration& rhs) + { + typedef time_point>::type> common_timepoint_t; + return common_timepoint_t(lhs.time_since_epoch() + rhs); + } + + template + inline EA_CONSTEXPR time_point>::type> + operator+(const duration& lhs, const time_point& rhs) + { + typedef time_point>::type> common_timepoint_t; + return common_timepoint_t(lhs + rhs.time_since_epoch()); + } + + template + inline EA_CONSTEXPR time_point>::type> + operator-(const time_point& lhs, const duration& rhs) + { + typedef time_point>::type> common_timepoint_t; + return common_timepoint_t(lhs.time_since_epoch() - rhs); + } + + template + inline EA_CONSTEXPR typename eastl::common_type::type operator-( + const time_point& lhs, + const time_point& rhs) + { + return lhs.time_since_epoch() - rhs.time_since_epoch(); + } + + template + inline EA_CONSTEXPR bool operator==(const time_point& lhs, + const time_point& rhs) + { + return lhs.time_since_epoch() == rhs.time_since_epoch(); + } + + template + inline EA_CONSTEXPR bool operator!=(const time_point& lhs, + const time_point& rhs) + { + return !(lhs == rhs); + } + + template + inline EA_CONSTEXPR bool operator<(const time_point& lhs, const time_point& rhs) + { + return lhs.time_since_epoch() < rhs.time_since_epoch(); + } + + 
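The duration arithmetic and comparison operators above first convert both operands to their common_type, so mixed-unit expressions stay exact until an explicit duration_cast truncates them. A small illustrative sketch, with arbitrary values:

    #include <EASTL/chrono.h>

    void duration_sketch()
    {
        using namespace eastl::chrono;

        milliseconds a(1500);
        microseconds b(2500);

        auto sum = a + b;   // common_type is microseconds, so the sum is exact: 1502500us

        seconds      s  = duration_cast<seconds>(sum);      // 1    (truncated toward zero)
        milliseconds ms = duration_cast<milliseconds>(sum); // 1502
    }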
template + inline EA_CONSTEXPR bool operator<=(const time_point& lhs, + const time_point& rhs) + { + return !(rhs < lhs); + } + + template + inline EA_CONSTEXPR bool operator>(const time_point& lhs, const time_point& rhs) + { + return rhs < lhs; + } + + template + inline EA_CONSTEXPR bool operator>=(const time_point& lhs, + const time_point& rhs) + { + return !(lhs < rhs); + } + + + /////////////////////////////////////////////////////////////////////////////// + // 20.12.6.7, time_point_cast + /////////////////////////////////////////////////////////////////////////////// + template + EA_CONSTEXPR time_point time_point_cast( + const time_point& t, + typename eastl::enable_if::value>::type** = 0) + { + return time_point(duration_cast(t.time_since_epoch())); + } + + + /////////////////////////////////////////////////////////////////////////////// + // 20.12.7, clocks + /////////////////////////////////////////////////////////////////////////////// + + namespace Internal + { + #if defined(EA_PLATFORM_MICROSOFT) && !defined(EA_PLATFORM_MINGW) + #define EASTL_NS_PER_TICK 1 + #elif defined EA_PLATFORM_SONY + #define EASTL_NS_PER_TICK _XTIME_NSECS_PER_TICK + #elif defined EA_PLATFORM_POSIX + #define EASTL_NS_PER_TICK _XTIME_NSECS_PER_TICK + #else + #define EASTL_NS_PER_TICK 100 + #endif + + #if defined(EA_PLATFORM_POSIX) + typedef chrono::nanoseconds::period SystemClock_Period; + typedef chrono::nanoseconds::period SteadyClock_Period; + #else + typedef eastl::ratio_multiply, nano>::type SystemClock_Period; + typedef eastl::ratio_multiply, nano>::type SteadyClock_Period; + #endif + + + /////////////////////////////////////////////////////////////////////////////// + // Internal::GetTicks + /////////////////////////////////////////////////////////////////////////////// + inline uint64_t GetTicks() + { + #if defined EA_PLATFORM_MICROSOFT + auto queryFrequency = [] + { + LARGE_INTEGER frequency; + QueryPerformanceFrequency(&frequency); + return double(1000000000.0L / frequency.QuadPart); // nanoseconds per tick + }; + + auto queryCounter = [] + { + LARGE_INTEGER counter; + QueryPerformanceCounter(&counter); + return counter.QuadPart; + }; + + EA_DISABLE_VC_WARNING(4640) // warning C4640: construction of local static object is not thread-safe (VS2013) + static auto frequency = queryFrequency(); // cache cpu frequency on first call + EA_RESTORE_VC_WARNING() + return uint64_t(frequency * queryCounter()); + #elif defined EA_PLATFORM_SONY + return sceKernelGetProcessTimeCounter(); + #elif defined(EA_PLATFORM_APPLE) + return mach_absolute_time(); + #elif defined(EA_PLATFORM_POSIX) // Posix means Linux, Unix, and Macintosh OSX, among others (including Linux-based mobile platforms). 
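			// Prefer CLOCK_MONOTONIC; if that query fails (the code checks for EINVAL),
			// fall back to CLOCK_REALTIME and return a 64-bit nanosecond count.
			// The gettimeofday() fallback below only has microsecond resolution.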
+ #if (defined(CLOCK_REALTIME) || defined(CLOCK_MONOTONIC)) + timespec ts; + int result = clock_gettime(CLOCK_MONOTONIC, &ts); + + if(result == EINVAL + ) + result = clock_gettime(CLOCK_REALTIME, &ts); + + const uint64_t nNanoseconds = (uint64_t)ts.tv_nsec + ((uint64_t)ts.tv_sec * UINT64_C(1000000000)); + return nNanoseconds; + #else + struct timeval tv; + gettimeofday(&tv, NULL); + const uint64_t nMicroseconds = (uint64_t)tv.tv_usec + ((uint64_t)tv.tv_sec * 1000000); + return nMicroseconds; + #endif + #else + #error "chrono not implemented for platform" + #endif + } + } // namespace Internal + + + /////////////////////////////////////////////////////////////////////////////// + // system_clock + /////////////////////////////////////////////////////////////////////////////// + class system_clock + { + public: + typedef long long rep; // signed arithmetic type representing the number of ticks in the clock's duration + typedef Internal::SystemClock_Period period; + typedef chrono::duration duration; // duration, capable of representing negative durations + typedef chrono::time_point time_point; + + // true if the time between ticks is always increases monotonically + EA_CONSTEXPR_OR_CONST static bool is_steady = false; + + // returns a time point representing the current point in time. + static time_point now() EA_NOEXCEPT + { + return time_point(duration(Internal::GetTicks())); + } + }; + + + /////////////////////////////////////////////////////////////////////////////// + // steady_clock + /////////////////////////////////////////////////////////////////////////////// + class steady_clock + { + public: + typedef long long rep; // signed arithmetic type representing the number of ticks in the clock's duration + typedef Internal::SteadyClock_Period period; + typedef chrono::duration duration; // duration, capable of representing negative durations + typedef chrono::time_point time_point; + + // true if the time between ticks is always increases monotonically + EA_CONSTEXPR_OR_CONST static bool is_steady = true; + + // returns a time point representing the current point in time. 
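		// Typical elapsed-time measurement (illustrative):
		//     auto start = eastl::chrono::steady_clock::now();
		//     // ... work ...
		//     auto elapsedMs = eastl::chrono::duration_cast<eastl::chrono::milliseconds>(
		//                          eastl::chrono::steady_clock::now() - start).count();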
+ static time_point now() EA_NOEXCEPT + { + return time_point(duration(Internal::GetTicks())); + } + }; + + + /////////////////////////////////////////////////////////////////////////////// + // high_resolution_clock + /////////////////////////////////////////////////////////////////////////////// + typedef system_clock high_resolution_clock; + + +} // namespace chrono + + + /////////////////////////////////////////////////////////////////////////////// + // duration common_type specialization + /////////////////////////////////////////////////////////////////////////////// + template + struct common_type, chrono::duration> + { + typedef chrono::duration::type>::type, + typename chrono::Internal::RatioGCD::type> type; + }; + + + /////////////////////////////////////////////////////////////////////////////// + // time_point common_type specialization + /////////////////////////////////////////////////////////////////////////////// + template + struct common_type, chrono::time_point> + { + typedef chrono::time_point::type> type; + }; + + + /////////////////////////////////////////////////////////////////////////////// + // chrono_literals + /////////////////////////////////////////////////////////////////////////////// + #if EASTL_USER_LITERALS_ENABLED && EASTL_INLINE_NAMESPACES_ENABLED + EA_DISABLE_VC_WARNING(4455) // disable warning C4455: literal suffix identifiers that do not start with an underscore are reserved + inline namespace literals + { + inline namespace chrono_literals + { + /////////////////////////////////////////////////////////////////////////////// + // integer chrono literals + /////////////////////////////////////////////////////////////////////////////// + EA_CONSTEXPR chrono::hours operator"" h(unsigned long long h) { return chrono::hours(h); } + EA_CONSTEXPR chrono::minutes operator"" min(unsigned long long m) { return chrono::minutes(m); } + EA_CONSTEXPR chrono::seconds operator"" s(unsigned long long s) { return chrono::seconds(s); } + EA_CONSTEXPR chrono::milliseconds operator"" ms(unsigned long long ms) { return chrono::milliseconds(ms); } + EA_CONSTEXPR chrono::microseconds operator"" us(unsigned long long us) { return chrono::microseconds(us); } + EA_CONSTEXPR chrono::nanoseconds operator"" ns(unsigned long long ns) { return chrono::nanoseconds(ns); } + + /////////////////////////////////////////////////////////////////////////////// + // float chrono literals + /////////////////////////////////////////////////////////////////////////////// + EA_CONSTEXPR chrono::duration> operator"" h(long double h) + { return chrono::duration>(h); } + EA_CONSTEXPR chrono::duration> operator"" min(long double m) + { return chrono::duration>(m); } + EA_CONSTEXPR chrono::duration operator"" s(long double s) + { return chrono::duration(s); } + EA_CONSTEXPR chrono::duration operator"" ms(long double ms) + { return chrono::duration(ms); } + EA_CONSTEXPR chrono::duration operator"" us(long double us) + { return chrono::duration(us); } + EA_CONSTEXPR chrono::duration operator"" ns(long double ns) + { return chrono::duration(ns); } + + } // namespace chrono_literals + }// namespace literals + EA_RESTORE_VC_WARNING() // warning: 4455 + #endif + +} // namespace eastl + + +#if EASTL_USER_LITERALS_ENABLED && EASTL_INLINE_NAMESPACES_ENABLED +namespace chrono +{ + using namespace eastl::literals::chrono_literals; +} // namespace chrono +#endif + + +#endif diff --git a/libkram/eastl/include/EASTL/core_allocator.h b/libkram/eastl/include/EASTL/core_allocator.h new file mode 100644 index 
00000000..e4374912 --- /dev/null +++ b/libkram/eastl/include/EASTL/core_allocator.h @@ -0,0 +1,70 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_CORE_ALLOCATOR_H +#define EASTL_CORE_ALLOCATOR_H + +#if EASTL_CORE_ALLOCATOR_ENABLED + +#include + +namespace EA +{ + namespace Allocator + { + /// EASTLCoreAllocatorImpl + /// + /// EASTL provides an out of the box implementation of the + /// ICoreAllocator interface. This is provided as a convenience for + /// users who wish to provide ICoreAllocator implementations for EASTL to use. + /// + /// EASTL has a dependency on coreallocator so to provide an out of + /// the box implementation for EASTLCoreAlloctor and EASTLCoreDeleter + /// that can be used and tested. Historically we could not test + /// ICoreAllocator interface because we relied on the code being linked + /// in user code. + /// + + class EASTLCoreAllocatorImpl : public ICoreAllocator + { + public: + virtual void* Alloc(size_t size, const char* name, unsigned int flags) + { + return ::operator new[](size, name, flags, 0, __FILE__, __LINE__); + } + + virtual void* Alloc(size_t size, const char* name, unsigned int flags, unsigned int alignment, unsigned int alignOffset = 0) + { + return ::operator new[](size, alignment, alignOffset, name, flags, 0, __FILE__, __LINE__); + } + + virtual void Free(void* ptr, size_t size = 0) + { + ::operator delete(static_cast(ptr)); + } + + virtual void* AllocDebug(size_t size, const DebugParams debugParams, unsigned int flags) + { + return Alloc(size, debugParams.mName, flags); + } + + virtual void* AllocDebug(size_t size, const DebugParams debugParams, unsigned int flags, unsigned int align, unsigned int alignOffset = 0) + { + return Alloc(size, debugParams.mName, flags, align, alignOffset); + } + + static EASTLCoreAllocatorImpl* GetDefaultAllocator(); + }; + + inline EASTLCoreAllocatorImpl* EASTLCoreAllocatorImpl::GetDefaultAllocator() + { + static EASTLCoreAllocatorImpl allocator; + return &allocator; + } + } +} + +#endif // EASTL_CORE_ALLOCATOR_ENABLED +#endif // EASTL_CORE_ALLOCATOR_H + diff --git a/libkram/eastl/include/EASTL/core_allocator_adapter.h b/libkram/eastl/include/EASTL/core_allocator_adapter.h new file mode 100644 index 00000000..d6f18275 --- /dev/null +++ b/libkram/eastl/include/EASTL/core_allocator_adapter.h @@ -0,0 +1,368 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// Implements an EASTL allocator that uses an ICoreAllocator. +// However, this header file is not dependent on ICoreAllocator or its package. +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_CORE_ALLOCATOR_ADAPTER_H +#define EASTL_CORE_ALLOCATOR_ADAPTER_H + +#if EASTL_CORE_ALLOCATOR_ENABLED + + +#include + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + + +/// EASTL_CORE_ALLOCATOR_ADAPTER_GET_DEFAULT_CORE_ALLOCATOR +/// +/// This allows the application to override the default name for the default global core allocator. 
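// Illustrative sketch of driving the default ICoreAllocator implementation above (not part of the
// header; assumes EASTL_CORE_ALLOCATOR_ENABLED is defined and the CoreAllocator package is present;
// the function name and allocation names are placeholders).
#include <EASTL/core_allocator.h>

inline void coreAllocatorExample()
{
    EA::Allocator::ICoreAllocator* pAllocator =
        EA::Allocator::EASTLCoreAllocatorImpl::GetDefaultAllocator();

    // The unaligned and aligned requests route to the global EASTL operator new[] overloads above.
    void* p        = pAllocator->Alloc(256, "example/block", 0);
    void* pAligned = pAllocator->Alloc(256, "example/aligned", 0, 64 /*alignment*/);

    pAllocator->Free(p);
    pAllocator->Free(pAligned);
}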
+/// However, you must be careful in your usage of this, as if this file is shared between uses then +/// you will need to be careful that your override of this doesn't conflict with others. +/// +#ifndef EASTL_CORE_ALLOCATOR_ADAPTER_GET_DEFAULT_CORE_ALLOCATOR + #define EASTL_CORE_ALLOCATOR_ADAPTER_GET_DEFAULT_CORE_ALLOCATOR AllocatorType::GetDefaultAllocator +#endif + + + +namespace EA +{ + namespace Allocator + { + /// CoreAllocatorAdapter + /// + /// Implements the EASTL allocator interface. + /// Allocates memory from an instance of ICoreAllocator or another class with an equivalent interface. + /// ICoreAllocator is a pure-virtual memory allocation interface used by a number of EA games and + /// shared libraries. It's completely unrelated to EASTL, but it's prevalent enough that it's useful + /// for EASTL to have a built-in adapter for this interface. ICoreAllocator is declared in the + /// CoreAllocator package icoreallocator_interface.h header, but CoreAllocatorAdapter can work with + /// any equivalent interface, as defined below. + /// + /// Expected interface: + /// enum AllocFlags { + /// kFlagTempMemory = 0, + /// kFlagPermMemory = 1 + /// }; + /// + /// struct CoreAllocator { + /// void* Alloc(size_t size, const char* name, unsigned int allocFlags); + /// void* Alloc(size_t size, const char* name, unsigned int allocFlags, // Not required unless you are working with types that require custom alignment. + /// unsigned int align, unsigned int alignOffset = 0); + /// void Free(void* block, size_t size = 0); + /// static CoreAllocator* GetDefaultAllocator(); + /// }; + /// + /// Example usage: + /// #include + /// typedef EA::Allocator::CoreAllocatorAdapter Adapter; + /// eastl::list widgetList(Adapter("UI/WidgetList", pSomeCoreAllocator)); + /// widgetList.push_back(Widget()); + /// + /// Example usage: + /// #include + /// eastl::list > widgetList; + /// widgetList.push_back(Widget()); + /// + /// Example usage: + /// #include + /// typedef EA::Allocator::CoreAllocatorAdapter Adapter; + /// typedef eastl::list WidgetList; + /// CoreAllocatorFixed widgetCoreAllocator(pFixedAllocatorForWidgetListValueType); // CoreAllocatorFixed is a hypothetical implementation of the ICoreAllocator interface. + /// WidgetList widgetList(Adapter("UI/WidgetList", &widgetCoreAllocator)); // Note that the widgetCoreAllocator is declared before and thus destroyed after the widget list. + /// + template + class CoreAllocatorAdapter + { + public: + typedef CoreAllocatorAdapter this_type; + + public: + // To do: Make this constructor explicit, when there is no known code dependent on it being otherwise. 
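// A compilable version of the "Example usage" comments above (a sketch: Widget and the function
// name are placeholders; pSomeCoreAllocator is whatever ICoreAllocator the application supplies).
#include <EASTL/core_allocator.h>           // brings in the ICoreAllocator interface when enabled
#include <EASTL/core_allocator_adapter.h>
#include <EASTL/list.h>

struct Widget { int id = 0; };

inline void adapterUsageExample(EA::Allocator::ICoreAllocator* pSomeCoreAllocator)
{
    typedef EA::Allocator::CoreAllocatorAdapter<EA::Allocator::ICoreAllocator> Adapter;

    eastl::list<Widget, Adapter> widgetList(Adapter("UI/WidgetList", pSomeCoreAllocator));
    widgetList.push_back(Widget());
}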
+ CoreAllocatorAdapter(const char* pName = EASTL_NAME_VAL(EASTL_ALLOCATOR_DEFAULT_NAME), AllocatorType* pAllocator = EASTL_CORE_ALLOCATOR_ADAPTER_GET_DEFAULT_CORE_ALLOCATOR()); + CoreAllocatorAdapter(const char* pName, AllocatorType* pAllocator, int flags); + CoreAllocatorAdapter(const CoreAllocatorAdapter& x); + CoreAllocatorAdapter(const CoreAllocatorAdapter& x, const char* pName); + + CoreAllocatorAdapter& operator=(const CoreAllocatorAdapter& x); + + void* allocate(size_t n, int flags = 0); + void* allocate(size_t n, size_t alignment, size_t offset, int flags = 0); + void deallocate(void* p, size_t n); + + AllocatorType* get_allocator() const; + void set_allocator(AllocatorType* pAllocator); + + int get_flags() const; + void set_flags(int flags); + + const char* get_name() const; + void set_name(const char* pName); + + public: // Public because otherwise VC++ generates (possibly invalid) warnings about inline friend template specializations. + AllocatorType* mpCoreAllocator; + int mnFlags; // Allocation flags. See ICoreAllocator/AllocFlags. + + #if EASTL_NAME_ENABLED + const char* mpName; // Debug name, used to track memory. + #endif + }; + + template + bool operator==(const CoreAllocatorAdapter& a, const CoreAllocatorAdapter& b); + + template + bool operator!=(const CoreAllocatorAdapter& a, const CoreAllocatorAdapter& b); + + + + /// EASTLICoreAllocator + /// + /// Provides a standardized typedef for ICoreAllocator; + /// + /// Example usage: + /// eastl::list widgetList("UI/WidgetList", pSomeCoreAllocator); + /// widgetList.push_back(Widget()); + /// + class ICoreAllocator; + class EASTLCoreAllocatorImpl; + + typedef CoreAllocatorAdapter EASTLICoreAllocatorAdapter; + typedef CoreAllocatorAdapter EASTLCoreAllocatorAdapter; + typedef EASTLICoreAllocatorAdapter EASTLICoreAllocator; // for backwards compatibility + + + + /// EASTLICoreDeleter + /// + /// Implements a functor which can free memory from the specified + /// ICoreAllocator interface. This is a convenience object provided for + /// users who wish to have EASTL containers deallocate memory obtained from + /// ICoreAllocator interfaces. + /// + template + class CoreDeleterAdapter + { + public: + typedef CoreDeleterAdapter this_type; + AllocatorType* mpCoreAllocator; + + public: + CoreDeleterAdapter(AllocatorType* pAllocator = EASTL_CORE_ALLOCATOR_ADAPTER_GET_DEFAULT_CORE_ALLOCATOR()) EA_NOEXCEPT + : mpCoreAllocator(pAllocator) {} + + ~CoreDeleterAdapter() EA_NOEXCEPT {} + + template + void operator()(T* p) + { + p->~T(); + mpCoreAllocator->Free(p); + } + + CoreDeleterAdapter(const CoreDeleterAdapter& in) { mpCoreAllocator = in.mpCoreAllocator; } + + CoreDeleterAdapter(CoreDeleterAdapter&& in) + { + mpCoreAllocator = in.mpCoreAllocator; + in.mpCoreAllocator = nullptr; + } + + CoreDeleterAdapter& operator=(const CoreDeleterAdapter& in) + { + mpCoreAllocator = in.mpCoreAllocator; + return *this; + } + + CoreDeleterAdapter& operator=(CoreDeleterAdapter&& in) + { + mpCoreAllocator = in.mpCoreAllocator; + in.mpCoreAllocator = nullptr; + return *this; + } + }; + + + + /// EASTLICoreDeleter + /// + /// Provides a standardized typedef for ICoreAllocator implementations. 
+ /// + /// Example usage: + /// eastl::shared_ptr foo(pA, EASTLCoreDeleter()); + /// + typedef CoreDeleterAdapter EASTLICoreDeleterAdapter; + typedef CoreDeleterAdapter EASTLCoreDeleterAdapter; + + } // namespace Allocator + +} // namespace EA + + + + + +/////////////////////////////////////////////////////////////////////////////// +// Inlines +/////////////////////////////////////////////////////////////////////////////// + +namespace EA +{ + namespace Allocator + { + template + inline CoreAllocatorAdapter::CoreAllocatorAdapter(const char* EASTL_NAME(pName), AllocatorType* pCoreAllocator) + : mpCoreAllocator(pCoreAllocator), mnFlags(0) + { + #if EASTL_NAME_ENABLED + mpName = pName ? pName : EASTL_ALLOCATOR_DEFAULT_NAME; + #endif + } + + template + inline CoreAllocatorAdapter::CoreAllocatorAdapter(const char* EASTL_NAME(pName), AllocatorType* pCoreAllocator, int flags) + : mpCoreAllocator(pCoreAllocator), mnFlags(flags) + { + #if EASTL_NAME_ENABLED + mpName = pName ? pName : EASTL_ALLOCATOR_DEFAULT_NAME; + #endif + } + + template + inline CoreAllocatorAdapter::CoreAllocatorAdapter(const CoreAllocatorAdapter& x) + : mpCoreAllocator(x.mpCoreAllocator), mnFlags(x.mnFlags) + { + #if EASTL_NAME_ENABLED + mpName = x.mpName; + #endif + } + + template + inline CoreAllocatorAdapter::CoreAllocatorAdapter(const CoreAllocatorAdapter& x, const char* EASTL_NAME(pName)) + : mpCoreAllocator(x.mpCoreAllocator), mnFlags(x.mnFlags) + { + #if EASTL_NAME_ENABLED + mpName = pName ? pName : EASTL_ALLOCATOR_DEFAULT_NAME; + #endif + } + + template + inline CoreAllocatorAdapter& CoreAllocatorAdapter::operator=(const CoreAllocatorAdapter& x) + { + mpCoreAllocator = x.mpCoreAllocator; + mnFlags = x.mnFlags; + + #if EASTL_NAME_ENABLED + mpName = x.mpName; + #endif + + return *this; + } + + template + inline void* CoreAllocatorAdapter::allocate(size_t n, int /*flags*/) + { + // It turns out that EASTL itself doesn't use the flags parameter, + // whereas the user here might well want to specify a flags + // parameter. So we use ours instead of the one passed in. + return mpCoreAllocator->Alloc(n, EASTL_NAME_VAL(mpName), (unsigned)mnFlags); + } + + template + inline void* CoreAllocatorAdapter::allocate(size_t n, size_t alignment, size_t offset, int /*flags*/) + { + // It turns out that EASTL itself doesn't use the flags parameter, + // whereas the user here might well want to specify a flags + // parameter. So we use ours instead of the one passed in. 
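// Sketch of the deleter adapter described above used with eastl::shared_ptr (not part of the
// header; assumes the application provides the operator new[] overloads EASTL requires; Foo and
// the function name are placeholders).
#include <EASTL/core_allocator.h>
#include <EASTL/core_allocator_adapter.h>
#include <EASTL/shared_ptr.h>
#include <new>

struct Foo { int value = 0; };

inline eastl::shared_ptr<Foo> makeSharedFooExample()
{
    EA::Allocator::ICoreAllocator* pAllocator =
        EA::Allocator::EASTLCoreAllocatorImpl::GetDefaultAllocator();

    // The object is allocated from the core allocator and constructed in place...
    void* pMemory = pAllocator->Alloc(sizeof(Foo), "example/Foo", 0);
    Foo*  pFoo    = new (pMemory) Foo();

    // ...and CoreDeleterAdapter destroys it and returns the memory to the same allocator.
    return eastl::shared_ptr<Foo>(pFoo, EA::Allocator::EASTLICoreDeleterAdapter(pAllocator));
}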
+ return mpCoreAllocator->Alloc(n, EASTL_NAME_VAL(mpName), (unsigned)mnFlags, (unsigned)alignment, (unsigned)offset); + } + + template + inline void CoreAllocatorAdapter::deallocate(void* p, size_t n) + { + return mpCoreAllocator->Free(p, n); + } + + template + inline AllocatorType* CoreAllocatorAdapter::get_allocator() const + { + return mpCoreAllocator; + } + + template + inline void CoreAllocatorAdapter::set_allocator(AllocatorType* pAllocator) + { + mpCoreAllocator = pAllocator; + } + + template + inline int CoreAllocatorAdapter::get_flags() const + { + return mnFlags; + } + + template + inline void CoreAllocatorAdapter::set_flags(int flags) + { + mnFlags = flags; + } + + template + inline const char* CoreAllocatorAdapter::get_name() const + { + #if EASTL_NAME_ENABLED + return mpName; + #else + return EASTL_ALLOCATOR_DEFAULT_NAME; + #endif + } + + template + inline void CoreAllocatorAdapter::set_name(const char* pName) + { + #if EASTL_NAME_ENABLED + mpName = pName; + #else + (void)pName; + #endif + } + + + + template + inline bool operator==(const CoreAllocatorAdapter& a, const CoreAllocatorAdapter& b) + { + return (a.mpCoreAllocator == b.mpCoreAllocator) && + (a.mnFlags == b.mnFlags); + } + + template + inline bool operator!=(const CoreAllocatorAdapter& a, const CoreAllocatorAdapter& b) + { + return (a.mpCoreAllocator != b.mpCoreAllocator) || + (a.mnFlags != b.mnFlags); + } + + + } // namespace Allocator + +} // namespace EA + + +#endif // EASTL_CORE_ALLOCATOR_ENABLED +#endif // Header include guard + + + + + + + + diff --git a/libkram/eastl/include/EASTL/deque.h b/libkram/eastl/include/EASTL/deque.h new file mode 100644 index 00000000..c2d55b1c --- /dev/null +++ b/libkram/eastl/include/EASTL/deque.h @@ -0,0 +1,2687 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + +////////////////////////////////////////////////////////////////////////////// +// deque design +// +// A deque (pronounced "deck") is a double-ended queue, though this is partially +// of a misnomer. A deque does indeed let you add and remove values from both ends +// of the container, but it's not usually used for such a thing and instead is used +// as a more flexible version of a vector. It provides operator[] (random access) +// and can insert items anywhere and not just at the front and back. +// +// While you can implement a double-ended queue via a doubly-linked list, deque is +// instead implemented as a list of arrays. The benefit of this is that memory usage +// is lower and that random access can be had with decent efficiency. +// +// Our implementation of deque is just like every other implementation of deque, +// as the C++ standard all but dictates that you make it work this way. Below +// we have a depiction of an array (or vector) of 48 items, with each node being +// a '+' character and extra capacity being a '-' character. What we have is one +// contiguous block of memory: +// +// ++++++++++++++++++++++++++++++++++++++++++++++++----------------- +// 0 47 +// +// With a deque, the same array of 48 items would be implemented as multiple smaller +// arrays of contiguous memory, each of fixed size. We will call these "sub-arrays." +// In the case here, we have six arrays of 8 nodes: +// +// ++++++++ ++++++++ ++++++++ ++++++++ ++++++++ ++++++++ +// +// With an vector, item [0] is the first item and item [47] is the last item. 
With a +// deque, item [0] is usually not the first item and neither is item [47]. There is +// extra capacity on both the front side and the back side of the deque. So a deque +// (of 24 items) actually looks like this: +// +// -------- -----+++ ++++++++ ++++++++ +++++--- -------- +// 0 23 +// +// To insert items at the front, you move into the capacity on the left, and to insert +// items at the back, you append items on the right. As you can see, inserting an item +// at the front doesn't require allocating new memory nor does it require moving any +// items in the container. It merely involves moving the pointer to the [0] item to +// the left by one node. +// +// We keep track of these sub-arrays by having an array of pointers, with each array +// entry pointing to each of the sub-arrays. We could alternatively use a linked +// list of pointers, but it turns out we can implement our deque::operator[] more +// efficiently if we use an array of pointers instead of a list of pointers. +// +// To implement deque::iterator, we could keep a struct which is essentially this: +// struct iterator { +// int subArrayIndex; +// int subArrayOffset; +// } +// +// In practice, we implement iterators a little differently, but in reality our +// implementation isn't much different from the above. It turns out that it's most +// simple if we also manage the location of item [0] and item [end] by using these +// same iterators. +// +// To consider: Implement the deque as a circular deque instead of a linear one. +// This would use a similar subarray layout but iterators would +// wrap around when they reached the end of the subarray pointer list. +// +////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_DEQUE_H +#define EASTL_DEQUE_H + + +#include +#include +#include +#include +#include +#include +#include + +EA_DISABLE_ALL_VC_WARNINGS() +#include +#include +EA_RESTORE_ALL_VC_WARNINGS() + +#if EASTL_EXCEPTIONS_ENABLED + EA_DISABLE_ALL_VC_WARNINGS() + #include // std::out_of_range, std::length_error. + EA_RESTORE_ALL_VC_WARNINGS() +#endif + + +// 4267 - 'argument' : conversion from 'size_t' to 'const uint32_t', possible loss of data. This is a bogus warning resulting from a bug in VC++. +// 4345 - Behavior change: an object of POD type constructed with an initializer of the form () will be default-initialized +// 4480 - nonstandard extension used: specifying underlying type for enum +// 4530 - C++ exception handler used, but unwind semantics are not enabled. Specify /EHsc +// 4571 - catch(...) semantics changed since Visual C++ 7.1; structured exceptions (SEH) are no longer caught. +EA_DISABLE_VC_WARNING(4267 4345 4480 4530 4571); + +#if EASTL_EXCEPTIONS_ENABLED + // 4703 - potentially uninitialized local pointer variable used. VC++ is mistakenly analyzing the possibility of uninitialized variables, though it's not easy for it to do so. + // 4701 - potentially uninitialized local variable used. + EA_DISABLE_VC_WARNING(4703 4701) +#endif + + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + + +namespace eastl +{ + + /// EASTL_DEQUE_DEFAULT_NAME + /// + /// Defines a default container name in the absence of a user-provided name. + /// + #ifndef EASTL_DEQUE_DEFAULT_NAME + #define EASTL_DEQUE_DEFAULT_NAME EASTL_DEFAULT_NAME_PREFIX " deque" // Unless the user overrides something, this is "EASTL deque". 
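// Worked sketch of the layout drawn above: a logical element index decomposes into a subarray
// index and a slot within that subarray, after adding the unused capacity in front of element [0]
// (all numbers and names here are illustrative; 8-slot subarrays and a begin offset of 13 match
// the 24-element diagram above).
#include <cstdio>

inline void subarrayMappingExample()
{
    const int kSubarraySize = 8;
    const int beginOffset   = 13;   // element [0] lives 13 slots into the subarray chain

    const int indices[] = { 0, 7, 23 };
    for (int i : indices)
    {
        const int absolute      = beginOffset + i;
        const int subarrayIndex = absolute / kSubarraySize;
        const int slot          = absolute % kSubarraySize;
        std::printf("deque[%d] -> subarray %d, slot %d\n", i, subarrayIndex, slot);
    }
}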
+ #endif + + + /// EASTL_DEQUE_DEFAULT_ALLOCATOR + /// + #ifndef EASTL_DEQUE_DEFAULT_ALLOCATOR + #define EASTL_DEQUE_DEFAULT_ALLOCATOR allocator_type(EASTL_DEQUE_DEFAULT_NAME) + #endif + + + /// DEQUE_DEFAULT_SUBARRAY_SIZE + /// + /// Defines the default number of items in a subarray. + /// Note that the user has the option of specifying the subarray size + /// in the deque template declaration. + /// + #if !defined(__GNUC__) || (__GNUC__ >= 3) // GCC 2.x can't handle the declaration below. + #define DEQUE_DEFAULT_SUBARRAY_SIZE(T) ((sizeof(T) <= 4) ? 64 : ((sizeof(T) <= 8) ? 32 : ((sizeof(T) <= 16) ? 16 : ((sizeof(T) <= 32) ? 8 : 4)))) + #else + #define DEQUE_DEFAULT_SUBARRAY_SIZE(T) 16 + #endif + + + + /// DequeIterator + /// + /// The DequeIterator provides both const and non-const iterators for deque. + /// It also is used for the tracking of the begin and end for the deque. + /// + template + struct DequeIterator + { + typedef DequeIterator this_type; + typedef DequeIterator iterator; + typedef DequeIterator const_iterator; + typedef ptrdiff_t difference_type; + typedef EASTL_ITC_NS::random_access_iterator_tag iterator_category; + typedef T value_type; + typedef T* pointer; + typedef T& reference; + + public: + DequeIterator(); + DequeIterator(const iterator& x); + + pointer operator->() const; + reference operator*() const; + + this_type& operator++(); + this_type operator++(int); + + this_type& operator--(); + this_type operator--(int); + + this_type& operator+=(difference_type n); + this_type& operator-=(difference_type n); + + this_type operator+(difference_type n) const; + this_type operator-(difference_type n) const; + + protected: + template + friend struct DequeIterator; + + template + friend struct DequeBase; + + template + friend class deque; + + template + friend bool operator==(const DequeIterator&, + const DequeIterator&); + + template + friend bool operator!=(const DequeIterator&, + const DequeIterator&); + + template + friend bool operator!=(const DequeIterator& a, + const DequeIterator& b); + + template + friend bool operator< (const DequeIterator&, + const DequeIterator&); + + template + friend bool operator> (const DequeIterator&, + const DequeIterator&); + + template + friend bool operator<=(const DequeIterator&, + const DequeIterator&); + + template + friend bool operator>=(const DequeIterator&, + const DequeIterator&); + + template + friend typename DequeIterator::difference_type + operator-(const DequeIterator& a, + const DequeIterator& b); + + protected: + T* mpCurrent; // Where we currently point. Declared first because it's used most often. + T* mpBegin; // The beginning of the current subarray. + T* mpEnd; // The end of the current subarray. To consider: remove this member, as it is always equal to 'mpBegin + kDequeSubarraySize'. Given that deque subarrays usually consist of hundreds of bytes, this isn't a massive win. Also, now that we are implementing a zero-allocation new deque policy, mpEnd may in fact not be equal to 'mpBegin + kDequeSubarraySize'. + T** mpCurrentArrayPtr; // Pointer to current subarray. We could alternatively implement this as a list node iterator if the deque used a linked list. 
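// Simplified sketch (not the real template) of how an iterator built from the four members above
// advances: when mpCurrent steps off the end of its subarray, it hops to the next entry in the
// subarray pointer table. DequeIterator::operator++() further below does exactly this.
template <typename T, int kSubarraySize>
struct SimpleDequeIteratorSketch
{
    T*  mpCurrent;          // where we currently point
    T*  mpBegin;            // start of the current subarray
    T*  mpEnd;              // end of the current subarray
    T** mpCurrentArrayPtr;  // entry in the table of subarray pointers

    void increment()
    {
        if (++mpCurrent == mpEnd)               // stepped off the current subarray...
        {
            mpBegin   = *++mpCurrentArrayPtr;   // ...advance to the next subarray
            mpEnd     = mpBegin + kSubarraySize;
            mpCurrent = mpBegin;
        }
    }
};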
+ + struct Increment {}; + struct Decrement {}; + struct FromConst {}; + + DequeIterator(T** pCurrentArrayPtr, T* pCurrent); + DequeIterator(const const_iterator& x, FromConst) : mpCurrent(x.mpCurrent), mpBegin(x.mpBegin), mpEnd(x.mpEnd), mpCurrentArrayPtr(x.mpCurrentArrayPtr){} + DequeIterator(const iterator& x, Increment); + DequeIterator(const iterator& x, Decrement); + + this_type copy(const iterator& first, const iterator& last, true_type); // true means that value_type has the type_trait has_trivial_relocate, + this_type copy(const iterator& first, const iterator& last, false_type); // false means it does not. + + void copy_backward(const iterator& first, const iterator& last, true_type); // true means that value_type has the type_trait has_trivial_relocate, + void copy_backward(const iterator& first, const iterator& last, false_type); // false means it does not. + + void SetSubarray(T** pCurrentArrayPtr); + }; + + + + + /// DequeBase + /// + /// The DequeBase implements memory allocation for deque. + /// See VectorBase (class vector) for an explanation of why we + /// create this separate base class. + /// + template + struct DequeBase + { + typedef T value_type; + typedef Allocator allocator_type; + typedef eastl_size_t size_type; // See config.h for the definition of eastl_size_t, which defaults to size_t. + typedef ptrdiff_t difference_type; + typedef DequeIterator iterator; + typedef DequeIterator const_iterator; + + static const size_type npos = (size_type)-1; /// 'npos' means non-valid position or simply non-position. + static const size_type kMaxSize = (size_type)-2; /// -1 is reserved for 'npos'. It also happens to be slightly beneficial that kMaxSize is a value less than -1, as it helps us deal with potential integer wraparound issues. + + enum + { + kMinPtrArraySize = 8, /// A new empty deque has a ptrArraySize of 0, but any allocated ptrArrays use this min size. + kSubarraySize = kDequeSubarraySize /// + //kNodeSize = kDequeSubarraySize * sizeof(T) /// Disabled because it prevents the ability to do this: struct X{ eastl::deque mDequeOfSelf; }; + }; + + enum Side /// Defines the side of the deque: front or back. + { + kSideFront, /// Identifies the front side of the deque. + kSideBack /// Identifies the back side of the deque. + }; + + protected: + T** mpPtrArray; // Array of pointers to subarrays. + size_type mnPtrArraySize; // Possibly we should store this as T** mpArrayEnd. + iterator mItBegin; // Where within the subarrays is our beginning. + iterator mItEnd; // Where within the subarrays is our end. + allocator_type mAllocator; // To do: Use base class optimization to make this go away. + + public: + DequeBase(const allocator_type& allocator); + DequeBase(size_type n); + DequeBase(size_type n, const allocator_type& allocator); + ~DequeBase(); + + const allocator_type& get_allocator() const EA_NOEXCEPT; + allocator_type& get_allocator() EA_NOEXCEPT; + void set_allocator(const allocator_type& allocator); + + protected: + T* DoAllocateSubarray(); + void DoFreeSubarray(T* p); + void DoFreeSubarrays(T** pBegin, T** pEnd); + + T** DoAllocatePtrArray(size_type n); + void DoFreePtrArray(T** p, size_t n); + + iterator DoReallocSubarray(size_type nAdditionalCapacity, Side allocationSide); + void DoReallocPtrArray(size_type nAdditionalCapacity, Side allocationSide); + + void DoInit(size_type n); + + }; // DequeBase + + + + + /// deque + /// + /// Implements a conventional C++ double-ended queue. 
The implementation used here + /// is very much like any other deque implementations you may have seen, as it + /// follows the standard algorithm for deque design. + /// + /// Note: + /// As of this writing, deque does not support zero-allocation initial emptiness. + /// A newly created deque with zero elements will still allocate a subarray + /// pointer set. We are looking for efficient and clean ways to get around this, + /// but current efforts have resulted in less efficient and more fragile code. + /// The logic of this class doesn't lend itself to a clean implementation. + /// It turns out that deques are one of the least likely classes you'd want this + /// behaviour in, so until this functionality becomes very important to somebody, + /// we will leave it as-is. It can probably be solved by adding some extra code to + /// the Do* functions and adding good comments explaining the situation. + /// + template + class deque : public DequeBase + { + public: + typedef DequeBase base_type; + typedef deque this_type; + typedef T value_type; + typedef T* pointer; + typedef const T* const_pointer; + typedef T& reference; + typedef const T& const_reference; + typedef DequeIterator iterator; + typedef DequeIterator const_iterator; + typedef eastl::reverse_iterator reverse_iterator; + typedef eastl::reverse_iterator const_reverse_iterator; + typedef typename base_type::size_type size_type; + typedef typename base_type::difference_type difference_type; + typedef typename base_type::allocator_type allocator_type; + + using base_type::kSideFront; + using base_type::kSideBack; + using base_type::mpPtrArray; + using base_type::mnPtrArraySize; + using base_type::mItBegin; + using base_type::mItEnd; + using base_type::mAllocator; + using base_type::npos; + using base_type::DoAllocateSubarray; + using base_type::DoFreeSubarray; + using base_type::DoFreeSubarrays; + using base_type::DoAllocatePtrArray; + using base_type::DoFreePtrArray; + using base_type::DoReallocSubarray; + using base_type::DoReallocPtrArray; + + public: + deque(); + explicit deque(const allocator_type& allocator); + explicit deque(size_type n, const allocator_type& allocator = EASTL_DEQUE_DEFAULT_ALLOCATOR); + deque(size_type n, const value_type& value, const allocator_type& allocator = EASTL_DEQUE_DEFAULT_ALLOCATOR); + deque(const this_type& x); + deque(this_type&& x); + deque(this_type&& x, const allocator_type& allocator); + deque(std::initializer_list ilist, const allocator_type& allocator = EASTL_DEQUE_DEFAULT_ALLOCATOR); + + template + deque(InputIterator first, InputIterator last); // allocator arg removed because VC7.1 fails on the default arg. To do: Make a second version of this function without a default arg. + + ~deque(); + + this_type& operator=(const this_type& x); + this_type& operator=(std::initializer_list ilist); + this_type& operator=(this_type&& x); + + void swap(this_type& x); + + void assign(size_type n, const value_type& value); + void assign(std::initializer_list ilist); + + template // It turns out that the C++ std::deque specifies a two argument + void assign(InputIterator first, InputIterator last); // version of assign that takes (int size, int value). These are not + // iterators, so we need to do a template compiler trick to do the right thing. 
+ + iterator begin() EA_NOEXCEPT; + const_iterator begin() const EA_NOEXCEPT; + const_iterator cbegin() const EA_NOEXCEPT; + + iterator end() EA_NOEXCEPT; + const_iterator end() const EA_NOEXCEPT; + const_iterator cend() const EA_NOEXCEPT; + + reverse_iterator rbegin() EA_NOEXCEPT; + const_reverse_iterator rbegin() const EA_NOEXCEPT; + const_reverse_iterator crbegin() const EA_NOEXCEPT; + + reverse_iterator rend() EA_NOEXCEPT; + const_reverse_iterator rend() const EA_NOEXCEPT; + const_reverse_iterator crend() const EA_NOEXCEPT; + + bool empty() const EA_NOEXCEPT; + size_type size() const EA_NOEXCEPT; + + void resize(size_type n, const value_type& value); + void resize(size_type n); + + void shrink_to_fit(); + void set_capacity(size_type n = base_type::npos); + + reference operator[](size_type n); + const_reference operator[](size_type n) const; + + reference at(size_type n); + const_reference at(size_type n) const; + + reference front(); + const_reference front() const; + + reference back(); + const_reference back() const; + + void push_front(const value_type& value); + reference push_front(); + void push_front(value_type&& value); + + void push_back(const value_type& value); + reference push_back(); + void push_back(value_type&& value); + + void pop_front(); + void pop_back(); + + template + iterator emplace(const_iterator position, Args&&... args); + + template + void emplace_front(Args&&... args); + + template + void emplace_back(Args&&... args); + + iterator insert(const_iterator position, const value_type& value); + iterator insert(const_iterator position, value_type&& value); + void insert(const_iterator position, size_type n, const value_type& value); + iterator insert(const_iterator position, std::initializer_list ilist); + + template + void insert(const_iterator position, InputIterator first, InputIterator last); + + iterator erase(const_iterator position); + iterator erase(const_iterator first, const_iterator last); + reverse_iterator erase(reverse_iterator position); + reverse_iterator erase(reverse_iterator first, reverse_iterator last); + + void clear(); + //void reset_lose_memory(); // Disabled until it can be implemented efficiently and cleanly. // This is a unilateral reset to an initially empty state. No destructors are called, no deallocation occurs. 
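// Usage sketch for the public interface declared above (illustrative; assumes the application
// provides the operator new[] overloads EASTL requires).
#include <EASTL/deque.h>

inline void dequeUsageExample()
{
    eastl::deque<int> d;

    d.push_back(2);
    d.push_back(3);
    d.push_front(1);               // no elements are shifted; see the design notes above

    d.insert(d.begin() + 1, 42);   // random-access iterators allow insertion anywhere
    d.pop_front();

    const int second = d[1];       // operator[] gives random access across subarrays
    (void)second;

    d.resize(10, 0);               // grows at the back, filling with the given value
    d.clear();
}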
+ + bool validate() const; + int validate_iterator(const_iterator i) const; + + protected: + template + void DoInit(Integer n, Integer value, true_type); + + template + void DoInit(InputIterator first, InputIterator last, false_type); + + template + void DoInitFromIterator(InputIterator first, InputIterator last, EASTL_ITC_NS::input_iterator_tag); + + template + void DoInitFromIterator(ForwardIterator first, ForwardIterator last, EASTL_ITC_NS::forward_iterator_tag); + + void DoFillInit(const value_type& value); + + template + void DoAssign(Integer n, Integer value, true_type); + + template + void DoAssign(InputIterator first, InputIterator last, false_type); + + void DoAssignValues(size_type n, const value_type& value); + + template + void DoInsert(const const_iterator& position, Integer n, Integer value, true_type); + + template + void DoInsert(const const_iterator& position, const InputIterator& first, const InputIterator& last, false_type); + + template + void DoInsertFromIterator(const_iterator position, const InputIterator& first, const InputIterator& last, EASTL_ITC_NS::forward_iterator_tag); + + void DoInsertValues(const_iterator position, size_type n, const value_type& value); + + void DoSwap(this_type& x); + }; // class deque + + + + + /////////////////////////////////////////////////////////////////////// + // DequeBase + /////////////////////////////////////////////////////////////////////// + + template + DequeBase::DequeBase(const allocator_type& allocator) + : mpPtrArray(NULL), + mnPtrArraySize(0), + mItBegin(), + mItEnd(), + mAllocator(allocator) + { + // It is assumed here that the deque subclass will init us when/as needed. + } + + + template + DequeBase::DequeBase(size_type n) + : mpPtrArray(NULL), + mnPtrArraySize(0), + mItBegin(), + mItEnd(), + mAllocator(EASTL_DEQUE_DEFAULT_NAME) + { + // It's important to note that DoInit creates space for elements and assigns + // mItBegin/mItEnd to point to them, but these elements are not constructed. + // You need to immediately follow this constructor with code that constructs the values. + DoInit(n); + } + + + template + DequeBase::DequeBase(size_type n, const allocator_type& allocator) + : mpPtrArray(NULL), + mnPtrArraySize(0), + mItBegin(), + mItEnd(), + mAllocator(allocator) + { + // It's important to note that DoInit creates space for elements and assigns + // mItBegin/mItEnd to point to them, but these elements are not constructed. + // You need to immediately follow this constructor with code that constructs the values. + DoInit(n); + } + + + template + DequeBase::~DequeBase() + { + if(mpPtrArray) + { + DoFreeSubarrays(mItBegin.mpCurrentArrayPtr, mItEnd.mpCurrentArrayPtr + 1); + DoFreePtrArray(mpPtrArray, mnPtrArraySize); + mpPtrArray = nullptr; + } + } + + + template + const typename DequeBase::allocator_type& + DequeBase::get_allocator() const EA_NOEXCEPT + { + return mAllocator; + } + + + template + typename DequeBase::allocator_type& + DequeBase::get_allocator() EA_NOEXCEPT + { + return mAllocator; + } + + + template + void DequeBase::set_allocator(const allocator_type& allocator) + { + // The only time you can set an allocator is with an empty unused container, such as right after construction. + if(EASTL_LIKELY(mAllocator != allocator)) + { + if(EASTL_LIKELY(mpPtrArray && (mItBegin.mpCurrentArrayPtr == mItEnd.mpCurrentArrayPtr))) // If we are empty and so can safely deallocate the existing memory... 
We could also test for empty(), but that's a more expensive calculation and more involved clearing, though it would be more flexible. + { + DoFreeSubarrays(mItBegin.mpCurrentArrayPtr, mItEnd.mpCurrentArrayPtr + 1); + DoFreePtrArray(mpPtrArray, mnPtrArraySize); + + mAllocator = allocator; + DoInit(0); + } + else + { + EASTL_FAIL_MSG("DequeBase::set_allocator -- atempt to change allocator after allocating elements."); + } + } + } + + + template + T* DequeBase::DoAllocateSubarray() + { + T* p = (T*)allocate_memory(mAllocator, kDequeSubarraySize * sizeof(T), EASTL_ALIGN_OF(T), 0); + EASTL_ASSERT_MSG(p != nullptr, "the behaviour of eastl::allocators that return nullptr is not defined."); + + #if EASTL_DEBUG + memset((void*)p, 0, kDequeSubarraySize * sizeof(T)); + #endif + + return (T*)p; + } + + + template + void DequeBase::DoFreeSubarray(T* p) + { + if(p) + EASTLFree(mAllocator, p, kDequeSubarraySize * sizeof(T)); + } + + template + void DequeBase::DoFreeSubarrays(T** pBegin, T** pEnd) + { + while(pBegin < pEnd) + DoFreeSubarray(*pBegin++); + } + + template + T** DequeBase::DoAllocatePtrArray(size_type n) + { + #if EASTL_ASSERT_ENABLED + if(EASTL_UNLIKELY(n >= 0x80000000)) + EASTL_FAIL_MSG("deque::DoAllocatePtrArray -- improbably large request."); + #endif + + T** pp = (T**)allocate_memory(mAllocator, n * sizeof(T*), EASTL_ALIGN_OF(T), 0); + EASTL_ASSERT_MSG(pp != nullptr, "the behaviour of eastl::allocators that return nullptr is not defined."); + + #if EASTL_DEBUG + memset((void*)pp, 0, n * sizeof(T*)); + #endif + + return pp; + } + + + template + void DequeBase::DoFreePtrArray(T** pp, size_t n) + { + if(pp) + EASTLFree(mAllocator, pp, n * sizeof(T*)); + } + + + template + typename DequeBase::iterator + DequeBase::DoReallocSubarray(size_type nAdditionalCapacity, Side allocationSide) + { + // nAdditionalCapacity refers to the amount of additional space we need to be + // able to store in this deque. Typically this function is called as part of + // an insert or append operation. This is the function that makes sure there + // is enough capacity for the new elements to be copied into the deque. + // The new capacity here is always at the front or back of the deque. + // This function returns an iterator to that points to the new begin or + // the new end of the deque space, depending on allocationSide. + + if(allocationSide == kSideFront) + { + // There might be some free space (nCurrentAdditionalCapacity) at the front of the existing subarray. + const size_type nCurrentAdditionalCapacity = (size_type)(mItBegin.mpCurrent - mItBegin.mpBegin); + + if(EASTL_UNLIKELY(nCurrentAdditionalCapacity < nAdditionalCapacity)) // If we need to grow downward into a new subarray... + { + const difference_type nSubarrayIncrease = (difference_type)(((nAdditionalCapacity - nCurrentAdditionalCapacity) + kDequeSubarraySize - 1) / kDequeSubarraySize); + difference_type i; + + if(nSubarrayIncrease > (mItBegin.mpCurrentArrayPtr - mpPtrArray)) // If there are not enough pointers in front of the current (first) one... + DoReallocPtrArray((size_type)(nSubarrayIncrease - (mItBegin.mpCurrentArrayPtr - mpPtrArray)), kSideFront); + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + for(i = 1; i <= nSubarrayIncrease; ++i) + mItBegin.mpCurrentArrayPtr[-i] = DoAllocateSubarray(); + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) 
+ { + for(difference_type j = 1; j < i; ++j) + DoFreeSubarray(mItBegin.mpCurrentArrayPtr[-j]); + throw; + } + #endif + } + + return mItBegin - (difference_type)nAdditionalCapacity; + } + else // else kSideBack + { + const size_type nCurrentAdditionalCapacity = (size_type)((mItEnd.mpEnd - 1) - mItEnd.mpCurrent); + + if(EASTL_UNLIKELY(nCurrentAdditionalCapacity < nAdditionalCapacity)) // If we need to grow forward into a new subarray... + { + const difference_type nSubarrayIncrease = (difference_type)(((nAdditionalCapacity - nCurrentAdditionalCapacity) + kDequeSubarraySize - 1) / kDequeSubarraySize); + difference_type i; + + if(nSubarrayIncrease > ((mpPtrArray + mnPtrArraySize) - mItEnd.mpCurrentArrayPtr) - 1) // If there are not enough pointers after the current (last) one... + DoReallocPtrArray((size_type)(nSubarrayIncrease - (((mpPtrArray + mnPtrArraySize) - mItEnd.mpCurrentArrayPtr) - 1)), kSideBack); + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + for(i = 1; i <= nSubarrayIncrease; ++i) + mItEnd.mpCurrentArrayPtr[i] = DoAllocateSubarray(); + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + for(difference_type j = 1; j < i; ++j) + DoFreeSubarray(mItEnd.mpCurrentArrayPtr[j]); + throw; + } + #endif + } + + return mItEnd + (difference_type)nAdditionalCapacity; + } + } + + + template + void DequeBase::DoReallocPtrArray(size_type nAdditionalCapacity, Side allocationSide) + { + // This function is not called unless the capacity is known to require a resize. + // + // We have an array of pointers (mpPtrArray), of which a segment of them are in use and + // at either end of the array are zero or more unused pointers. This function is being + // called because we need to extend the capacity on either side of this array by + // nAdditionalCapacity pointers. However, it's possible that if the user is continually + // using push_back and pop_front then the pointer array will continue to be extended + // on the back side and unused on the front side. So while we are doing this resizing + // here we also take the opportunity to recenter the pointers and thus be balanced. + // It man turn out that we don't even need to reallocate the pointer array in order + // to increase capacity on one side, as simply moving the pointers to the center may + // be enough to open up the requires space. + // + // Balanced pointer array Unbalanced pointer array (unused space at front, no free space at back) + // ----++++++++++++---- ---------+++++++++++ + + const size_type nUnusedPtrCountAtFront = (size_type)(mItBegin.mpCurrentArrayPtr - mpPtrArray); + const size_type nUsedPtrCount = (size_type)(mItEnd.mpCurrentArrayPtr - mItBegin.mpCurrentArrayPtr) + 1; + const size_type nUsedPtrSpace = nUsedPtrCount * sizeof(void*); + const size_type nUnusedPtrCountAtBack = (mnPtrArraySize - nUnusedPtrCountAtFront) - nUsedPtrCount; + value_type** pPtrArrayBegin; + + if((allocationSide == kSideBack) && (nAdditionalCapacity <= nUnusedPtrCountAtFront)) // If we can take advantage of unused pointers at the front without doing any reallocation... + { + if(nAdditionalCapacity < (nUnusedPtrCountAtFront / 2)) // Possibly use more space than required, if there's a lot of extra space. 
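// Conceptual model (not the member function itself) of the recentering described above: when one
// end of the pointer table runs out of slack but the other end has plenty, the window of live
// subarray pointers is memmoved toward the middle instead of reallocating the table.
#include <cstring>

inline void recenterPtrWindowSketch(void** table, int tableSize, int& firstUsed, int usedCount)
{
    const int slack    = tableSize - usedCount;   // unused slots at both ends combined
    const int newFirst = slack / 2;               // place the used run roughly in the middle

    std::memmove(table + newFirst, table + firstUsed,
                 (std::size_t)usedCount * sizeof(void*));
    firstUsed = newFirst;
}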
+ nAdditionalCapacity = (nUnusedPtrCountAtFront / 2); + + pPtrArrayBegin = mpPtrArray + (nUnusedPtrCountAtFront - nAdditionalCapacity); + memmove(pPtrArrayBegin, mItBegin.mpCurrentArrayPtr, nUsedPtrSpace); + + #if EASTL_DEBUG + memset(pPtrArrayBegin + nUsedPtrCount, 0, (size_t)(mpPtrArray + mnPtrArraySize) - (size_t)(pPtrArrayBegin + nUsedPtrCount)); + #endif + } + else if((allocationSide == kSideFront) && (nAdditionalCapacity <= nUnusedPtrCountAtBack)) // If we can take advantage of unused pointers at the back without doing any reallocation... + { + if(nAdditionalCapacity < (nUnusedPtrCountAtBack / 2)) // Possibly use more space than required, if there's a lot of extra space. + nAdditionalCapacity = (nUnusedPtrCountAtBack / 2); + + pPtrArrayBegin = mItBegin.mpCurrentArrayPtr + nAdditionalCapacity; + memmove(pPtrArrayBegin, mItBegin.mpCurrentArrayPtr, nUsedPtrSpace); + + #if EASTL_DEBUG + memset(mpPtrArray, 0, (size_t)((uintptr_t)pPtrArrayBegin - (uintptr_t)mpPtrArray)); + #endif + } + else + { + // In this case we will have to do a reallocation. + const size_type nNewPtrArraySize = mnPtrArraySize + eastl::max_alt(mnPtrArraySize, nAdditionalCapacity) + 2; // Allocate extra capacity. + value_type** const pNewPtrArray = DoAllocatePtrArray(nNewPtrArraySize); + + pPtrArrayBegin = pNewPtrArray + (mItBegin.mpCurrentArrayPtr - mpPtrArray) + ((allocationSide == kSideFront) ? nAdditionalCapacity : 0); + + // The following is equivalent to: eastl::copy(mItBegin.mpCurrentArrayPtr, mItEnd.mpCurrentArrayPtr + 1, pPtrArrayBegin); + // It's OK to use memcpy instead of memmove because the destination is guaranteed to non-overlap the source. + if(mpPtrArray) // Could also say: 'if(mItBegin.mpCurrentArrayPtr)' + memcpy(pPtrArrayBegin, mItBegin.mpCurrentArrayPtr, nUsedPtrSpace); + + DoFreePtrArray(mpPtrArray, mnPtrArraySize); + + mpPtrArray = pNewPtrArray; + mnPtrArraySize = nNewPtrArraySize; + } + + // We need to reset the begin and end iterators, as code that calls this expects them to *not* be invalidated. + mItBegin.SetSubarray(pPtrArrayBegin); + mItEnd.SetSubarray((pPtrArrayBegin + nUsedPtrCount) - 1); + } + + + template + void DequeBase::DoInit(size_type n) + { + // This code is disabled because it doesn't currently work properly. + // We are trying to make it so that a deque can have a zero allocation + // initial empty state, but we (OK, I) am having a hard time making + // this elegant and efficient. + //if(n) + //{ + const size_type nNewPtrArraySize = (size_type)((n / kDequeSubarraySize) + 1); // Always have at least one, even if n is zero. + const size_type kMinPtrArraySize_ = kMinPtrArraySize; + + mnPtrArraySize = eastl::max_alt(kMinPtrArraySize_, (nNewPtrArraySize + 2)); + mpPtrArray = DoAllocatePtrArray(mnPtrArraySize); + + value_type** const pPtrArrayBegin = (mpPtrArray + ((mnPtrArraySize - nNewPtrArraySize) / 2)); // Try to place it in the middle. + value_type** const pPtrArrayEnd = pPtrArrayBegin + nNewPtrArraySize; + value_type** pPtrArrayCurrent = pPtrArrayBegin; + + #if EASTL_EXCEPTIONS_ENABLED + try + { + try + { + #endif + while(pPtrArrayCurrent < pPtrArrayEnd) + *pPtrArrayCurrent++ = DoAllocateSubarray(); + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + DoFreeSubarrays(pPtrArrayBegin, pPtrArrayCurrent); + throw; + } + } + catch(...) 
+ { + DoFreePtrArray(mpPtrArray, mnPtrArraySize); + mpPtrArray = NULL; + mnPtrArraySize = 0; + throw; + } + #endif + + mItBegin.SetSubarray(pPtrArrayBegin); + mItBegin.mpCurrent = mItBegin.mpBegin; + + mItEnd.SetSubarray(pPtrArrayEnd - 1); + mItEnd.mpCurrent = mItEnd.mpBegin + (difference_type)(n % kDequeSubarraySize); + //} + //else // Else we do a zero-allocation initialization. + //{ + // mpPtrArray = NULL; + // mnPtrArraySize = 0; + // + // mItBegin.mpCurrentArrayPtr = NULL; + // mItBegin.mpBegin = NULL; + // mItBegin.mpEnd = NULL; // We intentionally create a situation whereby the subarray that has no capacity. + // mItBegin.mpCurrent = NULL; + // + // mItEnd = mItBegin; + //} + } + + + + /////////////////////////////////////////////////////////////////////// + // DequeIterator + /////////////////////////////////////////////////////////////////////// + + template + DequeIterator::DequeIterator() + : mpCurrent(NULL), mpBegin(NULL), mpEnd(NULL), mpCurrentArrayPtr(NULL) + { + // Empty + } + + + template + DequeIterator::DequeIterator(T** pCurrentArrayPtr, T* pCurrent) + : mpCurrent(pCurrent), mpBegin(*pCurrentArrayPtr), mpEnd(pCurrent + kDequeSubarraySize), mpCurrentArrayPtr(pCurrentArrayPtr) + { + // Empty + } + + + template + DequeIterator::DequeIterator(const iterator& x) + : mpCurrent(x.mpCurrent), mpBegin(x.mpBegin), mpEnd(x.mpEnd), mpCurrentArrayPtr(x.mpCurrentArrayPtr) + { + // Empty + } + + + template + DequeIterator::DequeIterator(const iterator& x, Increment) + : mpCurrent(x.mpCurrent), mpBegin(x.mpBegin), mpEnd(x.mpEnd), mpCurrentArrayPtr(x.mpCurrentArrayPtr) + { + operator++(); + } + + + template + DequeIterator::DequeIterator(const iterator& x, Decrement) + : mpCurrent(x.mpCurrent), mpBegin(x.mpBegin), mpEnd(x.mpEnd), mpCurrentArrayPtr(x.mpCurrentArrayPtr) + { + operator--(); + } + + + template + typename DequeIterator::pointer + DequeIterator::operator->() const + { + return mpCurrent; + } + + + template + typename DequeIterator::reference + DequeIterator::operator*() const + { + return *mpCurrent; + } + + + template + typename DequeIterator::this_type& + DequeIterator::operator++() + { + if(EASTL_UNLIKELY(++mpCurrent == mpEnd)) + { + mpBegin = *++mpCurrentArrayPtr; + mpEnd = mpBegin + kDequeSubarraySize; + mpCurrent = mpBegin; + } + return *this; + } + + + template + typename DequeIterator::this_type + DequeIterator::operator++(int) + { + const this_type temp(*this); + operator++(); + return temp; + } + + + template + typename DequeIterator::this_type& + DequeIterator::operator--() + { + if(EASTL_UNLIKELY(mpCurrent == mpBegin)) + { + mpBegin = *--mpCurrentArrayPtr; + mpEnd = mpBegin + kDequeSubarraySize; + mpCurrent = mpEnd; // fall through... + } + --mpCurrent; + return *this; + } + + + template + typename DequeIterator::this_type + DequeIterator::operator--(int) + { + const this_type temp(*this); + operator--(); + return temp; + } + + + template + typename DequeIterator::this_type& + DequeIterator::operator+=(difference_type n) + { + const difference_type subarrayPosition = (mpCurrent - mpBegin) + n; + + // Cast from signed to unsigned (size_t) in order to obviate the need to compare to < 0. + if((size_t)subarrayPosition < (size_t)kDequeSubarraySize) // If the new position is within the current subarray (i.e. >= 0 && < kSubArraySize)... + mpCurrent += n; + else + { + // This implementation is a branchless version which works by offsetting + // the math to always be in the positive range. 
Much of the values here + // reduce to constants and both the multiplication and division are of + // power of two sizes and so this calculation ends up compiling down to + // just one addition, one shift and one subtraction. This algorithm has + // a theoretical weakness in that on 32 bit systems it will fail if the + // value of n is >= (2^32 - 2^24) or 4,278,190,080 of if kDequeSubarraySize + // is >= 2^24 or 16,777,216. + EASTL_CT_ASSERT((kDequeSubarraySize & (kDequeSubarraySize - 1)) == 0); // Verify that it is a power of 2. + const difference_type subarrayIndex = (((16777216 + subarrayPosition) / (difference_type)kDequeSubarraySize)) - (16777216 / (difference_type)kDequeSubarraySize); + + SetSubarray(mpCurrentArrayPtr + subarrayIndex); + mpCurrent = mpBegin + (subarrayPosition - (subarrayIndex * (difference_type)kDequeSubarraySize)); + } + return *this; + } + + + template + typename DequeIterator::this_type& + DequeIterator::operator-=(difference_type n) + { + return (*this).operator+=(-n); + } + + + template + typename DequeIterator::this_type + DequeIterator::operator+(difference_type n) const + { + return this_type(*this).operator+=(n); + } + + + template + typename DequeIterator::this_type + DequeIterator::operator-(difference_type n) const + { + return this_type(*this).operator+=(-n); + } + + + template + typename DequeIterator::this_type + DequeIterator::copy(const iterator& first, const iterator& last, true_type) + { + // To do: Implement this as a loop which does memcpys between subarrays appropriately. + // Currently we only do memcpy if the entire operation occurs within a single subarray. + if((first.mpBegin == last.mpBegin) && (first.mpBegin == mpBegin)) // If all operations are within the same subarray, implement the operation as a memmove. + { + memmove(mpCurrent, first.mpCurrent, (size_t)((uintptr_t)last.mpCurrent - (uintptr_t)first.mpCurrent)); + return *this + (last.mpCurrent - first.mpCurrent); + } + return eastl::copy(eastl::make_move_iterator(first), eastl::make_move_iterator(last), eastl::make_move_iterator(*this)).base(); + } + + + template + typename DequeIterator::this_type + DequeIterator::copy(const iterator& first, const iterator& last, false_type) + { + return eastl::copy(eastl::make_move_iterator(first), eastl::make_move_iterator(last), eastl::make_move_iterator(*this)).base(); + } + + + template + void DequeIterator::copy_backward(const iterator& first, const iterator& last, true_type) + { + // To do: Implement this as a loop which does memmoves between subarrays appropriately. + // Currently we only do memcpy if the entire operation occurs within a single subarray. + if((first.mpBegin == last.mpBegin) && (first.mpBegin == mpBegin)) // If all operations are within the same subarray, implement the operation as a memcpy. 
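// Worked example of the branchless index math explained above: adding a large bias (16777216 is a
// multiple of any power-of-two kDequeSubarraySize up to 2^24) before the division makes truncation
// behave like floor for negative positions too (numbers and names here are illustrative).
#include <cstdio>

inline void branchlessIndexExample()
{
    const long kSubarraySize = 8;   // power of two, as the EASTL_CT_ASSERT above requires
    const long positions[]   = { -3, 5, 19 };

    for (long subarrayPosition : positions)
    {
        const long subarrayIndex = ((16777216 + subarrayPosition) / kSubarraySize) - (16777216 / kSubarraySize);
        const long slot          = subarrayPosition - (subarrayIndex * kSubarraySize);
        std::printf("position %ld -> subarray %ld, slot %ld\n", subarrayPosition, subarrayIndex, slot);
    }
    // Prints: -3 -> subarray -1, slot 5;  5 -> subarray 0, slot 5;  19 -> subarray 2, slot 3.
}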
+ memmove(mpCurrent - (last.mpCurrent - first.mpCurrent), first.mpCurrent, (size_t)((uintptr_t)last.mpCurrent - (uintptr_t)first.mpCurrent)); + else + eastl::copy_backward(eastl::make_move_iterator(first), eastl::make_move_iterator(last), eastl::make_move_iterator(*this)); + } + + + template + void DequeIterator::copy_backward(const iterator& first, const iterator& last, false_type) + { + eastl::copy_backward(eastl::make_move_iterator(first), eastl::make_move_iterator(last), eastl::make_move_iterator(*this)).base(); + } + + + template + void DequeIterator::SetSubarray(T** pCurrentArrayPtr) + { + mpCurrentArrayPtr = pCurrentArrayPtr; + mpBegin = *pCurrentArrayPtr; + mpEnd = mpBegin + kDequeSubarraySize; + } + + + // The C++ defect report #179 requires that we support comparisons between const and non-const iterators. + // Thus we provide additional template paremeters here to support this. The defect report does not + // require us to support comparisons between reverse_iterators and const_reverse_iterators. + template + inline bool operator==(const DequeIterator& a, + const DequeIterator& b) + { + return a.mpCurrent == b.mpCurrent; + } + + + template + inline bool operator!=(const DequeIterator& a, + const DequeIterator& b) + { + return a.mpCurrent != b.mpCurrent; + } + + + // We provide a version of operator!= for the case where the iterators are of the + // same type. This helps prevent ambiguity errors in the presence of rel_ops. + template + inline bool operator!=(const DequeIterator& a, + const DequeIterator& b) + { + return a.mpCurrent != b.mpCurrent; + } + + + template + inline bool operator<(const DequeIterator& a, + const DequeIterator& b) + { + return (a.mpCurrentArrayPtr == b.mpCurrentArrayPtr) ? (a.mpCurrent < b.mpCurrent) : (a.mpCurrentArrayPtr < b.mpCurrentArrayPtr); + } + + + template + inline bool operator>(const DequeIterator& a, + const DequeIterator& b) + { + return (a.mpCurrentArrayPtr == b.mpCurrentArrayPtr) ? (a.mpCurrent > b.mpCurrent) : (a.mpCurrentArrayPtr > b.mpCurrentArrayPtr); + } + + + template + inline bool operator<=(const DequeIterator& a, + const DequeIterator& b) + { + return (a.mpCurrentArrayPtr == b.mpCurrentArrayPtr) ? (a.mpCurrent <= b.mpCurrent) : (a.mpCurrentArrayPtr <= b.mpCurrentArrayPtr); + } + + + template + inline bool operator>=(const DequeIterator& a, + const DequeIterator& b) + { + return (a.mpCurrentArrayPtr == b.mpCurrentArrayPtr) ? (a.mpCurrent >= b.mpCurrent) : (a.mpCurrentArrayPtr >= b.mpCurrentArrayPtr); + } + + + // Random access iterators must support operator + and operator -. + // You can only add an integer to an iterator, and you cannot add two iterators. + template + inline DequeIterator + operator+(ptrdiff_t n, const DequeIterator& x) + { + return x + n; // Implement (n + x) in terms of (x + n). + } + + + // You can only add an integer to an iterator, but you can subtract two iterators. + // The C++ defect report #179 mentioned above specifically refers to + // operator - and states that we support the subtraction of const and non-const iterators. 
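// Worked example of the iterator subtraction implemented just below: count the whole subarrays
// strictly between the two iterators, then add the partial runs at each end (all values here are
// illustrative).
inline long dequeIteratorDistanceExample()
{
    const long kSubarraySize    = 8;
    const long arrayPtrDelta    = 3;   // a.mpCurrentArrayPtr - b.mpCurrentArrayPtr
    const long aOffsetFromBegin = 2;   // a.mpCurrent - a.mpBegin
    const long bOffsetToEnd     = 5;   // b.mpEnd - b.mpCurrent

    // (8 * (3 - 1)) + 2 + 5 == 23 elements from b up to a.
    return (kSubarraySize * (arrayPtrDelta - 1)) + aOffsetFromBegin + bOffsetToEnd;
}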
+ template + inline typename DequeIterator::difference_type + operator-(const DequeIterator& a, + const DequeIterator& b) + { + // This is a fairly clever algorithm that has been used in STL deque implementations since the original HP STL: + typedef typename DequeIterator::difference_type difference_type; + + return ((difference_type)kDequeSubarraySize * ((a.mpCurrentArrayPtr - b.mpCurrentArrayPtr) - 1)) + (a.mpCurrent - a.mpBegin) + (b.mpEnd - b.mpCurrent); + } + + + + + /////////////////////////////////////////////////////////////////////// + // deque + /////////////////////////////////////////////////////////////////////// + + template + inline deque::deque() + : base_type((size_type)0) + { + // Empty + } + + + template + inline deque::deque(const allocator_type& allocator) + : base_type((size_type)0, allocator) + { + // Empty + } + + + template + inline deque::deque(size_type n, const allocator_type& allocator) + : base_type(n, allocator) + { + DoFillInit(value_type()); + } + + + template + inline deque::deque(size_type n, const value_type& value, const allocator_type& allocator) + : base_type(n, allocator) + { + DoFillInit(value); + } + + + template + inline deque::deque(const this_type& x) + : base_type(x.size(), x.mAllocator) + { + eastl::uninitialized_copy(x.mItBegin, x.mItEnd, mItBegin); + } + + + template + inline deque::deque(this_type&& x) + : base_type((size_type)0, x.mAllocator) + { + swap(x); + } + + + template + inline deque::deque(this_type&& x, const allocator_type& allocator) + : base_type((size_type)0, allocator) + { + swap(x); // member swap handles the case that x has a different allocator than our allocator by doing a copy. + } + + + template + inline deque::deque(std::initializer_list ilist, const allocator_type& allocator) + : base_type(allocator) + { + DoInit(ilist.begin(), ilist.end(), false_type()); + } + + + template + template + inline deque::deque(InputIterator first, InputIterator last) + : base_type(EASTL_DEQUE_DEFAULT_ALLOCATOR) // Call the empty base constructor, which does nothing. We need to do all the work in our own DoInit. + { + DoInit(first, last, is_integral()); + } + + + template + inline deque::~deque() + { + // Call destructors. Parent class will free the memory. + for(iterator itCurrent(mItBegin); itCurrent != mItEnd; ++itCurrent) + itCurrent.mpCurrent->~value_type(); + } + + + template + typename deque::this_type& + deque::operator=(const this_type& x) + { + if(&x != this) // If not assigning to ourselves... + { + // If (EASTL_ALLOCATOR_COPY_ENABLED == 1) and the current contents are allocated by an + // allocator that's unequal to x's allocator, we need to reallocate our elements with + // our current allocator and reallocate it with x's allocator. If the allocators are + // equal then we can use a more optimal algorithm that doesn't reallocate our elements + // but instead can copy them in place. + + #if EASTL_ALLOCATOR_COPY_ENABLED + bool bSlowerPathwayRequired = (mAllocator != x.mAllocator); + #else + bool bSlowerPathwayRequired = false; + #endif + + if(bSlowerPathwayRequired) + { + // We can't currently use set_capacity(0) or shrink_to_fit, because they + // leave a remaining allocation with our old allocator. So we do a similar + // thing but set our allocator to x.mAllocator while doing so. + this_type temp(x.mAllocator); + DoSwap(temp); + // Now we have an empty container with an allocator equal to x.mAllocator, ready to assign from x. 
+ } + + DoAssign(x.begin(), x.end(), eastl::false_type()); + } + + return *this; + } + + + template + inline typename deque::this_type& + deque::operator=(this_type&& x) + { + if(this != &x) + { + set_capacity(0); // To consider: Are we really required to clear here? x is going away soon and will clear itself in its dtor. + swap(x); // member swap handles the case that x has a different allocator than our allocator by doing a copy. + } + return *this; + } + + + template + inline typename deque::this_type& + deque::operator=(std::initializer_list ilist) + { + DoAssign(ilist.begin(), ilist.end(), false_type()); + return *this; + } + + + template + inline void deque::assign(size_type n, const value_type& value) + { + DoAssignValues(n, value); + } + + + template + inline void deque::assign(std::initializer_list ilist) + { + DoAssign(ilist.begin(), ilist.end(), false_type()); + } + + + // It turns out that the C++ std::deque specifies a two argument + // version of assign that takes (int size, int value). These are not + // iterators, so we need to do a template compiler trick to do the right thing. + template + template + inline void deque::assign(InputIterator first, InputIterator last) + { + DoAssign(first, last, is_integral()); + } + + + template + inline typename deque::iterator + deque::begin() EA_NOEXCEPT + { + return mItBegin; + } + + + template + inline typename deque::const_iterator + deque::begin() const EA_NOEXCEPT + { + return mItBegin; + } + + + template + inline typename deque::const_iterator + deque::cbegin() const EA_NOEXCEPT + { + return mItBegin; + } + + + template + inline typename deque::iterator + deque::end() EA_NOEXCEPT + { + return mItEnd; + } + + + template + typename deque::const_iterator + deque::end() const EA_NOEXCEPT + { + return mItEnd; + } + + + template + inline typename deque::const_iterator + deque::cend() const EA_NOEXCEPT + { + return mItEnd; + } + + + template + inline typename deque::reverse_iterator + deque::rbegin() EA_NOEXCEPT + { + return reverse_iterator(mItEnd); + } + + + template + inline typename deque::const_reverse_iterator + deque::rbegin() const EA_NOEXCEPT + { + return const_reverse_iterator(mItEnd); + } + + + template + inline typename deque::const_reverse_iterator + deque::crbegin() const EA_NOEXCEPT + { + return const_reverse_iterator(mItEnd); + } + + + template + inline typename deque::reverse_iterator + deque::rend() EA_NOEXCEPT + { + return reverse_iterator(mItBegin); + } + + + template + inline typename deque::const_reverse_iterator + deque::rend() const EA_NOEXCEPT + { + return const_reverse_iterator(mItBegin); + } + + + template + inline typename deque::const_reverse_iterator + deque::crend() const EA_NOEXCEPT + { + return const_reverse_iterator(mItBegin); + } + + + template + inline bool deque::empty() const EA_NOEXCEPT + { + return mItBegin.mpCurrent == mItEnd.mpCurrent; + } + + + template + typename deque::size_type + inline deque::size() const EA_NOEXCEPT + { + return (size_type)(mItEnd - mItBegin); + } + + + template + inline void deque::resize(size_type n, const value_type& value) + { + const size_type nSizeCurrent = size(); + + if(n > nSizeCurrent) // We expect that more often than not, resizes will be upsizes. 
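		// For example (illustrative): growing a deque of size 5 to n == 8 appends three copies of 'value',
		// while shrinking it to n == 3 erases the elements at positions [3, 5).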
+ insert(mItEnd, n - nSizeCurrent, value); + else + erase(mItBegin + (difference_type)n, mItEnd); + } + + + template + inline void deque::resize(size_type n) + { + resize(n, value_type()); + } + + + template + inline void deque::shrink_to_fit() + { + this_type x(eastl::make_move_iterator(begin()), eastl::make_move_iterator(end())); + swap(x); + } + + + template + inline void deque::set_capacity(size_type n) + { + // Currently there isn't a way to remove all allocations from a deque, as it + // requires a single starting allocation for the subarrays. So we can't just + // free all memory without leaving it in a bad state. So the best means of + // implementing set_capacity() is to do what we do below. + + if(n == 0) + { + this_type temp(mAllocator); + DoSwap(temp); + } + else if(n < size()) + { + // We currently ignore the request to reduce capacity. To do: Implement this + // and do it in a way that doesn't result in temporarily ~doubling our memory usage. + // That might involve trimming unused subarrays from the front or back of + // the container. + resize(n); + } + } + + + template + typename deque::reference + deque::operator[](size_type n) + { + #if EASTL_ASSERT_ENABLED && EASTL_EMPTY_REFERENCE_ASSERT_ENABLED + if (EASTL_UNLIKELY(n >= (size_type)(mItEnd - mItBegin))) + EASTL_FAIL_MSG("deque::operator[] -- out of range"); + #elif EASTL_ASSERT_ENABLED + // We allow taking a reference to deque[0] + if (EASTL_UNLIKELY((n != 0) && n >= (size_type)(mItEnd - mItBegin))) + EASTL_FAIL_MSG("deque::operator[] -- out of range"); + #endif + + // See DequeIterator::operator+=() for an explanation of the code below. + iterator it(mItBegin); + + const difference_type subarrayPosition = (difference_type)((it.mpCurrent - it.mpBegin) + (difference_type)n); + const difference_type subarrayIndex = (((16777216 + subarrayPosition) / (difference_type)kDequeSubarraySize)) - (16777216 / (difference_type)kDequeSubarraySize); + + return *(*(it.mpCurrentArrayPtr + subarrayIndex) + (subarrayPosition - (subarrayIndex * (difference_type)kDequeSubarraySize))); + } + + + template + typename deque::const_reference + deque::operator[](size_type n) const + { + #if EASTL_ASSERT_ENABLED && EASTL_EMPTY_REFERENCE_ASSERT_ENABLED + if (EASTL_UNLIKELY(n >= (size_type)(mItEnd - mItBegin))) + EASTL_FAIL_MSG("deque::operator[] -- out of range"); + #elif EASTL_ASSERT_ENABLED + // We allow the user to use a reference to deque[0] of an empty container. + if (EASTL_UNLIKELY((n != 0) && n >= (size_type)(mItEnd - mItBegin))) + EASTL_FAIL_MSG("deque::operator[] -- out of range"); + #endif + + // See DequeIterator::operator+=() for an explanation of the code below. 
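		// Illustration (assumption: kDequeSubarraySize divides 16777216 == 2^24 evenly, as the power-of-two
		// defaults do): the bias keeps the dividend positive so the division floors instead of truncating
		// toward zero, which matters in DequeIterator::operator+=() where the offset may be negative.
		// E.g. with kDequeSubarraySize == 8 and a subarrayPosition of -3, the expression yields
		// subarrayIndex == -1 and an element offset of 5, i.e. the sixth slot of the previous subarray.
		// Here in operator[] the position is never negative, so the bias is harmless.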
+ iterator it(mItBegin); + + const difference_type subarrayPosition = (it.mpCurrent - it.mpBegin) + (difference_type)n; + const difference_type subarrayIndex = (((16777216 + subarrayPosition) / (difference_type)kDequeSubarraySize)) - (16777216 / (difference_type)kDequeSubarraySize); + + return *(*(it.mpCurrentArrayPtr + subarrayIndex) + (subarrayPosition - (subarrayIndex * (difference_type)kDequeSubarraySize))); + } + + + template + typename deque::reference + deque::at(size_type n) + { + #if EASTL_EXCEPTIONS_ENABLED + if(n >= (size_type)(mItEnd - mItBegin)) + throw std::out_of_range("deque::at -- out of range"); + #elif EASTL_ASSERT_ENABLED + if(n >= (size_type)(mItEnd - mItBegin)) + EASTL_FAIL_MSG("deque::at -- out of range"); + #endif + return *(mItBegin.operator+((difference_type)n)); + } + + + template + typename deque::const_reference + deque::at(size_type n) const + { + #if EASTL_EXCEPTIONS_ENABLED + if(n >= (size_type)(mItEnd - mItBegin)) + throw std::out_of_range("deque::at -- out of range"); + #elif EASTL_ASSERT_ENABLED + if(n >= (size_type)(mItEnd - mItBegin)) + EASTL_FAIL_MSG("deque::at -- out of range"); + #endif + return *(mItBegin.operator+((difference_type)n)); + } + + + template + typename deque::reference + deque::front() + { + #if EASTL_ASSERT_ENABLED && EASTL_EMPTY_REFERENCE_ASSERT_ENABLED + if (EASTL_UNLIKELY((size_type)(mItEnd == mItBegin))) + EASTL_FAIL_MSG("deque::front -- empty deque"); + #else + // We allow the user to reference an empty container. + #endif + + return *mItBegin; + } + + + template + typename deque::const_reference + deque::front() const + { + #if EASTL_ASSERT_ENABLED && EASTL_EMPTY_REFERENCE_ASSERT_ENABLED + if (EASTL_UNLIKELY((size_type)(mItEnd == mItBegin))) + EASTL_FAIL_MSG("deque::front -- empty deque"); + #else + // We allow the user to reference an empty container. + #endif + + return *mItBegin; + } + + + template + typename deque::reference + deque::back() + { + #if EASTL_ASSERT_ENABLED && EASTL_EMPTY_REFERENCE_ASSERT_ENABLED + if (EASTL_UNLIKELY((size_type)(mItEnd == mItBegin))) + EASTL_FAIL_MSG("deque::back -- empty deque"); + #else + // We allow the user to reference an empty container. + #endif + + return *iterator(mItEnd, typename iterator::Decrement()); + } + + + template + typename deque::const_reference + deque::back() const + { + #if EASTL_ASSERT_ENABLED && EASTL_EMPTY_REFERENCE_ASSERT_ENABLED + if (EASTL_UNLIKELY((size_type)(mItEnd == mItBegin))) + EASTL_FAIL_MSG("deque::back -- empty deque"); + #else + // We allow the user to reference an empty container. 
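		// Illustrative note: the Decrement-constructed iterator below is needed because mItEnd can point
		// at the first slot of a fresh subarray, in which case the last element lives at the tail of the
		// previous subarray and a plain (mpCurrent - 1) would step outside the current subarray.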
+ #endif + + return *iterator(mItEnd, typename iterator::Decrement()); + } + + + template + void deque::push_front(const value_type& value) + { + emplace_front(value); + } + + + template + void deque::push_front(value_type&& value) + { + emplace_front(eastl::move(value)); + } + + + template + typename deque::reference + deque::push_front() + { + emplace_front(value_type()); + return *mItBegin; // Same as return front(); + } + + + template + void deque::push_back(const value_type& value) + { + emplace_back(value); + } + + + template + void deque::push_back(value_type&& value) + { + emplace_back(eastl::move(value)); + } + + + template + typename deque::reference + deque::push_back() + { + emplace_back(value_type()); + return *iterator(mItEnd, typename iterator::Decrement()); // Same thing as return back(); + } + + + template + void deque::pop_front() + { + #if EASTL_ASSERT_ENABLED + if(EASTL_UNLIKELY((size_type)(mItEnd == mItBegin))) + EASTL_FAIL_MSG("deque::pop_front -- empty deque"); + #endif + + if((mItBegin.mpCurrent + 1) != mItBegin.mpEnd) // If the operation is very simple... + (mItBegin.mpCurrent++)->~value_type(); + else + { + // This is executed only when we are popping the end (last) item off the front-most subarray. + // In this case we need to free the subarray and point mItBegin to the next subarray. + #ifdef EA_DEBUG + value_type** pp = mItBegin.mpCurrentArrayPtr; + #endif + + mItBegin.mpCurrent->~value_type(); // mpCurrent == mpEnd - 1 + DoFreeSubarray(mItBegin.mpBegin); + mItBegin.SetSubarray(mItBegin.mpCurrentArrayPtr + 1); + mItBegin.mpCurrent = mItBegin.mpBegin; + + #ifdef EA_DEBUG + *pp = NULL; + #endif + } + } + + + template + void deque::pop_back() + { + #if EASTL_ASSERT_ENABLED + if(EASTL_UNLIKELY((size_type)(mItEnd == mItBegin))) + EASTL_FAIL_MSG("deque::pop_back -- empty deque"); + #endif + + if(mItEnd.mpCurrent != mItEnd.mpBegin) // If the operation is very simple... + (--mItEnd.mpCurrent)->~value_type(); + else + { + // This is executed only when we are popping the first item off the last subarray. + // In this case we need to free the subarray and point mItEnd to the previous subarray. + #ifdef EA_DEBUG + value_type** pp = mItEnd.mpCurrentArrayPtr; + #endif + + DoFreeSubarray(mItEnd.mpBegin); + mItEnd.SetSubarray(mItEnd.mpCurrentArrayPtr - 1); + mItEnd.mpCurrent = mItEnd.mpEnd - 1; // Recall that mItEnd points to one-past the last item in the container. + mItEnd.mpCurrent->~value_type(); // Thus we need to call the destructor on the item *before* that last item. + + #ifdef EA_DEBUG + *pp = NULL; + #endif + } + } + + + template + template + typename deque::iterator + deque::emplace(const_iterator position, Args&&... args) + { + if(EASTL_UNLIKELY(position.mpCurrent == mItEnd.mpCurrent)) // If we are doing the same thing as push_back... + { + emplace_back(eastl::forward(args)...); + return iterator(mItEnd, typename iterator::Decrement()); // Unfortunately, we need to make an iterator here, as the above push_back is an operation that can invalidate existing iterators. + } + else if(EASTL_UNLIKELY(position.mpCurrent == mItBegin.mpCurrent)) // If we are doing the same thing as push_front... + { + emplace_front(eastl::forward(args)...); + return mItBegin; + } + + iterator itPosition(position, typename iterator::FromConst()); + value_type valueSaved(eastl::forward(args)...); // We need to save this because value may come from within our container. It would be somewhat tedious to make a workaround that could avoid this. 
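		// Illustration (hypothetical call, not from the source): in d.emplace(d.begin() + 1, d[0]) the
		// argument aliases an element that the shifting below may relocate, so it is captured into
		// valueSaved first and move-assigned into its final position at the end.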
+ const difference_type i(itPosition - mItBegin); + + #if EASTL_ASSERT_ENABLED + EASTL_ASSERT(!empty()); // The push_front and push_back calls below assume that we are non-empty. It turns out this is never called unless so. + + if(EASTL_UNLIKELY(!(validate_iterator(itPosition) & isf_valid))) + EASTL_FAIL_MSG("deque::emplace -- invalid iterator"); + #endif + + if(i < (difference_type)(size() / 2)) // Should we insert at the front or at the back? We divide the range in half. + { + emplace_front(eastl::move(*mItBegin)); // This operation potentially invalidates all existing iterators and so we need to assign them anew relative to mItBegin below. + + itPosition = mItBegin + i; + + const iterator newPosition (itPosition, typename iterator::Increment()); + iterator oldBegin (mItBegin, typename iterator::Increment()); + const iterator oldBeginPlus1(oldBegin, typename iterator::Increment()); + + oldBegin.copy(oldBeginPlus1, newPosition, eastl::has_trivial_relocate()); + } + else + { + emplace_back(eastl::move(*iterator(mItEnd, typename iterator::Decrement()))); + + itPosition = mItBegin + i; + + iterator oldBack (mItEnd, typename iterator::Decrement()); + const iterator oldBackMinus1(oldBack, typename iterator::Decrement()); + + oldBack.copy_backward(itPosition, oldBackMinus1, eastl::has_trivial_relocate()); + } + + *itPosition = eastl::move(valueSaved); + + return itPosition; + } + + template + template + void deque::emplace_front(Args&&... args) + { + if(mItBegin.mpCurrent != mItBegin.mpBegin) // If we have room in the first subarray... we hope that usually this 'new' pathway gets executed, as it is slightly faster. + ::new((void*)--mItBegin.mpCurrent) value_type(eastl::forward(args)...); // Construct in place. If args is a single arg of type value_type&& then it this will be a move construction. + else + { + // To consider: Detect if value isn't coming from within this container and handle that efficiently. + value_type valueSaved(eastl::forward(args)...); // We need to make a temporary, because args may be a value_type that comes from within our container and the operations below may change the container. But we can use move instead of copy. + + if(mItBegin.mpCurrentArrayPtr == mpPtrArray) // If there are no more pointers in front of the current (first) one... + DoReallocPtrArray(1, kSideFront); + + mItBegin.mpCurrentArrayPtr[-1] = DoAllocateSubarray(); + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + mItBegin.SetSubarray(mItBegin.mpCurrentArrayPtr - 1); + mItBegin.mpCurrent = mItBegin.mpEnd - 1; + ::new((void*)mItBegin.mpCurrent) value_type(eastl::move(valueSaved)); + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + ++mItBegin; // The exception could only occur in the new operation above, after we have incremented mItBegin. So we need to undo it. + DoFreeSubarray(mItBegin.mpCurrentArrayPtr[-1]); + throw; + } + #endif + } + } + + template + template + void deque::emplace_back(Args&&... args) + { + if((mItEnd.mpCurrent + 1) != mItEnd.mpEnd) // If we have room in the last subarray... we hope that usually this 'new' pathway gets executed, as it is slightly faster. + ::new((void*)mItEnd.mpCurrent++) value_type(eastl::forward(args)...); // Construct in place. If args is a single arg of type value_type&& then it this will be a move construction. + else + { + // To consider: Detect if value isn't coming from within this container and handle that efficiently. 
+ value_type valueSaved(eastl::forward(args)...); // We need to make a temporary, because args may be a value_type that comes from within our container and the operations below may change the container. But we can use move instead of copy. + if(((mItEnd.mpCurrentArrayPtr - mpPtrArray) + 1) >= (difference_type)mnPtrArraySize) // If there are no more pointers after the current (last) one. + DoReallocPtrArray(1, kSideBack); + + mItEnd.mpCurrentArrayPtr[1] = DoAllocateSubarray(); + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + ::new((void*)mItEnd.mpCurrent) value_type(eastl::move(valueSaved)); // We can move valueSaved into position. + mItEnd.SetSubarray(mItEnd.mpCurrentArrayPtr + 1); + mItEnd.mpCurrent = mItEnd.mpBegin; + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + // No need to execute '--mItEnd', as the exception could only occur in the new operation above before we set mItEnd. + DoFreeSubarray(mItEnd.mpCurrentArrayPtr[1]); + throw; + } + #endif + } + } + + + template + typename deque::iterator + deque::insert(const_iterator position, const value_type& value) + { + return emplace(position, value); + } + + + template + typename deque::iterator + deque::insert(const_iterator position, value_type&& value) + { + return emplace(position, eastl::move(value)); + } + + + template + void deque::insert(const_iterator position, size_type n, const value_type& value) + { + DoInsertValues(position, n, value); + } + + + template + template + void deque::insert(const_iterator position, InputIterator first, InputIterator last) + { + DoInsert(position, first, last, is_integral()); // The C++ standard requires this sort of behaviour, as InputIterator might actually be Integer and 'first' is really 'count' and 'last' is really 'value'. + } + + + template + typename deque::iterator + deque::insert(const_iterator position, std::initializer_list ilist) + { + const difference_type i(position - mItBegin); + DoInsert(position, ilist.begin(), ilist.end(), false_type()); + return (mItBegin + i); + } + + + template + typename deque::iterator + deque::erase(const_iterator position) + { + #if EASTL_ASSERT_ENABLED + if(EASTL_UNLIKELY(!(validate_iterator(position) & isf_valid))) + EASTL_FAIL_MSG("deque::erase -- invalid iterator"); + + if(EASTL_UNLIKELY(position == end())) + EASTL_FAIL_MSG("deque::erase -- end() iterator is an invalid iterator for erase"); + #endif + + iterator itPosition(position, typename iterator::FromConst()); + iterator itNext(itPosition, typename iterator::Increment()); + const difference_type i(itPosition - mItBegin); + + if(i < (difference_type)(size() / 2)) // Should we move the front entries forward or the back entries backward? We divide the range in half. + { + itNext.copy_backward(mItBegin, itPosition, eastl::has_trivial_relocate()); + pop_front(); + } + else + { + itPosition.copy(itNext, mItEnd, eastl::has_trivial_relocate()); + pop_back(); + } + + return mItBegin + i; + } + + + template + typename deque::iterator + deque::erase(const_iterator first, const_iterator last) + { + iterator itFirst(first, typename iterator::FromConst()); + iterator itLast(last, typename iterator::FromConst()); + + #if EASTL_ASSERT_ENABLED + if(EASTL_UNLIKELY(!(validate_iterator(itFirst) & isf_valid))) + EASTL_FAIL_MSG("deque::erase -- invalid iterator"); + if(EASTL_UNLIKELY(!(validate_iterator(itLast) & isf_valid))) + EASTL_FAIL_MSG("deque::erase -- invalid iterator"); + #endif + + if((itFirst != mItBegin) || (itLast != mItEnd)) // If not erasing everything... 
(We expect that the user won't call erase(begin, end) because instead the user would just call clear.) + { + const difference_type n(itLast - itFirst); + const difference_type i(itFirst - mItBegin); + + if(i < (difference_type)((size() - n) / 2)) // Should we move the front entries forward or the back entries backward? We divide the range in half. + { + const iterator itNewBegin(mItBegin + n); + value_type** const pPtrArrayBegin = mItBegin.mpCurrentArrayPtr; + + itLast.copy_backward(mItBegin, itFirst, eastl::has_trivial_relocate()); + + for(; mItBegin != itNewBegin; ++mItBegin) // Question: If value_type is a POD type, will the compiler generate this loop at all? + mItBegin.mpCurrent->~value_type(); // If so, then we need to make a specialization for destructing PODs. + + DoFreeSubarrays(pPtrArrayBegin, itNewBegin.mpCurrentArrayPtr); + + // mItBegin = itNewBegin; <-- Not necessary, as the above loop makes it so already. + } + else // Else we will be moving back entries backward. + { + iterator itNewEnd(mItEnd - n); + value_type** const pPtrArrayEnd = itNewEnd.mpCurrentArrayPtr + 1; + + itFirst.copy(itLast, mItEnd, eastl::has_trivial_relocate()); + + for(iterator itTemp(itNewEnd); itTemp != mItEnd; ++itTemp) + itTemp.mpCurrent->~value_type(); + + DoFreeSubarrays(pPtrArrayEnd, mItEnd.mpCurrentArrayPtr + 1); + + mItEnd = itNewEnd; + } + + return mItBegin + i; + } + + clear(); + return mItEnd; + } + + + template + typename deque::reverse_iterator + deque::erase(reverse_iterator position) + { + return reverse_iterator(erase((++position).base())); + } + + + template + typename deque::reverse_iterator + deque::erase(reverse_iterator first, reverse_iterator last) + { + // Version which erases in order from first to last. + // difference_type i(first.base() - last.base()); + // while(i--) + // first = erase(first); + // return first; + + // Version which erases in order from last to first, but is slightly more efficient: + return reverse_iterator(erase(last.base(), first.base())); + } + + + template + void deque::clear() + { + // Destroy all values and all subarrays they belong to, except for the first one, + // as we need to reserve some space for a valid mItBegin/mItEnd. + if(mItBegin.mpCurrentArrayPtr != mItEnd.mpCurrentArrayPtr) // If there are multiple subarrays (more often than not, this will be so)... + { + for(value_type* p1 = mItBegin.mpCurrent; p1 < mItBegin.mpEnd; ++p1) + p1->~value_type(); + for(value_type* p2 = mItEnd.mpBegin; p2 < mItEnd.mpCurrent; ++p2) + p2->~value_type(); + DoFreeSubarray(mItEnd.mpBegin); // Leave mItBegin with a valid subarray. + } + else + { + for(value_type* p = mItBegin.mpCurrent; p < mItEnd.mpCurrent; ++p) + p->~value_type(); + // Don't free the one existing subarray, as we need it for mItBegin/mItEnd. + } + + for(value_type** pPtrArray = mItBegin.mpCurrentArrayPtr + 1; pPtrArray < mItEnd.mpCurrentArrayPtr; ++pPtrArray) + { + for(value_type* p = *pPtrArray, *pEnd = *pPtrArray + kDequeSubarraySize; p < pEnd; ++p) + p->~value_type(); + DoFreeSubarray(*pPtrArray); + } + + mItEnd = mItBegin; // mItBegin/mItEnd will not be dereferencable. + } + + + //template + //void deque::reset_lose_memory() + //{ + // // The reset_lose_memory function is a special extension function which unilaterally + // // resets the container to an empty state without freeing the memory of + // // the contained objects. This is useful for very quickly tearing down a + // // container built into scratch memory. 
+ // + // // Currently we are unable to get this reset_lose_memory operation to work correctly + // // as we haven't been able to find a good way to have a deque initialize + // // without allocating memory. We can lose the old memory, but DoInit + // // would necessarily do a ptrArray allocation. And this is not within + // // our definition of how reset_lose_memory works. + // base_type::DoInit(0); + // + //} + + + template + void deque::swap(deque& x) + { + #if defined(EASTL_DEQUE_LEGACY_SWAP_BEHAVIOUR_REQUIRES_COPY_CTOR) && EASTL_DEQUE_LEGACY_SWAP_BEHAVIOUR_REQUIRES_COPY_CTOR + if(mAllocator == x.mAllocator) // If allocators are equivalent... + DoSwap(x); + else // else swap the contents. + { + const this_type temp(*this); // Can't call eastl::swap because that would + *this = x; // itself call this member swap function. + x = temp; + } + #else + // NOTE(rparolin): The previous implementation required T to be copy-constructible in the fall-back case where + // allocators with unique instances copied elements. This was an unnecessary restriction and prevented the common + // usage of deque with non-copyable types (eg. eastl::deque or eastl::deque). + // + // The previous implementation violated the following requirements of deque::swap so the fall-back code has + // been removed. EASTL implicitly defines 'propagate_on_container_swap = false' therefore the fall-back case is + // undefined behaviour. We simply swap the contents and the allocator as that is the common expectation of + // users and does not put the container into an invalid state since it can not free its memory via its current + // allocator instance. + // + DoSwap(x); + #endif + } + + + template + template + void deque::DoInit(Integer n, Integer value, true_type) + { + base_type::DoInit(n); // Call the base uninitialized init function. + DoFillInit(value); + } + + + template + template + void deque::DoInit(InputIterator first, InputIterator last, false_type) + { + typedef typename eastl::iterator_traits::iterator_category IC; + DoInitFromIterator(first, last, IC()); + } + + + template + template + void deque::DoInitFromIterator(InputIterator first, InputIterator last, EASTL_ITC_NS::input_iterator_tag) + { + base_type::DoInit(0); // Call the base uninitialized init function, but don't actually allocate any values. + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + // We have little choice but to turn through the source iterator and call + // push_back for each item. It can be slow because it will keep reallocating the + // container memory as we go. We are not allowed to use distance() on an InputIterator. + for(; first != last; ++first) // InputIterators by definition actually only allow you to iterate through them once. + { // Thus the standard *requires* that we do this (inefficient) implementation. + push_back(*first); // Luckily, InputIterators are in practice almost never used, so this code will likely never get executed. + } + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + clear(); + throw; + } + #endif + } + + + template + template + void deque::DoInitFromIterator(ForwardIterator first, ForwardIterator last, EASTL_ITC_NS::forward_iterator_tag) + { + typedef typename eastl::remove_const::type non_const_iterator_type; // If T is a const type (e.g. const int) then we need to initialize it as if it were non-const. 
+ typedef typename eastl::remove_const::type non_const_value_type; + + const size_type n = (size_type)eastl::distance(first, last); + value_type** pPtrArrayCurrent; + + base_type::DoInit(n); // Call the base uninitialized init function. + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + for(pPtrArrayCurrent = mItBegin.mpCurrentArrayPtr; pPtrArrayCurrent < mItEnd.mpCurrentArrayPtr; ++pPtrArrayCurrent) // Copy to the known-to-be-completely-used subarrays. + { + // We implment an algorithm here whereby we use uninitialized_copy() and advance() instead of just iterating from first to last and constructing as we go. The reason for this is that we can take advantage of POD data types and implement construction as memcpy operations. + ForwardIterator current(first); // To do: Implement a specialization of this algorithm for non-PODs which eliminates the need for 'current'. + + eastl::advance(current, kDequeSubarraySize); + eastl::uninitialized_copy((non_const_iterator_type)first, (non_const_iterator_type)current, (non_const_value_type*)*pPtrArrayCurrent); + first = current; + } + + eastl::uninitialized_copy((non_const_iterator_type)first, (non_const_iterator_type)last, (non_const_value_type*)mItEnd.mpBegin); + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + for(iterator itCurrent(mItBegin), itEnd(pPtrArrayCurrent, *pPtrArrayCurrent); itCurrent != itEnd; ++itCurrent) + itCurrent.mpCurrent->~value_type(); + throw; + } + #endif + } + + + template + void deque::DoFillInit(const value_type& value) + { + value_type** pPtrArrayCurrent = mItBegin.mpCurrentArrayPtr; + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + while(pPtrArrayCurrent < mItEnd.mpCurrentArrayPtr) + { + eastl::uninitialized_fill(*pPtrArrayCurrent, *pPtrArrayCurrent + kDequeSubarraySize, value); + ++pPtrArrayCurrent; + } + eastl::uninitialized_fill(mItEnd.mpBegin, mItEnd.mpCurrent, value); + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + for(iterator itCurrent(mItBegin), itEnd(pPtrArrayCurrent, *pPtrArrayCurrent); itCurrent != itEnd; ++itCurrent) + itCurrent.mpCurrent->~value_type(); + throw; + } + #endif + } + + + template + template + void deque::DoAssign(Integer n, Integer value, true_type) // false_type means this is the integer version instead of iterator version. + { + DoAssignValues(static_cast(n), static_cast(value)); + } + + + template + template + void deque::DoAssign(InputIterator first, InputIterator last, false_type) // false_type means this is the iterator version instead of integer version. + { + // Actually, the implementation below requires first/last to be a ForwardIterator and not just an InputIterator. + // But Paul Pedriana if you somehow need to work with an InputIterator and we can deal with it. + const size_type n = (size_type)eastl::distance(first, last); + const size_type nSize = size(); + + if(n > nSize) // If we are increasing the size... + { + InputIterator atEnd(first); + + eastl::advance(atEnd, (difference_type)nSize); + eastl::copy(first, atEnd, mItBegin); + insert(mItEnd, atEnd, last); + } + else // n is <= size. + { + iterator itEnd(eastl::copy(first, last, mItBegin)); + + if(n < nSize) // If we need to erase any trailing elements... + erase(itEnd, mItEnd); + } + } + + + template + void deque::DoAssignValues(size_type n, const value_type& value) + { + const size_type nSize = size(); + + if(n > nSize) // If we are increasing the size... 
+ { + eastl::fill(mItBegin, mItEnd, value); + insert(mItEnd, n - nSize, value); + } + else + { + erase(mItBegin + (difference_type)n, mItEnd); + eastl::fill(mItBegin, mItEnd, value); + } + } + + + template + template + void deque::DoInsert(const const_iterator& position, Integer n, Integer value, true_type) + { + DoInsertValues(position, (size_type)n, (value_type)value); + } + + + template + template + void deque::DoInsert(const const_iterator& position, const InputIterator& first, const InputIterator& last, false_type) + { + typedef typename eastl::iterator_traits::iterator_category IC; + DoInsertFromIterator(position, first, last, IC()); + } + + + template + template + void deque::DoInsertFromIterator(const_iterator position, const InputIterator& first, const InputIterator& last, EASTL_ITC_NS::forward_iterator_tag) + { + const size_type n = (size_type)eastl::distance(first, last); + + // This implementation is nearly identical to DoInsertValues below. + // If you make a bug fix to one, you will likely want to fix the other. + if(position.mpCurrent == mItBegin.mpCurrent) // If inserting at the beginning or into an empty container... + { + iterator itNewBegin(DoReallocSubarray(n, kSideFront)); // itNewBegin to mItBegin refers to memory that isn't initialized yet; so it's not truly a valid iterator. Or at least not a dereferencable one. + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + // We would like to use move here instead of copy when possible, which would be useful for + // when inserting from a std::initializer_list, for example. + // To do: solve this by having a template or runtime parameter which specifies move vs copy. + eastl::uninitialized_copy(first, last, itNewBegin); + mItBegin = itNewBegin; + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + DoFreeSubarrays(itNewBegin.mpCurrentArrayPtr, mItBegin.mpCurrentArrayPtr); + throw; + } + #endif + } + else if(EASTL_UNLIKELY(position.mpCurrent == mItEnd.mpCurrent)) // If inserting at the end (i.e. appending)... + { + const iterator itNewEnd(DoReallocSubarray(n, kSideBack)); // mItEnd to itNewEnd refers to memory that isn't initialized yet; so it's not truly a valid iterator. Or at least not a dereferencable one. + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + // We would like to use move here instead of copy when possible, which would be useful for + // when inserting from a std::initializer_list, for example. + // To do: solve this by having a template or runtime parameter which specifies move vs copy. + eastl::uninitialized_copy(first, last, mItEnd); + mItEnd = itNewEnd; + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + DoFreeSubarrays(mItEnd.mpCurrentArrayPtr + 1, itNewEnd.mpCurrentArrayPtr + 1); + throw; + } + #endif + } + else + { + const difference_type nInsertionIndex = position - mItBegin; + const size_type nSize = size(); + + if(nInsertionIndex < (difference_type)(nSize / 2)) // If the insertion index is in the front half of the deque... grow the deque at the front. + { + const iterator itNewBegin(DoReallocSubarray(n, kSideFront)); // itNewBegin to mItBegin refers to memory that isn't initialized yet; so it's not truly a valid iterator. Or at least not a dereferencable one. + const iterator itOldBegin(mItBegin); + const iterator itPosition(mItBegin + nInsertionIndex); // We need to reset this value because the reallocation above can invalidate iterators. 
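				// Worked example (illustrative): inserting n == 3 elements at nInsertionIndex == 5 copies the
				// first three old elements into the new front slots, shifts the remaining two old elements
				// before the insertion point down to the old begin, and writes the inserted range into the gap.
				// At nInsertionIndex == 2, one inserted element spills into the uninitialized front area, so
				// uninitialized_copy_copy() fills that area together with the old prefix and the remaining
				// inserted elements land at the old begin.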
+ + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + // We have a problem here: we would like to use move instead of copy, but it may be that the range to be inserted comes from + // this container and comes from the segment we need to move. So we can't use move operations unless we are careful to handle + // that situation. The newly inserted contents must be contents that were moved to and not moved from. To do: solve this. + if(nInsertionIndex >= (difference_type)n) // If the newly inserted items will be entirely within the old area... + { + iterator itUCopyEnd(mItBegin + (difference_type)n); + + eastl::uninitialized_copy(mItBegin, itUCopyEnd, itNewBegin); // This can throw. + itUCopyEnd = eastl::copy(itUCopyEnd, itPosition, itOldBegin); // Recycle 'itUCopyEnd' to mean something else. + eastl::copy(first, last, itUCopyEnd); + } + else // Else the newly inserted items are going within the newly allocated area at the front. + { + InputIterator mid(first); + + eastl::advance(mid, (difference_type)n - nInsertionIndex); + eastl::uninitialized_copy_copy(mItBegin, itPosition, first, mid, itNewBegin); // This can throw. + eastl::copy(mid, last, itOldBegin); + } + mItBegin = itNewBegin; + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + DoFreeSubarrays(itNewBegin.mpCurrentArrayPtr, mItBegin.mpCurrentArrayPtr); + throw; + } + #endif + } + else + { + const iterator itNewEnd(DoReallocSubarray(n, kSideBack)); + const iterator itOldEnd(mItEnd); + const difference_type nPushedCount = (difference_type)nSize - nInsertionIndex; + const iterator itPosition(mItEnd - nPushedCount); // We need to reset this value because the reallocation above can invalidate iterators. + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + // We have a problem here: we would like to use move instead of copy, but it may be that the range to be inserted comes from + // this container and comes from the segment we need to move. So we can't use move operations unless we are careful to handle + // that situation. The newly inserted contents must be contents that were moved to and not moved from. To do: solve this. + if(nPushedCount > (difference_type)n) + { + const iterator itUCopyEnd(mItEnd - (difference_type)n); + + eastl::uninitialized_copy(itUCopyEnd, mItEnd, mItEnd); + eastl::copy_backward(itPosition, itUCopyEnd, itOldEnd); + eastl::copy(first, last, itPosition); + } + else + { + InputIterator mid(first); + + eastl::advance(mid, nPushedCount); + eastl::uninitialized_copy_copy(mid, last, itPosition, mItEnd, mItEnd); + eastl::copy(first, mid, itPosition); + } + mItEnd = itNewEnd; + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + DoFreeSubarrays(mItEnd.mpCurrentArrayPtr + 1, itNewEnd.mpCurrentArrayPtr + 1); + throw; + } + #endif + } + } + } + + + template + void deque::DoInsertValues(const_iterator position, size_type n, const value_type& value) + { + #if EASTL_ASSERT_ENABLED + if(EASTL_UNLIKELY(!(validate_iterator(position) & isf_valid))) + EASTL_FAIL_MSG("deque::insert -- invalid iterator"); + #endif + + // This implementation is nearly identical to DoInsertFromIterator above. + // If you make a bug fix to one, you will likely want to fix the other. + if(position.mpCurrent == mItBegin.mpCurrent) // If inserting at the beginning... + { + const iterator itNewBegin(DoReallocSubarray(n, kSideFront)); + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + // Note that we don't make a temp copy of 'value' here. 
This is because in a + // deque, insertion at either the front or back doesn't cause a reallocation + // or move of data in the middle. That's a key feature of deques, in fact. + eastl::uninitialized_fill(itNewBegin, mItBegin, value); + mItBegin = itNewBegin; + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + DoFreeSubarrays(itNewBegin.mpCurrentArrayPtr, mItBegin.mpCurrentArrayPtr); + throw; + } + #endif + } + else if(EASTL_UNLIKELY(position.mpCurrent == mItEnd.mpCurrent)) // If inserting at the end (i.e. appending)... + { + const iterator itNewEnd(DoReallocSubarray(n, kSideBack)); + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + // Note that we don't make a temp copy of 'value' here. This is because in a + // deque, insertion at either the front or back doesn't cause a reallocation + // or move of data in the middle. That's a key feature of deques, in fact. + eastl::uninitialized_fill(mItEnd, itNewEnd, value); + mItEnd = itNewEnd; + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + DoFreeSubarrays(mItEnd.mpCurrentArrayPtr + 1, itNewEnd.mpCurrentArrayPtr + 1); + throw; + } + #endif + } + else + { + // A key purpose of a deque is to implement insertions and removals more efficiently + // than with a vector. We are inserting into the middle of the deque here. A quick and + // dirty implementation of this would be to reallocate the subarrays and simply push + // all values in the middle upward like you would do with a vector. Instead we implement + // the minimum amount of reallocations needed but may need to do some value moving, + // as the subarray sizes need to remain constant and can have no holes in them. + const difference_type nInsertionIndex = position - mItBegin; + const size_type nSize = size(); + const value_type valueSaved(value); + + if(nInsertionIndex < (difference_type)(nSize / 2)) // If the insertion index is in the front half of the deque... grow the deque at the front. + { + const iterator itNewBegin(DoReallocSubarray(n, kSideFront)); + const iterator itOldBegin(mItBegin); + const iterator itPosition(mItBegin + nInsertionIndex); // We need to reset this value because the reallocation above can invalidate iterators. + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + if(nInsertionIndex >= (difference_type)n) // If the newly inserted items will be entirely within the old area... + { + iterator itUCopyEnd(mItBegin + (difference_type)n); + + eastl::uninitialized_move_if_noexcept(mItBegin, itUCopyEnd, itNewBegin); // This can throw. + itUCopyEnd = eastl::move(itUCopyEnd, itPosition, itOldBegin); // Recycle 'itUCopyEnd' to mean something else. + eastl::fill(itUCopyEnd, itPosition, valueSaved); + } + else // Else the newly inserted items are going within the newly allocated area at the front. + { + eastl::uninitialized_move_fill(mItBegin, itPosition, itNewBegin, mItBegin, valueSaved); // This can throw. + eastl::fill(itOldBegin, itPosition, valueSaved); + } + mItBegin = itNewBegin; + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + DoFreeSubarrays(itNewBegin.mpCurrentArrayPtr, mItBegin.mpCurrentArrayPtr); + throw; + } + #endif + } + else // Else the insertion index is in the back half of the deque, so grow the deque at the back. + { + const iterator itNewEnd(DoReallocSubarray(n, kSideBack)); + const iterator itOldEnd(mItEnd); + const difference_type nPushedCount = (difference_type)nSize - nInsertionIndex; + const iterator itPosition(mItEnd - nPushedCount); // We need to reset this value because the reallocation above can invalidate iterators. 
+ + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + if(nPushedCount > (difference_type)n) // If the newly inserted items will be entirely within the old area... + { + iterator itUCopyEnd(mItEnd - (difference_type)n); + + eastl::uninitialized_move_if_noexcept(itUCopyEnd, mItEnd, mItEnd); // This can throw. + itUCopyEnd = eastl::move_backward(itPosition, itUCopyEnd, itOldEnd); // Recycle 'itUCopyEnd' to mean something else. + eastl::fill(itPosition, itUCopyEnd, valueSaved); + } + else // Else the newly inserted items are going within the newly allocated area at the back. + { + eastl::uninitialized_fill_move(mItEnd, itPosition + (difference_type)n, valueSaved, itPosition, mItEnd); // This can throw. + eastl::fill(itPosition, itOldEnd, valueSaved); + } + mItEnd = itNewEnd; + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + DoFreeSubarrays(mItEnd.mpCurrentArrayPtr + 1, itNewEnd.mpCurrentArrayPtr + 1); + throw; + } + #endif + } + } + } + + + template + inline void deque::DoSwap(this_type& x) + { + eastl::swap(mpPtrArray, x.mpPtrArray); + eastl::swap(mnPtrArraySize, x.mnPtrArraySize); + eastl::swap(mItBegin, x.mItBegin); + eastl::swap(mItEnd, x.mItEnd); + eastl::swap(mAllocator, x.mAllocator); // We do this even if EASTL_ALLOCATOR_COPY_ENABLED is 0. + + } + + + template + inline bool deque::validate() const + { + // To do: More detailed validation. + // To do: Try to make the validation resistant to crashes if the data is invalid. + if((end() - begin()) < 0) + return false; + return true; + } + + + template + inline int deque::validate_iterator(const_iterator i) const + { + // To do: We don't currently track isf_current, will need to make it do so. + // To do: Fix the validation below, as it will not catch all invalid iterators. + if((i - begin()) < 0) + return isf_none; + + if((end() - i) < 0) + return isf_none; + + if(i == end()) + return (isf_valid | isf_current); + + return (isf_valid | isf_current | isf_can_dereference); + } + + + + /////////////////////////////////////////////////////////////////////// + // global operators + /////////////////////////////////////////////////////////////////////// + + template + inline bool operator==(const deque& a, const deque& b) + { + return ((a.size() == b.size()) && eastl::equal(a.begin(), a.end(), b.begin())); + } + + template + inline bool operator!=(const deque& a, const deque& b) + { + return ((a.size() != b.size()) || !eastl::equal(a.begin(), a.end(), b.begin())); + } + + template + inline bool operator<(const deque& a, const deque& b) + { + return eastl::lexicographical_compare(a.begin(), a.end(), b.begin(), b.end()); + } + + template + inline bool operator>(const deque& a, const deque& b) + { + return b < a; + } + + template + inline bool operator<=(const deque& a, const deque& b) + { + return !(b < a); + } + + template + inline bool operator>=(const deque& a, const deque& b) + { + return !(a < b); + } + + template + inline void swap(deque& a, deque& b) + { + a.swap(b); + } + + /////////////////////////////////////////////////////////////////////// + // erase / erase_if + // + // https://en.cppreference.com/w/cpp/container/deque/erase2 + /////////////////////////////////////////////////////////////////////// + template + void erase(deque& c, const U& value) + { + // Erases all elements that compare equal to value from the container. + c.erase(eastl::remove(c.begin(), c.end(), value), c.end()); + } + + template + void erase_if(deque& c, Predicate predicate) + { + // Erases all elements that satisfy the predicate pred from the container. 
+ c.erase(eastl::remove_if(c.begin(), c.end(), predicate), c.end()); + } + + +} // namespace eastl + + +EA_RESTORE_VC_WARNING(); +#if EASTL_EXCEPTIONS_ENABLED + EA_RESTORE_VC_WARNING(); +#endif + + +#endif // Header include guard diff --git a/libkram/eastl/include/EASTL/finally.h b/libkram/eastl/include/EASTL/finally.h new file mode 100644 index 00000000..b4ed5803 --- /dev/null +++ b/libkram/eastl/include/EASTL/finally.h @@ -0,0 +1,93 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// eastl::finally is an implementation of the popular cpp idiom RAII - Resource +// Acquisition Is Initialization. eastl::finally guarantees that the user +// provided callable will be executed upon whatever mechanism is used to leave +// the current scope. This can guard against user errors but this is a popular +// technique to write robust code in execution environments that have exceptions +// enabled. +// +// Example: +// void foo() +// { +// void* p = malloc(128); +// auto _ = eastl::make_finally([&] { free(p); }); +// +// // Code that may throw an exception... +// +// } // eastl::finally guaranteed to call 'free' at scope exit. +// +// References: +// * https://www.bfilipek.com/2017/04/finalact.html +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_FINALLY_H +#define EASTL_FINALLY_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + +#include +#include +#include + +namespace eastl +{ + /////////////////////////////////////////////////////////////////////////// + // finally + // + // finally is the type that calls the users callback on scope exit. + // + template + class finally + { + static_assert(!eastl::is_lvalue_reference_v, "eastl::finally requires the callable is passed as an rvalue reference."); + + Functor m_functor; + bool m_engaged = false; + + public: + finally(Functor f) : m_functor(eastl::move(f)), m_engaged(true) {} + + finally(finally&& other) : m_functor(eastl::move(other.m_functor)), m_engaged(other.m_engaged) + { + other.dismiss(); + } + + ~finally() { execute(); } + + finally(const finally&) = delete; + finally& operator=(const finally&) = delete; + finally& operator=(finally&&) = delete; + + inline void dismiss() { m_engaged = false; } + + inline void execute() + { + if (m_engaged) + m_functor(); + + dismiss(); + } + }; + + + /////////////////////////////////////////////////////////////////////////// + // make_finally + // + // this utility function is the standard mechansim to perform the required + // type deduction on the users provided callback inorder to create a + // 'finally' object. + // + template + auto make_finally(F&& f) + { + return finally(eastl::forward(f)); + } +} + +#endif // EASTL_FINALLY_H diff --git a/libkram/eastl/include/EASTL/fixed_allocator.h b/libkram/eastl/include/EASTL/fixed_allocator.h new file mode 100644 index 00000000..488eae4a --- /dev/null +++ b/libkram/eastl/include/EASTL/fixed_allocator.h @@ -0,0 +1,455 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// This file implements the following +// fixed_allocator +// fixed_allocator_with_overflow +/////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_FIXED_ALLOCATOR_H +#define EASTL_FIXED_ALLOCATOR_H + + +#include +#include +#include +#include +#include +#include + +EA_DISABLE_ALL_VC_WARNINGS(); + +#include + +EA_RESTORE_ALL_VC_WARNINGS(); + +EA_DISABLE_VC_WARNING(4275); // non dll-interface class used as base for DLL-interface classkey 'identifier' + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + + + +namespace eastl +{ + + /////////////////////////////////////////////////////////////////////////// + // fixed_allocator + /////////////////////////////////////////////////////////////////////////// + + /// fixed_allocator + /// + /// Implements an allocator which allocates a single fixed size where + /// the size, alignment, and memory used for the pool is defined at + /// runtime by the user. This is different from fixed containers + /// such as fixed_list whereby the size and alignment are determined + /// at compile time and the memory is directly built into the container's + /// member data. + /// + /// If the pool's memory is exhausted or was never initialized, the + /// allocate function returns NULL. Consider the fixed_allocator_with_overflow + /// class as an alternative in order to deal with this situation. + /// + /// This class requires the user to call container.get_allocator().init() + /// after constructing the container. There currently isn't a way to + /// construct the container with the initialization parameters, though + /// with some effort such a thing could probably be made possible. + /// It's not as simple as it might first seem, due to the non-copyable + /// nature of fixed allocators. A side effect of this limitation is that + /// you cannot copy-construct a container using fixed_allocators. + /// + /// Another side-effect is that you cannot swap two containers using + /// a fixed_allocator, as a swap requires temporary memory allocated by + /// an equivalent allocator, and such a thing cannot be done implicitly. + /// A workaround for the swap limitation is that you can implement your + /// own swap whereby you provide an explicitly created temporary object. + /// + /// Note: Be careful to set the allocator's node size to the size of the + /// container node and not the size of the contained object. Note that the + /// example code below uses IntListNode. + /// + /// Example usage: + /// typedef eastl::list IntList; + /// typedef IntList::node_type IntListNode; + /// + /// IntListNode buffer[200]; + /// IntList intList; + /// intList.get_allocator().init(buffer, sizeof(buffer), sizeof(IntListNode), __alignof(IntListNode)); + /// + class EASTL_API fixed_allocator : public fixed_pool_base + { + public: + /// fixed_allocator + /// + /// Default constructor. The user usually will need to call init() after + /// constructing via this constructor. + /// + fixed_allocator(const char* /*pName*/ = EASTL_FIXED_POOL_DEFAULT_NAME) + : fixed_pool_base(NULL) + { + } + + + /// fixed_allocator + /// + /// Copy constructor. The user usually will need to call init() after + /// constructing via this constructor. 
By their nature, fixed-allocators + /// cannot be copied in any useful way, as by their nature the user + /// must manually initialize them. + /// + fixed_allocator(const fixed_allocator&) + : fixed_pool_base(NULL) + { + } + + + /// operator= + /// + /// By their nature, fixed-allocators cannot be copied in any + /// useful way, as by their nature the user must manually + /// initialize them. + /// + fixed_allocator& operator=(const fixed_allocator&) + { + return *this; + } + + + // init + // + // No init here, as the base class version is sufficient. + // + //void init(void* pMemory, size_t memorySize, size_t nodeSize, + // size_t alignment, size_t alignmentOffset = 0); + + + /// allocate + /// + /// Allocates a new object of the size specified upon class initialization. + /// Returns NULL if there is no more memory. + /// + void* allocate(size_t n, int /*flags*/ = 0) + { + // To consider: Verify that 'n' is what the user initialized us with. + + Link* pLink = mpHead; + + if(pLink) // If we have space... + { + #if EASTL_FIXED_SIZE_TRACKING_ENABLED + if(++mnCurrentSize > mnPeakSize) + mnPeakSize = mnCurrentSize; + #endif + + mpHead = pLink->mpNext; + return pLink; + } + else + { + // If there's no free node in the free list, just + // allocate another from the reserved memory area + + if(mpNext != mpCapacity) + { + pLink = mpNext; + + mpNext = reinterpret_cast(reinterpret_cast(mpNext) + n); + + #if EASTL_FIXED_SIZE_TRACKING_ENABLED + if(++mnCurrentSize > mnPeakSize) + mnPeakSize = mnCurrentSize; + #endif + + return pLink; + } + + // EASTL_ASSERT(false); To consider: enable this assert. However, we intentionally disable it because this isn't necessarily an assertable error. + return NULL; + } + } + + + /// allocate + /// + void* allocate(size_t n, size_t /*alignment*/, size_t /*offset*/, int flags = 0) + { + return allocate(n, flags); + } + + + /// deallocate + /// + /// Frees the given object which was allocated by allocate(). + /// If the given node was not allocated by allocate() then the behaviour + /// is undefined. + /// + void deallocate(void* p, size_t) + { + #if EASTL_FIXED_SIZE_TRACKING_ENABLED + --mnCurrentSize; + #endif + + ((Link*)p)->mpNext = mpHead; + mpHead = ((Link*)p); + } + + + using fixed_pool_base::can_allocate; + + + const char* get_name() const + { + return EASTL_FIXED_POOL_DEFAULT_NAME; + } + + + void set_name(const char*) + { + // Nothing to do. We don't allocate memory. + } + + }; // fixed_allocator + + bool operator==(const fixed_allocator& a, const fixed_allocator& b); + bool operator!=(const fixed_allocator& a, const fixed_allocator& b); + + + + /////////////////////////////////////////////////////////////////////////// + // fixed_allocator_with_overflow + /////////////////////////////////////////////////////////////////////////// + + /// fixed_allocator_with_overflow + /// + /// Implements an allocator which allocates a single fixed size where + /// the size, alignment, and memory used for the pool is defined at + /// runtime by the user. This is different from fixed containers + /// such as fixed_list whereby the size and alignment are determined + /// at compile time and the memory is directly built into the container's + /// member data. + /// + /// Note: Be careful to set the allocator's node size to the size of the + /// container node and not the size of the contained object. Note that the + /// example code below uses IntListNode. + /// + /// This class requires the user to call container.get_allocator().init() + /// after constructing the container. 
There currently isn't a way to + /// construct the container with the initialization parameters, though + /// with some effort such a thing could probably be made possible. + /// It's not as simple as it might first seem, due to the non-copyable + /// nature of fixed allocators. A side effect of this limitation is that + /// you cannot copy-construct a container using fixed_allocators. + /// + /// Another side-effect is that you cannot swap two containers using + /// a fixed_allocator, as a swap requires temporary memory allocated by + /// an equivalent allocator, and such a thing cannot be done implicitly. + /// A workaround for the swap limitation is that you can implement your + /// own swap whereby you provide an explicitly created temporary object. + /// + /// Example usage: + /// typedef eastl::list IntList; + /// typedef IntList::node_type IntListNode; + /// + /// IntListNode buffer[200]; + /// IntList intList; + /// intList.get_allocator().init(buffer, sizeof(buffer), sizeof(IntListNode), __alignof(IntListNode)); + /// + class EASTL_API fixed_allocator_with_overflow : public fixed_pool_base + { + public: + /// fixed_allocator_with_overflow + /// + /// Default constructor. The user usually will need to call init() after + /// constructing via this constructor. + /// + fixed_allocator_with_overflow(const char* pName = EASTL_FIXED_POOL_DEFAULT_NAME) + : fixed_pool_base(NULL) + , mOverflowAllocator(pName) + , mpPoolBegin(nullptr) + , mpPoolEnd(nullptr) + , mnNodeSize(0) + { + } + + + /// fixed_allocator_with_overflow + /// + /// Copy constructor. The user usually will need to call init() after + /// constructing via this constructor. By their nature, fixed-allocators + /// cannot be copied in any useful way, as by their nature the user + /// must manually initialize them. + /// + fixed_allocator_with_overflow(const fixed_allocator_with_overflow&) + : fixed_pool_base(NULL) + , mpPoolBegin(nullptr) + , mpPoolEnd(nullptr) + , mnNodeSize(0) + { + } + + + /// operator= + /// + /// By their nature, fixed-allocators cannot be copied in any + /// useful way, as by their nature the user must manually + /// initialize them. + /// + fixed_allocator_with_overflow& operator=(const fixed_allocator_with_overflow& x) + { + #if EASTL_ALLOCATOR_COPY_ENABLED + mOverflowAllocator = x.mOverflowAllocator; + #else + (void)x; + #endif + + return *this; + } + + + /// init + /// + void init(void* pMemory, size_t memorySize, size_t nodeSize, + size_t alignment, size_t alignmentOffset = 0) + { + fixed_pool_base::init(pMemory, memorySize, nodeSize, alignment, alignmentOffset); + + mpPoolBegin = pMemory; + mpPoolEnd = (void*)((uintptr_t)pMemory + memorySize); + mnNodeSize = (eastl_size_t)nodeSize; + } + + + /// allocate + /// + /// Allocates a new object of the size specified upon class initialization. + /// Returns NULL if there is no more memory. + /// + void* allocate(size_t /*n*/, int /*flags*/ = 0) + { + // To consider: Verify that 'n' is what the user initialized us with. + + void* p; + + if(mpHead) // If we have space... 
+ { + p = mpHead; + mpHead = mpHead->mpNext; + } + else + { + // If there's no free node in the free list, just + // allocate another from the reserved memory area + + if (mpNext != mpCapacity) + { + p = mpNext; + mpNext = reinterpret_cast(reinterpret_cast(mpNext) + mnNodeSize); + } + else + p = mOverflowAllocator.allocate(mnNodeSize); + } + + #if EASTL_FIXED_SIZE_TRACKING_ENABLED + if(p && (++mnCurrentSize > mnPeakSize)) + mnPeakSize = mnCurrentSize; + #endif + + return p; + } + + + /// allocate + /// + void* allocate(size_t n, size_t /*alignment*/, size_t /*offset*/, int flags = 0) + { + return allocate(n, flags); + } + + + /// deallocate + /// + /// Frees the given object which was allocated by allocate(). + /// If the given node was not allocated by allocate() then the behaviour + /// is undefined. + /// + void deallocate(void* p, size_t) + { + #if EASTL_FIXED_SIZE_TRACKING_ENABLED + --mnCurrentSize; + #endif + + if((p >= mpPoolBegin) && (p < mpPoolEnd)) + { + ((Link*)p)->mpNext = mpHead; + mpHead = ((Link*)p); + } + else + mOverflowAllocator.deallocate(p, (size_t)mnNodeSize); + } + + + using fixed_pool_base::can_allocate; + + + const char* get_name() const + { + return mOverflowAllocator.get_name(); + } + + + void set_name(const char* pName) + { + mOverflowAllocator.set_name(pName); + } + + protected: + EASTLAllocatorType mOverflowAllocator; // To consider: Allow the user to define the type of this, presumably via a template parameter. + void* mpPoolBegin; // To consider: We have these member variables and ideally we shouldn't need them. The problem is that + void* mpPoolEnd; // the information about the pool buffer and object size is stored in the owning container + eastl_size_t mnNodeSize; // and we can't have access to it without increasing the amount of code we need and by templating + // more code. It may turn out that simply storing data here is smaller in the end. + }; // fixed_allocator_with_overflow // Granted, this class is usually used for debugging purposes, but perhaps there is an elegant solution. + + bool operator==(const fixed_allocator_with_overflow& a, const fixed_allocator_with_overflow& b); + bool operator!=(const fixed_allocator_with_overflow& a, const fixed_allocator_with_overflow& b); + + + + + + + /////////////////////////////////////////////////////////////////////// + // global operators + /////////////////////////////////////////////////////////////////////// + + inline bool operator==(const fixed_allocator&, const fixed_allocator&) + { + return false; + } + + inline bool operator!=(const fixed_allocator&, const fixed_allocator&) + { + return false; + } + + inline bool operator==(const fixed_allocator_with_overflow&, const fixed_allocator_with_overflow&) + { + return false; + } + + inline bool operator!=(const fixed_allocator_with_overflow&, const fixed_allocator_with_overflow&) + { + return false; + } + + +} // namespace eastl + + +EA_RESTORE_VC_WARNING(); + +#endif // Header include guard diff --git a/libkram/eastl/include/EASTL/fixed_function.h b/libkram/eastl/include/EASTL/fixed_function.h new file mode 100644 index 00000000..6aed768a --- /dev/null +++ b/libkram/eastl/include/EASTL/fixed_function.h @@ -0,0 +1,218 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_FIXED_FUNCTION_H +#define EASTL_FIXED_FUNCTION_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + +#include + +namespace eastl +{ + template + class fixed_function; + + namespace internal + { + template + struct is_fixed_function + : public eastl::false_type {}; + + template + struct is_fixed_function> + : public eastl::true_type {}; + + template + EA_CONSTEXPR bool is_fixed_function_v = is_fixed_function::value; + } + + #define EASTL_INTERNAL_FIXED_FUNCTION_STATIC_ASSERT(TYPE) \ + static_assert(sizeof(TYPE) <= sizeof(typename Base::FunctorStorageType), \ + "fixed_function local buffer is not large enough to hold the callable object.") + + #define EASTL_INTERNAL_FIXED_FUNCTION_NEW_SIZE_STATIC_ASSERT(NEW_SIZE_IN_BYTES) \ + static_assert(SIZE_IN_BYTES >= NEW_SIZE_IN_BYTES, \ + "fixed_function local buffer is not large enough to hold the new fixed_function type.") + + template + using EASTL_DISABLE_OVERLOAD_IF_FIXED_FUNCTION = + eastl::disable_if_t>>; + + + // fixed_function + // + template + class fixed_function : public internal::function_detail + { + using Base = internal::function_detail; + + public: + using typename Base::result_type; + + fixed_function() EA_NOEXCEPT = default; + fixed_function(std::nullptr_t p) EA_NOEXCEPT + : Base(p) + { + } + + fixed_function(const fixed_function& other) + : Base(other) + { + } + + fixed_function(fixed_function&& other) + : Base(eastl::move(other)) + { + } + + template > + fixed_function(Functor functor) + : Base(eastl::move(functor)) + { + EASTL_INTERNAL_FIXED_FUNCTION_STATIC_ASSERT(Functor); + } + + template + fixed_function(const fixed_function& other) + : Base(other) + { + EASTL_INTERNAL_FIXED_FUNCTION_NEW_SIZE_STATIC_ASSERT(NEW_SIZE_IN_BYTES); + } + + template + fixed_function(fixed_function&& other) + : Base(eastl::move(other)) + { + EASTL_INTERNAL_FIXED_FUNCTION_NEW_SIZE_STATIC_ASSERT(NEW_SIZE_IN_BYTES); + } + + ~fixed_function() EA_NOEXCEPT = default; + + fixed_function& operator=(const fixed_function& other) + { + Base::operator=(other); + return *this; + } + + fixed_function& operator=(fixed_function&& other) + { + Base::operator=(eastl::move(other)); + return *this; + } + + fixed_function& operator=(std::nullptr_t p) EA_NOEXCEPT + { + Base::operator=(p); + return *this; + } + + template + fixed_function& operator=(const fixed_function& other) + { + EASTL_INTERNAL_FIXED_FUNCTION_NEW_SIZE_STATIC_ASSERT(NEW_SIZE_IN_BYTES); + + Base::operator=(other); + return *this; + } + + template + fixed_function& operator=(fixed_function&& other) + { + EASTL_INTERNAL_FIXED_FUNCTION_NEW_SIZE_STATIC_ASSERT(NEW_SIZE_IN_BYTES); + + Base::operator=(eastl::move(other)); + return *this; + } + + template > + fixed_function& operator=(Functor&& functor) + { + EASTL_INTERNAL_FIXED_FUNCTION_STATIC_ASSERT(eastl::decay_t); + Base::operator=(eastl::forward(functor)); + return *this; + } + + template + fixed_function& operator=(eastl::reference_wrapper f) EA_NOEXCEPT + { + EASTL_INTERNAL_FIXED_FUNCTION_STATIC_ASSERT(eastl::reference_wrapper); + Base::operator=(f); + return *this; + } + + void swap(fixed_function& other) EA_NOEXCEPT + { + Base::swap(other); + } + + explicit operator bool() const EA_NOEXCEPT + { + return Base::operator bool(); + } + + R operator ()(Args... 
args) const + { + return Base::operator ()(eastl::forward(args)...); + } + + #if EASTL_RTTI_ENABLED + const std::type_info& target_type() const EA_NOEXCEPT + { + return Base::target_type(); + } + + template + Functor* target() EA_NOEXCEPT + { + return Base::target(); + } + + template + const Functor* target() const EA_NOEXCEPT + { + return Base::target(); + } + #endif + }; + + template + bool operator==(const fixed_function& f, std::nullptr_t) EA_NOEXCEPT + { + return !f; + } + + template + bool operator==(std::nullptr_t, const fixed_function& f) EA_NOEXCEPT + { + return !f; + } + + template + bool operator!=(const fixed_function& f, std::nullptr_t) EA_NOEXCEPT + { + return !!f; + } + + template + bool operator!=(std::nullptr_t, const fixed_function& f) EA_NOEXCEPT + { + return !!f; + } + + template + void swap(fixed_function& lhs, fixed_function& rhs) + { + lhs.swap(rhs); + } + +} // namespace eastl + +#endif // EASTL_FIXED_FUNCTION_H diff --git a/libkram/eastl/include/EASTL/fixed_hash_map.h b/libkram/eastl/include/EASTL/fixed_hash_map.h new file mode 100644 index 00000000..af6663dd --- /dev/null +++ b/libkram/eastl/include/EASTL/fixed_hash_map.h @@ -0,0 +1,822 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// This file implements a hash_map and hash_multimap which use a fixed size +// memory pool for its buckets and nodes. +/////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_FIXED_HASH_MAP_H +#define EASTL_FIXED_HASH_MAP_H + + +#include +#include + +EA_DISABLE_VC_WARNING(4127) // Conditional expression is constant + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + + +namespace eastl +{ + /// EASTL_FIXED_HASH_MAP_DEFAULT_NAME + /// + /// Defines a default container name in the absence of a user-provided name. + /// In the case of fixed-size containers, the allocator name always refers + /// to overflow allocations. + /// + #ifndef EASTL_FIXED_HASH_MAP_DEFAULT_NAME + #define EASTL_FIXED_HASH_MAP_DEFAULT_NAME EASTL_DEFAULT_NAME_PREFIX " fixed_hash_map" // Unless the user overrides something, this is "EASTL fixed_hash_map". + #endif + + #ifndef EASTL_FIXED_HASH_MULTIMAP_DEFAULT_NAME + #define EASTL_FIXED_HASH_MULTIMAP_DEFAULT_NAME EASTL_DEFAULT_NAME_PREFIX " fixed_hash_multimap" // Unless the user overrides something, this is "EASTL fixed_hash_multimap". + #endif + + + /// EASTL_FIXED_HASH_MAP_DEFAULT_ALLOCATOR + /// EASTL_FIXED_HASH_MULTIMAP_DEFAULT_ALLOCATOR + /// + #ifndef EASTL_FIXED_HASH_MAP_DEFAULT_ALLOCATOR + #define EASTL_FIXED_HASH_MAP_DEFAULT_ALLOCATOR overflow_allocator_type(EASTL_FIXED_HASH_MAP_DEFAULT_NAME) + #endif + + #ifndef EASTL_FIXED_HASH_MULTIMAP_DEFAULT_ALLOCATOR + #define EASTL_FIXED_HASH_MULTIMAP_DEFAULT_ALLOCATOR overflow_allocator_type(EASTL_FIXED_HASH_MULTIMAP_DEFAULT_NAME) + #endif + + + + /// fixed_hash_map + /// + /// Implements a hash_map with a fixed block of memory identified by the nodeCount and bucketCount + /// template parameters. + /// + /// Template parameters: + /// Key The key type for the map. This is a map of Key to T (value). + /// T The value type for the map. 
+ /// nodeCount The max number of objects to contain. This value must be >= 1. + /// bucketCount The number of buckets to use. This value must be >= 2. + /// bEnableOverflow Whether or not we should use the global heap if our object pool is exhausted. + /// Hash hash_set hash function. See hash_set. + /// Predicate hash_set equality testing function. See hash_set. + /// + template , typename Predicate = eastl::equal_to, bool bCacheHashCode = false, typename OverflowAllocator = EASTLAllocatorType> + class fixed_hash_map : public hash_map::node_type), + nodeCount, + EASTL_ALIGN_OF(eastl::pair), + 0, + bEnableOverflow, + OverflowAllocator>, + bCacheHashCode> + { + public: + typedef fixed_hashtable_allocator::node_type), nodeCount, EASTL_ALIGN_OF(eastl::pair), 0, + bEnableOverflow, OverflowAllocator> fixed_allocator_type; + typedef typename fixed_allocator_type::overflow_allocator_type overflow_allocator_type; + typedef hash_map base_type; + typedef fixed_hash_map this_type; + typedef typename base_type::value_type value_type; + typedef typename base_type::node_type node_type; + typedef typename base_type::size_type size_type; + + enum { kMaxSize = nodeCount }; + + using base_type::mAllocator; + using base_type::clear; + + protected: + node_type** mBucketBuffer[bucketCount + 1]; // '+1' because the hash table needs a null terminating bucket. + char mNodeBuffer[fixed_allocator_type::kBufferSize]; // kBufferSize will take into account alignment requirements. + + public: + explicit fixed_hash_map(const overflow_allocator_type& overflowAllocator); + + explicit fixed_hash_map(const Hash& hashFunction = Hash(), + const Predicate& predicate = Predicate()); + + fixed_hash_map(const Hash& hashFunction, + const Predicate& predicate, + const overflow_allocator_type& overflowAllocator); + + template + fixed_hash_map(InputIterator first, InputIterator last, + const Hash& hashFunction = Hash(), + const Predicate& predicate = Predicate()); + + fixed_hash_map(const this_type& x); + fixed_hash_map(this_type&& x); + fixed_hash_map(this_type&& x, const overflow_allocator_type& overflowAllocator); + fixed_hash_map(std::initializer_list ilist, const overflow_allocator_type& overflowAllocator = EASTL_FIXED_HASH_MAP_DEFAULT_ALLOCATOR); + + this_type& operator=(const this_type& x); + this_type& operator=(std::initializer_list ilist); + this_type& operator=(this_type&& x); + + void swap(this_type& x); + + void reset_lose_memory(); // This is a unilateral reset to an initially empty state. No destructors are called, no deallocation occurs. + + size_type max_size() const; + + const overflow_allocator_type& get_overflow_allocator() const EA_NOEXCEPT; + overflow_allocator_type& get_overflow_allocator() EA_NOEXCEPT; + void set_overflow_allocator(const overflow_allocator_type& allocator); + + void clear(bool clearBuckets); + }; // fixed_hash_map + + + + + + /// fixed_hash_multimap + /// + /// Implements a hash_multimap with a fixed block of memory identified by the nodeCount and bucketCount + /// template parameters. + /// + /// Template parameters: + /// Key The key type for the map. This is a map of Key to T (value). + /// T The value type for the map. + /// nodeCount The max number of objects to contain. This value must be >= 1. + /// bucketCount The number of buckets to use. This value must be >= 2. + /// bEnableOverflow Whether or not we should use the global heap if our object pool is exhausted. + /// Hash hash_set hash function. See hash_set. + /// Predicate hash_set equality testing function. See hash_set. 
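+	///
+	/// Example usage (an illustrative sketch only; the node and bucket counts below are
+	/// arbitrary, the remaining template parameters are left at their defaults, and the
+	/// arguments follow the parameter order described above):
+	///     typedef eastl::fixed_hash_multimap<int, float, 32, 37> IdToWeightMap;
+	///
+	///     IdToWeightMap map;
+	///     map.insert(eastl::make_pair(7, 1.0f));
+	///     map.insert(eastl::make_pair(7, 2.0f));   // duplicate keys are allowed in a multimap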
+ /// + template , typename Predicate = eastl::equal_to, bool bCacheHashCode = false, typename OverflowAllocator = EASTLAllocatorType> + class fixed_hash_multimap : public hash_multimap::node_type), + nodeCount, + EASTL_ALIGN_OF(eastl::pair), + 0, + bEnableOverflow, + OverflowAllocator>, + bCacheHashCode> + { + public: + typedef fixed_hashtable_allocator::node_type), nodeCount, EASTL_ALIGN_OF(eastl::pair), 0, + bEnableOverflow, OverflowAllocator> fixed_allocator_type; + typedef typename fixed_allocator_type::overflow_allocator_type overflow_allocator_type; + typedef hash_multimap base_type; + typedef fixed_hash_multimap this_type; + typedef typename base_type::value_type value_type; + typedef typename base_type::node_type node_type; + typedef typename base_type::size_type size_type; + + enum { kMaxSize = nodeCount }; + + using base_type::mAllocator; + using base_type::clear; + + protected: + node_type** mBucketBuffer[bucketCount + 1]; // '+1' because the hash table needs a null terminating bucket. + char mNodeBuffer[fixed_allocator_type::kBufferSize]; // kBufferSize will take into account alignment requirements. + + public: + explicit fixed_hash_multimap(const overflow_allocator_type& overflowAllocator); + + explicit fixed_hash_multimap(const Hash& hashFunction = Hash(), + const Predicate& predicate = Predicate()); + + fixed_hash_multimap(const Hash& hashFunction, + const Predicate& predicate, + const overflow_allocator_type& overflowAllocator); + + template + fixed_hash_multimap(InputIterator first, InputIterator last, + const Hash& hashFunction = Hash(), + const Predicate& predicate = Predicate()); + + fixed_hash_multimap(const this_type& x); + fixed_hash_multimap(this_type&& x); + fixed_hash_multimap(this_type&& x, const overflow_allocator_type& overflowAllocator); + fixed_hash_multimap(std::initializer_list ilist, const overflow_allocator_type& overflowAllocator = EASTL_FIXED_HASH_MULTIMAP_DEFAULT_ALLOCATOR); + + this_type& operator=(const this_type& x); + this_type& operator=(std::initializer_list ilist); + this_type& operator=(this_type&& x); + + void swap(this_type& x); + + void reset_lose_memory(); // This is a unilateral reset to an initially empty state. No destructors are called, no deallocation occurs. + + size_type max_size() const; + + const overflow_allocator_type& get_overflow_allocator() const EA_NOEXCEPT; + overflow_allocator_type& get_overflow_allocator() EA_NOEXCEPT; + void set_overflow_allocator(const overflow_allocator_type& allocator); + + void clear(bool clearBuckets); + }; // fixed_hash_multimap + + + + + + + /////////////////////////////////////////////////////////////////////// + // fixed_hash_map + /////////////////////////////////////////////////////////////////////// + + template + inline fixed_hash_map:: + fixed_hash_map(const overflow_allocator_type& overflowAllocator) + : base_type(prime_rehash_policy::GetPrevBucketCountOnly(bucketCount), Hash(), + Predicate(), fixed_allocator_type(NULL, mBucketBuffer, overflowAllocator)) + { + EASTL_CT_ASSERT((nodeCount >= 1) && (bucketCount >= 2)); + + if(!bEnableOverflow) + base_type::set_max_load_factor(10000.f); // Set it so that we will never resize. 
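+		// (When overflow is disabled the bucket array can never be reallocated, so an
+		// effectively infinite max load factor guarantees that no rehash is ever attempted.)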
+ + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_HASH_MAP_DEFAULT_NAME); + #endif + + mAllocator.reset(mNodeBuffer); + } + + + template + inline fixed_hash_map:: + fixed_hash_map(const Hash& hashFunction, + const Predicate& predicate) + : base_type(prime_rehash_policy::GetPrevBucketCountOnly(bucketCount), hashFunction, + predicate, fixed_allocator_type(NULL, mBucketBuffer)) + { + EASTL_CT_ASSERT((nodeCount >= 1) && (bucketCount >= 2)); + + if(!bEnableOverflow) + base_type::set_max_load_factor(10000.f); // Set it so that we will never resize. + + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_HASH_MAP_DEFAULT_NAME); + #endif + + mAllocator.reset(mNodeBuffer); + } + + + template + inline fixed_hash_map:: + fixed_hash_map(const Hash& hashFunction, + const Predicate& predicate, + const overflow_allocator_type& overflowAllocator) + : base_type(prime_rehash_policy::GetPrevBucketCountOnly(bucketCount), hashFunction, + predicate, fixed_allocator_type(NULL, mBucketBuffer, overflowAllocator)) + { + EASTL_CT_ASSERT((nodeCount >= 1) && (bucketCount >= 2)); + + if(!bEnableOverflow) + base_type::set_max_load_factor(10000.f); // Set it so that we will never resize. + + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_HASH_MAP_DEFAULT_NAME); + #endif + + mAllocator.reset(mNodeBuffer); + } + + + template + template + fixed_hash_map:: + fixed_hash_map(InputIterator first, InputIterator last, + const Hash& hashFunction, + const Predicate& predicate) + : base_type(prime_rehash_policy::GetPrevBucketCountOnly(bucketCount), hashFunction, + predicate, fixed_allocator_type(NULL, mBucketBuffer)) + { + EASTL_CT_ASSERT((nodeCount >= 1) && (bucketCount >= 2)); + + if(!bEnableOverflow) + base_type::set_max_load_factor(10000.f); // Set it so that we will never resize. + + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_HASH_MAP_DEFAULT_NAME); + #endif + + mAllocator.reset(mNodeBuffer); + base_type::insert(first, last); + } + + + template + inline fixed_hash_map:: + fixed_hash_map(const this_type& x) + : base_type(prime_rehash_policy::GetPrevBucketCountOnly(bucketCount), x.hash_function(), + x.equal_function(), fixed_allocator_type(NULL, mBucketBuffer)) + { + mAllocator.copy_overflow_allocator(x.mAllocator); + + #if EASTL_NAME_ENABLED + mAllocator.set_name(x.mAllocator.get_name()); + #endif + + EASTL_CT_ASSERT((nodeCount >= 1) && (bucketCount >= 2)); + + if(!bEnableOverflow) + base_type::set_max_load_factor(10000.f); // Set it so that we will never resize. + + mAllocator.reset(mNodeBuffer); + base_type::insert(x.begin(), x.end()); + } + + + template + inline fixed_hash_map:: + fixed_hash_map(this_type&& x) + : base_type(prime_rehash_policy::GetPrevBucketCountOnly(bucketCount), x.hash_function(), + x.equal_function(), fixed_allocator_type(NULL, mBucketBuffer)) + { + // This implementation is the same as above. If we could rely on using C++11 delegating constructor support then we could just call that here. + mAllocator.copy_overflow_allocator(x.mAllocator); + + #if EASTL_NAME_ENABLED + mAllocator.set_name(x.mAllocator.get_name()); + #endif + + EASTL_CT_ASSERT((nodeCount >= 1) && (bucketCount >= 2)); + + if(!bEnableOverflow) + base_type::set_max_load_factor(10000.f); // Set it so that we will never resize. 
+ + mAllocator.reset(mNodeBuffer); + base_type::insert(x.begin(), x.end()); + } + + + template + inline fixed_hash_map:: + fixed_hash_map(this_type&& x, const overflow_allocator_type& overflowAllocator) + : base_type(prime_rehash_policy::GetPrevBucketCountOnly(bucketCount), x.hash_function(), + x.equal_function(), fixed_allocator_type(NULL, mBucketBuffer, overflowAllocator)) + { + // This implementation is the same as above. If we could rely on using C++11 delegating constructor support then we could just call that here. + mAllocator.copy_overflow_allocator(x.mAllocator); + + #if EASTL_NAME_ENABLED + mAllocator.set_name(x.mAllocator.get_name()); + #endif + + EASTL_CT_ASSERT((nodeCount >= 1) && (bucketCount >= 2)); + + if(!bEnableOverflow) + base_type::set_max_load_factor(10000.f); // Set it so that we will never resize. + + mAllocator.reset(mNodeBuffer); + base_type::insert(x.begin(), x.end()); + } + + + template + inline fixed_hash_map:: + fixed_hash_map(std::initializer_list ilist, const overflow_allocator_type& overflowAllocator) + : base_type(prime_rehash_policy::GetPrevBucketCountOnly(bucketCount), Hash(), + Predicate(), fixed_allocator_type(NULL, mBucketBuffer, overflowAllocator)) + { + EASTL_CT_ASSERT((nodeCount >= 1) && (bucketCount >= 2)); + + if(!bEnableOverflow) + base_type::set_max_load_factor(10000.f); // Set it so that we will never resize. + + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_HASH_MAP_DEFAULT_NAME); + #endif + + mAllocator.reset(mNodeBuffer); + base_type::insert(ilist.begin(), ilist.end()); + } + + + template + inline typename fixed_hash_map::this_type& + fixed_hash_map::operator=(const this_type& x) + { + base_type::operator=(x); + return *this; + } + + + template + inline typename fixed_hash_map::this_type& + fixed_hash_map::operator=(this_type&& x) + { + base_type::operator=(x); + return *this; + } + + + template + inline typename fixed_hash_map::this_type& + fixed_hash_map::operator=(std::initializer_list ilist) + { + base_type::clear(); + base_type::insert(ilist.begin(), ilist.end()); + return *this; + } + + + template + inline void fixed_hash_map:: + swap(this_type& x) + { + // Fixed containers use a special swap that can deal with excessively large buffers. 
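+		// (fixed_swap is expected to fall back to a heap-allocated temporary when the
+		// container object is too large for the stack, hence "excessively large buffers".)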
+ eastl::fixed_swap(*this, x); + } + + + template + inline void fixed_hash_map:: + reset_lose_memory() + { + base_type::mnBucketCount = (size_type)base_type::mRehashPolicy.GetPrevBucketCount((uint32_t)bucketCount); + base_type::mnElementCount = 0; + base_type::mRehashPolicy.mnNextResize = 0; + base_type::get_allocator().reset(mNodeBuffer); + } + + + template + inline typename fixed_hash_map::size_type + fixed_hash_map::max_size() const + { + return kMaxSize; + } + + + template + inline const typename fixed_hash_map::overflow_allocator_type& + fixed_hash_map::get_overflow_allocator() const EA_NOEXCEPT + { + return mAllocator.get_overflow_allocator(); + } + + + template + inline typename fixed_hash_map::overflow_allocator_type& + fixed_hash_map::get_overflow_allocator() EA_NOEXCEPT + { + return mAllocator.get_overflow_allocator(); + } + + + template + inline void fixed_hash_map:: + set_overflow_allocator(const overflow_allocator_type& allocator) + { + mAllocator.set_overflow_allocator(allocator); + } + + + template + inline void fixed_hash_map:: + clear(bool clearBuckets) + { + base_type::DoFreeNodes(base_type::mpBucketArray, base_type::mnBucketCount); + if(clearBuckets) + { + base_type::DoFreeBuckets(base_type::mpBucketArray, base_type::mnBucketCount); + reset_lose_memory(); + } + base_type::mpBucketArray = (node_type**)mBucketBuffer; + base_type::mnElementCount = 0; + } + + + /////////////////////////////////////////////////////////////////////// + // global operators + /////////////////////////////////////////////////////////////////////// + + template + inline void swap(fixed_hash_map& a, + fixed_hash_map& b) + { + // Fixed containers use a special swap that can deal with excessively large buffers. + eastl::fixed_swap(a, b); + } + + + + + /////////////////////////////////////////////////////////////////////// + // fixed_hash_multimap + /////////////////////////////////////////////////////////////////////// + + template + inline fixed_hash_multimap:: + fixed_hash_multimap(const overflow_allocator_type& overflowAllocator) + : base_type(prime_rehash_policy::GetPrevBucketCountOnly(bucketCount), Hash(), + Predicate(), fixed_allocator_type(NULL, mBucketBuffer, overflowAllocator)) + { + EASTL_CT_ASSERT((nodeCount >= 1) && (bucketCount >= 2)); + + if(!bEnableOverflow) + base_type::set_max_load_factor(10000.f); // Set it so that we will never resize. + + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_HASH_MULTIMAP_DEFAULT_NAME); + #endif + + mAllocator.reset(mNodeBuffer); + } + + + template + inline fixed_hash_multimap:: + fixed_hash_multimap(const Hash& hashFunction, + const Predicate& predicate) + : base_type(prime_rehash_policy::GetPrevBucketCountOnly(bucketCount), hashFunction, + predicate, fixed_allocator_type(NULL, mBucketBuffer)) + { + EASTL_CT_ASSERT((nodeCount >= 1) && (bucketCount >= 2)); + + if(!bEnableOverflow) + base_type::set_max_load_factor(10000.f); // Set it so that we will never resize. 
+ + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_HASH_MULTIMAP_DEFAULT_NAME); + #endif + + mAllocator.reset(mNodeBuffer); + } + + + template + inline fixed_hash_multimap:: + fixed_hash_multimap(const Hash& hashFunction, + const Predicate& predicate, + const overflow_allocator_type& overflowAllocator) + : base_type(prime_rehash_policy::GetPrevBucketCountOnly(bucketCount), hashFunction, + predicate, fixed_allocator_type(NULL, mBucketBuffer, overflowAllocator)) + { + EASTL_CT_ASSERT((nodeCount >= 1) && (bucketCount >= 2)); + + if(!bEnableOverflow) + base_type::set_max_load_factor(10000.f); // Set it so that we will never resize. + + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_HASH_MULTIMAP_DEFAULT_NAME); + #endif + + mAllocator.reset(mNodeBuffer); + } + + + template + template + fixed_hash_multimap:: + fixed_hash_multimap(InputIterator first, InputIterator last, + const Hash& hashFunction, + const Predicate& predicate) + : base_type(prime_rehash_policy::GetPrevBucketCountOnly(bucketCount), hashFunction, + predicate, fixed_allocator_type(NULL, mBucketBuffer)) + { + EASTL_CT_ASSERT((nodeCount >= 1) && (bucketCount >= 2)); + + if(!bEnableOverflow) + base_type::set_max_load_factor(10000.f); // Set it so that we will never resize. + + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_HASH_MULTIMAP_DEFAULT_NAME); + #endif + + mAllocator.reset(mNodeBuffer); + base_type::insert(first, last); + } + + + template + inline fixed_hash_multimap:: + fixed_hash_multimap(const this_type& x) + : base_type(prime_rehash_policy::GetPrevBucketCountOnly(bucketCount), x.hash_function(), + x.equal_function(),fixed_allocator_type(NULL, mBucketBuffer)) + { + mAllocator.copy_overflow_allocator(x.mAllocator); + + #if EASTL_NAME_ENABLED + mAllocator.set_name(x.mAllocator.get_name()); + #endif + + EASTL_CT_ASSERT((nodeCount >= 1) && (bucketCount >= 2)); + + if(!bEnableOverflow) + base_type::set_max_load_factor(10000.f); // Set it so that we will never resize. + + mAllocator.reset(mNodeBuffer); + base_type::insert(x.begin(), x.end()); + } + + + template + inline fixed_hash_multimap:: + fixed_hash_multimap(this_type&& x) + : base_type(prime_rehash_policy::GetPrevBucketCountOnly(bucketCount), x.hash_function(), + x.equal_function(),fixed_allocator_type(NULL, mBucketBuffer)) + { + // This implementation is the same as above. If we could rely on using C++11 delegating constructor support then we could just call that here. + mAllocator.copy_overflow_allocator(x.mAllocator); + + #if EASTL_NAME_ENABLED + mAllocator.set_name(x.mAllocator.get_name()); + #endif + + EASTL_CT_ASSERT((nodeCount >= 1) && (bucketCount >= 2)); + + if(!bEnableOverflow) + base_type::set_max_load_factor(10000.f); // Set it so that we will never resize. + + mAllocator.reset(mNodeBuffer); + base_type::insert(x.begin(), x.end()); + } + + + template + inline fixed_hash_multimap:: + fixed_hash_multimap(this_type&& x, const overflow_allocator_type& overflowAllocator) + : base_type(prime_rehash_policy::GetPrevBucketCountOnly(bucketCount), x.hash_function(), + x.equal_function(), fixed_allocator_type(NULL, mBucketBuffer, overflowAllocator)) + { + // This implementation is the same as above. If we could rely on using C++11 delegating constructor support then we could just call that here. 
+ mAllocator.copy_overflow_allocator(x.mAllocator); + + #if EASTL_NAME_ENABLED + mAllocator.set_name(x.mAllocator.get_name()); + #endif + + EASTL_CT_ASSERT((nodeCount >= 1) && (bucketCount >= 2)); + + if(!bEnableOverflow) + base_type::set_max_load_factor(10000.f); // Set it so that we will never resize. + + mAllocator.reset(mNodeBuffer); + base_type::insert(x.begin(), x.end()); + } + + + template + inline fixed_hash_multimap:: + fixed_hash_multimap(std::initializer_list ilist, const overflow_allocator_type& overflowAllocator) + : base_type(prime_rehash_policy::GetPrevBucketCountOnly(bucketCount), Hash(), + Predicate(), fixed_allocator_type(NULL, mBucketBuffer, overflowAllocator)) + { + EASTL_CT_ASSERT((nodeCount >= 1) && (bucketCount >= 2)); + + if(!bEnableOverflow) + base_type::set_max_load_factor(10000.f); // Set it so that we will never resize. + + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_HASH_MULTIMAP_DEFAULT_NAME); + #endif + + mAllocator.reset(mNodeBuffer); + base_type::insert(ilist.begin(), ilist.end()); + } + + + template + inline typename fixed_hash_multimap::this_type& + fixed_hash_multimap::operator=(const this_type& x) + { + base_type::operator=(x); + return *this; + } + + + template + inline typename fixed_hash_multimap::this_type& + fixed_hash_multimap::operator=(this_type&& x) + { + base_type::operator=(x); + return *this; + } + + + template + inline typename fixed_hash_multimap::this_type& + fixed_hash_multimap::operator=(std::initializer_list ilist) + { + base_type::clear(); + base_type::insert(ilist.begin(), ilist.end()); + return *this; + } + + + template + inline void fixed_hash_multimap:: + swap(this_type& x) + { + // Fixed containers use a special swap that can deal with excessively large buffers. + eastl::fixed_swap(*this, x); + } + + + template + inline void fixed_hash_multimap:: + reset_lose_memory() + { + base_type::mnBucketCount = (size_type)base_type::mRehashPolicy.GetPrevBucketCount((uint32_t)bucketCount); + base_type::mnElementCount = 0; + base_type::mRehashPolicy.mnNextResize = 0; + base_type::get_allocator().reset(mNodeBuffer); + } + + + template + inline typename fixed_hash_multimap::size_type + fixed_hash_multimap::max_size() const + { + return kMaxSize; + } + + + template + inline const typename fixed_hash_multimap::overflow_allocator_type& + fixed_hash_multimap::get_overflow_allocator() const EA_NOEXCEPT + { + return mAllocator.get_overflow_allocator(); + } + + + template + inline typename fixed_hash_multimap::overflow_allocator_type& + fixed_hash_multimap::get_overflow_allocator() EA_NOEXCEPT + { + return mAllocator.get_overflow_allocator(); + } + + + template + inline void fixed_hash_multimap::set_overflow_allocator(const overflow_allocator_type& allocator) + { + mAllocator.set_overflow_allocator(allocator); + } + + + template + inline void fixed_hash_multimap:: + clear(bool clearBuckets) + { + base_type::DoFreeNodes(base_type::mpBucketArray, base_type::mnBucketCount); + if(clearBuckets) + { + base_type::DoFreeBuckets(base_type::mpBucketArray, base_type::mnBucketCount); + reset_lose_memory(); + } + base_type::mpBucketArray = (node_type**)mBucketBuffer; + base_type::mnElementCount = 0; + } + + + /////////////////////////////////////////////////////////////////////// + // global operators + /////////////////////////////////////////////////////////////////////// + + template + inline void swap(fixed_hash_multimap& a, + fixed_hash_multimap& b) + { + // Fixed containers use a special swap that can deal with excessively large buffers. 
+ eastl::fixed_swap(a, b); + } + + + +} // namespace eastl + +EA_RESTORE_VC_WARNING() + +#endif // Header include guard + + + + + + + + + + + + diff --git a/libkram/eastl/include/EASTL/fixed_hash_set.h b/libkram/eastl/include/EASTL/fixed_hash_set.h new file mode 100644 index 00000000..0db9f49f --- /dev/null +++ b/libkram/eastl/include/EASTL/fixed_hash_set.h @@ -0,0 +1,782 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// This file implements a hash_set which uses a fixed size memory pool for +// its buckets and nodes. +/////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_FIXED_HASH_SET_H +#define EASTL_FIXED_HASH_SET_H + + +#include +#include + +EA_DISABLE_VC_WARNING(4127) // Conditional expression is constant + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + + + +namespace eastl +{ + /// EASTL_FIXED_HASH_SET_DEFAULT_NAME + /// + /// Defines a default container name in the absence of a user-provided name. + /// In the case of fixed-size containers, the allocator name always refers + /// to overflow allocations. + /// + #ifndef EASTL_FIXED_HASH_SET_DEFAULT_NAME + #define EASTL_FIXED_HASH_SET_DEFAULT_NAME EASTL_DEFAULT_NAME_PREFIX " fixed_hash_set" // Unless the user overrides something, this is "EASTL fixed_hash_set". + #endif + + #ifndef EASTL_FIXED_HASH_MULTISET_DEFAULT_NAME + #define EASTL_FIXED_HASH_MULTISET_DEFAULT_NAME EASTL_DEFAULT_NAME_PREFIX " fixed_hash_multiset" // Unless the user overrides something, this is "EASTL fixed_hash_multiset". + #endif + + + /// EASTL_FIXED_HASH_SET_DEFAULT_ALLOCATOR + /// EASTL_FIXED_HASH_MULTISET_DEFAULT_ALLOCATOR + /// + #ifndef EASTL_FIXED_HASH_SET_DEFAULT_ALLOCATOR + #define EASTL_FIXED_HASH_SET_DEFAULT_ALLOCATOR overflow_allocator_type(EASTL_FIXED_HASH_SET_DEFAULT_NAME) + #endif + + #ifndef EASTL_FIXED_HASH_MULTISET_DEFAULT_ALLOCATOR + #define EASTL_FIXED_HASH_MULTISET_DEFAULT_ALLOCATOR overflow_allocator_type(EASTL_FIXED_HASH_MULTISET_DEFAULT_NAME) + #endif + + + + /// fixed_hash_set + /// + /// Implements a hash_set with a fixed block of memory identified by the nodeCount and bucketCount + /// template parameters. + /// + /// Template parameters: + /// Value The type of object the hash_set holds. + /// nodeCount The max number of objects to contain. This value must be >= 1. + /// bucketCount The number of buckets to use. This value must be >= 2. + /// bEnableOverflow Whether or not we should use the global heap if our object pool is exhausted. + /// Hash hash_set hash function. See hash_set. + /// Predicate hash_set equality testing function. See hash_set. 
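+	///
+	/// Example usage (an illustrative sketch only; the node and bucket counts below are
+	/// arbitrary, the remaining template parameters are left at their defaults, and the
+	/// arguments follow the parameter order described above):
+	///     typedef eastl::fixed_hash_set<int, 32, 37> IntSet;
+	///
+	///     IntSet intSet;
+	///     intSet.insert(37);
+	///     if(intSet.find(37) != intSet.end())
+	///         intSet.erase(37);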
+ /// + template , typename Predicate = eastl::equal_to, bool bCacheHashCode = false, typename OverflowAllocator = EASTLAllocatorType> + class fixed_hash_set : public hash_set::node_type), + nodeCount, + EASTL_ALIGN_OF(Value), + 0, + bEnableOverflow, + OverflowAllocator>, + bCacheHashCode> + { + public: + typedef fixed_hashtable_allocator::node_type), nodeCount, EASTL_ALIGN_OF(Value), 0, + bEnableOverflow, OverflowAllocator> fixed_allocator_type; + typedef typename fixed_allocator_type::overflow_allocator_type overflow_allocator_type; + typedef fixed_hash_set this_type; + typedef hash_set base_type; + typedef typename base_type::value_type value_type; + typedef typename base_type::node_type node_type; + typedef typename base_type::size_type size_type; + + enum { kMaxSize = nodeCount }; + + using base_type::mAllocator; + + protected: + node_type** mBucketBuffer[bucketCount + 1]; // '+1' because the hash table needs a null terminating bucket. + char mNodeBuffer[fixed_allocator_type::kBufferSize]; // kBufferSize will take into account alignment requirements. + + public: + explicit fixed_hash_set(const overflow_allocator_type& overflowAllocator); + + explicit fixed_hash_set(const Hash& hashFunction = Hash(), + const Predicate& predicate = Predicate()); + + fixed_hash_set(const Hash& hashFunction, + const Predicate& predicate, + const overflow_allocator_type& overflowAllocator); + + template + fixed_hash_set(InputIterator first, InputIterator last, + const Hash& hashFunction = Hash(), + const Predicate& predicate = Predicate()); + + fixed_hash_set(const this_type& x); + fixed_hash_set(this_type&& x); + fixed_hash_set(this_type&& x, const overflow_allocator_type& overflowAllocator); + + fixed_hash_set(std::initializer_list ilist, const overflow_allocator_type& overflowAllocator = EASTL_FIXED_HASH_SET_DEFAULT_ALLOCATOR); + + this_type& operator=(const this_type& x); + this_type& operator=(std::initializer_list ilist); + this_type& operator=(this_type&& x); + + void swap(this_type& x); + + void reset_lose_memory(); // This is a unilateral reset to an initially empty state. No destructors are called, no deallocation occurs. + + size_type max_size() const; + + const overflow_allocator_type& get_overflow_allocator() const EA_NOEXCEPT; + overflow_allocator_type& get_overflow_allocator() EA_NOEXCEPT; + void set_overflow_allocator(const overflow_allocator_type& allocator); + }; // fixed_hash_set + + + + + + + /// fixed_hash_multiset + /// + /// Implements a hash_multiset with a fixed block of memory identified by the nodeCount and bucketCount + /// template parameters. + /// + /// Value The type of object the hash_set holds. + /// nodeCount The max number of objects to contain. This value must be >= 1. + /// bucketCount The number of buckets to use. This value must be >= 2. + /// bEnableOverflow Whether or not we should use the global heap if our object pool is exhausted. + /// Hash hash_set hash function. See hash_set. + /// Predicate hash_set equality testing function. See hash_set. 
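+	///
+	/// Example usage (an illustrative sketch only; counts are arbitrary, the remaining
+	/// template parameters are left at their defaults, and the arguments follow the
+	/// parameter order described above):
+	///     typedef eastl::fixed_hash_multiset<int, 32, 37> IntMultiSet;
+	///
+	///     IntMultiSet intSet;
+	///     intSet.insert(37);
+	///     intSet.insert(37);                              // duplicates are allowed in a multiset
+	///     IntMultiSet::size_type n = intSet.count(37);    // n == 2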
+ /// + template , typename Predicate = eastl::equal_to, bool bCacheHashCode = false, typename OverflowAllocator = EASTLAllocatorType> + class fixed_hash_multiset : public hash_multiset::node_type), + nodeCount, + EASTL_ALIGN_OF(Value), + 0, + bEnableOverflow, + OverflowAllocator>, + bCacheHashCode> + { + public: + typedef fixed_hashtable_allocator::node_type), nodeCount, EASTL_ALIGN_OF(Value), 0, + bEnableOverflow, OverflowAllocator> fixed_allocator_type; + typedef typename fixed_allocator_type::overflow_allocator_type overflow_allocator_type; + typedef hash_multiset base_type; + typedef fixed_hash_multiset this_type; + typedef typename base_type::value_type value_type; + typedef typename base_type::node_type node_type; + typedef typename base_type::size_type size_type; + + enum { kMaxSize = nodeCount }; + + using base_type::mAllocator; + + protected: + node_type** mBucketBuffer[bucketCount + 1]; // '+1' because the hash table needs a null terminating bucket. + char mNodeBuffer[fixed_allocator_type::kBufferSize]; // kBufferSize will take into account alignment requirements. + + public: + explicit fixed_hash_multiset(const overflow_allocator_type& overflowAllocator); + + explicit fixed_hash_multiset(const Hash& hashFunction = Hash(), + const Predicate& predicate = Predicate()); + + fixed_hash_multiset(const Hash& hashFunction, + const Predicate& predicate, + const overflow_allocator_type& overflowAllocator); + + template + fixed_hash_multiset(InputIterator first, InputIterator last, + const Hash& hashFunction = Hash(), + const Predicate& predicate = Predicate()); + + fixed_hash_multiset(const this_type& x); + fixed_hash_multiset(this_type&& x); + fixed_hash_multiset(this_type&& x, const overflow_allocator_type& overflowAllocator); + fixed_hash_multiset(std::initializer_list ilist, const overflow_allocator_type& overflowAllocator = EASTL_FIXED_HASH_MULTISET_DEFAULT_ALLOCATOR); + + this_type& operator=(const this_type& x); + this_type& operator=(std::initializer_list ilist); + this_type& operator=(this_type&& x); + + void swap(this_type& x); + + void reset_lose_memory(); // This is a unilateral reset to an initially empty state. No destructors are called, no deallocation occurs. + + size_type max_size() const; + + const overflow_allocator_type& get_overflow_allocator() const EA_NOEXCEPT; + overflow_allocator_type& get_overflow_allocator() EA_NOEXCEPT; + void set_overflow_allocator(const overflow_allocator_type& allocator); + }; // fixed_hash_multiset + + + + + + /////////////////////////////////////////////////////////////////////// + // fixed_hash_set + /////////////////////////////////////////////////////////////////////// + + template + inline fixed_hash_set:: + fixed_hash_set(const overflow_allocator_type& overflowAllocator) + : base_type(prime_rehash_policy::GetPrevBucketCountOnly(bucketCount), + Hash(), Predicate(), fixed_allocator_type(NULL, mBucketBuffer, overflowAllocator)) + { + EASTL_CT_ASSERT((nodeCount >= 1) && (bucketCount >= 2)); + + if(!bEnableOverflow) + base_type::set_max_load_factor(10000.f); // Set it so that we will never resize. 
+ + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_HASH_SET_DEFAULT_NAME); + #endif + + mAllocator.reset(mNodeBuffer); + } + + + template + inline fixed_hash_set:: + fixed_hash_set(const Hash& hashFunction, + const Predicate& predicate) + : base_type(prime_rehash_policy::GetPrevBucketCountOnly(bucketCount), + hashFunction, predicate, fixed_allocator_type(NULL, mBucketBuffer)) + { + EASTL_CT_ASSERT((nodeCount >= 1) && (bucketCount >= 2)); + + if(!bEnableOverflow) + base_type::set_max_load_factor(10000.f); // Set it so that we will never resize. + + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_HASH_SET_DEFAULT_NAME); + #endif + + mAllocator.reset(mNodeBuffer); + } + + + template + inline fixed_hash_set:: + fixed_hash_set(const Hash& hashFunction, + const Predicate& predicate, + const overflow_allocator_type& overflowAllocator) + : base_type(prime_rehash_policy::GetPrevBucketCountOnly(bucketCount), + hashFunction, predicate, fixed_allocator_type(NULL, mBucketBuffer, overflowAllocator)) + { + EASTL_CT_ASSERT((nodeCount >= 1) && (bucketCount >= 2)); + + if(!bEnableOverflow) + base_type::set_max_load_factor(10000.f); // Set it so that we will never resize. + + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_HASH_SET_DEFAULT_NAME); + #endif + + mAllocator.reset(mNodeBuffer); + } + + + template + template + fixed_hash_set:: + fixed_hash_set(InputIterator first, InputIterator last, + const Hash& hashFunction, + const Predicate& predicate) + : base_type(prime_rehash_policy::GetPrevBucketCountOnly(bucketCount), hashFunction, + predicate, fixed_allocator_type(NULL, mBucketBuffer)) + { + EASTL_CT_ASSERT((nodeCount >= 1) && (bucketCount >= 2)); + + if(!bEnableOverflow) + base_type::set_max_load_factor(10000.f); // Set it so that we will never resize. + + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_HASH_SET_DEFAULT_NAME); + #endif + + mAllocator.reset(mNodeBuffer); + base_type::insert(first, last); + } + + + template + inline fixed_hash_set:: + fixed_hash_set(const this_type& x) + : base_type(prime_rehash_policy::GetPrevBucketCountOnly(bucketCount), x.hash_function(), + x.equal_function(), fixed_allocator_type(NULL, mBucketBuffer)) + { + mAllocator.copy_overflow_allocator(x.mAllocator); + + #if EASTL_NAME_ENABLED + mAllocator.set_name(x.mAllocator.get_name()); + #endif + + EASTL_CT_ASSERT((nodeCount >= 1) && (bucketCount >= 2)); + + if(!bEnableOverflow) + base_type::set_max_load_factor(10000.f); // Set it so that we will never resize. + + mAllocator.reset(mNodeBuffer); + base_type::insert(x.begin(), x.end()); + } + + + template + inline fixed_hash_set::fixed_hash_set(this_type&& x) + : base_type(prime_rehash_policy::GetPrevBucketCountOnly(bucketCount), x.hash_function(), + x.equal_function(), fixed_allocator_type(NULL, mBucketBuffer)) + { + // This implementation is the same as above. If we could rely on using C++11 delegating constructor support then we could just call that here. + mAllocator.copy_overflow_allocator(x.mAllocator); + + #if EASTL_NAME_ENABLED + mAllocator.set_name(x.mAllocator.get_name()); + #endif + + EASTL_CT_ASSERT((nodeCount >= 1) && (bucketCount >= 2)); + + if(!bEnableOverflow) + base_type::set_max_load_factor(10000.f); // Set it so that we will never resize. 
+ + mAllocator.reset(mNodeBuffer); + base_type::insert(x.begin(), x.end()); + } + + + template + inline fixed_hash_set::fixed_hash_set(this_type&& x, const overflow_allocator_type& overflowAllocator) + : base_type(prime_rehash_policy::GetPrevBucketCountOnly(bucketCount), + x.hash_function(), x.equal_function(), fixed_allocator_type(NULL, mBucketBuffer, overflowAllocator)) + { + // This implementation is the same as above. If we could rely on using C++11 delegating constructor support then we could just call that here. + mAllocator.copy_overflow_allocator(x.mAllocator); + + #if EASTL_NAME_ENABLED + mAllocator.set_name(x.mAllocator.get_name()); + #endif + + EASTL_CT_ASSERT((nodeCount >= 1) && (bucketCount >= 2)); + + if(!bEnableOverflow) + base_type::set_max_load_factor(10000.f); // Set it so that we will never resize. + + mAllocator.reset(mNodeBuffer); + base_type::insert(x.begin(), x.end()); + } + + + template + inline fixed_hash_set:: + fixed_hash_set(std::initializer_list ilist, const overflow_allocator_type& overflowAllocator) + : base_type(prime_rehash_policy::GetPrevBucketCountOnly(bucketCount), Hash(), + Predicate(), fixed_allocator_type(NULL, mBucketBuffer, overflowAllocator)) + { + EASTL_CT_ASSERT((nodeCount >= 1) && (bucketCount >= 2)); + + if(!bEnableOverflow) + base_type::set_max_load_factor(10000.f); // Set it so that we will never resize. + + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_HASH_SET_DEFAULT_NAME); + #endif + + mAllocator.reset(mNodeBuffer); + base_type::insert(ilist.begin(), ilist.end()); + } + + + template + typename fixed_hash_set::this_type& + fixed_hash_set::operator=(const this_type& x) + { + base_type::operator=(x); + return *this; + } + + + template + inline typename fixed_hash_set::this_type& + fixed_hash_set::operator=(this_type&& x) + { + operator=(x); + return *this; + } + + + template + inline typename fixed_hash_set::this_type& + fixed_hash_set::operator=(std::initializer_list ilist) + { + base_type::clear(); + base_type::insert(ilist.begin(), ilist.end()); + return *this; + } + + + template + inline void fixed_hash_set:: + swap(this_type& x) + { + // We must do a brute-force swap, because fixed containers cannot share memory allocations. + // Note that we create a temp value on the stack. This approach may fail if the size of the + // container is too large. We have a rule against allocating memory from the heap, and so + // if the user wants to swap two large objects of this class, the user will currently need + // to implement it manually. To consider: add code to allocate a temporary buffer if the + // size of the container is too large for the stack. + EASTL_ASSERT(sizeof(x) < EASTL_MAX_STACK_USAGE); // It is dangerous to try to create objects that are too big for the stack. + + const this_type temp(*this); // Can't call eastl::swap because that would + *this = x; // itself call this member swap function. 
+ x = temp; + } + + + template + void fixed_hash_set:: + reset_lose_memory() + { + base_type::reset_lose_memory(); + base_type::get_allocator().reset(mNodeBuffer); + } + + + template + inline typename fixed_hash_set::size_type + fixed_hash_set::max_size() const + { + return kMaxSize; + } + + + template + inline const typename fixed_hash_set::overflow_allocator_type& + fixed_hash_set::get_overflow_allocator() const EA_NOEXCEPT + { + return mAllocator.get_overflow_allocator(); + } + + + template + inline typename fixed_hash_set::overflow_allocator_type& + fixed_hash_set::get_overflow_allocator() EA_NOEXCEPT + { + return mAllocator.get_overflow_allocator(); + } + + + template + inline void fixed_hash_set:: + set_overflow_allocator(const overflow_allocator_type& allocator) + { + mAllocator.set_overflow_allocator(allocator); + } + + /////////////////////////////////////////////////////////////////////// + // global operators + /////////////////////////////////////////////////////////////////////// + + template + inline void swap(fixed_hash_set& a, + fixed_hash_set& b) + { + a.swap(b); + } + + + + + /////////////////////////////////////////////////////////////////////// + // fixed_hash_multiset + /////////////////////////////////////////////////////////////////////// + + template + inline fixed_hash_multiset:: + fixed_hash_multiset(const overflow_allocator_type& overflowAllocator) + : base_type(prime_rehash_policy::GetPrevBucketCountOnly(bucketCount), Hash(), + Predicate(), fixed_allocator_type(NULL, mBucketBuffer, overflowAllocator)) + { + EASTL_CT_ASSERT((nodeCount >= 1) && (bucketCount >= 2)); + + if(!bEnableOverflow) + base_type::set_max_load_factor(10000.f); // Set it so that we will never resize. + + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_HASH_MULTISET_DEFAULT_NAME); + #endif + + mAllocator.reset(mNodeBuffer); + } + + + template + inline fixed_hash_multiset:: + fixed_hash_multiset(const Hash& hashFunction, + const Predicate& predicate) + : base_type(prime_rehash_policy::GetPrevBucketCountOnly(bucketCount), hashFunction, + predicate, fixed_allocator_type(NULL, mBucketBuffer)) + { + EASTL_CT_ASSERT((nodeCount >= 1) && (bucketCount >= 2)); + + if(!bEnableOverflow) + base_type::set_max_load_factor(10000.f); // Set it so that we will never resize. + + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_HASH_MULTISET_DEFAULT_NAME); + #endif + + mAllocator.reset(mNodeBuffer); + } + + + template + inline fixed_hash_multiset:: + fixed_hash_multiset(const Hash& hashFunction, + const Predicate& predicate, + const overflow_allocator_type& overflowAllocator) + : base_type(prime_rehash_policy::GetPrevBucketCountOnly(bucketCount), hashFunction, + predicate, fixed_allocator_type(NULL, mBucketBuffer, overflowAllocator)) + { + EASTL_CT_ASSERT((nodeCount >= 1) && (bucketCount >= 2)); + + if(!bEnableOverflow) + base_type::set_max_load_factor(10000.f); // Set it so that we will never resize. 
+ + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_HASH_MULTISET_DEFAULT_NAME); + #endif + + mAllocator.reset(mNodeBuffer); + } + + + template + template + inline fixed_hash_multiset:: + fixed_hash_multiset(InputIterator first, InputIterator last, + const Hash& hashFunction, + const Predicate& predicate) + : base_type(prime_rehash_policy::GetPrevBucketCountOnly(bucketCount), hashFunction, + predicate, fixed_allocator_type(NULL, mBucketBuffer)) + { + EASTL_CT_ASSERT((nodeCount >= 1) && (bucketCount >= 2)); + + if(!bEnableOverflow) + base_type::set_max_load_factor(10000.f); // Set it so that we will never resize. + + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_HASH_MULTISET_DEFAULT_NAME); + #endif + + mAllocator.reset(mNodeBuffer); + base_type::insert(first, last); + } + + + template + inline fixed_hash_multiset:: + fixed_hash_multiset(const this_type& x) + : base_type(prime_rehash_policy::GetPrevBucketCountOnly(bucketCount), x.hash_function(), + x.equal_function(), fixed_allocator_type(NULL, mBucketBuffer)) + { + mAllocator.copy_overflow_allocator(x.mAllocator); + + #if EASTL_NAME_ENABLED + mAllocator.set_name(x.mAllocator.get_name()); + #endif + + EASTL_CT_ASSERT((nodeCount >= 1) && (bucketCount >= 2)); + + if(!bEnableOverflow) + base_type::set_max_load_factor(10000.f); // Set it so that we will never resize. + + mAllocator.reset(mNodeBuffer); + base_type::insert(x.begin(), x.end()); + } + + + template + inline fixed_hash_multiset::fixed_hash_multiset(this_type&& x) + : base_type(prime_rehash_policy::GetPrevBucketCountOnly(bucketCount), x.hash_function(), + x.equal_function(), fixed_allocator_type(NULL, mBucketBuffer)) + { + // This implementation is the same as above. If we could rely on using C++11 delegating constructor support then we could just call that here. + mAllocator.copy_overflow_allocator(x.mAllocator); + + #if EASTL_NAME_ENABLED + mAllocator.set_name(x.mAllocator.get_name()); + #endif + + EASTL_CT_ASSERT((nodeCount >= 1) && (bucketCount >= 2)); + + if(!bEnableOverflow) + base_type::set_max_load_factor(10000.f); // Set it so that we will never resize. + + mAllocator.reset(mNodeBuffer); + base_type::insert(x.begin(), x.end()); + } + + + template + inline fixed_hash_multiset::fixed_hash_multiset(this_type&& x, const overflow_allocator_type& overflowAllocator) + : base_type(prime_rehash_policy::GetPrevBucketCountOnly(bucketCount), + x.hash_function(), x.equal_function(), fixed_allocator_type(NULL, mBucketBuffer, overflowAllocator)) + { + // This implementation is the same as above. If we could rely on using C++11 delegating constructor support then we could just call that here. + mAllocator.copy_overflow_allocator(x.mAllocator); + + #if EASTL_NAME_ENABLED + mAllocator.set_name(x.mAllocator.get_name()); + #endif + + EASTL_CT_ASSERT((nodeCount >= 1) && (bucketCount >= 2)); + + if(!bEnableOverflow) + base_type::set_max_load_factor(10000.f); // Set it so that we will never resize. + + mAllocator.reset(mNodeBuffer); + base_type::insert(x.begin(), x.end()); + } + + + template + inline fixed_hash_multiset:: + fixed_hash_multiset(std::initializer_list ilist, const overflow_allocator_type& overflowAllocator) + : base_type(prime_rehash_policy::GetPrevBucketCountOnly(bucketCount), Hash(), + Predicate(), fixed_allocator_type(NULL, mBucketBuffer, overflowAllocator)) + { + EASTL_CT_ASSERT((nodeCount >= 1) && (bucketCount >= 2)); + + if(!bEnableOverflow) + base_type::set_max_load_factor(10000.f); // Set it so that we will never resize. 
+ + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_HASH_MULTISET_DEFAULT_NAME); + #endif + + mAllocator.reset(mNodeBuffer); + base_type::insert(ilist.begin(), ilist.end()); + } + + + template + inline typename fixed_hash_multiset::this_type& + fixed_hash_multiset::operator=(const this_type& x) + { + base_type::operator=(x); + return *this; + } + + + template + inline typename fixed_hash_multiset::this_type& + fixed_hash_multiset::operator=(this_type&& x) + { + base_type::operator=(x); + return *this; + } + + + template + inline typename fixed_hash_multiset::this_type& + fixed_hash_multiset::operator=(std::initializer_list ilist) + { + base_type::clear(); + base_type::insert(ilist.begin(), ilist.end()); + return *this; + } + + + template + inline void fixed_hash_multiset:: + swap(this_type& x) + { + // Fixed containers use a special swap that can deal with excessively large buffers. + eastl::fixed_swap(*this, x); + } + + + template + inline void fixed_hash_multiset:: + reset_lose_memory() + { + base_type::reset_lose_memory(); + base_type::get_allocator().reset(mNodeBuffer); + } + + + template + inline typename fixed_hash_multiset::size_type + fixed_hash_multiset::max_size() const + { + return kMaxSize; + } + + + template + inline const typename fixed_hash_multiset::overflow_allocator_type& + fixed_hash_multiset::get_overflow_allocator() const EA_NOEXCEPT + { + return mAllocator.get_overflow_allocator(); + } + + + template + inline typename fixed_hash_multiset::overflow_allocator_type& + fixed_hash_multiset::get_overflow_allocator() EA_NOEXCEPT + { + return mAllocator.get_overflow_allocator(); + } + + + template + inline void fixed_hash_multiset:: + set_overflow_allocator(const overflow_allocator_type& allocator) + { + mAllocator.set_overflow_allocator(allocator); + } + + + /////////////////////////////////////////////////////////////////////// + // global operators + /////////////////////////////////////////////////////////////////////// + + template + inline void swap(fixed_hash_multiset& a, + fixed_hash_multiset& b) + { + // Fixed containers use a special swap that can deal with excessively large buffers. + eastl::fixed_swap(a, b); + } + + +} // namespace eastl + +EA_RESTORE_VC_WARNING() + +#endif // Header include guard + + + + + + + + + + + + diff --git a/libkram/eastl/include/EASTL/fixed_list.h b/libkram/eastl/include/EASTL/fixed_list.h new file mode 100644 index 00000000..9e48089c --- /dev/null +++ b/libkram/eastl/include/EASTL/fixed_list.h @@ -0,0 +1,388 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// This file implements a list which uses a fixed size memory pool for its nodes. +/////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_FIXED_LIST_H +#define EASTL_FIXED_LIST_H + + +#include +#include + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + + + +namespace eastl +{ + /// EASTL_FIXED_LIST_DEFAULT_NAME + /// + /// Defines a default container name in the absence of a user-provided name. + /// In the case of fixed-size containers, the allocator name always refers + /// to overflow allocations. 
+ /// + #ifndef EASTL_FIXED_LIST_DEFAULT_NAME + #define EASTL_FIXED_LIST_DEFAULT_NAME EASTL_DEFAULT_NAME_PREFIX " fixed_list" // Unless the user overrides something, this is "EASTL fixed_list". + #endif + + + /// EASTL_FIXED_LIST_DEFAULT_ALLOCATOR + /// + #ifndef EASTL_FIXED_LIST_DEFAULT_ALLOCATOR + #define EASTL_FIXED_LIST_DEFAULT_ALLOCATOR overflow_allocator_type(EASTL_FIXED_LIST_DEFAULT_NAME) + #endif + + + + /// fixed_list + /// + /// fixed_list is a list which uses a single block of contiguous memory + /// for its nodes. The purpose of this is to reduce memory usage relative + /// to a conventional memory allocation system (with block headers), to + /// increase allocation speed (often due to avoidance of mutex locks), + /// to increase performance (due to better memory locality), and to decrease + /// memory fragmentation due to the way that fixed block allocators work. + /// + /// The primary downside to a fixed_list is that the number of nodes it + /// can contain is fixed upon its declaration. If you want a fixed_list + /// that doesn't have this limitation, then you probably don't want a + /// fixed_list. You can always create your own memory allocator that works + /// the way you want. + /// + /// Template parameters: + /// T The type of object the list holds. + /// nodeCount The max number of objects to contain. + /// bEnableOverflow Whether or not we should use the overflow heap if our object pool is exhausted. + /// OverflowAllocator Overflow allocator, which is only used if bEnableOverflow == true. Defaults to the global heap. + /// + template + class fixed_list : public list::node_type), + nodeCount, EASTL_ALIGN_OF(T), 0, bEnableOverflow, OverflowAllocator> > + { + public: + typedef fixed_node_allocator::node_type), nodeCount, + EASTL_ALIGN_OF(T), 0, bEnableOverflow, OverflowAllocator> fixed_allocator_type; + typedef OverflowAllocator overflow_allocator_type; + typedef list base_type; + typedef fixed_list this_type; + typedef typename base_type::size_type size_type; + typedef typename base_type::value_type value_type; + typedef typename base_type::node_type node_type; + typedef typename base_type::iterator iterator; + + enum { kMaxSize = nodeCount }; + + using base_type::assign; + using base_type::resize; + using base_type::insert; + using base_type::size; + using base_type::get_allocator; + + protected: + char mBuffer[fixed_allocator_type::kBufferSize]; // kBufferSize will take into account alignment requirements. + + using base_type::internalAllocator; + + public: + fixed_list(); + explicit fixed_list(const overflow_allocator_type& overflowAllocator); // Only applicable if bEnableOverflow is true. + explicit fixed_list(size_type n); // Currently we don't support overflowAllocator specification for other constructors, for simplicity. + fixed_list(size_type n, const value_type& value); + fixed_list(const this_type& x); + fixed_list(this_type&& x); + fixed_list(this_type&&, const overflow_allocator_type& overflowAllocator); + fixed_list(std::initializer_list ilist, const overflow_allocator_type& overflowAllocator = EASTL_FIXED_LIST_DEFAULT_ALLOCATOR); + + template + fixed_list(InputIterator first, InputIterator last); + + this_type& operator=(const this_type& x); + this_type& operator=(std::initializer_list ilist); + this_type& operator=(this_type&& x); + + void swap(this_type& x); + void reset_lose_memory(); // This is a unilateral reset to an initially empty state. No destructors are called, no deallocation occurs. 
+ size_type max_size() const; // Returns the max fixed size, which is the user-supplied nodeCount parameter. + bool full() const; // Returns true if the fixed space has been fully allocated. Note that if overflow is enabled, the container size can be greater than nodeCount but full() could return true because the fixed space may have a recently freed slot. + bool has_overflowed() const; // Returns true if the allocations spilled over into the overflow allocator. Meaningful only if overflow is enabled. + bool can_overflow() const; // Returns the value of the bEnableOverflow template parameter. + + // OverflowAllocator + const overflow_allocator_type& get_overflow_allocator() const EA_NOEXCEPT; + overflow_allocator_type& get_overflow_allocator() EA_NOEXCEPT; + void set_overflow_allocator(const overflow_allocator_type& allocator); + }; // fixed_list + + + + /////////////////////////////////////////////////////////////////////// + // fixed_list + /////////////////////////////////////////////////////////////////////// + + template + inline fixed_list::fixed_list() + : base_type(fixed_allocator_type(mBuffer)) + { + #if EASTL_NAME_ENABLED + internalAllocator().set_name(EASTL_FIXED_LIST_DEFAULT_NAME); + #endif + } + + + template + inline fixed_list::fixed_list(const overflow_allocator_type& overflowAllocator) + : base_type(fixed_allocator_type(mBuffer, overflowAllocator)) + { + #if EASTL_NAME_ENABLED + internalAllocator().set_name(EASTL_FIXED_LIST_DEFAULT_NAME); + #endif + } + + + template + inline fixed_list::fixed_list(size_type n) + : base_type(fixed_allocator_type(mBuffer)) + { + #if EASTL_NAME_ENABLED + internalAllocator().set_name(EASTL_FIXED_LIST_DEFAULT_NAME); + #endif + + resize(n); + } + + + template + inline fixed_list::fixed_list(size_type n, const value_type& value) + : base_type(fixed_allocator_type(mBuffer)) + { + #if EASTL_NAME_ENABLED + internalAllocator().set_name(EASTL_FIXED_LIST_DEFAULT_NAME); + #endif + + resize(n, value); + } + + + template + inline fixed_list::fixed_list(const this_type& x) + : base_type(fixed_allocator_type(mBuffer)) + { + internalAllocator().copy_overflow_allocator(x.internalAllocator()); + + #if EASTL_NAME_ENABLED + internalAllocator().set_name(x.internalAllocator().get_name()); + #endif + + assign(x.begin(), x.end()); + } + + + template + inline fixed_list::fixed_list(this_type&& x) + : base_type(fixed_allocator_type(mBuffer)) + { + // Since we are a fixed_list, we can't normally swap pointers unless both this and + // x are using using overflow and the overflow allocators are equal. To do: + //if(has_overflowed() && x.has_overflowed() && (get_overflow_allocator() == x.get_overflow_allocator())) + //{ + // We can swap contents and may need to swap the allocators as well. + //} + + // The following is currently identical to the fixed_vector(const this_type& x) code above. If it stays that + // way then we may want to make a shared implementation. + internalAllocator().copy_overflow_allocator(x.internalAllocator()); + + #if EASTL_NAME_ENABLED + internalAllocator().set_name(x.internalAllocator().get_name()); + #endif + + assign(x.begin(), x.end()); + } + + + template + inline fixed_list::fixed_list(this_type&& x, const overflow_allocator_type& overflowAllocator) + : base_type(fixed_allocator_type(mBuffer, overflowAllocator)) + { + // See comments above. 
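// A usage sketch for fixed_list, illustrating the capacity and overflow behaviour
// described by the full()/has_overflowed()/can_overflow() comments above. The
// element type, node count and values are illustrative; spilling past the fixed
// pool assumes the application provides the EASTL operator new[] overloads used
// by the default overflow allocator.
#include <EASTL/fixed_list.h>
#include <cassert>

void fixed_list_example()
{
    // Eight nodes are embedded in the object; bEnableOverflow defaults to true,
    // so a ninth element falls back to the overflow (heap) allocator.
    eastl::fixed_list<int, 8> values;

    for (int i = 0; i < 8; ++i)
        values.push_back(i);

    assert(values.full());              // the fixed pool is exhausted
    assert(!values.has_overflowed());   // but nothing has spilled to the heap yet
    assert(values.max_size() == 8);     // the nodeCount template parameter

    values.push_back(8);                // permitted only because overflow is enabled
    assert(values.has_overflowed());

    // With bEnableOverflow == false the capacity is hard: exceeding nodeCount
    // is a usage error instead of a heap allocation.
    eastl::fixed_list<int, 8, false> hardCap;
    hardCap.push_back(42);
}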
+ internalAllocator().copy_overflow_allocator(x.internalAllocator()); + + #if EASTL_NAME_ENABLED + internalAllocator().set_name(x.internalAllocator().get_name()); + #endif + + assign(x.begin(), x.end()); + } + + + template + inline fixed_list::fixed_list(std::initializer_list ilist, const overflow_allocator_type& overflowAllocator) + : base_type(fixed_allocator_type(mBuffer, overflowAllocator)) + { + assign(ilist.begin(), ilist.end()); + } + + + template + template + fixed_list::fixed_list(InputIterator first, InputIterator last) + : base_type(fixed_allocator_type(mBuffer)) + { + #if EASTL_NAME_ENABLED + internalAllocator().set_name(EASTL_FIXED_LIST_DEFAULT_NAME); + #endif + + assign(first, last); + } + + + template + inline typename fixed_list::this_type& + fixed_list::operator=(const this_type& x) + { + if(this != &x) + { + base_type::clear(); + + #if EASTL_ALLOCATOR_COPY_ENABLED + internalAllocator() = x.internalAllocator(); // The primary effect of this is to copy the overflow allocator. + #endif + + base_type::assign(x.begin(), x.end()); // It would probably be better to implement this like list::operator=. + } + return *this; + } + + + template + inline typename fixed_list::this_type& + fixed_list::operator=(this_type&& x) + { + return operator=(x); + } + + + template + inline typename fixed_list::this_type& + fixed_list::operator=(std::initializer_list ilist) + { + base_type::clear(); + base_type::assign(ilist.begin(), ilist.end()); + return *this; + } + + + template + inline void fixed_list::swap(this_type& x) + { + // Fixed containers use a special swap that can deal with excessively large buffers. + eastl::fixed_swap(*this, x); + } + + + template + inline void fixed_list::reset_lose_memory() + { + base_type::reset_lose_memory(); + get_allocator().reset(mBuffer); + } + + + template + inline typename fixed_list::size_type + fixed_list::max_size() const + { + return kMaxSize; + } + + + template + inline bool fixed_list::full() const + { + // Note: This implementation isn't right in the case of bEnableOverflow = true because it will return + // false for the case that there are free nodes from the buffer but also nodes from the dynamic heap. + // This can happen if the container exceeds the fixed size and then frees some of the nodes from the fixed buffer. + // The only simple fix for this is to take on another member variable which tracks whether this overflow + // has occurred at some point in the past. + return !internalAllocator().can_allocate(); // This is the quickest way of detecting this. has_overflowed uses a different method because it can't use this quick method. + } + + + template + inline bool fixed_list::has_overflowed() const + { + #if EASTL_FIXED_SIZE_TRACKING_ENABLED // If we can use this faster pathway (as size() may be slow)... 
+ return (internalAllocator().mPool.mnPeakSize > kMaxSize); + #else + return (size() > kMaxSize); + #endif + } + + + template + inline bool fixed_list::can_overflow() const + { + return bEnableOverflow; + } + + + template + inline const typename fixed_list::overflow_allocator_type& + fixed_list::get_overflow_allocator() const EA_NOEXCEPT + { + return internalAllocator().get_overflow_allocator(); + } + + + template + inline typename fixed_list::overflow_allocator_type& + fixed_list::get_overflow_allocator() EA_NOEXCEPT + { + return internalAllocator().get_overflow_allocator(); + } + + + template + inline void + fixed_list::set_overflow_allocator(const overflow_allocator_type& allocator) + { + internalAllocator().set_overflow_allocator(allocator); + } + + + /////////////////////////////////////////////////////////////////////// + // global operators + /////////////////////////////////////////////////////////////////////// + + template + inline void swap(fixed_list& a, + fixed_list& b) + { + // Fixed containers use a special swap that can deal with excessively large buffers. + eastl::fixed_swap(a, b); + } + + +} // namespace eastl + + +#endif // Header include guard + + + + + + + + + + + + diff --git a/libkram/eastl/include/EASTL/fixed_map.h b/libkram/eastl/include/EASTL/fixed_map.h new file mode 100644 index 00000000..c01db08f --- /dev/null +++ b/libkram/eastl/include/EASTL/fixed_map.h @@ -0,0 +1,580 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// This file implements a map and multimap which use a fixed size memory +// pool for their nodes. +/////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_FIXED_MAP_H +#define EASTL_FIXED_MAP_H + + +#include +#include // Included because fixed_rbtree_base resides here. + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + + + +namespace eastl +{ + /// EASTL_FIXED_MAP_DEFAULT_NAME + /// + /// Defines a default container name in the absence of a user-provided name. + /// In the case of fixed-size containers, the allocator name always refers + /// to overflow allocations. + /// + #ifndef EASTL_FIXED_MAP_DEFAULT_NAME + #define EASTL_FIXED_MAP_DEFAULT_NAME EASTL_DEFAULT_NAME_PREFIX " fixed_map" // Unless the user overrides something, this is "EASTL fixed_map". + #endif + + #ifndef EASTL_FIXED_MULTIMAP_DEFAULT_NAME + #define EASTL_FIXED_MULTIMAP_DEFAULT_NAME EASTL_DEFAULT_NAME_PREFIX " fixed_multimap" // Unless the user overrides something, this is "EASTL fixed_multimap". + #endif + + + /// EASTL_FIXED_MAP_DEFAULT_ALLOCATOR + /// EASTL_FIXED_MULTIMAP_DEFAULT_ALLOCATOR + /// + #ifndef EASTL_FIXED_MAP_DEFAULT_ALLOCATOR + #define EASTL_FIXED_MAP_DEFAULT_ALLOCATOR overflow_allocator_type(EASTL_FIXED_MAP_DEFAULT_NAME) + #endif + + #ifndef EASTL_FIXED_MULTIMAP_DEFAULT_ALLOCATOR + #define EASTL_FIXED_MULTIMAP_DEFAULT_ALLOCATOR overflow_allocator_type(EASTL_FIXED_MULTIMAP_DEFAULT_NAME) + #endif + + + + /// fixed_map + /// + /// Implements a map with a fixed block of memory identified by the + /// nodeCount template parameter. + /// + /// Key The key object (key in the key/value pair). 
+ /// T The mapped object (value in the key/value pair). + /// nodeCount The max number of objects to contain. + /// bEnableOverflow Whether or not we should use the global heap if our object pool is exhausted. + /// Compare Compare function/object for set ordering. + /// OverflowAllocator Overflow allocator, which is only used if bEnableOverflow == true. Defaults to the global heap. + /// + template , typename OverflowAllocator = EASTLAllocatorType> + class fixed_map : public map::node_type), + nodeCount, EASTL_ALIGN_OF(eastl::pair), 0, bEnableOverflow, OverflowAllocator> > + { + public: + typedef fixed_node_allocator::node_type), nodeCount, + EASTL_ALIGN_OF(eastl::pair), 0, bEnableOverflow, OverflowAllocator> fixed_allocator_type; + typedef typename fixed_allocator_type::overflow_allocator_type overflow_allocator_type; + typedef fixed_map this_type; + typedef map base_type; + typedef typename base_type::value_type value_type; + typedef typename base_type::node_type node_type; + typedef typename base_type::size_type size_type; + + enum { kMaxSize = nodeCount }; + + using base_type::insert; + + protected: + char mBuffer[fixed_allocator_type::kBufferSize]; // kBufferSize will take into account alignment requirements. + + using base_type::mAllocator; + + public: + fixed_map(); + explicit fixed_map(const overflow_allocator_type& overflowAllocator); + explicit fixed_map(const Compare& compare); + fixed_map(const this_type& x); + fixed_map(this_type&& x); + fixed_map(this_type&& x, const overflow_allocator_type& overflowAllocator); + fixed_map(std::initializer_list ilist, const overflow_allocator_type& overflowAllocator = EASTL_FIXED_MAP_DEFAULT_ALLOCATOR); + + template + fixed_map(InputIterator first, InputIterator last); + + this_type& operator=(const this_type& x); + this_type& operator=(std::initializer_list ilist); + this_type& operator=(this_type&& x); + + void swap(this_type& x); + + void reset_lose_memory(); // This is a unilateral reset to an initially empty state. No destructors are called, no deallocation occurs. + + size_type max_size() const; + + const overflow_allocator_type& get_overflow_allocator() const EA_NOEXCEPT; + overflow_allocator_type& get_overflow_allocator() EA_NOEXCEPT; + void set_overflow_allocator(const overflow_allocator_type& allocator); + }; // fixed_map + + + + + /// fixed_multimap + /// + /// Implements a multimap with a fixed block of memory identified by the + /// nodeCount template parameter. + /// + /// Key The key object (key in the key/value pair). + /// T The mapped object (value in the key/value pair). + /// nodeCount The max number of objects to contain. + /// bEnableOverflow Whether or not we should use the global heap if our object pool is exhausted. + /// Compare Compare function/object for set ordering. + /// OverflowAllocator Overflow allocator, which is only used if bEnableOverflow == true. Defaults to the global heap. 
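// A usage sketch for fixed_map, and for the fixed_multimap documented directly
// above. The key/value types, node counts and contents are illustrative; heap
// overflow assumes the usual EASTL operator new[] overloads are available.
#include <EASTL/fixed_map.h>
#include <EASTL/string.h>
#include <EASTL/utility.h>   // eastl::make_pair
#include <cassert>

void fixed_map_example()
{
    // At most 16 nodes in the embedded pool; further insertions overflow to the heap.
    eastl::fixed_map<int, eastl::string, 16> names;
    names[1] = "one";
    names[2] = "two";
    assert(names.find(2) != names.end());
    assert(names.max_size() == 16);

    // fixed_multimap allows duplicate keys while using the same kind of fixed pool.
    eastl::fixed_multimap<int, eastl::string, 16> tags;
    tags.insert(eastl::make_pair(3, eastl::string("odd")));
    tags.insert(eastl::make_pair(3, eastl::string("prime")));
    assert(tags.count(3) == 2);
}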
+ /// + template , typename OverflowAllocator = EASTLAllocatorType> + class fixed_multimap : public multimap::node_type), + nodeCount, EASTL_ALIGN_OF(eastl::pair), 0, bEnableOverflow, OverflowAllocator> > + { + public: + typedef fixed_node_allocator::node_type), nodeCount, + EASTL_ALIGN_OF(eastl::pair), 0, bEnableOverflow, OverflowAllocator> fixed_allocator_type; + typedef typename fixed_allocator_type::overflow_allocator_type overflow_allocator_type; + typedef multimap base_type; + typedef fixed_multimap this_type; + typedef typename base_type::value_type value_type; + typedef typename base_type::node_type node_type; + typedef typename base_type::size_type size_type; + + enum { kMaxSize = nodeCount }; + + using base_type::insert; + + protected: + char mBuffer[fixed_allocator_type::kBufferSize]; // kBufferSize will take into account alignment requirements. + + using base_type::mAllocator; + using base_type::get_compare; + + public: + fixed_multimap(); + fixed_multimap(const overflow_allocator_type& overflowAllocator); + explicit fixed_multimap(const Compare& compare); + fixed_multimap(const this_type& x); + fixed_multimap(this_type&& x); + fixed_multimap(this_type&& x, const overflow_allocator_type& overflowAllocator); + fixed_multimap(std::initializer_list ilist, const overflow_allocator_type& overflowAllocator = EASTL_FIXED_MULTIMAP_DEFAULT_ALLOCATOR); + + template + fixed_multimap(InputIterator first, InputIterator last); + + this_type& operator=(const this_type& x); + this_type& operator=(std::initializer_list ilist); + this_type& operator=(this_type&& x); + + void swap(this_type& x); + + void reset_lose_memory(); // This is a unilateral reset to an initially empty state. No destructors are called, no deallocation occurs. + + size_type max_size() const; + + const overflow_allocator_type& get_overflow_allocator() const EA_NOEXCEPT; + overflow_allocator_type& get_overflow_allocator() EA_NOEXCEPT; + void set_overflow_allocator(const overflow_allocator_type& allocator); + }; // fixed_multimap + + + + + + /////////////////////////////////////////////////////////////////////// + // fixed_map + /////////////////////////////////////////////////////////////////////// + + template + inline fixed_map::fixed_map() + : base_type(fixed_allocator_type(mBuffer)) + { + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_MAP_DEFAULT_NAME); + #endif + } + + + template + inline fixed_map::fixed_map(const overflow_allocator_type& overflowAllocator) + : base_type(fixed_allocator_type(mBuffer, overflowAllocator)) + { + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_MAP_DEFAULT_NAME); + #endif + } + + + template + inline fixed_map::fixed_map(const Compare& compare) + : base_type(compare, fixed_allocator_type(mBuffer)) + { + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_MAP_DEFAULT_NAME); + #endif + } + + + template + inline fixed_map::fixed_map(const this_type& x) + : base_type(x.get_compare(), fixed_allocator_type(mBuffer)) + { + mAllocator.copy_overflow_allocator(x.mAllocator); + + #if EASTL_NAME_ENABLED + mAllocator.set_name(x.mAllocator.get_name()); + #endif + + base_type::operator=(x); + } + + + template + inline fixed_map::fixed_map(this_type&& x) + : base_type(x.get_compare(), fixed_allocator_type(mBuffer)) + { + mAllocator.copy_overflow_allocator(x.mAllocator); + + #if EASTL_NAME_ENABLED + mAllocator.set_name(x.mAllocator.get_name()); + #endif + + base_type::operator=(x); + } + + + template + inline fixed_map::fixed_map(this_type&& x, const overflow_allocator_type& 
overflowAllocator) + : base_type(x.get_compare(), fixed_allocator_type(mBuffer, overflowAllocator)) + { + mAllocator.copy_overflow_allocator(x.mAllocator); + + #if EASTL_NAME_ENABLED + mAllocator.set_name(x.mAllocator.get_name()); + #endif + + base_type::operator=(x); + } + + + template + fixed_map::fixed_map(std::initializer_list ilist, const overflow_allocator_type& overflowAllocator) + : base_type(fixed_allocator_type(mBuffer, overflowAllocator)) + { + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_MAP_DEFAULT_NAME); + #endif + + insert(ilist.begin(), ilist.end()); + } + + + template + template + fixed_map::fixed_map(InputIterator first, InputIterator last) + : base_type(fixed_allocator_type(mBuffer)) + { + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_MAP_DEFAULT_NAME); + #endif + + insert(first, last); + } + + + template + inline typename fixed_map::this_type& + fixed_map::operator=(const this_type& x) + { + base_type::operator=(x); + return *this; + } + + + template + inline typename fixed_map::this_type& + fixed_map::operator=(std::initializer_list ilist) + { + base_type::clear(); + insert(ilist.begin(), ilist.end()); + return *this; + } + + + template + inline typename fixed_map::this_type& + fixed_map::operator=(this_type&& x) + { + base_type::operator=(x); + return *this; + } + + + template + inline void fixed_map::swap(this_type& x) + { + // Fixed containers use a special swap that can deal with excessively large buffers. + eastl::fixed_swap(*this, x); + } + + + template + inline void fixed_map::reset_lose_memory() + { + base_type::reset_lose_memory(); + base_type::get_allocator().reset(mBuffer); + } + + + template + inline typename fixed_map::size_type + fixed_map::max_size() const + { + return kMaxSize; + } + + + template + inline const typename fixed_map::overflow_allocator_type& + fixed_map::get_overflow_allocator() const EA_NOEXCEPT + { + return mAllocator.get_overflow_allocator(); + } + + + template + inline typename fixed_map::overflow_allocator_type& + fixed_map::get_overflow_allocator() EA_NOEXCEPT + { + return mAllocator.get_overflow_allocator(); + } + + + template + inline void + fixed_map::set_overflow_allocator(const overflow_allocator_type& allocator) + { + mAllocator.set_overflow_allocator(allocator); + } + + /////////////////////////////////////////////////////////////////////// + // global operators + /////////////////////////////////////////////////////////////////////// + + template + inline void swap(fixed_map& a, + fixed_map& b) + { + // Fixed containers use a special swap that can deal with excessively large buffers. 
+ eastl::fixed_swap(a, b); + } + + + + + /////////////////////////////////////////////////////////////////////// + // fixed_multimap + /////////////////////////////////////////////////////////////////////// + + template + inline fixed_multimap::fixed_multimap() + : base_type(fixed_allocator_type(mBuffer)) + { + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_MULTIMAP_DEFAULT_NAME); + #endif + } + + + template + inline fixed_multimap::fixed_multimap(const overflow_allocator_type& overflowAllocator) + : base_type(fixed_allocator_type(mBuffer, overflowAllocator)) + { + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_MULTIMAP_DEFAULT_NAME); + #endif + } + + + template + inline fixed_multimap::fixed_multimap(const Compare& compare) + : base_type(compare, fixed_allocator_type(mBuffer)) + { + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_MULTIMAP_DEFAULT_NAME); + #endif + } + + + template + inline fixed_multimap::fixed_multimap(const this_type& x) + : base_type(x.get_compare(), fixed_allocator_type(mBuffer)) + { + mAllocator.copy_overflow_allocator(x.mAllocator); + + #if EASTL_NAME_ENABLED + mAllocator.set_name(x.mAllocator.get_name()); + #endif + + base_type::operator=(x); + } + + + template + inline fixed_multimap::fixed_multimap(this_type&& x) + : base_type(x.get_compare(), fixed_allocator_type(mBuffer)) + { + mAllocator.copy_overflow_allocator(x.mAllocator); + + #if EASTL_NAME_ENABLED + mAllocator.set_name(x.mAllocator.get_name()); + #endif + + base_type::operator=(x); + } + + + template + inline fixed_multimap::fixed_multimap(this_type&& x, const overflow_allocator_type& overflowAllocator) + : base_type(x.get_compare(), fixed_allocator_type(mBuffer, overflowAllocator)) + { + mAllocator.copy_overflow_allocator(x.mAllocator); + + #if EASTL_NAME_ENABLED + mAllocator.set_name(x.mAllocator.get_name()); + #endif + + base_type::operator=(x); + } + + + template + fixed_multimap::fixed_multimap(std::initializer_list ilist, const overflow_allocator_type& overflowAllocator) + : base_type(fixed_allocator_type(mBuffer, overflowAllocator)) + { + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_MULTIMAP_DEFAULT_NAME); + #endif + + insert(ilist.begin(), ilist.end()); + } + + + template + template + fixed_multimap:: + fixed_multimap(InputIterator first, InputIterator last) + : base_type(fixed_allocator_type(mBuffer)) + { + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_MULTIMAP_DEFAULT_NAME); + #endif + + insert(first, last); + } + + + template + inline typename fixed_multimap::this_type& + fixed_multimap::operator=(const this_type& x) + { + base_type::operator=(x); + return *this; + } + + + template + inline typename fixed_multimap::this_type& + fixed_multimap::operator=(std::initializer_list ilist) + { + base_type::clear(); + insert(ilist.begin(), ilist.end()); + return *this; + } + + + template + inline typename fixed_multimap::this_type& + fixed_multimap::operator=(this_type&& x) + { + base_type::operator=(x); + return *this; + } + + + template + inline void fixed_multimap::swap(this_type& x) + { + // Fixed containers use a special swap that can deal with excessively large buffers. 
+ eastl::fixed_swap(*this, x); + } + + + template + inline void fixed_multimap::reset_lose_memory() + { + base_type::reset_lose_memory(); + base_type::get_allocator().reset(mBuffer); + } + + + template + inline typename fixed_multimap::size_type + fixed_multimap::max_size() const + { + return kMaxSize; + } + + + template + inline const typename fixed_multimap::overflow_allocator_type& + fixed_multimap::get_overflow_allocator() const EA_NOEXCEPT + { + return mAllocator.get_overflow_allocator(); + } + + + template + inline typename fixed_multimap::overflow_allocator_type& + fixed_multimap::get_overflow_allocator() EA_NOEXCEPT + { + return mAllocator.get_overflow_allocator(); + } + + + template + inline void + fixed_multimap::set_overflow_allocator(const overflow_allocator_type& allocator) + { + mAllocator.set_overflow_allocator(allocator); + } + + + /////////////////////////////////////////////////////////////////////// + // global operators + /////////////////////////////////////////////////////////////////////// + + template + inline void swap(fixed_multimap& a, + fixed_multimap& b) + { + // Fixed containers use a special swap that can deal with excessively large buffers. + eastl::fixed_swap(a, b); + } + + +} // namespace eastl + + +#endif // Header include guard + + + + + + + + + diff --git a/libkram/eastl/include/EASTL/fixed_set.h b/libkram/eastl/include/EASTL/fixed_set.h new file mode 100644 index 00000000..e5f00236 --- /dev/null +++ b/libkram/eastl/include/EASTL/fixed_set.h @@ -0,0 +1,578 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// This file implements a set and multiset which use a fixed size memory +// pool for their nodes. +/////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_FIXED_SET_H +#define EASTL_FIXED_SET_H + + +#include +#include + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + + + +namespace eastl +{ + /// EASTL_FIXED_SET_DEFAULT_NAME + /// + /// Defines a default container name in the absence of a user-provided name. + /// In the case of fixed-size containers, the allocator name always refers + /// to overflow allocations. + /// + #ifndef EASTL_FIXED_SET_DEFAULT_NAME + #define EASTL_FIXED_SET_DEFAULT_NAME EASTL_DEFAULT_NAME_PREFIX " fixed_set" // Unless the user overrides something, this is "EASTL fixed_set". + #endif + + #ifndef EASTL_FIXED_MULTISET_DEFAULT_NAME + #define EASTL_FIXED_MULTISET_DEFAULT_NAME EASTL_DEFAULT_NAME_PREFIX " fixed_multiset" // Unless the user overrides something, this is "EASTL fixed_multiset". + #endif + + + /// EASTL_FIXED_SET_DEFAULT_ALLOCATOR + /// EASTL_FIXED_MULTISET_DEFAULT_ALLOCATOR + /// + #ifndef EASTL_FIXED_SET_DEFAULT_ALLOCATOR + #define EASTL_FIXED_SET_DEFAULT_ALLOCATOR overflow_allocator_type(EASTL_FIXED_SET_DEFAULT_NAME) + #endif + + #ifndef EASTL_FIXED_MULTISET_DEFAULT_ALLOCATOR + #define EASTL_FIXED_MULTISET_DEFAULT_ALLOCATOR overflow_allocator_type(EASTL_FIXED_MULTISET_DEFAULT_NAME) + #endif + + + + /// fixed_set + /// + /// Implements a set with a fixed block of memory identified by the + /// nodeCount template parameter. 
+ /// + /// Template parameters: + /// Key The type of object the set holds (a.k.a. value). + /// nodeCount The max number of objects to contain. + /// bEnableOverflow Whether or not we should use the global heap if our object pool is exhausted. + /// Compare Compare function/object for set ordering. + /// OverflowAllocator Overflow allocator, which is only used if bEnableOverflow == true. Defaults to the global heap. + /// + template , typename OverflowAllocator = EASTLAllocatorType> + class fixed_set : public set::node_type), + nodeCount, EASTL_ALIGN_OF(Key), 0, bEnableOverflow, OverflowAllocator> > + { + public: + typedef fixed_node_allocator::node_type), nodeCount, + EASTL_ALIGN_OF(Key), 0, bEnableOverflow, OverflowAllocator> fixed_allocator_type; + typedef typename fixed_allocator_type::overflow_allocator_type overflow_allocator_type; + typedef set base_type; + typedef fixed_set this_type; + typedef typename base_type::value_type value_type; + typedef typename base_type::node_type node_type; + typedef typename base_type::size_type size_type; + + enum { kMaxSize = nodeCount }; + + using base_type::insert; + + protected: + char mBuffer[fixed_allocator_type::kBufferSize]; // kBufferSize will take into account alignment requirements. + + using base_type::mAllocator; + using base_type::get_compare; + + public: + fixed_set(); + fixed_set(const overflow_allocator_type& overflowAllocator); + explicit fixed_set(const Compare& compare); + fixed_set(const this_type& x); + fixed_set(this_type&& x); + fixed_set(this_type&& x, const overflow_allocator_type& overflowAllocator); + fixed_set(std::initializer_list ilist, const overflow_allocator_type& overflowAllocator = EASTL_FIXED_SET_DEFAULT_ALLOCATOR); + + template + fixed_set(InputIterator first, InputIterator last); + + this_type& operator=(const this_type& x); + this_type& operator=(std::initializer_list ilist); + this_type& operator=(this_type&& x); + + void swap(this_type& x); + + void reset_lose_memory(); // This is a unilateral reset to an initially empty state. No destructors are called, no deallocation occurs. + + size_type max_size() const; + + const overflow_allocator_type& get_overflow_allocator() const EA_NOEXCEPT; + overflow_allocator_type& get_overflow_allocator() EA_NOEXCEPT; + void set_overflow_allocator(const overflow_allocator_type& allocator); + }; // fixed_set + + + + + + + /// fixed_multiset + /// + /// Implements a multiset with a fixed block of memory identified by the + /// nodeCount template parameter. + /// + /// Key The type of object the set holds (a.k.a. value). + /// nodeCount The max number of objects to contain. + /// bEnableOverflow Whether or not we should use the global heap if our object pool is exhausted. + /// Compare Compare function/object for set ordering. + /// OverflowAllocator Overflow allocator, which is only used if bEnableOverflow == true. Defaults to the global heap. 
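// A usage sketch for fixed_set, and for the fixed_multiset documented directly
// above; node counts and values are illustrative. With bEnableOverflow left at
// its default of true, exceeding the node count falls back to the overflow allocator.
#include <EASTL/fixed_set.h>
#include <cassert>

void fixed_set_example()
{
    eastl::fixed_set<int, 8> unique;        // up to 8 nodes without touching the heap
    unique.insert(5);
    unique.insert(5);                       // duplicate is rejected by a set
    assert(unique.size() == 1);

    eastl::fixed_multiset<int, 8> repeated; // a multiset keeps duplicates
    repeated.insert(5);
    repeated.insert(5);
    assert(repeated.count(5) == 2);
    assert(repeated.max_size() == 8);       // the nodeCount template parameter
}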
+ /// + template , typename OverflowAllocator = EASTLAllocatorType> + class fixed_multiset : public multiset::node_type), + nodeCount, EASTL_ALIGN_OF(Key), 0, bEnableOverflow, OverflowAllocator> > + { + public: + typedef fixed_node_allocator::node_type), nodeCount, + EASTL_ALIGN_OF(Key), 0, bEnableOverflow, OverflowAllocator> fixed_allocator_type; + typedef typename fixed_allocator_type::overflow_allocator_type overflow_allocator_type; + typedef multiset base_type; + typedef fixed_multiset this_type; + typedef typename base_type::value_type value_type; + typedef typename base_type::node_type node_type; + typedef typename base_type::size_type size_type; + + enum { kMaxSize = nodeCount }; + + using base_type::insert; + + protected: + char mBuffer[fixed_allocator_type::kBufferSize]; // kBufferSize will take into account alignment requirements. + + using base_type::mAllocator; + + public: + fixed_multiset(); + fixed_multiset(const overflow_allocator_type& overflowAllocator); + explicit fixed_multiset(const Compare& compare); + fixed_multiset(const this_type& x); + fixed_multiset(this_type&& x); + fixed_multiset(this_type&& x, const overflow_allocator_type& overflowAllocator); + fixed_multiset(std::initializer_list ilist, const overflow_allocator_type& overflowAllocator = EASTL_FIXED_MULTISET_DEFAULT_ALLOCATOR); + + template + fixed_multiset(InputIterator first, InputIterator last); + + this_type& operator=(const this_type& x); + this_type& operator=(std::initializer_list ilist); + this_type& operator=(this_type&& x); + + void swap(this_type& x); + + void reset_lose_memory(); // This is a unilateral reset to an initially empty state. No destructors are called, no deallocation occurs. + + size_type max_size() const; + + const overflow_allocator_type& get_overflow_allocator() const EA_NOEXCEPT; + overflow_allocator_type& get_overflow_allocator() EA_NOEXCEPT; + void set_overflow_allocator(const overflow_allocator_type& allocator); + }; // fixed_multiset + + + + + /////////////////////////////////////////////////////////////////////// + // fixed_set + /////////////////////////////////////////////////////////////////////// + + template + inline fixed_set::fixed_set() + : base_type(fixed_allocator_type(mBuffer)) + { + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_SET_DEFAULT_NAME); + #endif + } + + + template + inline fixed_set::fixed_set(const overflow_allocator_type& overflowAllocator) + : base_type(fixed_allocator_type(mBuffer, overflowAllocator)) + { + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_SET_DEFAULT_NAME); + #endif + } + + + template + inline fixed_set::fixed_set(const Compare& compare) + : base_type(compare, fixed_allocator_type(mBuffer)) + { + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_SET_DEFAULT_NAME); + #endif + } + + + template + inline fixed_set::fixed_set(const this_type& x) + : base_type(x.get_compare(), fixed_allocator_type(mBuffer)) + { + mAllocator.copy_overflow_allocator(x.mAllocator); + + #if EASTL_NAME_ENABLED + mAllocator.set_name(x.mAllocator.get_name()); + #endif + + base_type::operator=(x); + } + + + template + inline fixed_set::fixed_set(this_type&& x) + : base_type(x.get_compare(), fixed_allocator_type(mBuffer)) + { + mAllocator.copy_overflow_allocator(x.mAllocator); + + #if EASTL_NAME_ENABLED + mAllocator.set_name(x.mAllocator.get_name()); + #endif + + base_type::operator=(x); + } + + + template + inline fixed_set::fixed_set(this_type&& x, const overflow_allocator_type& overflowAllocator) + : base_type(x.get_compare(), 
fixed_allocator_type(mBuffer, overflowAllocator)) + { + mAllocator.copy_overflow_allocator(x.mAllocator); + + #if EASTL_NAME_ENABLED + mAllocator.set_name(x.mAllocator.get_name()); + #endif + + base_type::operator=(x); + } + + + template + fixed_set::fixed_set(std::initializer_list ilist, const overflow_allocator_type& overflowAllocator) + : base_type(fixed_allocator_type(mBuffer, overflowAllocator)) + { + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_SET_DEFAULT_NAME); + #endif + + insert(ilist.begin(), ilist.end()); + } + + + template + template + fixed_set::fixed_set(InputIterator first, InputIterator last) + : base_type(fixed_allocator_type(mBuffer)) + { + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_SET_DEFAULT_NAME); + #endif + + insert(first, last); + } + + + template + inline typename fixed_set::this_type& + fixed_set::operator=(const this_type& x) + { + base_type::operator=(x); + return *this; + } + + + template + inline typename fixed_set::this_type& + fixed_set::operator=(std::initializer_list ilist) + { + base_type::clear(); + insert(ilist.begin(), ilist.end()); + return *this; + } + + + template + inline typename fixed_set::this_type& + fixed_set::operator=(this_type&& x) + { + base_type::operator=(x); + return *this; + } + + + template + inline void fixed_set::swap(this_type& x) + { + // Fixed containers use a special swap that can deal with excessively large buffers. + eastl::fixed_swap(*this, x); + } + + + template + inline void fixed_set::reset_lose_memory() + { + base_type::reset_lose_memory(); + base_type::get_allocator().reset(mBuffer); + } + + + template + inline typename fixed_set::size_type + fixed_set::max_size() const + { + return kMaxSize; + } + + + template + inline const typename fixed_set::overflow_allocator_type& + fixed_set::get_overflow_allocator() const EA_NOEXCEPT + { + return mAllocator.get_overflow_allocator(); + } + + + template + inline typename fixed_set::overflow_allocator_type& + fixed_set::get_overflow_allocator() EA_NOEXCEPT + { + return mAllocator.get_overflow_allocator(); + } + + + template + inline void fixed_set::set_overflow_allocator(const overflow_allocator_type& allocator) + { + mAllocator.set_overflow_allocator(allocator); + } + + + /////////////////////////////////////////////////////////////////////// + // global operators + /////////////////////////////////////////////////////////////////////// + + template + inline void swap(fixed_set& a, + fixed_set& b) + { + // Fixed containers use a special swap that can deal with excessively large buffers. 
+ eastl::fixed_swap(a, b); + } + + + + /////////////////////////////////////////////////////////////////////// + // fixed_multiset + /////////////////////////////////////////////////////////////////////// + + template + inline fixed_multiset::fixed_multiset() + : base_type(fixed_allocator_type(mBuffer)) + { + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_MULTISET_DEFAULT_NAME); + #endif + } + + + template + inline fixed_multiset::fixed_multiset(const overflow_allocator_type& overflowAllocator) + : base_type(fixed_allocator_type(mBuffer, overflowAllocator)) + { + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_MULTISET_DEFAULT_NAME); + #endif + } + + + template + inline fixed_multiset::fixed_multiset(const Compare& compare) + : base_type(compare, fixed_allocator_type(mBuffer)) + { + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_MULTISET_DEFAULT_NAME); + #endif + } + + + template + inline fixed_multiset::fixed_multiset(const this_type& x) + : base_type(x.get_compare(), fixed_allocator_type(mBuffer)) + { + mAllocator.copy_overflow_allocator(x.mAllocator); + + #if EASTL_NAME_ENABLED + mAllocator.set_name(x.mAllocator.get_name()); + #endif + + base_type::operator=(x); + } + + + template + inline fixed_multiset::fixed_multiset(this_type&& x) + : base_type(x.get_compare(), fixed_allocator_type(mBuffer)) + { + mAllocator.copy_overflow_allocator(x.mAllocator); + + #if EASTL_NAME_ENABLED + mAllocator.set_name(x.mAllocator.get_name()); + #endif + + base_type::operator=(x); + } + + + template + inline fixed_multiset::fixed_multiset(this_type&& x, const overflow_allocator_type& overflowAllocator) + : base_type(x.get_compare(), fixed_allocator_type(mBuffer, overflowAllocator)) + { + mAllocator.copy_overflow_allocator(x.mAllocator); + + #if EASTL_NAME_ENABLED + mAllocator.set_name(x.mAllocator.get_name()); + #endif + + base_type::operator=(x); + } + + + template + fixed_multiset::fixed_multiset(std::initializer_list ilist, const overflow_allocator_type& overflowAllocator) + : base_type(fixed_allocator_type(mBuffer, overflowAllocator)) + { + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_MULTISET_DEFAULT_NAME); + #endif + + insert(ilist.begin(), ilist.end()); + } + + + template + template + fixed_multiset::fixed_multiset(InputIterator first, InputIterator last) + : base_type(fixed_allocator_type(mBuffer)) + { + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_MULTISET_DEFAULT_NAME); + #endif + + insert(first, last); + } + + + template + inline typename fixed_multiset::this_type& + fixed_multiset::operator=(const this_type& x) + { + base_type::operator=(x); + return *this; + } + + + template + inline typename fixed_multiset::this_type& + fixed_multiset::operator=(std::initializer_list ilist) + { + base_type::clear(); + insert(ilist.begin(), ilist.end()); + return *this; + } + + + template + inline typename fixed_multiset::this_type& + fixed_multiset::operator=(this_type&& x) + { + base_type::operator=(x); + return *this; + } + + + template + inline void fixed_multiset::swap(this_type& x) + { + // Fixed containers use a special swap that can deal with excessively large buffers. 
+ eastl::fixed_swap(*this, x); + } + + + template + inline void fixed_multiset::reset_lose_memory() + { + base_type::reset_lose_memory(); + base_type::get_allocator().reset(mBuffer); + } + + + template + inline typename fixed_multiset::size_type + fixed_multiset::max_size() const + { + return kMaxSize; + } + + + template + inline const typename fixed_multiset::overflow_allocator_type& + fixed_multiset::get_overflow_allocator() const EA_NOEXCEPT + { + return mAllocator.get_overflow_allocator(); + } + + + template + inline typename fixed_multiset::overflow_allocator_type& + fixed_multiset::get_overflow_allocator() EA_NOEXCEPT + { + return mAllocator.get_overflow_allocator(); + } + + + template + inline void fixed_multiset::set_overflow_allocator(const overflow_allocator_type& allocator) + { + mAllocator.set_overflow_allocator(allocator); + } + + + /////////////////////////////////////////////////////////////////////// + // global operators + /////////////////////////////////////////////////////////////////////// + + template + inline void swap(fixed_multiset& a, + fixed_multiset& b) + { + // Fixed containers use a special swap that can deal with excessively large buffers. + eastl::fixed_swap(a, b); + } + + + +} // namespace eastl + + +#endif // Header include guard + + + + + + + + + diff --git a/libkram/eastl/include/EASTL/fixed_slist.h b/libkram/eastl/include/EASTL/fixed_slist.h new file mode 100644 index 00000000..85a7a7b3 --- /dev/null +++ b/libkram/eastl/include/EASTL/fixed_slist.h @@ -0,0 +1,389 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// This file implements an slist which uses a fixed size memory pool for its nodes. +/////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_FIXED_SLIST_H +#define EASTL_FIXED_SLIST_H + + +#include +#include + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + + + +namespace eastl +{ + /// EASTL_FIXED_SLIST_DEFAULT_NAME + /// + /// Defines a default container name in the absence of a user-provided name. + /// In the case of fixed-size containers, the allocator name always refers + /// to overflow allocations. + /// + #ifndef EASTL_FIXED_SLIST_DEFAULT_NAME + #define EASTL_FIXED_SLIST_DEFAULT_NAME EASTL_DEFAULT_NAME_PREFIX " fixed_slist" // Unless the user overrides something, this is "EASTL fixed_slist". + #endif + + + /// EASTL_FIXED_SLIST_DEFAULT_ALLOCATOR + /// + #ifndef EASTL_FIXED_SLIST_DEFAULT_ALLOCATOR + #define EASTL_FIXED_SLIST_DEFAULT_ALLOCATOR overflow_allocator_type(EASTL_FIXED_SLIST_DEFAULT_NAME) + #endif + + + + /// fixed_slist + /// + /// fixed_slist is an slist which uses a single block of contiguous memory + /// for its nodes. The purpose of this is to reduce memory usage relative + /// to a conventional memory allocation system (with block headers), to + /// increase allocation speed (often due to avoidance of mutex locks), + /// to increase performance (due to better memory locality), and to decrease + /// memory fragmentation due to the way that fixed block allocators work. 
+ /// + /// The primary downside to a fixed_slist is that the number of nodes it + /// can contain is fixed upon its declaration. If you want a fixed_slist + /// that doesn't have this limitation, then you probably don't want a + /// fixed_slist. You can always create your own memory allocator that works + /// the way you want. + /// + /// Template parameters: + /// T The type of object the slist holds. + /// nodeCount The max number of objects to contain. + /// bEnableOverflow Whether or not we should use the overflow heap if our object pool is exhausted. + /// OverflowAllocator Overflow allocator, which is only used if bEnableOverflow == true. Defaults to the global heap. + /// + template + class fixed_slist : public slist::node_type), + nodeCount, EASTL_ALIGN_OF(T), 0, bEnableOverflow, OverflowAllocator> > + { + public: + typedef fixed_node_allocator::node_type), nodeCount, + EASTL_ALIGN_OF(T), 0, bEnableOverflow, OverflowAllocator> fixed_allocator_type; + typedef OverflowAllocator overflow_allocator_type; + typedef slist base_type; + typedef fixed_slist this_type; + typedef typename base_type::size_type size_type; + typedef typename base_type::value_type value_type; + typedef typename base_type::node_type node_type; + + enum { kMaxSize = nodeCount }; + + using base_type::assign; + using base_type::resize; + using base_type::size; + + protected: + char mBuffer[fixed_allocator_type::kBufferSize]; // kBufferSize will take into account alignment requirements. + + using base_type::internalAllocator; + + public: + fixed_slist(); + explicit fixed_slist(const overflow_allocator_type& overflowAllocator); // Only applicable if bEnableOverflow is true. + explicit fixed_slist(size_type n); // Currently we don't support overflowAllocator specification for other constructors, for simplicity. + fixed_slist(size_type n, const value_type& value); + fixed_slist(const this_type& x); + fixed_slist(this_type&& x); + fixed_slist(this_type&&, const overflow_allocator_type&); + fixed_slist(std::initializer_list ilist, const overflow_allocator_type& overflowAllocator = EASTL_FIXED_SLIST_DEFAULT_ALLOCATOR); + + template + fixed_slist(InputIterator first, InputIterator last); + + this_type& operator=(const this_type& x); + this_type& operator=(std::initializer_list ilist); + this_type& operator=(this_type&& x); + + void swap(this_type& x); + void reset_lose_memory(); // This is a unilateral reset to an initially empty state. No destructors are called, no deallocation occurs. + size_type max_size() const; // Returns the max fixed size, which is the user-supplied nodeCount parameter. + bool full() const; // Returns true if the fixed space has been fully allocated. Note that if overflow is enabled, the container size can be greater than nodeCount but full() could return true because the fixed space may have a recently freed slot. + bool has_overflowed() const; // Returns true if the allocations spilled over into the overflow allocator. Meaningful only if overflow is enabled. + bool can_overflow() const; // Returns the value of the bEnableOverflow template parameter. 
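// A usage sketch for the fixed_slist declared above. An slist is singly linked,
// so insertion happens at the front (or after a given iterator); the node count
// and values here are illustrative.
#include <EASTL/fixed_slist.h>
#include <cassert>

void fixed_slist_example()
{
    eastl::fixed_slist<int, 4> stackLike;   // four nodes embedded in the object
    stackLike.push_front(1);
    stackLike.push_front(2);
    assert(stackLike.front() == 2);
    assert(!stackLike.full());              // only two of the four fixed nodes are used

    stackLike.reset_lose_memory();          // empty again; no destructors run, nothing freed
    assert(stackLike.empty());
}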
+ + // OverflowAllocator + const overflow_allocator_type& get_overflow_allocator() const EA_NOEXCEPT; + overflow_allocator_type& get_overflow_allocator() EA_NOEXCEPT; + void set_overflow_allocator(const overflow_allocator_type& allocator); + }; // fixed_slist + + + + + /////////////////////////////////////////////////////////////////////// + // slist + /////////////////////////////////////////////////////////////////////// + + template + inline fixed_slist::fixed_slist() + : base_type(fixed_allocator_type(mBuffer)) + { + #if EASTL_NAME_ENABLED + internalAllocator().set_name(EASTL_FIXED_SLIST_DEFAULT_NAME); + #endif + } + + + template + inline fixed_slist::fixed_slist(const overflow_allocator_type& overflowAllocator) + : base_type(fixed_allocator_type(mBuffer, overflowAllocator)) + { + #if EASTL_NAME_ENABLED + internalAllocator().set_name(EASTL_FIXED_SLIST_DEFAULT_NAME); + #endif + } + + + template + inline fixed_slist::fixed_slist(size_type n) + : base_type(fixed_allocator_type(mBuffer)) + { + #if EASTL_NAME_ENABLED + internalAllocator().set_name(EASTL_FIXED_SLIST_DEFAULT_NAME); + #endif + + resize(n); + } + + + template + inline fixed_slist::fixed_slist(size_type n, const value_type& value) + : base_type(fixed_allocator_type(mBuffer)) + { + #if EASTL_NAME_ENABLED + internalAllocator().set_name(EASTL_FIXED_SLIST_DEFAULT_NAME); + #endif + + resize(n, value); + } + + + template + inline fixed_slist::fixed_slist(const this_type& x) + : base_type(fixed_allocator_type(mBuffer)) + { + internalAllocator().copy_overflow_allocator(x.internalAllocator()); + + #if EASTL_NAME_ENABLED + internalAllocator().set_name(x.internalAllocator().get_name()); + #endif + + assign(x.begin(), x.end()); + } + + + template + inline fixed_slist::fixed_slist(this_type&& x) + : base_type(fixed_allocator_type(mBuffer)) + { + // Since we are a fixed_list, we can't normally swap pointers unless both this and + // x are using using overflow and the overflow allocators are equal. To do: + //if(has_overflowed() && x.has_overflowed() && (get_overflow_allocator() == x.get_overflow_allocator())) + //{ + // We can swap contents and may need to swap the allocators as well. + //} + + // The following is currently identical to the fixed_vector(const this_type& x) code above. If it stays that + // way then we may want to make a shared implementation. + internalAllocator().copy_overflow_allocator(x.internalAllocator()); + + #if EASTL_NAME_ENABLED + internalAllocator().set_name(x.internalAllocator().get_name()); + #endif + + assign(x.begin(), x.end()); + } + + template + inline fixed_slist::fixed_slist(this_type&& x, const overflow_allocator_type& overflowAllocator) + : base_type(fixed_allocator_type(mBuffer, overflowAllocator)) + { + // See comments above. 
+ internalAllocator().copy_overflow_allocator(x.internalAllocator()); + + #if EASTL_NAME_ENABLED + internalAllocator().set_name(x.internalAllocator().get_name()); + #endif + + assign(x.begin(), x.end()); + } + + + template + inline fixed_slist::fixed_slist(std::initializer_list ilist, const overflow_allocator_type& overflowAllocator) + : base_type(fixed_allocator_type(mBuffer, overflowAllocator)) + { + #if EASTL_NAME_ENABLED + internalAllocator().set_name(EASTL_FIXED_SLIST_DEFAULT_NAME); + #endif + + assign(ilist.begin(), ilist.end()); + } + + + template + template + fixed_slist::fixed_slist(InputIterator first, InputIterator last) + : base_type(fixed_allocator_type(mBuffer)) + { + #if EASTL_NAME_ENABLED + internalAllocator().set_name(EASTL_FIXED_SLIST_DEFAULT_NAME); + #endif + + assign(first, last); + } + + + template + inline typename fixed_slist::this_type& + fixed_slist::operator=(const this_type& x) + { + if(this != &x) + { + base_type::clear(); + + #if EASTL_ALLOCATOR_COPY_ENABLED + internalAllocator() = x.internalAllocator(); // The primary effect of this is to copy the overflow allocator. + #endif + + base_type::assign(x.begin(), x.end()); // It would probably be better to implement this like slist::operator=. + } + return *this; + } + + + template + inline typename fixed_slist::this_type& + fixed_slist::operator=(this_type&& x) + { + return operator=(x); + } + + + template + inline typename fixed_slist::this_type& + fixed_slist::operator=(std::initializer_list ilist) + { + base_type::clear(); + base_type::assign(ilist.begin(), ilist.end()); + return *this; + } + + + template + inline void fixed_slist::swap(this_type& x) + { + // Fixed containers use a special swap that can deal with excessively large buffers. + eastl::fixed_swap(*this, x); + } + + + template + inline void fixed_slist::reset_lose_memory() + { + base_type::reset_lose_memory(); + base_type::get_allocator().reset(mBuffer); + } + + + template + inline typename fixed_slist::size_type + fixed_slist::max_size() const + { + return kMaxSize; + } + + + template + inline bool fixed_slist::full() const + { + // Note: This implementation isn't right in the case of bEnableOverflow = true because it will return + // false for the case that there are free nodes from the buffer but also nodes from the dynamic heap. + // This can happen if the container exceeds the fixed size and then frees some of the nodes from the fixed buffer. + return !internalAllocator().can_allocate(); // This is the quickest way of detecting this. has_overflowed uses a different method because it can't use this quick method. + } + + + template + inline bool fixed_slist::has_overflowed() const + { + #if EASTL_FIXED_SIZE_TRACKING_ENABLED // If we can use this faster pathway (as size() may be slow)... 
+ return (internalAllocator().mPool.mnPeakSize > kMaxSize); + #else + return (size() > kMaxSize); + #endif + } + + + template + inline bool fixed_slist::can_overflow() const + { + return bEnableOverflow; + } + + + template + inline const typename fixed_slist::overflow_allocator_type& + fixed_slist::get_overflow_allocator() const EA_NOEXCEPT + { + return internalAllocator().get_overflow_allocator(); + } + + + template + inline typename fixed_slist::overflow_allocator_type& + fixed_slist::get_overflow_allocator() EA_NOEXCEPT + { + return internalAllocator().get_overflow_allocator(); + } + + + template + inline void + fixed_slist::set_overflow_allocator(const overflow_allocator_type& allocator) + { + internalAllocator().set_overflow_allocator(allocator); + } + + + + /////////////////////////////////////////////////////////////////////// + // global operators + /////////////////////////////////////////////////////////////////////// + + template + inline void swap(fixed_slist& a, + fixed_slist& b) + { + // Fixed containers use a special swap that can deal with excessively large buffers. + eastl::fixed_swap(a, b); + } + + + +} // namespace eastl + + +#endif // Header include guard + + + + + + + + + + + + diff --git a/libkram/eastl/include/EASTL/fixed_string.h b/libkram/eastl/include/EASTL/fixed_string.h new file mode 100644 index 00000000..f646302b --- /dev/null +++ b/libkram/eastl/include/EASTL/fixed_string.h @@ -0,0 +1,805 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// This file implements a string which uses a fixed size memory pool. +// The bEnableOverflow template parameter allows the container to resort to +// heap allocations if the memory pool is exhausted. +/////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_FIXED_STRING_H +#define EASTL_FIXED_STRING_H + +#include +#include +#include + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + + +namespace eastl +{ + /// EASTL_FIXED_STRING_DEFAULT_NAME + /// + /// Defines a default container name in the absence of a user-provided name. + /// In the case of fixed-size containers, the allocator name always refers + /// to overflow allocations. + /// + #ifndef EASTL_FIXED_STRING_DEFAULT_NAME + #define EASTL_FIXED_STRING_DEFAULT_NAME EASTL_DEFAULT_NAME_PREFIX " fixed_string" // Unless the user overrides something, this is "EASTL fixed_string". + #endif + + + + /// fixed_string + /// + /// A fixed_string with bEnableOverflow == true is identical to a regular + /// string in terms of its behavior. All the expectations of regular string + /// apply to it and no additional expectations come from it. When bEnableOverflow + /// is false, fixed_string behaves like regular string with the exception that + /// its capacity can never increase. All operations you do on such a fixed_string + /// which require a capacity increase will result in undefined behavior or an + /// C++ allocation exception, depending on the configuration of EASTL. + /// + /// Note: The nodeCount value is the amount of characters to allocate, which needs to + /// take into account a terminating zero. 
Thus if you want to store strings with a strlen + /// of 30, the nodeCount value must be at least 31. + /// + /// Template parameters: + /// T The type of object the string holds (char, wchar_t, char8_t, char16_t, char32_t). + /// nodeCount The max number of objects to contain. + /// bEnableOverflow Whether or not we should use the overflow heap if our object pool is exhausted. + /// OverflowAllocator Overflow allocator, which is only used if bEnableOverflow == true. Defaults to the global heap. + /// + /// Notes: + /// The nodeCount value must be at least 2, one for a character and one for a terminating 0. + /// + /// As of this writing, the string class necessarily reallocates when an insert of + /// self is done into self. As a result, the fixed_string class doesn't support + /// inserting self into self unless the bEnableOverflow template parameter is true. + /// + /// Example usage: + /// fixed_string fixedString("hello world"); // Can hold up to a strlen of 128. + /// + /// fixedString = "hola mundo"; + /// fixedString.clear(); + /// fixedString.resize(200); + /// fixedString.sprintf("%f", 1.5f); + /// + template + class fixed_string : public basic_string > + { + public: + typedef fixed_vector_allocator fixed_allocator_type; + typedef typename fixed_allocator_type::overflow_allocator_type overflow_allocator_type; + typedef basic_string base_type; + typedef fixed_string this_type; + typedef typename base_type::size_type size_type; + typedef typename base_type::value_type value_type; + typedef typename base_type::CtorDoNotInitialize CtorDoNotInitialize; + typedef typename base_type::CtorSprintf CtorSprintf; + typedef aligned_buffer aligned_buffer_type; + + enum { kMaxSize = nodeCount - 1 }; // -1 because we need to save one element for the silent terminating null. + + using base_type::npos; + using base_type::mPair; + using base_type::append; + using base_type::resize; + using base_type::clear; + using base_type::capacity; + using base_type::size; + using base_type::sprintf_va_list; + using base_type::DoAllocate; + using base_type::DoFree; + using base_type::internalLayout; + using base_type::get_allocator; + + protected: + union // We define a union in order to avoid strict pointer aliasing issues with compilers like GCC. + { + value_type mArray[1]; + aligned_buffer_type mBuffer; // Question: Why are we doing this aligned_buffer thing? Why not just do an array of value_type, given that we are using just strings of char types. + }; + + public: + fixed_string(); + explicit fixed_string(const overflow_allocator_type& overflowAllocator); // Only applicable if bEnableOverflow is true. + fixed_string(const base_type& x, size_type position, size_type n = base_type::npos); // Currently we don't support overflowAllocator specification for other constructors, for simplicity. 
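// A usage sketch matching the fixed_string example in the comment above: room
// for a strlen of 127 plus the terminating zero. Contents are illustrative;
// overflow (enabled by default) falls back to the heap allocator.
#include <EASTL/fixed_string.h>
#include <cassert>

void fixed_string_example()
{
    eastl::fixed_string<char, 128> s("hello world");   // nodeCount includes the null terminator
    assert(s.max_size() == 127);                       // kMaxSize == nodeCount - 1

    s += ", kram";                  // appends in place while the fixed capacity allows
    s.sprintf("%d mips", 12);       // overwrites the contents, printf-style
    assert(s == "12 mips");
}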
+ fixed_string(const value_type* p, size_type n); + fixed_string(const value_type* p); + fixed_string(size_type n, const value_type& value); + fixed_string(const this_type& x); + fixed_string(const this_type& x, const overflow_allocator_type& overflowAllocator); + fixed_string(const base_type& x); + fixed_string(const value_type* pBegin, const value_type* pEnd); + fixed_string(CtorDoNotInitialize, size_type n); + fixed_string(CtorSprintf, const value_type* pFormat, ...); + fixed_string(std::initializer_list ilist, const overflow_allocator_type& overflowAllocator); + fixed_string(this_type&& x); + fixed_string(this_type&& x, const overflow_allocator_type& overflowAllocator); + + this_type& operator=(const this_type& x); + this_type& operator=(const base_type& x); + this_type& operator=(const value_type* p); + this_type& operator=(const value_type c); + this_type& operator=(std::initializer_list ilist); + this_type& operator=(this_type&& x); + + void swap(this_type& x); + + void set_capacity(size_type n); + void reset_lose_memory(); // This is a unilateral reset to an initially empty state. No destructors are called, no deallocation occurs. + size_type max_size() const; + bool full() const; // Returns true if the fixed space has been fully allocated. Note that if overflow is enabled, the container size can be greater than nodeCount but full() could return true because the fixed space may have a recently freed slot. + bool has_overflowed() const; // Returns true if the allocations spilled over into the overflow allocator. Meaningful only if overflow is enabled. + bool can_overflow() const; // Returns the value of the bEnableOverflow template parameter. + + // The inherited versions of substr/left/right call the basic_string constructor, + // which will call the overflow allocator and fail if bEnableOverflow == false + this_type substr(size_type position, size_type n) const; + this_type left(size_type n) const; + this_type right(size_type n) const; + + // OverflowAllocator + const overflow_allocator_type& get_overflow_allocator() const EA_NOEXCEPT; + overflow_allocator_type& get_overflow_allocator() EA_NOEXCEPT; + void set_overflow_allocator(const overflow_allocator_type& allocator); + }; // fixed_string + + + + + + /////////////////////////////////////////////////////////////////////// + // fixed_string + /////////////////////////////////////////////////////////////////////// + + template + inline fixed_string::fixed_string() + : base_type(fixed_allocator_type(mBuffer.buffer)) + { + #if EASTL_NAME_ENABLED + get_allocator().set_name(EASTL_FIXED_STRING_DEFAULT_NAME); + #endif + + internalLayout().SetHeapBeginPtr(mArray); + internalLayout().SetHeapCapacity(nodeCount - 1); + internalLayout().SetHeapSize(0); + + *internalLayout().HeapBeginPtr() = 0; + } + + + template + inline fixed_string::fixed_string(const overflow_allocator_type& overflowAllocator) + : base_type(fixed_allocator_type(mBuffer.buffer, overflowAllocator)) + { + #if EASTL_NAME_ENABLED + get_allocator().set_name(EASTL_FIXED_STRING_DEFAULT_NAME); + #endif + + internalLayout().SetHeapBeginPtr(mArray); + internalLayout().SetHeapCapacity(nodeCount - 1); + internalLayout().SetHeapSize(0); + + *internalLayout().HeapBeginPtr() = 0; + } + + + template + inline fixed_string::fixed_string(const this_type& x) + : base_type(fixed_allocator_type(mBuffer.buffer)) + { + get_allocator().copy_overflow_allocator(x.get_allocator()); + + #if EASTL_NAME_ENABLED + get_allocator().set_name(x.get_allocator().get_name()); + #endif + + 
internalLayout().SetHeapBeginPtr(mArray); + internalLayout().SetHeapCapacity(nodeCount - 1); + internalLayout().SetHeapSize(0); + + *internalLayout().HeapBeginPtr() = 0; + + append(x); + } + + + template + inline fixed_string::fixed_string(const this_type& x, const overflow_allocator_type& overflowAllocator) + : base_type(fixed_allocator_type(mBuffer.buffer, overflowAllocator)) + { + get_allocator().copy_overflow_allocator(x.get_allocator()); + + #if EASTL_NAME_ENABLED + get_allocator().set_name(x.get_allocator().get_name()); + #endif + + internalLayout().SetHeapBeginPtr(mArray); + internalLayout().SetHeapCapacity(nodeCount - 1); + internalLayout().SetHeapSize(0); + + *internalLayout().HeapBeginPtr() = 0; + + append(x); + } + + + template + inline fixed_string::fixed_string(const base_type& x) + : base_type(fixed_allocator_type(mBuffer.buffer)) + { + #if EASTL_NAME_ENABLED + get_allocator().set_name(x.get_allocator().get_name()); + #endif + + internalLayout().SetHeapBeginPtr(mArray); + internalLayout().SetHeapCapacity(nodeCount - 1); + internalLayout().SetHeapSize(0); + + *internalLayout().HeapBeginPtr() = 0; + + append(x); + } + + + template + inline fixed_string::fixed_string(const base_type& x, size_type position, size_type n) + : base_type(fixed_allocator_type(mBuffer.buffer)) + { + #if EASTL_NAME_ENABLED + get_allocator().set_name(x.get_allocator().get_name()); + #endif + + internalLayout().SetHeapBeginPtr(mArray); + internalLayout().SetHeapCapacity(nodeCount - 1); + internalLayout().SetHeapSize(0); + + *internalLayout().HeapBeginPtr() = 0; + + append(x, position, n); + } + + + template + inline fixed_string::fixed_string(const value_type* p, size_type n) + : base_type(fixed_allocator_type(mBuffer.buffer)) + { + #if EASTL_NAME_ENABLED + get_allocator().set_name(EASTL_FIXED_STRING_DEFAULT_NAME); + #endif + + internalLayout().SetHeapBeginPtr(mArray); + internalLayout().SetHeapCapacity(nodeCount - 1); + internalLayout().SetHeapSize(0); + + *internalLayout().HeapBeginPtr() = 0; + + append(p, n); + } + + + template + inline fixed_string::fixed_string(const value_type* p) + : base_type(fixed_allocator_type(mBuffer.buffer)) + { + #if EASTL_NAME_ENABLED + get_allocator().set_name(EASTL_FIXED_STRING_DEFAULT_NAME); + #endif + + internalLayout().SetHeapBeginPtr(mArray); + internalLayout().SetHeapCapacity(nodeCount - 1); + internalLayout().SetHeapSize(0); + + *internalLayout().HeapBeginPtr() = 0; + + append(p); // There better be enough space to hold the assigned string. + } + + + template + inline fixed_string::fixed_string(size_type n, const value_type& value) + : base_type(fixed_allocator_type(mBuffer.buffer)) + { + #if EASTL_NAME_ENABLED + get_allocator().set_name(EASTL_FIXED_STRING_DEFAULT_NAME); + #endif + + internalLayout().SetHeapBeginPtr(mArray); + internalLayout().SetHeapCapacity(nodeCount - 1); + internalLayout().SetHeapSize(0); + + *internalLayout().HeapBeginPtr() = 0; + + append(n, value); // There better be enough space to hold the assigned string. 
+ } + + + template + inline fixed_string::fixed_string(const value_type* pBegin, const value_type* pEnd) + : base_type(fixed_allocator_type(mBuffer.buffer)) + { + #if EASTL_NAME_ENABLED + get_allocator().set_name(EASTL_FIXED_STRING_DEFAULT_NAME); + #endif + + internalLayout().SetHeapBeginPtr(mArray); + internalLayout().SetHeapCapacity(nodeCount - 1); + internalLayout().SetHeapSize(0); + + *internalLayout().HeapBeginPtr() = 0; + + append(pBegin, pEnd); + } + + + template + inline fixed_string::fixed_string(CtorDoNotInitialize, size_type n) + : base_type(fixed_allocator_type(mBuffer.buffer)) + { + #if EASTL_NAME_ENABLED + get_allocator().set_name(EASTL_FIXED_STRING_DEFAULT_NAME); + #endif + + internalLayout().SetHeapBeginPtr(mArray); + internalLayout().SetHeapCapacity(nodeCount - 1); + + if(n < nodeCount) + { + internalLayout().SetHeapSize(n); + *internalLayout().HeapEndPtr() = 0; + } + else + { + internalLayout().SetHeapSize(0); + *internalLayout().HeapEndPtr() = 0; + + resize(n); + } + } + + + template + inline fixed_string::fixed_string(CtorSprintf, const value_type* pFormat, ...) + : base_type(fixed_allocator_type(mBuffer.buffer)) + { + #if EASTL_NAME_ENABLED + get_allocator().set_name(EASTL_FIXED_STRING_DEFAULT_NAME); + #endif + + internalLayout().SetHeapBeginPtr(mArray); + internalLayout().SetHeapCapacity(nodeCount - 1); + internalLayout().SetHeapSize(0); + *internalLayout().HeapBeginPtr() = 0; + + va_list arguments; + va_start(arguments, pFormat); + sprintf_va_list(pFormat, arguments); + va_end(arguments); + } + + + template + inline fixed_string::fixed_string(std::initializer_list ilist, const overflow_allocator_type& overflowAllocator) + : base_type(fixed_allocator_type(mBuffer.buffer, overflowAllocator)) + { + #if EASTL_NAME_ENABLED + get_allocator().set_name(EASTL_FIXED_STRING_DEFAULT_NAME); + #endif + + internalLayout().SetHeapBeginPtr(mArray); + internalLayout().SetHeapCapacity(nodeCount - 1); + internalLayout().SetHeapSize(0); + + *internalLayout().HeapBeginPtr() = 0; + + append(ilist.begin(), ilist.end()); + } + + + template + inline fixed_string::fixed_string(this_type&& x) + : base_type(fixed_allocator_type(mBuffer.buffer)) + { + // We copy from x instead of trade with it. We need to do so because fixed_ containers use local memory buffers. + #if EASTL_NAME_ENABLED + get_allocator().set_name(x.get_allocator().get_name()); + #endif + + internalLayout().SetHeapBeginPtr(mArray); + internalLayout().SetHeapCapacity(nodeCount - 1); + internalLayout().SetHeapSize(0); + + *internalLayout().HeapBeginPtr() = 0; + + append(x); // Let x destruct its own items. + } + + template + inline fixed_string::fixed_string(this_type&& x, const overflow_allocator_type& overflowAllocator) + : base_type(fixed_allocator_type(mBuffer.buffer, overflowAllocator)) + { + // We copy from x instead of trade with it. We need to do so because fixed_ containers use local memory buffers. + #if EASTL_NAME_ENABLED + get_allocator().set_name(x.get_allocator().get_name()); + #endif + + internalLayout().SetHeapBeginPtr(mArray); + internalLayout().SetHeapCapacity(nodeCount - 1); + internalLayout().SetHeapSize(0); + + *internalLayout().HeapBeginPtr() = 0; + + append(x); // Let x destruct its own items. 
+ } + + + template + inline typename fixed_string::this_type& + fixed_string::operator=(const this_type& x) + { + if(this != &x) + { + clear(); + + #if EASTL_ALLOCATOR_COPY_ENABLED + get_allocator() = x.get_allocator(); + #endif + + append(x); + } + return *this; + } + + + template + inline typename fixed_string:: + this_type& fixed_string::operator=(const base_type& x) + { + if(static_cast(this) != &x) + { + clear(); + + #if EASTL_ALLOCATOR_COPY_ENABLED + get_allocator() = x.get_allocator(); + #endif + + append(x); + } + return *this; + } + + + template + inline typename fixed_string:: + this_type& fixed_string::operator=(const value_type* p) + { + if(internalLayout().HeapBeginPtr() != p) + { + clear(); + append(p); + } + return *this; + } + + + template + inline typename fixed_string:: + this_type& fixed_string::operator=(const value_type c) + { + clear(); + append((size_type)1, c); + return *this; + } + + + template + inline typename fixed_string:: + this_type& fixed_string::operator=(std::initializer_list ilist) + { + clear(); + append(ilist.begin(), ilist.end()); + return *this; + } + + + template + inline typename fixed_string:: + this_type& fixed_string::operator=(this_type&& x) + { + // We copy from x instead of trade with it. We need to do so because fixed_ containers use local memory buffers. + + // if(static_cast(this) != &x) This should be impossible, so we disable it until proven otherwise. + { + clear(); + + #if EASTL_ALLOCATOR_COPY_ENABLED + get_allocator() = x.get_allocator(); + #endif + + append(x); // Let x destruct its own items. + } + return *this; + } + + + template + inline void fixed_string::swap(this_type& x) + { + // Fixed containers use a special swap that can deal with excessively large buffers. + eastl::fixed_swap(*this, x); + } + + + template + inline void fixed_string::set_capacity(size_type n) + { + const size_type nPrevSize = internalLayout().GetSize(); + const size_type nPrevCapacity = capacity(); + + if(n == npos) // If the user means to set the capacity so that it equals the size (i.e. free excess capacity)... + n = nPrevSize; + + if(n != nPrevCapacity) // If the request results in a capacity change... + { + const size_type allocSize = (n + 1); // +1 because the terminating 0 isn't included in the supplied capacity value. So now n refers the amount of memory we need. + + if(can_overflow() && (((uintptr_t)internalLayout().HeapBeginPtr() != (uintptr_t)mBuffer.buffer) || (allocSize > kMaxSize))) // If we are or would be using dynamically allocated memory instead of our fixed-size member buffer... + { + T* const pNewData = (allocSize <= kMaxSize) ? (T*)&mBuffer.buffer[0] : DoAllocate(allocSize); + T* const pCopyEnd = (n < nPrevSize) ? (internalLayout().HeapBeginPtr() + n) : internalLayout().HeapEndPtr(); + CharStringUninitializedCopy(internalLayout().HeapBeginPtr(), pCopyEnd, pNewData); // Copy [internalLayout().heap.mpBegin, pCopyEnd) to pNewData. + if((uintptr_t)internalLayout().HeapBeginPtr() != (uintptr_t)mBuffer.buffer) + DoFree(internalLayout().HeapBeginPtr(), internalLayout().GetHeapCapacity() + 1); + + internalLayout().SetHeapSize((size_type)(pCopyEnd - internalLayout().HeapBeginPtr())); + internalLayout().SetHeapBeginPtr(pNewData); + internalLayout().SetHeapCapacity(allocSize - 1); + } // Else the new capacity would be within our fixed buffer. + else if(n < nPrevSize) // If the newly requested capacity is less than our size, we do what vector::set_capacity does and resize, even though we actually aren't reducing the capacity. 
+ resize(n); + } + } + + + template + inline void fixed_string::reset_lose_memory() + { + internalLayout().SetHeapBeginPtr(mArray); + internalLayout().SetHeapSize(0); + internalLayout().SetHeapCapacity(nodeCount - 1); + } + + + template + inline typename fixed_string:: + size_type fixed_string::max_size() const + { + return kMaxSize; + } + + + template + inline bool fixed_string::full() const + { + // If size >= capacity, then we are definitely full. + // Also, if our size is smaller but we've switched away from mBuffer due to a previous overflow, then we are considered full. + return ((size_t)(internalLayout().HeapEndPtr() - internalLayout().HeapBeginPtr()) >= kMaxSize) || ((void*)internalLayout().HeapBeginPtr() != (void*)mBuffer.buffer); + } + + + template + inline bool fixed_string::has_overflowed() const + { + // This will be incorrect for the case that bOverflowEnabled is true and the container was resized + // down to a small size where the fixed buffer could take over ownership of the data again. + // The only simple fix for this is to take on another member variable which tracks whether this overflow + // has occurred at some point in the past. + return ((void*)internalLayout().HeapBeginPtr() != (void*)mBuffer.buffer); + } + + + template + inline bool fixed_string::can_overflow() const + { + return bEnableOverflow; + } + + + template + inline typename fixed_string:: + this_type fixed_string::substr(size_type position, size_type n) const + { + #if EASTL_STRING_OPT_RANGE_ERRORS + if(position > internalLayout().GetSize()) + base_type::ThrowRangeException(); + #endif + + return fixed_string(internalLayout().HeapBeginPtr() + position, + internalLayout().HeapBeginPtr() + position + eastl::min_alt(n, internalLayout().GetSize() - position)); + } + + + template + inline typename fixed_string:: + this_type fixed_string::left(size_type n) const + { + const size_type nLength = size(); + if(n < nLength) + return fixed_string(internalLayout().HeapBeginPtr(), internalLayout().HeapBeginPtr() + n); + return *this; + } + + + template + inline typename fixed_string:: + this_type fixed_string::right(size_type n) const + { + const size_type nLength = size(); + if(n < nLength) + return fixed_string(internalLayout().HeapEndPtr() - n, internalLayout().HeapEndPtr()); + return *this; + } + + + template + inline const typename fixed_string:: + overflow_allocator_type& fixed_string::get_overflow_allocator() const EA_NOEXCEPT + { + return get_allocator().get_overflow_allocator(); + } + + + template + inline typename fixed_string:: + overflow_allocator_type& fixed_string::get_overflow_allocator() EA_NOEXCEPT + { + return get_allocator().get_overflow_allocator(); + } + + + template + inline void + fixed_string::set_overflow_allocator(const overflow_allocator_type& allocator) + { + get_allocator().set_overflow_allocator(allocator); + } + + + /////////////////////////////////////////////////////////////////////// + // global operators + /////////////////////////////////////////////////////////////////////// + + + // Operator + + template + fixed_string operator+(const fixed_string& a, + const fixed_string& b) + { + // We have a problem here because need to return an fixed_string by value. This will typically result in it + // using stack space equal to its size. That size may be too large to be workable. 
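// A short sketch of the overflow-introspection members implemented above (full,
// has_overflowed, set_capacity). Template arguments and the function name are assumptions,
// since the patch text has lost the angle-bracket parameters.
void FixedStringOverflowSketch()
{
    eastl::fixed_string<char, 8, true> s;

    s.assign("0123456789");             // longer than the 7-character fixed capacity, so it overflows
    bool spilled = s.has_overflowed();  // expected: true, storage now lives on the overflow allocator
    bool isFull  = s.full();            // also true once the fixed buffer has been abandoned

    s.resize(3);
    s.set_capacity(s.npos);             // npos means "shrink capacity to fit the current size"

    (void)spilled; (void)isFull;
}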
+ typedef fixed_string this_type; + + this_type result(const_cast(a).get_overflow_allocator()); + result.append(a); + result.append(b); + return result; + } + + + template + fixed_string operator+(const typename fixed_string::value_type* p, + const fixed_string& b) + { + typedef fixed_string this_type; + + const typename this_type::size_type n = (typename this_type::size_type)CharStrlen(p); + this_type result(const_cast(b).get_overflow_allocator()); + result.append(p, p + n); + result.append(b); + return result; + } + + + template + fixed_string operator+(typename fixed_string::value_type c, + const fixed_string& b) + { + typedef fixed_string this_type; + + this_type result(const_cast(b).get_overflow_allocator()); + result.push_back(c); + result.append(b); + return result; + } + + + template + fixed_string operator+(const fixed_string& a, + const typename fixed_string::value_type* p) + { + typedef fixed_string this_type; + + const typename this_type::size_type n = (typename this_type::size_type)CharStrlen(p); + this_type result(const_cast(a).get_overflow_allocator()); + result.append(a); + result.append(p, p + n); + return result; + } + + + template + fixed_string operator+(const fixed_string& a, + typename fixed_string::value_type c) + { + typedef fixed_string this_type; + + this_type result(const_cast(a).get_overflow_allocator()); + result.append(a); + result.push_back(c); + return result; + } + + + template + fixed_string operator+(fixed_string&& a, + fixed_string&& b) + { + a.append(b); // Using an rvalue by name results in it becoming an lvalue. + return eastl::move(a); + } + + template + fixed_string operator+(fixed_string&& a, + const fixed_string& b) + { + a.append(b); + return eastl::move(a); + } + + template + fixed_string operator+(const typename fixed_string::value_type* p, + fixed_string&& b) + { + b.insert(0, p); + return eastl::move(b); + } + + template + fixed_string operator+(fixed_string&& a, + const typename fixed_string::value_type* p) + { + a.append(p); + return eastl::move(a); + } + + template + fixed_string operator+(fixed_string&& a, + typename fixed_string::value_type c) + { + a.push_back(c); + return eastl::move(a); + } + + + // operator ==, !=, <, >, <=, >= come from the string implementations. + + template + inline void swap(fixed_string& a, + fixed_string& b) + { + // Fixed containers use a special swap that can deal with excessively large buffers. + eastl::fixed_swap(a, b); + } + + +} // namespace eastl + +#endif // Header include guard diff --git a/libkram/eastl/include/EASTL/fixed_substring.h b/libkram/eastl/include/EASTL/fixed_substring.h new file mode 100644 index 00000000..033052f4 --- /dev/null +++ b/libkram/eastl/include/EASTL/fixed_substring.h @@ -0,0 +1,265 @@ +/////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +/////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_FIXED_SUBSTRING_H +#define EASTL_FIXED_SUBSTRING_H + + +#include + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + + + +namespace eastl +{ + + /// fixed_substring + /// + /// Implements a string which is a reference to a segment of characters. 
+ /// This class is efficient because it allocates no memory and copies no + /// memory during construction and assignment, but rather refers directly + /// to the segment of chracters. A common use of this is to have a + /// fixed_substring efficiently refer to a substring within another string. + /// + /// You cannot directly resize a fixed_substring (e.g. via resize, insert, + /// append, erase), but you can assign a different substring to it. + /// You can modify the characters within a substring in place. + /// As of this writing, in the name of being lean and simple it is the + /// user's responsibility to not call unsupported resizing functions + /// such as those listed above. A detailed listing of the functions which + /// are not supported is given below in the class declaration. + /// + /// The c_str function doesn't act as one might hope, as it simply + /// returns the pointer to the beginning of the string segment and the + /// 0-terminator may be beyond the end of the segment. If you want to + /// always be able to use c_str as expected, use the fixed string solution + /// we describe below. + /// + /// Another use of fixed_substring is to provide C++ string-like functionality + /// with a C character array. This allows you to work on a C character array + /// as if it were a C++ string as opposed using the C string API. Thus you + /// can do this: + /// + /// void DoSomethingForUser(char* timeStr, size_t timeStrCapacity) + /// { + /// fixed_substring tmp(timeStr, timeStrCapacity); + /// tmp = "hello "; + /// tmp += "world"; + /// } + /// + /// Note that this class constructs and assigns from const string pointers + /// and const string objects, yet this class does not declare its member + /// data as const. This is a concession in order to allow this implementation + /// to be simple and lean. It is the user's responsibility to make sure + /// that strings that should not or can not be modified are either not + /// used by fixed_substring or are not modified by fixed_substring. + /// + /// A more flexible alternative to fixed_substring is fixed_string. + /// fixed_string has none of the functional limitations that fixed_substring + /// has and like fixed_substring it doesn't allocate memory. However, + /// fixed_string makes a *copy* of the source string and uses local + /// memory to store that copy. Also, fixed_string objects on the stack + /// are going to have a limit as to their maximum size. + /// + /// Notes: + /// As of this writing, the string class necessarily reallocates when + /// an insert of self is done into self. As a result, the fixed_substring + /// class doesn't support inserting self into self. 
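// A small sketch of the two uses described above: referring to a slice of an existing
// string, and wrapping a plain C character array. The <char> element type is an assumption
// because the template arguments are missing from this patch text.
#include <EASTL/string.h>
#include <EASTL/fixed_substring.h>

void FixedSubstringSketch(char* timeStr, size_t timeStrCapacity)
{
    eastl::string str("hello world");

    // Refers to "llo w" inside str; no memory is allocated and nothing is copied.
    eastl::fixed_substring<char> sub(str, 2, 5);

    // Wraps the caller's buffer so it can be read through the string interface.
    eastl::fixed_substring<char> wrapped(timeStr, timeStrCapacity);

    // Caveat from the comments above: sub.data() points into str and the slice is not
    // 0-terminated at sub.size(), so c_str() cannot be relied upon here.
    (void)sub; (void)wrapped;
}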
+ /// + /// Example usage: + /// basic_string str("hello world"); + /// fixed_substring sub(str, 2, 5); // sub == "llo w" + /// + template + class fixed_substring : public basic_string + { + public: + typedef basic_string base_type; + typedef fixed_substring this_type; + typedef typename base_type::size_type size_type; + typedef typename base_type::value_type value_type; + typedef typename base_type::iterator iterator; + typedef typename base_type::const_iterator const_iterator; + + using base_type::npos; + using base_type::mPair; + using base_type::AllocateSelf; + using base_type::internalLayout; + using base_type::get_allocator; + + private: + + void SetInternalHeapLayout(value_type* pBeginPtr, size_type nSize, size_type nCap) + { + internalLayout().SetHeapBeginPtr(pBeginPtr); + internalLayout().SetHeapSize(nSize); + internalLayout().SetHeapCapacity(nCap); + } + + + public: + fixed_substring() + : base_type() + { + } + + fixed_substring(const base_type& x) + : base_type() + { + #if EASTL_NAME_ENABLED + get_allocator().set_name(x.get_allocator().get_name()); + #endif + + assign(x); + } + + // We gain no benefit from having an rvalue move constructor or assignment operator, + // as this class is a const class. + + fixed_substring(const base_type& x, size_type position, size_type n = base_type::npos) + : base_type() + { + #if EASTL_NAME_ENABLED + get_allocator().set_name(x.get_allocator().get_name()); + #endif + + assign(x, position, n); + } + + fixed_substring(const value_type* p, size_type n) + : base_type() + { + assign(p, n); + } + + fixed_substring(const value_type* p) + : base_type() + { + assign(p); + } + + fixed_substring(const value_type* pBegin, const value_type* pEnd) + : base_type() + { + assign(pBegin, pEnd); + } + + ~fixed_substring() + { + // We need to reset, as otherwise the parent destructor will + // attempt to free our memory. + AllocateSelf(); + } + + this_type& operator=(const base_type& x) + { + assign(x); + return *this; + } + + this_type& operator=(const value_type* p) + { + assign(p); + return *this; + } + + this_type& assign(const base_type& x) + { + // By design, we need to cast away const-ness here. + SetInternalHeapLayout(const_cast(x.data()), x.size(), x.size()); + return *this; + } + + this_type& assign(const base_type& x, size_type position, size_type n) + { + // By design, we need to cast away const-ness here. + SetInternalHeapLayout(const_cast(x.data()) + position, n, n); + return *this; + } + + this_type& assign(const value_type* p, size_type n) + { + // By design, we need to cast away const-ness here. + SetInternalHeapLayout(const_cast(p), n, n); + return *this; + } + + this_type& assign(const value_type* p) + { + // By design, we need to cast away const-ness here. + SetInternalHeapLayout(const_cast(p), (size_type)CharStrlen(p), (size_type)CharStrlen(p)); + return *this; + } + + this_type& assign(const value_type* pBegin, const value_type* pEnd) + { + // By design, we need to cast away const-ness here. + SetInternalHeapLayout(const_cast(pBegin), (size_type)(pEnd - pBegin), (size_type)(pEnd - pBegin)); + return *this; + } + + + // Partially supported functionality + // + // When using fixed_substring on a character sequence that is within another + // string, the following functions may do one of two things: + // 1 Attempt to reallocate + // 2 Write a 0 char at the end of the fixed_substring + // + // Item #1 will result in a crash, due to the attempt by the underlying + // string class to free the substring memory. 
Item #2 will result in a 0 + // char being written to the character array. Item #2 may or may not be + // a problem, depending on how you use fixed_substring. Thus the following + // functions cannot be used safely. + + #if 0 // !defined(EA_COMPILER_NO_DELETED_FUNCTIONS) We may want to enable these deletions after some investigation of possible user impact. + this_type& operator=(value_type c) = delete; + void resize(size_type n, value_type c) = delete; + void resize(size_type n) = delete; + void reserve(size_type = 0) = delete; + void set_capacity(size_type n) = delete; + void clear() = delete; + this_type& operator+=(const base_type& x) = delete; + this_type& operator+=(const value_type* p) = delete; + this_type& operator+=(value_type c) = delete; + this_type& append(const base_type& x) = delete; + this_type& append(const base_type& x, size_type position, size_type n) = delete; + this_type& append(const value_type* p, size_type n) = delete; + this_type& append(const value_type* p) = delete; + this_type& append(size_type n) = delete; + this_type& append(size_type n, value_type c) = delete; + this_type& append(const value_type* pBegin, const value_type* pEnd) = delete; + this_type& append_sprintf_va_list(const value_type* pFormat, va_list arguments) = delete; + this_type& append_sprintf(const value_type* pFormat, ...) = delete; + void push_back(value_type c) = delete; + void pop_back() = delete; + this_type& assign(size_type n, value_type c) = delete; + this_type& insert(size_type position, const base_type& x) = delete; + this_type& insert(size_type position, const base_type& x, size_type beg, size_type n) = delete; + this_type& insert(size_type position, const value_type* p, size_type n) = delete; + this_type& insert(size_type position, const value_type* p) = delete; + this_type& insert(size_type position, size_type n, value_type c) = delete; + iterator insert(const_iterator p, value_type c) = delete; + void insert(const_iterator p, size_type n, value_type c) = delete; + void insert(const_iterator p, const value_type* pBegin, const value_type* pEnd) = delete; + this_type& erase(size_type position = 0, size_type n = npos) = delete; + iterator erase(const_iterator p) = delete; + iterator erase(const_iterator pBegin, const_iterator pEnd) = delete; + void swap(base_type& x) = delete; + this_type& sprintf_va_list(const value_type* pFormat, va_list arguments) = delete; + this_type& sprintf(const value_type* pFormat, ...) = delete; + #endif + + }; // fixed_substring + + +} // namespace eastl + + + +#endif // Header include guard diff --git a/libkram/eastl/include/EASTL/fixed_vector.h b/libkram/eastl/include/EASTL/fixed_vector.h new file mode 100644 index 00000000..1dc482bd --- /dev/null +++ b/libkram/eastl/include/EASTL/fixed_vector.h @@ -0,0 +1,625 @@ +/////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +/////////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// This file implements a vector which uses a fixed size memory pool. +// The bEnableOverflow template parameter allows the container to resort to +// heap allocations if the memory pool is exhausted. +/////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_FIXED_VECTOR_H +#define EASTL_FIXED_VECTOR_H + + +#include +#include + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. 
VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + + + +namespace eastl +{ + /// EASTL_FIXED_VECTOR_DEFAULT_NAME + /// + /// Defines a default container name in the absence of a user-provided name. + /// In the case of fixed-size containers, the allocator name always refers + /// to overflow allocations. + /// + #ifndef EASTL_FIXED_VECTOR_DEFAULT_NAME + #define EASTL_FIXED_VECTOR_DEFAULT_NAME EASTL_DEFAULT_NAME_PREFIX " fixed_vector" // Unless the user overrides something, this is "EASTL fixed_vector". + #endif + + + /// EASTL_FIXED_VECTOR_DEFAULT_ALLOCATOR + /// + #ifndef EASTL_FIXED_VECTOR_DEFAULT_ALLOCATOR + #define EASTL_FIXED_VECTOR_DEFAULT_ALLOCATOR overflow_allocator_type(EASTL_FIXED_VECTOR_DEFAULT_NAME) + #endif + + + /// fixed_vector + /// + /// A fixed_vector with bEnableOverflow == true is identical to a regular + /// vector in terms of its behavior. All the expectations of regular vector + /// apply to it and no additional expectations come from it. When bEnableOverflow + /// is false, fixed_vector behaves like regular vector with the exception that + /// its capacity can never increase. All operations you do on such a fixed_vector + /// which require a capacity increase will result in undefined behavior or an + /// C++ allocation exception, depending on the configuration of EASTL. + /// + /// Template parameters: + /// T The type of object the vector holds. + /// nodeCount The max number of objects to contain. + /// bEnableOverflow Whether or not we should use the overflow heap if our object pool is exhausted. + /// OverflowAllocator Overflow allocator, which is only used if bEnableOverflow == true. Defaults to the global heap. + /// + /// Note: The nodeCount value must be at least 1. + /// + /// Example usage: + /// fixed_vector fixedVector); + /// + /// fixedVector.push_back(Widget()); + /// fixedVector.resize(200); + /// fixedVector.clear(); + /// + template ::type> + class fixed_vector : public vector > + { + public: + typedef fixed_vector_allocator fixed_allocator_type; + typedef OverflowAllocator overflow_allocator_type; + typedef vector base_type; + typedef fixed_vector this_type; + typedef typename base_type::size_type size_type; + typedef typename base_type::value_type value_type; + typedef typename base_type::reference reference; + typedef typename base_type::iterator iterator; + typedef typename base_type::const_iterator const_iterator; + typedef aligned_buffer aligned_buffer_type; + + enum { kMaxSize = nodeCount }; + + using base_type::get_allocator; + using base_type::mpBegin; + using base_type::mpEnd; + using base_type::internalCapacityPtr; + using base_type::resize; + using base_type::clear; + using base_type::size; + using base_type::assign; + using base_type::npos; + using base_type::DoAllocate; + using base_type::DoFree; + using base_type::DoAssign; + using base_type::DoAssignFromIterator; + + protected: + aligned_buffer_type mBuffer; + + public: + fixed_vector(); + explicit fixed_vector(const overflow_allocator_type& overflowAllocator); // Only applicable if bEnableOverflow is true. + explicit fixed_vector(size_type n); // Currently we don't support overflowAllocator specification for other constructors, for simplicity. 
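// A usage sketch for the container declared above. The example in the comment block has
// lost its template arguments (and picked up a stray ')') in the patch text, so the
// <Widget, 128, true> parameters and the Widget type itself are illustrative assumptions.
#include <EASTL/fixed_vector.h>

struct Widget { int value = 0; };

void FixedVectorUsageSketch()
{
    eastl::fixed_vector<Widget, 128, true> fixedVector;

    fixedVector.push_back(Widget());
    fixedVector.resize(200);      // exceeds nodeCount, so this spills to the overflow allocator
    fixedVector.clear();

    // With overflow disabled the capacity can never exceed nodeCount.
    eastl::fixed_vector<int, 16, false> ints;
    for (int i = 0; i < 16; ++i)
        ints.push_back(i);
    // ints.push_back(16);        // would exhaust the fixed pool: assert / undefined behavior
}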
+ fixed_vector(size_type n, const value_type& value); + fixed_vector(const this_type& x); + fixed_vector(this_type&& x); + fixed_vector(this_type&& x, const overflow_allocator_type& overflowAllocator); + fixed_vector(std::initializer_list ilist, const overflow_allocator_type& overflowAllocator = EASTL_FIXED_VECTOR_DEFAULT_ALLOCATOR); + + template + fixed_vector(InputIterator first, InputIterator last); + + this_type& operator=(const this_type& x); + this_type& operator=(std::initializer_list ilist); + this_type& operator=(this_type&& x); + + void swap(this_type& x); + + void set_capacity(size_type n); + void clear(bool freeOverflow); + void reset_lose_memory(); // This is a unilateral reset to an initially empty state. No destructors are called, no deallocation occurs. + size_type max_size() const; // Returns the max fixed size, which is the user-supplied nodeCount parameter. + bool full() const; // Returns true if the fixed space has been fully allocated. Note that if overflow is enabled, the container size can be greater than nodeCount but full() could return true because the fixed space may have a recently freed slot. + bool has_overflowed() const; // Returns true if the allocations spilled over into the overflow allocator. Meaningful only if overflow is enabled. + bool can_overflow() const; // Returns the value of the bEnableOverflow template parameter. + + void* push_back_uninitialized(); + void push_back(const value_type& value); // We implement push_back here because we have a specialization that's + reference push_back(); // smaller for the case of overflow being disabled. + void push_back(value_type&& value); + + // OverflowAllocator + const overflow_allocator_type& get_overflow_allocator() const EA_NOEXCEPT; + overflow_allocator_type& get_overflow_allocator() EA_NOEXCEPT; + void set_overflow_allocator(const overflow_allocator_type& allocator); + + protected: + void* DoPushBackUninitialized(true_type); + void* DoPushBackUninitialized(false_type); + + void DoPushBack(true_type, const value_type& value); + void DoPushBack(false_type, const value_type& value); + + void DoPushBackMove(true_type, value_type&& value); + void DoPushBackMove(false_type, value_type&& value); + + reference DoPushBack(false_type); + reference DoPushBack(true_type); + + }; // fixed_vector + + + + + /////////////////////////////////////////////////////////////////////// + // fixed_vector + /////////////////////////////////////////////////////////////////////// + + template + inline fixed_vector::fixed_vector() + : base_type(fixed_allocator_type(mBuffer.buffer)) + { + #if EASTL_NAME_ENABLED + get_allocator().set_name(EASTL_FIXED_VECTOR_DEFAULT_NAME); + #endif + + mpBegin = mpEnd = (value_type*)&mBuffer.buffer[0]; + internalCapacityPtr() = mpBegin + nodeCount; + } + + template + inline fixed_vector::fixed_vector(const overflow_allocator_type& overflowAllocator) + : base_type(fixed_allocator_type(mBuffer.buffer, overflowAllocator)) + { + #if EASTL_NAME_ENABLED + get_allocator().set_name(EASTL_FIXED_VECTOR_DEFAULT_NAME); + #endif + + mpBegin = mpEnd = (value_type*)&mBuffer.buffer[0]; + internalCapacityPtr() = mpBegin + nodeCount; + } + + template + inline fixed_vector::fixed_vector(size_type n) + : base_type(fixed_allocator_type(mBuffer.buffer)) + { + #if EASTL_NAME_ENABLED + get_allocator().set_name(EASTL_FIXED_VECTOR_DEFAULT_NAME); + #endif + + mpBegin = mpEnd = (value_type*)&mBuffer.buffer[0]; + internalCapacityPtr() = mpBegin + nodeCount; + resize(n); + } + + + template + inline 
fixed_vector::fixed_vector(size_type n, const value_type& value) + : base_type(fixed_allocator_type(mBuffer.buffer)) + { + #if EASTL_NAME_ENABLED + get_allocator().set_name(EASTL_FIXED_VECTOR_DEFAULT_NAME); + #endif + + mpBegin = mpEnd = (value_type*)&mBuffer.buffer[0]; + internalCapacityPtr() = mpBegin + nodeCount; + resize(n, value); + } + + + template + inline fixed_vector::fixed_vector(const this_type& x) + : base_type(fixed_allocator_type(mBuffer.buffer)) + { + get_allocator().copy_overflow_allocator(x.get_allocator()); + + #if EASTL_NAME_ENABLED + get_allocator().set_name(x.get_allocator().get_name()); + #endif + + mpBegin = mpEnd = (value_type*)&mBuffer.buffer[0]; + internalCapacityPtr() = mpBegin + nodeCount; + base_type::template DoAssign(x.begin(), x.end(), false_type()); + } + + + template + inline fixed_vector::fixed_vector(this_type&& x) + : base_type(fixed_allocator_type(mBuffer.buffer)) + { + // Since we are a fixed_vector, we can't swap pointers. We can possibly so something like fixed_swap or + // we can just do an assignment from x. If we want to do the former then we need to have some complicated + // code to deal with overflow or no overflow, and whether the memory is in the fixed-size buffer or in + // the overflow allocator. 90% of the time the memory should be in the fixed buffer, in which case + // a simple assignment is no worse than the fancy pathway. + + // Since we are a fixed_list, we can't normally swap pointers unless both this and + // x are using using overflow and the overflow allocators are equal. To do: + //if(has_overflowed() && x.has_overflowed() && (get_overflow_allocator() == x.get_overflow_allocator())) + //{ + // We can swap contents and may need to swap the allocators as well. + //} + + // The following is currently identical to the fixed_vector(const this_type& x) code above. If it stays that + // way then we may want to make a shared implementation. + get_allocator().copy_overflow_allocator(x.get_allocator()); + + #if EASTL_NAME_ENABLED + get_allocator().set_name(x.get_allocator().get_name()); + #endif + + mpBegin = mpEnd = (value_type*)&mBuffer.buffer[0]; + internalCapacityPtr() = mpBegin + nodeCount; + base_type::template DoAssign, true>(eastl::make_move_iterator(x.begin()), eastl::make_move_iterator(x.end()), false_type()); + } + + + template + inline fixed_vector::fixed_vector(this_type&& x, const overflow_allocator_type& overflowAllocator) + : base_type(fixed_allocator_type(mBuffer.buffer, overflowAllocator)) + { + // See the discussion above. + + // The following is currently identical to the fixed_vector(const this_type& x) code above. If it stays that + // way then we may want to make a shared implementation. 
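// A sketch of the point made in the comments above: because the elements live in a local
// member buffer, "moving" a fixed_vector move-constructs the elements one by one rather
// than stealing a pointer. Names and template arguments here are assumptions.
#include <EASTL/fixed_vector.h>
#include <EASTL/string.h>
#include <EASTL/utility.h>

void FixedVectorMoveSketch()
{
    eastl::fixed_vector<eastl::string, 8, true> a;
    a.push_back("alpha");
    a.push_back("beta");

    // Elements are moved into b's own fixed buffer; a pointer swap only ever happens in
    // swap(), and only when both sides have overflowed onto equal overflow allocators.
    eastl::fixed_vector<eastl::string, 8, true> b(eastl::move(a));
    (void)b;
}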
+ get_allocator().copy_overflow_allocator(x.get_allocator()); + + #if EASTL_NAME_ENABLED + get_allocator().set_name(x.get_allocator().get_name()); + #endif + + mpBegin = mpEnd = (value_type*)&mBuffer.buffer[0]; + internalCapacityPtr() = mpBegin + nodeCount; + base_type::template DoAssign(x.begin(), x.end(), false_type()); + } + + + template + inline fixed_vector::fixed_vector(std::initializer_list ilist, const overflow_allocator_type& overflowAllocator) + : base_type(fixed_allocator_type(mBuffer.buffer, overflowAllocator)) + { + typedef typename std::initializer_list::iterator InputIterator; + typedef typename eastl::iterator_traits::iterator_category IC; + + mpBegin = mpEnd = (value_type*)&mBuffer.buffer[0]; + internalCapacityPtr() = mpBegin + nodeCount; + base_type::template DoAssignFromIterator(ilist.begin(), ilist.end(), IC()); + } + + + template + template + fixed_vector::fixed_vector(InputIterator first, InputIterator last) + : base_type(fixed_allocator_type(mBuffer.buffer)) + { + #if EASTL_NAME_ENABLED + get_allocator().set_name(EASTL_FIXED_VECTOR_DEFAULT_NAME); + #endif + + mpBegin = mpEnd = (value_type*)&mBuffer.buffer[0]; + internalCapacityPtr() = mpBegin + nodeCount; + base_type::template DoAssign(first, last, is_integral()); + } + + + template + inline typename fixed_vector::this_type& + fixed_vector::operator=(const this_type& x) + { + if(this != &x) + { + clear(); + + #if EASTL_ALLOCATOR_COPY_ENABLED + get_allocator() = x.get_allocator(); // The primary effect of this is to copy the overflow allocator. + #endif + + base_type::template DoAssign(x.begin(), x.end(), false_type()); // Shorter route. + } + return *this; + } + + + template + inline typename fixed_vector::this_type& + fixed_vector::operator=(std::initializer_list ilist) + { + typedef typename std::initializer_list::iterator InputIterator; + typedef typename eastl::iterator_traits::iterator_category IC; + + clear(); + base_type::template DoAssignFromIterator(ilist.begin(), ilist.end(), IC()); + return *this; + } + + + template + inline typename fixed_vector::this_type& + fixed_vector::operator=(this_type&& x) + { + // Since we are a fixed_vector, we can't swap pointers. We can possibly do something like fixed_swap or + // we can just do an assignment from x. If we want to do the former then we need to have some complicated + // code to deal with overflow or no overflow, and whether the memory is in the fixed-size buffer or in + // the overflow allocator. 90% of the time the memory should be in the fixed buffer, in which case + // a simple assignment is no worse than the fancy pathway. + if (this != &x) + { + clear(); + + #if EASTL_ALLOCATOR_COPY_ENABLED + get_allocator() = x.get_allocator(); // The primary effect of this is to copy the overflow allocator. + #endif + + base_type::template DoAssign, true>(eastl::make_move_iterator(x.begin()), eastl::make_move_iterator(x.end()), false_type()); // Shorter route. + } + return *this; + } + + + template + inline void fixed_vector::swap(this_type& x) + { + if((has_overflowed() && x.has_overflowed()) && (get_overflow_allocator() == x.get_overflow_allocator())) // If both containers are using the heap instead of local memory + { // then we can do a fast pointer swap instead of content swap. + eastl::swap(mpBegin, x.mpBegin); + eastl::swap(mpEnd, x.mpEnd); + eastl::swap(internalCapacityPtr(), x.internalCapacityPtr()); + } + else + { + // Fixed containers use a special swap that can deal with excessively large buffers. 
+ eastl::fixed_swap(*this, x); + } + } + + + template + inline void fixed_vector::set_capacity(size_type n) + { + const size_type nPrevSize = (size_type)(mpEnd - mpBegin); + const size_type nPrevCapacity = (size_type)(internalCapacityPtr() - mpBegin); + + if(n == npos) // If the user means to set the capacity so that it equals the size (i.e. free excess capacity)... + n = nPrevSize; + + if(n != nPrevCapacity) // If the request results in a capacity change... + { + if(can_overflow() && (((uintptr_t)mpBegin != (uintptr_t)mBuffer.buffer) || (n > kMaxSize))) // If we are or would be using dynamically allocated memory instead of our fixed-size member buffer... + { + T* const pNewData = (n <= kMaxSize) ? (T*)&mBuffer.buffer[0] : DoAllocate(n); + T* const pCopyEnd = (n < nPrevSize) ? (mpBegin + n) : mpEnd; + eastl::uninitialized_move_ptr(mpBegin, pCopyEnd, pNewData); // Move [mpBegin, pCopyEnd) to p. + eastl::destruct(mpBegin, mpEnd); + if((uintptr_t)mpBegin != (uintptr_t)mBuffer.buffer) + DoFree(mpBegin, (size_type)(internalCapacityPtr() - mpBegin)); + + mpEnd = pNewData + (pCopyEnd - mpBegin); + mpBegin = pNewData; + internalCapacityPtr() = mpBegin + n; + } // Else the new capacity would be within our fixed buffer. + else if(n < nPrevSize) // If the newly requested capacity is less than our size, we do what vector::set_capacity does and resize, even though we actually aren't reducing the capacity. + resize(n); + } + } + + + template + inline void fixed_vector::clear(bool freeOverflow) + { + base_type::clear(); + if (freeOverflow && mpBegin != (value_type*)&mBuffer.buffer[0]) + { + EASTLFree(get_allocator(), mpBegin, (internalCapacityPtr() - mpBegin) * sizeof(T)); + mpBegin = mpEnd = (value_type*)&mBuffer.buffer[0]; + internalCapacityPtr() = mpBegin + nodeCount; + } + } + + + template + inline void fixed_vector::reset_lose_memory() + { + mpBegin = mpEnd = (value_type*)&mBuffer.buffer[0]; + internalCapacityPtr() = mpBegin + nodeCount; + } + + + template + inline typename fixed_vector::size_type + fixed_vector::max_size() const + { + return kMaxSize; + } + + + template + inline bool fixed_vector::full() const + { + // If size >= capacity, then we are definitely full. + // Also, if our size is smaller but we've switched away from mBuffer due to a previous overflow, then we are considered full. + return ((size_t)(mpEnd - mpBegin) >= kMaxSize) || ((void*)mpBegin != (void*)mBuffer.buffer); + } + + + template + inline bool fixed_vector::has_overflowed() const + { + // This will be incorrect for the case that bOverflowEnabled is true and the container was resized + // down to a small size where the fixed buffer could take over ownership of the data again. + // The only simple fix for this is to take on another member variable which tracks whether this overflow + // has occurred at some point in the past. 
+ return ((void*)mpBegin != (void*)mBuffer.buffer); + } + + + template + inline bool fixed_vector::can_overflow() const + { + return bEnableOverflow; + } + + + template + inline void* fixed_vector::push_back_uninitialized() + { + return DoPushBackUninitialized(typename type_select::type()); + } + + + template + inline void* fixed_vector::DoPushBackUninitialized(true_type) + { + return base_type::push_back_uninitialized(); + } + + + template + inline void* fixed_vector::DoPushBackUninitialized(false_type) + { + EASTL_ASSERT(mpEnd < internalCapacityPtr()); + + return mpEnd++; + } + + + template + inline void fixed_vector::push_back(const value_type& value) + { + DoPushBack(typename type_select::type(), value); + } + + + template + inline void fixed_vector::DoPushBack(true_type, const value_type& value) + { + base_type::push_back(value); + } + + + // This template specializes for overflow NOT enabled. + // In this configuration, there is no need for the heavy weight push_back() which tests to see if the container should grow (it never will) + template + inline void fixed_vector::DoPushBack(false_type, const value_type& value) + { + EASTL_ASSERT(mpEnd < internalCapacityPtr()); + + ::new((void*)mpEnd++) value_type(value); + } + + + template + inline typename fixed_vector::reference fixed_vector::push_back() + { + return DoPushBack(typename type_select::type()); + } + + + template + inline typename fixed_vector::reference fixed_vector::DoPushBack(true_type) + { + return base_type::push_back(); + } + + + // This template specializes for overflow NOT enabled. + // In this configuration, there is no need for the heavy weight push_back() which tests to see if the container should grow (it never will) + template + inline typename fixed_vector::reference fixed_vector::DoPushBack(false_type) + { + EASTL_ASSERT(mpEnd < internalCapacityPtr()); + + ::new((void*)mpEnd++) value_type; // Note that this isn't value_type() as that syntax doesn't work on all compilers for POD types. + + return *(mpEnd - 1); // Same as return back(); + } + + + template + inline void fixed_vector::push_back(value_type&& value) + { + DoPushBackMove(typename type_select::type(), eastl::move(value)); + } + + + template + inline void fixed_vector::DoPushBackMove(true_type, value_type&& value) + { + base_type::push_back(eastl::move(value)); // This will call vector::push_back(value_type &&), and possibly swap value with *mpEnd. + } + + + // This template specializes for overflow NOT enabled. + // In this configuration, there is no need for the heavy weight push_back() which tests to see if the container should grow (it never will) + template + inline void fixed_vector::DoPushBackMove(false_type, value_type&& value) + { + EASTL_ASSERT(mpEnd < internalCapacityPtr()); + + ::new((void*)mpEnd++) value_type(eastl::move(value)); // This will call the value_type(value_type&&) constructor, and possibly swap value with *mpEnd. 
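// A sketch of the specialization described above: with bEnableOverflow == false the
// push_back paths skip the growth test and construct in place at mpEnd, so the caller is
// responsible for not exceeding nodeCount. The names below are illustrative.
#include <new>
#include <EASTL/fixed_vector.h>

void FixedVectorPushBackSketch()
{
    eastl::fixed_vector<int, 4, false> v;

    v.push_back(1);                        // no growth path; only an EASTL_ASSERT guards capacity
    int& back = v.push_back();             // default-constructs in place and returns a reference
    back = 2;

    void* p = v.push_back_uninitialized(); // raw storage; the caller constructs the element
    ::new(p) int(3);
}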
+ } + + + template + inline const typename fixed_vector::overflow_allocator_type& + fixed_vector::get_overflow_allocator() const EA_NOEXCEPT + { + return get_allocator().get_overflow_allocator(); + } + + + template + inline typename fixed_vector::overflow_allocator_type& + fixed_vector::get_overflow_allocator() EA_NOEXCEPT + { + return get_allocator().get_overflow_allocator(); + } + + + template + inline void + fixed_vector::set_overflow_allocator(const overflow_allocator_type& allocator) + { + get_allocator().set_overflow_allocator(allocator); + } + + + + /////////////////////////////////////////////////////////////////////// + // global operators + /////////////////////////////////////////////////////////////////////// + + // operator ==, !=, <, >, <=, >= come from the vector implementations. + + template + inline void swap(fixed_vector& a, + fixed_vector& b) + { + // Fixed containers use a special swap that can deal with excessively large buffers. + eastl::fixed_swap(a, b); + } + + + +} // namespace eastl + + + +#endif // Header include guard + + + + + + + + + + + + diff --git a/libkram/eastl/include/EASTL/functional.h b/libkram/eastl/include/EASTL/functional.h new file mode 100644 index 00000000..556bf020 --- /dev/null +++ b/libkram/eastl/include/EASTL/functional.h @@ -0,0 +1,1266 @@ +/////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_FUNCTIONAL_H +#define EASTL_FUNCTIONAL_H + + +#include +#include +#include +#include +#include +#include + + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. 
+#endif + + + +namespace eastl +{ + /////////////////////////////////////////////////////////////////////// + // Primary C++ functions + /////////////////////////////////////////////////////////////////////// + + template + struct plus : public binary_function + { + EA_CPP14_CONSTEXPR T operator()(const T& a, const T& b) const + { return a + b; } + }; + + // http://en.cppreference.com/w/cpp/utility/functional/plus_void + template <> + struct plus + { + typedef int is_transparent; + template + EA_CPP14_CONSTEXPR auto operator()(A&& a, B&& b) const + -> decltype(eastl::forward(a) + eastl::forward(b)) + { return eastl::forward(a) + eastl::forward(b); } + }; + + template + struct minus : public binary_function + { + EA_CPP14_CONSTEXPR T operator()(const T& a, const T& b) const + { return a - b; } + }; + + // http://en.cppreference.com/w/cpp/utility/functional/minus_void + template <> + struct minus + { + typedef int is_transparent; + template + EA_CPP14_CONSTEXPR auto operator()(A&& a, B&& b) const + -> decltype(eastl::forward(a) - eastl::forward(b)) + { return eastl::forward(a) - eastl::forward(b); } + }; + + template + struct multiplies : public binary_function + { + EA_CPP14_CONSTEXPR T operator()(const T& a, const T& b) const + { return a * b; } + }; + + // http://en.cppreference.com/w/cpp/utility/functional/multiplies_void + template <> + struct multiplies + { + typedef int is_transparent; + template + EA_CPP14_CONSTEXPR auto operator()(A&& a, B&& b) const + -> decltype(eastl::forward(a) * eastl::forward(b)) + { return eastl::forward(a) * eastl::forward(b); } + }; + + template + struct divides : public binary_function + { + EA_CPP14_CONSTEXPR T operator()(const T& a, const T& b) const + { return a / b; } + }; + + // http://en.cppreference.com/w/cpp/utility/functional/divides_void + template <> + struct divides + { + typedef int is_transparent; + template + EA_CPP14_CONSTEXPR auto operator()(A&& a, B&& b) const + -> decltype(eastl::forward(a) / eastl::forward(b)) + { return eastl::forward(a) / eastl::forward(b); } + }; + + template + struct modulus : public binary_function + { + EA_CPP14_CONSTEXPR T operator()(const T& a, const T& b) const + { return a % b; } + }; + + // http://en.cppreference.com/w/cpp/utility/functional/modulus_void + template <> + struct modulus + { + typedef int is_transparent; + template + EA_CPP14_CONSTEXPR auto operator()(A&& a, B&& b) const + -> decltype(eastl::forward(a) % eastl::forward(b)) + { return eastl::forward(a) % eastl::forward(b); } + }; + + template + struct negate : public unary_function + { + EA_CPP14_CONSTEXPR T operator()(const T& a) const + { return -a; } + }; + + // http://en.cppreference.com/w/cpp/utility/functional/negate_void + template <> + struct negate + { + typedef int is_transparent; + template + EA_CPP14_CONSTEXPR auto operator()(T&& t) const + -> decltype(-eastl::forward(t)) + { return -eastl::forward(t); } + }; + + template + struct equal_to : public binary_function + { + EA_CPP14_CONSTEXPR bool operator()(const T& a, const T& b) const + { return a == b; } + }; + + // http://en.cppreference.com/w/cpp/utility/functional/equal_to_void + template <> + struct equal_to + { + typedef int is_transparent; + template + EA_CPP14_CONSTEXPR auto operator()(A&& a, B&& b) const + -> decltype(eastl::forward(a) == eastl::forward(b)) + { return eastl::forward(a) == eastl::forward(b); } + }; + + template + bool validate_equal_to(const T& a, const T& b, Compare compare) + { + return compare(a, b) == compare(b, a); + } + + template + struct not_equal_to : 
public binary_function + { + EA_CPP14_CONSTEXPR bool operator()(const T& a, const T& b) const + { return a != b; } + }; + + // http://en.cppreference.com/w/cpp/utility/functional/not_equal_to_void + template <> + struct not_equal_to + { + typedef int is_transparent; + template + EA_CPP14_CONSTEXPR auto operator()(A&& a, B&& b) const + -> decltype(eastl::forward(a) != eastl::forward(b)) + { return eastl::forward(a) != eastl::forward(b); } + }; + + template + bool validate_not_equal_to(const T& a, const T& b, Compare compare) + { + return compare(a, b) == compare(b, a); // We want the not equal comparison results to be equal. + } + + /// str_equal_to + /// + /// Compares two 0-terminated string types. + /// The T types are expected to be iterators or act like iterators. + /// The expected behavior of str_less is the same as (strcmp(p1, p2) == 0). + /// + /// Example usage: + /// hash_set, str_equal_to > stringHashSet; + /// + /// Note: + /// You couldn't use str_equal_to like this: + /// bool result = equal("hi", "hi" + 2, "ho", str_equal_to()); + /// This is because equal tests an array of something, with each element by + /// the comparison function. But str_equal_to tests an array of something itself. + /// + /// To consider: Update this code to use existing word-based comparison optimizations, + /// such as that used in the EAStdC Strcmp function. + /// + template + struct str_equal_to : public binary_function + { + EA_CPP14_CONSTEXPR bool operator()(T a, T b) const + { + while(*a && (*a == *b)) + { + ++a; + ++b; + } + return (*a == *b); + } + }; + + template + struct greater : public binary_function + { + EA_CPP14_CONSTEXPR bool operator()(const T& a, const T& b) const + { return a > b; } + }; + + // http://en.cppreference.com/w/cpp/utility/functional/greater_void + template <> + struct greater + { + template + EA_CPP14_CONSTEXPR auto operator()(A&& a, B&& b) const + -> decltype(eastl::forward(a) > eastl::forward(b)) + { return eastl::forward(a) > eastl::forward(b); } + }; + + template + bool validate_greater(const T& a, const T& b, Compare compare) + { + return !compare(a, b) || !compare(b, a); // If (a > b), then !(b > a) + } + + + template + bool validate_less(const T& a, const T& b, Compare compare) + { + return !compare(a, b) || !compare(b, a); // If (a < b), then !(b < a) + } + + /// str_less + /// + /// Compares two 0-terminated string types. + /// The T types are expected to be iterators or act like iterators, + /// and that includes being a pointer to a C character array. + /// The expected behavior of str_less is the same as (strcmp(p1, p2) < 0). + /// This function is not Unicode-correct and it's not guaranteed to work + /// with all Unicode strings. + /// + /// Example usage: + /// set > stringSet; + /// + /// To consider: Update this code to use existing word-based comparison optimizations, + /// such as that used in the EAStdC Strcmp function. 
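// A sketch of the comparator usage the comments above describe. The container template
// arguments were lost in this patch text, so the ones below are assumptions taken from
// the wording of those comments.
#include <EASTL/set.h>
#include <EASTL/hash_set.h>

void StringFunctorSketch()
{
    // Orders C strings by their contents rather than by pointer value.
    eastl::set<const char*, eastl::str_less<const char*> > stringSet;
    stringSet.insert("world");
    stringSet.insert("hello");

    // Hashes on the characters and compares with str_equal_to, as in the comment above.
    eastl::hash_set<const char*, eastl::hash<const char*>, eastl::str_equal_to<const char*> > stringHashSet;
    stringHashSet.insert("hello");
}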
+ /// + template + struct str_less : public binary_function + { + bool operator()(T a, T b) const + { + while(static_cast::type>::type>(*a) == + static_cast::type>::type>(*b)) + { + if(*a == 0) + return (*b != 0); + ++a; + ++b; + } + + char aValue = static_cast::type>(*a); + char bValue = static_cast::type>(*b); + + typename make_unsigned::type aValueU = static_cast::type>(aValue); + typename make_unsigned::type bValueU = static_cast::type>(bValue); + + return aValueU < bValueU; + + //return (static_cast::type>::type>(*a) < + // static_cast::type>::type>(*b)); + } + }; + + template + struct greater_equal : public binary_function + { + EA_CPP14_CONSTEXPR bool operator()(const T& a, const T& b) const + { return a >= b; } + }; + + // http://en.cppreference.com/w/cpp/utility/functional/greater_equal_void + template <> + struct greater_equal + { + template + EA_CPP14_CONSTEXPR auto operator()(A&& a, B&& b) const + -> decltype(eastl::forward(a) >= eastl::forward(b)) + { return eastl::forward(a) >= eastl::forward(b); } + }; + + template + bool validate_greater_equal(const T& a, const T& b, Compare compare) + { + return !compare(a, b) || !compare(b, a); // If (a >= b), then !(b >= a) + } + + template + struct less_equal : public binary_function + { + EA_CPP14_CONSTEXPR bool operator()(const T& a, const T& b) const + { return a <= b; } + }; + + // http://en.cppreference.com/w/cpp/utility/functional/less_equal_void + template <> + struct less_equal + { + template + EA_CPP14_CONSTEXPR auto operator()(A&& a, B&& b) const + -> decltype(eastl::forward(a) <= eastl::forward(b)) + { return eastl::forward(a) <= eastl::forward(b); } + }; + + template + bool validate_less_equal(const T& a, const T& b, Compare compare) + { + return !compare(a, b) || !compare(b, a); // If (a <= b), then !(b <= a) + } + + template + struct logical_and : public binary_function + { + EA_CPP14_CONSTEXPR bool operator()(const T& a, const T& b) const + { return a && b; } + }; + + // http://en.cppreference.com/w/cpp/utility/functional/logical_and_void + template <> + struct logical_and + { + template + EA_CPP14_CONSTEXPR auto operator()(A&& a, B&& b) const + -> decltype(eastl::forward(a) && eastl::forward(b)) + { return eastl::forward(a) && eastl::forward(b); } + }; + + template + struct logical_or : public binary_function + { + EA_CPP14_CONSTEXPR bool operator()(const T& a, const T& b) const + { return a || b; } + }; + + // http://en.cppreference.com/w/cpp/utility/functional/logical_or_void + template <> + struct logical_or + { + template + EA_CPP14_CONSTEXPR auto operator()(A&& a, B&& b) const + -> decltype(eastl::forward(a) || eastl::forward(b)) + { return eastl::forward(a) || eastl::forward(b); } + }; + + template + struct logical_not : public unary_function + { + EA_CPP14_CONSTEXPR bool operator()(const T& a) const + { return !a; } + }; + + // http://en.cppreference.com/w/cpp/utility/functional/logical_not_void + template <> + struct logical_not + { + template + EA_CPP14_CONSTEXPR auto operator()(T&& t) const + -> decltype(!eastl::forward(t)) + { return !eastl::forward(t); } + }; + + + + /////////////////////////////////////////////////////////////////////// + // Dual type functions + /////////////////////////////////////////////////////////////////////// + + template + struct equal_to_2 : public binary_function + { + EA_CPP14_CONSTEXPR bool operator()(const T& a, const U& b) const + { return a == b; } + EA_CPP14_CONSTEXPR bool operator()(const U& b, const T& a) const // If you are getting a 'operator() already defined' error 
related to on this line while compiling a + { return b == a; } // hashtable class (e.g. hash_map), it's likely that you are using hashtable::find_as when you should + }; // be using hashtable::find instead. The problem is that (const T, U) collide. To do: make this work. + + template + struct equal_to_2 : public equal_to + { + }; + + + template + struct not_equal_to_2 : public binary_function + { + EA_CPP14_CONSTEXPR bool operator()(const T& a, const U& b) const + { return a != b; } + EA_CPP14_CONSTEXPR bool operator()(const U& b, const T& a) const + { return b != a; } + }; + + template + struct not_equal_to_2 : public not_equal_to + { + }; + + + template + struct less_2 : public binary_function + { + EA_CPP14_CONSTEXPR bool operator()(const T& a, const U& b) const + { return a < b; } + EA_CPP14_CONSTEXPR bool operator()(const U& b, const T& a) const + { return b < a; } + }; + + template + struct less_2 : public less + { + }; + + + + + /// unary_negate + /// + template + class unary_negate : public unary_function + { + protected: + Predicate mPredicate; + public: + explicit unary_negate(const Predicate& a) + : mPredicate(a) {} + EA_CPP14_CONSTEXPR bool operator()(const typename Predicate::argument_type& a) const + { return !mPredicate(a); } + }; + + template + inline EA_CPP14_CONSTEXPR unary_negate not1(const Predicate& predicate) + { return unary_negate(predicate); } + + + + /// binary_negate + /// + template + class binary_negate : public binary_function + { + protected: + Predicate mPredicate; + public: + explicit binary_negate(const Predicate& a) + : mPredicate(a) { } + EA_CPP14_CONSTEXPR bool operator()(const typename Predicate::first_argument_type& a, const typename Predicate::second_argument_type& b) const + { return !mPredicate(a, b); } + }; + + template + inline EA_CPP14_CONSTEXPR binary_negate not2(const Predicate& predicate) + { return binary_negate(predicate); } + + + + /// unary_compose + /// + template + struct unary_compose : public unary_function + { + protected: + Operation1 op1; + Operation2 op2; + + public: + unary_compose(const Operation1& x, const Operation2& y) + : op1(x), op2(y) {} + + typename Operation1::result_type operator()(const typename Operation2::argument_type& x) const + { return op1(op2(x)); } + + typename Operation1::result_type operator()(typename Operation2::argument_type& x) const + { return op1(op2(x)); } + }; + + template + inline unary_compose + compose1(const Operation1& op1, const Operation2& op2) + { + return unary_compose(op1,op2); + } + + + /// binary_compose + /// + template + class binary_compose : public unary_function + { + protected: + Operation1 op1; + Operation2 op2; + Operation3 op3; + + public: + // Support binary functors too. 
+ typedef typename Operation2::argument_type first_argument_type; + typedef typename Operation3::argument_type second_argument_type; + + binary_compose(const Operation1& x, const Operation2& y, const Operation3& z) + : op1(x), op2(y), op3(z) { } + + typename Operation1::result_type operator()(const typename Operation2::argument_type& x) const + { return op1(op2(x),op3(x)); } + + typename Operation1::result_type operator()(typename Operation2::argument_type& x) const + { return op1(op2(x),op3(x)); } + + typename Operation1::result_type operator()(const typename Operation2::argument_type& x,const typename Operation3::argument_type& y) const + { return op1(op2(x),op3(y)); } + + typename Operation1::result_type operator()(typename Operation2::argument_type& x, typename Operation3::argument_type& y) const + { return op1(op2(x),op3(y)); } + }; + + + template + inline binary_compose + compose2(const Operation1& op1, const Operation2& op2, const Operation3& op3) + { + return binary_compose(op1, op2, op3); + } + + + + /////////////////////////////////////////////////////////////////////// + // pointer_to_unary_function + /////////////////////////////////////////////////////////////////////// + + /// pointer_to_unary_function + /// + /// This is an adapter template which converts a pointer to a standalone + /// function to a function object. This allows standalone functions to + /// work in many cases where the system requires a function object. + /// + /// Example usage: + /// ptrdiff_t Rand(ptrdiff_t n) { return rand() % n; } // Note: The C rand function is poor and slow. + /// pointer_to_unary_function randInstance(Rand); + /// random_shuffle(pArrayBegin, pArrayEnd, randInstance); + /// + template + class pointer_to_unary_function : public unary_function + { + protected: + Result (*mpFunction)(Arg); + + public: + pointer_to_unary_function() + { } + + explicit pointer_to_unary_function(Result (*pFunction)(Arg)) + : mpFunction(pFunction) { } + + Result operator()(Arg x) const + { return mpFunction(x); } + }; + + + /// ptr_fun + /// + /// This ptr_fun is simply shorthand for usage of pointer_to_unary_function. + /// + /// Example usage (actually, you don't need to use ptr_fun here, but it works anyway): + /// int factorial(int x) { return (x > 1) ? (x * factorial(x - 1)) : x; } + /// transform(pIntArrayBegin, pIntArrayEnd, pIntArrayBegin, ptr_fun(factorial)); + /// + template + inline pointer_to_unary_function + ptr_fun(Result (*pFunction)(Arg)) + { return pointer_to_unary_function(pFunction); } + + + + + + /////////////////////////////////////////////////////////////////////// + // pointer_to_binary_function + /////////////////////////////////////////////////////////////////////// + + /// pointer_to_binary_function + /// + /// This is an adapter template which converts a pointer to a standalone + /// function to a function object. This allows standalone functions to + /// work in many cases where the system requires a function object. + /// + template + class pointer_to_binary_function : public binary_function + { + protected: + Result (*mpFunction)(Arg1, Arg2); + + public: + pointer_to_binary_function() + { } + + explicit pointer_to_binary_function(Result (*pFunction)(Arg1, Arg2)) + : mpFunction(pFunction) {} + + Result operator()(Arg1 x, Arg2 y) const + { return mpFunction(x, y); } + }; + + + /// This ptr_fun is simply shorthand for usage of pointer_to_binary_function. 
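A small sketch of the adapters above (not from this header), assuming <EASTL/vector.h> and <EASTL/algorithm.h>: ptr_fun wraps a free function in a functor, and not1 negates the resulting predicate so it can drive count_if.

    #include <EASTL/functional.h>
    #include <EASTL/algorithm.h>
    #include <EASTL/vector.h>

    static bool IsNegative(int x) { return x < 0; }

    void NegatedPtrFunSketch()
    {
        eastl::vector<int> v;
        v.push_back(-2); v.push_back(0); v.push_back(3);

        // Wrap the free function, then negate the predicate it produces.
        ptrdiff_t nonNegative =
            eastl::count_if(v.begin(), v.end(),
                            eastl::not1(eastl::ptr_fun(IsNegative))); // 2
        (void)nonNegative;
    }

Note that eastl::not_fn, defined further down in this header, is the modern replacement for the not1/not2 negators used here.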
+ /// + /// Example usage (actually, you don't need to use ptr_fun here, but it works anyway): + /// int multiply(int x, int y) { return x * y; } + /// transform(pIntArray1Begin, pIntArray1End, pIntArray2Begin, pIntArray1Begin, ptr_fun(multiply)); + /// + template + inline pointer_to_binary_function + ptr_fun(Result (*pFunction)(Arg1, Arg2)) + { return pointer_to_binary_function(pFunction); } + + + + + + + /////////////////////////////////////////////////////////////////////// + // mem_fun + // mem_fun1 + // + // Note that mem_fun calls member functions via *pointers* to classes + // and not instances of classes. mem_fun_ref is for calling functions + // via instances of classes or references to classes. + // + // NOTE: + // mem_fun was deprecated in C++11 and removed in C++17, in favor + // of the more general mem_fn and bind. + // + /////////////////////////////////////////////////////////////////////// + + /// mem_fun_t + /// + /// Member function with no arguments. + /// + template + class mem_fun_t : public unary_function + { + public: + typedef Result (T::*MemberFunction)(); + + inline explicit mem_fun_t(MemberFunction pMemberFunction) + : mpMemberFunction(pMemberFunction) + { + // Empty + } + + inline Result operator()(T* pT) const + { + return (pT->*mpMemberFunction)(); + } + + protected: + MemberFunction mpMemberFunction; + }; + + + /// mem_fun1_t + /// + /// Member function with one argument. + /// + template + class mem_fun1_t : public binary_function + { + public: + typedef Result (T::*MemberFunction)(Argument); + + inline explicit mem_fun1_t(MemberFunction pMemberFunction) + : mpMemberFunction(pMemberFunction) + { + // Empty + } + + inline Result operator()(T* pT, Argument arg) const + { + return (pT->*mpMemberFunction)(arg); + } + + protected: + MemberFunction mpMemberFunction; + }; + + + /// const_mem_fun_t + /// + /// Const member function with no arguments. + /// Note that we inherit from unary_function + /// instead of what the C++ standard specifies: unary_function. + /// The C++ standard is in error and this has been recognized by the defect group. + /// + template + class const_mem_fun_t : public unary_function + { + public: + typedef Result (T::*MemberFunction)() const; + + inline explicit const_mem_fun_t(MemberFunction pMemberFunction) + : mpMemberFunction(pMemberFunction) + { + // Empty + } + + inline Result operator()(const T* pT) const + { + return (pT->*mpMemberFunction)(); + } + + protected: + MemberFunction mpMemberFunction; + }; + + + /// const_mem_fun1_t + /// + /// Const member function with one argument. + /// Note that we inherit from unary_function + /// instead of what the C++ standard specifies: unary_function. + /// The C++ standard is in error and this has been recognized by the defect group. + /// + template + class const_mem_fun1_t : public binary_function + { + public: + typedef Result (T::*MemberFunction)(Argument) const; + + inline explicit const_mem_fun1_t(MemberFunction pMemberFunction) + : mpMemberFunction(pMemberFunction) + { + // Empty + } + + inline Result operator()(const T* pT, Argument arg) const + { + return (pT->*mpMemberFunction)(arg); + } + + protected: + MemberFunction mpMemberFunction; + }; + + + /// mem_fun + /// + /// This is the high level interface to the mem_fun_t family. + /// + /// Example usage: + /// struct TestClass { void print() { puts("hello"); } } + /// TestClass* pTestClassArray[3] = { ... 
}; + /// for_each(pTestClassArray, pTestClassArray + 3, &TestClass::print); + /// + /// Note: using conventional inlining here to avoid issues on GCC/Linux + /// + template + inline mem_fun_t + mem_fun(Result (T::*MemberFunction)()) + { + return eastl::mem_fun_t(MemberFunction); + } + + template + inline mem_fun1_t + mem_fun(Result (T::*MemberFunction)(Argument)) + { + return eastl::mem_fun1_t(MemberFunction); + } + + template + inline const_mem_fun_t + mem_fun(Result (T::*MemberFunction)() const) + { + return eastl::const_mem_fun_t(MemberFunction); + } + + template + inline const_mem_fun1_t + mem_fun(Result (T::*MemberFunction)(Argument) const) + { + return eastl::const_mem_fun1_t(MemberFunction); + } + + + + + + /////////////////////////////////////////////////////////////////////// + // mem_fun_ref + // mem_fun1_ref + // + /////////////////////////////////////////////////////////////////////// + + /// mem_fun_ref_t + /// + template + class mem_fun_ref_t : public unary_function + { + public: + typedef Result (T::*MemberFunction)(); + + inline explicit mem_fun_ref_t(MemberFunction pMemberFunction) + : mpMemberFunction(pMemberFunction) + { + // Empty + } + + inline Result operator()(T& t) const + { + return (t.*mpMemberFunction)(); + } + + protected: + MemberFunction mpMemberFunction; + }; + + + /// mem_fun1_ref_t + /// + template + class mem_fun1_ref_t : public binary_function + { + public: + typedef Result (T::*MemberFunction)(Argument); + + inline explicit mem_fun1_ref_t(MemberFunction pMemberFunction) + : mpMemberFunction(pMemberFunction) + { + // Empty + } + + inline Result operator()(T& t, Argument arg) const + { + return (t.*mpMemberFunction)(arg); + } + + protected: + MemberFunction mpMemberFunction; + }; + + + /// const_mem_fun_ref_t + /// + template + class const_mem_fun_ref_t : public unary_function + { + public: + typedef Result (T::*MemberFunction)() const; + + inline explicit const_mem_fun_ref_t(MemberFunction pMemberFunction) + : mpMemberFunction(pMemberFunction) + { + // Empty + } + + inline Result operator()(const T& t) const + { + return (t.*mpMemberFunction)(); + } + + protected: + MemberFunction mpMemberFunction; + }; + + + /// const_mem_fun1_ref_t + /// + template + class const_mem_fun1_ref_t : public binary_function + { + public: + typedef Result (T::*MemberFunction)(Argument) const; + + inline explicit const_mem_fun1_ref_t(MemberFunction pMemberFunction) + : mpMemberFunction(pMemberFunction) + { + // Empty + } + + inline Result operator()(const T& t, Argument arg) const + { + return (t.*mpMemberFunction)(arg); + } + + protected: + MemberFunction mpMemberFunction; + }; + + + /// mem_fun_ref + /// Example usage: + /// struct TestClass { void print() { puts("hello"); } } + /// TestClass testClassArray[3]; + /// for_each(testClassArray, testClassArray + 3, &TestClass::print); + /// + /// Note: using conventional inlining here to avoid issues on GCC/Linux + /// + template + inline mem_fun_ref_t + mem_fun_ref(Result (T::*MemberFunction)()) + { + return eastl::mem_fun_ref_t(MemberFunction); + } + + template + inline mem_fun1_ref_t + mem_fun_ref(Result (T::*MemberFunction)(Argument)) + { + return eastl::mem_fun1_ref_t(MemberFunction); + } + + template + inline const_mem_fun_ref_t + mem_fun_ref(Result (T::*MemberFunction)() const) + { + return eastl::const_mem_fun_ref_t(MemberFunction); + } + + template + inline const_mem_fun1_ref_t + mem_fun_ref(Result (T::*MemberFunction)(Argument) const) + { + return eastl::const_mem_fun1_ref_t(MemberFunction); + } + + + // not_fn_ret + // 
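A minimal sketch of the mem_fun/mem_fun_ref adapters above (not part of the header), assuming <EASTL/vector.h> and <EASTL/algorithm.h>; as the note above says, these adapters were deprecated in C++11 in favor of mem_fn and bind, but they still illustrate the pointer-versus-reference distinction.

    #include <EASTL/functional.h>
    #include <EASTL/algorithm.h>
    #include <EASTL/vector.h>
    #include <cstdio>

    struct Door { void open() { puts("open"); } };

    void MemFunSketch()
    {
        eastl::vector<Door> doors(3);

        // mem_fun_ref calls the member through a reference to each element...
        eastl::for_each(doors.begin(), doors.end(), eastl::mem_fun_ref(&Door::open));

        // ...while mem_fun calls it through a pointer.
        eastl::vector<Door*> doorPtrs;
        doorPtrs.push_back(&doors[0]);
        doorPtrs.push_back(&doors[1]);
        eastl::for_each(doorPtrs.begin(), doorPtrs.end(), eastl::mem_fun(&Door::open));
    }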
not_fn_ret is a implementation specified return type of eastl::not_fn. + // The type name is not specified but it does have mandated functions that conforming implementations must support. + // + // http://en.cppreference.com/w/cpp/utility/functional/not_fn + // + template + struct not_fn_ret + { + explicit not_fn_ret(F&& f) : mDecayF(eastl::forward(f)) {} + not_fn_ret(not_fn_ret&& f) = default; + not_fn_ret(const not_fn_ret& f) = default; + + // overloads for lvalues + template + auto operator()(Args&&... args) & + -> decltype(!eastl::declval&, Args...>>()) + { return !eastl::invoke(mDecayF, eastl::forward(args)...); } + + template + auto operator()(Args&&... args) const & + -> decltype(!eastl::declval const&, Args...>>()) + { return !eastl::invoke(mDecayF, eastl::forward(args)...); } + + // overloads for rvalues + template + auto operator()(Args&&... args) && + -> decltype(!eastl::declval, Args...>>()) + { return !eastl::invoke(eastl::move(mDecayF), eastl::forward(args)...); } + + template + auto operator()(Args&&... args) const && + -> decltype(!eastl::declval const, Args...>>()) + { return !eastl::invoke(eastl::move(mDecayF), eastl::forward(args)...); } + + eastl::decay_t mDecayF; + }; + + /// not_fn + /// + /// Creates an implementation specified functor that returns the complement of the callable object it was passed. + /// not_fn is intended to replace the C++03-era negators eastl::not1 and eastl::not2. + /// + /// http://en.cppreference.com/w/cpp/utility/functional/not_fn + /// + /// Example usage: + /// + /// auto nf = eastl::not_fn([]{ return false; }); + /// assert(nf()); // return true + /// + template + inline not_fn_ret not_fn(F&& f) + { + return not_fn_ret(eastl::forward(f)); + } + + + /////////////////////////////////////////////////////////////////////// + // hash + /////////////////////////////////////////////////////////////////////// + namespace Internal + { + // utility to disable the generic template specialization that is + // used for enum types only. + template + struct EnableHashIf {}; + + template + struct EnableHashIf + { + size_t operator()(T p) const { return size_t(p); } + }; + } // namespace Internal + + + template struct hash; + + template + struct hash : Internal::EnableHashIf> {}; + + template struct hash // Note that we use the pointer as-is and don't divide by sizeof(T*). This is because the table is of a prime size and this division doesn't benefit distribution. + { size_t operator()(T* p) const { return size_t(uintptr_t(p)); } }; + + template <> struct hash + { size_t operator()(bool val) const { return static_cast(val); } }; + + template <> struct hash + { size_t operator()(char val) const { return static_cast(val); } }; + + template <> struct hash + { size_t operator()(signed char val) const { return static_cast(val); } }; + + template <> struct hash + { size_t operator()(unsigned char val) const { return static_cast(val); } }; + + #if defined(EA_CHAR8_UNIQUE) && EA_CHAR8_UNIQUE + template <> struct hash + { size_t operator()(char8_t val) const { return static_cast(val); } }; + #endif + + #if defined(EA_CHAR16_NATIVE) && EA_CHAR16_NATIVE + template <> struct hash + { size_t operator()(char16_t val) const { return static_cast(val); } }; + #endif + + #if defined(EA_CHAR32_NATIVE) && EA_CHAR32_NATIVE + template <> struct hash + { size_t operator()(char32_t val) const { return static_cast(val); } }; + #endif + + // If wchar_t is a native type instead of simply a define to an existing type... 
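The Internal::EnableHashIf machinery above is what lets the generic eastl::hash template work for enum keys without a user-supplied functor. A minimal sketch of that behavior, assuming <EASTL/hash_map.h> and a C++11 scoped enum:

    #include <EASTL/functional.h>
    #include <EASTL/hash_map.h>

    enum class Channel : uint32_t { Red, Green, Blue };

    void EnumHashSketch()
    {
        // eastl::hash<Channel> resolves to the enum-enabled generic hash,
        // which simply converts the enumerator to size_t.
        eastl::hash_map<Channel, int> sampleCount;
        sampleCount[Channel::Red]  = 1;
        sampleCount[Channel::Blue] = 2;
    }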
+ #if !defined(EA_WCHAR_T_NON_NATIVE) + template <> struct hash + { size_t operator()(wchar_t val) const { return static_cast(val); } }; + #endif + + template <> struct hash + { size_t operator()(signed short val) const { return static_cast(val); } }; + + template <> struct hash + { size_t operator()(unsigned short val) const { return static_cast(val); } }; + + template <> struct hash + { size_t operator()(signed int val) const { return static_cast(val); } }; + + template <> struct hash + { size_t operator()(unsigned int val) const { return static_cast(val); } }; + + template <> struct hash + { size_t operator()(signed long val) const { return static_cast(val); } }; + + template <> struct hash + { size_t operator()(unsigned long val) const { return static_cast(val); } }; + + template <> struct hash + { size_t operator()(signed long long val) const { return static_cast(val); } }; + + template <> struct hash + { size_t operator()(unsigned long long val) const { return static_cast(val); } }; + + template <> struct hash + { size_t operator()(float val) const { return static_cast(val); } }; + + template <> struct hash + { size_t operator()(double val) const { return static_cast(val); } }; + + template <> struct hash + { size_t operator()(long double val) const { return static_cast(val); } }; + + #if defined(EA_HAVE_INT128) && EA_HAVE_INT128 + template <> struct hash + { size_t operator()(uint128_t val) const { return static_cast(val); } }; + #endif + + + /////////////////////////////////////////////////////////////////////////// + // string hashes + // + // Note that our string hashes here intentionally are slow for long strings. + // The reasoning for this is so: + // - The large majority of hashed strings are only a few bytes long. + // - The hash function is significantly more efficient if it can make this assumption. + // - The user is welcome to make a custom hash for those uncommon cases where + // long strings need to be hashed. Indeed, the user can probably make a + // special hash customized for such strings that's better than what we provide. + /////////////////////////////////////////////////////////////////////////// + + template <> struct hash + { + size_t operator()(const char* p) const + { + uint32_t c, result = 2166136261U; // FNV1 hash. Perhaps the best string hash. Intentionally uint32_t instead of size_t, so the behavior is the same regardless of size. + while((c = (uint8_t)*p++) != 0) // Using '!=' disables compiler warnings. + result = (result * 16777619) ^ c; + return (size_t)result; + } + }; + + template <> struct hash + { + size_t operator()(const char* p) const + { + uint32_t c, result = 2166136261U; // Intentionally uint32_t instead of size_t, so the behavior is the same regardless of size. + while((c = (uint8_t)*p++) != 0) // cast to unsigned 8 bit. + result = (result * 16777619) ^ c; + return (size_t)result; + } + }; + +#if EA_CHAR8_UNIQUE + template <> struct hash + { + size_t operator()(const char8_t* p) const + { + uint32_t c, result = 2166136261U; // FNV1 hash. Perhaps the best string hash. Intentionally uint32_t instead of size_t, so the behavior is the same regardless of size. + while((c = (uint8_t)*p++) != 0) // Using '!=' disables compiler warnings. + result = (result * 16777619) ^ c; + return (size_t)result; + } + }; + + template <> struct hash + { + size_t operator()(const char8_t* p) const + { + uint32_t c, result = 2166136261U; // Intentionally uint32_t instead of size_t, so the behavior is the same regardless of size. 
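+			// FNV-1 step: multiply the running hash by the 32-bit FNV prime (16777619),
+			// then XOR in the next byte; 2166136261 is the 32-bit FNV-1 offset basis.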
+ while((c = (uint8_t)*p++) != 0) // cast to unsigned 8 bit. + result = (result * 16777619) ^ c; + return (size_t)result; + } + }; +#endif + + + template <> struct hash + { + size_t operator()(const char16_t* p) const + { + uint32_t c, result = 2166136261U; // Intentionally uint32_t instead of size_t, so the behavior is the same regardless of size. + while((c = (uint16_t)*p++) != 0) // cast to unsigned 16 bit. + result = (result * 16777619) ^ c; + return (size_t)result; + } + }; + + template <> struct hash + { + size_t operator()(const char16_t* p) const + { + uint32_t c, result = 2166136261U; // Intentionally uint32_t instead of size_t, so the behavior is the same regardless of size. + while((c = (uint16_t)*p++) != 0) // cast to unsigned 16 bit. + result = (result * 16777619) ^ c; + return (size_t)result; + } + }; + + template <> struct hash + { + size_t operator()(const char32_t* p) const + { + uint32_t c, result = 2166136261U; // Intentionally uint32_t instead of size_t, so the behavior is the same regardless of size. + while((c = (uint32_t)*p++) != 0) // cast to unsigned 32 bit. + result = (result * 16777619) ^ c; + return (size_t)result; + } + }; + + template <> struct hash + { + size_t operator()(const char32_t* p) const + { + uint32_t c, result = 2166136261U; // Intentionally uint32_t instead of size_t, so the behavior is the same regardless of size. + while((c = (uint32_t)*p++) != 0) // cast to unsigned 32 bit. + result = (result * 16777619) ^ c; + return (size_t)result; + } + }; + +#if defined(EA_WCHAR_UNIQUE) && EA_WCHAR_UNIQUE + template<> struct hash + { + size_t operator()(const wchar_t* p) const + { + uint32_t c, result = 2166136261U; // Intentionally uint32_t instead of size_t, so the behavior is the same regardless of size. + while ((c = (uint32_t)*p++) != 0) // cast to unsigned 32 bit. + result = (result * 16777619) ^ c; + return (size_t)result; + } + }; + + template<> struct hash + { + size_t operator()(const wchar_t* p) const + { + uint32_t c, result = 2166136261U; // Intentionally uint32_t instead of size_t, so the behavior is the same regardless of size. + while ((c = (uint32_t)*p++) != 0) // cast to unsigned 32 bit. + result = (result * 16777619) ^ c; + return (size_t)result; + } + }; +#endif + + /// string_hash + /// + /// Defines a generic string hash for an arbitrary EASTL basic_string container. + /// + /// Example usage: + /// eastl::hash_set > hashSet; + /// + template + struct string_hash + { + typedef String string_type; + typedef typename String::value_type value_type; + typedef typename eastl::add_unsigned::type unsigned_value_type; + + size_t operator()(const string_type& s) const + { + const unsigned_value_type* p = (const unsigned_value_type*)s.c_str(); + uint32_t c, result = 2166136261U; // Intentionally uint32_t instead of size_t, so the behavior is the same regardless of size. + while((c = *p++) != 0) + result = (result * 16777619) ^ c; + return (size_t)result; + } + }; + + +} // namespace eastl + +#include + +#endif // Header include guard + + + + + + + diff --git a/libkram/eastl/include/EASTL/hash_map.h b/libkram/eastl/include/EASTL/hash_map.h new file mode 100644 index 00000000..c363597f --- /dev/null +++ b/libkram/eastl/include/EASTL/hash_map.h @@ -0,0 +1,580 @@ +/////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
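For completeness, a sketch (not from these headers) of the string_hash adapter defined at the end of functional.h above, assuming <EASTL/string.h> and <EASTL/hash_set.h>; it applies the same FNV-1 loop to an arbitrary basic_string type.

    #include <EASTL/functional.h>
    #include <EASTL/string.h>
    #include <EASTL/hash_set.h>

    void StringHashSketch()
    {
        typedef eastl::basic_string<char> String8;

        eastl::hash_set<String8, eastl::string_hash<String8> > stringSet;
        stringSet.insert(String8("kram"));
        bool present = (stringSet.find(String8("kram")) != stringSet.end()); // true
        (void)present;
    }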
+/////////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// This file is based on the TR1 (technical report 1) reference implementation +// of the unordered_set/unordered_map C++ classes as of about 4/2005. Most likely +// many or all C++ library vendors' implementations of this classes will be +// based off of the reference version and so will look pretty similar to this +// file as well as other vendors' versions. +/////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_HASH_MAP_H +#define EASTL_HASH_MAP_H + + +#include +#include +#include +#include + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + + + +namespace eastl +{ + + /// EASTL_HASH_MAP_DEFAULT_NAME + /// + /// Defines a default container name in the absence of a user-provided name. + /// + #ifndef EASTL_HASH_MAP_DEFAULT_NAME + #define EASTL_HASH_MAP_DEFAULT_NAME EASTL_DEFAULT_NAME_PREFIX " hash_map" // Unless the user overrides something, this is "EASTL hash_map". + #endif + + + /// EASTL_HASH_MULTIMAP_DEFAULT_NAME + /// + /// Defines a default container name in the absence of a user-provided name. + /// + #ifndef EASTL_HASH_MULTIMAP_DEFAULT_NAME + #define EASTL_HASH_MULTIMAP_DEFAULT_NAME EASTL_DEFAULT_NAME_PREFIX " hash_multimap" // Unless the user overrides something, this is "EASTL hash_multimap". + #endif + + + /// EASTL_HASH_MAP_DEFAULT_ALLOCATOR + /// + #ifndef EASTL_HASH_MAP_DEFAULT_ALLOCATOR + #define EASTL_HASH_MAP_DEFAULT_ALLOCATOR allocator_type(EASTL_HASH_MAP_DEFAULT_NAME) + #endif + + /// EASTL_HASH_MULTIMAP_DEFAULT_ALLOCATOR + /// + #ifndef EASTL_HASH_MULTIMAP_DEFAULT_ALLOCATOR + #define EASTL_HASH_MULTIMAP_DEFAULT_ALLOCATOR allocator_type(EASTL_HASH_MULTIMAP_DEFAULT_NAME) + #endif + + + + /// hash_map + /// + /// Implements a hash_map, which is a hashed associative container. + /// Lookups are O(1) (that is, they are fast) but the container is + /// not sorted. Note that lookups are only O(1) if the hash table + /// is well-distributed (non-colliding). The lookup approaches + /// O(n) behavior as the table becomes increasingly poorly distributed. + /// + /// set_max_load_factor + /// If you want to make a hashtable never increase its bucket usage, + /// call set_max_load_factor with a very high value such as 100000.f. + /// + /// bCacheHashCode + /// We provide the boolean bCacheHashCode template parameter in order + /// to allow the storing of the hash code of the key within the map. + /// When this option is disabled, the rehashing of the table will + /// call the hash function on the key. Setting bCacheHashCode to true + /// is useful for cases whereby the calculation of the hash value for + /// a contained object is very expensive. + /// + /// find_as + /// In order to support the ability to have a hashtable of strings but + /// be able to do efficiently lookups via char pointers (i.e. so they + /// aren't converted to string objects), we provide the find_as + /// function. This function allows you to do a find with a key of a + /// type other than the hashtable key type. + /// + /// Example find_as usage: + /// hash_map hashMap; + /// i = hashMap.find_as("hello"); // Use default hash and compare. 
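A concrete sketch of the find_as usage described above (assuming <EASTL/string.h> is included, so eastl::hash<eastl::string> and comparison against char pointers are available): the key is hashed straight from the C string and compared across types, so no temporary eastl::string is built.

    #include <EASTL/hash_map.h>
    #include <EASTL/string.h>

    void FindAsSketch()
    {
        eastl::hash_map<eastl::string, int> wordCount;
        wordCount[eastl::string("hello")] = 3;

        // Look the key up from a plain char pointer.
        eastl::hash_map<eastl::string, int>::iterator it =
            wordCount.find_as("hello", eastl::hash<const char*>(),
                              eastl::equal_to_2<eastl::string, const char*>());
        bool found = (it != wordCount.end()); // true
        (void)found;
    }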
+ /// + /// Example find_as usage (namespaces omitted for brevity): + /// hash_map hashMap; + /// i = hashMap.find_as("hello", hash(), equal_to_2()); + /// + template , typename Predicate = eastl::equal_to, + typename Allocator = EASTLAllocatorType, bool bCacheHashCode = false> + class hash_map + : public hashtable, Allocator, eastl::use_first >, Predicate, + Hash, mod_range_hashing, default_ranged_hash, prime_rehash_policy, bCacheHashCode, true, true> + { + public: + typedef hashtable, Allocator, + eastl::use_first >, + Predicate, Hash, mod_range_hashing, default_ranged_hash, + prime_rehash_policy, bCacheHashCode, true, true> base_type; + typedef hash_map this_type; + typedef typename base_type::size_type size_type; + typedef typename base_type::key_type key_type; + typedef T mapped_type; + typedef typename base_type::value_type value_type; // NOTE: 'value_type = pair'. + typedef typename base_type::allocator_type allocator_type; + typedef typename base_type::node_type node_type; + typedef typename base_type::insert_return_type insert_return_type; + typedef typename base_type::iterator iterator; + typedef typename base_type::const_iterator const_iterator; + + using base_type::insert; + + public: + /// hash_map + /// + /// Default constructor. + /// + explicit hash_map(const allocator_type& allocator = EASTL_HASH_MAP_DEFAULT_ALLOCATOR) + : base_type(0, Hash(), mod_range_hashing(), default_ranged_hash(), + Predicate(), eastl::use_first >(), allocator) + { + // Empty + } + + + /// hash_map + /// + /// Constructor which creates an empty container, but start with nBucketCount buckets. + /// We default to a small nBucketCount value, though the user really should manually + /// specify an appropriate value in order to prevent memory from being reallocated. + /// + explicit hash_map(size_type nBucketCount, const Hash& hashFunction = Hash(), + const Predicate& predicate = Predicate(), const allocator_type& allocator = EASTL_HASH_MAP_DEFAULT_ALLOCATOR) + : base_type(nBucketCount, hashFunction, mod_range_hashing(), default_ranged_hash(), + predicate, eastl::use_first >(), allocator) + { + // Empty + } + + + hash_map(const this_type& x) + : base_type(x) + { + } + + + hash_map(this_type&& x) + : base_type(eastl::move(x)) + { + } + + + hash_map(this_type&& x, const allocator_type& allocator) + : base_type(eastl::move(x), allocator) + { + } + + + /// hash_map + /// + /// initializer_list-based constructor. + /// Allows for initializing with brace values (e.g. hash_map hm = { {3,"c"}, {4,"d"}, {5,"e"} }; ) + /// + hash_map(std::initializer_list ilist, size_type nBucketCount = 0, const Hash& hashFunction = Hash(), + const Predicate& predicate = Predicate(), const allocator_type& allocator = EASTL_HASH_MAP_DEFAULT_ALLOCATOR) + : base_type(ilist.begin(), ilist.end(), nBucketCount, hashFunction, mod_range_hashing(), default_ranged_hash(), + predicate, eastl::use_first >(), allocator) + { + // Empty + } + + + /// hash_map + /// + /// An input bucket count of <= 1 causes the bucket count to be equal to the number of + /// elements in the input range. 
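A sketch of the tuning knobs mentioned in the class comments above (assuming <EASTL/hash_map.h> and <EASTL/string.h>): pre-size the bucket array, pin the load factor, and cache hash codes when hashing the key is expensive. The set_max_load_factor call follows the wording of the class comment; its exact spelling may vary across EASTL versions.

    #include <EASTL/hash_map.h>
    #include <EASTL/string.h>

    void HashMapTuningSketch()
    {
        // Start with enough buckets to avoid rehashing during the initial fill.
        eastl::hash_map<eastl::string, int> counts(257);

        // Effectively freeze the bucket count, as the class comments suggest.
        counts.set_max_load_factor(100000.f);

        // bCacheHashCode = true stores each key's hash in the node, so rehashing
        // does not need to re-hash expensive keys.
        eastl::hash_map<eastl::string, int,
                        eastl::hash<eastl::string>, eastl::equal_to<eastl::string>,
                        EASTLAllocatorType, true> cachedCounts;
        cachedCounts[eastl::string("kram")] = 1;
    }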
+ /// + template + hash_map(ForwardIterator first, ForwardIterator last, size_type nBucketCount = 0, const Hash& hashFunction = Hash(), + const Predicate& predicate = Predicate(), const allocator_type& allocator = EASTL_HASH_MAP_DEFAULT_ALLOCATOR) + : base_type(first, last, nBucketCount, hashFunction, mod_range_hashing(), default_ranged_hash(), + predicate, eastl::use_first >(), allocator) + { + // Empty + } + + + this_type& operator=(const this_type& x) + { + return static_cast(base_type::operator=(x)); + } + + + this_type& operator=(std::initializer_list ilist) + { + return static_cast(base_type::operator=(ilist)); + } + + + this_type& operator=(this_type&& x) + { + return static_cast(base_type::operator=(eastl::move(x))); + } + + + /// insert + /// + /// This is an extension to the C++ standard. We insert a default-constructed + /// element with the given key. The reason for this is that we can avoid the + /// potentially expensive operation of creating and/or copying a mapped_type + /// object on the stack. + insert_return_type insert(const key_type& key) + { + return base_type::DoInsertKey(true_type(), key); + } + + T& at(const key_type& k) + { + iterator it = base_type::find(k); + + if (it == base_type::end()) + { + #if EASTL_EXCEPTIONS_ENABLED + // throw exeption if exceptions enabled + throw std::out_of_range("invalid hash_map key"); + #else + // assert false if asserts enabled + EASTL_ASSERT_MSG(false, "invalid hash_map key"); + #endif + } + // undefined behaviour if exceptions and asserts are disabled and it == end() + return it->second; + } + + + const T& at(const key_type& k) const + { + const_iterator it = base_type::find(k); + + if (it == base_type::end()) + { + #if EASTL_EXCEPTIONS_ENABLED + // throw exeption if exceptions enabled + throw std::out_of_range("invalid hash_map key"); + #else + // assert false if asserts enabled + EASTL_ASSERT_MSG(false, "invalid hash_map key"); + #endif + } + // undefined behaviour if exceptions and asserts are disabled and it == end() + return it->second; + } + + + insert_return_type insert(key_type&& key) + { + return base_type::DoInsertKey(true_type(), eastl::move(key)); + } + + + mapped_type& operator[](const key_type& key) + { + return (*base_type::DoInsertKey(true_type(), key).first).second; + + // Slower reference version: + //const typename base_type::iterator it = base_type::find(key); + //if(it != base_type::end()) + // return (*it).second; + //return (*base_type::insert(value_type(key, mapped_type())).first).second; + } + + mapped_type& operator[](key_type&& key) + { + // The Standard states that this function "inserts the value value_type(std::move(key), mapped_type())" + return (*base_type::DoInsertKey(true_type(), eastl::move(key)).first).second; + } + + + }; // hash_map + + /// hash_map erase_if + /// + /// https://en.cppreference.com/w/cpp/container/unordered_map/erase_if + template + void erase_if(eastl::hash_map& c, UserPredicate predicate) + { + // Erases all elements that satisfy the predicate from the container. + for (auto i = c.begin(), last = c.end(); i != last;) + { + if (predicate(*i)) + { + i = c.erase(i); + } + else + { + ++i; + } + } + } + + + /// hash_multimap + /// + /// Implements a hash_multimap, which is the same thing as a hash_map + /// except that contained elements need not be unique. See the + /// documentation for hash_set for details. 
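A short sketch of the hash_map extensions shown above, assuming <EASTL/hash_map.h>: operator[] default-constructs missing values, at() asserts or throws on a missing key, and the free erase_if removes everything matching a predicate in one pass.

    #include <EASTL/hash_map.h>

    void HashMapEraseIfSketch()
    {
        eastl::hash_map<int, int> squares;
        for (int i = 0; i < 10; ++i)
            squares[i] = i * i;          // inserts a default int, then assigns

        int nine = squares.at(3);        // 9; asserts (or throws) if the key were missing
        (void)nine;

        // Drop all entries with even keys; erase_if advances past erased nodes safely.
        eastl::erase_if(squares, [](const eastl::hash_map<int, int>::value_type& kv)
                                 { return (kv.first % 2) == 0; });
        // squares.size() == 5
    }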
+ /// + template , typename Predicate = eastl::equal_to, + typename Allocator = EASTLAllocatorType, bool bCacheHashCode = false> + class hash_multimap + : public hashtable, Allocator, eastl::use_first >, Predicate, + Hash, mod_range_hashing, default_ranged_hash, prime_rehash_policy, bCacheHashCode, true, false> + { + public: + typedef hashtable, Allocator, + eastl::use_first >, + Predicate, Hash, mod_range_hashing, default_ranged_hash, + prime_rehash_policy, bCacheHashCode, true, false> base_type; + typedef hash_multimap this_type; + typedef typename base_type::size_type size_type; + typedef typename base_type::key_type key_type; + typedef T mapped_type; + typedef typename base_type::value_type value_type; // Note that this is pair. + typedef typename base_type::allocator_type allocator_type; + typedef typename base_type::node_type node_type; + typedef typename base_type::insert_return_type insert_return_type; + typedef typename base_type::iterator iterator; + + using base_type::insert; + + private: + using base_type::try_emplace; + using base_type::insert_or_assign; + + public: + /// hash_multimap + /// + /// Default constructor. + /// + explicit hash_multimap(const allocator_type& allocator = EASTL_HASH_MULTIMAP_DEFAULT_ALLOCATOR) + : base_type(0, Hash(), mod_range_hashing(), default_ranged_hash(), + Predicate(), eastl::use_first >(), allocator) + { + // Empty + } + + + /// hash_multimap + /// + /// Constructor which creates an empty container, but start with nBucketCount buckets. + /// We default to a small nBucketCount value, though the user really should manually + /// specify an appropriate value in order to prevent memory from being reallocated. + /// + explicit hash_multimap(size_type nBucketCount, const Hash& hashFunction = Hash(), + const Predicate& predicate = Predicate(), const allocator_type& allocator = EASTL_HASH_MULTIMAP_DEFAULT_ALLOCATOR) + : base_type(nBucketCount, hashFunction, mod_range_hashing(), default_ranged_hash(), + predicate, eastl::use_first >(), allocator) + { + // Empty + } + + + hash_multimap(const this_type& x) + : base_type(x) + { + } + + + hash_multimap(this_type&& x) + : base_type(eastl::move(x)) + { + } + + + hash_multimap(this_type&& x, const allocator_type& allocator) + : base_type(eastl::move(x), allocator) + { + } + + + /// hash_multimap + /// + /// initializer_list-based constructor. + /// Allows for initializing with brace values (e.g. hash_multimap hm = { {3,"c"}, {3,"C"}, {4,"d"} }; ) + /// + hash_multimap(std::initializer_list ilist, size_type nBucketCount = 0, const Hash& hashFunction = Hash(), + const Predicate& predicate = Predicate(), const allocator_type& allocator = EASTL_HASH_MULTIMAP_DEFAULT_ALLOCATOR) + : base_type(ilist.begin(), ilist.end(), nBucketCount, hashFunction, mod_range_hashing(), default_ranged_hash(), + predicate, eastl::use_first >(), allocator) + { + // Empty + } + + + /// hash_multimap + /// + /// An input bucket count of <= 1 causes the bucket count to be equal to the number of + /// elements in the input range. 
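A brief sketch of duplicate-key behavior, assuming <EASTL/hash_map.h> and <EASTL/string.h>: unlike hash_map, hash_multimap keeps every inserted pair, and equal_range retrieves all values stored under one key.

    #include <EASTL/hash_map.h>
    #include <EASTL/string.h>
    #include <EASTL/iterator.h>

    void HashMultimapSketch()
    {
        typedef eastl::hash_multimap<eastl::string, int> PhoneBook;

        PhoneBook phones;
        phones.insert(PhoneBook::value_type(eastl::string("alice"), 5551234));
        phones.insert(PhoneBook::value_type(eastl::string("alice"), 5555678)); // duplicate key kept

        eastl::pair<PhoneBook::iterator, PhoneBook::iterator> range =
            phones.equal_range(eastl::string("alice"));
        ptrdiff_t entries = eastl::distance(range.first, range.second); // 2
        (void)entries;
    }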
+ /// + template + hash_multimap(ForwardIterator first, ForwardIterator last, size_type nBucketCount = 0, const Hash& hashFunction = Hash(), + const Predicate& predicate = Predicate(), const allocator_type& allocator = EASTL_HASH_MULTIMAP_DEFAULT_ALLOCATOR) + : base_type(first, last, nBucketCount, hashFunction, mod_range_hashing(), default_ranged_hash(), + predicate, eastl::use_first >(), allocator) + { + // Empty + } + + + this_type& operator=(const this_type& x) + { + return static_cast(base_type::operator=(x)); + } + + + this_type& operator=(std::initializer_list ilist) + { + return static_cast(base_type::operator=(ilist)); + } + + + this_type& operator=(this_type&& x) + { + return static_cast(base_type::operator=(eastl::move(x))); + } + + + /// insert + /// + /// This is an extension to the C++ standard. We insert a default-constructed + /// element with the given key. The reason for this is that we can avoid the + /// potentially expensive operation of creating and/or copying a mapped_type + /// object on the stack. + insert_return_type insert(const key_type& key) + { + return base_type::DoInsertKey(false_type(), key); + } + + + insert_return_type insert(key_type&& key) + { + return base_type::DoInsertKey(false_type(), eastl::move(key)); + } + + }; // hash_multimap + + /// hash_multimap erase_if + /// + /// https://en.cppreference.com/w/cpp/container/unordered_multimap/erase_if + template + void erase_if(eastl::hash_multimap& c, UserPredicate predicate) + { + // Erases all elements that satisfy the predicate from the container. + for (auto i = c.begin(), last = c.end(); i != last;) + { + if (predicate(*i)) + { + i = c.erase(i); + } + else + { + ++i; + } + } + } + + + + /////////////////////////////////////////////////////////////////////// + // global operators + /////////////////////////////////////////////////////////////////////// + + template + inline bool operator==(const hash_map& a, + const hash_map& b) + { + typedef typename hash_map::const_iterator const_iterator; + + // We implement branching with the assumption that the return value is usually false. + if(a.size() != b.size()) + return false; + + // For map (with its unique keys), we need only test that each element in a can be found in b, + // as there can be only one such pairing per element. multimap needs to do a something more elaborate. + for(const_iterator ai = a.begin(), aiEnd = a.end(), biEnd = b.end(); ai != aiEnd; ++ai) + { + const_iterator bi = b.find(ai->first); + + if((bi == biEnd) || !(*ai == *bi)) // We have to compare the values, because lookups are done by keys alone but the full value_type of a map is a key/value pair. + return false; // It's possible that two elements in the two containers have identical keys but different values. + } + + return true; + } + + template + inline bool operator!=(const hash_map& a, + const hash_map& b) + { + return !(a == b); + } + + + template + inline bool operator==(const hash_multimap& a, + const hash_multimap& b) + { + typedef typename hash_multimap::const_iterator const_iterator; + typedef typename eastl::iterator_traits::difference_type difference_type; + + // We implement branching with the assumption that the return value is usually false. + if(a.size() != b.size()) + return false; + + // We can't simply search for each element of a in b, as it may be that the bucket for + // two elements in a has those same two elements in b but in different order (which should + // still result in equality). 
Also it's possible that one bucket in a has two elements which + // both match a solitary element in the equivalent bucket in b (which shouldn't result in equality). + eastl::pair aRange; + eastl::pair bRange; + + for(const_iterator ai = a.begin(), aiEnd = a.end(); ai != aiEnd; ai = aRange.second) // For each element in a... + { + aRange = a.equal_range(ai->first); // Get the range of elements in a that are equal to ai. + bRange = b.equal_range(ai->first); // Get the range of elements in b that are equal to ai. + + // We need to verify that aRange == bRange. First make sure the range sizes are equivalent... + const difference_type aDistance = eastl::distance(aRange.first, aRange.second); + const difference_type bDistance = eastl::distance(bRange.first, bRange.second); + + if(aDistance != bDistance) + return false; + + // At this point, aDistance > 0 and aDistance == bDistance. + // Implement a fast pathway for the case that there's just a single element. + if(aDistance == 1) + { + if(!(*aRange.first == *bRange.first)) // We have to compare the values, because lookups are done by keys alone but the full value_type of a map is a key/value pair. + return false; // It's possible that two elements in the two containers have identical keys but different values. Ditto for the permutation case below. + } + else + { + // Check to see if these aRange and bRange are any permutation of each other. + // This check gets slower as there are more elements in the range. + if(!eastl::is_permutation(aRange.first, aRange.second, bRange.first)) + return false; + } + } + + return true; + } + + template + inline bool operator!=(const hash_multimap& a, + const hash_multimap& b) + { + return !(a == b); + } + + +} // namespace eastl + + +#endif // Header include guard + + + + + + diff --git a/libkram/eastl/include/EASTL/hash_set.h b/libkram/eastl/include/EASTL/hash_set.h new file mode 100644 index 00000000..c075975d --- /dev/null +++ b/libkram/eastl/include/EASTL/hash_set.h @@ -0,0 +1,468 @@ +/////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +/////////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// This file is based on the TR1 (technical report 1) reference implementation +// of the unordered_set/unordered_map C++ classes as of about 4/2005. Most likely +// many or all C++ library vendors' implementations of this classes will be +// based off of the reference version and so will look pretty similar to this +// file as well as other vendors' versions. +/////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_HASH_SET_H +#define EASTL_HASH_SET_H + + +#include +#include +#include +#include + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + + + +namespace eastl +{ + + /// EASTL_HASH_SET_DEFAULT_NAME + /// + /// Defines a default container name in the absence of a user-provided name. + /// + #ifndef EASTL_HASH_SET_DEFAULT_NAME + #define EASTL_HASH_SET_DEFAULT_NAME EASTL_DEFAULT_NAME_PREFIX " hash_set" // Unless the user overrides something, this is "EASTL hash_set". + #endif + + + /// EASTL_HASH_MULTISET_DEFAULT_NAME + /// + /// Defines a default container name in the absence of a user-provided name. 
+ /// + #ifndef EASTL_HASH_MULTISET_DEFAULT_NAME + #define EASTL_HASH_MULTISET_DEFAULT_NAME EASTL_DEFAULT_NAME_PREFIX " hash_multiset" // Unless the user overrides something, this is "EASTL hash_multiset". + #endif + + + /// EASTL_HASH_SET_DEFAULT_ALLOCATOR + /// + #ifndef EASTL_HASH_SET_DEFAULT_ALLOCATOR + #define EASTL_HASH_SET_DEFAULT_ALLOCATOR allocator_type(EASTL_HASH_SET_DEFAULT_NAME) + #endif + + /// EASTL_HASH_MULTISET_DEFAULT_ALLOCATOR + /// + #ifndef EASTL_HASH_MULTISET_DEFAULT_ALLOCATOR + #define EASTL_HASH_MULTISET_DEFAULT_ALLOCATOR allocator_type(EASTL_HASH_MULTISET_DEFAULT_NAME) + #endif + + + + /// hash_set + /// + /// Implements a hash_set, which is a hashed unique-item container. + /// Lookups are O(1) (that is, they are fast) but the container is + /// not sorted. Note that lookups are only O(1) if the hash table + /// is well-distributed (non-colliding). The lookup approaches + /// O(n) behavior as the table becomes increasingly poorly distributed. + /// + /// set_max_load_factor + /// If you want to make a hashtable never increase its bucket usage, + /// call set_max_load_factor with a very high value such as 100000.f. + /// + /// bCacheHashCode + /// We provide the boolean bCacheHashCode template parameter in order + /// to allow the storing of the hash code of the key within the map. + /// When this option is disabled, the rehashing of the table will + /// call the hash function on the key. Setting bCacheHashCode to true + /// is useful for cases whereby the calculation of the hash value for + /// a contained object is very expensive. + /// + /// find_as + /// In order to support the ability to have a hashtable of strings but + /// be able to do efficiently lookups via char pointers (i.e. so they + /// aren't converted to string objects), we provide the find_as + /// function. This function allows you to do a find with a key of a + /// type other than the hashtable key type. + /// + /// Example find_as usage: + /// hash_set hashSet; + /// i = hashSet.find_as("hello"); // Use default hash and compare. + /// + /// Example find_as usage (namespaces omitted for brevity): + /// hash_set hashSet; + /// i = hashSet.find_as("hello", hash(), equal_to_2()); + /// + template , typename Predicate = eastl::equal_to, + typename Allocator = EASTLAllocatorType, bool bCacheHashCode = false> + class hash_set + : public hashtable, Predicate, + Hash, mod_range_hashing, default_ranged_hash, + prime_rehash_policy, bCacheHashCode, false, true> + { + public: + typedef hashtable, Predicate, + Hash, mod_range_hashing, default_ranged_hash, + prime_rehash_policy, bCacheHashCode, false, true> base_type; + typedef hash_set this_type; + typedef typename base_type::size_type size_type; + typedef typename base_type::value_type value_type; + typedef typename base_type::allocator_type allocator_type; + typedef typename base_type::node_type node_type; + + public: + /// hash_set + /// + /// Default constructor. + /// + explicit hash_set(const allocator_type& allocator = EASTL_HASH_SET_DEFAULT_ALLOCATOR) + : base_type(0, Hash(), mod_range_hashing(), default_ranged_hash(), Predicate(), eastl::use_self(), allocator) + { + // Empty + } + + + /// hash_set + /// + /// Constructor which creates an empty container, but start with nBucketCount buckets. + /// We default to a small nBucketCount value, though the user really should manually + /// specify an appropriate value in order to prevent memory from being reallocated. 
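A minimal hash_set sketch mirroring the find_as description above (assuming <EASTL/hash_set.h> and <EASTL/string.h>): the single-argument find_as lets a char pointer be looked up with the default hash and cross-type compare, without building a temporary string.

    #include <EASTL/hash_set.h>
    #include <EASTL/string.h>

    void HashSetFindAsSketch()
    {
        eastl::hash_set<eastl::string> names;
        names.insert(eastl::string("alice"));

        const char* key = "alice";
        eastl::hash_set<eastl::string>::iterator it = names.find_as(key);
        bool found = (it != names.end()); // true
        (void)found;
    }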
+ /// + explicit hash_set(size_type nBucketCount, const Hash& hashFunction = Hash(), const Predicate& predicate = Predicate(), + const allocator_type& allocator = EASTL_HASH_SET_DEFAULT_ALLOCATOR) + : base_type(nBucketCount, hashFunction, mod_range_hashing(), default_ranged_hash(), predicate, eastl::use_self(), allocator) + { + // Empty + } + + + hash_set(const this_type& x) + : base_type(x) + { + } + + + hash_set(this_type&& x) + : base_type(eastl::move(x)) + { + } + + + hash_set(this_type&& x, const allocator_type& allocator) + : base_type(eastl::move(x), allocator) + { + } + + + /// hash_set + /// + /// initializer_list-based constructor. + /// Allows for initializing with brace values (e.g. hash_set hs = { 3, 4, 5, }; ) + /// + hash_set(std::initializer_list ilist, size_type nBucketCount = 0, const Hash& hashFunction = Hash(), + const Predicate& predicate = Predicate(), const allocator_type& allocator = EASTL_HASH_SET_DEFAULT_ALLOCATOR) + : base_type(ilist.begin(), ilist.end(), nBucketCount, hashFunction, mod_range_hashing(), default_ranged_hash(), predicate, eastl::use_self(), allocator) + { + // Empty + } + + + /// hash_set + /// + /// An input bucket count of <= 1 causes the bucket count to be equal to the number of + /// elements in the input range. + /// + template + hash_set(FowardIterator first, FowardIterator last, size_type nBucketCount = 0, const Hash& hashFunction = Hash(), + const Predicate& predicate = Predicate(), const allocator_type& allocator = EASTL_HASH_SET_DEFAULT_ALLOCATOR) + : base_type(first, last, nBucketCount, hashFunction, mod_range_hashing(), default_ranged_hash(), predicate, eastl::use_self(), allocator) + { + // Empty + } + + + this_type& operator=(const this_type& x) + { + return static_cast(base_type::operator=(x)); + } + + + this_type& operator=(std::initializer_list ilist) + { + return static_cast(base_type::operator=(ilist)); + } + + + this_type& operator=(this_type&& x) + { + return static_cast(base_type::operator=(eastl::move(x))); + } + + }; // hash_set + + /// hash_set erase_if + /// + /// https://en.cppreference.com/w/cpp/container/unordered_set/erase_if + template + void erase_if(eastl::hash_set& c, UserPredicate predicate) + { + // Erases all elements that satisfy the predicate pred from the container. + for (auto i = c.begin(), last = c.end(); i != last;) + { + if (predicate(*i)) + { + i = c.erase(i); + } + else + { + ++i; + } + } + } + + + /// hash_multiset + /// + /// Implements a hash_multiset, which is the same thing as a hash_set + /// except that contained elements need not be unique. See the documentation + /// for hash_set for details. + /// + template , typename Predicate = eastl::equal_to, + typename Allocator = EASTLAllocatorType, bool bCacheHashCode = false> + class hash_multiset + : public hashtable, Predicate, + Hash, mod_range_hashing, default_ranged_hash, + prime_rehash_policy, bCacheHashCode, false, false> + { + public: + typedef hashtable, Predicate, + Hash, mod_range_hashing, default_ranged_hash, + prime_rehash_policy, bCacheHashCode, false, false> base_type; + typedef hash_multiset this_type; + typedef typename base_type::size_type size_type; + typedef typename base_type::value_type value_type; + typedef typename base_type::allocator_type allocator_type; + typedef typename base_type::node_type node_type; + + public: + /// hash_multiset + /// + /// Default constructor. 
+ /// + explicit hash_multiset(const allocator_type& allocator = EASTL_HASH_MULTISET_DEFAULT_ALLOCATOR) + : base_type(0, Hash(), mod_range_hashing(), default_ranged_hash(), Predicate(), eastl::use_self(), allocator) + { + // Empty + } + + + /// hash_multiset + /// + /// Constructor which creates an empty container, but start with nBucketCount buckets. + /// We default to a small nBucketCount value, though the user really should manually + /// specify an appropriate value in order to prevent memory from being reallocated. + /// + explicit hash_multiset(size_type nBucketCount, const Hash& hashFunction = Hash(), + const Predicate& predicate = Predicate(), const allocator_type& allocator = EASTL_HASH_MULTISET_DEFAULT_ALLOCATOR) + : base_type(nBucketCount, hashFunction, mod_range_hashing(), default_ranged_hash(), predicate, eastl::use_self(), allocator) + { + // Empty + } + + + hash_multiset(const this_type& x) + : base_type(x) + { + } + + + hash_multiset(this_type&& x) + : base_type(eastl::move(x)) + { + } + + + hash_multiset(this_type&& x, const allocator_type& allocator) + : base_type(eastl::move(x), allocator) + { + } + + + /// hash_multiset + /// + /// initializer_list-based constructor. + /// Allows for initializing with brace values (e.g. hash_set hs = { 3, 3, 4, }; ) + /// + hash_multiset(std::initializer_list ilist, size_type nBucketCount = 0, const Hash& hashFunction = Hash(), + const Predicate& predicate = Predicate(), const allocator_type& allocator = EASTL_HASH_MULTISET_DEFAULT_ALLOCATOR) + : base_type(ilist.begin(), ilist.end(), nBucketCount, hashFunction, mod_range_hashing(), default_ranged_hash(), predicate, eastl::use_self(), allocator) + { + // Empty + } + + + /// hash_multiset + /// + /// An input bucket count of <= 1 causes the bucket count to be equal to the number of + /// elements in the input range. + /// + template + hash_multiset(FowardIterator first, FowardIterator last, size_type nBucketCount = 0, const Hash& hashFunction = Hash(), + const Predicate& predicate = Predicate(), const allocator_type& allocator = EASTL_HASH_MULTISET_DEFAULT_ALLOCATOR) + : base_type(first, last, nBucketCount, hashFunction, mod_range_hashing(), default_ranged_hash(), predicate, eastl::use_self(), allocator) + { + // Empty + } + + + this_type& operator=(const this_type& x) + { + return static_cast(base_type::operator=(x)); + } + + + this_type& operator=(std::initializer_list ilist) + { + return static_cast(base_type::operator=(ilist)); + } + + + this_type& operator=(this_type&& x) + { + return static_cast(base_type::operator=(eastl::move(x))); + } + + }; // hash_multiset + + /// hash_multiset erase_if + /// + /// https://en.cppreference.com/w/cpp/container/unordered_multiset/erase_if + template + void erase_if(eastl::hash_multiset& c, UserPredicate predicate) + { + // Erases all elements that satisfy the predicate pred from the container. + for (auto i = c.begin(), last = c.end(); i != last;) + { + if (predicate(*i)) + { + i = c.erase(i); + } + else + { + ++i; + } + } + } + + + + /////////////////////////////////////////////////////////////////////// + // global operators + /////////////////////////////////////////////////////////////////////// + + template + inline bool operator==(const hash_set& a, + const hash_set& b) + { + typedef typename hash_set::const_iterator const_iterator; + + // We implement branching with the assumption that the return value is usually false. 
+ if(a.size() != b.size()) + return false; + + // For set (with its unique keys), we need only test that each element in a can be found in b, + // as there can be only one such pairing per element. multiset needs to do a something more elaborate. + for(const_iterator ai = a.begin(), aiEnd = a.end(), biEnd = b.end(); ai != aiEnd; ++ai) + { + const_iterator bi = b.find(*ai); + + if((bi == biEnd) || !(*ai == *bi)) // We have to compare values in addition to making sure the lookups succeeded. This is because the lookup is done via the user-supplised Predicate + return false; // which isn't strictly required to be identical to the Value operator==, though 99% of the time it will be so. + } + + return true; + } + + template + inline bool operator!=(const hash_set& a, + const hash_set& b) + { + return !(a == b); + } + + + template + inline bool operator==(const hash_multiset& a, + const hash_multiset& b) + { + typedef typename hash_multiset::const_iterator const_iterator; + typedef typename eastl::iterator_traits::difference_type difference_type; + + // We implement branching with the assumption that the return value is usually false. + if(a.size() != b.size()) + return false; + + // We can't simply search for each element of a in b, as it may be that the bucket for + // two elements in a has those same two elements in b but in different order (which should + // still result in equality). Also it's possible that one bucket in a has two elements which + // both match a solitary element in the equivalent bucket in b (which shouldn't result in equality). + eastl::pair aRange; + eastl::pair bRange; + + for(const_iterator ai = a.begin(), aiEnd = a.end(); ai != aiEnd; ai = aRange.second) // For each element in a... + { + aRange = a.equal_range(*ai); // Get the range of elements in a that are equal to ai. + bRange = b.equal_range(*ai); // Get the range of elements in b that are equal to ai. + + // We need to verify that aRange == bRange. First make sure the range sizes are equivalent... + const difference_type aDistance = eastl::distance(aRange.first, aRange.second); + const difference_type bDistance = eastl::distance(bRange.first, bRange.second); + + if(aDistance != bDistance) + return false; + + // At this point, aDistance > 0 and aDistance == bDistance. + // Implement a fast pathway for the case that there's just a single element. + if(aDistance == 1) + { + if(!(*aRange.first == *bRange.first)) // We have to compare values in addition to making sure the distance (element count) was equal. This is because the lookup is done via the user-supplised Predicate + return false; // which isn't strictly required to be identical to the Value operator==, though 99% of the time it will be so. Ditto for the is_permutation usage below. + } + else + { + // Check to see if these aRange and bRange are any permutation of each other. + // This check gets slower as there are more elements in the range. 
+ if(!eastl::is_permutation(aRange.first, aRange.second, bRange.first)) + return false; + } + } + + return true; + } + + template + inline bool operator!=(const hash_multiset& a, + const hash_multiset& b) + { + return !(a == b); + } + +} // namespace eastl + + +#endif // Header include guard + + + + + + + + + + + + diff --git a/libkram/eastl/include/EASTL/heap.h b/libkram/eastl/include/EASTL/heap.h new file mode 100644 index 00000000..f0e770b9 --- /dev/null +++ b/libkram/eastl/include/EASTL/heap.h @@ -0,0 +1,685 @@ +/////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +/////////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// This file implements heap functionality much like the std C++ heap algorithms. +// Such heaps are not the same thing as memory heaps or pools, but rather are +// semi-sorted random access containers which have the primary purpose of +// supporting the implementation of priority_queue and similar data structures. +// +// The primary distinctions between this heap functionality and std::heap are: +// - This heap exposes some extra functionality such as is_heap and change_heap. +// - This heap is more efficient than versions found in typical STL +// implementations such as STLPort, Microsoft, and Metrowerks. This comes +// about due to better use of array dereferencing and branch prediction. +// You should expect of 5-30%, depending on the usage and platform. +/////////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// The publicly usable functions we define are: +// push_heap -- Adds an entry to a heap. Same as C++ std::push_heap. +// pop_heap -- Removes the top entry from a heap. Same as C++ std::pop_heap. +// make_heap -- Converts an array to a heap. Same as C++ std::make_heap. +// sort_heap -- Sorts a heap in place. Same as C++ std::sort_heap. +// remove_heap -- Removes an arbitrary entry from a heap. +// change_heap -- Changes the priority of an entry in the heap. +// is_heap -- Returns true if an array appears is in heap format. Same as C++11 std::is_heap. +// is_heap_until -- Returns largest part of the range which is a heap. Same as C++11 std::is_heap_until. +/////////////////////////////////////////////////////////////////////////////// + + + +#ifndef EASTL_HEAP_H +#define EASTL_HEAP_H + + +#include +#include +#include + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + + + +namespace eastl +{ + + /////////////////////////////////////////////////////////////////////// + // promote_heap (internal function) + /////////////////////////////////////////////////////////////////////// + + template + inline void promote_heap_impl(RandomAccessIterator first, Distance topPosition, Distance position, T value) + { + for(Distance parentPosition = (position - 1) >> 1; // This formula assumes that (position > 0). // We use '>> 1' instead of '/ 2' because we have seen VC++ generate better code with >>. + (position > topPosition) && (*(first + parentPosition) < value); + parentPosition = (position - 1) >> 1) + { + *(first + position) = eastl::forward(*(first + parentPosition)); // Swap the node with its parent. 
+ position = parentPosition; + } + + *(first + position) = eastl::forward(value); + } + + /// promote_heap + /// + /// Moves a value in the heap from a given position upward until + /// it is sorted correctly. It's kind of like bubble-sort, except that + /// instead of moving linearly from the back of a list to the front, + /// it moves from the bottom of the tree up the branches towards the + /// top. But otherwise is just like bubble-sort. + /// + /// This function requires that the value argument refer to a value + /// that is currently not within the heap. + /// + template + inline void promote_heap(RandomAccessIterator first, Distance topPosition, Distance position, const T& value) + { + typedef typename iterator_traits::value_type value_type; + promote_heap_impl(first, topPosition, position, value); + } + + + /// promote_heap + /// + /// Moves a value in the heap from a given position upward until + /// it is sorted correctly. It's kind of like bubble-sort, except that + /// instead of moving linearly from the back of a list to the front, + /// it moves from the bottom of the tree up the branches towards the + /// top. But otherwise is just like bubble-sort. + /// + /// This function requires that the value argument refer to a value + /// that is currently not within the heap. + /// + template + inline void promote_heap(RandomAccessIterator first, Distance topPosition, Distance position, T&& value) + { + typedef typename iterator_traits::value_type value_type; + promote_heap_impl(first, topPosition, position, eastl::forward(value)); + } + + + template + inline void promote_heap_impl(RandomAccessIterator first, Distance topPosition, Distance position, T value, Compare compare) + { + for(Distance parentPosition = (position - 1) >> 1; // This formula assumes that (position > 0). // We use '>> 1' instead of '/ 2' because we have seen VC++ generate better code with >>. + (position > topPosition) && compare(*(first + parentPosition), value); + parentPosition = (position - 1) >> 1) + { + *(first + position) = eastl::forward(*(first + parentPosition)); // Swap the node with its parent. + position = parentPosition; + } + + *(first + position) = eastl::forward(value); + } + + + /// promote_heap + /// + /// Takes a Compare(a, b) function (or function object) which returns true if a < b. + /// For example, you could use the standard 'less' comparison object. + /// + /// The Compare function must work equivalently to the compare function used + /// to make and maintain the heap. + /// + /// This function requires that the value argument refer to a value + /// that is currently not within the heap. + /// + template + inline void promote_heap(RandomAccessIterator first, Distance topPosition, Distance position, const T& value, Compare compare) + { + typedef typename iterator_traits::value_type value_type; + promote_heap_impl(first, topPosition, position, value, compare); + } + + + /// promote_heap + /// + /// Takes a Compare(a, b) function (or function object) which returns true if a < b. + /// For example, you could use the standard 'less' comparison object. + /// + /// The Compare function must work equivalently to the compare function used + /// to make and maintain the heap. + /// + /// This function requires that the value argument refer to a value + /// that is currently not within the heap. 
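+	/// Note: this only walks the parent chain from 'position' up toward 'topPosition',
+	/// so it performs at most O(log n) element moves for a heap of n elements.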
+ /// + template + inline void promote_heap(RandomAccessIterator first, Distance topPosition, Distance position, T&& value, Compare compare) + { + typedef typename iterator_traits::value_type value_type; + promote_heap_impl(first, topPosition, position, eastl::forward(value), compare); + } + + + + /////////////////////////////////////////////////////////////////////// + // adjust_heap (internal function) + /////////////////////////////////////////////////////////////////////// + + template + void adjust_heap_impl(RandomAccessIterator first, Distance topPosition, Distance heapSize, Distance position, T value) + { + // We do the conventional approach of moving the position down to the + // bottom then inserting the value at the back and moving it up. + Distance childPosition = (2 * position) + 2; + + for(; childPosition < heapSize; childPosition = (2 * childPosition) + 2) + { + if(*(first + childPosition) < *(first + (childPosition - 1))) // Choose the larger of the two children. + --childPosition; + *(first + position) = eastl::forward(*(first + childPosition)); // Swap positions with this child. + position = childPosition; + } + + if(childPosition == heapSize) // If we are at the very last index of the bottom... + { + *(first + position) = eastl::forward(*(first + (childPosition - 1))); + position = childPosition - 1; + } + + eastl::promote_heap(first, topPosition, position, eastl::forward(value)); + } + + /// adjust_heap + /// + /// Given a position that has just been vacated, this function moves + /// new values into that vacated position appropriately. The value + /// argument is an entry which will be inserted into the heap after + /// we move nodes into the positions that were vacated. + /// + /// This function requires that the value argument refer to a value + /// that is currently not within the heap. + /// + template + void adjust_heap(RandomAccessIterator first, Distance topPosition, Distance heapSize, Distance position, const T& value) + { + typedef typename iterator_traits::value_type value_type; + adjust_heap_impl(first, topPosition, heapSize, position, eastl::forward(value)); + } + + + /// adjust_heap + /// + /// Given a position that has just been vacated, this function moves + /// new values into that vacated position appropriately. The value + /// argument is an entry which will be inserted into the heap after + /// we move nodes into the positions that were vacated. + /// + /// This function requires that the value argument refer to a value + /// that is currently not within the heap. + /// + template + void adjust_heap(RandomAccessIterator first, Distance topPosition, Distance heapSize, Distance position, T&& value) + { + typedef typename iterator_traits::value_type value_type; + adjust_heap_impl(first, topPosition, heapSize, position, eastl::forward(value)); + } + + + template + void adjust_heap_impl(RandomAccessIterator first, Distance topPosition, Distance heapSize, Distance position, T value, Compare compare) + { + // We do the conventional approach of moving the position down to the + // bottom then inserting the value at the back and moving it up. + Distance childPosition = (2 * position) + 2; + + for(; childPosition < heapSize; childPosition = (2 * childPosition) + 2) + { + if(compare(*(first + childPosition), *(first + (childPosition - 1)))) // Choose the larger of the two children. + --childPosition; + *(first + position) = eastl::forward(*(first + childPosition)); // Swap positions with this child. 
+ position = childPosition; + } + + if(childPosition == heapSize) // If we are at the bottom... + { + *(first + position) = eastl::forward(*(first + (childPosition - 1))); + position = childPosition - 1; + } + + eastl::promote_heap(first, topPosition, position, eastl::forward(value), compare); + } + + /// adjust_heap + /// + /// The Compare function must work equivalently to the compare function used + /// to make and maintain the heap. + /// + /// This function requires that the value argument refer to a value + /// that is currently not within the heap. + /// + template + void adjust_heap(RandomAccessIterator first, Distance topPosition, Distance heapSize, Distance position, const T& value, Compare compare) + { + typedef typename iterator_traits::value_type value_type; + adjust_heap_impl(first, topPosition, heapSize, position, eastl::forward(value), compare); + } + + + /// adjust_heap + /// + /// The Compare function must work equivalently to the compare function used + /// to make and maintain the heap. + /// + /// This function requires that the value argument refer to a value + /// that is currently not within the heap. + /// + template + void adjust_heap(RandomAccessIterator first, Distance topPosition, Distance heapSize, Distance position, T&& value, Compare compare) + { + typedef typename iterator_traits::value_type value_type; + adjust_heap_impl(first, topPosition, heapSize, position, eastl::forward(value), compare); + } + + + /////////////////////////////////////////////////////////////////////// + // push_heap + /////////////////////////////////////////////////////////////////////// + + /// push_heap + /// + /// Adds an item to a heap (which is an array). The item necessarily + /// comes from the back of the heap (array). Thus, the insertion of a + /// new item in a heap is a two step process: push_back and push_heap. + /// + /// Example usage: + /// vector heap; + /// + /// heap.push_back(3); + /// push_heap(heap.begin(), heap.end()); // Places '3' appropriately. + /// + template + inline void push_heap(RandomAccessIterator first, RandomAccessIterator last) + { + typedef typename eastl::iterator_traits::difference_type difference_type; + typedef typename eastl::iterator_traits::value_type value_type; + + const value_type tempBottom(eastl::forward(*(last - 1))); + + eastl::promote_heap + (first, (difference_type)0, (difference_type)(last - first - 1), eastl::forward(tempBottom)); + } + + + /// push_heap + /// + /// This version is useful for cases where your object comparison is unusual + /// or where you want to have the heap store pointers to objects instead of + /// storing the objects themselves (often in order to improve cache coherency + /// while doing sorting). + /// + /// The Compare function must work equivalently to the compare function used + /// to make and maintain the heap. 
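+	/// Example usage (an illustrative sketch, assuming an eastl::vector<int> named minHeap and
+	/// the eastl::greater<int> functor):
+	///     minHeap.push_back(3);
+	///     eastl::push_heap(minHeap.begin(), minHeap.end(), eastl::greater<int>()); // maintains a min-heap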
+ /// + template + inline void push_heap(RandomAccessIterator first, RandomAccessIterator last, Compare compare) + { + typedef typename eastl::iterator_traits::difference_type difference_type; + typedef typename eastl::iterator_traits::value_type value_type; + + const value_type tempBottom(*(last - 1)); + + eastl::promote_heap + (first, (difference_type)0, (difference_type)(last - first - 1), tempBottom, compare); + } + + + + + /////////////////////////////////////////////////////////////////////// + // pop_heap + /////////////////////////////////////////////////////////////////////// + + /// pop_heap + /// + /// Removes the first item from the heap (which is an array), and adjusts + /// the heap so that the highest priority item becomes the new first item. + /// + /// Example usage: + /// vector heap; + /// + /// heap.push_back(2); + /// heap.push_back(3); + /// heap.push_back(1); + /// + /// pop_heap(heap.begin(), heap.end()); // Moves heap[0] to the back of the heap and adjusts the heap. + /// heap.pop_back(); // Remove value that was just at the top of the heap + /// + template + inline void pop_heap(RandomAccessIterator first, RandomAccessIterator last) + { + typedef typename eastl::iterator_traits::difference_type difference_type; + typedef typename eastl::iterator_traits::value_type value_type; + + value_type tempBottom(eastl::forward(*(last - 1))); + *(last - 1) = eastl::forward(*first); + eastl::adjust_heap + (first, (difference_type)0, (difference_type)(last - first - 1), 0, eastl::forward(tempBottom)); + } + + + + /// pop_heap + /// + /// This version is useful for cases where your object comparison is unusual + /// or where you want to have the heap store pointers to objects instead of + /// storing the objects themselves (often in order to improve cache coherency + /// while doing sorting). + /// + /// The Compare function must work equivalently to the compare function used + /// to make and maintain the heap. + /// + template + inline void pop_heap(RandomAccessIterator first, RandomAccessIterator last, Compare compare) + { + typedef typename eastl::iterator_traits::difference_type difference_type; + typedef typename eastl::iterator_traits::value_type value_type; + + value_type tempBottom(eastl::forward(*(last - 1))); + *(last - 1) = eastl::forward(*first); + eastl::adjust_heap + (first, (difference_type)0, (difference_type)(last - first - 1), 0, eastl::forward(tempBottom), compare); + } + + + /////////////////////////////////////////////////////////////////////// + // make_heap + /////////////////////////////////////////////////////////////////////// + + + /// make_heap + /// + /// Given an array, this function converts it into heap format. + /// The complexity is O(n), where n is count of the range. + /// The input range is not required to be in any order. + /// + template + void make_heap(RandomAccessIterator first, RandomAccessIterator last) + { + // We do bottom-up heap construction as per Sedgewick. Such construction is O(n). + typedef typename eastl::iterator_traits::difference_type difference_type; + typedef typename eastl::iterator_traits::value_type value_type; + + const difference_type heapSize = last - first; + + if(heapSize >= 2) // If there is anything to do... (we need this check because otherwise the math fails below). + { + difference_type parentPosition = ((heapSize - 2) >> 1) + 1; // We use '>> 1' instead of '/ 2' because we have seen VC++ generate better code with >>. 
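+			// Note: parentPosition starts one past the last internal node ((heapSize - 2) / 2),
+			// so the do-while below pre-decrements and sifts each internal node down exactly once,
+			// which is what gives the O(n) bottom-up construction described above.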
+
+			do{
+				--parentPosition;
+				value_type temp(eastl::forward(*(first + parentPosition)));
+				eastl::adjust_heap
+							  (first, parentPosition, heapSize, parentPosition, eastl::forward(temp));
+			} while(parentPosition != 0);
+		}
+	}
+
+
+	template
+	void make_heap(RandomAccessIterator first, RandomAccessIterator last, Compare compare)
+	{
+		typedef typename eastl::iterator_traits::difference_type difference_type;
+		typedef typename eastl::iterator_traits::value_type value_type;
+
+		const difference_type heapSize = last - first;
+
+		if(heapSize >= 2) // If there is anything to do... (we need this check because otherwise the math fails below).
+		{
+			difference_type parentPosition = ((heapSize - 2) >> 1) + 1; // We use '>> 1' instead of '/ 2' because we have seen VC++ generate better code with >>.
+
+			do{
+				--parentPosition;
+				value_type temp(eastl::forward(*(first + parentPosition)));
+				eastl::adjust_heap
+							  (first, parentPosition, heapSize, parentPosition, eastl::forward(temp), compare);
+			} while(parentPosition != 0);
+		}
+	}
+
+
+	///////////////////////////////////////////////////////////////////////
+	// sort_heap
+	///////////////////////////////////////////////////////////////////////
+
+	/// sort_heap
+	///
+	/// After the application of this algorithm, the range it was applied to
+	/// is no longer a heap, though it will be a reverse heap (smallest first).
+	/// The item with the lowest priority will be first, and the highest last.
+	/// This is not a stable sort because the relative order of equivalent
+	/// elements is not necessarily preserved.
+	/// The range referenced must be valid; all pointers must be dereferenceable
+	/// and within the sequence the last position is reachable from the first
+	/// by incrementation.
+	/// The complexity is at most O(n * log(n)), where n is count of the range.
+	///
+	template
+	inline void sort_heap(RandomAccessIterator first, RandomAccessIterator last)
+	{
+		for(; (last - first) > 1; --last) // We simply use the heap to sort itself.
+			eastl::pop_heap(first, last);
+	}
+
+
+	/// sort_heap
+	///
+	/// The Compare function must work equivalently to the compare function used
+	/// to make and maintain the heap.
+	///
+	template
+	inline void sort_heap(RandomAccessIterator first, RandomAccessIterator last, Compare compare)
+	{
+		for(; (last - first) > 1; --last) // We simply use the heap to sort itself.
+			eastl::pop_heap(first, last, compare);
+	}
+
+
+
+	///////////////////////////////////////////////////////////////////////
+	// remove_heap
+	///////////////////////////////////////////////////////////////////////
+
+	/// remove_heap
+	///
+	/// Removes an arbitrary entry from the heap and adjusts the heap appropriately.
+	/// This function is unlike pop_heap in that pop_heap moves the top item
+	/// to the back of the heap, whereas remove_heap moves an arbitrary item to
+	/// the back of the heap.
+	///
+	/// Note: Since this function moves the element to the back of the heap and
+	/// doesn't actually remove it from the given container, the user must call
+	/// the container erase function if the user wants to erase the element
+	/// from the container.
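+	/// Example usage (an illustrative sketch, assuming an eastl::vector<int> named heap that is
+	/// already in heap order, and an index 'position' of the same integral type as heap.size()):
+	///     eastl::remove_heap(heap.begin(), heap.size(), position); // moves heap[position] to the back
+	///     heap.pop_back();                                         // erase it from the container itself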
+ /// + template + inline void remove_heap(RandomAccessIterator first, Distance heapSize, Distance position) + { + typedef typename eastl::iterator_traits::difference_type difference_type; + typedef typename eastl::iterator_traits::value_type value_type; + + const value_type tempBottom(*(first + heapSize - 1)); + *(first + heapSize - 1) = *(first + position); + eastl::adjust_heap + (first, (difference_type)0, (difference_type)(heapSize - 1), (difference_type)position, tempBottom); + } + + + /// remove_heap + /// + /// The Compare function must work equivalently to the compare function used + /// to make and maintain the heap. + /// + /// Note: Since this function moves the element to the back of the heap and + /// doesn't actually remove it from the given container, the user must call + /// the container erase function if the user wants to erase the element + /// from the container. + /// + template + inline void remove_heap(RandomAccessIterator first, Distance heapSize, Distance position, Compare compare) + { + typedef typename eastl::iterator_traits::difference_type difference_type; + typedef typename eastl::iterator_traits::value_type value_type; + + const value_type tempBottom(*(first + heapSize - 1)); + *(first + heapSize - 1) = *(first + position); + eastl::adjust_heap + (first, (difference_type)0, (difference_type)(heapSize - 1), (difference_type)position, tempBottom, compare); + } + + + + /////////////////////////////////////////////////////////////////////// + // change_heap + /////////////////////////////////////////////////////////////////////// + + /// change_heap + /// + /// Given a value in the heap that has changed in priority, this function + /// adjusts the heap appropriately. The heap size remains unchanged after + /// this operation. + /// + template + inline void change_heap(RandomAccessIterator first, Distance heapSize, Distance position) + { + typedef typename eastl::iterator_traits::difference_type difference_type; + typedef typename eastl::iterator_traits::value_type value_type; + + eastl::remove_heap(first, heapSize, position); + + value_type tempBottom(*(first + heapSize - 1)); + + eastl::promote_heap + (first, (difference_type)0, (difference_type)(heapSize - 1), tempBottom); + } + + + /// change_heap + /// + /// The Compare function must work equivalently to the compare function used + /// to make and maintain the heap. + /// + template + inline void change_heap(RandomAccessIterator first, Distance heapSize, Distance position, Compare compare) + { + typedef typename eastl::iterator_traits::difference_type difference_type; + typedef typename eastl::iterator_traits::value_type value_type; + + eastl::remove_heap(first, heapSize, position, compare); + + value_type tempBottom(*(first + heapSize - 1)); + + eastl::promote_heap + (first, (difference_type)0, (difference_type)(heapSize - 1), tempBottom, compare); + } + + + + /////////////////////////////////////////////////////////////////////// + // is_heap_until + /////////////////////////////////////////////////////////////////////// + + /// is_heap_until + /// + template + inline RandomAccessIterator is_heap_until(RandomAccessIterator first, RandomAccessIterator last) + { + int counter = 0; + + for(RandomAccessIterator child = first + 1; child < last; ++child, counter ^= 1) + { + if(*first < *child) // We must use operator <, and are not allowed to use > or >= here. + return child; + first += counter; // counter switches between 0 and 1 every time through. 
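+			// In other words, 'first' (the parent) advances only after both of its
+			// children have been compared against it.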
+ } + + return last; + } + + + /// is_heap_until + /// + /// The Compare function must work equivalently to the compare function used + /// to make and maintain the heap. + /// + template + inline RandomAccessIterator is_heap_until(RandomAccessIterator first, RandomAccessIterator last, Compare compare) + { + int counter = 0; + + for(RandomAccessIterator child = first + 1; child < last; ++child, counter ^= 1) + { + if(compare(*first, *child)) + return child; + first += counter; // counter switches between 0 and 1 every time through. + } + + return last; + } + + + + /////////////////////////////////////////////////////////////////////// + // is_heap + /////////////////////////////////////////////////////////////////////// + + /// is_heap + /// + /// This is a useful debugging algorithm for verifying that a random + /// access container is in heap format. + /// + template + inline bool is_heap(RandomAccessIterator first, RandomAccessIterator last) + { + return (eastl::is_heap_until(first, last) == last); + } + + + /// is_heap + /// + /// The Compare function must work equivalently to the compare function used + /// to make and maintain the heap. + /// + template + inline bool is_heap(RandomAccessIterator first, RandomAccessIterator last, Compare compare) + { + return (eastl::is_heap_until(first, last, compare) == last); + } + + + // To consider: The following may be a faster implementation for most cases. + // + // template + // inline bool is_heap(RandomAccessIterator first, RandomAccessIterator last) + // { + // if(((uintptr_t)(last - first) & 1) == 0) // If the range has an even number of elements... + // --last; + // + // RandomAccessIterator parent = first, child = (first + 1); + // + // for(; child < last; child += 2, ++parent) + // { + // if((*parent < *child) || (*parent < *(child + 1))) + // return false; + // } + // + // if((((uintptr_t)(last - first) & 1) == 0) && (*parent < *child)) + // return false; + // + // return true; + // } + + +} // namespace eastl + + +#endif // Header include guard + + + + diff --git a/libkram/eastl/include/EASTL/initializer_list.h b/libkram/eastl/include/EASTL/initializer_list.h new file mode 100644 index 00000000..028fb4f8 --- /dev/null +++ b/libkram/eastl/include/EASTL/initializer_list.h @@ -0,0 +1,96 @@ +/////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +// +// This file #includes if it's available, else it defines +// its own version of std::initializer_list. It does not define eastl::initializer_list +// because that would not provide any use, due to how the C++11 Standard works. +/////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_INITIALIZER_LIST_H +#define EASTL_INITIALIZER_LIST_H + + +#include +#include + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + + +#if defined(EA_HAVE_CPP11_INITIALIZER_LIST) // If the compiler can generate calls to std::initializer_list... + + // The initializer_list type must be declared in the std namespace, as that's the + // namespace the compiler uses when generating code to use it. + EA_DISABLE_ALL_VC_WARNINGS() + #include + EA_RESTORE_ALL_VC_WARNINGS() + +#else + + // If you get an error here about initializer_list being already defined, then the EA_HAVE_CPP11_INITIALIZER_LIST define from needs to be updated. 
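+	// Note: the fallback below provides the minimal shape the compiler expects of
+	// std::initializer_list: a pointer plus an element count, populated via the
+	// private two-argument constructor when a braced-init-list is materialized.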
+ namespace std + { + // See the C++11 Standard, section 18.9. + template + class initializer_list + { + public: + typedef E value_type; + typedef const E& reference; + typedef const E& const_reference; + typedef size_t size_type; + typedef const E* iterator; // Must be const, as initializer_list (and its mpArray) is an immutable temp object. + typedef const E* const_iterator; + + private: + iterator mpArray; + size_type mArraySize; + + // This constructor is private, but the C++ compiler has the ability to call it, as per the C++11 Standard. + initializer_list(const_iterator pArray, size_type arraySize) + : mpArray(pArray), mArraySize(arraySize) { } + + public: + initializer_list() EA_NOEXCEPT // EA_NOEXCEPT requires a recent version of EABase. + : mpArray(NULL), mArraySize(0) { } + + size_type size() const EA_NOEXCEPT { return mArraySize; } + const_iterator begin() const EA_NOEXCEPT { return mpArray; } // Must be const_iterator, as initializer_list (and its mpArray) is an immutable temp object. + const_iterator end() const EA_NOEXCEPT { return mpArray + mArraySize; } + }; + + + template + const T* begin(std::initializer_list ilist) EA_NOEXCEPT + { + return ilist.begin(); + } + + template + const T* end(std::initializer_list ilist) EA_NOEXCEPT + { + return ilist.end(); + } + } + +#endif + + +#endif // Header include guard + + + + + + + + + + + + + + + diff --git a/libkram/eastl/include/EASTL/internal/atomic/arch/arch.h b/libkram/eastl/include/EASTL/internal/atomic/arch/arch.h new file mode 100644 index 00000000..4924a591 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/arch/arch.h @@ -0,0 +1,65 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_ARCH_H +#define EASTL_ATOMIC_INTERNAL_ARCH_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// Include the architecture specific implementations +// +#if defined(EA_PROCESSOR_X86) || defined(EA_PROCESSOR_X86_64) + + #include "x86/arch_x86.h" + +#elif defined(EA_PROCESSOR_ARM32) || defined(EA_PROCESSOR_ARM64) + + #include "arm/arch_arm.h" + +#endif + + +///////////////////////////////////////////////////////////////////////////////// + + +#include "arch_fetch_add.h" +#include "arch_fetch_sub.h" + +#include "arch_fetch_and.h" +#include "arch_fetch_xor.h" +#include "arch_fetch_or.h" + +#include "arch_add_fetch.h" +#include "arch_sub_fetch.h" + +#include "arch_and_fetch.h" +#include "arch_xor_fetch.h" +#include "arch_or_fetch.h" + +#include "arch_exchange.h" + +#include "arch_cmpxchg_weak.h" +#include "arch_cmpxchg_strong.h" + +#include "arch_load.h" +#include "arch_store.h" + +#include "arch_compiler_barrier.h" + +#include "arch_cpu_pause.h" + +#include "arch_memory_barrier.h" + +#include "arch_signal_fence.h" + +#include "arch_thread_fence.h" + + +#endif /* EASTL_ATOMIC_INTERNAL_ARCH_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/arch/arch_add_fetch.h b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_add_fetch.h new file mode 100644 index 00000000..65771f89 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_add_fetch.h @@ -0,0 +1,173 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_ARCH_ADD_FETCH_H +#define EASTL_ATOMIC_INTERNAL_ARCH_ADD_FETCH_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ARCH_ATOMIC_ADD_FETCH_*_N(type, type ret, type * ptr, type val) +// +#if defined(EASTL_ARCH_ATOMIC_ADD_FETCH_RELAXED_8) + #define EASTL_ARCH_ATOMIC_ADD_FETCH_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_ADD_FETCH_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_ADD_FETCH_ACQUIRE_8) + #define EASTL_ARCH_ATOMIC_ADD_FETCH_ACQUIRE_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_ADD_FETCH_ACQUIRE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_ADD_FETCH_RELEASE_8) + #define EASTL_ARCH_ATOMIC_ADD_FETCH_RELEASE_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_ADD_FETCH_RELEASE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_ADD_FETCH_ACQ_REL_8) + #define EASTL_ARCH_ATOMIC_ADD_FETCH_ACQ_REL_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_ADD_FETCH_ACQ_REL_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_ADD_FETCH_SEQ_CST_8) + #define EASTL_ARCH_ATOMIC_ADD_FETCH_SEQ_CST_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_ADD_FETCH_SEQ_CST_8_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_ADD_FETCH_RELAXED_16) + #define EASTL_ARCH_ATOMIC_ADD_FETCH_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_ADD_FETCH_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_ADD_FETCH_ACQUIRE_16) + #define EASTL_ARCH_ATOMIC_ADD_FETCH_ACQUIRE_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_ADD_FETCH_ACQUIRE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_ADD_FETCH_RELEASE_16) + #define EASTL_ARCH_ATOMIC_ADD_FETCH_RELEASE_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_ADD_FETCH_RELEASE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_ADD_FETCH_ACQ_REL_16) + #define EASTL_ARCH_ATOMIC_ADD_FETCH_ACQ_REL_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_ADD_FETCH_ACQ_REL_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_ADD_FETCH_SEQ_CST_16) + #define EASTL_ARCH_ATOMIC_ADD_FETCH_SEQ_CST_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_ADD_FETCH_SEQ_CST_16_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_ADD_FETCH_RELAXED_32) + #define EASTL_ARCH_ATOMIC_ADD_FETCH_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_ADD_FETCH_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_ADD_FETCH_ACQUIRE_32) + #define EASTL_ARCH_ATOMIC_ADD_FETCH_ACQUIRE_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_ADD_FETCH_ACQUIRE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_ADD_FETCH_RELEASE_32) + #define EASTL_ARCH_ATOMIC_ADD_FETCH_RELEASE_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_ADD_FETCH_RELEASE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_ADD_FETCH_ACQ_REL_32) + #define EASTL_ARCH_ATOMIC_ADD_FETCH_ACQ_REL_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_ADD_FETCH_ACQ_REL_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_ADD_FETCH_SEQ_CST_32) + #define EASTL_ARCH_ATOMIC_ADD_FETCH_SEQ_CST_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_ADD_FETCH_SEQ_CST_32_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_ADD_FETCH_RELAXED_64) + #define EASTL_ARCH_ATOMIC_ADD_FETCH_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_ADD_FETCH_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_ADD_FETCH_ACQUIRE_64) + #define 
EASTL_ARCH_ATOMIC_ADD_FETCH_ACQUIRE_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_ADD_FETCH_ACQUIRE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_ADD_FETCH_RELEASE_64) + #define EASTL_ARCH_ATOMIC_ADD_FETCH_RELEASE_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_ADD_FETCH_RELEASE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_ADD_FETCH_ACQ_REL_64) + #define EASTL_ARCH_ATOMIC_ADD_FETCH_ACQ_REL_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_ADD_FETCH_ACQ_REL_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_ADD_FETCH_SEQ_CST_64) + #define EASTL_ARCH_ATOMIC_ADD_FETCH_SEQ_CST_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_ADD_FETCH_SEQ_CST_64_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_ADD_FETCH_RELAXED_128) + #define EASTL_ARCH_ATOMIC_ADD_FETCH_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_ADD_FETCH_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_ADD_FETCH_ACQUIRE_128) + #define EASTL_ARCH_ATOMIC_ADD_FETCH_ACQUIRE_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_ADD_FETCH_ACQUIRE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_ADD_FETCH_RELEASE_128) + #define EASTL_ARCH_ATOMIC_ADD_FETCH_RELEASE_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_ADD_FETCH_RELEASE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_ADD_FETCH_ACQ_REL_128) + #define EASTL_ARCH_ATOMIC_ADD_FETCH_ACQ_REL_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_ADD_FETCH_ACQ_REL_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_ADD_FETCH_SEQ_CST_128) + #define EASTL_ARCH_ATOMIC_ADD_FETCH_SEQ_CST_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_ADD_FETCH_SEQ_CST_128_AVAILABLE 0 +#endif + + +#endif /* EASTL_ATOMIC_INTERNAL_ARCH_ADD_FETCH_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/arch/arch_and_fetch.h b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_and_fetch.h new file mode 100644 index 00000000..df7ba35d --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_and_fetch.h @@ -0,0 +1,173 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_ARCH_AND_FETCH_H +#define EASTL_ATOMIC_INTERNAL_ARCH_AND_FETCH_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ARCH_ATOMIC_AND_FETCH_*_N(type, type ret, type * ptr, type val) +// +#if defined(EASTL_ARCH_ATOMIC_AND_FETCH_RELAXED_8) + #define EASTL_ARCH_ATOMIC_AND_FETCH_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_AND_FETCH_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_AND_FETCH_ACQUIRE_8) + #define EASTL_ARCH_ATOMIC_AND_FETCH_ACQUIRE_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_AND_FETCH_ACQUIRE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_AND_FETCH_RELEASE_8) + #define EASTL_ARCH_ATOMIC_AND_FETCH_RELEASE_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_AND_FETCH_RELEASE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_AND_FETCH_ACQ_REL_8) + #define EASTL_ARCH_ATOMIC_AND_FETCH_ACQ_REL_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_AND_FETCH_ACQ_REL_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_AND_FETCH_SEQ_CST_8) + #define EASTL_ARCH_ATOMIC_AND_FETCH_SEQ_CST_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_AND_FETCH_SEQ_CST_8_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_AND_FETCH_RELAXED_16) + #define EASTL_ARCH_ATOMIC_AND_FETCH_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_AND_FETCH_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_AND_FETCH_ACQUIRE_16) + #define EASTL_ARCH_ATOMIC_AND_FETCH_ACQUIRE_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_AND_FETCH_ACQUIRE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_AND_FETCH_RELEASE_16) + #define EASTL_ARCH_ATOMIC_AND_FETCH_RELEASE_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_AND_FETCH_RELEASE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_AND_FETCH_ACQ_REL_16) + #define EASTL_ARCH_ATOMIC_AND_FETCH_ACQ_REL_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_AND_FETCH_ACQ_REL_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_AND_FETCH_SEQ_CST_16) + #define EASTL_ARCH_ATOMIC_AND_FETCH_SEQ_CST_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_AND_FETCH_SEQ_CST_16_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_AND_FETCH_RELAXED_32) + #define EASTL_ARCH_ATOMIC_AND_FETCH_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_AND_FETCH_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_AND_FETCH_ACQUIRE_32) + #define EASTL_ARCH_ATOMIC_AND_FETCH_ACQUIRE_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_AND_FETCH_ACQUIRE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_AND_FETCH_RELEASE_32) + #define EASTL_ARCH_ATOMIC_AND_FETCH_RELEASE_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_AND_FETCH_RELEASE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_AND_FETCH_ACQ_REL_32) + #define EASTL_ARCH_ATOMIC_AND_FETCH_ACQ_REL_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_AND_FETCH_ACQ_REL_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_AND_FETCH_SEQ_CST_32) + #define EASTL_ARCH_ATOMIC_AND_FETCH_SEQ_CST_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_AND_FETCH_SEQ_CST_32_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_AND_FETCH_RELAXED_64) + #define EASTL_ARCH_ATOMIC_AND_FETCH_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_AND_FETCH_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_AND_FETCH_ACQUIRE_64) + #define 
EASTL_ARCH_ATOMIC_AND_FETCH_ACQUIRE_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_AND_FETCH_ACQUIRE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_AND_FETCH_RELEASE_64) + #define EASTL_ARCH_ATOMIC_AND_FETCH_RELEASE_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_AND_FETCH_RELEASE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_AND_FETCH_ACQ_REL_64) + #define EASTL_ARCH_ATOMIC_AND_FETCH_ACQ_REL_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_AND_FETCH_ACQ_REL_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_AND_FETCH_SEQ_CST_64) + #define EASTL_ARCH_ATOMIC_AND_FETCH_SEQ_CST_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_AND_FETCH_SEQ_CST_64_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_AND_FETCH_RELAXED_128) + #define EASTL_ARCH_ATOMIC_AND_FETCH_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_AND_FETCH_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_AND_FETCH_ACQUIRE_128) + #define EASTL_ARCH_ATOMIC_AND_FETCH_ACQUIRE_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_AND_FETCH_ACQUIRE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_AND_FETCH_RELEASE_128) + #define EASTL_ARCH_ATOMIC_AND_FETCH_RELEASE_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_AND_FETCH_RELEASE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_AND_FETCH_ACQ_REL_128) + #define EASTL_ARCH_ATOMIC_AND_FETCH_ACQ_REL_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_AND_FETCH_ACQ_REL_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_AND_FETCH_SEQ_CST_128) + #define EASTL_ARCH_ATOMIC_AND_FETCH_SEQ_CST_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_AND_FETCH_SEQ_CST_128_AVAILABLE 0 +#endif + + +#endif /* EASTL_ATOMIC_INTERNAL_ARCH_AND_FETCH_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/arch/arch_cmpxchg_strong.h b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_cmpxchg_strong.h new file mode 100644 index 00000000..1005dc33 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_cmpxchg_strong.h @@ -0,0 +1,430 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_ARCH_CMPXCHG_STRONG_H +#define EASTL_ATOMIC_INTERNAL_ARCH_CMPXCHG_STRONG_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_*_*_N(type, bool ret, type * ptr, type * expected, type desired) +// +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_8) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_8) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_8) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_8) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_8) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_8) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_8) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_8) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_8) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_8_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_16) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_16) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_16) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_16) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_16) + #define 
EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_16) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_16) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_16) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_16) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_16_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_32) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_32) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_32) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_32) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_32) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_32) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_32) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_32) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_32) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_32_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_64) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_64) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_64_AVAILABLE 1 +#else + #define 
EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_64) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_64) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_64) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_64) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_64) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_64) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_64) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_64_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_128) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_128) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_128) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_128) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_128) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_128) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_128) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_128) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_128_AVAILABLE 0 
+#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_128) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_128_AVAILABLE 0 +#endif + + +///////////////////////////////////////////////////////////////////////////////// + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_*_N(type, bool ret, type * ptr, type * expected, type desired) +// +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELAXED_8_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_8_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELAXED_8(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_8(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_8_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_8_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_8(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_8(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELEASE_8_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_8_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELEASE_8(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_8(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_8_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_8_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_8(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_8(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_8_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_8_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_8(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_8(type, ret, ptr, expected, desired) + + +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELAXED_16_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_16_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELAXED_16(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_16(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_16_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_16_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_16(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_16(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELEASE_16_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_16_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELEASE_16(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_16(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_16_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_16_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_16(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_16(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_16_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_16_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_16(type, ret, ptr, expected, desired) \ + 
EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_16(type, ret, ptr, expected, desired) + + +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELAXED_32_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_32_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELAXED_32(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_32(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_32_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_32_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_32(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_32(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELEASE_32_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_32_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELEASE_32(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_32(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_32_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_32_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_32(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_32(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_32_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_32_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_32(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_32(type, ret, ptr, expected, desired) + + +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELAXED_64_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_64_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELAXED_64(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_64(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_64_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_64_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_64(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_64(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELEASE_64_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_64_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELEASE_64(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_64(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_64_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_64_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_64(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_64(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_64_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_64_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_64(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_64(type, ret, ptr, expected, desired) + + +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELAXED_128_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_128_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELAXED_128(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_128(type, ret, ptr, expected, desired) + +#define 
EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_128_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_128_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_128(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_128(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELEASE_128_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_128_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELEASE_128(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_128(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_128_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_128_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_128(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_128(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_128_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_128_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_128(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_128(type, ret, ptr, expected, desired) + + +#endif /* EASTL_ATOMIC_INTERNAL_ARCH_CMPXCHG_STRONG_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/arch/arch_cmpxchg_weak.h b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_cmpxchg_weak.h new file mode 100644 index 00000000..5ce26386 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_cmpxchg_weak.h @@ -0,0 +1,430 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_ARCH_CMPXCHG_WEAK_H +#define EASTL_ATOMIC_INTERNAL_ARCH_CMPXCHG_WEAK_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_*_*_N(type, bool ret, type * ptr, type * expected, type desired) +// +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_8) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_8) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_8) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_8) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_8) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_8) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_8_AVAILABLE 0 +#endif + +#if 
defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_8) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_8) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_8) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_8_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_16) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_16) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_16) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_16) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_16) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_16) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_16) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_16) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_16) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_16_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_32) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_32) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_32) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_32) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_32_AVAILABLE 1 +#else + #define 
EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_32) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_32) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_32) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_32) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_32) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_32_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_64) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_64) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_64) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_64) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_64) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_64) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_64) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_64) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_64) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_64_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_128) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_128) + #define 
EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_128) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_128) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_128) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_128) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_128) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_128) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_128) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_128_AVAILABLE 0 +#endif + + +///////////////////////////////////////////////////////////////////////////////// + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_*_N(type, bool ret, type * ptr, type * expected, type desired) +// +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELAXED_8_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_8_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELAXED_8(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_8(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_8_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_8_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_8(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_8(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELEASE_8_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_8_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELEASE_8(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_8(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_8_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_8_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_8(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_8(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_8_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_8_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_8(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_8(type, ret, ptr, expected, desired) + + +#define 
EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELAXED_16_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_16_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELAXED_16(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_16(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_16_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_16_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_16(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_16(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELEASE_16_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_16_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELEASE_16(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_16(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_16_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_16_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_16(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_16(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_16_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_16_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_16(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_16(type, ret, ptr, expected, desired) + + +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELAXED_32_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_32_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELAXED_32(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_32(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_32_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_32_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_32(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_32(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELEASE_32_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_32_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELEASE_32(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_32(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_32_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_32_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_32(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_32(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_32_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_32_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_32(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_32(type, ret, ptr, expected, desired) + + +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELAXED_64_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_64_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELAXED_64(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_64(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_64_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_64_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_64(type, ret, ptr, expected, desired) \ + 
EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_64(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELEASE_64_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_64_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELEASE_64(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_64(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_64_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_64_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_64(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_64(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_64_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_64_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_64(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_64(type, ret, ptr, expected, desired) + + +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELAXED_128_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_128_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELAXED_128(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_128(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_128_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_128_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_128(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_128(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELEASE_128_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_128_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELEASE_128(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_128(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_128_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_128_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_128(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_128(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_128_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_128_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_128(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_128(type, ret, ptr, expected, desired) + + +#endif /* EASTL_ATOMIC_INTERNAL_ARCH_CMPXCHG_WEAK_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/arch/arch_compiler_barrier.h b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_compiler_barrier.h new file mode 100644 index 00000000..0652469b --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_compiler_barrier.h @@ -0,0 +1,19 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_ARCH_COMPILER_BARRIER_H +#define EASTL_ATOMIC_INTERNAL_ARCH_COMPILER_BARRIER_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#define EASTL_ARCH_ATOMIC_COMPILER_BARRIER_AVAILABLE 0 + +#define EASTL_ARCH_ATOMIC_COMPILER_BARRIER_DATA_DEPENDENCY_AVAILABLE 0 + + +#endif /* EASTL_ATOMIC_INTERNAL_ARCH_COMPILER_BARRIER_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/arch/arch_cpu_pause.h b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_cpu_pause.h new file mode 100644 index 00000000..e8c2d1d7 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_cpu_pause.h @@ -0,0 +1,25 @@ +///////////////////////////////////////////////////////////////////////////////// +// copyright (c) electronic arts inc. all rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_ARCH_CPU_PAUSE_H +#define EASTL_ATOMIC_INTERNAL_ARCH_CPU_PAUSE_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ARCH_ATOMIC_CPU_PAUSE() +// +#if defined(EASTL_ARCH_ATOMIC_CPU_PAUSE) + #define EASTL_ARCH_ATOMIC_CPU_PAUSE_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CPU_PAUSE_AVAILABLE 0 +#endif + + +#endif /* EASTL_ATOMIC_INTERNAL_ARCH_CPU_PAUSE_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/arch/arch_exchange.h b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_exchange.h new file mode 100644 index 00000000..76003188 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_exchange.h @@ -0,0 +1,173 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_ARCH_EXCHANGE_H +#define EASTL_ATOMIC_INTERNAL_ARCH_EXCHANGE_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ARCH_ATOMIC_EXCHANGE_*_N(type, type ret, type * ptr, type val) +// +#if defined(EASTL_ARCH_ATOMIC_EXCHANGE_RELAXED_8) + #define EASTL_ARCH_ATOMIC_EXCHANGE_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_EXCHANGE_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_EXCHANGE_ACQUIRE_8) + #define EASTL_ARCH_ATOMIC_EXCHANGE_ACQUIRE_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_EXCHANGE_ACQUIRE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_EXCHANGE_RELEASE_8) + #define EASTL_ARCH_ATOMIC_EXCHANGE_RELEASE_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_EXCHANGE_RELEASE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_EXCHANGE_ACQ_REL_8) + #define EASTL_ARCH_ATOMIC_EXCHANGE_ACQ_REL_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_EXCHANGE_ACQ_REL_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_EXCHANGE_SEQ_CST_8) + #define EASTL_ARCH_ATOMIC_EXCHANGE_SEQ_CST_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_EXCHANGE_SEQ_CST_8_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_EXCHANGE_RELAXED_16) + #define EASTL_ARCH_ATOMIC_EXCHANGE_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_EXCHANGE_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_EXCHANGE_ACQUIRE_16) + #define EASTL_ARCH_ATOMIC_EXCHANGE_ACQUIRE_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_EXCHANGE_ACQUIRE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_EXCHANGE_RELEASE_16) + #define EASTL_ARCH_ATOMIC_EXCHANGE_RELEASE_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_EXCHANGE_RELEASE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_EXCHANGE_ACQ_REL_16) + #define EASTL_ARCH_ATOMIC_EXCHANGE_ACQ_REL_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_EXCHANGE_ACQ_REL_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_EXCHANGE_SEQ_CST_16) + #define EASTL_ARCH_ATOMIC_EXCHANGE_SEQ_CST_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_EXCHANGE_SEQ_CST_16_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_EXCHANGE_RELAXED_32) + #define EASTL_ARCH_ATOMIC_EXCHANGE_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_EXCHANGE_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_EXCHANGE_ACQUIRE_32) + #define EASTL_ARCH_ATOMIC_EXCHANGE_ACQUIRE_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_EXCHANGE_ACQUIRE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_EXCHANGE_RELEASE_32) + #define EASTL_ARCH_ATOMIC_EXCHANGE_RELEASE_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_EXCHANGE_RELEASE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_EXCHANGE_ACQ_REL_32) + #define EASTL_ARCH_ATOMIC_EXCHANGE_ACQ_REL_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_EXCHANGE_ACQ_REL_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_EXCHANGE_SEQ_CST_32) + #define EASTL_ARCH_ATOMIC_EXCHANGE_SEQ_CST_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_EXCHANGE_SEQ_CST_32_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_EXCHANGE_RELAXED_64) + #define EASTL_ARCH_ATOMIC_EXCHANGE_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_EXCHANGE_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_EXCHANGE_ACQUIRE_64) + #define EASTL_ARCH_ATOMIC_EXCHANGE_ACQUIRE_64_AVAILABLE 1 +#else + 
#define EASTL_ARCH_ATOMIC_EXCHANGE_ACQUIRE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_EXCHANGE_RELEASE_64) + #define EASTL_ARCH_ATOMIC_EXCHANGE_RELEASE_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_EXCHANGE_RELEASE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_EXCHANGE_ACQ_REL_64) + #define EASTL_ARCH_ATOMIC_EXCHANGE_ACQ_REL_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_EXCHANGE_ACQ_REL_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_EXCHANGE_SEQ_CST_64) + #define EASTL_ARCH_ATOMIC_EXCHANGE_SEQ_CST_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_EXCHANGE_SEQ_CST_64_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_EXCHANGE_RELAXED_128) + #define EASTL_ARCH_ATOMIC_EXCHANGE_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_EXCHANGE_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_EXCHANGE_ACQUIRE_128) + #define EASTL_ARCH_ATOMIC_EXCHANGE_ACQUIRE_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_EXCHANGE_ACQUIRE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_EXCHANGE_RELEASE_128) + #define EASTL_ARCH_ATOMIC_EXCHANGE_RELEASE_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_EXCHANGE_RELEASE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_EXCHANGE_ACQ_REL_128) + #define EASTL_ARCH_ATOMIC_EXCHANGE_ACQ_REL_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_EXCHANGE_ACQ_REL_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_EXCHANGE_SEQ_CST_128) + #define EASTL_ARCH_ATOMIC_EXCHANGE_SEQ_CST_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_EXCHANGE_SEQ_CST_128_AVAILABLE 0 +#endif + + +#endif /* EASTL_ATOMIC_INTERNAL_ARCH_EXCHANGE_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/arch/arch_fetch_add.h b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_fetch_add.h new file mode 100644 index 00000000..71907f70 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_fetch_add.h @@ -0,0 +1,173 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_ARCH_FETCH_ADD_H +#define EASTL_ATOMIC_INTERNAL_ARCH_FETCH_ADD_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ARCH_ATOMIC_FETCH_ADD_*_N(type, type ret, type * ptr, type val) +// +#if defined(EASTL_ARCH_ATOMIC_FETCH_ADD_RELAXED_8) + #define EASTL_ARCH_ATOMIC_FETCH_ADD_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_ADD_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_ADD_ACQUIRE_8) + #define EASTL_ARCH_ATOMIC_FETCH_ADD_ACQUIRE_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_ADD_ACQUIRE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_ADD_RELEASE_8) + #define EASTL_ARCH_ATOMIC_FETCH_ADD_RELEASE_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_ADD_RELEASE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_ADD_ACQ_REL_8) + #define EASTL_ARCH_ATOMIC_FETCH_ADD_ACQ_REL_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_ADD_ACQ_REL_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_ADD_SEQ_CST_8) + #define EASTL_ARCH_ATOMIC_FETCH_ADD_SEQ_CST_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_ADD_SEQ_CST_8_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_FETCH_ADD_RELAXED_16) + #define EASTL_ARCH_ATOMIC_FETCH_ADD_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_ADD_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_ADD_ACQUIRE_16) + #define EASTL_ARCH_ATOMIC_FETCH_ADD_ACQUIRE_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_ADD_ACQUIRE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_ADD_RELEASE_16) + #define EASTL_ARCH_ATOMIC_FETCH_ADD_RELEASE_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_ADD_RELEASE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_ADD_ACQ_REL_16) + #define EASTL_ARCH_ATOMIC_FETCH_ADD_ACQ_REL_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_ADD_ACQ_REL_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_ADD_SEQ_CST_16) + #define EASTL_ARCH_ATOMIC_FETCH_ADD_SEQ_CST_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_ADD_SEQ_CST_16_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_FETCH_ADD_RELAXED_32) + #define EASTL_ARCH_ATOMIC_FETCH_ADD_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_ADD_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_ADD_ACQUIRE_32) + #define EASTL_ARCH_ATOMIC_FETCH_ADD_ACQUIRE_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_ADD_ACQUIRE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_ADD_RELEASE_32) + #define EASTL_ARCH_ATOMIC_FETCH_ADD_RELEASE_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_ADD_RELEASE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_ADD_ACQ_REL_32) + #define EASTL_ARCH_ATOMIC_FETCH_ADD_ACQ_REL_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_ADD_ACQ_REL_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_ADD_SEQ_CST_32) + #define EASTL_ARCH_ATOMIC_FETCH_ADD_SEQ_CST_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_ADD_SEQ_CST_32_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_FETCH_ADD_RELAXED_64) + #define EASTL_ARCH_ATOMIC_FETCH_ADD_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_ADD_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_ADD_ACQUIRE_64) + #define 
EASTL_ARCH_ATOMIC_FETCH_ADD_ACQUIRE_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_ADD_ACQUIRE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_ADD_RELEASE_64) + #define EASTL_ARCH_ATOMIC_FETCH_ADD_RELEASE_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_ADD_RELEASE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_ADD_ACQ_REL_64) + #define EASTL_ARCH_ATOMIC_FETCH_ADD_ACQ_REL_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_ADD_ACQ_REL_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_ADD_SEQ_CST_64) + #define EASTL_ARCH_ATOMIC_FETCH_ADD_SEQ_CST_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_ADD_SEQ_CST_64_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_FETCH_ADD_RELAXED_128) + #define EASTL_ARCH_ATOMIC_FETCH_ADD_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_ADD_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_ADD_ACQUIRE_128) + #define EASTL_ARCH_ATOMIC_FETCH_ADD_ACQUIRE_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_ADD_ACQUIRE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_ADD_RELEASE_128) + #define EASTL_ARCH_ATOMIC_FETCH_ADD_RELEASE_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_ADD_RELEASE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_ADD_ACQ_REL_128) + #define EASTL_ARCH_ATOMIC_FETCH_ADD_ACQ_REL_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_ADD_ACQ_REL_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_ADD_SEQ_CST_128) + #define EASTL_ARCH_ATOMIC_FETCH_ADD_SEQ_CST_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_ADD_SEQ_CST_128_AVAILABLE 0 +#endif + + +#endif /* EASTL_ATOMIC_INTERNAL_ARCH_FETCH_ADD_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/arch/arch_fetch_and.h b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_fetch_and.h new file mode 100644 index 00000000..f2b39a4c --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_fetch_and.h @@ -0,0 +1,173 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_ARCH_FETCH_AND_H +#define EASTL_ATOMIC_INTERNAL_ARCH_FETCH_AND_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ARCH_ATOMIC_FETCH_AND_*_N(type, type ret, type * ptr, type val) +// +#if defined(EASTL_ARCH_ATOMIC_FETCH_AND_RELAXED_8) + #define EASTL_ARCH_ATOMIC_FETCH_AND_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_AND_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_AND_ACQUIRE_8) + #define EASTL_ARCH_ATOMIC_FETCH_AND_ACQUIRE_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_AND_ACQUIRE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_AND_RELEASE_8) + #define EASTL_ARCH_ATOMIC_FETCH_AND_RELEASE_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_AND_RELEASE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_AND_ACQ_REL_8) + #define EASTL_ARCH_ATOMIC_FETCH_AND_ACQ_REL_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_AND_ACQ_REL_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_AND_SEQ_CST_8) + #define EASTL_ARCH_ATOMIC_FETCH_AND_SEQ_CST_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_AND_SEQ_CST_8_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_FETCH_AND_RELAXED_16) + #define EASTL_ARCH_ATOMIC_FETCH_AND_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_AND_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_AND_ACQUIRE_16) + #define EASTL_ARCH_ATOMIC_FETCH_AND_ACQUIRE_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_AND_ACQUIRE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_AND_RELEASE_16) + #define EASTL_ARCH_ATOMIC_FETCH_AND_RELEASE_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_AND_RELEASE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_AND_ACQ_REL_16) + #define EASTL_ARCH_ATOMIC_FETCH_AND_ACQ_REL_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_AND_ACQ_REL_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_AND_SEQ_CST_16) + #define EASTL_ARCH_ATOMIC_FETCH_AND_SEQ_CST_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_AND_SEQ_CST_16_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_FETCH_AND_RELAXED_32) + #define EASTL_ARCH_ATOMIC_FETCH_AND_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_AND_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_AND_ACQUIRE_32) + #define EASTL_ARCH_ATOMIC_FETCH_AND_ACQUIRE_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_AND_ACQUIRE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_AND_RELEASE_32) + #define EASTL_ARCH_ATOMIC_FETCH_AND_RELEASE_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_AND_RELEASE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_AND_ACQ_REL_32) + #define EASTL_ARCH_ATOMIC_FETCH_AND_ACQ_REL_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_AND_ACQ_REL_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_AND_SEQ_CST_32) + #define EASTL_ARCH_ATOMIC_FETCH_AND_SEQ_CST_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_AND_SEQ_CST_32_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_FETCH_AND_RELAXED_64) + #define EASTL_ARCH_ATOMIC_FETCH_AND_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_AND_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_AND_ACQUIRE_64) + #define 
EASTL_ARCH_ATOMIC_FETCH_AND_ACQUIRE_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_AND_ACQUIRE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_AND_RELEASE_64) + #define EASTL_ARCH_ATOMIC_FETCH_AND_RELEASE_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_AND_RELEASE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_AND_ACQ_REL_64) + #define EASTL_ARCH_ATOMIC_FETCH_AND_ACQ_REL_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_AND_ACQ_REL_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_AND_SEQ_CST_64) + #define EASTL_ARCH_ATOMIC_FETCH_AND_SEQ_CST_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_AND_SEQ_CST_64_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_FETCH_AND_RELAXED_128) + #define EASTL_ARCH_ATOMIC_FETCH_AND_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_AND_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_AND_ACQUIRE_128) + #define EASTL_ARCH_ATOMIC_FETCH_AND_ACQUIRE_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_AND_ACQUIRE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_AND_RELEASE_128) + #define EASTL_ARCH_ATOMIC_FETCH_AND_RELEASE_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_AND_RELEASE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_AND_ACQ_REL_128) + #define EASTL_ARCH_ATOMIC_FETCH_AND_ACQ_REL_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_AND_ACQ_REL_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_AND_SEQ_CST_128) + #define EASTL_ARCH_ATOMIC_FETCH_AND_SEQ_CST_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_AND_SEQ_CST_128_AVAILABLE 0 +#endif + + +#endif /* EASTL_ATOMIC_INTERNAL_ARCH_FETCH_AND_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/arch/arch_fetch_or.h b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_fetch_or.h new file mode 100644 index 00000000..dd6dd0db --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_fetch_or.h @@ -0,0 +1,173 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_ARCH_FETCH_OR_H +#define EASTL_ATOMIC_INTERNAL_ARCH_FETCH_OR_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ARCH_ATOMIC_FETCH_OR_*_N(type, type ret, type * ptr, type val) +// +#if defined(EASTL_ARCH_ATOMIC_FETCH_OR_RELAXED_8) + #define EASTL_ARCH_ATOMIC_FETCH_OR_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_OR_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_OR_ACQUIRE_8) + #define EASTL_ARCH_ATOMIC_FETCH_OR_ACQUIRE_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_OR_ACQUIRE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_OR_RELEASE_8) + #define EASTL_ARCH_ATOMIC_FETCH_OR_RELEASE_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_OR_RELEASE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_OR_ACQ_REL_8) + #define EASTL_ARCH_ATOMIC_FETCH_OR_ACQ_REL_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_OR_ACQ_REL_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_OR_SEQ_CST_8) + #define EASTL_ARCH_ATOMIC_FETCH_OR_SEQ_CST_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_OR_SEQ_CST_8_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_FETCH_OR_RELAXED_16) + #define EASTL_ARCH_ATOMIC_FETCH_OR_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_OR_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_OR_ACQUIRE_16) + #define EASTL_ARCH_ATOMIC_FETCH_OR_ACQUIRE_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_OR_ACQUIRE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_OR_RELEASE_16) + #define EASTL_ARCH_ATOMIC_FETCH_OR_RELEASE_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_OR_RELEASE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_OR_ACQ_REL_16) + #define EASTL_ARCH_ATOMIC_FETCH_OR_ACQ_REL_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_OR_ACQ_REL_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_OR_SEQ_CST_16) + #define EASTL_ARCH_ATOMIC_FETCH_OR_SEQ_CST_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_OR_SEQ_CST_16_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_FETCH_OR_RELAXED_32) + #define EASTL_ARCH_ATOMIC_FETCH_OR_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_OR_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_OR_ACQUIRE_32) + #define EASTL_ARCH_ATOMIC_FETCH_OR_ACQUIRE_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_OR_ACQUIRE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_OR_RELEASE_32) + #define EASTL_ARCH_ATOMIC_FETCH_OR_RELEASE_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_OR_RELEASE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_OR_ACQ_REL_32) + #define EASTL_ARCH_ATOMIC_FETCH_OR_ACQ_REL_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_OR_ACQ_REL_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_OR_SEQ_CST_32) + #define EASTL_ARCH_ATOMIC_FETCH_OR_SEQ_CST_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_OR_SEQ_CST_32_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_FETCH_OR_RELAXED_64) + #define EASTL_ARCH_ATOMIC_FETCH_OR_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_OR_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_OR_ACQUIRE_64) + #define EASTL_ARCH_ATOMIC_FETCH_OR_ACQUIRE_64_AVAILABLE 1 +#else + 
#define EASTL_ARCH_ATOMIC_FETCH_OR_ACQUIRE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_OR_RELEASE_64) + #define EASTL_ARCH_ATOMIC_FETCH_OR_RELEASE_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_OR_RELEASE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_OR_ACQ_REL_64) + #define EASTL_ARCH_ATOMIC_FETCH_OR_ACQ_REL_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_OR_ACQ_REL_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_OR_SEQ_CST_64) + #define EASTL_ARCH_ATOMIC_FETCH_OR_SEQ_CST_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_OR_SEQ_CST_64_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_FETCH_OR_RELAXED_128) + #define EASTL_ARCH_ATOMIC_FETCH_OR_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_OR_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_OR_ACQUIRE_128) + #define EASTL_ARCH_ATOMIC_FETCH_OR_ACQUIRE_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_OR_ACQUIRE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_OR_RELEASE_128) + #define EASTL_ARCH_ATOMIC_FETCH_OR_RELEASE_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_OR_RELEASE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_OR_ACQ_REL_128) + #define EASTL_ARCH_ATOMIC_FETCH_OR_ACQ_REL_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_OR_ACQ_REL_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_OR_SEQ_CST_128) + #define EASTL_ARCH_ATOMIC_FETCH_OR_SEQ_CST_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_OR_SEQ_CST_128_AVAILABLE 0 +#endif + + +#endif /* EASTL_ATOMIC_INTERNAL_ARCH_FETCH_OR_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/arch/arch_fetch_sub.h b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_fetch_sub.h new file mode 100644 index 00000000..ea63db73 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_fetch_sub.h @@ -0,0 +1,173 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_ARCH_FETCH_SUB_H +#define EASTL_ATOMIC_INTERNAL_ARCH_FETCH_SUB_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ARCH_ATOMIC_FETCH_SUB_*_N(type, type ret, type * ptr, type val) +// +#if defined(EASTL_ARCH_ATOMIC_FETCH_SUB_RELAXED_8) + #define EASTL_ARCH_ATOMIC_FETCH_SUB_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_SUB_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_SUB_ACQUIRE_8) + #define EASTL_ARCH_ATOMIC_FETCH_SUB_ACQUIRE_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_SUB_ACQUIRE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_SUB_RELEASE_8) + #define EASTL_ARCH_ATOMIC_FETCH_SUB_RELEASE_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_SUB_RELEASE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_SUB_ACQ_REL_8) + #define EASTL_ARCH_ATOMIC_FETCH_SUB_ACQ_REL_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_SUB_ACQ_REL_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_SUB_SEQ_CST_8) + #define EASTL_ARCH_ATOMIC_FETCH_SUB_SEQ_CST_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_SUB_SEQ_CST_8_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_FETCH_SUB_RELAXED_16) + #define EASTL_ARCH_ATOMIC_FETCH_SUB_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_SUB_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_SUB_ACQUIRE_16) + #define EASTL_ARCH_ATOMIC_FETCH_SUB_ACQUIRE_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_SUB_ACQUIRE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_SUB_RELEASE_16) + #define EASTL_ARCH_ATOMIC_FETCH_SUB_RELEASE_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_SUB_RELEASE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_SUB_ACQ_REL_16) + #define EASTL_ARCH_ATOMIC_FETCH_SUB_ACQ_REL_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_SUB_ACQ_REL_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_SUB_SEQ_CST_16) + #define EASTL_ARCH_ATOMIC_FETCH_SUB_SEQ_CST_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_SUB_SEQ_CST_16_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_FETCH_SUB_RELAXED_32) + #define EASTL_ARCH_ATOMIC_FETCH_SUB_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_SUB_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_SUB_ACQUIRE_32) + #define EASTL_ARCH_ATOMIC_FETCH_SUB_ACQUIRE_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_SUB_ACQUIRE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_SUB_RELEASE_32) + #define EASTL_ARCH_ATOMIC_FETCH_SUB_RELEASE_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_SUB_RELEASE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_SUB_ACQ_REL_32) + #define EASTL_ARCH_ATOMIC_FETCH_SUB_ACQ_REL_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_SUB_ACQ_REL_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_SUB_SEQ_CST_32) + #define EASTL_ARCH_ATOMIC_FETCH_SUB_SEQ_CST_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_SUB_SEQ_CST_32_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_FETCH_SUB_RELAXED_64) + #define EASTL_ARCH_ATOMIC_FETCH_SUB_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_SUB_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_SUB_ACQUIRE_64) + #define 
EASTL_ARCH_ATOMIC_FETCH_SUB_ACQUIRE_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_SUB_ACQUIRE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_SUB_RELEASE_64) + #define EASTL_ARCH_ATOMIC_FETCH_SUB_RELEASE_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_SUB_RELEASE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_SUB_ACQ_REL_64) + #define EASTL_ARCH_ATOMIC_FETCH_SUB_ACQ_REL_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_SUB_ACQ_REL_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_SUB_SEQ_CST_64) + #define EASTL_ARCH_ATOMIC_FETCH_SUB_SEQ_CST_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_SUB_SEQ_CST_64_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_FETCH_SUB_RELAXED_128) + #define EASTL_ARCH_ATOMIC_FETCH_SUB_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_SUB_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_SUB_ACQUIRE_128) + #define EASTL_ARCH_ATOMIC_FETCH_SUB_ACQUIRE_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_SUB_ACQUIRE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_SUB_RELEASE_128) + #define EASTL_ARCH_ATOMIC_FETCH_SUB_RELEASE_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_SUB_RELEASE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_SUB_ACQ_REL_128) + #define EASTL_ARCH_ATOMIC_FETCH_SUB_ACQ_REL_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_SUB_ACQ_REL_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_SUB_SEQ_CST_128) + #define EASTL_ARCH_ATOMIC_FETCH_SUB_SEQ_CST_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_SUB_SEQ_CST_128_AVAILABLE 0 +#endif + + +#endif /* EASTL_ATOMIC_INTERNAL_ARCH_FETCH_SUB_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/arch/arch_fetch_xor.h b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_fetch_xor.h new file mode 100644 index 00000000..b41ad2d4 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_fetch_xor.h @@ -0,0 +1,173 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_ARCH_FETCH_XOR_H +#define EASTL_ATOMIC_INTERNAL_ARCH_FETCH_XOR_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ARCH_ATOMIC_FETCH_XOR_*_N(type, type ret, type * ptr, type val) +// +#if defined(EASTL_ARCH_ATOMIC_FETCH_XOR_RELAXED_8) + #define EASTL_ARCH_ATOMIC_FETCH_XOR_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_XOR_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_XOR_ACQUIRE_8) + #define EASTL_ARCH_ATOMIC_FETCH_XOR_ACQUIRE_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_XOR_ACQUIRE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_XOR_RELEASE_8) + #define EASTL_ARCH_ATOMIC_FETCH_XOR_RELEASE_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_XOR_RELEASE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_XOR_ACQ_REL_8) + #define EASTL_ARCH_ATOMIC_FETCH_XOR_ACQ_REL_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_XOR_ACQ_REL_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_XOR_SEQ_CST_8) + #define EASTL_ARCH_ATOMIC_FETCH_XOR_SEQ_CST_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_XOR_SEQ_CST_8_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_FETCH_XOR_RELAXED_16) + #define EASTL_ARCH_ATOMIC_FETCH_XOR_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_XOR_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_XOR_ACQUIRE_16) + #define EASTL_ARCH_ATOMIC_FETCH_XOR_ACQUIRE_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_XOR_ACQUIRE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_XOR_RELEASE_16) + #define EASTL_ARCH_ATOMIC_FETCH_XOR_RELEASE_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_XOR_RELEASE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_XOR_ACQ_REL_16) + #define EASTL_ARCH_ATOMIC_FETCH_XOR_ACQ_REL_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_XOR_ACQ_REL_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_XOR_SEQ_CST_16) + #define EASTL_ARCH_ATOMIC_FETCH_XOR_SEQ_CST_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_XOR_SEQ_CST_16_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_FETCH_XOR_RELAXED_32) + #define EASTL_ARCH_ATOMIC_FETCH_XOR_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_XOR_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_XOR_ACQUIRE_32) + #define EASTL_ARCH_ATOMIC_FETCH_XOR_ACQUIRE_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_XOR_ACQUIRE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_XOR_RELEASE_32) + #define EASTL_ARCH_ATOMIC_FETCH_XOR_RELEASE_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_XOR_RELEASE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_XOR_ACQ_REL_32) + #define EASTL_ARCH_ATOMIC_FETCH_XOR_ACQ_REL_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_XOR_ACQ_REL_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_XOR_SEQ_CST_32) + #define EASTL_ARCH_ATOMIC_FETCH_XOR_SEQ_CST_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_XOR_SEQ_CST_32_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_FETCH_XOR_RELAXED_64) + #define EASTL_ARCH_ATOMIC_FETCH_XOR_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_XOR_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_XOR_ACQUIRE_64) + #define 
EASTL_ARCH_ATOMIC_FETCH_XOR_ACQUIRE_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_XOR_ACQUIRE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_XOR_RELEASE_64) + #define EASTL_ARCH_ATOMIC_FETCH_XOR_RELEASE_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_XOR_RELEASE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_XOR_ACQ_REL_64) + #define EASTL_ARCH_ATOMIC_FETCH_XOR_ACQ_REL_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_XOR_ACQ_REL_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_XOR_SEQ_CST_64) + #define EASTL_ARCH_ATOMIC_FETCH_XOR_SEQ_CST_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_XOR_SEQ_CST_64_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_FETCH_XOR_RELAXED_128) + #define EASTL_ARCH_ATOMIC_FETCH_XOR_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_XOR_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_XOR_ACQUIRE_128) + #define EASTL_ARCH_ATOMIC_FETCH_XOR_ACQUIRE_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_XOR_ACQUIRE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_XOR_RELEASE_128) + #define EASTL_ARCH_ATOMIC_FETCH_XOR_RELEASE_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_XOR_RELEASE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_XOR_ACQ_REL_128) + #define EASTL_ARCH_ATOMIC_FETCH_XOR_ACQ_REL_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_XOR_ACQ_REL_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_XOR_SEQ_CST_128) + #define EASTL_ARCH_ATOMIC_FETCH_XOR_SEQ_CST_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_XOR_SEQ_CST_128_AVAILABLE 0 +#endif + + +#endif /* EASTL_ATOMIC_INTERNAL_ARCH_FETCH_XOR_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/arch/arch_load.h b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_load.h new file mode 100644 index 00000000..eea7cf49 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_load.h @@ -0,0 +1,125 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_ARCH_LOAD_H +#define EASTL_ATOMIC_INTERNAL_ARCH_LOAD_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ARCH_ATOMIC_LOAD_*_N(type, type ret, type * ptr) +// +#if defined(EASTL_ARCH_ATOMIC_LOAD_RELAXED_8) + #define EASTL_ARCH_ATOMIC_LOAD_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_LOAD_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_LOAD_ACQUIRE_8) + #define EASTL_ARCH_ATOMIC_LOAD_ACQUIRE_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_LOAD_ACQUIRE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_LOAD_SEQ_CST_8) + #define EASTL_ARCH_ATOMIC_LOAD_SEQ_CST_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_LOAD_SEQ_CST_8_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_LOAD_RELAXED_16) + #define EASTL_ARCH_ATOMIC_LOAD_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_LOAD_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_LOAD_ACQUIRE_16) + #define EASTL_ARCH_ATOMIC_LOAD_ACQUIRE_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_LOAD_ACQUIRE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_LOAD_SEQ_CST_16) + #define EASTL_ARCH_ATOMIC_LOAD_SEQ_CST_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_LOAD_SEQ_CST_16_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_LOAD_RELAXED_32) + #define EASTL_ARCH_ATOMIC_LOAD_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_LOAD_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_LOAD_ACQUIRE_32) + #define EASTL_ARCH_ATOMIC_LOAD_ACQUIRE_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_LOAD_ACQUIRE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_LOAD_SEQ_CST_32) + #define EASTL_ARCH_ATOMIC_LOAD_SEQ_CST_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_LOAD_SEQ_CST_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_LOAD_READ_DEPENDS_32) + #define EASTL_ARCH_ATOMIC_LOAD_READ_DEPENDS_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_LOAD_READ_DEPENDS_32_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_LOAD_RELAXED_64) + #define EASTL_ARCH_ATOMIC_LOAD_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_LOAD_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_LOAD_ACQUIRE_64) + #define EASTL_ARCH_ATOMIC_LOAD_ACQUIRE_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_LOAD_ACQUIRE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_LOAD_SEQ_CST_64) + #define EASTL_ARCH_ATOMIC_LOAD_SEQ_CST_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_LOAD_SEQ_CST_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_LOAD_READ_DEPENDS_64) + #define EASTL_ARCH_ATOMIC_LOAD_READ_DEPENDS_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_LOAD_READ_DEPENDS_64_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_LOAD_RELAXED_128) + #define EASTL_ARCH_ATOMIC_LOAD_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_LOAD_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_LOAD_ACQUIRE_128) + #define EASTL_ARCH_ATOMIC_LOAD_ACQUIRE_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_LOAD_ACQUIRE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_LOAD_SEQ_CST_128) + #define EASTL_ARCH_ATOMIC_LOAD_SEQ_CST_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_LOAD_SEQ_CST_128_AVAILABLE 0 +#endif + + +#endif /* EASTL_ATOMIC_INTERNAL_ARCH_LOAD_H */ diff --git 
a/libkram/eastl/include/EASTL/internal/atomic/arch/arch_memory_barrier.h b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_memory_barrier.h new file mode 100644 index 00000000..c6cc6bfc --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_memory_barrier.h @@ -0,0 +1,47 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_ARCH_MEMORY_BARRIER_H +#define EASTL_ATOMIC_INTERNAL_ARCH_MEMORY_BARRIER_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ARCH_ATOMIC_CPU_MB() +// +#if defined(EASTL_ARCH_ATOMIC_CPU_MB) + #define EASTL_ARCH_ATOMIC_CPU_MB_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CPU_MB_AVAILABLE 0 +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ARCH_ATOMIC_CPU_WMB() +// +#if defined(EASTL_ARCH_ATOMIC_CPU_WMB) + #define EASTL_ARCH_ATOMIC_CPU_WMB_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CPU_WMB_AVAILABLE 0 +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ARCH_ATOMIC_CPU_RMB() +// +#if defined(EASTL_ARCH_ATOMIC_CPU_RMB) + #define EASTL_ARCH_ATOMIC_CPU_RMB_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CPU_RMB_AVAILABLE 0 +#endif + + +#endif /* EASTL_ATOMIC_INTERNAL_ARCH_MEMORY_BARRIER_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/arch/arch_or_fetch.h b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_or_fetch.h new file mode 100644 index 00000000..110326b4 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_or_fetch.h @@ -0,0 +1,173 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_ARCH_OR_FETCH_H +#define EASTL_ATOMIC_INTERNAL_ARCH_OR_FETCH_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ARCH_ATOMIC_OR_FETCH_*_N(type, type ret, type * ptr, type val) +// +#if defined(EASTL_ARCH_ATOMIC_OR_FETCH_RELAXED_8) + #define EASTL_ARCH_ATOMIC_OR_FETCH_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_OR_FETCH_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_OR_FETCH_ACQUIRE_8) + #define EASTL_ARCH_ATOMIC_OR_FETCH_ACQUIRE_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_OR_FETCH_ACQUIRE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_OR_FETCH_RELEASE_8) + #define EASTL_ARCH_ATOMIC_OR_FETCH_RELEASE_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_OR_FETCH_RELEASE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_OR_FETCH_ACQ_REL_8) + #define EASTL_ARCH_ATOMIC_OR_FETCH_ACQ_REL_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_OR_FETCH_ACQ_REL_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_OR_FETCH_SEQ_CST_8) + #define EASTL_ARCH_ATOMIC_OR_FETCH_SEQ_CST_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_OR_FETCH_SEQ_CST_8_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_OR_FETCH_RELAXED_16) + #define EASTL_ARCH_ATOMIC_OR_FETCH_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_OR_FETCH_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_OR_FETCH_ACQUIRE_16) + #define EASTL_ARCH_ATOMIC_OR_FETCH_ACQUIRE_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_OR_FETCH_ACQUIRE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_OR_FETCH_RELEASE_16) + #define EASTL_ARCH_ATOMIC_OR_FETCH_RELEASE_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_OR_FETCH_RELEASE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_OR_FETCH_ACQ_REL_16) + #define EASTL_ARCH_ATOMIC_OR_FETCH_ACQ_REL_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_OR_FETCH_ACQ_REL_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_OR_FETCH_SEQ_CST_16) + #define EASTL_ARCH_ATOMIC_OR_FETCH_SEQ_CST_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_OR_FETCH_SEQ_CST_16_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_OR_FETCH_RELAXED_32) + #define EASTL_ARCH_ATOMIC_OR_FETCH_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_OR_FETCH_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_OR_FETCH_ACQUIRE_32) + #define EASTL_ARCH_ATOMIC_OR_FETCH_ACQUIRE_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_OR_FETCH_ACQUIRE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_OR_FETCH_RELEASE_32) + #define EASTL_ARCH_ATOMIC_OR_FETCH_RELEASE_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_OR_FETCH_RELEASE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_OR_FETCH_ACQ_REL_32) + #define EASTL_ARCH_ATOMIC_OR_FETCH_ACQ_REL_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_OR_FETCH_ACQ_REL_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_OR_FETCH_SEQ_CST_32) + #define EASTL_ARCH_ATOMIC_OR_FETCH_SEQ_CST_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_OR_FETCH_SEQ_CST_32_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_OR_FETCH_RELAXED_64) + #define EASTL_ARCH_ATOMIC_OR_FETCH_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_OR_FETCH_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_OR_FETCH_ACQUIRE_64) + #define EASTL_ARCH_ATOMIC_OR_FETCH_ACQUIRE_64_AVAILABLE 1 +#else + 
#define EASTL_ARCH_ATOMIC_OR_FETCH_ACQUIRE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_OR_FETCH_RELEASE_64) + #define EASTL_ARCH_ATOMIC_OR_FETCH_RELEASE_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_OR_FETCH_RELEASE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_OR_FETCH_ACQ_REL_64) + #define EASTL_ARCH_ATOMIC_OR_FETCH_ACQ_REL_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_OR_FETCH_ACQ_REL_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_OR_FETCH_SEQ_CST_64) + #define EASTL_ARCH_ATOMIC_OR_FETCH_SEQ_CST_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_OR_FETCH_SEQ_CST_64_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_OR_FETCH_RELAXED_128) + #define EASTL_ARCH_ATOMIC_OR_FETCH_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_OR_FETCH_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_OR_FETCH_ACQUIRE_128) + #define EASTL_ARCH_ATOMIC_OR_FETCH_ACQUIRE_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_OR_FETCH_ACQUIRE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_OR_FETCH_RELEASE_128) + #define EASTL_ARCH_ATOMIC_OR_FETCH_RELEASE_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_OR_FETCH_RELEASE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_OR_FETCH_ACQ_REL_128) + #define EASTL_ARCH_ATOMIC_OR_FETCH_ACQ_REL_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_OR_FETCH_ACQ_REL_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_OR_FETCH_SEQ_CST_128) + #define EASTL_ARCH_ATOMIC_OR_FETCH_SEQ_CST_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_OR_FETCH_SEQ_CST_128_AVAILABLE 0 +#endif + + +#endif /* EASTL_ATOMIC_INTERNAL_ARCH_OR_FETCH_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/arch/arch_signal_fence.h b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_signal_fence.h new file mode 100644 index 00000000..65b64fc2 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_signal_fence.h @@ -0,0 +1,21 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_ARCH_SIGNAL_FENCE_H +#define EASTL_ATOMIC_INTERNAL_ARCH_SIGNAL_FENCE_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#define EASTL_ARCH_ATOMIC_SIGNAL_FENCE_RELAXED_AVAILABLE 0 +#define EASTL_ARCH_ATOMIC_SIGNAL_FENCE_ACQUIRE_AVAILABLE 0 +#define EASTL_ARCH_ATOMIC_SIGNAL_FENCE_RELEASE_AVAILABLE 0 +#define EASTL_ARCH_ATOMIC_SIGNAL_FENCE_ACQ_REL_AVAILABLE 0 +#define EASTL_ARCH_ATOMIC_SIGNAL_FENCE_SEQ_CST_AVAILABLE 0 + + +#endif /* EASTL_ATOMIC_INTERNAL_ARCH_SIGNAL_FENCE_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/arch/arch_store.h b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_store.h new file mode 100644 index 00000000..9a4112cb --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_store.h @@ -0,0 +1,113 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
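// --- Editorial note: illustrative, not part of the original headers. ---
// arch_signal_fence.h above hard-codes every *_AVAILABLE flag to 0: a signal fence
// only has to order the current thread against a signal handler running on that
// same thread, so no architecture-specific CPU instruction is required. A
// compiler-level barrier is enough, and it is left to the compiler layer. A rough
// sketch of what that amounts to on GCC/Clang (an assumption for illustration; the
// macro name is hypothetical and the real mapping is in the compiler headers, not
// shown here):
#if 0 // illustration only
	#define EASTL_ATOMIC_SIGNAL_FENCE_ACQ_REL() \
		__atomic_signal_fence(__ATOMIC_ACQ_REL)
#endif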
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_ARCH_STORE_H +#define EASTL_ATOMIC_INTERNAL_ARCH_STORE_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ARCH_ATOMIC_STORE_*_N(type, type * ptr, type val) +// +#if defined(EASTL_ARCH_ATOMIC_STORE_RELAXED_8) + #define EASTL_ARCH_ATOMIC_STORE_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_STORE_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_STORE_RELEASE_8) + #define EASTL_ARCH_ATOMIC_STORE_RELEASE_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_STORE_RELEASE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_STORE_SEQ_CST_8) + #define EASTL_ARCH_ATOMIC_STORE_SEQ_CST_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_STORE_SEQ_CST_8_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_STORE_RELAXED_16) + #define EASTL_ARCH_ATOMIC_STORE_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_STORE_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_STORE_RELEASE_16) + #define EASTL_ARCH_ATOMIC_STORE_RELEASE_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_STORE_RELEASE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_STORE_SEQ_CST_16) + #define EASTL_ARCH_ATOMIC_STORE_SEQ_CST_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_STORE_SEQ_CST_16_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_STORE_RELAXED_32) + #define EASTL_ARCH_ATOMIC_STORE_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_STORE_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_STORE_RELEASE_32) + #define EASTL_ARCH_ATOMIC_STORE_RELEASE_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_STORE_RELEASE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_STORE_SEQ_CST_32) + #define EASTL_ARCH_ATOMIC_STORE_SEQ_CST_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_STORE_SEQ_CST_32_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_STORE_RELAXED_64) + #define EASTL_ARCH_ATOMIC_STORE_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_STORE_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_STORE_RELEASE_64) + #define EASTL_ARCH_ATOMIC_STORE_RELEASE_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_STORE_RELEASE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_STORE_SEQ_CST_64) + #define EASTL_ARCH_ATOMIC_STORE_SEQ_CST_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_STORE_SEQ_CST_64_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_STORE_RELAXED_128) + #define EASTL_ARCH_ATOMIC_STORE_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_STORE_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_STORE_RELEASE_128) + #define EASTL_ARCH_ATOMIC_STORE_RELEASE_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_STORE_RELEASE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_STORE_SEQ_CST_128) + #define EASTL_ARCH_ATOMIC_STORE_SEQ_CST_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_STORE_SEQ_CST_128_AVAILABLE 0 +#endif + + +#endif /* EASTL_ATOMIC_INTERNAL_ARCH_STORE_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/arch/arch_sub_fetch.h b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_sub_fetch.h new file mode 100644 index 00000000..20241b14 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_sub_fetch.h @@ -0,0 +1,173 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright 
(c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_ARCH_SUB_FETCH_H +#define EASTL_ATOMIC_INTERNAL_ARCH_SUB_FETCH_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ARCH_ATOMIC_SUB_FETCH_*_N(type, type ret, type * ptr, type val) +// +#if defined(EASTL_ARCH_ATOMIC_SUB_FETCH_RELAXED_8) + #define EASTL_ARCH_ATOMIC_SUB_FETCH_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_SUB_FETCH_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_SUB_FETCH_ACQUIRE_8) + #define EASTL_ARCH_ATOMIC_SUB_FETCH_ACQUIRE_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_SUB_FETCH_ACQUIRE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_SUB_FETCH_RELEASE_8) + #define EASTL_ARCH_ATOMIC_SUB_FETCH_RELEASE_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_SUB_FETCH_RELEASE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_SUB_FETCH_ACQ_REL_8) + #define EASTL_ARCH_ATOMIC_SUB_FETCH_ACQ_REL_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_SUB_FETCH_ACQ_REL_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_SUB_FETCH_SEQ_CST_8) + #define EASTL_ARCH_ATOMIC_SUB_FETCH_SEQ_CST_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_SUB_FETCH_SEQ_CST_8_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_SUB_FETCH_RELAXED_16) + #define EASTL_ARCH_ATOMIC_SUB_FETCH_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_SUB_FETCH_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_SUB_FETCH_ACQUIRE_16) + #define EASTL_ARCH_ATOMIC_SUB_FETCH_ACQUIRE_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_SUB_FETCH_ACQUIRE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_SUB_FETCH_RELEASE_16) + #define EASTL_ARCH_ATOMIC_SUB_FETCH_RELEASE_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_SUB_FETCH_RELEASE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_SUB_FETCH_ACQ_REL_16) + #define EASTL_ARCH_ATOMIC_SUB_FETCH_ACQ_REL_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_SUB_FETCH_ACQ_REL_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_SUB_FETCH_SEQ_CST_16) + #define EASTL_ARCH_ATOMIC_SUB_FETCH_SEQ_CST_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_SUB_FETCH_SEQ_CST_16_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_SUB_FETCH_RELAXED_32) + #define EASTL_ARCH_ATOMIC_SUB_FETCH_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_SUB_FETCH_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_SUB_FETCH_ACQUIRE_32) + #define EASTL_ARCH_ATOMIC_SUB_FETCH_ACQUIRE_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_SUB_FETCH_ACQUIRE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_SUB_FETCH_RELEASE_32) + #define EASTL_ARCH_ATOMIC_SUB_FETCH_RELEASE_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_SUB_FETCH_RELEASE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_SUB_FETCH_ACQ_REL_32) + #define EASTL_ARCH_ATOMIC_SUB_FETCH_ACQ_REL_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_SUB_FETCH_ACQ_REL_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_SUB_FETCH_SEQ_CST_32) + #define EASTL_ARCH_ATOMIC_SUB_FETCH_SEQ_CST_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_SUB_FETCH_SEQ_CST_32_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_SUB_FETCH_RELAXED_64) + #define EASTL_ARCH_ATOMIC_SUB_FETCH_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_SUB_FETCH_RELAXED_64_AVAILABLE 0 +#endif + +#if 
defined(EASTL_ARCH_ATOMIC_SUB_FETCH_ACQUIRE_64) + #define EASTL_ARCH_ATOMIC_SUB_FETCH_ACQUIRE_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_SUB_FETCH_ACQUIRE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_SUB_FETCH_RELEASE_64) + #define EASTL_ARCH_ATOMIC_SUB_FETCH_RELEASE_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_SUB_FETCH_RELEASE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_SUB_FETCH_ACQ_REL_64) + #define EASTL_ARCH_ATOMIC_SUB_FETCH_ACQ_REL_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_SUB_FETCH_ACQ_REL_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_SUB_FETCH_SEQ_CST_64) + #define EASTL_ARCH_ATOMIC_SUB_FETCH_SEQ_CST_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_SUB_FETCH_SEQ_CST_64_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_SUB_FETCH_RELAXED_128) + #define EASTL_ARCH_ATOMIC_SUB_FETCH_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_SUB_FETCH_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_SUB_FETCH_ACQUIRE_128) + #define EASTL_ARCH_ATOMIC_SUB_FETCH_ACQUIRE_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_SUB_FETCH_ACQUIRE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_SUB_FETCH_RELEASE_128) + #define EASTL_ARCH_ATOMIC_SUB_FETCH_RELEASE_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_SUB_FETCH_RELEASE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_SUB_FETCH_ACQ_REL_128) + #define EASTL_ARCH_ATOMIC_SUB_FETCH_ACQ_REL_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_SUB_FETCH_ACQ_REL_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_SUB_FETCH_SEQ_CST_128) + #define EASTL_ARCH_ATOMIC_SUB_FETCH_SEQ_CST_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_SUB_FETCH_SEQ_CST_128_AVAILABLE 0 +#endif + + +#endif /* EASTL_ATOMIC_INTERNAL_ARCH_SUB_FETCH_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/arch/arch_thread_fence.h b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_thread_fence.h new file mode 100644 index 00000000..676fbf19 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_thread_fence.h @@ -0,0 +1,49 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_ARCH_THREAD_FENCE_H +#define EASTL_ATOMIC_INTERNAL_ARCH_THREAD_FENCE_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ARCH_ATOMIC_THREAD_FENCE_*() +// +#if defined(EASTL_ARCH_ATOMIC_THREAD_FENCE_RELAXED) + #define EASTL_ARCH_ATOMIC_THREAD_FENCE_RELAXED_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_THREAD_FENCE_RELAXED_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_THREAD_FENCE_ACQUIRE) + #define EASTL_ARCH_ATOMIC_THREAD_FENCE_ACQUIRE_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_THREAD_FENCE_ACQUIRE_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_THREAD_FENCE_RELEASE) + #define EASTL_ARCH_ATOMIC_THREAD_FENCE_RELEASE_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_THREAD_FENCE_RELEASE_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_THREAD_FENCE_ACQ_REL) + #define EASTL_ARCH_ATOMIC_THREAD_FENCE_ACQ_REL_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_THREAD_FENCE_ACQ_REL_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_THREAD_FENCE_SEQ_CST) + #define EASTL_ARCH_ATOMIC_THREAD_FENCE_SEQ_CST_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_THREAD_FENCE_SEQ_CST_AVAILABLE 0 +#endif + + +#endif /* EASTL_ATOMIC_INTERNAL_ARCH_THREAD_FENCE_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/arch/arch_xor_fetch.h b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_xor_fetch.h new file mode 100644 index 00000000..63548c22 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_xor_fetch.h @@ -0,0 +1,173 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_ARCH_XOR_FETCH_H +#define EASTL_ATOMIC_INTERNAL_ARCH_XOR_FETCH_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ARCH_ATOMIC_XOR_FETCH_*_N(type, type ret, type * ptr, type val) +// +#if defined(EASTL_ARCH_ATOMIC_XOR_FETCH_RELAXED_8) + #define EASTL_ARCH_ATOMIC_XOR_FETCH_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_XOR_FETCH_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_XOR_FETCH_ACQUIRE_8) + #define EASTL_ARCH_ATOMIC_XOR_FETCH_ACQUIRE_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_XOR_FETCH_ACQUIRE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_XOR_FETCH_RELEASE_8) + #define EASTL_ARCH_ATOMIC_XOR_FETCH_RELEASE_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_XOR_FETCH_RELEASE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_XOR_FETCH_ACQ_REL_8) + #define EASTL_ARCH_ATOMIC_XOR_FETCH_ACQ_REL_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_XOR_FETCH_ACQ_REL_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_XOR_FETCH_SEQ_CST_8) + #define EASTL_ARCH_ATOMIC_XOR_FETCH_SEQ_CST_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_XOR_FETCH_SEQ_CST_8_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_XOR_FETCH_RELAXED_16) + #define EASTL_ARCH_ATOMIC_XOR_FETCH_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_XOR_FETCH_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_XOR_FETCH_ACQUIRE_16) + #define EASTL_ARCH_ATOMIC_XOR_FETCH_ACQUIRE_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_XOR_FETCH_ACQUIRE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_XOR_FETCH_RELEASE_16) + #define EASTL_ARCH_ATOMIC_XOR_FETCH_RELEASE_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_XOR_FETCH_RELEASE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_XOR_FETCH_ACQ_REL_16) + #define EASTL_ARCH_ATOMIC_XOR_FETCH_ACQ_REL_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_XOR_FETCH_ACQ_REL_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_XOR_FETCH_SEQ_CST_16) + #define EASTL_ARCH_ATOMIC_XOR_FETCH_SEQ_CST_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_XOR_FETCH_SEQ_CST_16_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_XOR_FETCH_RELAXED_32) + #define EASTL_ARCH_ATOMIC_XOR_FETCH_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_XOR_FETCH_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_XOR_FETCH_ACQUIRE_32) + #define EASTL_ARCH_ATOMIC_XOR_FETCH_ACQUIRE_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_XOR_FETCH_ACQUIRE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_XOR_FETCH_RELEASE_32) + #define EASTL_ARCH_ATOMIC_XOR_FETCH_RELEASE_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_XOR_FETCH_RELEASE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_XOR_FETCH_ACQ_REL_32) + #define EASTL_ARCH_ATOMIC_XOR_FETCH_ACQ_REL_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_XOR_FETCH_ACQ_REL_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_XOR_FETCH_SEQ_CST_32) + #define EASTL_ARCH_ATOMIC_XOR_FETCH_SEQ_CST_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_XOR_FETCH_SEQ_CST_32_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_XOR_FETCH_RELAXED_64) + #define EASTL_ARCH_ATOMIC_XOR_FETCH_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_XOR_FETCH_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_XOR_FETCH_ACQUIRE_64) + #define 
EASTL_ARCH_ATOMIC_XOR_FETCH_ACQUIRE_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_XOR_FETCH_ACQUIRE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_XOR_FETCH_RELEASE_64) + #define EASTL_ARCH_ATOMIC_XOR_FETCH_RELEASE_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_XOR_FETCH_RELEASE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_XOR_FETCH_ACQ_REL_64) + #define EASTL_ARCH_ATOMIC_XOR_FETCH_ACQ_REL_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_XOR_FETCH_ACQ_REL_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_XOR_FETCH_SEQ_CST_64) + #define EASTL_ARCH_ATOMIC_XOR_FETCH_SEQ_CST_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_XOR_FETCH_SEQ_CST_64_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_XOR_FETCH_RELAXED_128) + #define EASTL_ARCH_ATOMIC_XOR_FETCH_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_XOR_FETCH_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_XOR_FETCH_ACQUIRE_128) + #define EASTL_ARCH_ATOMIC_XOR_FETCH_ACQUIRE_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_XOR_FETCH_ACQUIRE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_XOR_FETCH_RELEASE_128) + #define EASTL_ARCH_ATOMIC_XOR_FETCH_RELEASE_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_XOR_FETCH_RELEASE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_XOR_FETCH_ACQ_REL_128) + #define EASTL_ARCH_ATOMIC_XOR_FETCH_ACQ_REL_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_XOR_FETCH_ACQ_REL_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_XOR_FETCH_SEQ_CST_128) + #define EASTL_ARCH_ATOMIC_XOR_FETCH_SEQ_CST_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_XOR_FETCH_SEQ_CST_128_AVAILABLE 0 +#endif + + +#endif /* EASTL_ATOMIC_INTERNAL_ARCH_XOR_FETCH_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/atomic.h b/libkram/eastl/include/EASTL/internal/atomic/atomic.h new file mode 100644 index 00000000..e1c5286e --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/atomic.h @@ -0,0 +1,252 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_H +#define EASTL_ATOMIC_INTERNAL_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#include +#include +#include +#include + +#include "atomic_macros.h" +#include "atomic_casts.h" + +#include "atomic_memory_order.h" +#include "atomic_asserts.h" + +#include "atomic_size_aligned.h" +#include "atomic_base_width.h" + +#include "atomic_integral.h" + +#include "atomic_pointer.h" + + +///////////////////////////////////////////////////////////////////////////////// + + +/** + * NOTE: + * + * All of the actual implementation is done via the ATOMIC_MACROS in the compiler or arch sub folders. + * The C++ code is merely boilerplate around these macros that actually implement the atomic operations. + * The C++ boilerplate is also hidden behind macros. + * This may seem more complicated but this is all meant to reduce copy-pasting and to ensure all operations + * all end up going down to one macro that does the actual implementation. + * The reduced code duplication makes it easier to verify the implementation and reason about it. + * Ensures we do not have to re-implement the same code for compilers that do not support generic builtins such as MSVC. 
+ * Ensures for compilers that have separate intrinsics for different widths, that C++ boilerplate isn't copy-pasted leading to programmer errors. + * Ensures if we ever have to implement a new platform, only the low-level leaf macros have to be implemented, everything else will be generated for you. + */ + + +#include "atomic_push_compiler_options.h" + + +namespace eastl +{ + + +namespace internal +{ + + + template + struct is_atomic_lockfree_size + { + static EASTL_CPP17_INLINE_VARIABLE constexpr bool value = false || + #if defined(EASTL_ATOMIC_HAS_8BIT) + sizeof(T) == 1 || + #endif + #if defined(EASTL_ATOMIC_HAS_16BIT) + sizeof(T) == 2 || + #endif + #if defined(EASTL_ATOMIC_HAS_32BIT) + sizeof(T) == 4 || + #endif + #if defined(EASTL_ATOMIC_HAS_64BIT) + sizeof(T) == 8 || + #endif + #if defined(EASTL_ATOMIC_HAS_128BIT) + sizeof(T) == 16 || + #endif + false; + }; + + + template + struct is_user_type_suitable_for_primary_template + { + static EASTL_CPP17_INLINE_VARIABLE constexpr bool value = eastl::internal::is_atomic_lockfree_size::value; + }; + + + template + using select_atomic_inherit_0 = typename eastl::conditional || eastl::internal::is_user_type_suitable_for_primary_template::value, + eastl::internal::atomic_base_width, /* True */ + eastl::internal::atomic_invalid_type /* False */ + >::type; + + template + using select_atomic_inherit = select_atomic_inherit_0; + + +} // namespace internal + + +#define EASTL_ATOMIC_CLASS_IMPL(type, base, valueType, differenceType) \ + private: \ + \ + EASTL_ATOMIC_STATIC_ASSERT_TYPE(type); \ + \ + using Base = base; \ + \ + public: \ + \ + typedef valueType value_type; \ + typedef differenceType difference_type; \ + \ + public: \ + \ + static EASTL_CPP17_INLINE_VARIABLE constexpr bool is_always_lock_free = eastl::internal::is_atomic_lockfree_size::value; \ + \ + public: /* deleted ctors && assignment operators */ \ + \ + atomic(const atomic&) EA_NOEXCEPT = delete; \ + \ + atomic& operator=(const atomic&) EA_NOEXCEPT = delete; \ + atomic& operator=(const atomic&) volatile EA_NOEXCEPT = delete; \ + \ + public: /* ctors */ \ + \ + EA_CONSTEXPR atomic(type desired) EA_NOEXCEPT \ + : Base{ desired } \ + { \ + } \ + \ + EA_CONSTEXPR atomic() EA_NOEXCEPT_IF(eastl::is_nothrow_default_constructible_v) = default; \ + \ + public: \ + \ + bool is_lock_free() const EA_NOEXCEPT \ + { \ + return eastl::internal::is_atomic_lockfree_size::value; \ + } \ + \ + bool is_lock_free() const volatile EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_STATIC_ASSERT_VOLATILE_MEM_FN(type); \ + return false; \ + } + + +#define EASTL_ATOMIC_USING_ATOMIC_BASE(type) \ + public: \ + \ + using Base::operator=; \ + using Base::store; \ + using Base::load; \ + using Base::exchange; \ + using Base::compare_exchange_weak; \ + using Base::compare_exchange_strong; \ + \ + public: \ + \ + operator type() const volatile EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_STATIC_ASSERT_VOLATILE_MEM_FN(T); \ + } \ + \ + operator type() const EA_NOEXCEPT \ + { \ + return load(eastl::memory_order_seq_cst); \ + } + + +#define EASTL_ATOMIC_USING_ATOMIC_INTEGRAL() \ + public: \ + \ + using Base::fetch_add; \ + using Base::add_fetch; \ + \ + using Base::fetch_sub; \ + using Base::sub_fetch; \ + \ + using Base::fetch_and; \ + using Base::and_fetch; \ + \ + using Base::fetch_or; \ + using Base::or_fetch; \ + \ + using Base::fetch_xor; \ + using Base::xor_fetch; \ + \ + using Base::operator++; \ + using Base::operator--; \ + using Base::operator+=; \ + using Base::operator-=; \ + using Base::operator&=; \ + using Base::operator|=; \ 
+ using Base::operator^=; + + +#define EASTL_ATOMIC_USING_ATOMIC_POINTER() \ + public: \ + \ + using Base::fetch_add; \ + using Base::add_fetch; \ + using Base::fetch_sub; \ + using Base::sub_fetch; \ + \ + using Base::operator++; \ + using Base::operator--; \ + using Base::operator+=; \ + using Base::operator-=; + + +template +struct atomic : protected eastl::internal::select_atomic_inherit +{ + EASTL_ATOMIC_CLASS_IMPL(T, eastl::internal::select_atomic_inherit, T, T) + + EASTL_ATOMIC_USING_ATOMIC_BASE(T) +}; + + +template +struct atomic && !eastl::is_same_v>> : protected eastl::internal::atomic_integral_width +{ + EASTL_ATOMIC_CLASS_IMPL(T, eastl::internal::atomic_integral_width, T, T) + + EASTL_ATOMIC_USING_ATOMIC_BASE(T) + + EASTL_ATOMIC_USING_ATOMIC_INTEGRAL() +}; + + +template +struct atomic : protected eastl::internal::atomic_pointer_width +{ + EASTL_ATOMIC_CLASS_IMPL(T*, eastl::internal::atomic_pointer_width, T*, ptrdiff_t) + + EASTL_ATOMIC_USING_ATOMIC_BASE(T*) + + EASTL_ATOMIC_USING_ATOMIC_POINTER() +}; + + +} // namespace eastl + + +#include "atomic_pop_compiler_options.h" + + +#endif /* EASTL_ATOMIC_INTERNAL_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/atomic_asserts.h b/libkram/eastl/include/EASTL/internal/atomic/atomic_asserts.h new file mode 100644 index 00000000..9324a479 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/atomic_asserts.h @@ -0,0 +1,75 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_STATIC_ASSERTS_H +#define EASTL_ATOMIC_INTERNAL_STATIC_ASSERTS_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#define EASTL_ATOMIC_STATIC_ASSERT_VOLATILE_MEM_FN(type) \ + static_assert(!eastl::is_same::value, "eastl::atomic : volatile eastl::atomic is not what you expect! Read the docs in EASTL/atomic.h! Use the memory orders to access the atomic object!"); + +#define EASTL_ATOMIC_STATIC_ASSERT_INVALID_MEMORY_ORDER(type) \ + static_assert(!eastl::is_same::value, "eastl::atomic : invalid memory order for the given operation!"); + +#define EASTL_ATOMIC_STATIC_ASSERT_TYPE(type) \ + /* User Provided T must not be cv qualified */ \ + static_assert(!eastl::is_const::value, "eastl::atomic : Template Typename T cannot be const!"); \ + static_assert(!eastl::is_volatile::value, "eastl::atomic : Template Typename T cannot be volatile! 
Use the memory orders to access the underlying type for the guarantees you need."); \ + /* T must satisfy StandardLayoutType */ \ + static_assert(eastl::is_standard_layout::value, "eastl::atomic : Must have standard layout!"); \ + /* T must be TriviallyCopyable but it does not have to be TriviallyConstructible */ \ + static_assert(eastl::is_trivially_copyable::value, "eastl::atomci : Template Typename T must be trivially copyable!"); \ + static_assert(eastl::is_copy_constructible::value, "eastl::atomic : Template Typename T must be copy constructible!"); \ + static_assert(eastl::is_move_constructible::value, "eastl::atomic : Template Typename T must be move constructible!"); \ + static_assert(eastl::is_copy_assignable::value, "eastl::atomic : Template Typename T must be copy assignable!"); \ + static_assert(eastl::is_move_assignable::value, "eastl::atomic : Template Typename T must be move assignable!"); \ + static_assert(eastl::is_trivially_destructible::value, "eastl::atomic : Must be trivially destructible!"); \ + static_assert(eastl::internal::is_atomic_lockfree_size::value, "eastl::atomic : Template Typename T must be a lockfree size!"); + +#define EASTL_ATOMIC_STATIC_ASSERT_TYPE_IS_OBJECT(type) \ + static_assert(eastl::is_object::value, "eastl::atomic : Template Typename T must be an object type!"); + +#define EASTL_ATOMIC_ASSERT_ALIGNED(alignment) \ + EASTL_ASSERT((alignment & (alignment - 1)) == 0); \ + EASTL_ASSERT((reinterpret_cast(this) & (alignment - 1)) == 0) + + +namespace eastl +{ + + +namespace internal +{ + + + template + struct atomic_invalid_type + { + /** + * class Test { int i; int j; int k; }; sizeof(Test) == 96 bits + * + * std::atomic allows non-primitive types to be used for the template type. + * This causes the api to degrade to locking for types that cannot fit into the lockfree size + * of the target platform such as std::atomic leading to performance traps. + * + * If this static_assert() fired, this means your template type T is larger than any atomic instruction + * supported on the given platform. + */ + + static_assert(!eastl::is_same::value, "eastl::atomic : invalid template type T!"); + }; + + +} // namespace internal + + +} // namespace eastl + + +#endif /* EASTL_ATOMIC_INTERNAL_STATIC_ASSERTS_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/atomic_base_width.h b/libkram/eastl/include/EASTL/internal/atomic/atomic_base_width.h new file mode 100644 index 00000000..ca476182 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/atomic_base_width.h @@ -0,0 +1,346 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_BASE_WIDTH_H +#define EASTL_ATOMIC_INTERNAL_BASE_WIDTH_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#include "atomic_push_compiler_options.h" + + +namespace eastl +{ + + +namespace internal +{ + + + template + struct atomic_base_width; + + /** + * NOTE: + * + * T does not have to be trivially default constructible but it still + * has to be a trivially copyable type for the primary atomic template. + * Thus we must type pun into whatever storage type of the given fixed width + * the platform designates. This ensures T does not have to be trivially constructible. 
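 *
 * (Editorial illustration added here, not part of the original comment.) For
 * example, assuming a platform with 32-bit atomics, a 4-byte trivially copyable
 * user type is punned through the platform's 32-bit fixed-width storage type
 * (typically uint32_t) by the store/load macros below:
 *
 *     struct Vec2s { int16_t x; int16_t y; };   // 4 bytes, trivially copyable
 *
 *     eastl::atomic<Vec2s> v{ Vec2s{ 1, 2 } };
 *     Vec2s desired{ 3, 4 };
 *
 *     // store: pun Vec2s -> fixed-width 32-bit type, then one 32-bit atomic store
 *     v.store(desired, eastl::memory_order_release);
 *
 *     // load: one 32-bit atomic load, then pun the 32-bit value back to Vec2s
 *     Vec2s observed = v.load(eastl::memory_order_acquire);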
+ */ + +#define EASTL_ATOMIC_BASE_FIXED_WIDTH_TYPE(bits) \ + EA_PREPROCESSOR_JOIN(EASTL_ATOMIC_FIXED_WIDTH_TYPE_, bits) + + +#define EASTL_ATOMIC_STORE_FUNC_IMPL(op, bits) \ + EASTL_ATOMIC_BASE_FIXED_WIDTH_TYPE(bits) fixedWidthDesired = EASTL_ATOMIC_TYPE_PUN_CAST(EASTL_ATOMIC_BASE_FIXED_WIDTH_TYPE(bits), desired); \ + EA_PREPROCESSOR_JOIN(op, bits)(EASTL_ATOMIC_BASE_FIXED_WIDTH_TYPE(bits), \ + EASTL_ATOMIC_TYPE_CAST(EASTL_ATOMIC_BASE_FIXED_WIDTH_TYPE(bits), this->GetAtomicAddress()), \ + fixedWidthDesired) + + +#define EASTL_ATOMIC_LOAD_FUNC_IMPL(op, bits) \ + EASTL_ATOMIC_BASE_FIXED_WIDTH_TYPE(bits) retVal; \ + EA_PREPROCESSOR_JOIN(op, bits)(EASTL_ATOMIC_BASE_FIXED_WIDTH_TYPE(bits), \ + retVal, \ + EASTL_ATOMIC_TYPE_CAST(EASTL_ATOMIC_BASE_FIXED_WIDTH_TYPE(bits), this->GetAtomicAddress())); \ + return EASTL_ATOMIC_TYPE_PUN_CAST(T, retVal); + + +#define EASTL_ATOMIC_EXCHANGE_FUNC_IMPL(op, bits) \ + EASTL_ATOMIC_BASE_FIXED_WIDTH_TYPE(bits) retVal; \ + EASTL_ATOMIC_BASE_FIXED_WIDTH_TYPE(bits) fixedWidthDesired = EASTL_ATOMIC_TYPE_PUN_CAST(EASTL_ATOMIC_BASE_FIXED_WIDTH_TYPE(bits), desired); \ + EA_PREPROCESSOR_JOIN(op, bits)(EASTL_ATOMIC_BASE_FIXED_WIDTH_TYPE(bits), \ + retVal, \ + EASTL_ATOMIC_TYPE_CAST(EASTL_ATOMIC_BASE_FIXED_WIDTH_TYPE(bits), this->GetAtomicAddress()), \ + fixedWidthDesired); \ + return EASTL_ATOMIC_TYPE_PUN_CAST(T, retVal); + + +#define EASTL_ATOMIC_CMPXCHG_FUNC_IMPL(op, bits) \ + bool retVal; \ + EASTL_ATOMIC_BASE_FIXED_WIDTH_TYPE(bits) fixedWidthDesired = EASTL_ATOMIC_TYPE_PUN_CAST(EASTL_ATOMIC_BASE_FIXED_WIDTH_TYPE(bits), desired); \ + EA_PREPROCESSOR_JOIN(op, bits)(EASTL_ATOMIC_BASE_FIXED_WIDTH_TYPE(bits), \ + retVal, \ + EASTL_ATOMIC_TYPE_CAST(EASTL_ATOMIC_BASE_FIXED_WIDTH_TYPE(bits), this->GetAtomicAddress()), \ + EASTL_ATOMIC_TYPE_CAST(EASTL_ATOMIC_BASE_FIXED_WIDTH_TYPE(bits), &expected), \ + fixedWidthDesired); \ + return retVal; + + +#define EASTL_ATOMIC_BASE_OP_JOIN(op, Order) \ + EA_PREPROCESSOR_JOIN(EA_PREPROCESSOR_JOIN(EASTL_ATOMIC_, op), Order) + + +#define EASTL_ATOMIC_BASE_CMPXCHG_FUNCS_IMPL(funcName, cmpxchgOp, bits) \ + using Base::funcName; \ + \ + bool funcName(T& expected, T desired) EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_CMPXCHG_FUNC_IMPL(EASTL_ATOMIC_BASE_OP_JOIN(cmpxchgOp, _SEQ_CST_), bits); \ + } \ + \ + bool funcName(T& expected, T desired, \ + eastl::internal::memory_order_relaxed_s) EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_CMPXCHG_FUNC_IMPL(EASTL_ATOMIC_BASE_OP_JOIN(cmpxchgOp, _RELAXED_), bits); \ + } \ + \ + bool funcName(T& expected, T desired, \ + eastl::internal::memory_order_acquire_s) EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_CMPXCHG_FUNC_IMPL(EASTL_ATOMIC_BASE_OP_JOIN(cmpxchgOp, _ACQUIRE_), bits); \ + } \ + \ + bool funcName(T& expected, T desired, \ + eastl::internal::memory_order_release_s) EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_CMPXCHG_FUNC_IMPL(EASTL_ATOMIC_BASE_OP_JOIN(cmpxchgOp, _RELEASE_), bits); \ + } \ + \ + bool funcName(T& expected, T desired, \ + eastl::internal::memory_order_acq_rel_s) EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_CMPXCHG_FUNC_IMPL(EASTL_ATOMIC_BASE_OP_JOIN(cmpxchgOp, _ACQ_REL_), bits); \ + } \ + \ + bool funcName(T& expected, T desired, \ + eastl::internal::memory_order_seq_cst_s) EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_CMPXCHG_FUNC_IMPL(EASTL_ATOMIC_BASE_OP_JOIN(cmpxchgOp, _SEQ_CST_), bits); \ + } \ + \ + bool funcName(T& expected, T desired, \ + eastl::internal::memory_order_relaxed_s, \ + eastl::internal::memory_order_relaxed_s) EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_CMPXCHG_FUNC_IMPL(EASTL_ATOMIC_BASE_OP_JOIN(cmpxchgOp, _RELAXED_RELAXED_), 
bits); \ + } \ + \ + bool funcName(T& expected, T desired, \ + eastl::internal::memory_order_acquire_s, \ + eastl::internal::memory_order_relaxed_s) EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_CMPXCHG_FUNC_IMPL(EASTL_ATOMIC_BASE_OP_JOIN(cmpxchgOp, _ACQUIRE_RELAXED_), bits); \ + } \ + \ + bool funcName(T& expected, T desired, \ + eastl::internal::memory_order_acquire_s, \ + eastl::internal::memory_order_acquire_s) EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_CMPXCHG_FUNC_IMPL(EASTL_ATOMIC_BASE_OP_JOIN(cmpxchgOp, _ACQUIRE_ACQUIRE_), bits); \ + } \ + \ + bool funcName(T& expected, T desired, \ + eastl::internal::memory_order_release_s, \ + eastl::internal::memory_order_relaxed_s) EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_CMPXCHG_FUNC_IMPL(EASTL_ATOMIC_BASE_OP_JOIN(cmpxchgOp, _RELEASE_RELAXED_), bits); \ + } \ + \ + bool funcName(T& expected, T desired, \ + eastl::internal::memory_order_acq_rel_s, \ + eastl::internal::memory_order_relaxed_s) EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_CMPXCHG_FUNC_IMPL(EASTL_ATOMIC_BASE_OP_JOIN(cmpxchgOp, _ACQ_REL_RELAXED_), bits); \ + } \ + \ + bool funcName(T& expected, T desired, \ + eastl::internal::memory_order_acq_rel_s, \ + eastl::internal::memory_order_acquire_s) EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_CMPXCHG_FUNC_IMPL(EASTL_ATOMIC_BASE_OP_JOIN(cmpxchgOp, _ACQ_REL_ACQUIRE_), bits); \ + } \ + \ + bool funcName(T& expected, T desired, \ + eastl::internal::memory_order_seq_cst_s, \ + eastl::internal::memory_order_relaxed_s) EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_CMPXCHG_FUNC_IMPL(EASTL_ATOMIC_BASE_OP_JOIN(cmpxchgOp, _SEQ_CST_RELAXED_), bits); \ + } \ + \ + bool funcName(T& expected, T desired, \ + eastl::internal::memory_order_seq_cst_s, \ + eastl::internal::memory_order_acquire_s) EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_CMPXCHG_FUNC_IMPL(EASTL_ATOMIC_BASE_OP_JOIN(cmpxchgOp, _SEQ_CST_ACQUIRE_), bits); \ + } \ + \ + bool funcName(T& expected, T desired, \ + eastl::internal::memory_order_seq_cst_s, \ + eastl::internal::memory_order_seq_cst_s) EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_CMPXCHG_FUNC_IMPL(EASTL_ATOMIC_BASE_OP_JOIN(cmpxchgOp, _SEQ_CST_SEQ_CST_), bits); \ + } + +#define EASTL_ATOMIC_BASE_CMPXCHG_WEAK_FUNCS_IMPL(bits) \ + EASTL_ATOMIC_BASE_CMPXCHG_FUNCS_IMPL(compare_exchange_weak, CMPXCHG_WEAK, bits) + +#define EASTL_ATOMIC_BASE_CMPXCHG_STRONG_FUNCS_IMPL(bits) \ + EASTL_ATOMIC_BASE_CMPXCHG_FUNCS_IMPL(compare_exchange_strong, CMPXCHG_STRONG, bits) + + +#define EASTL_ATOMIC_BASE_WIDTH_SPECIALIZE(bytes, bits) \ + template \ + struct atomic_base_width : public atomic_size_aligned \ + { \ + private: \ + \ + static_assert(EA_ALIGN_OF(atomic_size_aligned) == bytes, "eastl::atomic must be sizeof(T) aligned!"); \ + static_assert(EA_ALIGN_OF(atomic_size_aligned) == sizeof(T), "eastl::atomic must be sizeof(T) aligned!"); \ + using Base = atomic_size_aligned; \ + \ + public: /* ctors */ \ + \ + EA_CONSTEXPR atomic_base_width(T desired) EA_NOEXCEPT \ + : Base{ desired } \ + { \ + } \ + \ + EA_CONSTEXPR atomic_base_width() EA_NOEXCEPT_IF(eastl::is_nothrow_default_constructible_v) = default; \ + \ + atomic_base_width(const atomic_base_width&) EA_NOEXCEPT = delete; \ + \ + public: /* store */ \ + \ + using Base::store; \ + \ + void store(T desired) EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_STORE_FUNC_IMPL(EASTL_ATOMIC_STORE_SEQ_CST_, bits); \ + } \ + \ + void store(T desired, eastl::internal::memory_order_relaxed_s) EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_STORE_FUNC_IMPL(EASTL_ATOMIC_STORE_RELAXED_, bits); \ + } \ + \ + void store(T desired, eastl::internal::memory_order_release_s) EA_NOEXCEPT \ + { \ + 
EASTL_ATOMIC_STORE_FUNC_IMPL(EASTL_ATOMIC_STORE_RELEASE_, bits); \ + } \ + \ + void store(T desired, eastl::internal::memory_order_seq_cst_s) EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_STORE_FUNC_IMPL(EASTL_ATOMIC_STORE_SEQ_CST_, bits); \ + } \ + \ + public: /* load */ \ + \ + using Base::load; \ + \ + T load() const EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_LOAD_FUNC_IMPL(EASTL_ATOMIC_LOAD_SEQ_CST_, bits); \ + } \ + \ + T load(eastl::internal::memory_order_relaxed_s) const EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_LOAD_FUNC_IMPL(EASTL_ATOMIC_LOAD_RELAXED_, bits); \ + } \ + \ + T load(eastl::internal::memory_order_acquire_s) const EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_LOAD_FUNC_IMPL(EASTL_ATOMIC_LOAD_ACQUIRE_, bits); \ + } \ + \ + T load(eastl::internal::memory_order_seq_cst_s) const EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_LOAD_FUNC_IMPL(EASTL_ATOMIC_LOAD_SEQ_CST_, bits); \ + } \ + \ + public: /* exchange */ \ + \ + using Base::exchange; \ + \ + T exchange(T desired) EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_EXCHANGE_FUNC_IMPL(EASTL_ATOMIC_EXCHANGE_SEQ_CST_, bits); \ + } \ + \ + T exchange(T desired, eastl::internal::memory_order_relaxed_s) EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_EXCHANGE_FUNC_IMPL(EASTL_ATOMIC_EXCHANGE_RELAXED_, bits); \ + } \ + \ + T exchange(T desired, eastl::internal::memory_order_acquire_s) EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_EXCHANGE_FUNC_IMPL(EASTL_ATOMIC_EXCHANGE_ACQUIRE_, bits); \ + } \ + \ + T exchange(T desired, eastl::internal::memory_order_release_s) EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_EXCHANGE_FUNC_IMPL(EASTL_ATOMIC_EXCHANGE_RELEASE_, bits); \ + } \ + \ + T exchange(T desired, eastl::internal::memory_order_acq_rel_s) EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_EXCHANGE_FUNC_IMPL(EASTL_ATOMIC_EXCHANGE_ACQ_REL_, bits); \ + } \ + \ + T exchange(T desired, eastl::internal::memory_order_seq_cst_s) EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_EXCHANGE_FUNC_IMPL(EASTL_ATOMIC_EXCHANGE_SEQ_CST_, bits); \ + } \ + \ + public: /* compare_exchange_weak */ \ + \ + EASTL_ATOMIC_BASE_CMPXCHG_WEAK_FUNCS_IMPL(bits) \ + \ + public: /* compare_exchange_strong */ \ + \ + EASTL_ATOMIC_BASE_CMPXCHG_STRONG_FUNCS_IMPL(bits) \ + \ + public: /* assignment operator */ \ + \ + using Base::operator=; \ + \ + T operator=(T desired) EA_NOEXCEPT \ + { \ + store(desired, eastl::memory_order_seq_cst); \ + return desired; \ + } \ + \ + atomic_base_width& operator=(const atomic_base_width&) EA_NOEXCEPT = delete; \ + atomic_base_width& operator=(const atomic_base_width&) volatile EA_NOEXCEPT = delete; \ + \ + }; + + +#if defined(EASTL_ATOMIC_HAS_8BIT) + EASTL_ATOMIC_BASE_WIDTH_SPECIALIZE(1, 8) +#endif + +#if defined(EASTL_ATOMIC_HAS_16BIT) + EASTL_ATOMIC_BASE_WIDTH_SPECIALIZE(2, 16) +#endif + +#if defined(EASTL_ATOMIC_HAS_32BIT) + EASTL_ATOMIC_BASE_WIDTH_SPECIALIZE(4, 32) +#endif + +#if defined(EASTL_ATOMIC_HAS_64BIT) + EASTL_ATOMIC_BASE_WIDTH_SPECIALIZE(8, 64) +#endif + +#if defined(EASTL_ATOMIC_HAS_128BIT) + EASTL_ATOMIC_BASE_WIDTH_SPECIALIZE(16, 128) +#endif + + +} // namespace internal + + +} // namespace eastl + + +#include "atomic_pop_compiler_options.h" + + +#endif /* EASTL_ATOMIC_INTERNAL_BASE_WIDTH_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/atomic_casts.h b/libkram/eastl/include/EASTL/internal/atomic/atomic_casts.h new file mode 100644 index 00000000..54b9ed27 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/atomic_casts.h @@ -0,0 +1,190 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
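// --- Editorial note: illustrative usage sketch, not part of the original headers. ---
// The atomic_base_width specialization macros above generate the store / load /
// exchange / compare_exchange member functions, overloaded on the memory-order tag
// types (including the two-order compare_exchange forms). Assuming the 32-bit
// integral case, typical call sites look like this:
#if 0 // illustration only
#include <EASTL/atomic.h>

void example(eastl::atomic<uint32_t>& counter)
{
	counter.store(0u, eastl::memory_order_relaxed);

	uint32_t seen = counter.load(eastl::memory_order_acquire);

	uint32_t prev = counter.exchange(42u, eastl::memory_order_acq_rel);

	// separate success/failure orders map onto the _ACQ_REL_RELAXED_ style variants above
	uint32_t expected = 42u;
	bool swapped = counter.compare_exchange_strong(expected, 43u,
	                                               eastl::memory_order_acq_rel,
	                                               eastl::memory_order_relaxed);

	(void)seen; (void)prev; (void)swapped;
}
#endif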
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_CASTS_H +#define EASTL_ATOMIC_INTERNAL_CASTS_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#include + + +#include + + +namespace eastl +{ + + +namespace internal +{ + + +template +EASTL_FORCE_INLINE volatile T* AtomicVolatileCast(T* ptr) EA_NOEXCEPT +{ + static_assert(!eastl::is_volatile::value, "eastl::atomic : pointer must not be volatile, the pointed to type must be volatile!"); + static_assert(eastl::is_volatile::value, "eastl::atomic : the pointed to type must be volatile!"); + + return reinterpret_cast(ptr); +} + + +/** + * NOTE: + * + * Some compiler intrinsics do not operate on pointer types thus + * doing atomic operations on pointers must be casted to the suitable + * sized unsigned integral type. + * + * Some compiler intrinsics aren't generics and thus structs must also + * be casted to the appropriate sized unsigned integral type. + * + * Atomic operations on an int* might have to be casted to a uint64_t on + * a platform with 8-byte pointers as an example. + * + * Also doing an atomic operation on a struct, we must ensure that we observe + * the whole struct as one atomic unit with no shearing between the members. + * A load of a struct with two uint32_t members must be one uint64_t load, + * not two separate uint32_t loads, thus casted to the suitable sized + * unsigned integral type. + */ +template +EASTL_FORCE_INLINE volatile Integral* AtomicVolatileIntegralCast(T* ptr) EA_NOEXCEPT +{ + static_assert(!eastl::is_volatile::value, "eastl::atomic : pointer must not be volatile, the pointed to type must be volatile!"); + static_assert(eastl::is_volatile::value, "eastl::atomic : the pointed to type must be volatile!"); + static_assert(eastl::is_integral::value, "eastl::atomic : Integral cast must cast to an Integral type!"); + static_assert(sizeof(Integral) == sizeof(T), "eastl::atomic : Integral and T must be same size for casting!"); + + return reinterpret_cast(ptr); +} + +template +EASTL_FORCE_INLINE Integral* AtomicIntegralCast(T* ptr) EA_NOEXCEPT +{ + static_assert(eastl::is_integral::value, "eastl::atomic : Integral cast must cast to an Integral type!"); + static_assert(sizeof(Integral) == sizeof(T), "eastl::atomic : Integral and T must be same size for casting!"); + + return reinterpret_cast(ptr); +} + + +/** + * NOTE: + * + * These casts are meant to be used with unions or structs of larger types that must be casted + * down to the smaller integral types. Like with 128-bit atomics and msvc intrinsics. + * + * struct Foo128 { __int64 array[2]; }; can be casted to a __int64* + * since a poiter to Foo128 is a pointer to the first member. + */ +template +EASTL_FORCE_INLINE volatile ToType* AtomicVolatileTypeCast(FromType* ptr) EA_NOEXCEPT +{ + static_assert(!eastl::is_volatile::value, "eastl::atomic : pointer must not be volatile, the pointed to type must be volatile!"); + static_assert(eastl::is_volatile::value, "eastl::atomic : the pointed to type must be volatile!"); + + return reinterpret_cast(ptr); +} + +template +EASTL_FORCE_INLINE ToType* AtomicTypeCast(FromType* ptr) EA_NOEXCEPT +{ + return reinterpret_cast(ptr); +} + + +/** + * NOTE: + * + * This is a compiler guaranteed safe type punning. + * This is useful when dealing with user defined structs. 
+ * struct Test { uint32_t; unint32_t; }; + * + * Example: + * uint64_t atomicLoad = *((volatile uint64_t*)&Test); + * Test load = AtomicTypePunCast(atomicLoad); + * + * uint64_t comparand = AtomicTypePunCast(Test); + * cmpxchg(&Test, comparand, desired); + * + * This can be implemented in many different ways depending on the compiler such + * as thru a union, memcpy, reinterpret_cast(atomicLoad), etc. + */ +template , int> = 0> +EASTL_FORCE_INLINE Pun AtomicTypePunCast(const T& fromType) EA_NOEXCEPT +{ + static_assert(sizeof(Pun) == sizeof(T), "eastl::atomic : Pun and T must be the same size for type punning!"); + + /** + * aligned_storage ensures we can TypePun objects that aren't trivially default constructible + * but still trivially copyable. + */ + typename eastl::aligned_storage::type ret; + memcpy(eastl::addressof(ret), eastl::addressof(fromType), sizeof(Pun)); + return reinterpret_cast(ret); +} + +template , int> = 0> +EASTL_FORCE_INLINE Pun AtomicTypePunCast(const T& fromType) EA_NOEXCEPT +{ + return fromType; +} + + +template +EASTL_FORCE_INLINE T AtomicNegateOperand(T val) EA_NOEXCEPT +{ + static_assert(eastl::is_integral::value, "eastl::atomic : Integral Negation must be an Integral type!"); + static_assert(!eastl::is_volatile::value, "eastl::atomic : T must not be volatile!"); + + return static_cast(0U - static_cast>(val)); +} + +EASTL_FORCE_INLINE ptrdiff_t AtomicNegateOperand(ptrdiff_t val) EA_NOEXCEPT +{ + return -val; +} + + +} // namespace internal + + +} // namespace eastl + + +/** + * NOTE: + * + * These macros are meant to prevent inclusion hell. + * Also so that it fits with the style of the rest of the atomic macro implementation. + */ +#define EASTL_ATOMIC_VOLATILE_CAST(ptr) \ + eastl::internal::AtomicVolatileCast((ptr)) + +#define EASTL_ATOMIC_VOLATILE_INTEGRAL_CAST(IntegralType, ptr) \ + eastl::internal::AtomicVolatileIntegralCast((ptr)) + +#define EASTL_ATOMIC_INTEGRAL_CAST(IntegralType, ptr) \ + eastl::internal::AtomicIntegralCast((ptr)) + +#define EASTL_ATOMIC_VOLATILE_TYPE_CAST(ToType, ptr) \ + eastl::internal::AtomicVolatileTypeCast((ptr)) + +#define EASTL_ATOMIC_TYPE_CAST(ToType, ptr) \ + eastl::internal::AtomicTypeCast((ptr)) + +#define EASTL_ATOMIC_TYPE_PUN_CAST(PunType, fromType) \ + eastl::internal::AtomicTypePunCast((fromType)) + +#define EASTL_ATOMIC_NEGATE_OPERAND(val) \ + eastl::internal::AtomicNegateOperand((val)) + + +#endif /* EASTL_ATOMIC_INTERNAL_CASTS_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/atomic_flag.h b/libkram/eastl/include/EASTL/internal/atomic/atomic_flag.h new file mode 100644 index 00000000..e135d612 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/atomic_flag.h @@ -0,0 +1,170 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNA_ATOMIC_FLAG_H +#define EASTL_ATOMIC_INTERNA_ATOMIC_FLAG_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#include "atomic_push_compiler_options.h" + + +namespace eastl +{ + + +class atomic_flag +{ +public: /* ctors */ + + EA_CONSTEXPR atomic_flag(bool desired) EA_NOEXCEPT + : mFlag{ desired } + { + } + + EA_CONSTEXPR atomic_flag() EA_NOEXCEPT + : mFlag{ false } + { + } + +public: /* deleted ctors && assignment operators */ + + atomic_flag(const atomic_flag&) EA_NOEXCEPT = delete; + + atomic_flag& operator=(const atomic_flag&) EA_NOEXCEPT = delete; + atomic_flag& operator=(const atomic_flag&) volatile EA_NOEXCEPT = delete; + +public: /* clear */ + + template + void clear(Order order) volatile EA_NOEXCEPT + { + EASTL_ATOMIC_STATIC_ASSERT_VOLATILE_MEM_FN(Order); + } + + template + void clear(Order order) EA_NOEXCEPT + { + EASTL_ATOMIC_STATIC_ASSERT_INVALID_MEMORY_ORDER(Order); + } + + void clear(eastl::internal::memory_order_relaxed_s) EA_NOEXCEPT + { + mFlag.store(false, eastl::memory_order_relaxed); + } + + void clear(eastl::internal::memory_order_release_s) EA_NOEXCEPT + { + mFlag.store(false, eastl::memory_order_release); + } + + void clear(eastl::internal::memory_order_seq_cst_s) EA_NOEXCEPT + { + mFlag.store(false, eastl::memory_order_seq_cst); + } + + void clear() EA_NOEXCEPT + { + mFlag.store(false, eastl::memory_order_seq_cst); + } + +public: /* test_and_set */ + + template + bool test_and_set(Order order) volatile EA_NOEXCEPT + { + EASTL_ATOMIC_STATIC_ASSERT_VOLATILE_MEM_FN(Order); + return false; + } + + template + bool test_and_set(Order order) EA_NOEXCEPT + { + EASTL_ATOMIC_STATIC_ASSERT_INVALID_MEMORY_ORDER(Order); + return false; + } + + bool test_and_set(eastl::internal::memory_order_relaxed_s) EA_NOEXCEPT + { + return mFlag.exchange(true, eastl::memory_order_relaxed); + } + + bool test_and_set(eastl::internal::memory_order_acquire_s) EA_NOEXCEPT + { + return mFlag.exchange(true, eastl::memory_order_acquire); + } + + bool test_and_set(eastl::internal::memory_order_release_s) EA_NOEXCEPT + { + return mFlag.exchange(true, eastl::memory_order_release); + } + + bool test_and_set(eastl::internal::memory_order_acq_rel_s) EA_NOEXCEPT + { + return mFlag.exchange(true, eastl::memory_order_acq_rel); + } + + bool test_and_set(eastl::internal::memory_order_seq_cst_s) EA_NOEXCEPT + { + return mFlag.exchange(true, eastl::memory_order_seq_cst); + } + + bool test_and_set() EA_NOEXCEPT + { + return mFlag.exchange(true, eastl::memory_order_seq_cst); + } + +public: /* test */ + + template + bool test(Order order) const volatile EA_NOEXCEPT + { + EASTL_ATOMIC_STATIC_ASSERT_VOLATILE_MEM_FN(Order); + return false; + } + + template + bool test(Order order) const EA_NOEXCEPT + { + EASTL_ATOMIC_STATIC_ASSERT_INVALID_MEMORY_ORDER(Order); + return false; + } + + bool test(eastl::internal::memory_order_relaxed_s) const EA_NOEXCEPT + { + return mFlag.load(eastl::memory_order_relaxed); + } + + bool test(eastl::internal::memory_order_acquire_s) const EA_NOEXCEPT + { + return mFlag.load(eastl::memory_order_acquire); + } + + bool test(eastl::internal::memory_order_seq_cst_s) const EA_NOEXCEPT + { + return mFlag.load(eastl::memory_order_seq_cst); + } + + bool test() const EA_NOEXCEPT + { + return mFlag.load(eastl::memory_order_seq_cst); + } + +private: + + eastl::atomic mFlag; +}; + + +} // namespace eastl + + +#include "atomic_pop_compiler_options.h" + + +#endif /* 
EASTL_ATOMIC_INTERNA_ATOMIC_FLAG_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/atomic_flag_standalone.h b/libkram/eastl/include/EASTL/internal/atomic/atomic_flag_standalone.h new file mode 100644 index 00000000..b5284bed --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/atomic_flag_standalone.h @@ -0,0 +1,69 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_FLAG_STANDALONE_H +#define EASTL_ATOMIC_INTERNAL_FLAG_STANDALONE_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +namespace eastl +{ + + +//////////////////////////////////////////////////////////////////////////////// +// +// bool atomic_flag_test_and_set(eastl::atomic*) +// +EASTL_FORCE_INLINE bool atomic_flag_test_and_set(eastl::atomic_flag* atomicObj) EA_NOEXCEPT +{ + return atomicObj->test_and_set(); +} + +template +EASTL_FORCE_INLINE bool atomic_flag_test_and_set_explicit(eastl::atomic_flag* atomicObj, Order order) +{ + return atomicObj->test_and_set(order); +} + + +//////////////////////////////////////////////////////////////////////////////// +// +// bool atomic_flag_clear(eastl::atomic*) +// +EASTL_FORCE_INLINE void atomic_flag_clear(eastl::atomic_flag* atomicObj) +{ + atomicObj->clear(); +} + +template +EASTL_FORCE_INLINE void atomic_flag_clear_explicit(eastl::atomic_flag* atomicObj, Order order) +{ + atomicObj->clear(order); +} + + +//////////////////////////////////////////////////////////////////////////////// +// +// bool atomic_flag_test(eastl::atomic*) +// +EASTL_FORCE_INLINE bool atomic_flag_test(eastl::atomic_flag* atomicObj) +{ + return atomicObj->test(); +} + +template +EASTL_FORCE_INLINE bool atomic_flag_test_explicit(eastl::atomic_flag* atomicObj, Order order) +{ + return atomicObj->test(order); +} + + +} // namespace eastl + + +#endif /* EASTL_ATOMIC_INTERNAL_FLAG_STANDALONE_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/atomic_integral.h b/libkram/eastl/include/EASTL/internal/atomic/atomic_integral.h new file mode 100644 index 00000000..7c94db32 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/atomic_integral.h @@ -0,0 +1,343 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
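// --- Editorial note: illustrative usage sketch, not part of the original headers. ---
// eastl::atomic_flag above is a thin wrapper over an atomic bool, and the
// atomic_flag_* free functions are just the standalone spellings of its member
// calls. The classic use is a spin lock built from test_and_set(acquire) and
// clear(release):
#if 0 // illustration only
#include <EASTL/atomic.h>

class SpinLock
{
public:
	void lock() EA_NOEXCEPT
	{
		// spin until the previous value was 'false', i.e. we took the lock
		while (mFlag.test_and_set(eastl::memory_order_acquire)) {}
	}

	void unlock() EA_NOEXCEPT
	{
		mFlag.clear(eastl::memory_order_release);
	}

private:
	eastl::atomic_flag mFlag; // default-constructed to false
};
#endif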
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_INTEGRAL_H +#define EASTL_ATOMIC_INTERNAL_INTEGRAL_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#include "atomic_push_compiler_options.h" + + +namespace eastl +{ + + +namespace internal +{ + + +#define EASTL_ATOMIC_INTEGRAL_STATIC_ASSERT_FUNCS_IMPL(funcName) \ + template \ + T funcName(T arg, Order order) EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_STATIC_ASSERT_INVALID_MEMORY_ORDER(T); \ + } \ + \ + template \ + T funcName(T arg, Order order) volatile EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_STATIC_ASSERT_VOLATILE_MEM_FN(T); \ + } \ + \ + T funcName(T arg) volatile EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_STATIC_ASSERT_VOLATILE_MEM_FN(T); \ + } + + +#define EASTL_ATOMIC_INTEGRAL_STATIC_ASSERT_INC_DEC_OPERATOR_IMPL(operatorOp) \ + T operator operatorOp() volatile EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_STATIC_ASSERT_VOLATILE_MEM_FN(T); \ + } \ + \ + T operator operatorOp(int) volatile EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_STATIC_ASSERT_VOLATILE_MEM_FN(T); \ + } + + +#define EASTL_ATOMIC_INTEGRAL_STATIC_ASSERT_ASSIGNMENT_OPERATOR_IMPL(operatorOp) \ + T operator operatorOp(T arg) volatile EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_STATIC_ASSERT_VOLATILE_MEM_FN(T); \ + } + + + template + struct atomic_integral_base : public atomic_base_width + { + private: + + using Base = atomic_base_width; + + public: /* ctors */ + + EA_CONSTEXPR atomic_integral_base(T desired) EA_NOEXCEPT + : Base{ desired } + { + } + + EA_CONSTEXPR atomic_integral_base() EA_NOEXCEPT = default; + + atomic_integral_base(const atomic_integral_base&) EA_NOEXCEPT = delete; + + public: /* assignment operator */ + + using Base::operator=; + + atomic_integral_base& operator=(const atomic_integral_base&) EA_NOEXCEPT = delete; + atomic_integral_base& operator=(const atomic_integral_base&) volatile EA_NOEXCEPT = delete; + + public: /* fetch_add */ + + EASTL_ATOMIC_INTEGRAL_STATIC_ASSERT_FUNCS_IMPL(fetch_add) + + public: /* add_fetch */ + + EASTL_ATOMIC_INTEGRAL_STATIC_ASSERT_FUNCS_IMPL(add_fetch) + + public: /* fetch_sub */ + + EASTL_ATOMIC_INTEGRAL_STATIC_ASSERT_FUNCS_IMPL(fetch_sub) + + public: /* sub_fetch */ + + EASTL_ATOMIC_INTEGRAL_STATIC_ASSERT_FUNCS_IMPL(sub_fetch) + + public: /* fetch_and */ + + EASTL_ATOMIC_INTEGRAL_STATIC_ASSERT_FUNCS_IMPL(fetch_and) + + public: /* and_fetch */ + + EASTL_ATOMIC_INTEGRAL_STATIC_ASSERT_FUNCS_IMPL(and_fetch) + + public: /* fetch_or */ + + EASTL_ATOMIC_INTEGRAL_STATIC_ASSERT_FUNCS_IMPL(fetch_or) + + public: /* or_fetch */ + + EASTL_ATOMIC_INTEGRAL_STATIC_ASSERT_FUNCS_IMPL(or_fetch) + + public: /* fetch_xor */ + + EASTL_ATOMIC_INTEGRAL_STATIC_ASSERT_FUNCS_IMPL(fetch_xor) + + public: /* xor_fetch */ + + EASTL_ATOMIC_INTEGRAL_STATIC_ASSERT_FUNCS_IMPL(xor_fetch) + + public: /* operator++ && operator-- */ + + EASTL_ATOMIC_INTEGRAL_STATIC_ASSERT_INC_DEC_OPERATOR_IMPL(++) + + EASTL_ATOMIC_INTEGRAL_STATIC_ASSERT_INC_DEC_OPERATOR_IMPL(--) + + public: /* operator+= && operator-= */ + + EASTL_ATOMIC_INTEGRAL_STATIC_ASSERT_ASSIGNMENT_OPERATOR_IMPL(+=) + + EASTL_ATOMIC_INTEGRAL_STATIC_ASSERT_ASSIGNMENT_OPERATOR_IMPL(-=) + + public: /* operator&= */ + + EASTL_ATOMIC_INTEGRAL_STATIC_ASSERT_ASSIGNMENT_OPERATOR_IMPL(&=) + + public: /* operator|= */ + + EASTL_ATOMIC_INTEGRAL_STATIC_ASSERT_ASSIGNMENT_OPERATOR_IMPL(|=) + + public: /* operator^= */ + + EASTL_ATOMIC_INTEGRAL_STATIC_ASSERT_ASSIGNMENT_OPERATOR_IMPL(^=) + + }; + + + template + struct atomic_integral_width; + +#define 
EASTL_ATOMIC_INTEGRAL_FUNC_IMPL(op, bits) \ + T retVal; \ + EA_PREPROCESSOR_JOIN(op, bits)(T, retVal, this->GetAtomicAddress(), arg); \ + return retVal; + +#define EASTL_ATOMIC_INTEGRAL_FETCH_IMPL(funcName, op, bits) \ + T funcName(T arg) EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_INTEGRAL_FUNC_IMPL(op, bits); \ + } + +#define EASTL_ATOMIC_INTEGRAL_FETCH_ORDER_IMPL(funcName, orderType, op, bits) \ + T funcName(T arg, orderType) EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_INTEGRAL_FUNC_IMPL(op, bits); \ + } + +#define EASTL_ATOMIC_INTEGRAL_FETCH_OP_JOIN(fetchOp, Order) \ + EA_PREPROCESSOR_JOIN(EA_PREPROCESSOR_JOIN(EASTL_ATOMIC_, fetchOp), Order) + +#define EASTL_ATOMIC_INTEGRAL_FETCH_FUNCS_IMPL(funcName, fetchOp, bits) \ + using Base::funcName; \ + \ + EASTL_ATOMIC_INTEGRAL_FETCH_IMPL(funcName, EASTL_ATOMIC_INTEGRAL_FETCH_OP_JOIN(fetchOp, _SEQ_CST_), bits) \ + \ + EASTL_ATOMIC_INTEGRAL_FETCH_ORDER_IMPL(funcName, eastl::internal::memory_order_relaxed_s, \ + EASTL_ATOMIC_INTEGRAL_FETCH_OP_JOIN(fetchOp, _RELAXED_), bits) \ + \ + EASTL_ATOMIC_INTEGRAL_FETCH_ORDER_IMPL(funcName, eastl::internal::memory_order_acquire_s, \ + EASTL_ATOMIC_INTEGRAL_FETCH_OP_JOIN(fetchOp, _ACQUIRE_), bits) \ + \ + EASTL_ATOMIC_INTEGRAL_FETCH_ORDER_IMPL(funcName, eastl::internal::memory_order_release_s, \ + EASTL_ATOMIC_INTEGRAL_FETCH_OP_JOIN(fetchOp, _RELEASE_), bits) \ + \ + EASTL_ATOMIC_INTEGRAL_FETCH_ORDER_IMPL(funcName, eastl::internal::memory_order_acq_rel_s, \ + EASTL_ATOMIC_INTEGRAL_FETCH_OP_JOIN(fetchOp, _ACQ_REL_), bits) \ + \ + EASTL_ATOMIC_INTEGRAL_FETCH_ORDER_IMPL(funcName, eastl::internal::memory_order_seq_cst_s, \ + EASTL_ATOMIC_INTEGRAL_FETCH_OP_JOIN(fetchOp, _SEQ_CST_), bits) + +#define EASTL_ATOMIC_INTEGRAL_FETCH_INC_DEC_OPERATOR_IMPL(operatorOp, preFuncName, postFuncName) \ + using Base::operator operatorOp; \ + \ + T operator operatorOp() EA_NOEXCEPT \ + { \ + return preFuncName(1, eastl::memory_order_seq_cst); \ + } \ + \ + T operator operatorOp(int) EA_NOEXCEPT \ + { \ + return postFuncName(1, eastl::memory_order_seq_cst); \ + } + +#define EASTL_ATOMIC_INTEGRAL_FETCH_ASSIGNMENT_OPERATOR_IMPL(operatorOp, funcName) \ + using Base::operator operatorOp; \ + \ + T operator operatorOp(T arg) EA_NOEXCEPT \ + { \ + return funcName(arg, eastl::memory_order_seq_cst); \ + } + + +#define EASTL_ATOMIC_INTEGRAL_WIDTH_SPECIALIZE(bytes, bits) \ + template \ + struct atomic_integral_width : public atomic_integral_base \ + { \ + private: \ + \ + using Base = atomic_integral_base; \ + \ + public: /* ctors */ \ + \ + EA_CONSTEXPR atomic_integral_width(T desired) EA_NOEXCEPT \ + : Base{ desired } \ + { \ + } \ + \ + EA_CONSTEXPR atomic_integral_width() EA_NOEXCEPT = default; \ + \ + atomic_integral_width(const atomic_integral_width&) EA_NOEXCEPT = delete; \ + \ + public: /* assignment operator */ \ + \ + using Base::operator=; \ + \ + atomic_integral_width& operator=(const atomic_integral_width&) EA_NOEXCEPT = delete; \ + atomic_integral_width& operator=(const atomic_integral_width&) volatile EA_NOEXCEPT = delete; \ + \ + public: /* fetch_add */ \ + \ + EASTL_ATOMIC_INTEGRAL_FETCH_FUNCS_IMPL(fetch_add, FETCH_ADD, bits) \ + \ + public: /* add_fetch */ \ + \ + EASTL_ATOMIC_INTEGRAL_FETCH_FUNCS_IMPL(add_fetch, ADD_FETCH, bits) \ + \ + public: /* fetch_sub */ \ + \ + EASTL_ATOMIC_INTEGRAL_FETCH_FUNCS_IMPL(fetch_sub, FETCH_SUB, bits) \ + \ + public: /* sub_fetch */ \ + \ + EASTL_ATOMIC_INTEGRAL_FETCH_FUNCS_IMPL(sub_fetch, SUB_FETCH, bits) \ + \ + public: /* fetch_and */ \ + \ + EASTL_ATOMIC_INTEGRAL_FETCH_FUNCS_IMPL(fetch_and, FETCH_AND, 
bits) \ + \ + public: /* and_fetch */ \ + \ + EASTL_ATOMIC_INTEGRAL_FETCH_FUNCS_IMPL(and_fetch, AND_FETCH, bits) \ + \ + public: /* fetch_or */ \ + \ + EASTL_ATOMIC_INTEGRAL_FETCH_FUNCS_IMPL(fetch_or, FETCH_OR, bits) \ + \ + public: /* or_fetch */ \ + \ + EASTL_ATOMIC_INTEGRAL_FETCH_FUNCS_IMPL(or_fetch, OR_FETCH, bits) \ + \ + public: /* fetch_xor */ \ + \ + EASTL_ATOMIC_INTEGRAL_FETCH_FUNCS_IMPL(fetch_xor, FETCH_XOR, bits) \ + \ + public: /* xor_fetch */ \ + \ + EASTL_ATOMIC_INTEGRAL_FETCH_FUNCS_IMPL(xor_fetch, XOR_FETCH, bits) \ + \ + public: /* operator++ && operator-- */ \ + \ + EASTL_ATOMIC_INTEGRAL_FETCH_INC_DEC_OPERATOR_IMPL(++, add_fetch, fetch_add) \ + \ + EASTL_ATOMIC_INTEGRAL_FETCH_INC_DEC_OPERATOR_IMPL(--, sub_fetch, fetch_sub) \ + \ + public: /* operator+= && operator-= */ \ + \ + EASTL_ATOMIC_INTEGRAL_FETCH_ASSIGNMENT_OPERATOR_IMPL(+=, add_fetch) \ + \ + EASTL_ATOMIC_INTEGRAL_FETCH_ASSIGNMENT_OPERATOR_IMPL(-=, sub_fetch) \ + \ + public: /* operator&= */ \ + \ + EASTL_ATOMIC_INTEGRAL_FETCH_ASSIGNMENT_OPERATOR_IMPL(&=, and_fetch) \ + \ + public: /* operator|= */ \ + \ + EASTL_ATOMIC_INTEGRAL_FETCH_ASSIGNMENT_OPERATOR_IMPL(|=, or_fetch) \ + \ + public: /* operator^= */ \ + \ + EASTL_ATOMIC_INTEGRAL_FETCH_ASSIGNMENT_OPERATOR_IMPL(^=, xor_fetch) \ + \ + }; + + +#if defined(EASTL_ATOMIC_HAS_8BIT) + EASTL_ATOMIC_INTEGRAL_WIDTH_SPECIALIZE(1, 8) +#endif + +#if defined(EASTL_ATOMIC_HAS_16BIT) + EASTL_ATOMIC_INTEGRAL_WIDTH_SPECIALIZE(2, 16) +#endif + +#if defined(EASTL_ATOMIC_HAS_32BIT) + EASTL_ATOMIC_INTEGRAL_WIDTH_SPECIALIZE(4, 32) +#endif + +#if defined(EASTL_ATOMIC_HAS_64BIT) + EASTL_ATOMIC_INTEGRAL_WIDTH_SPECIALIZE(8, 64) +#endif + +#if defined(EASTL_ATOMIC_HAS_128BIT) + EASTL_ATOMIC_INTEGRAL_WIDTH_SPECIALIZE(16, 128) +#endif + + +} // namespace internal + + +} // namespace eastl + + +#include "atomic_pop_compiler_options.h" + + +#endif /* EASTL_ATOMIC_INTERNAL_INTEGRAL_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/atomic_macros.h b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros.h new file mode 100644 index 00000000..756a4b4d --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros.h @@ -0,0 +1,67 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_MACROS_H +#define EASTL_ATOMIC_INTERNAL_MACROS_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// The reason for the implementation separating out into a compiler and architecture +// folder is as follows. +// +// The compiler directory is meant to implement atomics using the compiler provided +// intrinsics. This also implies that usually the same compiler instrinsic implementation +// can be used for any architecture the compiler supports. If a compiler provides intrinsics +// to support barriers or atomic operations, then that implementation should be in the +// compiler directory. +// +// The arch directory is meant to manually implement atomics for a specific architecture +// such as power or x86. There may be some compiler specific code in this directory because +// GCC inline assembly syntax may be different than another compiler as an example. 
+// +// The arch directory can also be used to implement some atomic operations ourselves +// if we deem the compiler provided implementation to be inefficient for the given +// architecture or we need to do some things manually for a given compiler. +// +// The atomic_macros directory implements the macros that the rest of the atomic +// library uses. These macros will expand to either the compiler or arch implemented +// macro. The arch implemented macro is given priority over the compiler implemented +// macro if both are implemented otherwise whichever is implemented is chosen or +// an error is emitted if none are implemented. +// +// The implementation being all macros has a couple nice side effects as well. +// +// 1. All the implementation ends up funneling into one low level macro implementation +// which makes it easy to verify correctness, reduce copy-paste errors and differences +// in various platform implementations. +// +// 2. Allows for the implementation to be implemented efficiently on compilers that do not +// directly implement the C++ memory model in their intrinsics such as msvc. +// +// 3. Allows for the implementation of atomics that may not be supported on the given platform, +// such as 128-bit atomics on 32-bit platforms since the macros will only ever be expanded +// on platforms that support said features. This makes implementing said features pretty easy +// since we do not have to worry about complicated feature detection in the low level implementations. +// +// The macro implementation may asume that all passed in types are trivially constructible thus it is +// free to create local variables of the passed in types as it may please. +// It may also assume that all passed in types are trivially copyable as well. +// It cannot assume any passed in type is any given type thus is a specific type if needed, it must do an +// EASTL_ATOMIC_TYPE_PUN_CAST() to the required type. +// + + +#include "compiler/compiler.h" +#include "arch/arch.h" + +#include "atomic_macros/atomic_macros.h" + + +#endif /* EASTL_ATOMIC_INTERNAL_MACROS_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros.h b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros.h new file mode 100644 index 00000000..941ac51c --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros.h @@ -0,0 +1,145 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
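To make the layering described in the comment above concrete, the following hand-written trace sketches how one user-level operation could funnel down through the macro layers on a platform where only the compiler backend implements the op. The availability assumptions and the __atomic_fetch_add mention are illustrative, not taken from this patch.

// Hand-written expansion sketch (assumed: arch macro absent, compiler macro present).
//
//   eastl::atomic<uint32_t>::fetch_add(1, eastl::memory_order_relaxed)
//     -> EASTL_ATOMIC_FETCH_ADD_RELAXED_32(uint32_t, retVal, this->GetAtomicAddress(), 1)
//     -> EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_ADD_RELAXED_32)(uint32_t, retVal, ptr, 1)
//     -> EASTL_COMPILER_ATOMIC_FETCH_ADD_RELAXED_32(uint32_t, retVal, ptr, 1)
//        // e.g. a single __atomic_fetch_add() intrinsic on GCC/Clang-style backends
//
// Every public entry point funnels into one such low-level macro, which is the
// "easy to verify correctness" property the comment above describes.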
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_ATOMIC_MACROS_H +#define EASTL_ATOMIC_INTERNAL_ATOMIC_MACROS_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#include "atomic_macros_base.h" + +#include "atomic_macros_fetch_add.h" +#include "atomic_macros_fetch_sub.h" + +#include "atomic_macros_fetch_and.h" +#include "atomic_macros_fetch_xor.h" +#include "atomic_macros_fetch_or.h" + +#include "atomic_macros_add_fetch.h" +#include "atomic_macros_sub_fetch.h" + +#include "atomic_macros_and_fetch.h" +#include "atomic_macros_xor_fetch.h" +#include "atomic_macros_or_fetch.h" + +#include "atomic_macros_exchange.h" + +#include "atomic_macros_cmpxchg_weak.h" +#include "atomic_macros_cmpxchg_strong.h" + +#include "atomic_macros_load.h" +#include "atomic_macros_store.h" + +#include "atomic_macros_compiler_barrier.h" + +#include "atomic_macros_cpu_pause.h" + +#include "atomic_macros_memory_barrier.h" + +#include "atomic_macros_signal_fence.h" + +#include "atomic_macros_thread_fence.h" + + +///////////////////////////////////////////////////////////////////////////////// + + +#if defined(EASTL_COMPILER_ATOMIC_HAS_8BIT) || defined(EASTL_ARCH_ATOMIC_HAS_8BIT) + + #define EASTL_ATOMIC_HAS_8BIT + +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_HAS_16BIT) || defined(EASTL_ARCH_ATOMIC_HAS_16BIT) + + #define EASTL_ATOMIC_HAS_16BIT + +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_HAS_32BIT) || defined(EASTL_ARCH_ATOMIC_HAS_32BIT) + + #define EASTL_ATOMIC_HAS_32BIT + +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_HAS_64BIT) || defined(EASTL_ARCH_ATOMIC_HAS_64BIT) + + #define EASTL_ATOMIC_HAS_64BIT + +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_HAS_128BIT) || defined(EASTL_ARCH_ATOMIC_HAS_128BIT) + + #define EASTL_ATOMIC_HAS_128BIT + +#endif + + +///////////////////////////////////////////////////////////////////////////////// + + +#if defined(EASTL_ARCH_ATOMIC_FIXED_WIDTH_TYPE_8) + + #define EASTL_ATOMIC_FIXED_WIDTH_TYPE_8 EASTL_ARCH_ATOMIC_FIXED_WIDTH_TYPE_8 + +#elif defined(EASTL_COMPILER_ATOMIC_FIXED_WIDTH_TYPE_8) + + #define EASTL_ATOMIC_FIXED_WIDTH_TYPE_8 EASTL_COMPILER_ATOMIC_FIXED_WIDTH_TYPE_8 + +#endif + + +#if defined(EASTL_ARCH_ATOMIC_FIXED_WIDTH_TYPE_16) + + #define EASTL_ATOMIC_FIXED_WIDTH_TYPE_16 EASTL_ARCH_ATOMIC_FIXED_WIDTH_TYPE_16 + +#elif defined(EASTL_COMPILER_ATOMIC_FIXED_WIDTH_TYPE_16) + + #define EASTL_ATOMIC_FIXED_WIDTH_TYPE_16 EASTL_COMPILER_ATOMIC_FIXED_WIDTH_TYPE_16 + +#endif + + +#if defined(EASTL_ARCH_ATOMIC_FIXED_WIDTH_TYPE_32) + + #define EASTL_ATOMIC_FIXED_WIDTH_TYPE_32 EASTL_ARCH_ATOMIC_FIXED_WIDTH_TYPE_32 + +#elif defined(EASTL_COMPILER_ATOMIC_FIXED_WIDTH_TYPE_32) + + #define EASTL_ATOMIC_FIXED_WIDTH_TYPE_32 EASTL_COMPILER_ATOMIC_FIXED_WIDTH_TYPE_32 + +#endif + + +#if defined(EASTL_ARCH_ATOMIC_FIXED_WIDTH_TYPE_64) + + #define EASTL_ATOMIC_FIXED_WIDTH_TYPE_64 EASTL_ARCH_ATOMIC_FIXED_WIDTH_TYPE_64 + +#elif defined(EASTL_COMPILER_ATOMIC_FIXED_WIDTH_TYPE_64) + + #define EASTL_ATOMIC_FIXED_WIDTH_TYPE_64 EASTL_COMPILER_ATOMIC_FIXED_WIDTH_TYPE_64 + +#endif + + +#if defined(EASTL_ARCH_ATOMIC_FIXED_WIDTH_TYPE_128) + + #define EASTL_ATOMIC_FIXED_WIDTH_TYPE_128 EASTL_ARCH_ATOMIC_FIXED_WIDTH_TYPE_128 + +#elif defined(EASTL_COMPILER_ATOMIC_FIXED_WIDTH_TYPE_128) + + #define EASTL_ATOMIC_FIXED_WIDTH_TYPE_128 EASTL_COMPILER_ATOMIC_FIXED_WIDTH_TYPE_128 + +#endif + + +#endif /* EASTL_ATOMIC_INTERNAL_ATOMIC_MACROS_H */ diff --git 
a/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_add_fetch.h b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_add_fetch.h new file mode 100644 index 00000000..f551a07c --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_add_fetch.h @@ -0,0 +1,98 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_MACROS_ADD_FETCH_H +#define EASTL_ATOMIC_INTERNAL_MACROS_ADD_FETCH_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ATOMIC_ADD_FETCH_*_N(type, type ret, type * ptr, type val) +// +#define EASTL_ATOMIC_ADD_FETCH_RELAXED_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_ADD_FETCH_RELAXED_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_ADD_FETCH_ACQUIRE_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_ADD_FETCH_ACQUIRE_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_ADD_FETCH_RELEASE_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_ADD_FETCH_RELEASE_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_ADD_FETCH_ACQ_REL_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_ADD_FETCH_ACQ_REL_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_ADD_FETCH_SEQ_CST_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_ADD_FETCH_SEQ_CST_8)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_ADD_FETCH_RELAXED_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_ADD_FETCH_RELAXED_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_ADD_FETCH_ACQUIRE_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_ADD_FETCH_ACQUIRE_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_ADD_FETCH_RELEASE_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_ADD_FETCH_RELEASE_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_ADD_FETCH_ACQ_REL_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_ADD_FETCH_ACQ_REL_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_ADD_FETCH_SEQ_CST_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_ADD_FETCH_SEQ_CST_16)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_ADD_FETCH_RELAXED_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_ADD_FETCH_RELAXED_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_ADD_FETCH_ACQUIRE_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_ADD_FETCH_ACQUIRE_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_ADD_FETCH_RELEASE_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_ADD_FETCH_RELEASE_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_ADD_FETCH_ACQ_REL_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_ADD_FETCH_ACQ_REL_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_ADD_FETCH_SEQ_CST_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_ADD_FETCH_SEQ_CST_32)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_ADD_FETCH_RELAXED_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_ADD_FETCH_RELAXED_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_ADD_FETCH_ACQUIRE_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_ADD_FETCH_ACQUIRE_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_ADD_FETCH_RELEASE_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_ADD_FETCH_RELEASE_64)(type, ret, ptr, 
val) + +#define EASTL_ATOMIC_ADD_FETCH_ACQ_REL_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_ADD_FETCH_ACQ_REL_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_ADD_FETCH_SEQ_CST_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_ADD_FETCH_SEQ_CST_64)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_ADD_FETCH_RELAXED_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_ADD_FETCH_RELAXED_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_ADD_FETCH_ACQUIRE_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_ADD_FETCH_ACQUIRE_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_ADD_FETCH_RELEASE_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_ADD_FETCH_RELEASE_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_ADD_FETCH_ACQ_REL_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_ADD_FETCH_ACQ_REL_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_ADD_FETCH_SEQ_CST_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_ADD_FETCH_SEQ_CST_128)(type, ret, ptr, val) + + +#endif /* EASTL_ATOMIC_INTERNAL_MACROS_ADD_FETCH_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_and_fetch.h b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_and_fetch.h new file mode 100644 index 00000000..69127223 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_and_fetch.h @@ -0,0 +1,98 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_MACROS_AND_FETCH_H +#define EASTL_ATOMIC_INTERNAL_MACROS_AND_FETCH_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ATOMIC_AND_FETCH_*_N(type, type ret, type * ptr, type val) +// +#define EASTL_ATOMIC_AND_FETCH_RELAXED_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_AND_FETCH_RELAXED_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_AND_FETCH_ACQUIRE_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_AND_FETCH_ACQUIRE_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_AND_FETCH_RELEASE_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_AND_FETCH_RELEASE_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_AND_FETCH_ACQ_REL_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_AND_FETCH_ACQ_REL_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_AND_FETCH_SEQ_CST_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_AND_FETCH_SEQ_CST_8)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_AND_FETCH_RELAXED_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_AND_FETCH_RELAXED_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_AND_FETCH_ACQUIRE_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_AND_FETCH_ACQUIRE_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_AND_FETCH_RELEASE_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_AND_FETCH_RELEASE_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_AND_FETCH_ACQ_REL_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_AND_FETCH_ACQ_REL_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_AND_FETCH_SEQ_CST_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_AND_FETCH_SEQ_CST_16)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_AND_FETCH_RELAXED_32(type, ret, 
ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_AND_FETCH_RELAXED_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_AND_FETCH_ACQUIRE_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_AND_FETCH_ACQUIRE_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_AND_FETCH_RELEASE_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_AND_FETCH_RELEASE_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_AND_FETCH_ACQ_REL_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_AND_FETCH_ACQ_REL_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_AND_FETCH_SEQ_CST_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_AND_FETCH_SEQ_CST_32)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_AND_FETCH_RELAXED_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_AND_FETCH_RELAXED_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_AND_FETCH_ACQUIRE_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_AND_FETCH_ACQUIRE_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_AND_FETCH_RELEASE_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_AND_FETCH_RELEASE_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_AND_FETCH_ACQ_REL_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_AND_FETCH_ACQ_REL_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_AND_FETCH_SEQ_CST_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_AND_FETCH_SEQ_CST_64)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_AND_FETCH_RELAXED_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_AND_FETCH_RELAXED_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_AND_FETCH_ACQUIRE_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_AND_FETCH_ACQUIRE_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_AND_FETCH_RELEASE_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_AND_FETCH_RELEASE_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_AND_FETCH_ACQ_REL_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_AND_FETCH_ACQ_REL_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_AND_FETCH_SEQ_CST_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_AND_FETCH_SEQ_CST_128)(type, ret, ptr, val) + + +#endif /* EASTL_ATOMIC_INTERNAL_MACROS_AND_FETCH_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_base.h b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_base.h new file mode 100644 index 00000000..f03720d9 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_base.h @@ -0,0 +1,65 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_MACROS_BASE_H +#define EASTL_ATOMIC_INTERNAL_MACROS_BASE_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#define EASTL_ATOMIC_INTERNAL_COMPILER_AVAILABLE(op) \ + EA_PREPROCESSOR_JOIN(EA_PREPROCESSOR_JOIN(EASTL_COMPILER_, op), _AVAILABLE) + +#define EASTL_ATOMIC_INTERNAL_ARCH_AVAILABLE(op) \ + EA_PREPROCESSOR_JOIN(EA_PREPROCESSOR_JOIN(EASTL_ARCH_, op), _AVAILABLE) + +#define EASTL_ATOMIC_INTERNAL_NOT_IMPLEMENTED_ERROR(...) 
\ + static_assert(false, "eastl::atomic atomic macro not implemented!") + + +/* Compiler && Arch Not Implemented */ +#define EASTL_ATOMIC_INTERNAL_OP_PATTERN_00(op) \ + EASTL_ATOMIC_INTERNAL_NOT_IMPLEMENTED_ERROR + +/* Arch Implemented */ +#define EASTL_ATOMIC_INTERNAL_OP_PATTERN_01(op) \ + EA_PREPROCESSOR_JOIN(EASTL_ARCH_, op) + +/* Compiler Implmented */ +#define EASTL_ATOMIC_INTERNAL_OP_PATTERN_10(op) \ + EA_PREPROCESSOR_JOIN(EASTL_COMPILER_, op) + +/* Compiler && Arch Implemented */ +#define EASTL_ATOMIC_INTERNAL_OP_PATTERN_11(op) \ + EA_PREPROCESSOR_JOIN(EASTL_ARCH_, op) + + +/* This macro creates the pattern macros above for the 2x2 True-False truth table */ +#define EASTL_ATOMIC_INTERNAL_OP_HELPER1(compiler, arch, op) \ + EA_PREPROCESSOR_JOIN(EASTL_ATOMIC_INTERNAL_OP_PATTERN_, EA_PREPROCESSOR_JOIN(compiler, arch))(op) + + +///////////////////////////////////////////////////////////////////////////////// +// +// EASTL_ATOMIC_CHOOSE_OP_IMPL +// +// This macro chooses between the compiler or architecture implementation for a +// given atomic operation. +// +// USAGE: +// +// EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_ADD_RELAXED_8)(ret, ptr, val) +// +#define EASTL_ATOMIC_CHOOSE_OP_IMPL(op) \ + EASTL_ATOMIC_INTERNAL_OP_HELPER1( \ + EASTL_ATOMIC_INTERNAL_COMPILER_AVAILABLE(op), \ + EASTL_ATOMIC_INTERNAL_ARCH_AVAILABLE(op), \ + op \ + ) + + +#endif /* EASTL_ATOMIC_INTERNAL_MACROS_BASE_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_cmpxchg_strong.h b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_cmpxchg_strong.h new file mode 100644 index 00000000..3cff4935 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_cmpxchg_strong.h @@ -0,0 +1,245 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
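As a worked example of the 2x2 selection in atomic_macros_base.h above, assume the compiler backend defines its availability flag for ATOMIC_FETCH_ADD_RELAXED_8 as 1 while the arch backend defines it as 0; the concrete op and flag values are assumptions chosen to match the usage comment in that header.

// Worked expansion (assumed availability flags, chosen for illustration):
//   EASTL_COMPILER_ATOMIC_FETCH_ADD_RELAXED_8_AVAILABLE == 1
//   EASTL_ARCH_ATOMIC_FETCH_ADD_RELAXED_8_AVAILABLE     == 0
//
// EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_ADD_RELAXED_8)
//   -> EASTL_ATOMIC_INTERNAL_OP_HELPER1(1, 0, ATOMIC_FETCH_ADD_RELAXED_8)
//   -> EASTL_ATOMIC_INTERNAL_OP_PATTERN_10(ATOMIC_FETCH_ADD_RELAXED_8)   // "compiler implemented" cell
//   -> EASTL_COMPILER_ATOMIC_FETCH_ADD_RELAXED_8
//
// With both flags 1, PATTERN_11 still joins EASTL_ARCH_, i.e. the arch
// implementation wins; with both flags 0, PATTERN_00 expands to the
// static_assert error macro.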
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_MACROS_CMPXCHG_STRONG_H +#define EASTL_ATOMIC_INTERNAL_MACROS_CMPXCHG_STRONG_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ATOMIC_CMPXCHG_STRONG_*_*_N(type, bool ret, type * ptr, type * expected, type desired) +// +#define EASTL_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_8(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_8)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_8(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_8)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_8(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_8)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_8(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_8)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_8(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_8)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_8(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_8)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_8(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_8)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_8(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_8)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_8(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_8)(type, ret, ptr, expected, desired) + + +#define EASTL_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_16(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_16)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_16(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_16)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_16(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_16)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_16(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_16)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_16(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_16)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_16(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_16)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_16(type, ret, ptr, expected, 
desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_16)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_16(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_16)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_16(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_16)(type, ret, ptr, expected, desired) + + +#define EASTL_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_32(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_32)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_32(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_32)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_32(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_32)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_32(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_32)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_32(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_32)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_32(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_32)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_32(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_32)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_32(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_32)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_32(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_32)(type, ret, ptr, expected, desired) + + +#define EASTL_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_64(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_64)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_64(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_64)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_64(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_64)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_64(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_64)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_64(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_64)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_64(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_64)(type, ret, ptr, expected, 
desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_64(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_64)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_64(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_64)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_64(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_64)(type, ret, ptr, expected, desired) + + +#define EASTL_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_128(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_128)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_128(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_128)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_128(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_128)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_128(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_128)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_128(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_128)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_128(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_128)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_128(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_128)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_128(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_128)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_128(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_128)(type, ret, ptr, expected, desired) + + +///////////////////////////////////////////////////////////////////////////////// + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ATOMIC_CMPXCHG_STRONG_*(bool ret, type * ptr, type * expected, type desired) +// +#define EASTL_ATOMIC_CMPXCHG_STRONG_RELAXED_8(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_RELAXED_8)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_ACQUIRE_8(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_ACQUIRE_8)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_RELEASE_8(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_RELEASE_8)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_ACQ_REL_8(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_ACQ_REL_8)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_SEQ_CST_8(type, ret, ptr, expected, desired) \ + 
EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_SEQ_CST_8)(type, ret, ptr, expected, desired) + + +#define EASTL_ATOMIC_CMPXCHG_STRONG_RELAXED_16(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_RELAXED_16)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_ACQUIRE_16(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_ACQUIRE_16)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_RELEASE_16(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_RELEASE_16)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_ACQ_REL_16(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_ACQ_REL_16)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_SEQ_CST_16(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_SEQ_CST_16)(type, ret, ptr, expected, desired) + + +#define EASTL_ATOMIC_CMPXCHG_STRONG_RELAXED_32(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_RELAXED_32)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_ACQUIRE_32(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_ACQUIRE_32)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_RELEASE_32(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_RELEASE_32)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_ACQ_REL_32(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_ACQ_REL_32)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_SEQ_CST_32(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_SEQ_CST_32)(type, ret, ptr, expected, desired) + + +#define EASTL_ATOMIC_CMPXCHG_STRONG_RELAXED_64(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_RELAXED_64)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_ACQUIRE_64(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_ACQUIRE_64)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_RELEASE_64(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_RELEASE_64)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_ACQ_REL_64(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_ACQ_REL_64)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_SEQ_CST_64(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_SEQ_CST_64)(type, ret, ptr, expected, desired) + + +#define EASTL_ATOMIC_CMPXCHG_STRONG_RELAXED_128(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_RELAXED_128)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_ACQUIRE_128(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_ACQUIRE_128)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_RELEASE_128(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_RELEASE_128)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_ACQ_REL_128(type, ret, ptr, expected, desired) \ + 
EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_ACQ_REL_128)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_SEQ_CST_128(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_SEQ_CST_128)(type, ret, ptr, expected, desired) + + +#endif /* EASTL_ATOMIC_INTERNAL_MACROS_CMPXCHG_STRONG_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_cmpxchg_weak.h b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_cmpxchg_weak.h new file mode 100644 index 00000000..60ea8b0b --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_cmpxchg_weak.h @@ -0,0 +1,245 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_MACROS_CMPXCHG_WEAK_H +#define EASTL_ATOMIC_INTERNAL_MACROS_CMPXCHG_WEAK_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ATOMIC_CMPXCHG_WEAK_*_*_N(type, bool ret, type * ptr, type * expected, type desired) +// +#define EASTL_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_8(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_8)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_8(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_8)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_8(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_8)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_8(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_8)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_8(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_8)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_8(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_8)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_8(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_8)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_8(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_8)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_8(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_8)(type, ret, ptr, expected, desired) + + +#define EASTL_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_16(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_16)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_16(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_16)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_16(type, ret, ptr, expected, 
desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_16)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_16(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_16)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_16(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_16)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_16(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_16)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_16(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_16)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_16(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_16)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_16(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_16)(type, ret, ptr, expected, desired) + + +#define EASTL_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_32(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_32)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_32(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_32)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_32(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_32)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_32(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_32)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_32(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_32)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_32(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_32)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_32(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_32)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_32(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_32)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_32(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_32)(type, ret, ptr, expected, desired) + + +#define EASTL_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_64(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_64)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_64(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_64)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_64(type, ret, 
ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_64)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_64(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_64)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_64(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_64)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_64(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_64)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_64(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_64)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_64(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_64)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_64(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_64)(type, ret, ptr, expected, desired) + + +#define EASTL_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_128(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_128)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_128(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_128)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_128(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_128)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_128(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_128)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_128(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_128)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_128(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_128)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_128(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_128)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_128(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_128)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_128(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_128)(type, ret, ptr, expected, desired) + + +///////////////////////////////////////////////////////////////////////////////// + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ATOMIC_CMPXCHG_WEAK_*(bool ret, type * ptr, type * expected, type desired) +// +#define EASTL_ATOMIC_CMPXCHG_WEAK_RELAXED_8(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_RELAXED_8)(type, ret, ptr, 
expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_ACQUIRE_8(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_ACQUIRE_8)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_RELEASE_8(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_RELEASE_8)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_ACQ_REL_8(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_ACQ_REL_8)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_SEQ_CST_8(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_SEQ_CST_8)(type, ret, ptr, expected, desired) + + +#define EASTL_ATOMIC_CMPXCHG_WEAK_RELAXED_16(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_RELAXED_16)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_ACQUIRE_16(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_ACQUIRE_16)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_RELEASE_16(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_RELEASE_16)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_ACQ_REL_16(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_ACQ_REL_16)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_SEQ_CST_16(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_SEQ_CST_16)(type, ret, ptr, expected, desired) + + +#define EASTL_ATOMIC_CMPXCHG_WEAK_RELAXED_32(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_RELAXED_32)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_ACQUIRE_32(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_ACQUIRE_32)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_RELEASE_32(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_RELEASE_32)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_ACQ_REL_32(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_ACQ_REL_32)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_SEQ_CST_32(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_SEQ_CST_32)(type, ret, ptr, expected, desired) + + +#define EASTL_ATOMIC_CMPXCHG_WEAK_RELAXED_64(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_RELAXED_64)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_ACQUIRE_64(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_ACQUIRE_64)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_RELEASE_64(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_RELEASE_64)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_ACQ_REL_64(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_ACQ_REL_64)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_SEQ_CST_64(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_SEQ_CST_64)(type, ret, ptr, expected, desired) + + +#define EASTL_ATOMIC_CMPXCHG_WEAK_RELAXED_128(type, ret, ptr, expected, 
desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_RELAXED_128)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_ACQUIRE_128(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_ACQUIRE_128)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_RELEASE_128(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_RELEASE_128)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_ACQ_REL_128(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_ACQ_REL_128)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_SEQ_CST_128(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_SEQ_CST_128)(type, ret, ptr, expected, desired) + + +#endif /* EASTL_ATOMIC_INTERNAL_MACROS_CMPXCHG_WEAK_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_compiler_barrier.h b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_compiler_barrier.h new file mode 100644 index 00000000..96ea6d0b --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_compiler_barrier.h @@ -0,0 +1,30 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_MACROS_COMPILER_BARRIER_H +#define EASTL_ATOMIC_INTERNAL_MACROS_COMPILER_BARRIER_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ATOMIC_COMPILER_BARRIER() +// +#define EASTL_ATOMIC_COMPILER_BARRIER() \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_COMPILER_BARRIER)() + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ATOMIC_COMPILER_BARRIER_DATA_DEPENDENCY(const T&, type) +// +#define EASTL_ATOMIC_COMPILER_BARRIER_DATA_DEPENDENCY(val, type) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_COMPILER_BARRIER_DATA_DEPENDENCY)(val, type) + + +#endif /* EASTL_ATOMIC_INTERNAL_MACROS_COMPILER_BARRIER_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_cpu_pause.h b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_cpu_pause.h new file mode 100644 index 00000000..e027b576 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_cpu_pause.h @@ -0,0 +1,22 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
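Aside, not part of the patch itself: the CMPXCHG_WEAK macros above ultimately back eastl::atomic<T>::compare_exchange_weak, which, like its std counterpart, may fail spuriously and is therefore meant to be retried in a loop. A minimal sketch of that usage, assuming the public eastl::atomic API mirrors std::atomic and is reached through <EASTL/atomic.h>:

#include <EASTL/atomic.h>

struct Node { Node* next; };

// Lock-free stack push: compare_exchange_weak may fail spuriously, so the
// swap of the head pointer is retried until it succeeds. On failure,
// 'expected' is refreshed with the current head value.
void push(eastl::atomic<Node*>& head, Node* n)
{
    Node* expected = head.load(eastl::memory_order_relaxed);
    do {
        n->next = expected;
    } while (!head.compare_exchange_weak(expected, n,
                                         eastl::memory_order_release,
                                         eastl::memory_order_relaxed));
}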
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_MACROS_CPU_PAUSE_H +#define EASTL_ATOMIC_INTERNAL_MACROS_CPU_PAUSE_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ATOMIC_CPU_PAUSE() +// +#define EASTL_ATOMIC_CPU_PAUSE() \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CPU_PAUSE)() + + +#endif /* EASTL_ATOMIC_INTERNAL_MACROS_CPU_PAUSE_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_exchange.h b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_exchange.h new file mode 100644 index 00000000..0681318f --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_exchange.h @@ -0,0 +1,98 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_MACROS_EXCHANGE_H +#define EASTL_ATOMIC_INTERNAL_MACROS_EXCHANGE_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ATOMIC_EXCHANGE_*_N(type, type ret, type * ptr, type val) +// +#define EASTL_ATOMIC_EXCHANGE_RELAXED_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_EXCHANGE_RELAXED_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_EXCHANGE_ACQUIRE_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_EXCHANGE_ACQUIRE_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_EXCHANGE_RELEASE_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_EXCHANGE_RELEASE_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_EXCHANGE_ACQ_REL_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_EXCHANGE_ACQ_REL_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_EXCHANGE_SEQ_CST_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_EXCHANGE_SEQ_CST_8)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_EXCHANGE_RELAXED_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_EXCHANGE_RELAXED_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_EXCHANGE_ACQUIRE_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_EXCHANGE_ACQUIRE_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_EXCHANGE_RELEASE_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_EXCHANGE_RELEASE_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_EXCHANGE_ACQ_REL_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_EXCHANGE_ACQ_REL_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_EXCHANGE_SEQ_CST_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_EXCHANGE_SEQ_CST_16)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_EXCHANGE_RELAXED_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_EXCHANGE_RELAXED_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_EXCHANGE_ACQUIRE_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_EXCHANGE_ACQUIRE_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_EXCHANGE_RELEASE_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_EXCHANGE_RELEASE_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_EXCHANGE_ACQ_REL_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_EXCHANGE_ACQ_REL_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_EXCHANGE_SEQ_CST_32(type, ret, ptr, val) \ + 
EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_EXCHANGE_SEQ_CST_32)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_EXCHANGE_RELAXED_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_EXCHANGE_RELAXED_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_EXCHANGE_ACQUIRE_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_EXCHANGE_ACQUIRE_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_EXCHANGE_RELEASE_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_EXCHANGE_RELEASE_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_EXCHANGE_ACQ_REL_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_EXCHANGE_ACQ_REL_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_EXCHANGE_SEQ_CST_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_EXCHANGE_SEQ_CST_64)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_EXCHANGE_RELAXED_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_EXCHANGE_RELAXED_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_EXCHANGE_ACQUIRE_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_EXCHANGE_ACQUIRE_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_EXCHANGE_RELEASE_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_EXCHANGE_RELEASE_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_EXCHANGE_ACQ_REL_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_EXCHANGE_ACQ_REL_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_EXCHANGE_SEQ_CST_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_EXCHANGE_SEQ_CST_128)(type, ret, ptr, val) + + +#endif /* EASTL_ATOMIC_INTERNAL_MACROS_EXCHANGE_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_fetch_add.h b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_fetch_add.h new file mode 100644 index 00000000..701fdf37 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_fetch_add.h @@ -0,0 +1,98 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
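These dispatch macros use an out-parameter convention rather than returning a value: the result of the operation comes back through the ret argument, and EASTL_ATOMIC_CHOOSE_OP_IMPL forwards the whole call to whichever compiler or architecture backend was selected at preprocessing time. An illustrative sketch of the calling convention at this level (internal machinery, shown only to make the macro shape concrete; real call sites are the eastl::atomic member functions):

#include <stdint.h>
#include <EASTL/atomic.h>  // pulls in the internal macro layer

// Illustrative only: poking the internal macro layer directly. Application
// code would call eastl::atomic<uint32_t>::exchange() instead. The macro
// places the previous value into 'previous' rather than returning it.
static uint32_t exchangeSeqCst(uint32_t* p, uint32_t desired)
{
    uint32_t previous;
    EASTL_ATOMIC_EXCHANGE_SEQ_CST_32(uint32_t, previous, p, desired);
    return previous;  // value *p held before the exchange
}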
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_MACROS_FETCH_ADD_H +#define EASTL_ATOMIC_INTERNAL_MACROS_FETCH_ADD_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ATOMIC_FETCH_ADD_*_N(type, type ret, type * ptr, type val) +// +#define EASTL_ATOMIC_FETCH_ADD_RELAXED_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_ADD_RELAXED_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_ADD_ACQUIRE_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_ADD_ACQUIRE_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_ADD_RELEASE_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_ADD_RELEASE_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_ADD_ACQ_REL_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_ADD_ACQ_REL_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_ADD_SEQ_CST_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_ADD_SEQ_CST_8)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_FETCH_ADD_RELAXED_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_ADD_RELAXED_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_ADD_ACQUIRE_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_ADD_ACQUIRE_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_ADD_RELEASE_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_ADD_RELEASE_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_ADD_ACQ_REL_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_ADD_ACQ_REL_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_ADD_SEQ_CST_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_ADD_SEQ_CST_16)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_FETCH_ADD_RELAXED_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_ADD_RELAXED_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_ADD_ACQUIRE_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_ADD_ACQUIRE_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_ADD_RELEASE_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_ADD_RELEASE_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_ADD_ACQ_REL_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_ADD_ACQ_REL_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_ADD_SEQ_CST_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_ADD_SEQ_CST_32)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_FETCH_ADD_RELAXED_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_ADD_RELAXED_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_ADD_ACQUIRE_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_ADD_ACQUIRE_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_ADD_RELEASE_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_ADD_RELEASE_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_ADD_ACQ_REL_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_ADD_ACQ_REL_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_ADD_SEQ_CST_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_ADD_SEQ_CST_64)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_FETCH_ADD_RELAXED_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_ADD_RELAXED_128)(type, ret, ptr, val) + +#define 
EASTL_ATOMIC_FETCH_ADD_ACQUIRE_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_ADD_ACQUIRE_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_ADD_RELEASE_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_ADD_RELEASE_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_ADD_ACQ_REL_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_ADD_ACQ_REL_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_ADD_SEQ_CST_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_ADD_SEQ_CST_128)(type, ret, ptr, val) + + +#endif /* EASTL_ATOMIC_INTERNAL_MACROS_FETCH_ADD_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_fetch_and.h b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_fetch_and.h new file mode 100644 index 00000000..831f1bfe --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_fetch_and.h @@ -0,0 +1,98 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_MACROS_FETCH_AND_H +#define EASTL_ATOMIC_INTERNAL_MACROS_FETCH_AND_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ATOMIC_FETCH_AND_*_N(type, type ret, type * ptr, type val) +// +#define EASTL_ATOMIC_FETCH_AND_RELAXED_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_AND_RELAXED_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_AND_ACQUIRE_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_AND_ACQUIRE_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_AND_RELEASE_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_AND_RELEASE_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_AND_ACQ_REL_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_AND_ACQ_REL_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_AND_SEQ_CST_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_AND_SEQ_CST_8)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_FETCH_AND_RELAXED_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_AND_RELAXED_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_AND_ACQUIRE_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_AND_ACQUIRE_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_AND_RELEASE_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_AND_RELEASE_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_AND_ACQ_REL_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_AND_ACQ_REL_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_AND_SEQ_CST_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_AND_SEQ_CST_16)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_FETCH_AND_RELAXED_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_AND_RELAXED_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_AND_ACQUIRE_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_AND_ACQUIRE_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_AND_RELEASE_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_AND_RELEASE_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_AND_ACQ_REL_32(type, ret, ptr, val) \ + 
EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_AND_ACQ_REL_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_AND_SEQ_CST_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_AND_SEQ_CST_32)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_FETCH_AND_RELAXED_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_AND_RELAXED_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_AND_ACQUIRE_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_AND_ACQUIRE_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_AND_RELEASE_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_AND_RELEASE_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_AND_ACQ_REL_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_AND_ACQ_REL_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_AND_SEQ_CST_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_AND_SEQ_CST_64)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_FETCH_AND_RELAXED_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_AND_RELAXED_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_AND_ACQUIRE_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_AND_ACQUIRE_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_AND_RELEASE_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_AND_RELEASE_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_AND_ACQ_REL_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_AND_ACQ_REL_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_AND_SEQ_CST_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_AND_SEQ_CST_128)(type, ret, ptr, val) + + +#endif /* EASTL_ATOMIC_INTERNAL_MACROS_FETCH_AND_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_fetch_or.h b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_fetch_or.h new file mode 100644 index 00000000..b1322970 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_fetch_or.h @@ -0,0 +1,98 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
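The FETCH_AND macros just above and the FETCH_OR family that follows are the usual read-modify-write pair for flag words: fetch_or sets bits, fetch_and clears them, and both hand back the previous value so the caller can tell whether a bit actually changed. A small sketch against the public member functions, assuming they mirror std::atomic:

#include <stdint.h>
#include <EASTL/atomic.h>

enum : uint32_t { kDirty = 1u << 0 };

// Returns true only for the call that actually set the dirty bit.
bool markDirty(eastl::atomic<uint32_t>& flags)
{
    uint32_t prev = flags.fetch_or(kDirty, eastl::memory_order_acq_rel);
    return (prev & kDirty) == 0;
}

void clearDirty(eastl::atomic<uint32_t>& flags)
{
    flags.fetch_and(~kDirty, eastl::memory_order_release);
}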
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_MACROS_FETCH_OR_H +#define EASTL_ATOMIC_INTERNAL_MACROS_FETCH_OR_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ATOMIC_FETCH_OR_*_N(type, type ret, type * ptr, type val) +// +#define EASTL_ATOMIC_FETCH_OR_RELAXED_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_OR_RELAXED_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_OR_ACQUIRE_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_OR_ACQUIRE_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_OR_RELEASE_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_OR_RELEASE_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_OR_ACQ_REL_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_OR_ACQ_REL_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_OR_SEQ_CST_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_OR_SEQ_CST_8)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_FETCH_OR_RELAXED_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_OR_RELAXED_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_OR_ACQUIRE_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_OR_ACQUIRE_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_OR_RELEASE_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_OR_RELEASE_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_OR_ACQ_REL_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_OR_ACQ_REL_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_OR_SEQ_CST_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_OR_SEQ_CST_16)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_FETCH_OR_RELAXED_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_OR_RELAXED_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_OR_ACQUIRE_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_OR_ACQUIRE_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_OR_RELEASE_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_OR_RELEASE_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_OR_ACQ_REL_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_OR_ACQ_REL_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_OR_SEQ_CST_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_OR_SEQ_CST_32)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_FETCH_OR_RELAXED_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_OR_RELAXED_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_OR_ACQUIRE_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_OR_ACQUIRE_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_OR_RELEASE_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_OR_RELEASE_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_OR_ACQ_REL_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_OR_ACQ_REL_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_OR_SEQ_CST_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_OR_SEQ_CST_64)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_FETCH_OR_RELAXED_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_OR_RELAXED_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_OR_ACQUIRE_128(type, ret, ptr, val) \ + 
EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_OR_ACQUIRE_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_OR_RELEASE_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_OR_RELEASE_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_OR_ACQ_REL_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_OR_ACQ_REL_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_OR_SEQ_CST_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_OR_SEQ_CST_128)(type, ret, ptr, val) + + +#endif /* EASTL_ATOMIC_INTERNAL_MACROS_FETCH_OR_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_fetch_sub.h b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_fetch_sub.h new file mode 100644 index 00000000..00980643 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_fetch_sub.h @@ -0,0 +1,98 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_MACROS_FETCH_SUB_H +#define EASTL_ATOMIC_INTERNAL_MACROS_FETCH_SUB_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ATOMIC_FETCH_SUB_*_N(type, type ret, type * ptr, type val) +// +#define EASTL_ATOMIC_FETCH_SUB_RELAXED_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_SUB_RELAXED_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_SUB_ACQUIRE_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_SUB_ACQUIRE_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_SUB_RELEASE_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_SUB_RELEASE_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_SUB_ACQ_REL_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_SUB_ACQ_REL_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_SUB_SEQ_CST_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_SUB_SEQ_CST_8)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_FETCH_SUB_RELAXED_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_SUB_RELAXED_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_SUB_ACQUIRE_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_SUB_ACQUIRE_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_SUB_RELEASE_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_SUB_RELEASE_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_SUB_ACQ_REL_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_SUB_ACQ_REL_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_SUB_SEQ_CST_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_SUB_SEQ_CST_16)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_FETCH_SUB_RELAXED_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_SUB_RELAXED_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_SUB_ACQUIRE_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_SUB_ACQUIRE_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_SUB_RELEASE_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_SUB_RELEASE_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_SUB_ACQ_REL_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_SUB_ACQ_REL_32)(type, ret, ptr, 
val) + +#define EASTL_ATOMIC_FETCH_SUB_SEQ_CST_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_SUB_SEQ_CST_32)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_FETCH_SUB_RELAXED_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_SUB_RELAXED_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_SUB_ACQUIRE_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_SUB_ACQUIRE_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_SUB_RELEASE_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_SUB_RELEASE_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_SUB_ACQ_REL_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_SUB_ACQ_REL_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_SUB_SEQ_CST_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_SUB_SEQ_CST_64)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_FETCH_SUB_RELAXED_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_SUB_RELAXED_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_SUB_ACQUIRE_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_SUB_ACQUIRE_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_SUB_RELEASE_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_SUB_RELEASE_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_SUB_ACQ_REL_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_SUB_ACQ_REL_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_SUB_SEQ_CST_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_SUB_SEQ_CST_128)(type, ret, ptr, val) + + +#endif /* EASTL_ATOMIC_INTERNAL_MACROS_FETCH_SUB_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_fetch_xor.h b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_fetch_xor.h new file mode 100644 index 00000000..2887ea56 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_fetch_xor.h @@ -0,0 +1,98 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
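The most common client of FETCH_SUB is reference counting: the decrement uses acquire-release ordering so that the thread dropping the last reference observes every write made by earlier owners before it destroys the object. A hedged sketch using the public member function:

#include <stdint.h>
#include <EASTL/atomic.h>

struct Shared
{
    eastl::atomic<int32_t> refCount{1};
    // ... payload ...
};

void release(Shared* s)
{
    // fetch_sub returns the previous count; 1 means this was the last owner.
    if (s->refCount.fetch_sub(1, eastl::memory_order_acq_rel) == 1)
        delete s;
}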
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_MACROS_FETCH_XOR_H +#define EASTL_ATOMIC_INTERNAL_MACROS_FETCH_XOR_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ATOMIC_FETCH_XOR_*_N(type, type ret, type * ptr, type val) +// +#define EASTL_ATOMIC_FETCH_XOR_RELAXED_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_XOR_RELAXED_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_XOR_ACQUIRE_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_XOR_ACQUIRE_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_XOR_RELEASE_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_XOR_RELEASE_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_XOR_ACQ_REL_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_XOR_ACQ_REL_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_XOR_SEQ_CST_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_XOR_SEQ_CST_8)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_FETCH_XOR_RELAXED_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_XOR_RELAXED_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_XOR_ACQUIRE_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_XOR_ACQUIRE_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_XOR_RELEASE_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_XOR_RELEASE_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_XOR_ACQ_REL_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_XOR_ACQ_REL_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_XOR_SEQ_CST_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_XOR_SEQ_CST_16)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_FETCH_XOR_RELAXED_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_XOR_RELAXED_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_XOR_ACQUIRE_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_XOR_ACQUIRE_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_XOR_RELEASE_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_XOR_RELEASE_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_XOR_ACQ_REL_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_XOR_ACQ_REL_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_XOR_SEQ_CST_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_XOR_SEQ_CST_32)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_FETCH_XOR_RELAXED_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_XOR_RELAXED_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_XOR_ACQUIRE_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_XOR_ACQUIRE_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_XOR_RELEASE_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_XOR_RELEASE_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_XOR_ACQ_REL_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_XOR_ACQ_REL_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_XOR_SEQ_CST_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_XOR_SEQ_CST_64)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_FETCH_XOR_RELAXED_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_XOR_RELAXED_128)(type, ret, ptr, val) + +#define 
EASTL_ATOMIC_FETCH_XOR_ACQUIRE_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_XOR_ACQUIRE_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_XOR_RELEASE_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_XOR_RELEASE_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_XOR_ACQ_REL_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_XOR_ACQ_REL_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_XOR_SEQ_CST_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_XOR_SEQ_CST_128)(type, ret, ptr, val) + + +#endif /* EASTL_ATOMIC_INTERNAL_MACROS_FETCH_XOR_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_load.h b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_load.h new file mode 100644 index 00000000..76580593 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_load.h @@ -0,0 +1,75 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_MACROS_LOAD_H +#define EASTL_ATOMIC_INTERNAL_MACROS_LOAD_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ATOMIC_LOAD_*_N(type, type ret, type * ptr) +// +#define EASTL_ATOMIC_LOAD_RELAXED_8(type, ret, ptr) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_LOAD_RELAXED_8)(type, ret, ptr) + +#define EASTL_ATOMIC_LOAD_ACQUIRE_8(type, ret, ptr) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_LOAD_ACQUIRE_8)(type, ret, ptr) + +#define EASTL_ATOMIC_LOAD_SEQ_CST_8(type, ret, ptr) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_LOAD_SEQ_CST_8)(type, ret, ptr) + + +#define EASTL_ATOMIC_LOAD_RELAXED_16(type, ret, ptr) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_LOAD_RELAXED_16)(type, ret, ptr) + +#define EASTL_ATOMIC_LOAD_ACQUIRE_16(type, ret, ptr) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_LOAD_ACQUIRE_16)(type, ret, ptr) + +#define EASTL_ATOMIC_LOAD_SEQ_CST_16(type, ret, ptr) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_LOAD_SEQ_CST_16)(type, ret, ptr) + + +#define EASTL_ATOMIC_LOAD_RELAXED_32(type, ret, ptr) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_LOAD_RELAXED_32)(type, ret, ptr) + +#define EASTL_ATOMIC_LOAD_ACQUIRE_32(type, ret, ptr) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_LOAD_ACQUIRE_32)(type, ret, ptr) + +#define EASTL_ATOMIC_LOAD_SEQ_CST_32(type, ret, ptr) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_LOAD_SEQ_CST_32)(type, ret, ptr) + + +#define EASTL_ATOMIC_LOAD_RELAXED_64(type, ret, ptr) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_LOAD_RELAXED_64)(type, ret, ptr) + +#define EASTL_ATOMIC_LOAD_ACQUIRE_64(type, ret, ptr) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_LOAD_ACQUIRE_64)(type, ret, ptr) + +#define EASTL_ATOMIC_LOAD_SEQ_CST_64(type, ret, ptr) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_LOAD_SEQ_CST_64)(type, ret, ptr) + + +#define EASTL_ATOMIC_LOAD_RELAXED_128(type, ret, ptr) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_LOAD_RELAXED_128)(type, ret, ptr) + +#define EASTL_ATOMIC_LOAD_ACQUIRE_128(type, ret, ptr) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_LOAD_ACQUIRE_128)(type, ret, ptr) + +#define EASTL_ATOMIC_LOAD_SEQ_CST_128(type, ret, ptr) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_LOAD_SEQ_CST_128)(type, ret, ptr) + + +#define EASTL_ATOMIC_LOAD_READ_DEPENDS_32(type, ret, ptr) \ + 
EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_LOAD_READ_DEPENDS_32)(type, ret, ptr) + +#define EASTL_ATOMIC_LOAD_READ_DEPENDS_64(type, ret, ptr) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_LOAD_READ_DEPENDS_64)(type, ret, ptr) + + +#endif /* EASTL_ATOMIC_INTERNAL_MACROS_LOAD_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_memory_barrier.h b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_memory_barrier.h new file mode 100644 index 00000000..14f7be92 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_memory_barrier.h @@ -0,0 +1,38 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_MACROS_MEMORY_BARRIER_H +#define EASTL_ATOMIC_INTERNAL_MACROS_MEMORY_BARRIER_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ATOMIC_CPU_MB() +// +#define EASTL_ATOMIC_CPU_MB() \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CPU_MB)() + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ATOMIC_CPU_WMB() +// +#define EASTL_ATOMIC_CPU_WMB() \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CPU_WMB)() + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ATOMIC_CPU_RMB() +// +#define EASTL_ATOMIC_CPU_RMB() \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CPU_RMB)() + + +#endif /* EASTL_ATOMIC_INTERNAL_MACROS_MEMORY_BARRIER_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_or_fetch.h b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_or_fetch.h new file mode 100644 index 00000000..c9ebd6e3 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_or_fetch.h @@ -0,0 +1,98 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_MACROS_OR_FETCH_H +#define EASTL_ATOMIC_INTERNAL_MACROS_OR_FETCH_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ATOMIC_OR_FETCH_*_N(type, type ret, type * ptr, type val) +// +#define EASTL_ATOMIC_OR_FETCH_RELAXED_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_OR_FETCH_RELAXED_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_OR_FETCH_ACQUIRE_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_OR_FETCH_ACQUIRE_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_OR_FETCH_RELEASE_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_OR_FETCH_RELEASE_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_OR_FETCH_ACQ_REL_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_OR_FETCH_ACQ_REL_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_OR_FETCH_SEQ_CST_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_OR_FETCH_SEQ_CST_8)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_OR_FETCH_RELAXED_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_OR_FETCH_RELAXED_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_OR_FETCH_ACQUIRE_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_OR_FETCH_ACQUIRE_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_OR_FETCH_RELEASE_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_OR_FETCH_RELEASE_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_OR_FETCH_ACQ_REL_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_OR_FETCH_ACQ_REL_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_OR_FETCH_SEQ_CST_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_OR_FETCH_SEQ_CST_16)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_OR_FETCH_RELAXED_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_OR_FETCH_RELAXED_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_OR_FETCH_ACQUIRE_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_OR_FETCH_ACQUIRE_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_OR_FETCH_RELEASE_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_OR_FETCH_RELEASE_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_OR_FETCH_ACQ_REL_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_OR_FETCH_ACQ_REL_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_OR_FETCH_SEQ_CST_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_OR_FETCH_SEQ_CST_32)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_OR_FETCH_RELAXED_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_OR_FETCH_RELAXED_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_OR_FETCH_ACQUIRE_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_OR_FETCH_ACQUIRE_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_OR_FETCH_RELEASE_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_OR_FETCH_RELEASE_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_OR_FETCH_ACQ_REL_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_OR_FETCH_ACQ_REL_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_OR_FETCH_SEQ_CST_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_OR_FETCH_SEQ_CST_64)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_OR_FETCH_RELAXED_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_OR_FETCH_RELAXED_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_OR_FETCH_ACQUIRE_128(type, ret, ptr, val) \ + 
EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_OR_FETCH_ACQUIRE_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_OR_FETCH_RELEASE_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_OR_FETCH_RELEASE_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_OR_FETCH_ACQ_REL_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_OR_FETCH_ACQ_REL_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_OR_FETCH_SEQ_CST_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_OR_FETCH_SEQ_CST_128)(type, ret, ptr, val) + + +#endif /* EASTL_ATOMIC_INTERNAL_MACROS_OR_FETCH_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_signal_fence.h b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_signal_fence.h new file mode 100644 index 00000000..dd16b106 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_signal_fence.h @@ -0,0 +1,34 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_MACROS_SIGNAL_FENCE_H +#define EASTL_ATOMIC_INTERNAL_MACROS_SIGNAL_FENCE_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ATOMIC_SIGNAL_FENCE_*() +// +#define EASTL_ATOMIC_SIGNAL_FENCE_RELAXED() \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_SIGNAL_FENCE_RELAXED)() + +#define EASTL_ATOMIC_SIGNAL_FENCE_ACQUIRE() \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_SIGNAL_FENCE_ACQUIRE)() + +#define EASTL_ATOMIC_SIGNAL_FENCE_RELEASE() \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_SIGNAL_FENCE_RELEASE)() + +#define EASTL_ATOMIC_SIGNAL_FENCE_ACQ_REL() \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_SIGNAL_FENCE_ACQ_REL)() + +#define EASTL_ATOMIC_SIGNAL_FENCE_SEQ_CST() \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_SIGNAL_FENCE_SEQ_CST)() + + +#endif /* EASTL_ATOMIC_INTERNAL_MACROS_SIGNAL_FENCE_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_store.h b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_store.h new file mode 100644 index 00000000..64b662e1 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_store.h @@ -0,0 +1,68 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_MACROS_STORE_H +#define EASTL_ATOMIC_INTERNAL_MACROS_STORE_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ATOMIC_STORE_*_N(type, type * ptr, type val) +// +#define EASTL_ATOMIC_STORE_RELAXED_8(type, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_STORE_RELAXED_8)(type, ptr, val) + +#define EASTL_ATOMIC_STORE_RELEASE_8(type, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_STORE_RELEASE_8)(type, ptr, val) + +#define EASTL_ATOMIC_STORE_SEQ_CST_8(type, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_STORE_SEQ_CST_8)(type, ptr, val) + + +#define EASTL_ATOMIC_STORE_RELAXED_16(type, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_STORE_RELAXED_16)(type, ptr, val) + +#define EASTL_ATOMIC_STORE_RELEASE_16(type, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_STORE_RELEASE_16)(type, ptr, val) + +#define EASTL_ATOMIC_STORE_SEQ_CST_16(type, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_STORE_SEQ_CST_16)(type, ptr, val) + + +#define EASTL_ATOMIC_STORE_RELAXED_32(type, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_STORE_RELAXED_32)(type, ptr, val) + +#define EASTL_ATOMIC_STORE_RELEASE_32(type, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_STORE_RELEASE_32)(type, ptr, val) + +#define EASTL_ATOMIC_STORE_SEQ_CST_32(type, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_STORE_SEQ_CST_32)(type, ptr, val) + + +#define EASTL_ATOMIC_STORE_RELAXED_64(type, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_STORE_RELAXED_64)(type, ptr, val) + +#define EASTL_ATOMIC_STORE_RELEASE_64(type, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_STORE_RELEASE_64)(type, ptr, val) + +#define EASTL_ATOMIC_STORE_SEQ_CST_64(type, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_STORE_SEQ_CST_64)(type, ptr, val) + + +#define EASTL_ATOMIC_STORE_RELAXED_128(type, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_STORE_RELAXED_128)(type, ptr, val) + +#define EASTL_ATOMIC_STORE_RELEASE_128(type, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_STORE_RELEASE_128)(type, ptr, val) + +#define EASTL_ATOMIC_STORE_SEQ_CST_128(type, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_STORE_SEQ_CST_128)(type, ptr, val) + + +#endif /* EASTL_ATOMIC_INTERNAL_MACROS_STORE_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_sub_fetch.h b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_sub_fetch.h new file mode 100644 index 00000000..330f38e9 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_sub_fetch.h @@ -0,0 +1,98 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
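The STORE family above and the LOAD family earlier come in release and acquire flavours because their main job is publication: a release store makes all prior writes visible to any thread that later performs an acquire load of the same variable. A small message-passing sketch, again assuming the public API mirrors std::atomic:

#include <EASTL/atomic.h>

static int payload = 0;
static eastl::atomic<bool> ready{false};

void producer()
{
    payload = 42;                                    // ordinary write
    ready.store(true, eastl::memory_order_release);  // publish it
}

void consumer()
{
    while (!ready.load(eastl::memory_order_acquire)) { }  // spin until published
    // The acquire load synchronizes with the release store, so payload == 42 here.
}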
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_MACROS_SUB_FETCH_H +#define EASTL_ATOMIC_INTERNAL_MACROS_SUB_FETCH_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ATOMIC_SUB_FETCH_*_N(type, type ret, type * ptr, type val) +// +#define EASTL_ATOMIC_SUB_FETCH_RELAXED_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_SUB_FETCH_RELAXED_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_SUB_FETCH_ACQUIRE_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_SUB_FETCH_ACQUIRE_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_SUB_FETCH_RELEASE_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_SUB_FETCH_RELEASE_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_SUB_FETCH_ACQ_REL_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_SUB_FETCH_ACQ_REL_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_SUB_FETCH_SEQ_CST_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_SUB_FETCH_SEQ_CST_8)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_SUB_FETCH_RELAXED_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_SUB_FETCH_RELAXED_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_SUB_FETCH_ACQUIRE_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_SUB_FETCH_ACQUIRE_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_SUB_FETCH_RELEASE_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_SUB_FETCH_RELEASE_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_SUB_FETCH_ACQ_REL_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_SUB_FETCH_ACQ_REL_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_SUB_FETCH_SEQ_CST_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_SUB_FETCH_SEQ_CST_16)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_SUB_FETCH_RELAXED_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_SUB_FETCH_RELAXED_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_SUB_FETCH_ACQUIRE_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_SUB_FETCH_ACQUIRE_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_SUB_FETCH_RELEASE_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_SUB_FETCH_RELEASE_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_SUB_FETCH_ACQ_REL_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_SUB_FETCH_ACQ_REL_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_SUB_FETCH_SEQ_CST_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_SUB_FETCH_SEQ_CST_32)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_SUB_FETCH_RELAXED_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_SUB_FETCH_RELAXED_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_SUB_FETCH_ACQUIRE_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_SUB_FETCH_ACQUIRE_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_SUB_FETCH_RELEASE_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_SUB_FETCH_RELEASE_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_SUB_FETCH_ACQ_REL_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_SUB_FETCH_ACQ_REL_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_SUB_FETCH_SEQ_CST_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_SUB_FETCH_SEQ_CST_64)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_SUB_FETCH_RELAXED_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_SUB_FETCH_RELAXED_128)(type, ret, ptr, val) + +#define 
EASTL_ATOMIC_SUB_FETCH_ACQUIRE_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_SUB_FETCH_ACQUIRE_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_SUB_FETCH_RELEASE_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_SUB_FETCH_RELEASE_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_SUB_FETCH_ACQ_REL_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_SUB_FETCH_ACQ_REL_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_SUB_FETCH_SEQ_CST_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_SUB_FETCH_SEQ_CST_128)(type, ret, ptr, val) + + +#endif /* EASTL_ATOMIC_INTERNAL_MACROS_SUB_FETCH_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_thread_fence.h b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_thread_fence.h new file mode 100644 index 00000000..26492c59 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_thread_fence.h @@ -0,0 +1,34 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_MACROS_THREAD_FENCE_H +#define EASTL_ATOMIC_INTERNAL_MACROS_THREAD_FENCE_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ATOMIC_THREAD_FENCE_*() +// +#define EASTL_ATOMIC_THREAD_FENCE_RELAXED() \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_THREAD_FENCE_RELAXED)() + +#define EASTL_ATOMIC_THREAD_FENCE_ACQUIRE() \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_THREAD_FENCE_ACQUIRE)() + +#define EASTL_ATOMIC_THREAD_FENCE_RELEASE() \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_THREAD_FENCE_RELEASE)() + +#define EASTL_ATOMIC_THREAD_FENCE_ACQ_REL() \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_THREAD_FENCE_ACQ_REL)() + +#define EASTL_ATOMIC_THREAD_FENCE_SEQ_CST() \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_THREAD_FENCE_SEQ_CST)() + + +#endif /* EASTL_ATOMIC_INTERNAL_MACROS_THREAD_FENCE_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_xor_fetch.h b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_xor_fetch.h new file mode 100644 index 00000000..42276470 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_xor_fetch.h @@ -0,0 +1,98 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
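THREAD_FENCE covers the cases where ordering is attached to a standalone fence instead of an individual load or store, for example when a relaxed flag store has to publish an earlier plain write. A sketch assuming eastl::atomic_thread_fence mirrors std::atomic_thread_fence:

#include <EASTL/atomic.h>

static int payload = 0;
static eastl::atomic<bool> ready{false};

void publish()
{
    payload = 123;                                            // plain write
    eastl::atomic_thread_fence(eastl::memory_order_release);  // ordered before...
    ready.store(true, eastl::memory_order_relaxed);           // ...this flag store
}

void consume()
{
    while (!ready.load(eastl::memory_order_relaxed)) { }
    eastl::atomic_thread_fence(eastl::memory_order_acquire);  // pairs with the release fence
    // payload now reads 123.
}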
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_MACROS_XOR_FETCH_H +#define EASTL_ATOMIC_INTERNAL_MACROS_XOR_FETCH_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ATOMIC_XOR_FETCH_*_N(type, type ret, type * ptr, type val) +// +#define EASTL_ATOMIC_XOR_FETCH_RELAXED_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_XOR_FETCH_RELAXED_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_XOR_FETCH_ACQUIRE_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_XOR_FETCH_ACQUIRE_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_XOR_FETCH_RELEASE_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_XOR_FETCH_RELEASE_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_XOR_FETCH_ACQ_REL_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_XOR_FETCH_ACQ_REL_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_XOR_FETCH_SEQ_CST_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_XOR_FETCH_SEQ_CST_8)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_XOR_FETCH_RELAXED_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_XOR_FETCH_RELAXED_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_XOR_FETCH_ACQUIRE_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_XOR_FETCH_ACQUIRE_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_XOR_FETCH_RELEASE_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_XOR_FETCH_RELEASE_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_XOR_FETCH_ACQ_REL_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_XOR_FETCH_ACQ_REL_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_XOR_FETCH_SEQ_CST_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_XOR_FETCH_SEQ_CST_16)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_XOR_FETCH_RELAXED_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_XOR_FETCH_RELAXED_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_XOR_FETCH_ACQUIRE_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_XOR_FETCH_ACQUIRE_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_XOR_FETCH_RELEASE_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_XOR_FETCH_RELEASE_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_XOR_FETCH_ACQ_REL_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_XOR_FETCH_ACQ_REL_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_XOR_FETCH_SEQ_CST_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_XOR_FETCH_SEQ_CST_32)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_XOR_FETCH_RELAXED_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_XOR_FETCH_RELAXED_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_XOR_FETCH_ACQUIRE_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_XOR_FETCH_ACQUIRE_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_XOR_FETCH_RELEASE_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_XOR_FETCH_RELEASE_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_XOR_FETCH_ACQ_REL_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_XOR_FETCH_ACQ_REL_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_XOR_FETCH_SEQ_CST_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_XOR_FETCH_SEQ_CST_64)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_XOR_FETCH_RELAXED_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_XOR_FETCH_RELAXED_128)(type, ret, ptr, val) + +#define 
EASTL_ATOMIC_XOR_FETCH_ACQUIRE_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_XOR_FETCH_ACQUIRE_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_XOR_FETCH_RELEASE_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_XOR_FETCH_RELEASE_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_XOR_FETCH_ACQ_REL_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_XOR_FETCH_ACQ_REL_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_XOR_FETCH_SEQ_CST_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_XOR_FETCH_SEQ_CST_128)(type, ret, ptr, val) + + +#endif /* EASTL_ATOMIC_INTERNAL_MACROS_XOR_FETCH_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/atomic_memory_order.h b/libkram/eastl/include/EASTL/internal/atomic/atomic_memory_order.h new file mode 100644 index 00000000..b1c14035 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/atomic_memory_order.h @@ -0,0 +1,44 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_MEMORY_ORDER_H +#define EASTL_ATOMIC_INTERNAL_MEMORY_ORDER_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +namespace eastl +{ + + +namespace internal +{ + + +struct memory_order_relaxed_s {}; +struct memory_order_read_depends_s {}; +struct memory_order_acquire_s {}; +struct memory_order_release_s {}; +struct memory_order_acq_rel_s {}; +struct memory_order_seq_cst_s {}; + + +} // namespace internal + + +EASTL_CPP17_INLINE_VARIABLE constexpr auto memory_order_relaxed = internal::memory_order_relaxed_s{}; +EASTL_CPP17_INLINE_VARIABLE constexpr auto memory_order_read_depends = internal::memory_order_read_depends_s{}; +EASTL_CPP17_INLINE_VARIABLE constexpr auto memory_order_acquire = internal::memory_order_acquire_s{}; +EASTL_CPP17_INLINE_VARIABLE constexpr auto memory_order_release = internal::memory_order_release_s{}; +EASTL_CPP17_INLINE_VARIABLE constexpr auto memory_order_acq_rel = internal::memory_order_acq_rel_s{}; +EASTL_CPP17_INLINE_VARIABLE constexpr auto memory_order_seq_cst = internal::memory_order_seq_cst_s{}; + + +} // namespace eastl + + +#endif /* EASTL_ATOMIC_INTERNAL_MEMORY_ORDER_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/atomic_pointer.h b/libkram/eastl/include/EASTL/internal/atomic/atomic_pointer.h new file mode 100644 index 00000000..18f6691c --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/atomic_pointer.h @@ -0,0 +1,281 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
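Note that the memory orders defined in atomic_memory_order.h just above are distinct empty tag types chosen by overload resolution, not values of a single enum as in std::atomic; that is what allows an order that is invalid for a given operation (say, a release load) to be rejected at compile time. Call sites therefore look like the std versions but pass a tag object, as in this sketch:

#include <stdint.h>
#include <EASTL/atomic.h>

static eastl::atomic<uint64_t> counter{0};

uint64_t bump()
{
    // eastl::memory_order_relaxed is a constexpr tag object
    // (eastl::internal::memory_order_relaxed_s), not an enum value.
    return counter.fetch_add(1, eastl::memory_order_relaxed);
}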
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_POINTER_H +#define EASTL_ATOMIC_INTERNAL_POINTER_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#include "atomic_push_compiler_options.h" + + +namespace eastl +{ + + +namespace internal +{ + + + template + struct atomic_pointer_base; + +#define EASTL_ATOMIC_POINTER_STATIC_ASSERT_FUNCS_IMPL(funcName) \ + template \ + T* funcName(ptrdiff_t arg, Order order) EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_STATIC_ASSERT_INVALID_MEMORY_ORDER(T); \ + } \ + \ + template \ + T* funcName(ptrdiff_t arg, Order order) volatile EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_STATIC_ASSERT_VOLATILE_MEM_FN(T); \ + } \ + \ + T* funcName(ptrdiff_t arg) volatile EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_STATIC_ASSERT_VOLATILE_MEM_FN(T); \ + } + +#define EASTL_ATOMIC_POINTER_STATIC_ASSERT_INC_DEC_OPERATOR_IMPL(operatorOp) \ + T* operator operatorOp() volatile EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_STATIC_ASSERT_VOLATILE_MEM_FN(T); \ + } \ + \ + T* operator operatorOp(int) volatile EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_STATIC_ASSERT_VOLATILE_MEM_FN(T); \ + } + +#define EASTL_ATOMIC_POINTER_STATIC_ASSERT_ASSIGNMENT_OPERATOR_IMPL(operatorOp) \ + T* operator operatorOp(ptrdiff_t arg) volatile EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_STATIC_ASSERT_VOLATILE_MEM_FN(T); \ + } + + + template + struct atomic_pointer_base : public atomic_base_width + { + private: + + using Base = atomic_base_width; + + public: /* ctors */ + + EA_CONSTEXPR atomic_pointer_base(T* desired) EA_NOEXCEPT + : Base{ desired } + { + } + + EA_CONSTEXPR atomic_pointer_base() EA_NOEXCEPT = default; + + atomic_pointer_base(const atomic_pointer_base&) EA_NOEXCEPT = delete; + + public: /* assignment operators */ + + using Base::operator=; + + atomic_pointer_base& operator=(const atomic_pointer_base&) EA_NOEXCEPT = delete; + atomic_pointer_base& operator=(const atomic_pointer_base&) volatile EA_NOEXCEPT = delete; + + public: /* fetch_add */ + + EASTL_ATOMIC_POINTER_STATIC_ASSERT_FUNCS_IMPL(fetch_add) + + public: /* add_fetch */ + + EASTL_ATOMIC_POINTER_STATIC_ASSERT_FUNCS_IMPL(add_fetch) + + public: /* fetch_sub */ + + EASTL_ATOMIC_POINTER_STATIC_ASSERT_FUNCS_IMPL(fetch_sub) + + public: /* sub_fetch */ + + EASTL_ATOMIC_POINTER_STATIC_ASSERT_FUNCS_IMPL(sub_fetch) + + public: /* operator++ && operator-- */ + + EASTL_ATOMIC_POINTER_STATIC_ASSERT_INC_DEC_OPERATOR_IMPL(++) + + EASTL_ATOMIC_POINTER_STATIC_ASSERT_INC_DEC_OPERATOR_IMPL(--) + + public: /* operator+= && operator-= */ + + EASTL_ATOMIC_POINTER_STATIC_ASSERT_ASSIGNMENT_OPERATOR_IMPL(+=) + + EASTL_ATOMIC_POINTER_STATIC_ASSERT_ASSIGNMENT_OPERATOR_IMPL(-=) + + }; + + + template + struct atomic_pointer_width; + +#define EASTL_ATOMIC_POINTER_FUNC_IMPL(op, bits) \ + T* retVal; \ + { \ + ptr_integral_type retType; \ + ptr_integral_type addend = static_cast(arg) * static_cast(sizeof(T)); \ + \ + EA_PREPROCESSOR_JOIN(op, bits)(ptr_integral_type, retType, EASTL_ATOMIC_INTEGRAL_CAST(ptr_integral_type, this->GetAtomicAddress()), addend); \ + \ + retVal = reinterpret_cast(retType); \ + } \ + return retVal; + +#define EASTL_ATOMIC_POINTER_FETCH_IMPL(funcName, op, bits) \ + T* funcName(ptrdiff_t arg) EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_STATIC_ASSERT_TYPE_IS_OBJECT(T); \ + EASTL_ATOMIC_POINTER_FUNC_IMPL(op, bits); \ + } + +#define EASTL_ATOMIC_POINTER_FETCH_ORDER_IMPL(funcName, orderType, op, bits) \ + T* funcName(ptrdiff_t arg, orderType) EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_STATIC_ASSERT_TYPE_IS_OBJECT(T); \ + 
EASTL_ATOMIC_POINTER_FUNC_IMPL(op, bits); \ + } + +#define EASTL_ATOMIC_POINTER_FETCH_OP_JOIN(fetchOp, Order) \ + EA_PREPROCESSOR_JOIN(EA_PREPROCESSOR_JOIN(EASTL_ATOMIC_, fetchOp), Order) + +#define EASTL_ATOMIC_POINTER_FETCH_FUNCS_IMPL(funcName, fetchOp, bits) \ + using Base::funcName; \ + \ + EASTL_ATOMIC_POINTER_FETCH_IMPL(funcName, EASTL_ATOMIC_POINTER_FETCH_OP_JOIN(fetchOp, _SEQ_CST_), bits) \ + \ + EASTL_ATOMIC_POINTER_FETCH_ORDER_IMPL(funcName, eastl::internal::memory_order_relaxed_s, \ + EASTL_ATOMIC_POINTER_FETCH_OP_JOIN(fetchOp, _RELAXED_), bits) \ + \ + EASTL_ATOMIC_POINTER_FETCH_ORDER_IMPL(funcName, eastl::internal::memory_order_acquire_s, \ + EASTL_ATOMIC_POINTER_FETCH_OP_JOIN(fetchOp, _ACQUIRE_), bits) \ + \ + EASTL_ATOMIC_POINTER_FETCH_ORDER_IMPL(funcName, eastl::internal::memory_order_release_s, \ + EASTL_ATOMIC_POINTER_FETCH_OP_JOIN(fetchOp, _RELEASE_), bits) \ + \ + EASTL_ATOMIC_POINTER_FETCH_ORDER_IMPL(funcName, eastl::internal::memory_order_acq_rel_s, \ + EASTL_ATOMIC_POINTER_FETCH_OP_JOIN(fetchOp, _ACQ_REL_), bits) \ + \ + EASTL_ATOMIC_POINTER_FETCH_ORDER_IMPL(funcName, eastl::internal::memory_order_seq_cst_s, \ + EASTL_ATOMIC_POINTER_FETCH_OP_JOIN(fetchOp, _SEQ_CST_), bits) + +#define EASTL_ATOMIC_POINTER_FETCH_INC_DEC_OPERATOR_IMPL(operatorOp, preFuncName, postFuncName) \ + using Base::operator operatorOp; \ + \ + T* operator operatorOp() EA_NOEXCEPT \ + { \ + return preFuncName(1, eastl::memory_order_seq_cst); \ + } \ + \ + T* operator operatorOp(int) EA_NOEXCEPT \ + { \ + return postFuncName(1, eastl::memory_order_seq_cst); \ + } + +#define EASTL_ATOMIC_POINTER_FETCH_ASSIGNMENT_OPERATOR_IMPL(operatorOp, funcName) \ + using Base::operator operatorOp; \ + \ + T* operator operatorOp(ptrdiff_t arg) EA_NOEXCEPT \ + { \ + return funcName(arg, eastl::memory_order_seq_cst); \ + } + + +#define EASTL_ATOMIC_POINTER_WIDTH_SPECIALIZE(bytes, bits) \ + template \ + struct atomic_pointer_width : public atomic_pointer_base \ + { \ + private: \ + \ + using Base = atomic_pointer_base; \ + using u_ptr_integral_type = EA_PREPROCESSOR_JOIN(EA_PREPROCESSOR_JOIN(uint, bits), _t); \ + using ptr_integral_type = EA_PREPROCESSOR_JOIN(EA_PREPROCESSOR_JOIN(int, bits), _t); \ + \ + public: /* ctors */ \ + \ + EA_CONSTEXPR atomic_pointer_width(T* desired) EA_NOEXCEPT \ + : Base{ desired } \ + { \ + } \ + \ + EA_CONSTEXPR atomic_pointer_width() EA_NOEXCEPT = default; \ + \ + atomic_pointer_width(const atomic_pointer_width&) EA_NOEXCEPT = delete; \ + \ + public: /* assignment operators */ \ + \ + using Base::operator=; \ + \ + atomic_pointer_width& operator=(const atomic_pointer_width&) EA_NOEXCEPT = delete; \ + atomic_pointer_width& operator=(const atomic_pointer_width&) volatile EA_NOEXCEPT = delete; \ + \ + public: /* fetch_add */ \ + \ + EASTL_ATOMIC_POINTER_FETCH_FUNCS_IMPL(fetch_add, FETCH_ADD, bits) \ + \ + public: /* add_fetch */ \ + \ + EASTL_ATOMIC_POINTER_FETCH_FUNCS_IMPL(add_fetch, ADD_FETCH, bits) \ + \ + public: /* fetch_sub */ \ + \ + EASTL_ATOMIC_POINTER_FETCH_FUNCS_IMPL(fetch_sub, FETCH_SUB, bits) \ + \ + public: /* sub_fetch */ \ + \ + EASTL_ATOMIC_POINTER_FETCH_FUNCS_IMPL(sub_fetch, SUB_FETCH, bits) \ + \ + public: /* operator++ && operator-- */ \ + \ + EASTL_ATOMIC_POINTER_FETCH_INC_DEC_OPERATOR_IMPL(++, add_fetch, fetch_add) \ + \ + EASTL_ATOMIC_POINTER_FETCH_INC_DEC_OPERATOR_IMPL(--, sub_fetch, fetch_sub) \ + \ + public: /* operator+= && operator-= */ \ + \ + EASTL_ATOMIC_POINTER_FETCH_ASSIGNMENT_OPERATOR_IMPL(+=, add_fetch) \ + \ + 
EASTL_ATOMIC_POINTER_FETCH_ASSIGNMENT_OPERATOR_IMPL(-=, sub_fetch) \ + \ + public: \ + \ + using Base::load; \ + \ + T* load(eastl::internal::memory_order_read_depends_s) EA_NOEXCEPT \ + { \ + T* retPointer; \ + EA_PREPROCESSOR_JOIN(EASTL_ATOMIC_LOAD_READ_DEPENDS_, bits)(T*, retPointer, this->GetAtomicAddress()); \ + return retPointer; \ + } \ + }; + + +#if defined(EASTL_ATOMIC_HAS_32BIT) && EA_PLATFORM_PTR_SIZE == 4 + EASTL_ATOMIC_POINTER_WIDTH_SPECIALIZE(4, 32) +#endif + +#if defined(EASTL_ATOMIC_HAS_64BIT) && EA_PLATFORM_PTR_SIZE == 8 + EASTL_ATOMIC_POINTER_WIDTH_SPECIALIZE(8, 64) +#endif + + +} // namespace internal + + +} // namespace eastl + + +#include "atomic_pop_compiler_options.h" + + +#endif /* EASTL_ATOMIC_INTERNAL_POINTER_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/atomic_pop_compiler_options.h b/libkram/eastl/include/EASTL/internal/atomic/atomic_pop_compiler_options.h new file mode 100644 index 00000000..92f241a1 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/atomic_pop_compiler_options.h @@ -0,0 +1,11 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + + +/* NOTE: No Header Guard */ + + +EA_RESTORE_VC_WARNING(); + +EA_RESTORE_CLANG_WARNING(); diff --git a/libkram/eastl/include/EASTL/internal/atomic/atomic_push_compiler_options.h b/libkram/eastl/include/EASTL/internal/atomic/atomic_push_compiler_options.h new file mode 100644 index 00000000..c5a54715 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/atomic_push_compiler_options.h @@ -0,0 +1,17 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + + +/* NOTE: No Header Guard */ + + +// 'class' : multiple assignment operators specified +EA_DISABLE_VC_WARNING(4522); + +// misaligned atomic operation may incur significant performance penalty +// The above warning is emitted in earlier versions of clang incorrectly. +// All eastl::atomic objects are size aligned. +// This is static and runtime asserted. +// Thus we disable this warning. +EA_DISABLE_CLANG_WARNING(-Watomic-alignment); diff --git a/libkram/eastl/include/EASTL/internal/atomic/atomic_size_aligned.h b/libkram/eastl/include/EASTL/internal/atomic/atomic_size_aligned.h new file mode 100644 index 00000000..db23e478 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/atomic_size_aligned.h @@ -0,0 +1,197 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
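+//
+// atomic_size_aligned<T> below stores its value with EA_ALIGN(sizeof(T)), so a
+// 64-bit atomic keeps 8-byte alignment even on 32-bit targets where the compiler
+// might otherwise use a smaller alignment (see the comment near mAtomic below).
+// A rough, illustrative sketch of the idea; 'SizeAligned64' is a made-up name:
+//
+//   struct SizeAligned64 { EA_ALIGN(8) uint64_t value; };
+//   static_assert(EA_ALIGN_OF(SizeAligned64) == 8, "storage is size aligned");
+//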
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_SIZE_ALIGNED_H +#define EASTL_ATOMIC_INTERNAL_SIZE_ALIGNED_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#include "atomic_push_compiler_options.h" + + +namespace eastl +{ + + +namespace internal +{ + + +#define EASTL_ATOMIC_SIZE_ALIGNED_STATIC_ASSERT_CMPXCHG_IMPL(funcName) \ + template \ + bool funcName(T& expected, T desired, \ + OrderSuccess orderSuccess, \ + OrderFailure orderFailure) EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_STATIC_ASSERT_INVALID_MEMORY_ORDER(T); \ + return false; \ + } \ + \ + template \ + bool funcName(T& expected, T desired, \ + OrderSuccess orderSuccess, \ + OrderFailure orderFailure) volatile EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_STATIC_ASSERT_VOLATILE_MEM_FN(T); \ + return false; \ + } \ + \ + template \ + bool funcName(T& expected, T desired, \ + Order order) EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_STATIC_ASSERT_INVALID_MEMORY_ORDER(T); \ + return false; \ + } \ + \ + template \ + bool funcName(T& expected, T desired, \ + Order order) volatile EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_STATIC_ASSERT_VOLATILE_MEM_FN(T); \ + return false; \ + } \ + \ + bool funcName(T& expected, T desired) volatile EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_STATIC_ASSERT_VOLATILE_MEM_FN(T); \ + return false; \ + } + +#define EASTL_ATOMIC_SIZE_ALIGNED_STATIC_ASSERT_CMPXCHG_WEAK_IMPL() \ + EASTL_ATOMIC_SIZE_ALIGNED_STATIC_ASSERT_CMPXCHG_IMPL(compare_exchange_weak) + +#define EASTL_ATOMIC_SIZE_ALIGNED_STATIC_ASSERT_CMPXCHG_STRONG_IMPL() \ + EASTL_ATOMIC_SIZE_ALIGNED_STATIC_ASSERT_CMPXCHG_IMPL(compare_exchange_strong) + + + template + struct atomic_size_aligned + { + public: /* ctors */ + + EA_CONSTEXPR atomic_size_aligned(T desired) EA_NOEXCEPT + : mAtomic{ desired } + { + } + + EA_CONSTEXPR atomic_size_aligned() EA_NOEXCEPT_IF(eastl::is_nothrow_default_constructible_v) + : mAtomic{} /* Value-Initialize which will Zero-Initialize Trivial Constructible types */ + { + } + + atomic_size_aligned(const atomic_size_aligned&) EA_NOEXCEPT = delete; + + public: /* store */ + + template + void store(T desired, Order order) EA_NOEXCEPT + { + EASTL_ATOMIC_STATIC_ASSERT_INVALID_MEMORY_ORDER(T); + } + + template + void store(T desired, Order order) volatile EA_NOEXCEPT + { + EASTL_ATOMIC_STATIC_ASSERT_VOLATILE_MEM_FN(T); + } + + void store(T desired) volatile EA_NOEXCEPT + { + EASTL_ATOMIC_STATIC_ASSERT_VOLATILE_MEM_FN(T); + } + + public: /* load */ + + template + T load(Order order) const EA_NOEXCEPT + { + EASTL_ATOMIC_STATIC_ASSERT_INVALID_MEMORY_ORDER(T); + } + + template + T load(Order order) const volatile EA_NOEXCEPT + { + EASTL_ATOMIC_STATIC_ASSERT_VOLATILE_MEM_FN(T); + } + + T load() const volatile EA_NOEXCEPT + { + EASTL_ATOMIC_STATIC_ASSERT_VOLATILE_MEM_FN(T); + } + + public: /* exchange */ + + template + T exchange(T desired, Order order) EA_NOEXCEPT + { + EASTL_ATOMIC_STATIC_ASSERT_INVALID_MEMORY_ORDER(T); + } + + template + T exchange(T desired, Order order) volatile EA_NOEXCEPT + { + EASTL_ATOMIC_STATIC_ASSERT_VOLATILE_MEM_FN(T); + } + + T exchange(T desired) volatile EA_NOEXCEPT + { + EASTL_ATOMIC_STATIC_ASSERT_VOLATILE_MEM_FN(T); + } + + public: /* compare_exchange_weak */ + + EASTL_ATOMIC_SIZE_ALIGNED_STATIC_ASSERT_CMPXCHG_WEAK_IMPL() + + public: /* compare_exchange_strong */ + + EASTL_ATOMIC_SIZE_ALIGNED_STATIC_ASSERT_CMPXCHG_STRONG_IMPL() + + public: /* assignment operator */ + + T operator=(T desired) volatile EA_NOEXCEPT + { + 
EASTL_ATOMIC_STATIC_ASSERT_VOLATILE_MEM_FN(T); + } + + atomic_size_aligned& operator=(const atomic_size_aligned&) EA_NOEXCEPT = delete; + atomic_size_aligned& operator=(const atomic_size_aligned&) volatile EA_NOEXCEPT = delete; + + protected: /* Accessors */ + + T* GetAtomicAddress() const EA_NOEXCEPT + { + return eastl::addressof(mAtomic); + } + + private: + + /** + * Some compilers such as MSVC will align 64-bit values on 32-bit machines on + * 4-byte boundaries which can ruin the atomicity guarantees. + * + * Ensure everything is size aligned. + * + * mutable is needed in cases such as when loads are only guaranteed to be atomic + * using a compare exchange, such as for 128-bit atomics, so we need to be able + * to have write access to the variable as one example. + */ + EA_ALIGN(sizeof(T)) mutable T mAtomic; + }; + + +} // namespace internal + + +} // namespace eastl + + +#include "atomic_pop_compiler_options.h" + + +#endif /* EASTL_ATOMIC_INTERNAL_SIZE_ALIGNED_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/atomic_standalone.h b/libkram/eastl/include/EASTL/internal/atomic/atomic_standalone.h new file mode 100644 index 00000000..011d5fb3 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/atomic_standalone.h @@ -0,0 +1,470 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_STANDALONE_H +#define EASTL_ATOMIC_INTERNAL_STANDALONE_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +namespace eastl +{ + + +//////////////////////////////////////////////////////////////////////////////// +// +// bool atomic_compare_exchange_strong(eastl::atomic*, T* expected, T desired) +// +template +EASTL_FORCE_INLINE bool atomic_compare_exchange_strong(eastl::atomic* atomicObj, + typename eastl::atomic::value_type* expected, + typename eastl::atomic::value_type desired) EA_NOEXCEPT +{ + return atomicObj->compare_exchange_strong(*expected, desired); +} + +template +EASTL_FORCE_INLINE bool atomic_compare_exchange_strong_explicit(eastl::atomic* atomicObj, + typename eastl::atomic::value_type* expected, + typename eastl::atomic::value_type desired, + OrderSuccess orderSuccess, OrderFailure orderFailure) EA_NOEXCEPT +{ + return atomicObj->compare_exchange_strong(*expected, desired, orderSuccess, orderFailure); +} + + +//////////////////////////////////////////////////////////////////////////////// +// +// bool atomic_compare_exchange_weak(eastl::atomic*, T* expected, T desired) +// +template +EASTL_FORCE_INLINE bool atomic_compare_exchange_weak(eastl::atomic* atomicObj, + typename eastl::atomic::value_type* expected, + typename eastl::atomic::value_type desired) EA_NOEXCEPT +{ + return atomicObj->compare_exchange_weak(*expected, desired); +} + +template +EASTL_FORCE_INLINE bool atomic_compare_exchange_weak_explicit(eastl::atomic* atomicObj, + typename eastl::atomic::value_type* expected, + typename eastl::atomic::value_type desired, + OrderSuccess orderSuccess, OrderFailure orderFailure) EA_NOEXCEPT +{ + return atomicObj->compare_exchange_weak(*expected, desired, orderSuccess, orderFailure); +} + + +//////////////////////////////////////////////////////////////////////////////// +// +// T atomic_fetch_xor(eastl::atomic*, T arg) +// +template +EASTL_FORCE_INLINE typename eastl::atomic::value_type atomic_fetch_xor(eastl::atomic* atomicObj, + typename 
eastl::atomic::value_type arg) EA_NOEXCEPT +{ + return atomicObj->fetch_xor(arg); +} + +template +EASTL_FORCE_INLINE typename eastl::atomic::value_type atomic_fetch_xor_explicit(eastl::atomic* atomicObj, + typename eastl::atomic::value_type arg, + Order order) EA_NOEXCEPT +{ + return atomicObj->fetch_xor(arg, order); +} + + +//////////////////////////////////////////////////////////////////////////////// +// +// T atomic_xor_fetch(eastl::atomic*, T arg) +// +template +EASTL_FORCE_INLINE typename eastl::atomic::value_type atomic_xor_fetch(eastl::atomic* atomicObj, + typename eastl::atomic::value_type arg) EA_NOEXCEPT +{ + return atomicObj->xor_fetch(arg); +} + +template +EASTL_FORCE_INLINE typename eastl::atomic::value_type atomic_xor_fetch_explicit(eastl::atomic* atomicObj, + typename eastl::atomic::value_type arg, + Order order) EA_NOEXCEPT +{ + return atomicObj->xor_fetch(arg, order); +} + + +//////////////////////////////////////////////////////////////////////////////// +// +// T atomic_fetch_or(eastl::atomic*, T arg) +// +template +EASTL_FORCE_INLINE typename eastl::atomic::value_type atomic_fetch_or(eastl::atomic* atomicObj, + typename eastl::atomic::value_type arg) EA_NOEXCEPT +{ + return atomicObj->fetch_or(arg); +} + +template +EASTL_FORCE_INLINE typename eastl::atomic::value_type atomic_fetch_or_explicit(eastl::atomic* atomicObj, + typename eastl::atomic::value_type arg, + Order order) EA_NOEXCEPT +{ + return atomicObj->fetch_or(arg, order); +} + + +//////////////////////////////////////////////////////////////////////////////// +// +// T atomic_or_fetch(eastl::atomic*, T arg) +// +template +EASTL_FORCE_INLINE typename eastl::atomic::value_type atomic_or_fetch(eastl::atomic* atomicObj, + typename eastl::atomic::value_type arg) EA_NOEXCEPT +{ + return atomicObj->or_fetch(arg); +} + +template +EASTL_FORCE_INLINE typename eastl::atomic::value_type atomic_or_fetch_explicit(eastl::atomic* atomicObj, + typename eastl::atomic::value_type arg, + Order order) EA_NOEXCEPT +{ + return atomicObj->or_fetch(arg, order); +} + + +//////////////////////////////////////////////////////////////////////////////// +// +// T atomic_fetch_and(eastl::atomic*, T arg) +// +template +EASTL_FORCE_INLINE typename eastl::atomic::value_type atomic_fetch_and(eastl::atomic* atomicObj, + typename eastl::atomic::value_type arg) EA_NOEXCEPT +{ + return atomicObj->fetch_and(arg); +} + +template +EASTL_FORCE_INLINE typename eastl::atomic::value_type atomic_fetch_and_explicit(eastl::atomic* atomicObj, + typename eastl::atomic::value_type arg, + Order order) EA_NOEXCEPT +{ + return atomicObj->fetch_and(arg, order); +} + + +//////////////////////////////////////////////////////////////////////////////// +// +// T atomic_and_fetch(eastl::atomic*, T arg) +// +template +EASTL_FORCE_INLINE typename eastl::atomic::value_type atomic_and_fetch(eastl::atomic* atomicObj, + typename eastl::atomic::value_type arg) EA_NOEXCEPT +{ + return atomicObj->and_fetch(arg); +} + +template +EASTL_FORCE_INLINE typename eastl::atomic::value_type atomic_and_fetch_explicit(eastl::atomic* atomicObj, + typename eastl::atomic::value_type arg, + Order order) EA_NOEXCEPT +{ + return atomicObj->and_fetch(arg, order); +} + + +///////////////////////////////////////////////////////////////////////////////// +// +// T atomic_fetch_sub(eastl::atomic*, T arg) +// +template +EASTL_FORCE_INLINE typename eastl::atomic::value_type atomic_fetch_sub(eastl::atomic* atomicObj, + typename eastl::atomic::difference_type arg) EA_NOEXCEPT +{ + return 
atomicObj->fetch_sub(arg); +} + +template +EASTL_FORCE_INLINE typename eastl::atomic::value_type atomic_fetch_sub_explicit(eastl::atomic* atomicObj, + typename eastl::atomic::difference_type arg, + Order order) EA_NOEXCEPT +{ + return atomicObj->fetch_sub(arg, order); +} + + +///////////////////////////////////////////////////////////////////////////////// +// +// T atomic_sub_fetch(eastl::atomic*, T arg) +// +template +EASTL_FORCE_INLINE typename eastl::atomic::value_type atomic_sub_fetch(eastl::atomic* atomicObj, + typename eastl::atomic::difference_type arg) EA_NOEXCEPT +{ + return atomicObj->sub_fetch(arg); +} + +template +EASTL_FORCE_INLINE typename eastl::atomic::value_type atomic_sub_fetch_explicit(eastl::atomic* atomicObj, + typename eastl::atomic::difference_type arg, + Order order) EA_NOEXCEPT +{ + return atomicObj->sub_fetch(arg, order); +} + + +///////////////////////////////////////////////////////////////////////////////// +// +// T atomic_fetch_add(eastl::atomic*, T arg) +// +template +EASTL_FORCE_INLINE typename eastl::atomic::value_type atomic_fetch_add(eastl::atomic* atomicObj, + typename eastl::atomic::difference_type arg) EA_NOEXCEPT +{ + return atomicObj->fetch_add(arg); +} + +template +EASTL_FORCE_INLINE typename eastl::atomic::value_type atomic_fetch_add_explicit(eastl::atomic* atomicObj, + typename eastl::atomic::difference_type arg, + Order order) EA_NOEXCEPT +{ + return atomicObj->fetch_add(arg, order); +} + + +///////////////////////////////////////////////////////////////////////////////// +// +// T atomic_add_fetch(eastl::atomic*, T arg) +// +template +EASTL_FORCE_INLINE typename eastl::atomic::value_type atomic_add_fetch(eastl::atomic* atomicObj, + typename eastl::atomic::difference_type arg) EA_NOEXCEPT +{ + return atomicObj->add_fetch(arg); +} + +template +EASTL_FORCE_INLINE typename eastl::atomic::value_type atomic_add_fetch_explicit(eastl::atomic* atomicObj, + typename eastl::atomic::difference_type arg, + Order order) EA_NOEXCEPT +{ + return atomicObj->add_fetch(arg, order); +} + + +///////////////////////////////////////////////////////////////////////////////// +// +// T atomic_exchange(eastl::atomic*, T desired) +// +template +EASTL_FORCE_INLINE typename eastl::atomic::value_type atomic_exchange(eastl::atomic* atomicObj, + typename eastl::atomic::value_type desired) EA_NOEXCEPT +{ + return atomicObj->exchange(desired); +} + +template +EASTL_FORCE_INLINE typename eastl::atomic::value_type atomic_exchange_explicit(eastl::atomic* atomicObj, + typename eastl::atomic::value_type desired, + Order order) EA_NOEXCEPT +{ + return atomicObj->exchange(desired, order); +} + + +///////////////////////////////////////////////////////////////////////////////// +// +// T atomic_load(const eastl::atomic*) +// +template +EASTL_FORCE_INLINE typename eastl::atomic::value_type atomic_load(const eastl::atomic* atomicObj) EA_NOEXCEPT +{ + return atomicObj->load(); +} + +template +EASTL_FORCE_INLINE typename eastl::atomic::value_type atomic_load_explicit(const eastl::atomic* atomicObj, Order order) EA_NOEXCEPT +{ + return atomicObj->load(order); +} + + +///////////////////////////////////////////////////////////////////////////////// +// +// T atomic_load_cond(const eastl::atomic*) +// +template +EASTL_FORCE_INLINE typename eastl::atomic::value_type atomic_load_cond(const eastl::atomic* atomicObj, Predicate pred) EA_NOEXCEPT +{ + for (;;) + { + typename eastl::atomic::value_type ret = atomicObj->load(); + + if (pred(ret)) + { + return ret; + } + + EASTL_ATOMIC_CPU_PAUSE(); + 
} +} + +template +EASTL_FORCE_INLINE typename eastl::atomic::value_type atomic_load_cond_explicit(const eastl::atomic* atomicObj, Predicate pred, Order order) EA_NOEXCEPT +{ + for (;;) + { + typename eastl::atomic::value_type ret = atomicObj->load(order); + + if (pred(ret)) + { + return ret; + } + + EASTL_ATOMIC_CPU_PAUSE(); + } +} + + +///////////////////////////////////////////////////////////////////////////////// +// +// void atomic_store(eastl::atomic*, T) +// +template +EASTL_FORCE_INLINE void atomic_store(eastl::atomic* atomicObj, typename eastl::atomic::value_type desired) EA_NOEXCEPT +{ + atomicObj->store(desired); +} + +template +EASTL_FORCE_INLINE void atomic_store_explicit(eastl::atomic* atomicObj, typename eastl::atomic::value_type desired, Order order) EA_NOEXCEPT +{ + atomicObj->store(desired, order); +} + + +///////////////////////////////////////////////////////////////////////////////// +// +// void eastl::atomic_thread_fence(Order) +// +template +EASTL_FORCE_INLINE void atomic_thread_fence(Order) EA_NOEXCEPT +{ + EASTL_ATOMIC_STATIC_ASSERT_INVALID_MEMORY_ORDER(Order); +} + +EASTL_FORCE_INLINE void atomic_thread_fence(eastl::internal::memory_order_relaxed_s) EA_NOEXCEPT +{ + EASTL_ATOMIC_THREAD_FENCE_RELAXED(); +} + +EASTL_FORCE_INLINE void atomic_thread_fence(eastl::internal::memory_order_acquire_s) EA_NOEXCEPT +{ + EASTL_ATOMIC_THREAD_FENCE_ACQUIRE(); +} + +EASTL_FORCE_INLINE void atomic_thread_fence(eastl::internal::memory_order_release_s) EA_NOEXCEPT +{ + EASTL_ATOMIC_THREAD_FENCE_RELEASE(); +} + +EASTL_FORCE_INLINE void atomic_thread_fence(eastl::internal::memory_order_acq_rel_s) EA_NOEXCEPT +{ + EASTL_ATOMIC_THREAD_FENCE_ACQ_REL(); +} + +EASTL_FORCE_INLINE void atomic_thread_fence(eastl::internal::memory_order_seq_cst_s) EA_NOEXCEPT +{ + EASTL_ATOMIC_THREAD_FENCE_SEQ_CST(); +} + + +///////////////////////////////////////////////////////////////////////////////// +// +// void eastl::atomic_signal_fence(Order) +// +template +EASTL_FORCE_INLINE void atomic_signal_fence(Order) EA_NOEXCEPT +{ + EASTL_ATOMIC_STATIC_ASSERT_INVALID_MEMORY_ORDER(Order); +} + +EASTL_FORCE_INLINE void atomic_signal_fence(eastl::internal::memory_order_relaxed_s) EA_NOEXCEPT +{ + EASTL_ATOMIC_SIGNAL_FENCE_RELAXED(); +} + +EASTL_FORCE_INLINE void atomic_signal_fence(eastl::internal::memory_order_acquire_s) EA_NOEXCEPT +{ + EASTL_ATOMIC_SIGNAL_FENCE_ACQUIRE(); +} + +EASTL_FORCE_INLINE void atomic_signal_fence(eastl::internal::memory_order_release_s) EA_NOEXCEPT +{ + EASTL_ATOMIC_SIGNAL_FENCE_RELEASE(); +} + +EASTL_FORCE_INLINE void atomic_signal_fence(eastl::internal::memory_order_acq_rel_s) EA_NOEXCEPT +{ + EASTL_ATOMIC_SIGNAL_FENCE_ACQ_REL(); +} + +EASTL_FORCE_INLINE void atomic_signal_fence(eastl::internal::memory_order_seq_cst_s) EA_NOEXCEPT +{ + EASTL_ATOMIC_SIGNAL_FENCE_SEQ_CST(); +} + + +///////////////////////////////////////////////////////////////////////////////// +// +// void eastl::compiler_barrier() +// +EASTL_FORCE_INLINE void compiler_barrier() EA_NOEXCEPT +{ + EASTL_ATOMIC_COMPILER_BARRIER(); +} + + +///////////////////////////////////////////////////////////////////////////////// +// +// void eastl::compiler_barrier_data_dependency(const T&) +// +template +EASTL_FORCE_INLINE void compiler_barrier_data_dependency(const T& val) EA_NOEXCEPT +{ + EASTL_ATOMIC_COMPILER_BARRIER_DATA_DEPENDENCY(val, T); +} + + +///////////////////////////////////////////////////////////////////////////////// +// +// void eastl::cpu_pause() +// +EASTL_FORCE_INLINE void cpu_pause() EA_NOEXCEPT +{ + 
EASTL_ATOMIC_CPU_PAUSE();
+}
+
+
+/////////////////////////////////////////////////////////////////////////////////
+//
+// bool eastl::atomic_is_lock_free(eastl::atomic<T>*)
+//
+template <typename T>
+EASTL_FORCE_INLINE bool atomic_is_lock_free(const eastl::atomic<T>* atomicObj) EA_NOEXCEPT
+{
+	return atomicObj->is_lock_free();
+}
+
+
+} // namespace eastl
+
+
+#endif /* EASTL_ATOMIC_INTERNAL_STANDALONE_H */
diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler.h
new file mode 100644
index 00000000..65a4cd00
--- /dev/null
+++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler.h
@@ -0,0 +1,120 @@
+/////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+/////////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_H
+#define EASTL_ATOMIC_INTERNAL_COMPILER_H
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once
+#endif
+
+
+/////////////////////////////////////////////////////////////////////////////////
+//
+// Include the compiler specific implementations
+//
+#if defined(EA_COMPILER_GNUC) || defined(EA_COMPILER_CLANG)
+
+	#include "gcc/compiler_gcc.h"
+
+#elif defined(EA_COMPILER_MSVC)
+
+	#include "msvc/compiler_msvc.h"
+
+#endif
+
+
+/////////////////////////////////////////////////////////////////////////////////
+
+
+namespace eastl
+{
+
+
+namespace internal
+{
+
+
+/**
+ * NOTE:
+ *
+ * This can be used by specific compiler implementations to implement a data dependency compiler barrier.
+ * Some compiler barriers do not take in input dependencies as is possible with the gcc asm syntax.
+ * Thus we need a way to create a false dependency on the input variable so the compiler does not dead-store
+ * remove it.
+ * A volatile function pointer ensures the compiler must always load the function pointer and call thru it
+ * since the compiler cannot reason about any side effects. Thus the compiler must always assume the
+ * input variable may be accessed and thus cannot be dead-stored. This technique works even in the presence
+ * of Link-Time Optimization. A compiler barrier with a data dependency is useful in these situations.
+ *
+ * void foo()
+ * {
+ *   eastl::vector<int> v;
+ *   while (Benchmark.ContinueRunning())
+ *   {
+ *     v.push_back(0);
+ *     eastl::compiler_barrier(); OR eastl::compiler_barrier_data_dependency(v);
+ *   }
+ * }
+ *
+ * We are trying to benchmark the push_back function of a vector. The vector v has only local scope.
+ * The compiler is well within its rights to remove all accesses to v even with the compiler barrier
+ * because there are no observable uses of the vector v.
+ * The compiler barrier data dependency ensures there is an input dependency on the variable so that
+ * it isn't removed. This is also useful when writing test code that the compiler may remove.
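+ *
+ * As a rough sketch only (the actual hook used here is the
+ * gCompilerBarrierDataDependencyFunc pointer declared just below), the volatile
+ * function-pointer trick looks roughly like this; 'SinkFunc' and 'gSink' are
+ * hypothetical names:
+ *
+ *   static void SinkFunc(void*) {}
+ *   static void (* volatile gSink)(void*) = &SinkFunc;
+ *
+ *   // The compiler must reload gSink and assume the call may read *addr,
+ *   // so stores to the pointed-to object cannot be dead-store eliminated.
+ *   void DataDependencyBarrier(void* addr) { gSink(addr); }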
+ */ + +typedef void (*CompilerBarrierDataDependencyFuncPtr)(void*); + +extern EASTL_API volatile CompilerBarrierDataDependencyFuncPtr gCompilerBarrierDataDependencyFunc; + + +#define EASTL_COMPILER_ATOMIC_COMPILER_BARRIER_DATA_DEPENDENCY_FUNC(ptr) \ + eastl::internal::gCompilerBarrierDataDependencyFunc(ptr) + + +} // namespace internal + + +} // namespace eastl + + +///////////////////////////////////////////////////////////////////////////////// + + +#include "compiler_fetch_add.h" +#include "compiler_fetch_sub.h" + +#include "compiler_fetch_and.h" +#include "compiler_fetch_xor.h" +#include "compiler_fetch_or.h" + +#include "compiler_add_fetch.h" +#include "compiler_sub_fetch.h" + +#include "compiler_and_fetch.h" +#include "compiler_xor_fetch.h" +#include "compiler_or_fetch.h" + +#include "compiler_exchange.h" + +#include "compiler_cmpxchg_weak.h" +#include "compiler_cmpxchg_strong.h" + +#include "compiler_load.h" +#include "compiler_store.h" + +#include "compiler_barrier.h" + +#include "compiler_cpu_pause.h" + +#include "compiler_memory_barrier.h" + +#include "compiler_signal_fence.h" + +#include "compiler_thread_fence.h" + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_add_fetch.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_add_fetch.h new file mode 100644 index 00000000..763921c4 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_add_fetch.h @@ -0,0 +1,173 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_ADD_FETCH_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_ADD_FETCH_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_ADD_FETCH_*_N(type, type ret, type * ptr, type val) +// +#if defined(EASTL_COMPILER_ATOMIC_ADD_FETCH_RELAXED_8) + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQUIRE_8) + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQUIRE_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQUIRE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_ADD_FETCH_RELEASE_8) + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_RELEASE_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_RELEASE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQ_REL_8) + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQ_REL_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQ_REL_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_ADD_FETCH_SEQ_CST_8) + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_SEQ_CST_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_SEQ_CST_8_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_ADD_FETCH_RELAXED_16) + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQUIRE_16) + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQUIRE_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQUIRE_16_AVAILABLE 0 +#endif + +#if 
defined(EASTL_COMPILER_ATOMIC_ADD_FETCH_RELEASE_16) + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_RELEASE_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_RELEASE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQ_REL_16) + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQ_REL_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQ_REL_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_ADD_FETCH_SEQ_CST_16) + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_SEQ_CST_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_SEQ_CST_16_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_ADD_FETCH_RELAXED_32) + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQUIRE_32) + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQUIRE_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQUIRE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_ADD_FETCH_RELEASE_32) + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_RELEASE_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_RELEASE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQ_REL_32) + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQ_REL_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQ_REL_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_ADD_FETCH_SEQ_CST_32) + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_SEQ_CST_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_SEQ_CST_32_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_ADD_FETCH_RELAXED_64) + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQUIRE_64) + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQUIRE_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQUIRE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_ADD_FETCH_RELEASE_64) + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_RELEASE_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_RELEASE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQ_REL_64) + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQ_REL_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQ_REL_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_ADD_FETCH_SEQ_CST_64) + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_SEQ_CST_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_SEQ_CST_64_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_ADD_FETCH_RELAXED_128) + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQUIRE_128) + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQUIRE_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQUIRE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_ADD_FETCH_RELEASE_128) + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_RELEASE_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_RELEASE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQ_REL_128) + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQ_REL_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQ_REL_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_ADD_FETCH_SEQ_CST_128) + #define 
EASTL_COMPILER_ATOMIC_ADD_FETCH_SEQ_CST_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_SEQ_CST_128_AVAILABLE 0 +#endif + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_ADD_FETCH_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_and_fetch.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_and_fetch.h new file mode 100644 index 00000000..7b1e0a42 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_and_fetch.h @@ -0,0 +1,173 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_AND_FETCH_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_AND_FETCH_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_AND_FETCH_*_N(type, type ret, type * ptr, type val) +// +#if defined(EASTL_COMPILER_ATOMIC_AND_FETCH_RELAXED_8) + #define EASTL_COMPILER_ATOMIC_AND_FETCH_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_AND_FETCH_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_AND_FETCH_ACQUIRE_8) + #define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQUIRE_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQUIRE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_AND_FETCH_RELEASE_8) + #define EASTL_COMPILER_ATOMIC_AND_FETCH_RELEASE_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_AND_FETCH_RELEASE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_AND_FETCH_ACQ_REL_8) + #define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQ_REL_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQ_REL_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_AND_FETCH_SEQ_CST_8) + #define EASTL_COMPILER_ATOMIC_AND_FETCH_SEQ_CST_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_AND_FETCH_SEQ_CST_8_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_AND_FETCH_RELAXED_16) + #define EASTL_COMPILER_ATOMIC_AND_FETCH_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_AND_FETCH_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_AND_FETCH_ACQUIRE_16) + #define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQUIRE_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQUIRE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_AND_FETCH_RELEASE_16) + #define EASTL_COMPILER_ATOMIC_AND_FETCH_RELEASE_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_AND_FETCH_RELEASE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_AND_FETCH_ACQ_REL_16) + #define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQ_REL_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQ_REL_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_AND_FETCH_SEQ_CST_16) + #define EASTL_COMPILER_ATOMIC_AND_FETCH_SEQ_CST_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_AND_FETCH_SEQ_CST_16_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_AND_FETCH_RELAXED_32) + #define EASTL_COMPILER_ATOMIC_AND_FETCH_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_AND_FETCH_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_AND_FETCH_ACQUIRE_32) + #define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQUIRE_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQUIRE_32_AVAILABLE 0 +#endif + +#if 
defined(EASTL_COMPILER_ATOMIC_AND_FETCH_RELEASE_32) + #define EASTL_COMPILER_ATOMIC_AND_FETCH_RELEASE_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_AND_FETCH_RELEASE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_AND_FETCH_ACQ_REL_32) + #define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQ_REL_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQ_REL_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_AND_FETCH_SEQ_CST_32) + #define EASTL_COMPILER_ATOMIC_AND_FETCH_SEQ_CST_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_AND_FETCH_SEQ_CST_32_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_AND_FETCH_RELAXED_64) + #define EASTL_COMPILER_ATOMIC_AND_FETCH_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_AND_FETCH_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_AND_FETCH_ACQUIRE_64) + #define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQUIRE_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQUIRE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_AND_FETCH_RELEASE_64) + #define EASTL_COMPILER_ATOMIC_AND_FETCH_RELEASE_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_AND_FETCH_RELEASE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_AND_FETCH_ACQ_REL_64) + #define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQ_REL_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQ_REL_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_AND_FETCH_SEQ_CST_64) + #define EASTL_COMPILER_ATOMIC_AND_FETCH_SEQ_CST_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_AND_FETCH_SEQ_CST_64_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_AND_FETCH_RELAXED_128) + #define EASTL_COMPILER_ATOMIC_AND_FETCH_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_AND_FETCH_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_AND_FETCH_ACQUIRE_128) + #define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQUIRE_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQUIRE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_AND_FETCH_RELEASE_128) + #define EASTL_COMPILER_ATOMIC_AND_FETCH_RELEASE_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_AND_FETCH_RELEASE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_AND_FETCH_ACQ_REL_128) + #define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQ_REL_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQ_REL_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_AND_FETCH_SEQ_CST_128) + #define EASTL_COMPILER_ATOMIC_AND_FETCH_SEQ_CST_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_AND_FETCH_SEQ_CST_128_AVAILABLE 0 +#endif + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_AND_FETCH_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_barrier.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_barrier.h new file mode 100644 index 00000000..550070e3 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_barrier.h @@ -0,0 +1,36 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
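+//
+// Like the other compiler_*.h headers in this directory, this file only maps
+// "is the compiler-level macro defined?" onto an *_AVAILABLE flag; the layers
+// above are expected to test that flag before expanding the operation. A rough,
+// hedged sketch of the consuming pattern (illustrative only, not code from this
+// patch):
+//
+//   #if EASTL_COMPILER_ATOMIC_COMPILER_BARRIER_AVAILABLE
+//       EASTL_COMPILER_ATOMIC_COMPILER_BARRIER();
+//   #else
+//       // fall back, e.g. to a full memory barrier
+//   #endif
+//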
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_BARRIER_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_BARRIER_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_COMPILER_BARRIER() +// +#if defined(EASTL_COMPILER_ATOMIC_COMPILER_BARRIER) + #define EASTL_COMPILER_ATOMIC_COMPILER_BARRIER_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_COMPILER_BARRIER_AVAILABLE 0 +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_COMPILER_BARRIER_DATA_DEPENDENCY(const T&, type) +// +#if defined(EASTL_COMPILER_ATOMIC_COMPILER_BARRIER_DATA_DEPENDENCY) + #define EASTL_COMPILER_ATOMIC_COMPILER_BARRIER_DATA_DEPENDENCY_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_COMPILER_BARRIER_DATA_DEPENDENCY_AVAILABLE 0 +#endif + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_BARRIER_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_cmpxchg_strong.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_cmpxchg_strong.h new file mode 100644 index 00000000..2ee29711 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_cmpxchg_strong.h @@ -0,0 +1,430 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_CMPXCHG_STRONG_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_CMPXCHG_STRONG_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_*_*_N(type, bool ret, type * ptr, type * expected, type desired) +// +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_8) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_8) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_8) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_8) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_8) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_8) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_8) + #define 
EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_8) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_8) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_8_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_16) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_16) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_16) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_16) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_16) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_16) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_16) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_16) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_16) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_16_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_32) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_32) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_32) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_32_AVAILABLE 0 
+#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_32) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_32) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_32) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_32) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_32) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_32) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_32_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_64) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_64) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_64) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_64) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_64) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_64) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_64) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_64) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_64) + #define 
EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_64_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_128) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_128) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_128) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_128) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_128) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_128) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_128) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_128) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_128) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_128_AVAILABLE 0 +#endif + + +///////////////////////////////////////////////////////////////////////////////// + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_*_N(type, bool ret, type * ptr, type * expected, type desired) +// +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_8_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_8_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_8(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_8(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_8_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_8_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_8(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_8(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_8_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_8_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_8(type, ret, ptr, expected, desired) \ + 
EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_8(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_8_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_8_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_8(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_8(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_8_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_8_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_8(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_8(type, ret, ptr, expected, desired) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_16_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_16_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_16(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_16(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_16_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_16_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_16(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_16(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_16_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_16_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_16(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_16(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_16_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_16_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_16(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_16(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_16_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_16_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_16(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_16(type, ret, ptr, expected, desired) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_32_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_32_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_32(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_32(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_32_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_32_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_32(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_32(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_32_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_32_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_32(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_32(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_32_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_32_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_32(type, ret, 
ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_32(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_32_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_32_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_32(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_32(type, ret, ptr, expected, desired) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_64_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_64_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_64(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_64(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_64_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_64_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_64(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_64(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_64_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_64_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_64(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_64(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_64_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_64_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_64(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_64(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_64_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_64_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_64(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_64(type, ret, ptr, expected, desired) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_128_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_128_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_128(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_128(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_128_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_128_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_128(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_128(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_128_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_128_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_128(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_128(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_128_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_128_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_128(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_128(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_128_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_128_AVAILABLE +#define 
EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_128(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_128(type, ret, ptr, expected, desired) + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_CMPXCHG_STRONG_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_cmpxchg_weak.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_cmpxchg_weak.h new file mode 100644 index 00000000..9bc1a621 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_cmpxchg_weak.h @@ -0,0 +1,430 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_CMPXCHG_WEAK_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_CMPXCHG_WEAK_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_*_*_N(type, bool ret, type * ptr, type * expected, type desired) +// +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_8) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_8) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_8) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_8) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_8) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_8) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_8) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_8) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_8) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_8_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_16) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_16_AVAILABLE 0 +#endif + +#if 
defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_16) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_16) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_16) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_16) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_16) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_16) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_16) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_16) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_16_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_32) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_32) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_32) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_32) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_32) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_32) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_32) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_32_AVAILABLE 0 +#endif + 
+#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_32) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_32) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_32_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_64) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_64) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_64) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_64) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_64) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_64) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_64) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_64) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_64) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_64_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_128) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_128) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_128) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_128) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_128_AVAILABLE 1 +#else + #define 
EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_128) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_128) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_128) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_128) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_128) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_128_AVAILABLE 0 +#endif + + +///////////////////////////////////////////////////////////////////////////////// + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_*_N(type, bool ret, type * ptr, type * expected, type desired) +// +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_8_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_8_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_8(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_8(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_8_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_8_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_8(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_8(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_8_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_8_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_8(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_8(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_8_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_8_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_8(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_8(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_8_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_8_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_8(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_8(type, ret, ptr, expected, desired) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_16_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_16_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_16(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_16(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_16_AVAILABLE \ + 
EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_16_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_16(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_16(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_16_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_16_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_16(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_16(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_16_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_16_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_16(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_16(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_16_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_16_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_16(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_16(type, ret, ptr, expected, desired) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_32_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_32_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_32(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_32(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_32_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_32_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_32(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_32(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_32_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_32_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_32(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_32(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_32_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_32_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_32(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_32(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_32_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_32_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_32(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_32(type, ret, ptr, expected, desired) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_64_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_64_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_64(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_64(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_64_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_64_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_64(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_64(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_64_AVAILABLE \ + 
EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_64_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_64(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_64(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_64_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_64_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_64(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_64(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_64_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_64_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_64(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_64(type, ret, ptr, expected, desired) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_128_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_128_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_128(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_128(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_128_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_128_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_128(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_128(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_128_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_128_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_128(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_128(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_128_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_128_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_128(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_128(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_128_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_128_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_128(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_128(type, ret, ptr, expected, desired) + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_CMPXCHG_WEAK_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_cpu_pause.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_cpu_pause.h new file mode 100644 index 00000000..073b3fbb --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_cpu_pause.h @@ -0,0 +1,32 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_CPU_PAUSE_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_CPU_PAUSE_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_CPU_PAUSE() +// +#if defined(EASTL_COMPILER_ATOMIC_CPU_PAUSE) + + #define EASTL_COMPILER_ATOMIC_CPU_PAUSE_AVAILABLE 1 + +#else + + #define EASTL_COMPILER_ATOMIC_CPU_PAUSE() \ + ((void)0) + + #define EASTL_COMPILER_ATOMIC_CPU_PAUSE_AVAILABLE 1 + +#endif + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_CPU_PAUSE_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_exchange.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_exchange.h new file mode 100644 index 00000000..d82b199d --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_exchange.h @@ -0,0 +1,173 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_EXCHANGE_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_EXCHANGE_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_EXCHANGE_*_N(type, type ret, type * ptr, type val) +// +#if defined(EASTL_COMPILER_ATOMIC_EXCHANGE_RELAXED_8) + #define EASTL_COMPILER_ATOMIC_EXCHANGE_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_EXCHANGE_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_EXCHANGE_ACQUIRE_8) + #define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQUIRE_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQUIRE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_EXCHANGE_RELEASE_8) + #define EASTL_COMPILER_ATOMIC_EXCHANGE_RELEASE_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_EXCHANGE_RELEASE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_EXCHANGE_ACQ_REL_8) + #define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQ_REL_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQ_REL_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_EXCHANGE_SEQ_CST_8) + #define EASTL_COMPILER_ATOMIC_EXCHANGE_SEQ_CST_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_EXCHANGE_SEQ_CST_8_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_EXCHANGE_RELAXED_16) + #define EASTL_COMPILER_ATOMIC_EXCHANGE_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_EXCHANGE_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_EXCHANGE_ACQUIRE_16) + #define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQUIRE_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQUIRE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_EXCHANGE_RELEASE_16) + #define EASTL_COMPILER_ATOMIC_EXCHANGE_RELEASE_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_EXCHANGE_RELEASE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_EXCHANGE_ACQ_REL_16) + #define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQ_REL_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQ_REL_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_EXCHANGE_SEQ_CST_16) + #define EASTL_COMPILER_ATOMIC_EXCHANGE_SEQ_CST_16_AVAILABLE 1 +#else + #define 
EASTL_COMPILER_ATOMIC_EXCHANGE_SEQ_CST_16_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_EXCHANGE_RELAXED_32) + #define EASTL_COMPILER_ATOMIC_EXCHANGE_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_EXCHANGE_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_EXCHANGE_ACQUIRE_32) + #define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQUIRE_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQUIRE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_EXCHANGE_RELEASE_32) + #define EASTL_COMPILER_ATOMIC_EXCHANGE_RELEASE_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_EXCHANGE_RELEASE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_EXCHANGE_ACQ_REL_32) + #define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQ_REL_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQ_REL_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_EXCHANGE_SEQ_CST_32) + #define EASTL_COMPILER_ATOMIC_EXCHANGE_SEQ_CST_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_EXCHANGE_SEQ_CST_32_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_EXCHANGE_RELAXED_64) + #define EASTL_COMPILER_ATOMIC_EXCHANGE_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_EXCHANGE_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_EXCHANGE_ACQUIRE_64) + #define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQUIRE_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQUIRE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_EXCHANGE_RELEASE_64) + #define EASTL_COMPILER_ATOMIC_EXCHANGE_RELEASE_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_EXCHANGE_RELEASE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_EXCHANGE_ACQ_REL_64) + #define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQ_REL_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQ_REL_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_EXCHANGE_SEQ_CST_64) + #define EASTL_COMPILER_ATOMIC_EXCHANGE_SEQ_CST_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_EXCHANGE_SEQ_CST_64_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_EXCHANGE_RELAXED_128) + #define EASTL_COMPILER_ATOMIC_EXCHANGE_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_EXCHANGE_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_EXCHANGE_ACQUIRE_128) + #define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQUIRE_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQUIRE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_EXCHANGE_RELEASE_128) + #define EASTL_COMPILER_ATOMIC_EXCHANGE_RELEASE_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_EXCHANGE_RELEASE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_EXCHANGE_ACQ_REL_128) + #define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQ_REL_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQ_REL_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_EXCHANGE_SEQ_CST_128) + #define EASTL_COMPILER_ATOMIC_EXCHANGE_SEQ_CST_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_EXCHANGE_SEQ_CST_128_AVAILABLE 0 +#endif + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_EXCHANGE_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_fetch_add.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_fetch_add.h new file mode 100644 index 00000000..e6c4238f --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_fetch_add.h @@ -0,0 +1,173 @@ 
+///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_FETCH_ADD_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_FETCH_ADD_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_FETCH_ADD_*_N(type, type ret, type * ptr, type val) +// +#if defined(EASTL_COMPILER_ATOMIC_FETCH_ADD_RELAXED_8) + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQUIRE_8) + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQUIRE_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQUIRE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_ADD_RELEASE_8) + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELEASE_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELEASE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQ_REL_8) + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQ_REL_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQ_REL_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_ADD_SEQ_CST_8) + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_SEQ_CST_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_SEQ_CST_8_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_ADD_RELAXED_16) + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQUIRE_16) + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQUIRE_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQUIRE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_ADD_RELEASE_16) + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELEASE_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELEASE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQ_REL_16) + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQ_REL_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQ_REL_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_ADD_SEQ_CST_16) + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_SEQ_CST_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_SEQ_CST_16_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_ADD_RELAXED_32) + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQUIRE_32) + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQUIRE_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQUIRE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_ADD_RELEASE_32) + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELEASE_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELEASE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQ_REL_32) + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQ_REL_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQ_REL_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_ADD_SEQ_CST_32) + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_SEQ_CST_32_AVAILABLE 1 +#else + 
#define EASTL_COMPILER_ATOMIC_FETCH_ADD_SEQ_CST_32_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_ADD_RELAXED_64) + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQUIRE_64) + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQUIRE_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQUIRE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_ADD_RELEASE_64) + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELEASE_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELEASE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQ_REL_64) + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQ_REL_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQ_REL_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_ADD_SEQ_CST_64) + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_SEQ_CST_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_SEQ_CST_64_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_ADD_RELAXED_128) + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQUIRE_128) + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQUIRE_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQUIRE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_ADD_RELEASE_128) + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELEASE_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELEASE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQ_REL_128) + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQ_REL_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQ_REL_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_ADD_SEQ_CST_128) + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_SEQ_CST_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_SEQ_CST_128_AVAILABLE 0 +#endif + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_FETCH_ADD_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_fetch_and.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_fetch_and.h new file mode 100644 index 00000000..b0976fc7 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_fetch_and.h @@ -0,0 +1,173 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_FETCH_AND_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_FETCH_AND_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_FETCH_AND_*_N(type, type ret, type * ptr, type val) +// +#if defined(EASTL_COMPILER_ATOMIC_FETCH_AND_RELAXED_8) + #define EASTL_COMPILER_ATOMIC_FETCH_AND_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_AND_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_AND_ACQUIRE_8) + #define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQUIRE_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQUIRE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_AND_RELEASE_8) + #define EASTL_COMPILER_ATOMIC_FETCH_AND_RELEASE_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_AND_RELEASE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_AND_ACQ_REL_8) + #define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQ_REL_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQ_REL_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_AND_SEQ_CST_8) + #define EASTL_COMPILER_ATOMIC_FETCH_AND_SEQ_CST_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_AND_SEQ_CST_8_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_AND_RELAXED_16) + #define EASTL_COMPILER_ATOMIC_FETCH_AND_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_AND_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_AND_ACQUIRE_16) + #define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQUIRE_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQUIRE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_AND_RELEASE_16) + #define EASTL_COMPILER_ATOMIC_FETCH_AND_RELEASE_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_AND_RELEASE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_AND_ACQ_REL_16) + #define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQ_REL_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQ_REL_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_AND_SEQ_CST_16) + #define EASTL_COMPILER_ATOMIC_FETCH_AND_SEQ_CST_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_AND_SEQ_CST_16_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_AND_RELAXED_32) + #define EASTL_COMPILER_ATOMIC_FETCH_AND_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_AND_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_AND_ACQUIRE_32) + #define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQUIRE_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQUIRE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_AND_RELEASE_32) + #define EASTL_COMPILER_ATOMIC_FETCH_AND_RELEASE_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_AND_RELEASE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_AND_ACQ_REL_32) + #define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQ_REL_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQ_REL_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_AND_SEQ_CST_32) + #define EASTL_COMPILER_ATOMIC_FETCH_AND_SEQ_CST_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_AND_SEQ_CST_32_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_AND_RELAXED_64) + #define 
EASTL_COMPILER_ATOMIC_FETCH_AND_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_AND_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_AND_ACQUIRE_64) + #define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQUIRE_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQUIRE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_AND_RELEASE_64) + #define EASTL_COMPILER_ATOMIC_FETCH_AND_RELEASE_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_AND_RELEASE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_AND_ACQ_REL_64) + #define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQ_REL_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQ_REL_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_AND_SEQ_CST_64) + #define EASTL_COMPILER_ATOMIC_FETCH_AND_SEQ_CST_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_AND_SEQ_CST_64_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_AND_RELAXED_128) + #define EASTL_COMPILER_ATOMIC_FETCH_AND_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_AND_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_AND_ACQUIRE_128) + #define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQUIRE_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQUIRE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_AND_RELEASE_128) + #define EASTL_COMPILER_ATOMIC_FETCH_AND_RELEASE_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_AND_RELEASE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_AND_ACQ_REL_128) + #define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQ_REL_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQ_REL_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_AND_SEQ_CST_128) + #define EASTL_COMPILER_ATOMIC_FETCH_AND_SEQ_CST_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_AND_SEQ_CST_128_AVAILABLE 0 +#endif + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_FETCH_AND_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_fetch_or.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_fetch_or.h new file mode 100644 index 00000000..2e6cfdac --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_fetch_or.h @@ -0,0 +1,173 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_FETCH_OR_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_FETCH_OR_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_FETCH_OR_*_N(type, type ret, type * ptr, type val) +// +#if defined(EASTL_COMPILER_ATOMIC_FETCH_OR_RELAXED_8) + #define EASTL_COMPILER_ATOMIC_FETCH_OR_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_OR_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_OR_ACQUIRE_8) + #define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQUIRE_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQUIRE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_OR_RELEASE_8) + #define EASTL_COMPILER_ATOMIC_FETCH_OR_RELEASE_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_OR_RELEASE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_OR_ACQ_REL_8) + #define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQ_REL_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQ_REL_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_OR_SEQ_CST_8) + #define EASTL_COMPILER_ATOMIC_FETCH_OR_SEQ_CST_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_OR_SEQ_CST_8_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_OR_RELAXED_16) + #define EASTL_COMPILER_ATOMIC_FETCH_OR_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_OR_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_OR_ACQUIRE_16) + #define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQUIRE_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQUIRE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_OR_RELEASE_16) + #define EASTL_COMPILER_ATOMIC_FETCH_OR_RELEASE_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_OR_RELEASE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_OR_ACQ_REL_16) + #define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQ_REL_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQ_REL_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_OR_SEQ_CST_16) + #define EASTL_COMPILER_ATOMIC_FETCH_OR_SEQ_CST_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_OR_SEQ_CST_16_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_OR_RELAXED_32) + #define EASTL_COMPILER_ATOMIC_FETCH_OR_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_OR_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_OR_ACQUIRE_32) + #define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQUIRE_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQUIRE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_OR_RELEASE_32) + #define EASTL_COMPILER_ATOMIC_FETCH_OR_RELEASE_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_OR_RELEASE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_OR_ACQ_REL_32) + #define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQ_REL_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQ_REL_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_OR_SEQ_CST_32) + #define EASTL_COMPILER_ATOMIC_FETCH_OR_SEQ_CST_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_OR_SEQ_CST_32_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_OR_RELAXED_64) + #define EASTL_COMPILER_ATOMIC_FETCH_OR_RELAXED_64_AVAILABLE 
1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_OR_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_OR_ACQUIRE_64) + #define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQUIRE_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQUIRE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_OR_RELEASE_64) + #define EASTL_COMPILER_ATOMIC_FETCH_OR_RELEASE_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_OR_RELEASE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_OR_ACQ_REL_64) + #define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQ_REL_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQ_REL_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_OR_SEQ_CST_64) + #define EASTL_COMPILER_ATOMIC_FETCH_OR_SEQ_CST_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_OR_SEQ_CST_64_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_OR_RELAXED_128) + #define EASTL_COMPILER_ATOMIC_FETCH_OR_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_OR_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_OR_ACQUIRE_128) + #define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQUIRE_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQUIRE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_OR_RELEASE_128) + #define EASTL_COMPILER_ATOMIC_FETCH_OR_RELEASE_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_OR_RELEASE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_OR_ACQ_REL_128) + #define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQ_REL_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQ_REL_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_OR_SEQ_CST_128) + #define EASTL_COMPILER_ATOMIC_FETCH_OR_SEQ_CST_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_OR_SEQ_CST_128_AVAILABLE 0 +#endif + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_FETCH_OR_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_fetch_sub.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_fetch_sub.h new file mode 100644 index 00000000..d7ed86cc --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_fetch_sub.h @@ -0,0 +1,173 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_FETCH_SUB_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_FETCH_SUB_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_FETCH_SUB_*_N(type, type ret, type * ptr, type val) +// +#if defined(EASTL_COMPILER_ATOMIC_FETCH_SUB_RELAXED_8) + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQUIRE_8) + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQUIRE_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQUIRE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_SUB_RELEASE_8) + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_RELEASE_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_RELEASE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQ_REL_8) + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQ_REL_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQ_REL_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_SUB_SEQ_CST_8) + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_SEQ_CST_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_SEQ_CST_8_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_SUB_RELAXED_16) + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQUIRE_16) + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQUIRE_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQUIRE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_SUB_RELEASE_16) + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_RELEASE_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_RELEASE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQ_REL_16) + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQ_REL_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQ_REL_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_SUB_SEQ_CST_16) + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_SEQ_CST_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_SEQ_CST_16_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_SUB_RELAXED_32) + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQUIRE_32) + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQUIRE_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQUIRE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_SUB_RELEASE_32) + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_RELEASE_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_RELEASE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQ_REL_32) + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQ_REL_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQ_REL_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_SUB_SEQ_CST_32) + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_SEQ_CST_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_SEQ_CST_32_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_SUB_RELAXED_64) + #define 
EASTL_COMPILER_ATOMIC_FETCH_SUB_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQUIRE_64) + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQUIRE_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQUIRE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_SUB_RELEASE_64) + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_RELEASE_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_RELEASE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQ_REL_64) + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQ_REL_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQ_REL_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_SUB_SEQ_CST_64) + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_SEQ_CST_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_SEQ_CST_64_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_SUB_RELAXED_128) + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQUIRE_128) + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQUIRE_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQUIRE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_SUB_RELEASE_128) + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_RELEASE_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_RELEASE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQ_REL_128) + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQ_REL_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQ_REL_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_SUB_SEQ_CST_128) + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_SEQ_CST_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_SEQ_CST_128_AVAILABLE 0 +#endif + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_FETCH_SUB_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_fetch_xor.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_fetch_xor.h new file mode 100644 index 00000000..10cf7d90 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_fetch_xor.h @@ -0,0 +1,173 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_FETCH_XOR_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_FETCH_XOR_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_FETCH_XOR_*_N(type, type ret, type * ptr, type val) +// +#if defined(EASTL_COMPILER_ATOMIC_FETCH_XOR_RELAXED_8) + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQUIRE_8) + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQUIRE_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQUIRE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_XOR_RELEASE_8) + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_RELEASE_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_RELEASE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQ_REL_8) + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQ_REL_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQ_REL_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_XOR_SEQ_CST_8) + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_SEQ_CST_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_SEQ_CST_8_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_XOR_RELAXED_16) + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQUIRE_16) + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQUIRE_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQUIRE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_XOR_RELEASE_16) + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_RELEASE_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_RELEASE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQ_REL_16) + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQ_REL_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQ_REL_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_XOR_SEQ_CST_16) + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_SEQ_CST_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_SEQ_CST_16_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_XOR_RELAXED_32) + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQUIRE_32) + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQUIRE_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQUIRE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_XOR_RELEASE_32) + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_RELEASE_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_RELEASE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQ_REL_32) + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQ_REL_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQ_REL_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_XOR_SEQ_CST_32) + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_SEQ_CST_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_SEQ_CST_32_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_XOR_RELAXED_64) + #define 
EASTL_COMPILER_ATOMIC_FETCH_XOR_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQUIRE_64) + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQUIRE_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQUIRE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_XOR_RELEASE_64) + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_RELEASE_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_RELEASE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQ_REL_64) + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQ_REL_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQ_REL_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_XOR_SEQ_CST_64) + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_SEQ_CST_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_SEQ_CST_64_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_XOR_RELAXED_128) + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQUIRE_128) + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQUIRE_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQUIRE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_XOR_RELEASE_128) + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_RELEASE_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_RELEASE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQ_REL_128) + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQ_REL_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQ_REL_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_XOR_SEQ_CST_128) + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_SEQ_CST_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_SEQ_CST_128_AVAILABLE 0 +#endif + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_FETCH_XOR_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_load.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_load.h new file mode 100644 index 00000000..734dbb80 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_load.h @@ -0,0 +1,139 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_LOAD_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_LOAD_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_LOAD_*_N(type, type ret, type * ptr) +// +#if defined(EASTL_COMPILER_ATOMIC_LOAD_RELAXED_8) + #define EASTL_COMPILER_ATOMIC_LOAD_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_LOAD_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_LOAD_ACQUIRE_8) + #define EASTL_COMPILER_ATOMIC_LOAD_ACQUIRE_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_LOAD_ACQUIRE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_LOAD_SEQ_CST_8) + #define EASTL_COMPILER_ATOMIC_LOAD_SEQ_CST_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_LOAD_SEQ_CST_8_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_LOAD_RELAXED_16) + #define EASTL_COMPILER_ATOMIC_LOAD_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_LOAD_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_LOAD_ACQUIRE_16) + #define EASTL_COMPILER_ATOMIC_LOAD_ACQUIRE_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_LOAD_ACQUIRE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_LOAD_SEQ_CST_16) + #define EASTL_COMPILER_ATOMIC_LOAD_SEQ_CST_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_LOAD_SEQ_CST_16_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_LOAD_RELAXED_32) + #define EASTL_COMPILER_ATOMIC_LOAD_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_LOAD_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_LOAD_ACQUIRE_32) + #define EASTL_COMPILER_ATOMIC_LOAD_ACQUIRE_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_LOAD_ACQUIRE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_LOAD_SEQ_CST_32) + #define EASTL_COMPILER_ATOMIC_LOAD_SEQ_CST_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_LOAD_SEQ_CST_32_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_LOAD_RELAXED_64) + #define EASTL_COMPILER_ATOMIC_LOAD_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_LOAD_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_LOAD_ACQUIRE_64) + #define EASTL_COMPILER_ATOMIC_LOAD_ACQUIRE_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_LOAD_ACQUIRE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_LOAD_SEQ_CST_64) + #define EASTL_COMPILER_ATOMIC_LOAD_SEQ_CST_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_LOAD_SEQ_CST_64_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_LOAD_RELAXED_128) + #define EASTL_COMPILER_ATOMIC_LOAD_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_LOAD_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_LOAD_ACQUIRE_128) + #define EASTL_COMPILER_ATOMIC_LOAD_ACQUIRE_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_LOAD_ACQUIRE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_LOAD_SEQ_CST_128) + #define EASTL_COMPILER_ATOMIC_LOAD_SEQ_CST_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_LOAD_SEQ_CST_128_AVAILABLE 0 +#endif + + +/** + * NOTE: + * + * These are used for data-dependent reads thru a pointer. It is safe + * to assume that pointer-sized reads are atomic on any given platform. + * This implementation assumes the hardware doesn't reorder dependent + * loads unlike the DEC Alpha. 
+ */ +#define EASTL_COMPILER_ATOMIC_LOAD_READ_DEPENDS_N(type, ret, ptr) \ + { \ + static_assert(eastl::is_pointer_v, "eastl::atomic : Read Depends Type must be a Pointer Type!"); \ + static_assert(eastl::is_pointer_v>, "eastl::atomic : Read Depends Ptr must be a Pointer to a Pointer!"); \ + \ + ret = (*EASTL_ATOMIC_VOLATILE_CAST(ptr)); \ + } + +#define EASTL_COMPILER_ATOMIC_LOAD_READ_DEPENDS_32(type, ret, ptr) \ + EASTL_COMPILER_ATOMIC_LOAD_READ_DEPENDS_N(type, ret, ptr) + +#define EASTL_COMPILER_ATOMIC_LOAD_READ_DEPENDS_64(type, ret, ptr) \ + EASTL_COMPILER_ATOMIC_LOAD_READ_DEPENDS_N(type, ret, ptr) + +#define EASTL_COMPILER_ATOMIC_LOAD_READ_DEPENDS_32_AVAILABLE 1 +#define EASTL_COMPILER_ATOMIC_LOAD_READ_DEPENDS_64_AVAILABLE 1 + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_LOAD_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_memory_barrier.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_memory_barrier.h new file mode 100644 index 00000000..ac3923c6 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_memory_barrier.h @@ -0,0 +1,47 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_MEMORY_BARRIER_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_MEMORY_BARRIER_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_CPU_MB() +// +#if defined(EASTL_COMPILER_ATOMIC_CPU_MB) + #define EASTL_COMPILER_ATOMIC_CPU_MB_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CPU_MB_AVAILABLE 0 +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_CPU_WMB() +// +#if defined(EASTL_COMPILER_ATOMIC_CPU_WMB) + #define EASTL_COMPILER_ATOMIC_CPU_WMB_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CPU_WMB_AVAILABLE 0 +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_CPU_RMB() +// +#if defined(EASTL_COMPILER_ATOMIC_CPU_RMB) + #define EASTL_COMPILER_ATOMIC_CPU_RMB_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CPU_RMB_AVAILABLE 0 +#endif + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_MEMORY_BARRIER_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_or_fetch.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_or_fetch.h new file mode 100644 index 00000000..a26a72c7 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_or_fetch.h @@ -0,0 +1,173 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
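Aside on the READ_DEPENDS load defined in compiler_load.h above: a minimal standard-C++ sketch of the same "data-dependent read through a pointer" idea, illustrative only and not part of the patch. It uses memory_order_consume, which current compilers promote to acquire, whereas the EASTL macro issues a plain volatile pointer read and relies on the hardware preserving dependency ordering. The Node type and function name are invented for the example.

    #include <atomic>

    struct Node { int payload; };

    std::atomic<Node*> gHead{nullptr};

    // Load the pointer with consume ordering, then dereference it; the read of
    // n->payload is ordered after the pointer load by the data dependency.
    int ReadPayloadThroughDependentLoad()
    {
        Node* n = gHead.load(std::memory_order_consume);
        return n ? n->payload : 0;
    }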
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_OR_FETCH_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_OR_FETCH_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_OR_FETCH_*_N(type, type ret, type * ptr, type val) +// +#if defined(EASTL_COMPILER_ATOMIC_OR_FETCH_RELAXED_8) + #define EASTL_COMPILER_ATOMIC_OR_FETCH_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_OR_FETCH_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_OR_FETCH_ACQUIRE_8) + #define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQUIRE_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQUIRE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_OR_FETCH_RELEASE_8) + #define EASTL_COMPILER_ATOMIC_OR_FETCH_RELEASE_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_OR_FETCH_RELEASE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_OR_FETCH_ACQ_REL_8) + #define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQ_REL_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQ_REL_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_OR_FETCH_SEQ_CST_8) + #define EASTL_COMPILER_ATOMIC_OR_FETCH_SEQ_CST_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_OR_FETCH_SEQ_CST_8_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_OR_FETCH_RELAXED_16) + #define EASTL_COMPILER_ATOMIC_OR_FETCH_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_OR_FETCH_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_OR_FETCH_ACQUIRE_16) + #define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQUIRE_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQUIRE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_OR_FETCH_RELEASE_16) + #define EASTL_COMPILER_ATOMIC_OR_FETCH_RELEASE_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_OR_FETCH_RELEASE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_OR_FETCH_ACQ_REL_16) + #define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQ_REL_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQ_REL_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_OR_FETCH_SEQ_CST_16) + #define EASTL_COMPILER_ATOMIC_OR_FETCH_SEQ_CST_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_OR_FETCH_SEQ_CST_16_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_OR_FETCH_RELAXED_32) + #define EASTL_COMPILER_ATOMIC_OR_FETCH_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_OR_FETCH_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_OR_FETCH_ACQUIRE_32) + #define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQUIRE_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQUIRE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_OR_FETCH_RELEASE_32) + #define EASTL_COMPILER_ATOMIC_OR_FETCH_RELEASE_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_OR_FETCH_RELEASE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_OR_FETCH_ACQ_REL_32) + #define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQ_REL_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQ_REL_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_OR_FETCH_SEQ_CST_32) + #define EASTL_COMPILER_ATOMIC_OR_FETCH_SEQ_CST_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_OR_FETCH_SEQ_CST_32_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_OR_FETCH_RELAXED_64) + #define EASTL_COMPILER_ATOMIC_OR_FETCH_RELAXED_64_AVAILABLE 
1 +#else + #define EASTL_COMPILER_ATOMIC_OR_FETCH_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_OR_FETCH_ACQUIRE_64) + #define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQUIRE_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQUIRE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_OR_FETCH_RELEASE_64) + #define EASTL_COMPILER_ATOMIC_OR_FETCH_RELEASE_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_OR_FETCH_RELEASE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_OR_FETCH_ACQ_REL_64) + #define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQ_REL_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQ_REL_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_OR_FETCH_SEQ_CST_64) + #define EASTL_COMPILER_ATOMIC_OR_FETCH_SEQ_CST_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_OR_FETCH_SEQ_CST_64_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_OR_FETCH_RELAXED_128) + #define EASTL_COMPILER_ATOMIC_OR_FETCH_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_OR_FETCH_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_OR_FETCH_ACQUIRE_128) + #define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQUIRE_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQUIRE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_OR_FETCH_RELEASE_128) + #define EASTL_COMPILER_ATOMIC_OR_FETCH_RELEASE_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_OR_FETCH_RELEASE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_OR_FETCH_ACQ_REL_128) + #define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQ_REL_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQ_REL_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_OR_FETCH_SEQ_CST_128) + #define EASTL_COMPILER_ATOMIC_OR_FETCH_SEQ_CST_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_OR_FETCH_SEQ_CST_128_AVAILABLE 0 +#endif + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_OR_FETCH_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_signal_fence.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_signal_fence.h new file mode 100644 index 00000000..25b0b741 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_signal_fence.h @@ -0,0 +1,49 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_SIGNAL_FENCE_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_SIGNAL_FENCE_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_SIGNAL_FENCE_*() +// +#if defined(EASTL_COMPILER_ATOMIC_SIGNAL_FENCE_RELAXED) + #define EASTL_COMPILER_ATOMIC_SIGNAL_FENCE_RELAXED_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_SIGNAL_FENCE_RELAXED_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_SIGNAL_FENCE_ACQUIRE) + #define EASTL_COMPILER_ATOMIC_SIGNAL_FENCE_ACQUIRE_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_SIGNAL_FENCE_ACQUIRE_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_SIGNAL_FENCE_RELEASE) + #define EASTL_COMPILER_ATOMIC_SIGNAL_FENCE_RELEASE_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_SIGNAL_FENCE_RELEASE_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_SIGNAL_FENCE_ACQ_REL) + #define EASTL_COMPILER_ATOMIC_SIGNAL_FENCE_ACQ_REL_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_SIGNAL_FENCE_ACQ_REL_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_SIGNAL_FENCE_SEQ_CST) + #define EASTL_COMPILER_ATOMIC_SIGNAL_FENCE_SEQ_CST_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_SIGNAL_FENCE_SEQ_CST_AVAILABLE 0 +#endif + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_SIGNAL_FENCE_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_store.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_store.h new file mode 100644 index 00000000..1a553e2a --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_store.h @@ -0,0 +1,113 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_STORE_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_STORE_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_STORE_*_N(type, type * ptr, type val) +// +#if defined(EASTL_COMPILER_ATOMIC_STORE_RELAXED_8) + #define EASTL_COMPILER_ATOMIC_STORE_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_STORE_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_STORE_RELEASE_8) + #define EASTL_COMPILER_ATOMIC_STORE_RELEASE_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_STORE_RELEASE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_STORE_SEQ_CST_8) + #define EASTL_COMPILER_ATOMIC_STORE_SEQ_CST_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_STORE_SEQ_CST_8_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_STORE_RELAXED_16) + #define EASTL_COMPILER_ATOMIC_STORE_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_STORE_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_STORE_RELEASE_16) + #define EASTL_COMPILER_ATOMIC_STORE_RELEASE_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_STORE_RELEASE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_STORE_SEQ_CST_16) + #define EASTL_COMPILER_ATOMIC_STORE_SEQ_CST_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_STORE_SEQ_CST_16_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_STORE_RELAXED_32) + #define EASTL_COMPILER_ATOMIC_STORE_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_STORE_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_STORE_RELEASE_32) + #define EASTL_COMPILER_ATOMIC_STORE_RELEASE_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_STORE_RELEASE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_STORE_SEQ_CST_32) + #define EASTL_COMPILER_ATOMIC_STORE_SEQ_CST_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_STORE_SEQ_CST_32_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_STORE_RELAXED_64) + #define EASTL_COMPILER_ATOMIC_STORE_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_STORE_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_STORE_RELEASE_64) + #define EASTL_COMPILER_ATOMIC_STORE_RELEASE_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_STORE_RELEASE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_STORE_SEQ_CST_64) + #define EASTL_COMPILER_ATOMIC_STORE_SEQ_CST_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_STORE_SEQ_CST_64_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_STORE_RELAXED_128) + #define EASTL_COMPILER_ATOMIC_STORE_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_STORE_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_STORE_RELEASE_128) + #define EASTL_COMPILER_ATOMIC_STORE_RELEASE_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_STORE_RELEASE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_STORE_SEQ_CST_128) + #define EASTL_COMPILER_ATOMIC_STORE_SEQ_CST_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_STORE_SEQ_CST_128_AVAILABLE 0 +#endif + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_STORE_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_sub_fetch.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_sub_fetch.h new file mode 100644 index 
00000000..4b7eea92 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_sub_fetch.h @@ -0,0 +1,173 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_SUB_FETCH_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_SUB_FETCH_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_SUB_FETCH_*_N(type, type ret, type * ptr, type val) +// +#if defined(EASTL_COMPILER_ATOMIC_SUB_FETCH_RELAXED_8) + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQUIRE_8) + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQUIRE_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQUIRE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_SUB_FETCH_RELEASE_8) + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELEASE_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELEASE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQ_REL_8) + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQ_REL_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQ_REL_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_SUB_FETCH_SEQ_CST_8) + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_SEQ_CST_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_SEQ_CST_8_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_SUB_FETCH_RELAXED_16) + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQUIRE_16) + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQUIRE_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQUIRE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_SUB_FETCH_RELEASE_16) + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELEASE_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELEASE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQ_REL_16) + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQ_REL_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQ_REL_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_SUB_FETCH_SEQ_CST_16) + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_SEQ_CST_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_SEQ_CST_16_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_SUB_FETCH_RELAXED_32) + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQUIRE_32) + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQUIRE_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQUIRE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_SUB_FETCH_RELEASE_32) + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELEASE_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELEASE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQ_REL_32) + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQ_REL_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQ_REL_32_AVAILABLE 0 +#endif + 
+#if defined(EASTL_COMPILER_ATOMIC_SUB_FETCH_SEQ_CST_32) + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_SEQ_CST_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_SEQ_CST_32_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_SUB_FETCH_RELAXED_64) + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQUIRE_64) + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQUIRE_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQUIRE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_SUB_FETCH_RELEASE_64) + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELEASE_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELEASE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQ_REL_64) + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQ_REL_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQ_REL_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_SUB_FETCH_SEQ_CST_64) + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_SEQ_CST_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_SEQ_CST_64_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_SUB_FETCH_RELAXED_128) + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQUIRE_128) + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQUIRE_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQUIRE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_SUB_FETCH_RELEASE_128) + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELEASE_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELEASE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQ_REL_128) + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQ_REL_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQ_REL_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_SUB_FETCH_SEQ_CST_128) + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_SEQ_CST_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_SEQ_CST_128_AVAILABLE 0 +#endif + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_SUB_FETCH_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_thread_fence.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_thread_fence.h new file mode 100644 index 00000000..01d8f0f9 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_thread_fence.h @@ -0,0 +1,49 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_THREAD_FENCE_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_THREAD_FENCE_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_THREAD_FENCE_*() +// +#if defined(EASTL_COMPILER_ATOMIC_THREAD_FENCE_RELAXED) + #define EASTL_COMPILER_ATOMIC_THREAD_FENCE_RELAXED_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_THREAD_FENCE_RELAXED_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_THREAD_FENCE_ACQUIRE) + #define EASTL_COMPILER_ATOMIC_THREAD_FENCE_ACQUIRE_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_THREAD_FENCE_ACQUIRE_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_THREAD_FENCE_RELEASE) + #define EASTL_COMPILER_ATOMIC_THREAD_FENCE_RELEASE_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_THREAD_FENCE_RELEASE_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_THREAD_FENCE_ACQ_REL) + #define EASTL_COMPILER_ATOMIC_THREAD_FENCE_ACQ_REL_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_THREAD_FENCE_ACQ_REL_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_THREAD_FENCE_SEQ_CST) + #define EASTL_COMPILER_ATOMIC_THREAD_FENCE_SEQ_CST_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_THREAD_FENCE_SEQ_CST_AVAILABLE 0 +#endif + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_THREAD_FENCE_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_xor_fetch.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_xor_fetch.h new file mode 100644 index 00000000..05680bd1 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_xor_fetch.h @@ -0,0 +1,173 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_XOR_FETCH_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_XOR_FETCH_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_XOR_FETCH_*_N(type, type ret, type * ptr, type val) +// +#if defined(EASTL_COMPILER_ATOMIC_XOR_FETCH_RELAXED_8) + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQUIRE_8) + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQUIRE_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQUIRE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_XOR_FETCH_RELEASE_8) + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELEASE_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELEASE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQ_REL_8) + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQ_REL_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQ_REL_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_XOR_FETCH_SEQ_CST_8) + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_SEQ_CST_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_SEQ_CST_8_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_XOR_FETCH_RELAXED_16) + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQUIRE_16) + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQUIRE_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQUIRE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_XOR_FETCH_RELEASE_16) + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELEASE_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELEASE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQ_REL_16) + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQ_REL_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQ_REL_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_XOR_FETCH_SEQ_CST_16) + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_SEQ_CST_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_SEQ_CST_16_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_XOR_FETCH_RELAXED_32) + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQUIRE_32) + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQUIRE_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQUIRE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_XOR_FETCH_RELEASE_32) + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELEASE_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELEASE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQ_REL_32) + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQ_REL_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQ_REL_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_XOR_FETCH_SEQ_CST_32) + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_SEQ_CST_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_SEQ_CST_32_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_XOR_FETCH_RELAXED_64) + #define 
EASTL_COMPILER_ATOMIC_XOR_FETCH_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQUIRE_64) + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQUIRE_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQUIRE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_XOR_FETCH_RELEASE_64) + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELEASE_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELEASE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQ_REL_64) + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQ_REL_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQ_REL_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_XOR_FETCH_SEQ_CST_64) + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_SEQ_CST_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_SEQ_CST_64_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_XOR_FETCH_RELAXED_128) + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQUIRE_128) + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQUIRE_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQUIRE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_XOR_FETCH_RELEASE_128) + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELEASE_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELEASE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQ_REL_128) + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQ_REL_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQ_REL_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_XOR_FETCH_SEQ_CST_128) + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_SEQ_CST_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_SEQ_CST_128_AVAILABLE 0 +#endif + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_XOR_FETCH_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc.h new file mode 100644 index 00000000..26a99c20 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc.h @@ -0,0 +1,154 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_GCC_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_GCC_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +/** + * NOTE: + * + * gcc __atomic builtins may defer to function calls in libatomic.so for architectures that do not + * support atomic instructions of a given size. These functions will be implemented with pthread_mutex_t. + * It also requires the explicit linking against the compiler runtime libatomic.so. + * On architectures that do not support atomics, like armv6 the builtins may defer to kernel helpers + * or on classic uniprocessor systems just disable interrupts. + * + * We do not want to have to link against libatomic.so or fall into the trap of our atomics degrading + * into locks. We would rather have user-code explicitly use locking primitives if their code cannot + * be satisfied with atomic instructions on the given platform. 
+ */
+static_assert(__atomic_always_lock_free(1, 0), "eastl::atomic<T> where sizeof(T) == 1 must be lock-free!");
+static_assert(__atomic_always_lock_free(2, 0), "eastl::atomic<T> where sizeof(T) == 2 must be lock-free!");
+static_assert(__atomic_always_lock_free(4, 0), "eastl::atomic<T> where sizeof(T) == 4 must be lock-free!");
+#if EA_PLATFORM_PTR_SIZE == 8
+	static_assert(__atomic_always_lock_free(8, 0), "eastl::atomic<T> where sizeof(T) == 8 must be lock-free!");
+#endif
+
+/**
+ * NOTE:
+ *
+ * The following can fail on gcc/clang on 64-bit systems.
+ * First, whether clang calls out to libatomic for 128-bit operations depends on the -march setting.
+ * Second, gcc always calls out to libatomic for 128-bit atomics. It is unclear whether it uses locks
+ * or checks cpuid and uses cmpxchg16b if it is available.
+ * gcc mailing lists argue that since a 128-bit load must be implemented with cmpxchg16b, the __atomic builtin
+ * cannot be used on read-only memory, which is why they always call out to libatomic.
+ * There is no way to tell gcc not to do that, unfortunately.
+ * We don't care about the read-only restriction because our eastl::atomic<T> object is mutable,
+ * and msvc doesn't enforce this restriction either, so to be fully platform agnostic we cannot enforce it.
+ *
+ * Therefore, the following static_assert is commented out for the time being, as it always fails on these compilers.
+ * We still guarantee 128-bit atomics are lock-free by handrolling the inline assembly ourselves.
+ *
+ * static_assert(__atomic_always_lock_free(16, 0), "eastl::atomic<T> where sizeof(T) == 16 must be lock-free!");
+ */
+
+/**
+ * NOTE:
+ *
+ * Why do we cast to unsigned fixed-width types for every operation even though the gcc/clang builtins are generic?
+ * gcc/clang (correctly, but undesirably for us) call out to libatomic and do locking on user types that may be misaligned.
+ * struct UserType { uint8_t a,b; }; is 2 bytes in size but has only 1-byte alignment.
+ * gcc/clang cannot know that we always guarantee every type T is size-aligned within eastl::atomic<T>,
+ * so they emit calls into libatomic and do locking for structs like these, which we do not want.
+ * Therefore we always cast each atomic pointer type to the equivalent unsigned fixed-width type when doing the atomic operations.
+ * This ensures all user types are size-aligned and thus are lock-free.
+ */ + + +///////////////////////////////////////////////////////////////////////////////// + + +#define EASTL_COMPILER_ATOMIC_HAS_8BIT +#define EASTL_COMPILER_ATOMIC_HAS_16BIT +#define EASTL_COMPILER_ATOMIC_HAS_32BIT +#define EASTL_COMPILER_ATOMIC_HAS_64BIT + +#if EA_PLATFORM_PTR_SIZE == 8 + #define EASTL_COMPILER_ATOMIC_HAS_128BIT +#endif + + +///////////////////////////////////////////////////////////////////////////////// + + +#define EASTL_COMPILER_ATOMIC_FIXED_WIDTH_TYPE_8 uint8_t +#define EASTL_COMPILER_ATOMIC_FIXED_WIDTH_TYPE_16 uint16_t +#define EASTL_COMPILER_ATOMIC_FIXED_WIDTH_TYPE_32 uint32_t +#define EASTL_COMPILER_ATOMIC_FIXED_WIDTH_TYPE_64 uint64_t +#define EASTL_COMPILER_ATOMIC_FIXED_WIDTH_TYPE_128 __uint128_t + + +///////////////////////////////////////////////////////////////////////////////// + + +#define EASTL_GCC_ATOMIC_FETCH_INTRIN_N(integralType, fetchIntrinsic, type, ret, ptr, val, gccMemoryOrder) \ + { \ + integralType retIntegral; \ + integralType valIntegral = EASTL_ATOMIC_TYPE_PUN_CAST(integralType, (val)); \ + \ + retIntegral = fetchIntrinsic(EASTL_ATOMIC_VOLATILE_INTEGRAL_CAST(integralType, (ptr)), valIntegral, gccMemoryOrder); \ + \ + ret = EASTL_ATOMIC_TYPE_PUN_CAST(type, retIntegral); \ + } + +#define EASTL_GCC_ATOMIC_CMPXCHG_INTRIN_N(integralType, type, ret, ptr, expected, desired, weak, successOrder, failOrder) \ + ret = __atomic_compare_exchange(EASTL_ATOMIC_VOLATILE_INTEGRAL_CAST(integralType, (ptr)), \ + EASTL_ATOMIC_INTEGRAL_CAST(integralType, (expected)), \ + EASTL_ATOMIC_INTEGRAL_CAST(integralType, &(desired)), \ + weak, successOrder, failOrder) + +#define EASTL_GCC_ATOMIC_EXCHANGE_INTRIN_N(integralType, type, ret, ptr, val, gccMemoryOrder) \ + { \ + integralType retIntegral; \ + integralType valIntegral = EASTL_ATOMIC_TYPE_PUN_CAST(integralType, (val)); \ + \ + __atomic_exchange(EASTL_ATOMIC_VOLATILE_INTEGRAL_CAST(integralType, (ptr)), \ + &valIntegral, &retIntegral, gccMemoryOrder); \ + \ + ret = EASTL_ATOMIC_TYPE_PUN_CAST(type, retIntegral); \ + } + + +///////////////////////////////////////////////////////////////////////////////// + + +#include "compiler_gcc_fetch_add.h" +#include "compiler_gcc_fetch_sub.h" + +#include "compiler_gcc_fetch_and.h" +#include "compiler_gcc_fetch_xor.h" +#include "compiler_gcc_fetch_or.h" + +#include "compiler_gcc_add_fetch.h" +#include "compiler_gcc_sub_fetch.h" + +#include "compiler_gcc_and_fetch.h" +#include "compiler_gcc_xor_fetch.h" +#include "compiler_gcc_or_fetch.h" + +#include "compiler_gcc_exchange.h" + +#include "compiler_gcc_cmpxchg_weak.h" +#include "compiler_gcc_cmpxchg_strong.h" + +#include "compiler_gcc_load.h" +#include "compiler_gcc_store.h" + +#include "compiler_gcc_barrier.h" + +#include "compiler_gcc_cpu_pause.h" + +#include "compiler_gcc_signal_fence.h" + +#include "compiler_gcc_thread_fence.h" + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_GCC_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_add_fetch.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_add_fetch.h new file mode 100644 index 00000000..1d19196b --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_add_fetch.h @@ -0,0 +1,118 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
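A hedged illustration of the fixed-width punning performed by EASTL_GCC_ATOMIC_FETCH_INTRIN_N and EASTL_GCC_ATOMIC_EXCHANGE_INTRIN_N above, not part of the patch: a 2-byte struct with 1-byte alignment is exchanged through a size-aligned uint16_t slot, so the generic __atomic builtin is never handed a potentially misaligned user type and stays lock-free. The function name and the memcpy-based punning are illustrative assumptions; EASTL uses its own EASTL_ATOMIC_TYPE_PUN_CAST machinery.

    #include <cstdint>
    #include <cstring>

    struct UserType { uint8_t a, b; };  // sizeof == 2, alignof == 1

    // Exchange the struct's bits through a size-aligned uint16_t so the builtin
    // operates on an integral type it can always handle without locking.
    UserType ExchangeUserType(uint16_t* storage, UserType desired)
    {
        uint16_t desiredBits, previousBits;
        std::memcpy(&desiredBits, &desired, sizeof desiredBits);
        __atomic_exchange(storage, &desiredBits, &previousBits, __ATOMIC_SEQ_CST);
        UserType previous;
        std::memcpy(&previous, &previousBits, sizeof previous);
        return previous;
    }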
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_GCC_ADD_FETCH_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_GCC_ADD_FETCH_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#define EASTL_GCC_ATOMIC_ADD_FETCH_N(integralType, type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_FETCH_INTRIN_N(integralType, __atomic_add_fetch, type, ret, ptr, val, gccMemoryOrder) + + +#define EASTL_GCC_ATOMIC_ADD_FETCH_8(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_ADD_FETCH_N(uint8_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_ADD_FETCH_16(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_ADD_FETCH_N(uint16_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_ADD_FETCH_32(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_ADD_FETCH_N(uint32_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_ADD_FETCH_64(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_ADD_FETCH_N(uint64_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_ADD_FETCH_128(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_ADD_FETCH_N(__uint128_t, type, ret, ptr, val, gccMemoryOrder) + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_ADD_FETCH_*_N(type, type ret, type * ptr, type val) +// +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_RELAXED_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_ADD_FETCH_8(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_RELAXED_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_ADD_FETCH_16(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_RELAXED_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_ADD_FETCH_32(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_RELAXED_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_ADD_FETCH_64(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_RELAXED_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_ADD_FETCH_128(type, ret, ptr, val, __ATOMIC_RELAXED) + + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQUIRE_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_ADD_FETCH_8(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQUIRE_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_ADD_FETCH_16(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQUIRE_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_ADD_FETCH_32(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQUIRE_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_ADD_FETCH_64(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQUIRE_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_ADD_FETCH_128(type, ret, ptr, val, __ATOMIC_ACQUIRE) + + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_RELEASE_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_ADD_FETCH_8(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_RELEASE_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_ADD_FETCH_16(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_RELEASE_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_ADD_FETCH_32(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_RELEASE_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_ADD_FETCH_64(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define 
EASTL_COMPILER_ATOMIC_ADD_FETCH_RELEASE_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_ADD_FETCH_128(type, ret, ptr, val, __ATOMIC_RELEASE) + + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQ_REL_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_ADD_FETCH_8(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQ_REL_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_ADD_FETCH_16(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQ_REL_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_ADD_FETCH_32(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQ_REL_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_ADD_FETCH_64(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQ_REL_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_ADD_FETCH_128(type, ret, ptr, val, __ATOMIC_ACQ_REL) + + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_SEQ_CST_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_ADD_FETCH_8(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_SEQ_CST_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_ADD_FETCH_16(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_SEQ_CST_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_ADD_FETCH_32(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_SEQ_CST_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_ADD_FETCH_64(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_SEQ_CST_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_ADD_FETCH_128(type, ret, ptr, val, __ATOMIC_SEQ_CST) + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_GCC_ADD_FETCH_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_and_fetch.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_and_fetch.h new file mode 100644 index 00000000..a35307f0 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_and_fetch.h @@ -0,0 +1,118 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
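Roughly what one of the ADD_FETCH mappings above amounts to once the wrapper macros are expanded; a sketch assuming a plain int32_t operand, with an illustrative function name that is not part of the patch:

    #include <cstdint>

    // EASTL_COMPILER_ATOMIC_ADD_FETCH_SEQ_CST_32 reduces to a __atomic_add_fetch
    // on the operand viewed as a uint32_t, returning the post-add value.
    int32_t AddFetchSeqCst32(int32_t* ptr, int32_t val)
    {
        uint32_t result = __atomic_add_fetch(reinterpret_cast<volatile uint32_t*>(ptr),
                                             static_cast<uint32_t>(val), __ATOMIC_SEQ_CST);
        return static_cast<int32_t>(result);
    }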
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_GCC_AND_FETCH_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_GCC_AND_FETCH_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#define EASTL_GCC_ATOMIC_AND_FETCH_N(integralType, type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_FETCH_INTRIN_N(integralType, __atomic_and_fetch, type, ret, ptr, val, gccMemoryOrder) + + +#define EASTL_GCC_ATOMIC_AND_FETCH_8(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_AND_FETCH_N(uint8_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_AND_FETCH_16(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_AND_FETCH_N(uint16_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_AND_FETCH_32(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_AND_FETCH_N(uint32_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_AND_FETCH_64(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_AND_FETCH_N(uint64_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_AND_FETCH_128(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_AND_FETCH_N(__uint128_t, type, ret, ptr, val, gccMemoryOrder) + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_AND_FETCH_*_N(type, type ret, type * ptr, type val) +// +#define EASTL_COMPILER_ATOMIC_AND_FETCH_RELAXED_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_AND_FETCH_8(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_RELAXED_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_AND_FETCH_16(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_RELAXED_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_AND_FETCH_32(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_RELAXED_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_AND_FETCH_64(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_RELAXED_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_AND_FETCH_128(type, ret, ptr, val, __ATOMIC_RELAXED) + + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQUIRE_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_AND_FETCH_8(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQUIRE_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_AND_FETCH_16(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQUIRE_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_AND_FETCH_32(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQUIRE_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_AND_FETCH_64(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQUIRE_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_AND_FETCH_128(type, ret, ptr, val, __ATOMIC_ACQUIRE) + + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_RELEASE_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_AND_FETCH_8(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_RELEASE_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_AND_FETCH_16(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_RELEASE_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_AND_FETCH_32(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_RELEASE_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_AND_FETCH_64(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define 
EASTL_COMPILER_ATOMIC_AND_FETCH_RELEASE_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_AND_FETCH_128(type, ret, ptr, val, __ATOMIC_RELEASE) + + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQ_REL_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_AND_FETCH_8(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQ_REL_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_AND_FETCH_16(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQ_REL_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_AND_FETCH_32(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQ_REL_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_AND_FETCH_64(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQ_REL_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_AND_FETCH_128(type, ret, ptr, val, __ATOMIC_ACQ_REL) + + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_SEQ_CST_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_AND_FETCH_8(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_SEQ_CST_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_AND_FETCH_16(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_SEQ_CST_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_AND_FETCH_32(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_SEQ_CST_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_AND_FETCH_64(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_SEQ_CST_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_AND_FETCH_128(type, ret, ptr, val, __ATOMIC_SEQ_CST) + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_GCC_AND_FETCH_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_barrier.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_barrier.h new file mode 100644 index 00000000..64e8e541 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_barrier.h @@ -0,0 +1,30 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_GCC_BARRIER_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_GCC_BARRIER_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_COMPILER_BARRIER() +// +#define EASTL_COMPILER_ATOMIC_COMPILER_BARRIER() \ + __asm__ __volatile__ ("" ::: "memory") + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_COMPILER_BARRIER_DATA_DEPENDENCY(const T&, type) +// +#define EASTL_COMPILER_ATOMIC_COMPILER_BARRIER_DATA_DEPENDENCY(val, type) \ + __asm__ __volatile__ ("" : /* Output Operands */ : "r"(&(val)) : "memory") + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_GCC_BARRIER_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_cmpxchg_strong.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_cmpxchg_strong.h new file mode 100644 index 00000000..3e47cf2e --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_cmpxchg_strong.h @@ -0,0 +1,182 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. 
All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_GCC_CMPXCHG_STRONG_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_GCC_CMPXCHG_STRONG_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#define EASTL_GCC_ATOMIC_CMPXCHG_STRONG_N(integralType, type, ret, ptr, expected, desired, successOrder, failOrder) \ + EASTL_GCC_ATOMIC_CMPXCHG_INTRIN_N(integralType, type, ret, ptr, expected, desired, false, successOrder, failOrder) + + +#define EASTL_GCC_ATOMIC_CMPXCHG_STRONG_8(type, ret, ptr, expected, desired, successOrder, failOrder) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_N(uint8_t, type, ret, ptr, expected, desired, successOrder, failOrder) + +#define EASTL_GCC_ATOMIC_CMPXCHG_STRONG_16(type, ret, ptr, expected, desired, successOrder, failOrder) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_N(uint16_t, type, ret, ptr, expected, desired, successOrder, failOrder) + +#define EASTL_GCC_ATOMIC_CMPXCHG_STRONG_32(type, ret, ptr, expected, desired, successOrder, failOrder) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_N(uint32_t, type, ret, ptr, expected, desired, successOrder, failOrder) + +#define EASTL_GCC_ATOMIC_CMPXCHG_STRONG_64(type, ret, ptr, expected, desired, successOrder, failOrder) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_N(uint64_t, type, ret, ptr, expected, desired, successOrder, failOrder) + +#define EASTL_GCC_ATOMIC_CMPXCHG_STRONG_128(type, ret, ptr, expected, desired, successOrder, failOrder) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_N(__uint128_t, type, ret, ptr, expected, desired, successOrder, failOrder) + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_*_*_N(type, bool ret, type * ptr, type * expected, type desired) +// +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_8(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_8(type, ret, ptr, expected, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_16(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_16(type, ret, ptr, expected, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_32(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_32(type, ret, ptr, expected, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_64(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_64(type, ret, ptr, expected, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_128(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_128(type, ret, ptr, expected, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_8(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_8(type, ret, ptr, expected, desired, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_16(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_16(type, ret, ptr, expected, desired, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_32(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_32(type, ret, ptr, expected, desired, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED) + +#define 
EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_64(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_64(type, ret, ptr, expected, desired, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_128(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_128(type, ret, ptr, expected, desired, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_8(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_8(type, ret, ptr, expected, desired, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_16(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_16(type, ret, ptr, expected, desired, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_32(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_32(type, ret, ptr, expected, desired, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_64(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_64(type, ret, ptr, expected, desired, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_128(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_128(type, ret, ptr, expected, desired, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_8(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_8(type, ret, ptr, expected, desired, __ATOMIC_RELEASE, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_16(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_16(type, ret, ptr, expected, desired, __ATOMIC_RELEASE, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_32(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_32(type, ret, ptr, expected, desired, __ATOMIC_RELEASE, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_64(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_64(type, ret, ptr, expected, desired, __ATOMIC_RELEASE, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_128(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_128(type, ret, ptr, expected, desired, __ATOMIC_RELEASE, __ATOMIC_RELAXED) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_8(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_8(type, ret, ptr, expected, desired, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_16(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_16(type, ret, ptr, expected, desired, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_32(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_32(type, ret, ptr, expected, desired, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_64(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_64(type, ret, ptr, expected, desired, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_128(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_128(type, ret, ptr, 
expected, desired, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_8(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_8(type, ret, ptr, expected, desired, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_16(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_16(type, ret, ptr, expected, desired, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_32(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_32(type, ret, ptr, expected, desired, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_64(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_64(type, ret, ptr, expected, desired, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_128(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_128(type, ret, ptr, expected, desired, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_8(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_8(type, ret, ptr, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_16(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_16(type, ret, ptr, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_32(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_32(type, ret, ptr, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_64(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_64(type, ret, ptr, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_128(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_128(type, ret, ptr, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_8(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_8(type, ret, ptr, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_16(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_16(type, ret, ptr, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_32(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_32(type, ret, ptr, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_64(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_64(type, ret, ptr, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_128(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_128(type, ret, ptr, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_8(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_8(type, ret, ptr, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_16(type, ret, ptr, expected, desired) 
\ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_16(type, ret, ptr, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_32(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_32(type, ret, ptr, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_64(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_64(type, ret, ptr, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_128(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_128(type, ret, ptr, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_GCC_CMPXCHG_STRONG_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_cmpxchg_weak.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_cmpxchg_weak.h new file mode 100644 index 00000000..f55fe3a3 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_cmpxchg_weak.h @@ -0,0 +1,182 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_GCC_CMPXCHG_WEAK_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_GCC_CMPXCHG_WEAK_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#define EASTL_GCC_ATOMIC_CMPXCHG_WEAK_N(integralType, type, ret, ptr, expected, desired, successOrder, failOrder) \ + EASTL_GCC_ATOMIC_CMPXCHG_INTRIN_N(integralType, type, ret, ptr, expected, desired, true, successOrder, failOrder) + + +#define EASTL_GCC_ATOMIC_CMPXCHG_WEAK_8(type, ret, ptr, expected, desired, successOrder, failOrder) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_N(uint8_t, type, ret, ptr, expected, desired, successOrder, failOrder) + +#define EASTL_GCC_ATOMIC_CMPXCHG_WEAK_16(type, ret, ptr, expected, desired, successOrder, failOrder) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_N(uint16_t, type, ret, ptr, expected, desired, successOrder, failOrder) + +#define EASTL_GCC_ATOMIC_CMPXCHG_WEAK_32(type, ret, ptr, expected, desired, successOrder, failOrder) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_N(uint32_t, type, ret, ptr, expected, desired, successOrder, failOrder) + +#define EASTL_GCC_ATOMIC_CMPXCHG_WEAK_64(type, ret, ptr, expected, desired, successOrder, failOrder) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_N(uint64_t, type, ret, ptr, expected, desired, successOrder, failOrder) + +#define EASTL_GCC_ATOMIC_CMPXCHG_WEAK_128(type, ret, ptr, expected, desired, successOrder, failOrder) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_N(__uint128_t, type, ret, ptr, expected, desired, successOrder, failOrder) + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_*_*_N(type, bool ret, type * ptr, type * expected, type desired) +// +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_8(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_8(type, ret, ptr, expected, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_16(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_16(type, ret, ptr, expected, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED) + +#define 
EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_32(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_32(type, ret, ptr, expected, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_64(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_64(type, ret, ptr, expected, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_128(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_128(type, ret, ptr, expected, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_8(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_8(type, ret, ptr, expected, desired, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_16(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_16(type, ret, ptr, expected, desired, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_32(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_32(type, ret, ptr, expected, desired, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_64(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_64(type, ret, ptr, expected, desired, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_128(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_128(type, ret, ptr, expected, desired, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_8(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_8(type, ret, ptr, expected, desired, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_16(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_16(type, ret, ptr, expected, desired, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_32(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_32(type, ret, ptr, expected, desired, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_64(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_64(type, ret, ptr, expected, desired, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_128(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_128(type, ret, ptr, expected, desired, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_8(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_8(type, ret, ptr, expected, desired, __ATOMIC_RELEASE, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_16(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_16(type, ret, ptr, expected, desired, __ATOMIC_RELEASE, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_32(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_32(type, ret, ptr, expected, desired, __ATOMIC_RELEASE, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_64(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_64(type, ret, ptr, expected, desired, __ATOMIC_RELEASE, __ATOMIC_RELAXED) + +#define 
EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_128(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_128(type, ret, ptr, expected, desired, __ATOMIC_RELEASE, __ATOMIC_RELAXED) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_8(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_8(type, ret, ptr, expected, desired, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_16(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_16(type, ret, ptr, expected, desired, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_32(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_32(type, ret, ptr, expected, desired, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_64(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_64(type, ret, ptr, expected, desired, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_128(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_128(type, ret, ptr, expected, desired, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_8(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_8(type, ret, ptr, expected, desired, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_16(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_16(type, ret, ptr, expected, desired, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_32(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_32(type, ret, ptr, expected, desired, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_64(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_64(type, ret, ptr, expected, desired, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_128(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_128(type, ret, ptr, expected, desired, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_8(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_8(type, ret, ptr, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_16(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_16(type, ret, ptr, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_32(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_32(type, ret, ptr, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_64(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_64(type, ret, ptr, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_128(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_128(type, ret, ptr, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_8(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_8(type, ret, ptr, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE) + +#define 
EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_16(type, ret, ptr, expected, desired) \
+	EASTL_GCC_ATOMIC_CMPXCHG_WEAK_16(type, ret, ptr, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE)
+
+#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_32(type, ret, ptr, expected, desired) \
+	EASTL_GCC_ATOMIC_CMPXCHG_WEAK_32(type, ret, ptr, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE)
+
+#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_64(type, ret, ptr, expected, desired) \
+	EASTL_GCC_ATOMIC_CMPXCHG_WEAK_64(type, ret, ptr, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE)
+
+#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_128(type, ret, ptr, expected, desired) \
+	EASTL_GCC_ATOMIC_CMPXCHG_WEAK_128(type, ret, ptr, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE)
+
+
+#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_8(type, ret, ptr, expected, desired) \
+	EASTL_GCC_ATOMIC_CMPXCHG_WEAK_8(type, ret, ptr, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)
+
+#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_16(type, ret, ptr, expected, desired) \
+	EASTL_GCC_ATOMIC_CMPXCHG_WEAK_16(type, ret, ptr, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)
+
+#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_32(type, ret, ptr, expected, desired) \
+	EASTL_GCC_ATOMIC_CMPXCHG_WEAK_32(type, ret, ptr, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)
+
+#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_64(type, ret, ptr, expected, desired) \
+	EASTL_GCC_ATOMIC_CMPXCHG_WEAK_64(type, ret, ptr, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)
+
+#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_128(type, ret, ptr, expected, desired) \
+	EASTL_GCC_ATOMIC_CMPXCHG_WEAK_128(type, ret, ptr, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)
+
+
+#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_GCC_CMPXCHG_WEAK_H */
diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_cpu_pause.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_cpu_pause.h
new file mode 100644
index 00000000..9d4ac35e
--- /dev/null
+++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_cpu_pause.h
@@ -0,0 +1,31 @@
+/////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_GCC_CPU_PAUSE_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_GCC_CPU_PAUSE_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_CPU_PAUSE() +// +#if defined(EA_PROCESSOR_X86) || defined(EA_PROCESSOR_X86_64) + + #define EASTL_COMPILER_ATOMIC_CPU_PAUSE() \ + __asm__ __volatile__ ("pause") + +#elif defined(EA_PROCESSOR_ARM32) || defined(EA_PROCESSOR_ARM64) + + #define EASTL_COMPILER_ATOMIC_CPU_PAUSE() \ + __asm__ __volatile__ ("yield") + +#endif + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_GCC_CPU_PAUSE_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_exchange.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_exchange.h new file mode 100644 index 00000000..a3325547 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_exchange.h @@ -0,0 +1,118 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_GCC_EXCHANGE_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_GCC_EXCHANGE_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#define EASTL_GCC_ATOMIC_EXCHANGE_N(integralType, type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_EXCHANGE_INTRIN_N(integralType, type, ret, ptr, val, gccMemoryOrder) + + +#define EASTL_GCC_ATOMIC_EXCHANGE_8(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_EXCHANGE_N(uint8_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_EXCHANGE_16(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_EXCHANGE_N(uint16_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_EXCHANGE_32(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_EXCHANGE_N(uint32_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_EXCHANGE_64(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_EXCHANGE_N(uint64_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_EXCHANGE_128(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_EXCHANGE_N(__uint128_t, type, ret, ptr, val, gccMemoryOrder) + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_EXCHANGE_*_N(type, type ret, type * ptr, type val) +// +#define EASTL_COMPILER_ATOMIC_EXCHANGE_RELAXED_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_EXCHANGE_8(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_RELAXED_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_EXCHANGE_16(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_RELAXED_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_EXCHANGE_32(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_RELAXED_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_EXCHANGE_64(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_RELAXED_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_EXCHANGE_128(type, ret, ptr, val, __ATOMIC_RELAXED) + + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQUIRE_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_EXCHANGE_8(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define 
EASTL_COMPILER_ATOMIC_EXCHANGE_ACQUIRE_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_EXCHANGE_16(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQUIRE_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_EXCHANGE_32(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQUIRE_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_EXCHANGE_64(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQUIRE_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_EXCHANGE_128(type, ret, ptr, val, __ATOMIC_ACQUIRE) + + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_RELEASE_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_EXCHANGE_8(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_RELEASE_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_EXCHANGE_16(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_RELEASE_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_EXCHANGE_32(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_RELEASE_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_EXCHANGE_64(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_RELEASE_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_EXCHANGE_128(type, ret, ptr, val, __ATOMIC_RELEASE) + + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQ_REL_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_EXCHANGE_8(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQ_REL_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_EXCHANGE_16(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQ_REL_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_EXCHANGE_32(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQ_REL_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_EXCHANGE_64(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQ_REL_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_EXCHANGE_128(type, ret, ptr, val, __ATOMIC_ACQ_REL) + + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_SEQ_CST_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_EXCHANGE_8(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_SEQ_CST_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_EXCHANGE_16(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_SEQ_CST_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_EXCHANGE_32(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_SEQ_CST_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_EXCHANGE_64(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_SEQ_CST_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_EXCHANGE_128(type, ret, ptr, val, __ATOMIC_SEQ_CST) + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_GCC_EXCHANGE_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_fetch_add.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_fetch_add.h new file mode 100644 index 00000000..98abbb83 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_fetch_add.h @@ -0,0 +1,118 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_GCC_FETCH_ADD_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_GCC_FETCH_ADD_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#define EASTL_GCC_ATOMIC_FETCH_ADD_N(integralType, type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_FETCH_INTRIN_N(integralType, __atomic_fetch_add, type, ret, ptr, val, gccMemoryOrder) + + +#define EASTL_GCC_ATOMIC_FETCH_ADD_8(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_FETCH_ADD_N(uint8_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_FETCH_ADD_16(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_FETCH_ADD_N(uint16_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_FETCH_ADD_32(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_FETCH_ADD_N(uint32_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_FETCH_ADD_64(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_FETCH_ADD_N(uint64_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_FETCH_ADD_128(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_FETCH_ADD_N(__uint128_t, type, ret, ptr, val, gccMemoryOrder) + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_FETCH_ADD_*_N(type, type ret, type * ptr, type val) +// +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELAXED_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_ADD_8(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELAXED_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_ADD_16(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELAXED_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_ADD_32(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELAXED_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_ADD_64(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELAXED_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_ADD_128(type, ret, ptr, val, __ATOMIC_RELAXED) + + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQUIRE_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_ADD_8(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQUIRE_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_ADD_16(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQUIRE_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_ADD_32(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQUIRE_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_ADD_64(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQUIRE_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_ADD_128(type, ret, ptr, val, __ATOMIC_ACQUIRE) + + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELEASE_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_ADD_8(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELEASE_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_ADD_16(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELEASE_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_ADD_32(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELEASE_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_ADD_64(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define 
EASTL_COMPILER_ATOMIC_FETCH_ADD_RELEASE_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_ADD_128(type, ret, ptr, val, __ATOMIC_RELEASE) + + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQ_REL_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_ADD_8(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQ_REL_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_ADD_16(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQ_REL_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_ADD_32(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQ_REL_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_ADD_64(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQ_REL_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_ADD_128(type, ret, ptr, val, __ATOMIC_ACQ_REL) + + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_SEQ_CST_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_ADD_8(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_SEQ_CST_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_ADD_16(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_SEQ_CST_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_ADD_32(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_SEQ_CST_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_ADD_64(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_SEQ_CST_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_ADD_128(type, ret, ptr, val, __ATOMIC_SEQ_CST) + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_GCC_FETCH_ADD_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_fetch_and.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_fetch_and.h new file mode 100644 index 00000000..0dfb81db --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_fetch_and.h @@ -0,0 +1,118 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_GCC_FETCH_AND_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_GCC_FETCH_AND_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#define EASTL_GCC_ATOMIC_FETCH_AND_N(integralType, type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_FETCH_INTRIN_N(integralType, __atomic_fetch_and, type, ret, ptr, val, gccMemoryOrder) + + +#define EASTL_GCC_ATOMIC_FETCH_AND_8(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_FETCH_AND_N(uint8_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_FETCH_AND_16(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_FETCH_AND_N(uint16_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_FETCH_AND_32(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_FETCH_AND_N(uint32_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_FETCH_AND_64(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_FETCH_AND_N(uint64_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_FETCH_AND_128(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_FETCH_AND_N(__uint128_t, type, ret, ptr, val, gccMemoryOrder) + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_FETCH_AND_*_N(type, type ret, type * ptr, type val) +// +#define EASTL_COMPILER_ATOMIC_FETCH_AND_RELAXED_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_AND_8(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_RELAXED_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_AND_16(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_RELAXED_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_AND_32(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_RELAXED_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_AND_64(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_RELAXED_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_AND_128(type, ret, ptr, val, __ATOMIC_RELAXED) + + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQUIRE_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_AND_8(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQUIRE_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_AND_16(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQUIRE_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_AND_32(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQUIRE_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_AND_64(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQUIRE_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_AND_128(type, ret, ptr, val, __ATOMIC_ACQUIRE) + + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_RELEASE_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_AND_8(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_RELEASE_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_AND_16(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_RELEASE_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_AND_32(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_RELEASE_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_AND_64(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define 
EASTL_COMPILER_ATOMIC_FETCH_AND_RELEASE_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_AND_128(type, ret, ptr, val, __ATOMIC_RELEASE) + + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQ_REL_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_AND_8(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQ_REL_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_AND_16(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQ_REL_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_AND_32(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQ_REL_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_AND_64(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQ_REL_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_AND_128(type, ret, ptr, val, __ATOMIC_ACQ_REL) + + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_SEQ_CST_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_AND_8(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_SEQ_CST_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_AND_16(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_SEQ_CST_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_AND_32(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_SEQ_CST_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_AND_64(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_SEQ_CST_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_AND_128(type, ret, ptr, val, __ATOMIC_SEQ_CST) + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_GCC_FETCH_AND_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_fetch_or.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_fetch_or.h new file mode 100644 index 00000000..ba259b74 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_fetch_or.h @@ -0,0 +1,118 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_GCC_FETCH_OR_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_GCC_FETCH_OR_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#define EASTL_GCC_ATOMIC_FETCH_OR_N(integralType, type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_FETCH_INTRIN_N(integralType, __atomic_fetch_or, type, ret, ptr, val, gccMemoryOrder) + + +#define EASTL_GCC_ATOMIC_FETCH_OR_8(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_FETCH_OR_N(uint8_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_FETCH_OR_16(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_FETCH_OR_N(uint16_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_FETCH_OR_32(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_FETCH_OR_N(uint32_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_FETCH_OR_64(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_FETCH_OR_N(uint64_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_FETCH_OR_128(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_FETCH_OR_N(__uint128_t, type, ret, ptr, val, gccMemoryOrder) + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_FETCH_OR_*_N(type, type ret, type * ptr, type val) +// +#define EASTL_COMPILER_ATOMIC_FETCH_OR_RELAXED_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_OR_8(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_RELAXED_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_OR_16(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_RELAXED_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_OR_32(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_RELAXED_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_OR_64(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_RELAXED_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_OR_128(type, ret, ptr, val, __ATOMIC_RELAXED) + + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQUIRE_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_OR_8(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQUIRE_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_OR_16(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQUIRE_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_OR_32(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQUIRE_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_OR_64(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQUIRE_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_OR_128(type, ret, ptr, val, __ATOMIC_ACQUIRE) + + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_RELEASE_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_OR_8(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_RELEASE_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_OR_16(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_RELEASE_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_OR_32(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_RELEASE_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_OR_64(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_RELEASE_128(type, ret, ptr, val) \ + 
EASTL_GCC_ATOMIC_FETCH_OR_128(type, ret, ptr, val, __ATOMIC_RELEASE) + + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQ_REL_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_OR_8(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQ_REL_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_OR_16(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQ_REL_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_OR_32(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQ_REL_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_OR_64(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQ_REL_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_OR_128(type, ret, ptr, val, __ATOMIC_ACQ_REL) + + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_SEQ_CST_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_OR_8(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_SEQ_CST_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_OR_16(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_SEQ_CST_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_OR_32(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_SEQ_CST_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_OR_64(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_SEQ_CST_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_OR_128(type, ret, ptr, val, __ATOMIC_SEQ_CST) + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_GCC_FETCH_OR_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_fetch_sub.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_fetch_sub.h new file mode 100644 index 00000000..c8be225e --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_fetch_sub.h @@ -0,0 +1,118 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_GCC_FETCH_SUB_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_GCC_FETCH_SUB_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#define EASTL_GCC_ATOMIC_FETCH_SUB_N(integralType, type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_FETCH_INTRIN_N(integralType, __atomic_fetch_sub, type, ret, ptr, val, gccMemoryOrder) + + +#define EASTL_GCC_ATOMIC_FETCH_SUB_8(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_FETCH_SUB_N(uint8_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_FETCH_SUB_16(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_FETCH_SUB_N(uint16_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_FETCH_SUB_32(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_FETCH_SUB_N(uint32_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_FETCH_SUB_64(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_FETCH_SUB_N(uint64_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_FETCH_SUB_128(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_FETCH_SUB_N(__uint128_t, type, ret, ptr, val, gccMemoryOrder) + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_FETCH_SUB_*_N(type, type ret, type * ptr, type val) +// +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_RELAXED_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_SUB_8(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_RELAXED_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_SUB_16(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_RELAXED_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_SUB_32(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_RELAXED_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_SUB_64(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_RELAXED_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_SUB_128(type, ret, ptr, val, __ATOMIC_RELAXED) + + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQUIRE_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_SUB_8(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQUIRE_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_SUB_16(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQUIRE_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_SUB_32(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQUIRE_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_SUB_64(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQUIRE_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_SUB_128(type, ret, ptr, val, __ATOMIC_ACQUIRE) + + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_RELEASE_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_SUB_8(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_RELEASE_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_SUB_16(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_RELEASE_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_SUB_32(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_RELEASE_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_SUB_64(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define 
EASTL_COMPILER_ATOMIC_FETCH_SUB_RELEASE_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_SUB_128(type, ret, ptr, val, __ATOMIC_RELEASE) + + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQ_REL_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_SUB_8(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQ_REL_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_SUB_16(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQ_REL_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_SUB_32(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQ_REL_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_SUB_64(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQ_REL_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_SUB_128(type, ret, ptr, val, __ATOMIC_ACQ_REL) + + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_SEQ_CST_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_SUB_8(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_SEQ_CST_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_SUB_16(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_SEQ_CST_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_SUB_32(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_SEQ_CST_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_SUB_64(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_SEQ_CST_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_SUB_128(type, ret, ptr, val, __ATOMIC_SEQ_CST) + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_GCC_FETCH_SUB_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_fetch_xor.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_fetch_xor.h new file mode 100644 index 00000000..4ec6d676 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_fetch_xor.h @@ -0,0 +1,118 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_GCC_FETCH_XOR_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_GCC_FETCH_XOR_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#define EASTL_GCC_ATOMIC_FETCH_XOR_N(integralType, type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_FETCH_INTRIN_N(integralType, __atomic_fetch_xor, type, ret, ptr, val, gccMemoryOrder) + + +#define EASTL_GCC_ATOMIC_FETCH_XOR_8(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_FETCH_XOR_N(uint8_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_FETCH_XOR_16(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_FETCH_XOR_N(uint16_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_FETCH_XOR_32(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_FETCH_XOR_N(uint32_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_FETCH_XOR_64(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_FETCH_XOR_N(uint64_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_FETCH_XOR_128(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_FETCH_XOR_N(__uint128_t, type, ret, ptr, val, gccMemoryOrder) + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_FETCH_XOR_*_N(type, type ret, type * ptr, type val) +// +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_RELAXED_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_XOR_8(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_RELAXED_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_XOR_16(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_RELAXED_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_XOR_32(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_RELAXED_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_XOR_64(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_RELAXED_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_XOR_128(type, ret, ptr, val, __ATOMIC_RELAXED) + + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQUIRE_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_XOR_8(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQUIRE_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_XOR_16(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQUIRE_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_XOR_32(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQUIRE_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_XOR_64(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQUIRE_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_XOR_128(type, ret, ptr, val, __ATOMIC_ACQUIRE) + + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_RELEASE_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_XOR_8(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_RELEASE_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_XOR_16(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_RELEASE_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_XOR_32(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_RELEASE_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_XOR_64(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define 
EASTL_COMPILER_ATOMIC_FETCH_XOR_RELEASE_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_XOR_128(type, ret, ptr, val, __ATOMIC_RELEASE) + + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQ_REL_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_XOR_8(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQ_REL_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_XOR_16(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQ_REL_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_XOR_32(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQ_REL_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_XOR_64(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQ_REL_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_XOR_128(type, ret, ptr, val, __ATOMIC_ACQ_REL) + + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_SEQ_CST_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_XOR_8(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_SEQ_CST_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_XOR_16(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_SEQ_CST_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_XOR_32(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_SEQ_CST_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_XOR_64(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_SEQ_CST_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_XOR_128(type, ret, ptr, val, __ATOMIC_SEQ_CST) + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_GCC_FETCH_XOR_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_load.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_load.h new file mode 100644 index 00000000..a4a3ebf1 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_load.h @@ -0,0 +1,90 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_GCC_LOAD_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_GCC_LOAD_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#define EASTL_GCC_ATOMIC_LOAD_N(integralType, type, ret, ptr, gccMemoryOrder) \ + { \ + integralType retIntegral; \ + __atomic_load(EASTL_ATOMIC_VOLATILE_INTEGRAL_CAST(integralType, (ptr)), &retIntegral, gccMemoryOrder); \ + \ + ret = EASTL_ATOMIC_TYPE_PUN_CAST(type, retIntegral); \ + } + +#define EASTL_GCC_ATOMIC_LOAD_8(type, ret, ptr, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_LOAD_N(uint8_t, type, ret, ptr, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_LOAD_16(type, ret, ptr, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_LOAD_N(uint16_t, type, ret, ptr, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_LOAD_32(type, ret, ptr, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_LOAD_N(uint32_t, type, ret, ptr, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_LOAD_64(type, ret, ptr, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_LOAD_N(uint64_t, type, ret, ptr, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_LOAD_128(type, ret, ptr, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_LOAD_N(__uint128_t, type, ret, ptr, gccMemoryOrder) + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_LOAD_*_N(type, type ret, type * ptr) +// +#define EASTL_COMPILER_ATOMIC_LOAD_RELAXED_8(type, ret, ptr) \ + EASTL_GCC_ATOMIC_LOAD_8(type, ret, ptr, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_LOAD_RELAXED_16(type, ret, ptr) \ + EASTL_GCC_ATOMIC_LOAD_16(type, ret, ptr, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_LOAD_RELAXED_32(type, ret, ptr) \ + EASTL_GCC_ATOMIC_LOAD_32(type, ret, ptr, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_LOAD_RELAXED_64(type, ret, ptr) \ + EASTL_GCC_ATOMIC_LOAD_64(type, ret, ptr, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_LOAD_RELAXED_128(type, ret, ptr) \ + EASTL_GCC_ATOMIC_LOAD_128(type, ret, ptr, __ATOMIC_RELAXED) + + +#define EASTL_COMPILER_ATOMIC_LOAD_ACQUIRE_8(type, ret, ptr) \ + EASTL_GCC_ATOMIC_LOAD_8(type, ret, ptr, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_LOAD_ACQUIRE_16(type, ret, ptr) \ + EASTL_GCC_ATOMIC_LOAD_16(type, ret, ptr, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_LOAD_ACQUIRE_32(type, ret, ptr) \ + EASTL_GCC_ATOMIC_LOAD_32(type, ret, ptr, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_LOAD_ACQUIRE_64(type, ret, ptr) \ + EASTL_GCC_ATOMIC_LOAD_64(type, ret, ptr, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_LOAD_ACQUIRE_128(type, ret, ptr) \ + EASTL_GCC_ATOMIC_LOAD_128(type, ret, ptr, __ATOMIC_ACQUIRE) + + +#define EASTL_COMPILER_ATOMIC_LOAD_SEQ_CST_8(type, ret, ptr) \ + EASTL_GCC_ATOMIC_LOAD_8(type, ret, ptr, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_LOAD_SEQ_CST_16(type, ret, ptr) \ + EASTL_GCC_ATOMIC_LOAD_16(type, ret, ptr, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_LOAD_SEQ_CST_32(type, ret, ptr) \ + EASTL_GCC_ATOMIC_LOAD_32(type, ret, ptr, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_LOAD_SEQ_CST_64(type, ret, ptr) \ + EASTL_GCC_ATOMIC_LOAD_64(type, ret, ptr, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_LOAD_SEQ_CST_128(type, ret, ptr) \ + EASTL_GCC_ATOMIC_LOAD_128(type, ret, ptr, __ATOMIC_SEQ_CST) + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_GCC_LOAD_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_or_fetch.h 
b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_or_fetch.h new file mode 100644 index 00000000..9e4db3e1 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_or_fetch.h @@ -0,0 +1,118 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_GCC_OR_FETCH_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_GCC_OR_FETCH_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#define EASTL_GCC_ATOMIC_OR_FETCH_N(integralType, type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_FETCH_INTRIN_N(integralType, __atomic_or_fetch, type, ret, ptr, val, gccMemoryOrder) + + +#define EASTL_GCC_ATOMIC_OR_FETCH_8(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_OR_FETCH_N(uint8_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_OR_FETCH_16(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_OR_FETCH_N(uint16_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_OR_FETCH_32(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_OR_FETCH_N(uint32_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_OR_FETCH_64(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_OR_FETCH_N(uint64_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_OR_FETCH_128(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_OR_FETCH_N(__uint128_t, type, ret, ptr, val, gccMemoryOrder) + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_OR_FETCH_*_N(type, type ret, type * ptr, type val) +// +#define EASTL_COMPILER_ATOMIC_OR_FETCH_RELAXED_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_OR_FETCH_8(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_RELAXED_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_OR_FETCH_16(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_RELAXED_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_OR_FETCH_32(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_RELAXED_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_OR_FETCH_64(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_RELAXED_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_OR_FETCH_128(type, ret, ptr, val, __ATOMIC_RELAXED) + + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQUIRE_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_OR_FETCH_8(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQUIRE_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_OR_FETCH_16(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQUIRE_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_OR_FETCH_32(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQUIRE_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_OR_FETCH_64(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQUIRE_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_OR_FETCH_128(type, ret, ptr, val, __ATOMIC_ACQUIRE) + + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_RELEASE_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_OR_FETCH_8(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_RELEASE_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_OR_FETCH_16(type, ret, ptr, val, 
__ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_RELEASE_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_OR_FETCH_32(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_RELEASE_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_OR_FETCH_64(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_RELEASE_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_OR_FETCH_128(type, ret, ptr, val, __ATOMIC_RELEASE) + + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQ_REL_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_OR_FETCH_8(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQ_REL_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_OR_FETCH_16(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQ_REL_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_OR_FETCH_32(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQ_REL_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_OR_FETCH_64(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQ_REL_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_OR_FETCH_128(type, ret, ptr, val, __ATOMIC_ACQ_REL) + + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_SEQ_CST_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_OR_FETCH_8(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_SEQ_CST_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_OR_FETCH_16(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_SEQ_CST_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_OR_FETCH_32(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_SEQ_CST_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_OR_FETCH_64(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_SEQ_CST_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_OR_FETCH_128(type, ret, ptr, val, __ATOMIC_SEQ_CST) + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_GCC_OR_FETCH_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_signal_fence.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_signal_fence.h new file mode 100644 index 00000000..16dff14f --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_signal_fence.h @@ -0,0 +1,38 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_GCC_SIGNAL_FENCE_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_GCC_SIGNAL_FENCE_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#define EASTL_GCC_ATOMIC_SIGNAL_FENCE(gccMemoryOrder) \ + __atomic_signal_fence(gccMemoryOrder) + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_SIGNAL_FENCE_*() +// +#define EASTL_COMPILER_ATOMIC_SIGNAL_FENCE_RELAXED() \ + EASTL_GCC_ATOMIC_SIGNAL_FENCE(__ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_SIGNAL_FENCE_ACQUIRE() \ + EASTL_GCC_ATOMIC_SIGNAL_FENCE(__ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_SIGNAL_FENCE_RELEASE() \ + EASTL_GCC_ATOMIC_SIGNAL_FENCE(__ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_SIGNAL_FENCE_ACQ_REL() \ + EASTL_GCC_ATOMIC_SIGNAL_FENCE(__ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_SIGNAL_FENCE_SEQ_CST() \ + EASTL_GCC_ATOMIC_SIGNAL_FENCE(__ATOMIC_SEQ_CST) + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_GCC_SIGNAL_FENCE_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_store.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_store.h new file mode 100644 index 00000000..04a28ac4 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_store.h @@ -0,0 +1,89 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_GCC_STORE_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_GCC_STORE_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#define EASTL_GCC_ATOMIC_STORE_N(integralType, ptr, val, gccMemoryOrder) \ + { \ + integralType valIntegral = EASTL_ATOMIC_TYPE_PUN_CAST(integralType, (val)); \ + __atomic_store(EASTL_ATOMIC_VOLATILE_INTEGRAL_CAST(integralType, (ptr)), &valIntegral, gccMemoryOrder); \ + } + + +#define EASTL_GCC_ATOMIC_STORE_8(ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_STORE_N(uint8_t, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_STORE_16(ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_STORE_N(uint16_t, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_STORE_32(ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_STORE_N(uint32_t, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_STORE_64(ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_STORE_N(uint64_t, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_STORE_128(ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_STORE_N(__uint128_t, ptr, val, gccMemoryOrder) + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_STORE_*_N(type, type * ptr, type val) +// +#define EASTL_COMPILER_ATOMIC_STORE_RELAXED_8(type, ptr, val) \ + EASTL_GCC_ATOMIC_STORE_8(ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_STORE_RELAXED_16(type, ptr, val) \ + EASTL_GCC_ATOMIC_STORE_16(ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_STORE_RELAXED_32(type, ptr, val) \ + EASTL_GCC_ATOMIC_STORE_32(ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_STORE_RELAXED_64(type, ptr, val) \ + EASTL_GCC_ATOMIC_STORE_64(ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_STORE_RELAXED_128(type, ptr, val) \ + EASTL_GCC_ATOMIC_STORE_128(ptr, val, __ATOMIC_RELAXED) + 
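For readers less familiar with the GCC/Clang __atomic builtins these headers wrap, the following standalone sketch (illustration only, not part of the patch; the Vec2f type and the function names are made up, and a GCC or Clang compiler is assumed) shows roughly what the 64-bit store and load macros boil down to: the value is type-punned to a same-width unsigned integer and handed to __atomic_store, and the load path is the mirror image via __atomic_load. It also shows why the store groups only come in RELAXED, RELEASE and SEQ_CST flavors: acquire ordering has no meaning for a plain store.

    // Standalone illustration of the pattern behind EASTL_GCC_ATOMIC_STORE_64 /
    // EASTL_GCC_ATOMIC_LOAD_64 for an 8-byte, trivially copyable type.
    // Requires GCC or Clang for the __atomic builtins.
    #include <cstdint>
    #include <cstring>

    struct Vec2f { float x, y; }; // 8 bytes wide, same width as the uint64_t it is punned to

    void storeRelease(Vec2f* dst, Vec2f value)
    {
        uint64_t bits;                            // integral stand-in for the real type
        std::memcpy(&bits, &value, sizeof(bits)); // the "type pun" step
        // the real macros do this cast via EASTL_ATOMIC_VOLATILE_INTEGRAL_CAST
        __atomic_store(reinterpret_cast<volatile uint64_t*>(dst), &bits, __ATOMIC_RELEASE);
    }

    Vec2f loadAcquire(Vec2f* src)
    {
        uint64_t bits;
        __atomic_load(reinterpret_cast<volatile uint64_t*>(src), &bits, __ATOMIC_ACQUIRE);
        Vec2f result;
        std::memcpy(&result, &bits, sizeof(result)); // pun back to the caller's type
        return result;
    }
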
+ +#define EASTL_COMPILER_ATOMIC_STORE_RELEASE_8(type, ptr, val) \ + EASTL_GCC_ATOMIC_STORE_8(ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_STORE_RELEASE_16(type, ptr, val) \ + EASTL_GCC_ATOMIC_STORE_16(ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_STORE_RELEASE_32(type, ptr, val) \ + EASTL_GCC_ATOMIC_STORE_32(ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_STORE_RELEASE_64(type, ptr, val) \ + EASTL_GCC_ATOMIC_STORE_64(ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_STORE_RELEASE_128(type, ptr, val) \ + EASTL_GCC_ATOMIC_STORE_128(ptr, val, __ATOMIC_RELEASE) + + +#define EASTL_COMPILER_ATOMIC_STORE_SEQ_CST_8(type, ptr, val) \ + EASTL_GCC_ATOMIC_STORE_8(ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_STORE_SEQ_CST_16(type, ptr, val) \ + EASTL_GCC_ATOMIC_STORE_16(ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_STORE_SEQ_CST_32(type, ptr, val) \ + EASTL_GCC_ATOMIC_STORE_32(ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_STORE_SEQ_CST_64(type, ptr, val) \ + EASTL_GCC_ATOMIC_STORE_64(ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_STORE_SEQ_CST_128(type, ptr, val) \ + EASTL_GCC_ATOMIC_STORE_128(ptr, val, __ATOMIC_SEQ_CST) + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_GCC_STORE_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_sub_fetch.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_sub_fetch.h new file mode 100644 index 00000000..62f8cd91 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_sub_fetch.h @@ -0,0 +1,118 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_GCC_SUB_FETCH_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_GCC_SUB_FETCH_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#define EASTL_GCC_ATOMIC_SUB_FETCH_N(integralType, type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_FETCH_INTRIN_N(integralType, __atomic_sub_fetch, type, ret, ptr, val, gccMemoryOrder) + + +#define EASTL_GCC_ATOMIC_SUB_FETCH_8(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_SUB_FETCH_N(uint8_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_SUB_FETCH_16(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_SUB_FETCH_N(uint16_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_SUB_FETCH_32(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_SUB_FETCH_N(uint32_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_SUB_FETCH_64(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_SUB_FETCH_N(uint64_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_SUB_FETCH_128(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_SUB_FETCH_N(__uint128_t, type, ret, ptr, val, gccMemoryOrder) + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_SUB_FETCH_*_N(type, type ret, type * ptr, type val) +// +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELAXED_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_SUB_FETCH_8(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELAXED_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_SUB_FETCH_16(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELAXED_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_SUB_FETCH_32(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELAXED_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_SUB_FETCH_64(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELAXED_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_SUB_FETCH_128(type, ret, ptr, val, __ATOMIC_RELAXED) + + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQUIRE_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_SUB_FETCH_8(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQUIRE_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_SUB_FETCH_16(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQUIRE_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_SUB_FETCH_32(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQUIRE_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_SUB_FETCH_64(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQUIRE_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_SUB_FETCH_128(type, ret, ptr, val, __ATOMIC_ACQUIRE) + + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELEASE_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_SUB_FETCH_8(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELEASE_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_SUB_FETCH_16(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELEASE_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_SUB_FETCH_32(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELEASE_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_SUB_FETCH_64(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define 
EASTL_COMPILER_ATOMIC_SUB_FETCH_RELEASE_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_SUB_FETCH_128(type, ret, ptr, val, __ATOMIC_RELEASE) + + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQ_REL_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_SUB_FETCH_8(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQ_REL_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_SUB_FETCH_16(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQ_REL_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_SUB_FETCH_32(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQ_REL_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_SUB_FETCH_64(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQ_REL_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_SUB_FETCH_128(type, ret, ptr, val, __ATOMIC_ACQ_REL) + + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_SEQ_CST_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_SUB_FETCH_8(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_SEQ_CST_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_SUB_FETCH_16(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_SEQ_CST_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_SUB_FETCH_32(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_SEQ_CST_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_SUB_FETCH_64(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_SEQ_CST_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_SUB_FETCH_128(type, ret, ptr, val, __ATOMIC_SEQ_CST) + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_GCC_SUB_FETCH_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_thread_fence.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_thread_fence.h new file mode 100644 index 00000000..0dd005e4 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_thread_fence.h @@ -0,0 +1,38 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_GCC_THREAD_FENCE_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_GCC_THREAD_FENCE_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#define EASTL_GCC_ATOMIC_THREAD_FENCE(gccMemoryOrder) \ + __atomic_thread_fence(gccMemoryOrder) + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_THREAD_FENCE_*() +// +#define EASTL_COMPILER_ATOMIC_THREAD_FENCE_RELAXED() \ + EASTL_GCC_ATOMIC_THREAD_FENCE(__ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_THREAD_FENCE_ACQUIRE() \ + EASTL_GCC_ATOMIC_THREAD_FENCE(__ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_THREAD_FENCE_RELEASE() \ + EASTL_GCC_ATOMIC_THREAD_FENCE(__ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_THREAD_FENCE_ACQ_REL() \ + EASTL_GCC_ATOMIC_THREAD_FENCE(__ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_THREAD_FENCE_SEQ_CST() \ + EASTL_GCC_ATOMIC_THREAD_FENCE(__ATOMIC_SEQ_CST) + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_GCC_THREAD_FENCE_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_xor_fetch.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_xor_fetch.h new file mode 100644 index 00000000..4827d79f --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_xor_fetch.h @@ -0,0 +1,118 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_GCC_XOR_FETCH_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_GCC_XOR_FETCH_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#define EASTL_GCC_ATOMIC_XOR_FETCH_N(integralType, type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_FETCH_INTRIN_N(integralType, __atomic_xor_fetch, type, ret, ptr, val, gccMemoryOrder) + + +#define EASTL_GCC_ATOMIC_XOR_FETCH_8(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_XOR_FETCH_N(uint8_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_XOR_FETCH_16(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_XOR_FETCH_N(uint16_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_XOR_FETCH_32(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_XOR_FETCH_N(uint32_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_XOR_FETCH_64(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_XOR_FETCH_N(uint64_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_XOR_FETCH_128(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_XOR_FETCH_N(__uint128_t, type, ret, ptr, val, gccMemoryOrder) + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_XOR_FETCH_*_N(type, type ret, type * ptr, type val) +// +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELAXED_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_XOR_FETCH_8(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELAXED_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_XOR_FETCH_16(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELAXED_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_XOR_FETCH_32(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELAXED_64(type, ret, ptr, 
val) \ + EASTL_GCC_ATOMIC_XOR_FETCH_64(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELAXED_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_XOR_FETCH_128(type, ret, ptr, val, __ATOMIC_RELAXED) + + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQUIRE_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_XOR_FETCH_8(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQUIRE_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_XOR_FETCH_16(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQUIRE_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_XOR_FETCH_32(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQUIRE_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_XOR_FETCH_64(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQUIRE_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_XOR_FETCH_128(type, ret, ptr, val, __ATOMIC_ACQUIRE) + + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELEASE_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_XOR_FETCH_8(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELEASE_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_XOR_FETCH_16(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELEASE_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_XOR_FETCH_32(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELEASE_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_XOR_FETCH_64(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELEASE_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_XOR_FETCH_128(type, ret, ptr, val, __ATOMIC_RELEASE) + + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQ_REL_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_XOR_FETCH_8(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQ_REL_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_XOR_FETCH_16(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQ_REL_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_XOR_FETCH_32(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQ_REL_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_XOR_FETCH_64(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQ_REL_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_XOR_FETCH_128(type, ret, ptr, val, __ATOMIC_ACQ_REL) + + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_SEQ_CST_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_XOR_FETCH_8(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_SEQ_CST_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_XOR_FETCH_16(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_SEQ_CST_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_XOR_FETCH_32(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_SEQ_CST_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_XOR_FETCH_64(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_SEQ_CST_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_XOR_FETCH_128(type, ret, ptr, val, __ATOMIC_SEQ_CST) + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_GCC_XOR_FETCH_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc.h new file mode 100644 index 00000000..6df8c05f --- /dev/null +++ 
b/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc.h @@ -0,0 +1,260 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +EA_DISABLE_ALL_VC_WARNINGS(); +#include <Windows.h> +#include <intrin.h> +EA_RESTORE_ALL_VC_WARNINGS(); + + +///////////////////////////////////////////////////////////////////////////////// + + +#define EASTL_COMPILER_ATOMIC_HAS_8BIT +#define EASTL_COMPILER_ATOMIC_HAS_16BIT +#define EASTL_COMPILER_ATOMIC_HAS_32BIT +#define EASTL_COMPILER_ATOMIC_HAS_64BIT + +#if EA_PLATFORM_PTR_SIZE == 8 + #define EASTL_COMPILER_ATOMIC_HAS_128BIT +#endif + + +///////////////////////////////////////////////////////////////////////////////// + + +#define EASTL_COMPILER_ATOMIC_FIXED_WIDTH_TYPE_8 char +#define EASTL_COMPILER_ATOMIC_FIXED_WIDTH_TYPE_16 short +#define EASTL_COMPILER_ATOMIC_FIXED_WIDTH_TYPE_32 long +#define EASTL_COMPILER_ATOMIC_FIXED_WIDTH_TYPE_64 __int64 + +namespace eastl +{ + +namespace internal +{ + +struct FixedWidth128 +{ + __int64 value[2]; +}; + +} // namespace internal + +} // namespace eastl + +#define EASTL_COMPILER_ATOMIC_FIXED_WIDTH_TYPE_128 eastl::internal::FixedWidth128 + + +///////////////////////////////////////////////////////////////////////////////// + + +/** + * NOTE: + * + * Unfortunately MSVC Intrinsics depend on the architecture + * that we are compiling for. + * These are some indirection macros to make our lives easier and + * ensure the least possible amount of copy-paste to reduce programmer errors. + * + * All compiler implementations end up deferring to the below macros.
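+ *
+ * As a rough illustration of how the macros below compose: on ARM,
+ *     EASTL_MSVC_ATOMIC_FETCH_OP(ret, ptr, val, ACQUIRE, _InterlockedExchangeAdd)
+ * is intended to expand to roughly
+ *     ret = _InterlockedExchangeAdd_acq(ptr, val);
+ * by pasting the _nf/_acq/_rel suffix onto the intrinsic name for the relaxed,
+ * acquire and release orders, while on x86/x64 the memory order parameter is
+ * simply ignored and the plain
+ *     ret = _InterlockedExchangeAdd(ptr, val);
+ * form is used, since the lock-prefixed interlocked operations are already full
+ * barriers there.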
+ */ +#if defined(EA_PROCESSOR_X86) || defined(EA_PROCESSOR_X86_64) + + + #define EASTL_MSVC_ATOMIC_FETCH_OP(ret, ptr, val, MemoryOrder, Intrinsic) \ + ret = Intrinsic(ptr, val) + + #define EASTL_MSVC_ATOMIC_EXCHANGE_OP(ret, ptr, val, MemoryOrder, Intrinsic) \ + ret = Intrinsic(ptr, val) + + #define EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_OP(ret, ptr, comparand, exchange, MemoryOrder, Intrinsic) \ + ret = Intrinsic(ptr, exchange, comparand) + + #define EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_128_OP(ret, ptr, comparandResult, exchangeHigh, exchangeLow, MemoryOrder) \ + ret = _InterlockedCompareExchange128_np(ptr, exchangeHigh, exchangeLow, comparandResult) + + +#elif defined(EA_PROCESSOR_ARM32) || defined(EA_PROCESSOR_ARM64) + + + #define EASTL_MSVC_INTRINSIC_RELAXED(Intrinsic) \ + EA_PREPROCESSOR_JOIN(Intrinsic, _nf) + + #define EASTL_MSVC_INTRINSIC_ACQUIRE(Intrinsic) \ + EA_PREPROCESSOR_JOIN(Intrinsic, _acq) + + #define EASTL_MSVC_INTRINSIC_RELEASE(Intrinsic) \ + EA_PREPROCESSOR_JOIN(Intrinsic, _rel) + + #define EASTL_MSVC_INTRINSIC_ACQ_REL(Intrinsic) \ + Intrinsic + + #define EASTL_MSVC_INTRINSIC_SEQ_CST(Intrinsic) \ + Intrinsic + + + #define EASTL_MSVC_ATOMIC_FETCH_OP(ret, ptr, val, MemoryOrder, Intrinsic) \ + ret = EA_PREPROCESSOR_JOIN(EASTL_MSVC_INTRINSIC_, MemoryOrder)(Intrinsic)(ptr, val) + + #define EASTL_MSVC_ATOMIC_EXCHANGE_OP(ret, ptr, val, MemoryOrder, Intrinsic) \ + ret = EA_PREPROCESSOR_JOIN(EASTL_MSVC_INTRINSIC_, MemoryOrder)(Intrinsic)(ptr, val) + + #define EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_OP(ret, ptr, comparand, exchange, MemoryOrder, Intrinsic) \ + ret = EA_PREPROCESSOR_JOIN(EASTL_MSVC_INTRINSIC_, MemoryOrder)(Intrinsic)(ptr, exchange, comparand) + + #define EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_128_OP(ret, ptr, comparandResult, exchangeHigh, exchangeLow, MemoryOrder) \ + ret = EA_PREPROCESSOR_JOIN(EASTL_MSVC_INTRINSIC_, MemoryOrder)(_InterlockedCompareExchange128)(ptr, exchangeHigh, exchangeLow, comparandResult) + + +#endif + + +///////////////////////////////////////////////////////////////////////////////// + + +#define EASTL_MSVC_NOP_POST_INTRIN_COMPUTE(ret, lhs, rhs) + +#define EASTL_MSVC_NOP_PRE_INTRIN_COMPUTE(ret, val) \ + ret = (val) + + +#define EASTL_MSVC_ATOMIC_FETCH_INTRIN_N(integralType, fetchIntrinsic, type, ret, ptr, val, MemoryOrder, PRE_INTRIN_COMPUTE, POST_INTRIN_COMPUTE) \ + { \ + integralType retIntegral; \ + type valCompute; \ + \ + PRE_INTRIN_COMPUTE(valCompute, (val)); \ + const integralType valIntegral = EASTL_ATOMIC_TYPE_PUN_CAST(integralType, valCompute); \ + \ + EASTL_MSVC_ATOMIC_FETCH_OP(retIntegral, EASTL_ATOMIC_VOLATILE_INTEGRAL_CAST(integralType, (ptr)), \ + valIntegral, MemoryOrder, fetchIntrinsic); \ + \ + ret = EASTL_ATOMIC_TYPE_PUN_CAST(type, retIntegral); \ + POST_INTRIN_COMPUTE(ret, ret, (val)); \ + } + +#define EASTL_MSVC_ATOMIC_EXCHANGE_INTRIN_N(integralType, exchangeIntrinsic, type, ret, ptr, val, MemoryOrder) \ + { \ + integralType retIntegral; \ + EASTL_MSVC_ATOMIC_EXCHANGE_OP(retIntegral, EASTL_ATOMIC_VOLATILE_INTEGRAL_CAST(integralType, (ptr)), \ + EASTL_ATOMIC_TYPE_PUN_CAST(integralType, (val)), MemoryOrder, \ + exchangeIntrinsic); \ + \ + ret = EASTL_ATOMIC_TYPE_PUN_CAST(type, retIntegral); \ + } + +#define EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_INTRIN_N(integralType, cmpxchgStrongIntrinsic, type, ret, ptr, expected, desired, MemoryOrder) \ + { \ + integralType comparandIntegral = EASTL_ATOMIC_TYPE_PUN_CAST(integralType, *(expected)); \ + integralType oldIntegral; \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_OP(oldIntegral, 
EASTL_ATOMIC_VOLATILE_INTEGRAL_CAST(integralType, (ptr)), \ + comparandIntegral, EASTL_ATOMIC_TYPE_PUN_CAST(integralType, (desired)), \ + MemoryOrder, cmpxchgStrongIntrinsic); \ + \ + if (oldIntegral == comparandIntegral) \ + { \ + ret = true; \ + } \ + else \ + { \ + *(expected) = EASTL_ATOMIC_TYPE_PUN_CAST(type, oldIntegral); \ + ret = false; \ + } \ + } + +/** + * In my own opinion, I found the wording on the Microsoft docs a little confusing. + * ExchangeHigh means the top 8 bytes so (ptr + 8). + * ExchangeLow means the low 8 bytes so (ptr). + * Endianness does not matter since we are just loading data and comparing data. + * Think of it as memcpy() and memcmp() function calls whereby the layout of the + * data itself is irrelevant. + * Only after we type pun back to the original type, and load from memory does + * the layout of the data matter again. + */ +#define EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_INTRIN_128(type, ret, ptr, expected, desired, MemoryOrder) \ + { \ + union TypePun \ + { \ + type templateType; \ + \ + struct exchange128 \ + { \ + __int64 value[2]; \ + }; \ + \ + struct exchange128 exchangePun; \ + }; \ + \ + union TypePun typePun = { (desired) }; \ + \ + unsigned char cmpxchgRetChar; \ + cmpxchgRetChar = EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_128_OP(cmpxchgRetChar, EASTL_ATOMIC_VOLATILE_TYPE_CAST(__int64, (ptr)), \ + EASTL_ATOMIC_TYPE_CAST(__int64, (expected)), \ + typePun.exchangePun.value[1], typePun.exchangePun.value[0], \ + MemoryOrder); \ + \ + ret = static_cast<bool>(cmpxchgRetChar); \ + } + + +///////////////////////////////////////////////////////////////////////////////// + + +#define EASTL_MSVC_ATOMIC_FETCH_OP_N(integralType, fetchIntrinsic, type, ret, ptr, val, MemoryOrder, PRE_INTRIN_COMPUTE) \ + EASTL_MSVC_ATOMIC_FETCH_INTRIN_N(integralType, fetchIntrinsic, type, ret, ptr, val, MemoryOrder, PRE_INTRIN_COMPUTE, EASTL_MSVC_NOP_POST_INTRIN_COMPUTE) + +#define EASTL_MSVC_ATOMIC_OP_FETCH_N(integralType, fetchIntrinsic, type, ret, ptr, val, MemoryOrder, PRE_INTRIN_COMPUTE, POST_INTRIN_COMPUTE) \ + EASTL_MSVC_ATOMIC_FETCH_INTRIN_N(integralType, fetchIntrinsic, type, ret, ptr, val, MemoryOrder, PRE_INTRIN_COMPUTE, POST_INTRIN_COMPUTE) + +#define EASTL_MSVC_ATOMIC_EXCHANGE_OP_N(integralType, exchangeIntrinsic, type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_EXCHANGE_INTRIN_N(integralType, exchangeIntrinsic, type, ret, ptr, val, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_OP_N(integralType, cmpxchgStrongIntrinsic, type, ret, ptr, expected, desired, MemoryOrder) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_INTRIN_N(integralType, cmpxchgStrongIntrinsic, type, ret, ptr, expected, desired, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_OP_128(type, ret, ptr, expected, desired, MemoryOrder) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_INTRIN_128(type, ret, ptr, expected, desired, MemoryOrder) + + +///////////////////////////////////////////////////////////////////////////////// + + +#include "compiler_msvc_fetch_add.h" +#include "compiler_msvc_fetch_sub.h" + +#include "compiler_msvc_fetch_and.h" +#include "compiler_msvc_fetch_xor.h" +#include "compiler_msvc_fetch_or.h" + +#include "compiler_msvc_add_fetch.h" +#include "compiler_msvc_sub_fetch.h" + +#include "compiler_msvc_and_fetch.h" +#include "compiler_msvc_xor_fetch.h" +#include "compiler_msvc_or_fetch.h" + +#include "compiler_msvc_exchange.h" + +#include "compiler_msvc_cmpxchg_weak.h" +#include "compiler_msvc_cmpxchg_strong.h" + +#include "compiler_msvc_barrier.h" + +#include "compiler_msvc_cpu_pause.h" + +#include 
"compiler_msvc_signal_fence.h" + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_add_fetch.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_add_fetch.h new file mode 100644 index 00000000..12fc4b04 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_add_fetch.h @@ -0,0 +1,104 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_ADD_FETCH_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_ADD_FETCH_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#define EASTL_MSVC_ADD_FETCH_POST_INTRIN_COMPUTE(ret, val, addend) \ + ret = (val) + (addend) + +#define EASTL_MSVC_ATOMIC_ADD_FETCH_N(integralType, addIntrinsic, type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_OP_FETCH_N(integralType, addIntrinsic, type, ret, ptr, val, MemoryOrder, \ + EASTL_MSVC_NOP_PRE_INTRIN_COMPUTE, EASTL_MSVC_ADD_FETCH_POST_INTRIN_COMPUTE) + + +#define EASTL_MSVC_ATOMIC_ADD_FETCH_8(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_ADD_FETCH_N(char, _InterlockedExchangeAdd8, type, ret, ptr, val, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_ADD_FETCH_16(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_ADD_FETCH_N(short, _InterlockedExchangeAdd16, type, ret, ptr, val, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_ADD_FETCH_32(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_ADD_FETCH_N(long, _InterlockedExchangeAdd, type, ret, ptr, val, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_ADD_FETCH_64(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_ADD_FETCH_N(__int64, _InterlockedExchangeAdd64, type, ret, ptr, val, MemoryOrder) + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_ADD_FETCH_*_N(type, type ret, type * ptr, type val) +// +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_RELAXED_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_ADD_FETCH_8(type, ret, ptr, val, RELAXED) + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_RELAXED_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_ADD_FETCH_16(type, ret, ptr, val, RELAXED) + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_RELAXED_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_ADD_FETCH_32(type, ret, ptr, val, RELAXED) + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_RELAXED_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_ADD_FETCH_64(type, ret, ptr, val, RELAXED) + + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQUIRE_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_ADD_FETCH_8(type, ret, ptr, val, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQUIRE_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_ADD_FETCH_16(type, ret, ptr, val, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQUIRE_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_ADD_FETCH_32(type, ret, ptr, val, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQUIRE_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_ADD_FETCH_64(type, ret, ptr, val, ACQUIRE) + + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_RELEASE_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_ADD_FETCH_8(type, ret, ptr, val, RELEASE) + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_RELEASE_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_ADD_FETCH_16(type, ret, ptr, val, RELEASE) + +#define 
EASTL_COMPILER_ATOMIC_ADD_FETCH_RELEASE_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_ADD_FETCH_32(type, ret, ptr, val, RELEASE) + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_RELEASE_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_ADD_FETCH_64(type, ret, ptr, val, RELEASE) + + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQ_REL_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_ADD_FETCH_8(type, ret, ptr, val, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQ_REL_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_ADD_FETCH_16(type, ret, ptr, val, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQ_REL_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_ADD_FETCH_32(type, ret, ptr, val, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQ_REL_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_ADD_FETCH_64(type, ret, ptr, val, ACQ_REL) + + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_SEQ_CST_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_ADD_FETCH_8(type, ret, ptr, val, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_SEQ_CST_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_ADD_FETCH_16(type, ret, ptr, val, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_SEQ_CST_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_ADD_FETCH_32(type, ret, ptr, val, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_SEQ_CST_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_ADD_FETCH_64(type, ret, ptr, val, SEQ_CST) + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_ADD_FETCH_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_and_fetch.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_and_fetch.h new file mode 100644 index 00000000..70ec577f --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_and_fetch.h @@ -0,0 +1,121 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_AND_FETCH_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_AND_FETCH_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#if defined(EA_PROCESSOR_X86_64) + + #define EASTL_MSVC_ATOMIC_AND_FETCH_INTRIN_8 _InterlockedAnd8_np + #define EASTL_MSVC_ATOMIC_AND_FETCH_INTRIN_16 _InterlockedAnd16_np + #define EASTL_MSVC_ATOMIC_AND_FETCH_INTRIN_32 _InterlockedAnd_np + #define EASTL_MSVC_ATOMIC_AND_FETCH_INTRIN_64 _InterlockedAnd64_np + +#else + + #define EASTL_MSVC_ATOMIC_AND_FETCH_INTRIN_8 _InterlockedAnd8 + #define EASTL_MSVC_ATOMIC_AND_FETCH_INTRIN_16 _InterlockedAnd16 + #define EASTL_MSVC_ATOMIC_AND_FETCH_INTRIN_32 _InterlockedAnd + #define EASTL_MSVC_ATOMIC_AND_FETCH_INTRIN_64 _InterlockedAnd64 + +#endif + + +#define EASTL_MSVC_AND_FETCH_POST_INTRIN_COMPUTE(ret, val, andend) \ + ret = (val) & (andend) + +#define EASTL_MSVC_ATOMIC_AND_FETCH_N(integralType, andIntrinsic, type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_OP_FETCH_N(integralType, andIntrinsic, type, ret, ptr, val, MemoryOrder, \ + EASTL_MSVC_NOP_PRE_INTRIN_COMPUTE, EASTL_MSVC_AND_FETCH_POST_INTRIN_COMPUTE) + + +#define EASTL_MSVC_ATOMIC_AND_FETCH_8(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_AND_FETCH_N(char, EASTL_MSVC_ATOMIC_AND_FETCH_INTRIN_8, type, ret, ptr, val, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_AND_FETCH_16(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_AND_FETCH_N(short, EASTL_MSVC_ATOMIC_AND_FETCH_INTRIN_16, type, ret, ptr, val, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_AND_FETCH_32(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_AND_FETCH_N(long, EASTL_MSVC_ATOMIC_AND_FETCH_INTRIN_32, type, ret, ptr, val, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_AND_FETCH_64(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_AND_FETCH_N(__int64, EASTL_MSVC_ATOMIC_AND_FETCH_INTRIN_64, type, ret, ptr, val, MemoryOrder) + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_AND_FETCH_*_N(type, type ret, type * ptr, type val) +// +#define EASTL_COMPILER_ATOMIC_AND_FETCH_RELAXED_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_AND_FETCH_8(type, ret, ptr, val, RELAXED) + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_RELAXED_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_AND_FETCH_16(type, ret, ptr, val, RELAXED) + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_RELAXED_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_AND_FETCH_32(type, ret, ptr, val, RELAXED) + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_RELAXED_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_AND_FETCH_64(type, ret, ptr, val, RELAXED) + + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQUIRE_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_AND_FETCH_8(type, ret, ptr, val, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQUIRE_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_AND_FETCH_16(type, ret, ptr, val, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQUIRE_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_AND_FETCH_32(type, ret, ptr, val, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQUIRE_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_AND_FETCH_64(type, ret, ptr, val, ACQUIRE) + + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_RELEASE_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_AND_FETCH_8(type, ret, ptr, val, RELEASE) + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_RELEASE_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_AND_FETCH_16(type, ret, 
ptr, val, RELEASE) + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_RELEASE_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_AND_FETCH_32(type, ret, ptr, val, RELEASE) + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_RELEASE_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_AND_FETCH_64(type, ret, ptr, val, RELEASE) + + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQ_REL_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_AND_FETCH_8(type, ret, ptr, val, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQ_REL_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_AND_FETCH_16(type, ret, ptr, val, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQ_REL_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_AND_FETCH_32(type, ret, ptr, val, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQ_REL_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_AND_FETCH_64(type, ret, ptr, val, ACQ_REL) + + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_SEQ_CST_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_AND_FETCH_8(type, ret, ptr, val, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_SEQ_CST_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_AND_FETCH_16(type, ret, ptr, val, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_SEQ_CST_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_AND_FETCH_32(type, ret, ptr, val, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_SEQ_CST_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_AND_FETCH_64(type, ret, ptr, val, SEQ_CST) + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_AND_FETCH_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_barrier.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_barrier.h new file mode 100644 index 00000000..02e2d03a --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_barrier.h @@ -0,0 +1,31 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_BARRIER_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_BARRIER_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_COMPILER_BARRIER() +// +#define EASTL_COMPILER_ATOMIC_COMPILER_BARRIER() \ + _ReadWriteBarrier() + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_COMPILER_BARRIER_DATA_DEPENDENCY(const T&, type) +// +#define EASTL_COMPILER_ATOMIC_COMPILER_BARRIER_DATA_DEPENDENCY(val, type) \ + EASTL_COMPILER_ATOMIC_COMPILER_BARRIER_DATA_DEPENDENCY_FUNC(const_cast<type*>(eastl::addressof((val)))); \ + EASTL_ATOMIC_COMPILER_BARRIER() + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_BARRIER_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_cmpxchg_strong.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_cmpxchg_strong.h new file mode 100644 index 00000000..42117a1a --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_cmpxchg_strong.h @@ -0,0 +1,195 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_CMPXCHG_STRONG_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_CMPXCHG_STRONG_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#if defined(EA_PROCESSOR_X86_64) + + #define EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_INTRIN_8 _InterlockedCompareExchange8 + #define EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_INTRIN_16 _InterlockedCompareExchange16_np + #define EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_INTRIN_32 _InterlockedCompareExchange_np + #define EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_INTRIN_64 _InterlockedCompareExchange64_np + +#else + + #define EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_INTRIN_8 _InterlockedCompareExchange8 + #define EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_INTRIN_16 _InterlockedCompareExchange16 + #define EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_INTRIN_32 _InterlockedCompareExchange + #define EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_INTRIN_64 _InterlockedCompareExchange64 + +#endif + + +#define EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_8(type, ret, ptr, expected, desired, MemoryOrder) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_OP_N(char, EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_INTRIN_8, type, ret, ptr, expected, desired, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_16(type, ret, ptr, expected, desired, MemoryOrder) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_OP_N(short, EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_INTRIN_16, type, ret, ptr, expected, desired, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_32(type, ret, ptr, expected, desired, MemoryOrder) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_OP_N(long, EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_INTRIN_32, type, ret, ptr, expected, desired, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_64(type, ret, ptr, expected, desired, MemoryOrder) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_OP_N(__int64, EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_INTRIN_64, type, ret, ptr, expected, desired, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_128(type, ret, ptr, expected, desired, MemoryOrder) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_OP_128(type, ret, ptr, expected, desired, MemoryOrder) + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_*_*_N(type, bool ret, type * ptr, type * expected, type desired) +// +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_8(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_8(type, ret, ptr, expected, desired, RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_16(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_16(type, ret, ptr, expected, desired, RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_32(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_32(type, ret, ptr, expected, desired, RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_64(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_64(type, ret, ptr, expected, desired, RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_128(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_128(type, ret, ptr, expected, desired, RELAXED) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_8(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_8(type, ret, ptr, expected, desired, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_16(type, ret, ptr, expected, desired) 
\ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_16(type, ret, ptr, expected, desired, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_32(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_32(type, ret, ptr, expected, desired, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_64(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_64(type, ret, ptr, expected, desired, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_128(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_128(type, ret, ptr, expected, desired, ACQUIRE) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_8(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_8(type, ret, ptr, expected, desired, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_16(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_16(type, ret, ptr, expected, desired, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_32(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_32(type, ret, ptr, expected, desired, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_64(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_64(type, ret, ptr, expected, desired, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_128(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_128(type, ret, ptr, expected, desired, ACQUIRE) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_8(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_8(type, ret, ptr, expected, desired, RELEASE) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_16(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_16(type, ret, ptr, expected, desired, RELEASE) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_32(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_32(type, ret, ptr, expected, desired, RELEASE) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_64(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_64(type, ret, ptr, expected, desired, RELEASE) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_128(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_128(type, ret, ptr, expected, desired, RELEASE) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_8(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_8(type, ret, ptr, expected, desired, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_16(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_16(type, ret, ptr, expected, desired, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_32(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_32(type, ret, ptr, expected, desired, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_64(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_64(type, ret, ptr, expected, desired, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_128(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_128(type, ret, ptr, expected, desired, ACQ_REL) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_8(type, ret, ptr, expected, desired) \ + 
EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_8(type, ret, ptr, expected, desired, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_16(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_16(type, ret, ptr, expected, desired, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_32(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_32(type, ret, ptr, expected, desired, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_64(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_64(type, ret, ptr, expected, desired, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_128(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_128(type, ret, ptr, expected, desired, ACQ_REL) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_8(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_8(type, ret, ptr, expected, desired, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_16(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_16(type, ret, ptr, expected, desired, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_32(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_32(type, ret, ptr, expected, desired, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_64(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_64(type, ret, ptr, expected, desired, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_128(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_128(type, ret, ptr, expected, desired, SEQ_CST) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_8(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_8(type, ret, ptr, expected, desired, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_16(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_16(type, ret, ptr, expected, desired, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_32(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_32(type, ret, ptr, expected, desired, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_64(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_64(type, ret, ptr, expected, desired, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_128(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_128(type, ret, ptr, expected, desired, SEQ_CST) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_8(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_8(type, ret, ptr, expected, desired, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_16(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_16(type, ret, ptr, expected, desired, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_32(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_32(type, ret, ptr, expected, desired, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_64(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_64(type, ret, ptr, expected, desired, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_128(type, ret, ptr, expected, desired) \ + 
EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_128(type, ret, ptr, expected, desired, SEQ_CST) + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_CMPXCHG_STRONG_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_cmpxchg_weak.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_cmpxchg_weak.h new file mode 100644 index 00000000..8f4147ac --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_cmpxchg_weak.h @@ -0,0 +1,162 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_CMPXCHG_WEAK_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_CMPXCHG_WEAK_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_*_*_N(type, bool ret, type * ptr, type * expected, type desired) +// +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_8(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_8(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_16(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_16(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_32(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_32(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_64(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_64(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_128(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_128(type, ret, ptr, expected, desired) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_8(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_8(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_16(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_16(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_32(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_32(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_64(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_64(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_128(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_128(type, ret, ptr, expected, desired) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_8(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_8(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_16(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_16(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_32(type, ret, ptr, 
expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_32(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_64(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_64(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_128(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_128(type, ret, ptr, expected, desired) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_8(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_8(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_16(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_16(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_32(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_32(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_64(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_64(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_128(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_128(type, ret, ptr, expected, desired) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_8(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_8(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_16(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_16(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_32(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_32(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_64(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_64(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_128(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_128(type, ret, ptr, expected, desired) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_8(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_8(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_16(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_16(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_32(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_32(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_64(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_64(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_128(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_128(type, ret, ptr, expected, desired) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_8(type, ret, ptr, expected, desired) \ + 
EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_8(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_16(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_16(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_32(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_32(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_64(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_64(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_128(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_128(type, ret, ptr, expected, desired) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_8(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_8(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_16(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_16(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_32(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_32(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_64(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_64(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_128(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_128(type, ret, ptr, expected, desired) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_8(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_8(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_16(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_16(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_32(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_32(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_64(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_64(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_128(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_128(type, ret, ptr, expected, desired) + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_CMPXCHG_WEAK_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_cpu_pause.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_cpu_pause.h new file mode 100644 index 00000000..720701ab --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_cpu_pause.h @@ -0,0 +1,27 @@ +///////////////////////////////////////////////////////////////////////////////// +// copyright (c) electronic arts inc. all rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_CPU_PAUSE_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_CPU_PAUSE_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_CPU_PAUSE() +// +// NOTE: +// Rather obscure macro in Windows.h that expands to pause or rep; nop on +// compatible x86 cpus or the arm yield on compatible arm processors. +// This is nicer than switching on platform specific intrinsics. +// +#define EASTL_COMPILER_ATOMIC_CPU_PAUSE() \ + YieldProcessor() + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_CPU_PAUSE_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_exchange.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_exchange.h new file mode 100644 index 00000000..323f1fae --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_exchange.h @@ -0,0 +1,125 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_EXCHANGE_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_EXCHANGE_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#define EASTL_MSVC_ATOMIC_EXCHANGE_8(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_EXCHANGE_OP_N(char, _InterlockedExchange8, type, ret, ptr, val, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_EXCHANGE_16(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_EXCHANGE_OP_N(short, _InterlockedExchange16, type, ret, ptr, val, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_EXCHANGE_32(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_EXCHANGE_OP_N(long, _InterlockedExchange, type, ret, ptr, val, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_EXCHANGE_64(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_EXCHANGE_OP_N(__int64, _InterlockedExchange64, type, ret, ptr, val, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_EXCHANGE_128(type, ret, ptr, val, MemoryOrder) \ + { \ + bool cmpxchgRet; \ + /* This is intentionally a non-atomic 128-bit load which may observe shearing. */ \ + /* Either we do not observe *(ptr) but then the cmpxchg will fail and the observed */ \ + /* atomic load will be returned. Or the non-atomic load got lucky and the cmpxchg succeeds */ \ + /* because the observed value equals the value in *(ptr) thus we optimistically do a non-atomic load. 
*/ \ + ret = *(ptr); \ + do \ + { \ + EA_PREPROCESSOR_JOIN(EA_PREPROCESSOR_JOIN(EASTL_ATOMIC_CMPXCHG_STRONG_, MemoryOrder), _128)(type, cmpxchgRet, ptr, &(ret), val); \ + } while (!cmpxchgRet); \ + } + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_EXCHANGE_*_N(type, type ret, type * ptr, type val) +// +#define EASTL_COMPILER_ATOMIC_EXCHANGE_RELAXED_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_EXCHANGE_8(type, ret, ptr, val, RELAXED) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_RELAXED_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_EXCHANGE_16(type, ret, ptr, val, RELAXED) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_RELAXED_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_EXCHANGE_32(type, ret, ptr, val, RELAXED) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_RELAXED_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_EXCHANGE_64(type, ret, ptr, val, RELAXED) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_RELAXED_128(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_EXCHANGE_128(type, ret, ptr, val, RELAXED) + + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQUIRE_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_EXCHANGE_8(type, ret, ptr, val, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQUIRE_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_EXCHANGE_16(type, ret, ptr, val, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQUIRE_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_EXCHANGE_32(type, ret, ptr, val, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQUIRE_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_EXCHANGE_64(type, ret, ptr, val, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQUIRE_128(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_EXCHANGE_128(type, ret, ptr, val, ACQUIRE) + + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_RELEASE_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_EXCHANGE_8(type, ret, ptr, val, RELEASE) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_RELEASE_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_EXCHANGE_16(type, ret, ptr, val, RELEASE) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_RELEASE_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_EXCHANGE_32(type, ret, ptr, val, RELEASE) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_RELEASE_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_EXCHANGE_64(type, ret, ptr, val, RELEASE) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_RELEASE_128(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_EXCHANGE_128(type, ret, ptr, val, RELEASE) + + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQ_REL_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_EXCHANGE_8(type, ret, ptr, val, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQ_REL_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_EXCHANGE_16(type, ret, ptr, val, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQ_REL_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_EXCHANGE_32(type, ret, ptr, val, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQ_REL_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_EXCHANGE_64(type, ret, ptr, val, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQ_REL_128(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_EXCHANGE_128(type, ret, ptr, val, ACQ_REL) + + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_SEQ_CST_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_EXCHANGE_8(type, ret, ptr, val, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_SEQ_CST_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_EXCHANGE_16(type, ret, ptr, val, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_SEQ_CST_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_EXCHANGE_32(type, ret, ptr, val, SEQ_CST) 
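The EASTL_MSVC_ATOMIC_EXCHANGE_128 comments above describe the key idea: MSVC has no 128-bit exchange intrinsic, so the exchange is emulated by an optimistic (possibly torn) plain read of *(ptr) followed by a 128-bit compare-exchange retry loop. A torn read is harmless because it only makes the first compare-exchange fail, and the failure path hands back the value actually stored, which seeds the next attempt. The following is a minimal standalone C++ sketch of that retry pattern using std::atomic; the U128 type and exchange128 helper are illustrative names and are not part of EASTL or the MSVC intrinsics, and the sketch performs an atomic relaxed load where the macro performs a plain read.

    #include <atomic>
    #include <cstdint>
    #include <cstdio>

    // 16-byte value used to stand in for the 128-bit payload.
    struct alignas(16) U128 {
        uint64_t lo;
        uint64_t hi;
    };

    // Exchange *obj for 'desired' and return the previous contents,
    // built from a compare-exchange retry loop (the same shape as the
    // EASTL_MSVC_ATOMIC_EXCHANGE_128 macro body).
    static U128 exchange128(std::atomic<U128>& obj, U128 desired)
    {
        // Optimistic read of the current value. If this guess is stale,
        // the first compare_exchange_weak below fails and rewrites
        // 'expected' with the value actually observed, so the loop
        // still converges on the correct previous value.
        U128 expected = obj.load(std::memory_order_relaxed);

        while (!obj.compare_exchange_weak(expected, desired,
                                          std::memory_order_seq_cst,
                                          std::memory_order_relaxed))
        {
            // 'expected' now holds the current contents of *obj; retry.
        }
        return expected;  // the value that was replaced
    }

    int main()
    {
        std::atomic<U128> value{U128{1, 2}};
        U128 previous = exchange128(value, U128{3, 4});
        std::printf("previous = {%llu, %llu}\n",
                    (unsigned long long)previous.lo,
                    (unsigned long long)previous.hi);
        return 0;
    }

On x86-64 this typically lowers to a cmpxchg16b loop when the toolchain supports 16-byte compare-exchange; otherwise std::atomic may fall back to a lock, which still preserves the exchange semantics.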
+ +#define EASTL_COMPILER_ATOMIC_EXCHANGE_SEQ_CST_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_EXCHANGE_64(type, ret, ptr, val, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_SEQ_CST_128(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_EXCHANGE_128(type, ret, ptr, val, SEQ_CST) + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_EXCHANGE_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_fetch_add.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_fetch_add.h new file mode 100644 index 00000000..a951740e --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_fetch_add.h @@ -0,0 +1,101 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_FETCH_ADD_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_FETCH_ADD_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#define EASTL_MSVC_ATOMIC_FETCH_ADD_N(integralType, addIntrinsic, type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_FETCH_OP_N(integralType, addIntrinsic, type, ret, ptr, val, MemoryOrder, \ + EASTL_MSVC_NOP_PRE_INTRIN_COMPUTE) + + +#define EASTL_MSVC_ATOMIC_FETCH_ADD_8(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_FETCH_ADD_N(char, _InterlockedExchangeAdd8, type, ret, ptr, val, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_FETCH_ADD_16(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_FETCH_ADD_N(short, _InterlockedExchangeAdd16, type, ret, ptr, val, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_FETCH_ADD_32(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_FETCH_ADD_N(long, _InterlockedExchangeAdd, type, ret, ptr, val, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_FETCH_ADD_64(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_FETCH_ADD_N(__int64, _InterlockedExchangeAdd64, type, ret, ptr, val, MemoryOrder) + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_FETCH_ADD_*_N(type, type ret, type * ptr, type val) +// +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELAXED_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_ADD_8(type, ret, ptr, val, RELAXED) + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELAXED_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_ADD_16(type, ret, ptr, val, RELAXED) + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELAXED_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_ADD_32(type, ret, ptr, val, RELAXED) + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELAXED_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_ADD_64(type, ret, ptr, val, RELAXED) + + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQUIRE_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_ADD_8(type, ret, ptr, val, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQUIRE_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_ADD_16(type, ret, ptr, val, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQUIRE_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_ADD_32(type, ret, ptr, val, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQUIRE_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_ADD_64(type, ret, ptr, val, ACQUIRE) + + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELEASE_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_ADD_8(type, ret, ptr, val, RELEASE) + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELEASE_16(type, ret, ptr, 
val) \ + EASTL_MSVC_ATOMIC_FETCH_ADD_16(type, ret, ptr, val, RELEASE) + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELEASE_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_ADD_32(type, ret, ptr, val, RELEASE) + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELEASE_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_ADD_64(type, ret, ptr, val, RELEASE) + + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQ_REL_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_ADD_8(type, ret, ptr, val, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQ_REL_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_ADD_16(type, ret, ptr, val, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQ_REL_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_ADD_32(type, ret, ptr, val, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQ_REL_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_ADD_64(type, ret, ptr, val, ACQ_REL) + + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_SEQ_CST_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_ADD_8(type, ret, ptr, val, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_SEQ_CST_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_ADD_16(type, ret, ptr, val, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_SEQ_CST_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_ADD_32(type, ret, ptr, val, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_SEQ_CST_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_ADD_64(type, ret, ptr, val, SEQ_CST) + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_FETCH_ADD_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_fetch_and.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_fetch_and.h new file mode 100644 index 00000000..96f78942 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_fetch_and.h @@ -0,0 +1,118 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_FETCH_AND_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_FETCH_AND_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#if defined(EA_PROCESSOR_X86_64) + + #define EASTL_MSVC_ATOMIC_FETCH_AND_INTRIN_8 _InterlockedAnd8_np + #define EASTL_MSVC_ATOMIC_FETCH_AND_INTRIN_16 _InterlockedAnd16_np + #define EASTL_MSVC_ATOMIC_FETCH_AND_INTRIN_32 _InterlockedAnd_np + #define EASTL_MSVC_ATOMIC_FETCH_AND_INTRIN_64 _InterlockedAnd64_np + +#else + + #define EASTL_MSVC_ATOMIC_FETCH_AND_INTRIN_8 _InterlockedAnd8 + #define EASTL_MSVC_ATOMIC_FETCH_AND_INTRIN_16 _InterlockedAnd16 + #define EASTL_MSVC_ATOMIC_FETCH_AND_INTRIN_32 _InterlockedAnd + #define EASTL_MSVC_ATOMIC_FETCH_AND_INTRIN_64 _InterlockedAnd64 + +#endif + + +#define EASTL_MSVC_ATOMIC_FETCH_AND_N(integralType, andIntrinsic, type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_FETCH_OP_N(integralType, andIntrinsic, type, ret, ptr, val, MemoryOrder, \ + EASTL_MSVC_NOP_PRE_INTRIN_COMPUTE) + + +#define EASTL_MSVC_ATOMIC_FETCH_AND_8(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_FETCH_AND_N(char, EASTL_MSVC_ATOMIC_FETCH_AND_INTRIN_8, type, ret, ptr, val, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_FETCH_AND_16(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_FETCH_AND_N(short, EASTL_MSVC_ATOMIC_FETCH_AND_INTRIN_16, type, ret, ptr, val, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_FETCH_AND_32(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_FETCH_AND_N(long, EASTL_MSVC_ATOMIC_FETCH_AND_INTRIN_32, type, ret, ptr, val, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_FETCH_AND_64(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_FETCH_AND_N(__int64, EASTL_MSVC_ATOMIC_FETCH_AND_INTRIN_64, type, ret, ptr, val, MemoryOrder) + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_FETCH_AND_*_N(type, type ret, type * ptr, type val) +// +#define EASTL_COMPILER_ATOMIC_FETCH_AND_RELAXED_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_AND_8(type, ret, ptr, val, RELAXED) + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_RELAXED_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_AND_16(type, ret, ptr, val, RELAXED) + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_RELAXED_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_AND_32(type, ret, ptr, val, RELAXED) + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_RELAXED_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_AND_64(type, ret, ptr, val, RELAXED) + + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQUIRE_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_AND_8(type, ret, ptr, val, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQUIRE_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_AND_16(type, ret, ptr, val, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQUIRE_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_AND_32(type, ret, ptr, val, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQUIRE_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_AND_64(type, ret, ptr, val, ACQUIRE) + + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_RELEASE_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_AND_8(type, ret, ptr, val, RELEASE) + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_RELEASE_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_AND_16(type, ret, ptr, val, RELEASE) + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_RELEASE_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_AND_32(type, ret, 
ptr, val, RELEASE) + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_RELEASE_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_AND_64(type, ret, ptr, val, RELEASE) + + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQ_REL_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_AND_8(type, ret, ptr, val, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQ_REL_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_AND_16(type, ret, ptr, val, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQ_REL_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_AND_32(type, ret, ptr, val, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQ_REL_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_AND_64(type, ret, ptr, val, ACQ_REL) + + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_SEQ_CST_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_AND_8(type, ret, ptr, val, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_SEQ_CST_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_AND_16(type, ret, ptr, val, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_SEQ_CST_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_AND_32(type, ret, ptr, val, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_SEQ_CST_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_AND_64(type, ret, ptr, val, SEQ_CST) + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_FETCH_AND_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_fetch_or.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_fetch_or.h new file mode 100644 index 00000000..2792fc3d --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_fetch_or.h @@ -0,0 +1,118 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_FETCH_OR_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_FETCH_OR_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#if defined(EA_PROCESSOR_X86_64) + + #define EASTL_MSVC_ATOMIC_FETCH_OR_INTRIN_8 _InterlockedOr8_np + #define EASTL_MSVC_ATOMIC_FETCH_OR_INTRIN_16 _InterlockedOr16_np + #define EASTL_MSVC_ATOMIC_FETCH_OR_INTRIN_32 _InterlockedOr_np + #define EASTL_MSVC_ATOMIC_FETCH_OR_INTRIN_64 _InterlockedOr64_np + +#else + + #define EASTL_MSVC_ATOMIC_FETCH_OR_INTRIN_8 _InterlockedOr8 + #define EASTL_MSVC_ATOMIC_FETCH_OR_INTRIN_16 _InterlockedOr16 + #define EASTL_MSVC_ATOMIC_FETCH_OR_INTRIN_32 _InterlockedOr + #define EASTL_MSVC_ATOMIC_FETCH_OR_INTRIN_64 _InterlockedOr64 + +#endif + + +#define EASTL_MSVC_ATOMIC_FETCH_OR_N(integralType, orIntrinsic, type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_FETCH_OP_N(integralType, orIntrinsic, type, ret, ptr, val, MemoryOrder, \ + EASTL_MSVC_NOP_PRE_INTRIN_COMPUTE) + + +#define EASTL_MSVC_ATOMIC_FETCH_OR_8(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_FETCH_OR_N(char, EASTL_MSVC_ATOMIC_FETCH_OR_INTRIN_8, type, ret, ptr, val, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_FETCH_OR_16(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_FETCH_OR_N(short, EASTL_MSVC_ATOMIC_FETCH_OR_INTRIN_16, type, ret, ptr, val, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_FETCH_OR_32(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_FETCH_OR_N(long, EASTL_MSVC_ATOMIC_FETCH_OR_INTRIN_32, type, ret, ptr, val, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_FETCH_OR_64(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_FETCH_OR_N(long long, EASTL_MSVC_ATOMIC_FETCH_OR_INTRIN_64, type, ret, ptr, val, MemoryOrder) + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_FETCH_OR_*_N(type, type ret, type * ptr, type val) +// +#define EASTL_COMPILER_ATOMIC_FETCH_OR_RELAXED_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_OR_8(type, ret, ptr, val, RELAXED) + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_RELAXED_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_OR_16(type, ret, ptr, val, RELAXED) + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_RELAXED_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_OR_32(type, ret, ptr, val, RELAXED) + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_RELAXED_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_OR_64(type, ret, ptr, val, RELAXED) + + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQUIRE_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_OR_8(type, ret, ptr, val, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQUIRE_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_OR_16(type, ret, ptr, val, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQUIRE_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_OR_32(type, ret, ptr, val, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQUIRE_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_OR_64(type, ret, ptr, val, ACQUIRE) + + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_RELEASE_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_OR_8(type, ret, ptr, val, RELEASE) + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_RELEASE_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_OR_16(type, ret, ptr, val, RELEASE) + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_RELEASE_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_OR_32(type, ret, ptr, val, RELEASE) + +#define 
EASTL_COMPILER_ATOMIC_FETCH_OR_RELEASE_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_OR_64(type, ret, ptr, val, RELEASE) + + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQ_REL_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_OR_8(type, ret, ptr, val, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQ_REL_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_OR_16(type, ret, ptr, val, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQ_REL_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_OR_32(type, ret, ptr, val, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQ_REL_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_OR_64(type, ret, ptr, val, ACQ_REL) + + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_SEQ_CST_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_OR_8(type, ret, ptr, val, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_SEQ_CST_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_OR_16(type, ret, ptr, val, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_SEQ_CST_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_OR_32(type, ret, ptr, val, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_SEQ_CST_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_OR_64(type, ret, ptr, val, SEQ_CST) + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_FETCH_OR_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_fetch_sub.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_fetch_sub.h new file mode 100644 index 00000000..6d5d9e3a --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_fetch_sub.h @@ -0,0 +1,104 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_FETCH_SUB_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_FETCH_SUB_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#define EASTL_MSVC_FETCH_SUB_PRE_INTRIN_COMPUTE(ret, val) \ + ret = EASTL_ATOMIC_NEGATE_OPERAND((val)) + +#define EASTL_MSVC_ATOMIC_FETCH_SUB_N(integralType, subIntrinsic, type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_FETCH_OP_N(integralType, subIntrinsic, type, ret, ptr, val, MemoryOrder, \ + EASTL_MSVC_FETCH_SUB_PRE_INTRIN_COMPUTE) + + +#define EASTL_MSVC_ATOMIC_FETCH_SUB_8(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_FETCH_SUB_N(char, _InterlockedExchangeAdd8, type, ret, ptr, val, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_FETCH_SUB_16(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_FETCH_SUB_N(short, _InterlockedExchangeAdd16, type, ret, ptr, val, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_FETCH_SUB_32(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_FETCH_SUB_N(long, _InterlockedExchangeAdd, type, ret, ptr, val, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_FETCH_SUB_64(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_FETCH_SUB_N(__int64, _InterlockedExchangeAdd64, type, ret, ptr, val, MemoryOrder) + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_FETCH_SUB_*_N(type, type ret, type * ptr, type val) +// +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_RELAXED_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_SUB_8(type, ret, ptr, val, RELAXED) + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_RELAXED_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_SUB_16(type, ret, ptr, val, RELAXED) + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_RELAXED_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_SUB_32(type, ret, ptr, val, RELAXED) + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_RELAXED_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_SUB_64(type, ret, ptr, val, RELAXED) + + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQUIRE_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_SUB_8(type, ret, ptr, val, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQUIRE_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_SUB_16(type, ret, ptr, val, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQUIRE_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_SUB_32(type, ret, ptr, val, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQUIRE_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_SUB_64(type, ret, ptr, val, ACQUIRE) + + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_RELEASE_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_SUB_8(type, ret, ptr, val, RELEASE) + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_RELEASE_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_SUB_16(type, ret, ptr, val, RELEASE) + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_RELEASE_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_SUB_32(type, ret, ptr, val, RELEASE) + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_RELEASE_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_SUB_64(type, ret, ptr, val, RELEASE) + + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQ_REL_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_SUB_8(type, ret, ptr, val, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQ_REL_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_SUB_16(type, ret, ptr, val, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQ_REL_32(type, ret, ptr, val) \ + 
EASTL_MSVC_ATOMIC_FETCH_SUB_32(type, ret, ptr, val, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQ_REL_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_SUB_64(type, ret, ptr, val, ACQ_REL) + + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_SEQ_CST_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_SUB_8(type, ret, ptr, val, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_SEQ_CST_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_SUB_16(type, ret, ptr, val, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_SEQ_CST_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_SUB_32(type, ret, ptr, val, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_SEQ_CST_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_SUB_64(type, ret, ptr, val, SEQ_CST) + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_FETCH_SUB_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_fetch_xor.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_fetch_xor.h new file mode 100644 index 00000000..371153e9 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_fetch_xor.h @@ -0,0 +1,118 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_FETCH_XOR_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_FETCH_XOR_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#if defined(EA_PROCESSOR_X86_64) + + #define EASTL_MSVC_ATOMIC_FETCH_XOR_INTRIN_8 _InterlockedXor8_np + #define EASTL_MSVC_ATOMIC_FETCH_XOR_INTRIN_16 _InterlockedXor16_np + #define EASTL_MSVC_ATOMIC_FETCH_XOR_INTRIN_32 _InterlockedXor_np + #define EASTL_MSVC_ATOMIC_FETCH_XOR_INTRIN_64 _InterlockedXor64_np + +#else + + #define EASTL_MSVC_ATOMIC_FETCH_XOR_INTRIN_8 _InterlockedXor8 + #define EASTL_MSVC_ATOMIC_FETCH_XOR_INTRIN_16 _InterlockedXor16 + #define EASTL_MSVC_ATOMIC_FETCH_XOR_INTRIN_32 _InterlockedXor + #define EASTL_MSVC_ATOMIC_FETCH_XOR_INTRIN_64 _InterlockedXor64 + +#endif + + +#define EASTL_MSVC_ATOMIC_FETCH_XOR_N(integralType, xorIntrinsic, type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_FETCH_OP_N(integralType, xorIntrinsic, type, ret, ptr, val, MemoryOrder, \ + EASTL_MSVC_NOP_PRE_INTRIN_COMPUTE) + + +#define EASTL_MSVC_ATOMIC_FETCH_XOR_8(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_FETCH_XOR_N(char, EASTL_MSVC_ATOMIC_FETCH_XOR_INTRIN_8, type, ret, ptr, val, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_FETCH_XOR_16(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_FETCH_XOR_N(short, EASTL_MSVC_ATOMIC_FETCH_XOR_INTRIN_16, type, ret, ptr, val, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_FETCH_XOR_32(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_FETCH_XOR_N(long, EASTL_MSVC_ATOMIC_FETCH_XOR_INTRIN_32, type, ret, ptr, val, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_FETCH_XOR_64(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_FETCH_XOR_N(__int64, EASTL_MSVC_ATOMIC_FETCH_XOR_INTRIN_64, type, ret, ptr, val, MemoryOrder) + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_FETCH_XOR_*_N(type, type ret, type * ptr, type val) +// +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_RELAXED_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_XOR_8(type, ret, ptr, val, RELAXED) + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_RELAXED_16(type, 
ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_XOR_16(type, ret, ptr, val, RELAXED) + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_RELAXED_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_XOR_32(type, ret, ptr, val, RELAXED) + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_RELAXED_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_XOR_64(type, ret, ptr, val, RELAXED) + + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQUIRE_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_XOR_8(type, ret, ptr, val, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQUIRE_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_XOR_16(type, ret, ptr, val, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQUIRE_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_XOR_32(type, ret, ptr, val, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQUIRE_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_XOR_64(type, ret, ptr, val, ACQUIRE) + + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_RELEASE_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_XOR_8(type, ret, ptr, val, RELEASE) + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_RELEASE_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_XOR_16(type, ret, ptr, val, RELEASE) + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_RELEASE_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_XOR_32(type, ret, ptr, val, RELEASE) + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_RELEASE_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_XOR_64(type, ret, ptr, val, RELEASE) + + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQ_REL_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_XOR_8(type, ret, ptr, val, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQ_REL_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_XOR_16(type, ret, ptr, val, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQ_REL_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_XOR_32(type, ret, ptr, val, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQ_REL_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_XOR_64(type, ret, ptr, val, ACQ_REL) + + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_SEQ_CST_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_XOR_8(type, ret, ptr, val, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_SEQ_CST_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_XOR_16(type, ret, ptr, val, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_SEQ_CST_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_XOR_32(type, ret, ptr, val, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_SEQ_CST_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_XOR_64(type, ret, ptr, val, SEQ_CST) + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_FETCH_XOR_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_or_fetch.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_or_fetch.h new file mode 100644 index 00000000..c5b5fac3 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_or_fetch.h @@ -0,0 +1,121 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_OR_FETCH_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_OR_FETCH_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#if defined(EA_PROCESSOR_X86_64) + + #define EASTL_MSVC_ATOMIC_OR_FETCH_INTRIN_8 _InterlockedOr8_np + #define EASTL_MSVC_ATOMIC_OR_FETCH_INTRIN_16 _InterlockedOr16_np + #define EASTL_MSVC_ATOMIC_OR_FETCH_INTRIN_32 _InterlockedOr_np + #define EASTL_MSVC_ATOMIC_OR_FETCH_INTRIN_64 _InterlockedOr64_np + +#else + + #define EASTL_MSVC_ATOMIC_OR_FETCH_INTRIN_8 _InterlockedOr8 + #define EASTL_MSVC_ATOMIC_OR_FETCH_INTRIN_16 _InterlockedOr16 + #define EASTL_MSVC_ATOMIC_OR_FETCH_INTRIN_32 _InterlockedOr + #define EASTL_MSVC_ATOMIC_OR_FETCH_INTRIN_64 _InterlockedOr64 + +#endif + + +#define EASTL_MSVC_OR_FETCH_POST_INTRIN_COMPUTE(ret, val, orend) \ + ret = (val) | (orend) + +#define EASTL_MSVC_ATOMIC_OR_FETCH_N(integralType, orIntrinsic, type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_OP_FETCH_N(integralType, orIntrinsic, type, ret, ptr, val, MemoryOrder, \ + EASTL_MSVC_NOP_PRE_INTRIN_COMPUTE, EASTL_MSVC_OR_FETCH_POST_INTRIN_COMPUTE) + + +#define EASTL_MSVC_ATOMIC_OR_FETCH_8(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_OR_FETCH_N(char, EASTL_MSVC_ATOMIC_OR_FETCH_INTRIN_8, type, ret, ptr, val, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_OR_FETCH_16(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_OR_FETCH_N(short, EASTL_MSVC_ATOMIC_OR_FETCH_INTRIN_16, type, ret, ptr, val, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_OR_FETCH_32(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_OR_FETCH_N(long, EASTL_MSVC_ATOMIC_OR_FETCH_INTRIN_32, type, ret, ptr, val, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_OR_FETCH_64(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_OR_FETCH_N(__int64, EASTL_MSVC_ATOMIC_OR_FETCH_INTRIN_64, type, ret, ptr, val, MemoryOrder) + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_OR_FETCH_*_N(type, type ret, type * ptr, type val) +// +#define EASTL_COMPILER_ATOMIC_OR_FETCH_RELAXED_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_OR_FETCH_8(type, ret, ptr, val, RELAXED) + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_RELAXED_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_OR_FETCH_16(type, ret, ptr, val, RELAXED) + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_RELAXED_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_OR_FETCH_32(type, ret, ptr, val, RELAXED) + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_RELAXED_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_OR_FETCH_64(type, ret, ptr, val, RELAXED) + + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQUIRE_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_OR_FETCH_8(type, ret, ptr, val, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQUIRE_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_OR_FETCH_16(type, ret, ptr, val, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQUIRE_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_OR_FETCH_32(type, ret, ptr, val, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQUIRE_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_OR_FETCH_64(type, ret, ptr, val, ACQUIRE) + + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_RELEASE_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_OR_FETCH_8(type, ret, ptr, val, RELEASE) + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_RELEASE_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_OR_FETCH_16(type, ret, ptr, val, RELEASE) + +#define 
EASTL_COMPILER_ATOMIC_OR_FETCH_RELEASE_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_OR_FETCH_32(type, ret, ptr, val, RELEASE) + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_RELEASE_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_OR_FETCH_64(type, ret, ptr, val, RELEASE) + + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQ_REL_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_OR_FETCH_8(type, ret, ptr, val, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQ_REL_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_OR_FETCH_16(type, ret, ptr, val, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQ_REL_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_OR_FETCH_32(type, ret, ptr, val, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQ_REL_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_OR_FETCH_64(type, ret, ptr, val, ACQ_REL) + + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_SEQ_CST_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_OR_FETCH_8(type, ret, ptr, val, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_SEQ_CST_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_OR_FETCH_16(type, ret, ptr, val, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_SEQ_CST_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_OR_FETCH_32(type, ret, ptr, val, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_SEQ_CST_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_OR_FETCH_64(type, ret, ptr, val, SEQ_CST) + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_OR_FETCH_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_signal_fence.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_signal_fence.h new file mode 100644 index 00000000..f35f5772 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_signal_fence.h @@ -0,0 +1,34 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_SIGNAL_FENCE_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_SIGNAL_FENCE_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_SIGNAL_FENCE_*() +// +#define EASTL_COMPILER_ATOMIC_SIGNAL_FENCE_RELAXED() \ + EASTL_ATOMIC_COMPILER_BARRIER() + +#define EASTL_COMPILER_ATOMIC_SIGNAL_FENCE_ACQUIRE() \ + EASTL_ATOMIC_COMPILER_BARRIER() + +#define EASTL_COMPILER_ATOMIC_SIGNAL_FENCE_RELEASE() \ + EASTL_ATOMIC_COMPILER_BARRIER() + +#define EASTL_COMPILER_ATOMIC_SIGNAL_FENCE_ACQ_REL() \ + EASTL_ATOMIC_COMPILER_BARRIER() + +#define EASTL_COMPILER_ATOMIC_SIGNAL_FENCE_SEQ_CST() \ + EASTL_ATOMIC_COMPILER_BARRIER() + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_SIGNAL_FENCE_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_sub_fetch.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_sub_fetch.h new file mode 100644 index 00000000..6fb61e29 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_sub_fetch.h @@ -0,0 +1,107 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_SUB_FETCH_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_SUB_FETCH_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#define EASTL_MSVC_SUB_FETCH_PRE_INTRIN_COMPUTE(ret, val) \ + ret = EASTL_ATOMIC_NEGATE_OPERAND((val)) + +#define EASTL_MSVC_SUB_FETCH_POST_INTRIN_COMPUTE(ret, val, subend) \ + ret = (val) - (subend) + +#define EASTL_MSVC_ATOMIC_SUB_FETCH_N(integralType, subIntrinsic, type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_OP_FETCH_N(integralType, subIntrinsic, type, ret, ptr, val, MemoryOrder, \ + EASTL_MSVC_SUB_FETCH_PRE_INTRIN_COMPUTE, EASTL_MSVC_SUB_FETCH_POST_INTRIN_COMPUTE) + + +#define EASTL_MSVC_ATOMIC_SUB_FETCH_8(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_SUB_FETCH_N(char, _InterlockedExchangeAdd8, type, ret, ptr, val, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_SUB_FETCH_16(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_SUB_FETCH_N(short, _InterlockedExchangeAdd16, type, ret, ptr, val, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_SUB_FETCH_32(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_SUB_FETCH_N(long, _InterlockedExchangeAdd, type, ret, ptr, val, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_SUB_FETCH_64(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_SUB_FETCH_N(__int64, _InterlockedExchangeAdd64, type, ret, ptr, val, MemoryOrder) + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_SUB_FETCH_*_N(type, type ret, type * ptr, type val) +// +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELAXED_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_SUB_FETCH_8(type, ret, ptr, val, RELAXED) + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELAXED_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_SUB_FETCH_16(type, ret, ptr, val, RELAXED) + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELAXED_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_SUB_FETCH_32(type, ret, ptr, val, RELAXED) + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELAXED_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_SUB_FETCH_64(type, ret, ptr, val, RELAXED) + + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQUIRE_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_SUB_FETCH_8(type, ret, ptr, val, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQUIRE_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_SUB_FETCH_16(type, ret, ptr, val, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQUIRE_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_SUB_FETCH_32(type, ret, ptr, val, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQUIRE_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_SUB_FETCH_64(type, ret, ptr, val, ACQUIRE) + + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELEASE_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_SUB_FETCH_8(type, ret, ptr, val, RELEASE) + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELEASE_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_SUB_FETCH_16(type, ret, ptr, val, RELEASE) + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELEASE_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_SUB_FETCH_32(type, ret, ptr, val, RELEASE) + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELEASE_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_SUB_FETCH_64(type, ret, ptr, val, RELEASE) + + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQ_REL_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_SUB_FETCH_8(type, ret, ptr, val, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQ_REL_16(type, ret, ptr, val) \ + 
EASTL_MSVC_ATOMIC_SUB_FETCH_16(type, ret, ptr, val, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQ_REL_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_SUB_FETCH_32(type, ret, ptr, val, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQ_REL_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_SUB_FETCH_64(type, ret, ptr, val, ACQ_REL) + + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_SEQ_CST_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_SUB_FETCH_8(type, ret, ptr, val, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_SEQ_CST_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_SUB_FETCH_16(type, ret, ptr, val, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_SEQ_CST_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_SUB_FETCH_32(type, ret, ptr, val, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_SEQ_CST_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_SUB_FETCH_64(type, ret, ptr, val, SEQ_CST) + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_SUB_FETCH_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_xor_fetch.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_xor_fetch.h new file mode 100644 index 00000000..44ffff90 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_xor_fetch.h @@ -0,0 +1,121 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_XOR_FETCH_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_XOR_FETCH_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#if defined(EA_PROCESSOR_X86_64) + + #define EASTL_MSVC_ATOMIC_XOR_FETCH_INTRIN_8 _InterlockedXor8_np + #define EASTL_MSVC_ATOMIC_XOR_FETCH_INTRIN_16 _InterlockedXor16_np + #define EASTL_MSVC_ATOMIC_XOR_FETCH_INTRIN_32 _InterlockedXor_np + #define EASTL_MSVC_ATOMIC_XOR_FETCH_INTRIN_64 _InterlockedXor64_np + +#else + + #define EASTL_MSVC_ATOMIC_XOR_FETCH_INTRIN_8 _InterlockedXor8 + #define EASTL_MSVC_ATOMIC_XOR_FETCH_INTRIN_16 _InterlockedXor16 + #define EASTL_MSVC_ATOMIC_XOR_FETCH_INTRIN_32 _InterlockedXor + #define EASTL_MSVC_ATOMIC_XOR_FETCH_INTRIN_64 _InterlockedXor64 + +#endif + + +#define EASTL_MSVC_XOR_FETCH_POST_INTRIN_COMPUTE(ret, val, xorend) \ + ret = (val) ^ (xorend) + +#define EASTL_MSVC_ATOMIC_XOR_FETCH_N(integralType, xorIntrinsic, type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_OP_FETCH_N(integralType, xorIntrinsic, type, ret, ptr, val, MemoryOrder, \ + EASTL_MSVC_NOP_PRE_INTRIN_COMPUTE, EASTL_MSVC_XOR_FETCH_POST_INTRIN_COMPUTE) + + +#define EASTL_MSVC_ATOMIC_XOR_FETCH_8(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_XOR_FETCH_N(char, EASTL_MSVC_ATOMIC_XOR_FETCH_INTRIN_8, type, ret, ptr, val, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_XOR_FETCH_16(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_XOR_FETCH_N(short, EASTL_MSVC_ATOMIC_XOR_FETCH_INTRIN_16, type, ret, ptr, val, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_XOR_FETCH_32(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_XOR_FETCH_N(long, EASTL_MSVC_ATOMIC_XOR_FETCH_INTRIN_32, type, ret, ptr, val, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_XOR_FETCH_64(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_XOR_FETCH_N(__int64, EASTL_MSVC_ATOMIC_XOR_FETCH_INTRIN_64, type, ret, ptr, val, MemoryOrder) + + +///////////////////////////////////////////////////////////////////////////////// +// +// 
void EASTL_COMPILER_ATOMIC_XOR_FETCH_*_N(type, type ret, type * ptr, type val) +// +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELAXED_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_XOR_FETCH_8(type, ret, ptr, val, RELAXED) + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELAXED_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_XOR_FETCH_16(type, ret, ptr, val, RELAXED) + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELAXED_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_XOR_FETCH_32(type, ret, ptr, val, RELAXED) + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELAXED_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_XOR_FETCH_64(type, ret, ptr, val, RELAXED) + + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQUIRE_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_XOR_FETCH_8(type, ret, ptr, val, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQUIRE_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_XOR_FETCH_16(type, ret, ptr, val, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQUIRE_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_XOR_FETCH_32(type, ret, ptr, val, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQUIRE_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_XOR_FETCH_64(type, ret, ptr, val, ACQUIRE) + + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELEASE_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_XOR_FETCH_8(type, ret, ptr, val, RELEASE) + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELEASE_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_XOR_FETCH_16(type, ret, ptr, val, RELEASE) + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELEASE_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_XOR_FETCH_32(type, ret, ptr, val, RELEASE) + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELEASE_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_XOR_FETCH_64(type, ret, ptr, val, RELEASE) + + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQ_REL_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_XOR_FETCH_8(type, ret, ptr, val, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQ_REL_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_XOR_FETCH_16(type, ret, ptr, val, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQ_REL_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_XOR_FETCH_32(type, ret, ptr, val, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQ_REL_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_XOR_FETCH_64(type, ret, ptr, val, ACQ_REL) + + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_SEQ_CST_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_XOR_FETCH_8(type, ret, ptr, val, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_SEQ_CST_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_XOR_FETCH_16(type, ret, ptr, val, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_SEQ_CST_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_XOR_FETCH_32(type, ret, ptr, val, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_SEQ_CST_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_XOR_FETCH_64(type, ret, ptr, val, SEQ_CST) + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_XOR_FETCH_H */ diff --git a/libkram/eastl/include/EASTL/internal/char_traits.h b/libkram/eastl/include/EASTL/internal/char_traits.h new file mode 100644 index 00000000..62fe79b9 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/char_traits.h @@ -0,0 +1,464 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
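The two MSVC headers above build "op_fetch" (return-the-new-value) operations out of fetch-style interlocked intrinsics, which return the previous value. As a minimal sketch of what the 32-bit SUB_FETCH and XOR_FETCH macros reduce to once the PRE/POST_INTRIN_COMPUTE hooks are folded in -- illustrative only, not part of the patch; the function names are mine, the code is MSVC-specific, and plain -val stands in for whatever EASTL_ATOMIC_NEGATE_OPERAND does in the real macros:

#include <intrin.h>

// sub_fetch built on _InterlockedExchangeAdd: the PRE hook negates the
// operand, the intrinsic returns the previous value, and the POST hook
// computes previous - subend, i.e. the new value.
inline long sub_fetch_32(long volatile* ptr, long val)
{
    long previous = _InterlockedExchangeAdd(ptr, -val);
    return previous - val;
}

// xor_fetch: _InterlockedXor likewise returns the previous value, so the
// POST hook re-applies the XOR to recover the new value.
inline long xor_fetch_32(long volatile* ptr, long val)
{
    long previous = _InterlockedXor(ptr, val);
    return previous ^ val;
}

The 8/16/64-bit widths follow the same pattern with the correspondingly sized intrinsics selected above.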
+///////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// This file implements similar functionality to char_traits which is part of +// the C++ standard STL library specification. This is intended for internal +// EASTL use only. Functionality can be accessed through the eastl::string or +// eastl::string_view types. +// +// http://en.cppreference.com/w/cpp/string/char_traits +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_CHAR_TRAITS_H +#define EASTL_CHAR_TRAITS_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + +#include +#include + +EA_DISABLE_ALL_VC_WARNINGS() +#include // toupper, etc. +#include // memset, etc. +EA_RESTORE_ALL_VC_WARNINGS() + +namespace eastl +{ + /////////////////////////////////////////////////////////////////////////////// + /// DecodePart + /// + /// These implement UTF8/UCS2/UCS4 encoding/decoding. + /// + EASTL_API bool DecodePart(const char*& pSrc, const char* pSrcEnd, char*& pDest, char* pDestEnd); + EASTL_API bool DecodePart(const char*& pSrc, const char* pSrcEnd, char16_t*& pDest, char16_t* pDestEnd); + EASTL_API bool DecodePart(const char*& pSrc, const char* pSrcEnd, char32_t*& pDest, char32_t* pDestEnd); + + EASTL_API bool DecodePart(const char16_t*& pSrc, const char16_t* pSrcEnd, char*& pDest, char* pDestEnd); + EASTL_API bool DecodePart(const char16_t*& pSrc, const char16_t* pSrcEnd, char16_t*& pDest, char16_t* pDestEnd); + EASTL_API bool DecodePart(const char16_t*& pSrc, const char16_t* pSrcEnd, char32_t*& pDest, char32_t* pDestEnd); + + EASTL_API bool DecodePart(const char32_t*& pSrc, const char32_t* pSrcEnd, char*& pDest, char* pDestEnd); + EASTL_API bool DecodePart(const char32_t*& pSrc, const char32_t* pSrcEnd, char16_t*& pDest, char16_t* pDestEnd); + EASTL_API bool DecodePart(const char32_t*& pSrc, const char32_t* pSrcEnd, char32_t*& pDest, char32_t* pDestEnd); + + EASTL_API bool DecodePart(const int*& pSrc, const int* pSrcEnd, char*& pDest, char* pDestEnd); + EASTL_API bool DecodePart(const int*& pSrc, const int* pSrcEnd, char16_t*& pDest, char16_t* pDestEnd); + EASTL_API bool DecodePart(const int*& pSrc, const int* pSrcEnd, char32_t*& pDest, char32_t* pDestEnd); + + #if EA_CHAR8_UNIQUE + bool DecodePart(const char8_t*& pSrc, const char8_t* pSrcEnd, char8_t*& pDest, char8_t* pDestEnd); + + bool DecodePart(const char8_t*& pSrc, const char8_t* pSrcEnd, char*& pDest, char* pDestEnd); + bool DecodePart(const char8_t*& pSrc, const char8_t* pSrcEnd, char16_t*& pDest, char16_t* pDestEnd); + bool DecodePart(const char8_t*& pSrc, const char8_t* pSrcEnd, char32_t*& pDest, char32_t* pDestEnd); + + bool DecodePart(const char*& pSrc, const char* pSrcEnd, char8_t*& pDest, char8_t* pDestEnd); + bool DecodePart(const char16_t*& pSrc, const char16_t* pSrcEnd, char8_t*& pDest, char8_t* pDestEnd); + bool DecodePart(const char32_t*& pSrc, const char32_t* pSrcEnd, char8_t*& pDest, char8_t* pDestEnd); + #endif + + #if EA_WCHAR_UNIQUE + bool DecodePart(const wchar_t*& pSrc, const wchar_t* pSrcEnd, wchar_t*& pDest, wchar_t* pDestEnd); + + bool DecodePart(const wchar_t*& pSrc, const wchar_t* pSrcEnd, char*& pDest, char* pDestEnd); + bool DecodePart(const wchar_t*& pSrc, const wchar_t* pSrcEnd, char16_t*& pDest, char16_t* pDestEnd); + bool DecodePart(const wchar_t*& pSrc, const wchar_t* pSrcEnd, char32_t*& pDest, char32_t* pDestEnd); + + bool DecodePart(const char*& pSrc, const char* pSrcEnd, 
wchar_t*& pDest, wchar_t* pDestEnd); + bool DecodePart(const char16_t*& pSrc, const char16_t* pSrcEnd, wchar_t*& pDest, wchar_t* pDestEnd); + bool DecodePart(const char32_t*& pSrc, const char32_t* pSrcEnd, wchar_t*& pDest, wchar_t* pDestEnd); + #endif + + #if EA_CHAR8_UNIQUE && EA_WCHAR_UNIQUE + bool DecodePart(const char8_t*& pSrc, const char8_t* pSrcEnd, wchar_t*& pDest, wchar_t* pDestEnd); + bool DecodePart(const wchar_t*& pSrc, const wchar_t* pSrcEnd, char8_t*& pDest, char8_t* pDestEnd); + #endif + + + #if EA_WCHAR_UNIQUE + inline bool DecodePart(const wchar_t*& pSrc, const wchar_t* pSrcEnd, wchar_t*& pDest, wchar_t* pDestEnd) + { + return DecodePart(reinterpret_cast(pSrc), reinterpret_cast(pSrcEnd), reinterpret_cast(pDest), reinterpret_cast(pDestEnd)); + } + + inline bool DecodePart(const wchar_t*& pSrc, const wchar_t* pSrcEnd, char*& pDest, char* pDestEnd) + { + #if (EA_WCHAR_SIZE == 2) + return DecodePart(reinterpret_cast(pSrc), reinterpret_cast(pSrcEnd), pDest, pDestEnd); + #elif (EA_WCHAR_SIZE == 4) + return DecodePart(reinterpret_cast(pSrc), reinterpret_cast(pSrcEnd), pDest, pDestEnd); + #endif + } + + inline bool DecodePart(const wchar_t*& pSrc, const wchar_t* pSrcEnd, char16_t*& pDest, char16_t* pDestEnd) + { + #if (EA_WCHAR_SIZE == 2) + return DecodePart(reinterpret_cast(pSrc), reinterpret_cast(pSrcEnd), pDest, pDestEnd); + #elif (EA_WCHAR_SIZE == 4) + return DecodePart(reinterpret_cast(pSrc), reinterpret_cast(pSrcEnd), pDest, pDestEnd); + #endif + } + + inline bool DecodePart(const wchar_t*& pSrc, const wchar_t* pSrcEnd, char32_t*& pDest, char32_t* pDestEnd) + { + #if (EA_WCHAR_SIZE == 2) + return DecodePart(reinterpret_cast(pSrc), reinterpret_cast(pSrcEnd), pDest, pDestEnd); + #elif (EA_WCHAR_SIZE == 4) + return DecodePart(reinterpret_cast(pSrc), reinterpret_cast(pSrcEnd), pDest, pDestEnd); + #endif + } + + inline bool DecodePart(const char*& pSrc, const char* pSrcEnd, wchar_t*& pDest, wchar_t* pDestEnd) + { + #if (EA_WCHAR_SIZE == 2) + return DecodePart(pSrc, pSrcEnd, reinterpret_cast(pDest), reinterpret_cast(pDestEnd)); + #elif (EA_WCHAR_SIZE == 4) + return DecodePart(pSrc, pSrcEnd, reinterpret_cast(pDest), reinterpret_cast(pDestEnd)); + #endif + } + + inline bool DecodePart(const char16_t*& pSrc, const char16_t* pSrcEnd, wchar_t*& pDest, wchar_t* pDestEnd) + { + #if (EA_WCHAR_SIZE == 2) + return DecodePart(pSrc, pSrcEnd, reinterpret_cast(pDest), reinterpret_cast(pDestEnd)); + #elif (EA_WCHAR_SIZE == 4) + return DecodePart(pSrc, pSrcEnd, reinterpret_cast(pDest), reinterpret_cast(pDestEnd)); + #endif + } + + inline bool DecodePart(const char32_t*& pSrc, const char32_t* pSrcEnd, wchar_t*& pDest, wchar_t* pDestEnd) + { + #if (EA_WCHAR_SIZE == 2) + return DecodePart(pSrc, pSrcEnd, reinterpret_cast(pDest), reinterpret_cast(pDestEnd)); + #elif (EA_WCHAR_SIZE == 4) + return DecodePart(pSrc, pSrcEnd, reinterpret_cast(pDest), reinterpret_cast(pDestEnd)); + #endif + } + #endif + + #if EA_CHAR8_UNIQUE + inline bool DecodePart(const char8_t*& pSrc, const char8_t* pSrcEnd, char8_t*& pDest, char8_t* pDestEnd) + { + return DecodePart(reinterpret_cast(pSrc), reinterpret_cast(pSrcEnd), reinterpret_cast(pDest), reinterpret_cast(pDestEnd)); + } + + inline bool DecodePart(const char8_t*& pSrc, const char8_t* pSrcEnd, char*& pDest, char* pDestEnd) + { + return DecodePart(reinterpret_cast(pSrc), reinterpret_cast(pSrcEnd), pDest, pDestEnd); + } + + inline bool DecodePart(const char8_t*& pSrc, const char8_t* pSrcEnd, char16_t*& pDest, char16_t* pDestEnd) + { + return 
DecodePart(reinterpret_cast(pSrc), reinterpret_cast(pSrcEnd), pDest, pDestEnd); + } + + inline bool DecodePart(const char8_t*& pSrc, const char8_t* pSrcEnd, char32_t*& pDest, char32_t* pDestEnd) + { + return DecodePart(reinterpret_cast(pSrc), reinterpret_cast(pSrcEnd), pDest, pDestEnd); + } + + inline bool DecodePart(const char*& pSrc, const char* pSrcEnd, char8_t*& pDest, char8_t* pDestEnd) + { + return DecodePart(pSrc, pSrcEnd, reinterpret_cast(pDest), reinterpret_cast(pDestEnd)); + } + + inline bool DecodePart(const char16_t*& pSrc, const char16_t* pSrcEnd, char8_t*& pDest, char8_t* pDestEnd) + { + return DecodePart(pSrc, pSrcEnd, reinterpret_cast(pDest), reinterpret_cast(pDestEnd)); + } + + inline bool DecodePart(const char32_t*& pSrc, const char32_t* pSrcEnd, char8_t*& pDest, char8_t* pDestEnd) + { + return DecodePart(pSrc, pSrcEnd, reinterpret_cast(pDest), reinterpret_cast(pDestEnd)); + } + #endif + + #if EA_CHAR8_UNIQUE && EA_WCHAR_UNIQUE + inline bool DecodePart(const char8_t*& pSrc, const char8_t* pSrcEnd, wchar_t*& pDest, wchar_t* pDestEnd) + { + #if (EA_WCHAR_SIZE == 2) + return DecodePart(pSrc, pSrcEnd, reinterpret_cast(pDest), reinterpret_cast(pDestEnd)); + #elif (EA_WCHAR_SIZE == 4) + return DecodePart(pSrc, pSrcEnd, reinterpret_cast(pDest), reinterpret_cast(pDestEnd)); + #endif + } + + inline bool DecodePart(const wchar_t*& pSrc, const wchar_t* pSrcEnd, char8_t*& pDest, char8_t* pDestEnd) + { + #if (EA_WCHAR_SIZE == 2) + return DecodePart(reinterpret_cast(pSrc), reinterpret_cast(pSrcEnd), reinterpret_cast(pDest), reinterpret_cast(pDestEnd)); + #elif (EA_WCHAR_SIZE == 4) + return DecodePart(reinterpret_cast(pSrc), reinterpret_cast(pSrcEnd), reinterpret_cast(pDest), reinterpret_cast(pDestEnd)); + #endif + } + #endif + + /////////////////////////////////////////////////////////////////////////////// + // 'char traits' functionality + // + inline char CharToLower(char c) + { return (char)tolower((uint8_t)c); } + + template + inline T CharToLower(T c) + { if((unsigned)c <= 0xff) return (T)tolower((uint8_t)c); return c; } + + + inline char CharToUpper(char c) + { return (char)toupper((uint8_t)c); } + + template + inline T CharToUpper(T c) + { if((unsigned)c <= 0xff) return (T)toupper((uint8_t)c); return c; } + + + template + int Compare(const T* p1, const T* p2, size_t n) + { + for(; n > 0; ++p1, ++p2, --n) + { + if(*p1 != *p2) + return (static_cast::type>(*p1) < + static_cast::type>(*p2)) ? -1 : 1; + } + return 0; + } + + inline int Compare(const char* p1, const char* p2, size_t n) + { + return memcmp(p1, p2, n); + } + + + template + inline int CompareI(const T* p1, const T* p2, size_t n) + { + for(; n > 0; ++p1, ++p2, --n) + { + const T c1 = CharToLower(*p1); + const T c2 = CharToLower(*p2); + + if(c1 != c2) + return (static_cast::type>(c1) < + static_cast::type>(c2)) ? 
-1 : 1; + } + return 0; + } + + + template + inline const T* Find(const T* p, T c, size_t n) + { + for(; n > 0; --n, ++p) + { + if(*p == c) + return p; + } + + return NULL; + } + + inline const char* Find(const char* p, char c, size_t n) + { + return (const char*)memchr(p, c, n); + } + + + template + inline EA_CPP14_CONSTEXPR size_t CharStrlen(const T* p) + { + const auto* pCurrent = p; + while(*pCurrent) + ++pCurrent; + return (size_t)(pCurrent - p); + } + + + template + inline T* CharStringUninitializedCopy(const T* pSource, const T* pSourceEnd, T* pDestination) + { + memmove(pDestination, pSource, (size_t)(pSourceEnd - pSource) * sizeof(T)); + return pDestination + (pSourceEnd - pSource); + } + + + template + const T* CharTypeStringFindEnd(const T* pBegin, const T* pEnd, T c) + { + const T* pTemp = pEnd; + while(--pTemp >= pBegin) + { + if(*pTemp == c) + return pTemp; + } + + return pEnd; + } + + + template + const T* CharTypeStringRSearch(const T* p1Begin, const T* p1End, + const T* p2Begin, const T* p2End) + { + // Test for zero length strings, in which case we have a match or a failure, + // but the return value is the same either way. + if((p1Begin == p1End) || (p2Begin == p2End)) + return p1Begin; + + // Test for a pattern of length 1. + if((p2Begin + 1) == p2End) + return CharTypeStringFindEnd(p1Begin, p1End, *p2Begin); + + // Test for search string length being longer than string length. + if((p2End - p2Begin) > (p1End - p1Begin)) + return p1End; + + // General case. + const T* pSearchEnd = (p1End - (p2End - p2Begin) + 1); + const T* pCurrent1; + const T* pCurrent2; + + while(pSearchEnd != p1Begin) + { + // Search for the last occurrence of *p2Begin. + pCurrent1 = CharTypeStringFindEnd(p1Begin, pSearchEnd, *p2Begin); + if(pCurrent1 == pSearchEnd) // If the first char of p2 wasn't found, + return p1End; // then we immediately have failure. + + // In this case, *pTemp == *p2Begin. So compare the rest. + pCurrent2 = p2Begin; + while(*pCurrent1++ == *pCurrent2++) + { + if(pCurrent2 == p2End) + return (pCurrent1 - (p2End - p2Begin)); + } + + // A smarter algorithm might know to subtract more than just one, + // but in most cases it won't make much difference anyway. 
+ --pSearchEnd; + } + + return p1End; + } + + + template + inline const T* CharTypeStringFindFirstOf(const T* p1Begin, const T* p1End, const T* p2Begin, const T* p2End) + { + for (; p1Begin != p1End; ++p1Begin) + { + for (const T* pTemp = p2Begin; pTemp != p2End; ++pTemp) + { + if (*p1Begin == *pTemp) + return p1Begin; + } + } + return p1End; + } + + + template + inline const T* CharTypeStringRFindFirstNotOf(const T* p1RBegin, const T* p1REnd, const T* p2Begin, const T* p2End) + { + for (; p1RBegin != p1REnd; --p1RBegin) + { + const T* pTemp; + for (pTemp = p2Begin; pTemp != p2End; ++pTemp) + { + if (*(p1RBegin - 1) == *pTemp) + break; + } + if (pTemp == p2End) + return p1RBegin; + } + return p1REnd; + } + + + template + inline const T* CharTypeStringFindFirstNotOf(const T* p1Begin, const T* p1End, const T* p2Begin, const T* p2End) + { + for (; p1Begin != p1End; ++p1Begin) + { + const T* pTemp; + for (pTemp = p2Begin; pTemp != p2End; ++pTemp) + { + if (*p1Begin == *pTemp) + break; + } + if (pTemp == p2End) + return p1Begin; + } + return p1End; + } + + + template + inline const T* CharTypeStringRFindFirstOf(const T* p1RBegin, const T* p1REnd, const T* p2Begin, const T* p2End) + { + for (; p1RBegin != p1REnd; --p1RBegin) + { + for (const T* pTemp = p2Begin; pTemp != p2End; ++pTemp) + { + if (*(p1RBegin - 1) == *pTemp) + return p1RBegin; + } + } + return p1REnd; + } + + + template + inline const T* CharTypeStringRFind(const T* pRBegin, const T* pREnd, const T c) + { + while (pRBegin > pREnd) + { + if (*(pRBegin - 1) == c) + return pRBegin; + --pRBegin; + } + return pREnd; + } + + + inline char* CharStringUninitializedFillN(char* pDestination, size_t n, const char c) + { + if(n) // Some compilers (e.g. GCC 4.3+) generate a warning (which can't be disabled) if you call memset with a size of 0. + memset(pDestination, (uint8_t)c, (size_t)n); + return pDestination + n; + } + + template + inline T* CharStringUninitializedFillN(T* pDestination, size_t n, const T c) + { + T * pDest = pDestination; + const T* const pEnd = pDestination + n; + while(pDest < pEnd) + *pDest++ = c; + return pDestination + n; + } + + + inline char* CharTypeAssignN(char* pDestination, size_t n, char c) + { + if(n) // Some compilers (e.g. GCC 4.3+) generate a warning (which can't be disabled) if you call memset with a size of 0. + return (char*)memset(pDestination, c, (size_t)n); + return pDestination; + } + + template + inline T* CharTypeAssignN(T* pDestination, size_t n, T c) + { + T* pDest = pDestination; + const T* const pEnd = pDestination + n; + while(pDest < pEnd) + *pDest++ = c; + return pDestination; + } +} // namespace eastl + +#endif // EASTL_CHAR_TRAITS_H diff --git a/libkram/eastl/include/EASTL/internal/config.h b/libkram/eastl/include/EASTL/internal/config.h new file mode 100644 index 00000000..530bbc87 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/config.h @@ -0,0 +1,1877 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_INTERNAL_CONFIG_H +#define EASTL_INTERNAL_CONFIG_H + + +/////////////////////////////////////////////////////////////////////////////// +// ReadMe +// +// This is the EASTL configuration file. All configurable parameters of EASTL +// are controlled through this file. However, all the settings here can be +// manually overridden by the user. 
There are three ways for a user to override +// the settings in this file: +// +// - Simply edit this file. +// - Define EASTL_USER_CONFIG_HEADER. +// - Predefine individual defines (e.g. EASTL_ASSERT). +// +/////////////////////////////////////////////////////////////////////////////// + + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_USER_CONFIG_HEADER +// +// This allows the user to define a header file to be #included before the +// EASTL config.h contents are compiled. A primary use of this is to override +// the contents of this config.h file. Note that all the settings below in +// this file are user-overridable. +// +// Example usage: +// #define EASTL_USER_CONFIG_HEADER "MyConfigOverrides.h" +// #include +// +/////////////////////////////////////////////////////////////////////////////// + +#ifdef EASTL_USER_CONFIG_HEADER + #include EASTL_USER_CONFIG_HEADER +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_EABASE_DISABLED +// +// The user can disable EABase usage and manually supply the configuration +// via defining EASTL_EABASE_DISABLED and defining the appropriate entities +// globally or via the above EASTL_USER_CONFIG_HEADER. +// +// Example usage: +// #define EASTL_EABASE_DISABLED +// #include +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_EABASE_DISABLED + #include +#endif +#include + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_VERSION +// +// We more or less follow the conventional EA packaging approach to versioning +// here. A primary distinction here is that minor versions are defined as two +// digit entities (e.g. .03") instead of minimal digit entities ".3"). The logic +// here is that the value is a counter and not a floating point fraction. +// Note that the major version doesn't have leading zeros. +// +// Example version strings: +// "0.91.00" // Major version 0, minor version 91, patch version 0. +// "1.00.00" // Major version 1, minor and patch version 0. +// "3.10.02" // Major version 3, minor version 10, patch version 02. +// "12.03.01" // Major version 12, minor version 03, patch version +// +// Example usage: +// printf("EASTL version: %s", EASTL_VERSION); +// printf("EASTL version: %d.%d.%d", EASTL_VERSION_N / 10000 % 100, EASTL_VERSION_N / 100 % 100, EASTL_VERSION_N % 100); +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_VERSION + #define EASTL_VERSION "3.17.06" + #define EASTL_VERSION_N 31706 +#endif + + +/////////////////////////////////////////////////////////////////////////////// +// EA_COMPILER_NO_STANDARD_CPP_LIBRARY +// +// Defined as 1 or undefined. +// Implements support for the definition of EA_COMPILER_NO_STANDARD_CPP_LIBRARY for the case +// of using EABase versions prior to the addition of its EA_COMPILER_NO_STANDARD_CPP_LIBRARY support. +// +#if !defined(EA_COMPILER_NO_STANDARD_CPP_LIBRARY) + #if defined(EA_PLATFORM_ANDROID) + // Disabled because EA's eaconfig/android_config/android_sdk packages currently + // don't support linking STL libraries. Perhaps we can figure out what linker arguments + // are needed for an app so we can manually specify them and then re-enable this code. + // + //#include + // + //#if (__ANDROID_API__ < 9) // Earlier versions of Android provide no std C++ STL implementation. 
+ #define EA_COMPILER_NO_STANDARD_CPP_LIBRARY 1 + //#endif + #endif +#endif + + +/////////////////////////////////////////////////////////////////////////////// +// EA_NOEXCEPT +// +// Defined as a macro. Provided here for backward compatibility with older +// EABase versions prior to 2.00.40 that don't yet define it themselves. +// +#if !defined(EA_NOEXCEPT) + #define EA_NOEXCEPT + #define EA_NOEXCEPT_IF(predicate) + #define EA_NOEXCEPT_EXPR(expression) false +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EA_CPP14_CONSTEXPR +// +// Defined as constexpr when a C++14 compiler is present. Defines it as nothing +// when using a C++11 compiler. +// C++14 relaxes the specification for constexpr such that it allows more +// kinds of expressions. Since a C++11 compiler doesn't allow this, we need +// to make a unique define for C++14 constexpr. This macro should be used only +// when you are using it with code that specfically requires C++14 constexpr +// functionality beyond the regular C++11 constexpr functionality. +// http://en.wikipedia.org/wiki/C%2B%2B14#Relaxed_constexpr_restrictions +// +#if !defined(EA_CPP14_CONSTEXPR) + #if defined(EA_COMPILER_CPP14_ENABLED) + #define EA_CPP14_CONSTEXPR constexpr + #else + #define EA_CPP14_CONSTEXPR // not supported + #define EA_NO_CPP14_CONSTEXPR + #endif +#endif + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL namespace +// +// We define this so that users that #include this config file can reference +// these namespaces without seeing any other files that happen to use them. +/////////////////////////////////////////////////////////////////////////////// + +/// EA Standard Template Library +namespace eastl +{ + // Intentionally empty. +} + + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_DEBUG +// +// Defined as an integer >= 0. Default is 1 for debug builds and 0 for +// release builds. This define is also a master switch for the default value +// of some other settings. +// +// Example usage: +// #if EASTL_DEBUG +// ... +// #endif +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_DEBUG + #if defined(EA_DEBUG) || defined(_DEBUG) + #define EASTL_DEBUG 1 + #else + #define EASTL_DEBUG 0 + #endif +#endif + +// Developer debug. Helps EASTL developers assert EASTL is coded correctly. +// Normally disabled for users since it validates internal things and not user things. +#ifndef EASTL_DEV_DEBUG + #define EASTL_DEV_DEBUG 0 +#endif + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_DEBUGPARAMS_LEVEL +// +// EASTL_DEBUGPARAMS_LEVEL controls what debug information is passed through to +// the allocator by default. +// This value may be defined by the user ... if not it will default to 1 for +// EA_DEBUG builds, otherwise 0. +// +// 0 - no debug information is passed through to allocator calls. +// 1 - 'name' is passed through to allocator calls. +// 2 - 'name', __FILE__, and __LINE__ are passed through to allocator calls. +// +// This parameter mirrors the equivalent parameter in the CoreAllocator package. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_DEBUGPARAMS_LEVEL + #if EASTL_DEBUG + #define EASTL_DEBUGPARAMS_LEVEL 2 + #else + #define EASTL_DEBUGPARAMS_LEVEL 0 + #endif +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_DLL +// +// Defined as 0 or 1. The default is dependent on the definition of EA_DLL. +// If EA_DLL is defined, then EASTL_DLL is 1, else EASTL_DLL is 0. +// EA_DLL is a define that controls DLL builds within the EAConfig build system. +// EASTL_DLL controls whether EASTL is built and used as a DLL. +// Normally you wouldn't do such a thing, but there are use cases for such +// a thing, particularly in the case of embedding C++ into C# applications. +// +#ifndef EASTL_DLL + #if defined(EA_DLL) + #define EASTL_DLL 1 + #else + #define EASTL_DLL 0 + #endif +#endif + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_IF_NOT_DLL +// +// Utility to include expressions only for static builds. +// +#ifndef EASTL_IF_NOT_DLL + #if EASTL_DLL + #define EASTL_IF_NOT_DLL(x) + #else + #define EASTL_IF_NOT_DLL(x) x + #endif +#endif + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_API +// +// This is used to label functions as DLL exports under Microsoft platforms. +// If EA_DLL is defined, then the user is building EASTL as a DLL and EASTL's +// non-templated functions will be exported. EASTL template functions are not +// labelled as EASTL_API (and are thus not exported in a DLL build). This is +// because it's not possible (or at least unsafe) to implement inline templated +// functions in a DLL. +// +// Example usage of EASTL_API: +// EASTL_API int someVariable = 10; // Export someVariable in a DLL build. +// +// struct EASTL_API SomeClass{ // Export SomeClass and its member functions in a DLL build. +// EASTL_LOCAL void PrivateMethod(); // Not exported. +// }; +// +// EASTL_API void SomeFunction(); // Export SomeFunction in a DLL build. +// +// +#if defined(EA_DLL) && !defined(EASTL_DLL) + #define EASTL_DLL 1 +#endif + +#ifndef EASTL_API // If the build file hasn't already defined this to be dllexport... + #if EASTL_DLL + #if defined(_MSC_VER) + #define EASTL_API __declspec(dllimport) + #define EASTL_LOCAL + #elif defined(__CYGWIN__) + #define EASTL_API __attribute__((dllimport)) + #define EASTL_LOCAL + #elif (defined(__GNUC__) && (__GNUC__ >= 4)) + #define EASTL_API __attribute__ ((visibility("default"))) + #define EASTL_LOCAL __attribute__ ((visibility("hidden"))) + #else + #define EASTL_API + #define EASTL_LOCAL + #endif + #else + #define EASTL_API + #define EASTL_LOCAL + #endif +#endif + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_EASTDC_API +// +// This is used for importing EAStdC functions into EASTL, possibly via a DLL import. 
+// +#ifndef EASTL_EASTDC_API + #if EASTL_DLL + #if defined(_MSC_VER) + #define EASTL_EASTDC_API __declspec(dllimport) + #define EASTL_EASTDC_LOCAL + #elif defined(__CYGWIN__) + #define EASTL_EASTDC_API __attribute__((dllimport)) + #define EASTL_EASTDC_LOCAL + #elif (defined(__GNUC__) && (__GNUC__ >= 4)) + #define EASTL_EASTDC_API __attribute__ ((visibility("default"))) + #define EASTL_EASTDC_LOCAL __attribute__ ((visibility("hidden"))) + #else + #define EASTL_EASTDC_API + #define EASTL_EASTDC_LOCAL + #endif + #else + #define EASTL_EASTDC_API + #define EASTL_EASTDC_LOCAL + #endif +#endif + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_EASTDC_VSNPRINTF +// +// Defined as 0 or 1. By default it is 1. +// +// When enabled EASTL uses EAStdC's Vsnprintf function directly instead of +// having the user provide a global Vsnprintf8/16/32 function. The benefit +// of this is that it will allow EASTL to just link to EAStdC's Vsnprintf +// without the user doing anything. The downside is that any users who aren't +// already using EAStdC will either need to now depend on EAStdC or globally +// define this property to be 0 and simply provide functions that have the same +// names. See the usage of EASTL_EASTDC_VSNPRINTF in string.h for more info. +// +#if !defined(EASTL_EASTDC_VSNPRINTF) + #define EASTL_EASTDC_VSNPRINTF 1 +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_NAME_ENABLED / EASTL_NAME / EASTL_NAME_VAL +// +// Used to wrap debug string names. In a release build, the definition +// goes away. These are present to avoid release build compiler warnings +// and to make code simpler. +// +// Example usage of EASTL_NAME: +// // pName will defined away in a release build and thus prevent compiler warnings. +// void allocator::set_name(const char* EASTL_NAME(pName)) +// { +// #if EASTL_NAME_ENABLED +// mpName = pName; +// #endif +// } +// +// Example usage of EASTL_NAME_VAL: +// // "xxx" is defined to NULL in a release build. +// vector::vector(const allocator_type& allocator = allocator_type(EASTL_NAME_VAL("xxx"))); +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_NAME_ENABLED + #define EASTL_NAME_ENABLED EASTL_DEBUG +#endif + +#ifndef EASTL_NAME + #if EASTL_NAME_ENABLED + #define EASTL_NAME(x) x + #define EASTL_NAME_VAL(x) x + #else + #define EASTL_NAME(x) + #define EASTL_NAME_VAL(x) ((const char*)NULL) + #endif +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_DEFAULT_NAME_PREFIX +// +// Defined as a string literal. Defaults to "EASTL". +// This define is used as the default name for EASTL where such a thing is +// referenced in EASTL. For example, if the user doesn't specify an allocator +// name for their deque, it is named "EASTL deque". However, you can override +// this to say "SuperBaseball deque" by changing EASTL_DEFAULT_NAME_PREFIX. +// +// Example usage (which is simply taken from how deque.h uses this define): +// #ifndef EASTL_DEQUE_DEFAULT_NAME +// #define EASTL_DEQUE_DEFAULT_NAME EASTL_DEFAULT_NAME_PREFIX " deque" +// #endif +// +#ifndef EASTL_DEFAULT_NAME_PREFIX + #define EASTL_DEFAULT_NAME_PREFIX "EASTL" +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_ASSERT_ENABLED +// +// Defined as 0 or non-zero. Default is same as EASTL_DEBUG. 
+// If EASTL_ASSERT_ENABLED is non-zero, then asserts will be executed via +// the assertion mechanism. +// +// Example usage: +// #if EASTL_ASSERT_ENABLED +// EASTL_ASSERT(v.size() > 17); +// #endif +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_ASSERT_ENABLED + #define EASTL_ASSERT_ENABLED EASTL_DEBUG +#endif + +// Developer assert. Helps EASTL developers assert EASTL is coded correctly. +// Normally disabled for users since it validates internal things and not user things. +#ifndef EASTL_DEV_ASSERT_ENABLED + #define EASTL_DEV_ASSERT_ENABLED EASTL_DEV_DEBUG +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_EMPTY_REFERENCE_ASSERT_ENABLED +// +// Defined as 0 or non-zero. Default is same as EASTL_ASSERT_ENABLED. +// This is like EASTL_ASSERT_ENABLED, except it is for empty container +// references. Sometime people like to be able to take a reference to +// the front of the container, but not use it if the container is empty. +// In practice it's often easier and more efficient to do this than to write +// extra code to check if the container is empty. +// +// NOTE: If this is enabled, EASTL_ASSERT_ENABLED must also be enabled +// +// Example usage: +// template +// inline typename vector::reference +// vector::front() +// { +// #if EASTL_ASSERT_ENABLED +// EASTL_ASSERT(mpEnd > mpBegin); +// #endif +// +// return *mpBegin; +// } +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_EMPTY_REFERENCE_ASSERT_ENABLED + #define EASTL_EMPTY_REFERENCE_ASSERT_ENABLED EASTL_ASSERT_ENABLED +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// SetAssertionFailureFunction +// +// Allows the user to set a custom assertion failure mechanism. +// +// Example usage: +// void Assert(const char* pExpression, void* pContext); +// SetAssertionFailureFunction(Assert, this); +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_ASSERTION_FAILURE_DEFINED + #define EASTL_ASSERTION_FAILURE_DEFINED + + namespace eastl + { + typedef void (*EASTL_AssertionFailureFunction)(const char* pExpression, void* pContext); + EASTL_API void SetAssertionFailureFunction(EASTL_AssertionFailureFunction pFunction, void* pContext); + + // These are the internal default functions that implement asserts. + EASTL_API void AssertionFailure(const char* pExpression); + EASTL_API void AssertionFailureFunctionDefault(const char* pExpression, void* pContext); + } +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_ASSERT +// +// Assertion macro. Can be overridden by user with a different value. +// +// Example usage: +// EASTL_ASSERT(intVector.size() < 100); +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_ASSERT + #if EASTL_ASSERT_ENABLED + #define EASTL_ASSERT(expression) \ + EA_DISABLE_VC_WARNING(4127) \ + do { \ + EA_ANALYSIS_ASSUME(expression); \ + (void)((expression) || (eastl::AssertionFailure(#expression), 0)); \ + } while (0) \ + EA_RESTORE_VC_WARNING() + #else + #define EASTL_ASSERT(expression) + #endif +#endif + +// Developer assert. Helps EASTL developers assert EASTL is coded correctly. +// Normally disabled for users since it validates internal things and not user things. 
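The EASTL_ASSERT definition just above relies on a do/while(0) wrapper plus a short-circuiting || so the failure handler runs only when the expression is false. A minimal self-contained sketch of that same pattern follows; MY_ASSERT and my_failure are invented names, not EASTL's (EASTL routes through eastl::AssertionFailure), and the EA_DISABLE_VC_WARNING / EA_ANALYSIS_ASSUME wrappers are omitted:

#include <cstdio>

// Hypothetical failure handler standing in for eastl::AssertionFailure.
static void my_failure(const char* expr)
{
    std::fprintf(stderr, "assertion failed: %s\n", expr);
}

// Same shape as the expansion above: the || short-circuits past my_failure
// when the expression is true, and do/while(0) keeps the macro usable as a
// single statement (e.g. inside an un-braced if/else).
#define MY_ASSERT(expression) \
    do { (void)((expression) || (my_failure(#expression), 0)); } while (0)

int main()
{
    int count = 3;
    MY_ASSERT(count == 3); // passes, no output
    MY_ASSERT(count == 4); // prints: assertion failed: count == 4
    return 0;
}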
+#ifndef EASTL_DEV_ASSERT + #if EASTL_DEV_ASSERT_ENABLED + #define EASTL_DEV_ASSERT(expression) \ + EA_DISABLE_VC_WARNING(4127) \ + do { \ + EA_ANALYSIS_ASSUME(expression); \ + (void)((expression) || (eastl::AssertionFailure(#expression), 0)); \ + } while(0) \ + EA_RESTORE_VC_WARNING() + #else + #define EASTL_DEV_ASSERT(expression) + #endif +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_ASSERT_MSG +// +// Example usage: +// EASTL_ASSERT_MSG(false, "detected error condition!"); +// +/////////////////////////////////////////////////////////////////////////////// +#ifndef EASTL_ASSERT_MSG + #if EASTL_ASSERT_ENABLED + #define EASTL_ASSERT_MSG(expression, message) \ + EA_DISABLE_VC_WARNING(4127) \ + do { \ + EA_ANALYSIS_ASSUME(expression); \ + (void)((expression) || (eastl::AssertionFailure(message), 0)); \ + } while (0) \ + EA_RESTORE_VC_WARNING() + #else + #define EASTL_ASSERT_MSG(expression, message) + #endif +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_FAIL_MSG +// +// Failure macro. Can be overridden by user with a different value. +// +// Example usage: +// EASTL_FAIL("detected error condition!"); +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_FAIL_MSG + #if EASTL_ASSERT_ENABLED + #define EASTL_FAIL_MSG(message) (eastl::AssertionFailure(message)) + #else + #define EASTL_FAIL_MSG(message) + #endif +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_CT_ASSERT / EASTL_CT_ASSERT_NAMED +// +// EASTL_CT_ASSERT is a macro for compile time assertion checks, useful for +// validating *constant* expressions. The advantage over using EASTL_ASSERT +// is that errors are caught at compile time instead of runtime. +// +// Example usage: +// EASTL_CT_ASSERT(sizeof(uint32_t) == 4); +// +/////////////////////////////////////////////////////////////////////////////// + +#define EASTL_CT_ASSERT(expression) static_assert(expression, #expression) + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_CT_ASSERT_MSG +// +// EASTL_CT_ASSERT_MSG is a macro for compile time assertion checks, useful for +// validating *constant* expressions. The advantage over using EASTL_ASSERT +// is that errors are caught at compile time instead of runtime. +// The message must be a string literal. +// +// Example usage: +// EASTL_CT_ASSERT_MSG(sizeof(uint32_t) == 4, "The size of uint32_t must be 4."); +// +/////////////////////////////////////////////////////////////////////////////// + +#define EASTL_CT_ASSERT_MSG(expression, message) static_assert(expression, message) + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_DEBUG_BREAK / EASTL_DEBUG_BREAK_OVERRIDE +// +// This function causes an app to immediately stop under the debugger. +// It is implemented as a macro in order to allow stopping at the site +// of the call. +// +// EASTL_DEBUG_BREAK_OVERRIDE allows one to define EASTL_DEBUG_BREAK directly. +// This is useful in cases where you desire to disable EASTL_DEBUG_BREAK +// but do not wish to (or cannot) define a custom void function() to replace +// EASTL_DEBUG_BREAK callsites. 
+// +// Example usage: +// EASTL_DEBUG_BREAK(); +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_DEBUG_BREAK_OVERRIDE + #ifndef EASTL_DEBUG_BREAK + #if defined(_MSC_VER) && (_MSC_VER >= 1300) + #define EASTL_DEBUG_BREAK() __debugbreak() // This is a compiler intrinsic which will map to appropriate inlined asm for the platform. + #elif (defined(EA_PROCESSOR_ARM) && !defined(EA_PROCESSOR_ARM64)) && defined(__APPLE__) + #define EASTL_DEBUG_BREAK() asm("trap") + #elif defined(EA_PROCESSOR_ARM64) && defined(__APPLE__) + #include + #include + #define EASTL_DEBUG_BREAK() kill( getpid(), SIGINT ) + #elif defined(EA_PROCESSOR_ARM64) && defined(__GNUC__) + #define EASTL_DEBUG_BREAK() asm("brk 10") + #elif defined(EA_PROCESSOR_ARM) && defined(__GNUC__) + #define EASTL_DEBUG_BREAK() asm("BKPT 10") // The 10 is arbitrary. It's just a unique id. + #elif defined(EA_PROCESSOR_ARM) && defined(__ARMCC_VERSION) + #define EASTL_DEBUG_BREAK() __breakpoint(10) + #elif defined(EA_PROCESSOR_POWERPC) // Generic PowerPC. + #define EASTL_DEBUG_BREAK() asm(".long 0") // This triggers an exception by executing opcode 0x00000000. + #elif (defined(EA_PROCESSOR_X86) || defined(EA_PROCESSOR_X86_64)) && defined(EA_ASM_STYLE_INTEL) + #define EASTL_DEBUG_BREAK() { __asm int 3 } + #elif (defined(EA_PROCESSOR_X86) || defined(EA_PROCESSOR_X86_64)) && (defined(EA_ASM_STYLE_ATT) || defined(__GNUC__)) + #define EASTL_DEBUG_BREAK() asm("int3") + #else + void EASTL_DEBUG_BREAK(); // User must define this externally. + #endif + #else + void EASTL_DEBUG_BREAK(); // User must define this externally. + #endif +#else + #ifndef EASTL_DEBUG_BREAK + #if EASTL_DEBUG_BREAK_OVERRIDE == 1 + // define an empty callable to satisfy the call site. + #define EASTL_DEBUG_BREAK ([]{}) + #else + #define EASTL_DEBUG_BREAK EASTL_DEBUG_BREAK_OVERRIDE + #endif + #else + #error EASTL_DEBUG_BREAK is already defined yet you would like to override it. Please ensure no other headers are already defining EASTL_DEBUG_BREAK before this header (config.h) is included + #endif +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_ALLOCATOR_COPY_ENABLED +// +// Defined as 0 or 1. Default is 0 (disabled) until some future date. +// If enabled (1) then container operator= copies the allocator from the +// source container. It ideally should be set to enabled but for backwards +// compatibility with older versions of EASTL it is currently set to 0. +// Regardless of whether this value is 0 or 1, this container copy constructs +// or copy assigns allocators. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_ALLOCATOR_COPY_ENABLED + #define EASTL_ALLOCATOR_COPY_ENABLED 0 +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_FIXED_SIZE_TRACKING_ENABLED +// +// Defined as an integer >= 0. Default is same as EASTL_DEBUG. +// If EASTL_FIXED_SIZE_TRACKING_ENABLED is enabled, then fixed +// containers in debug builds track the max count of objects +// that have been in the container. This allows for the tuning +// of fixed container sizes to their minimum required size. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_FIXED_SIZE_TRACKING_ENABLED + #define EASTL_FIXED_SIZE_TRACKING_ENABLED EASTL_DEBUG +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_RTTI_ENABLED +// +// Defined as 0 or 1. Default is 1 if RTTI is supported by the compiler. +// This define exists so that we can use some dynamic_cast operations in the +// code without warning. dynamic_cast is only used if the specifically refers +// to it; EASTL won't do dynamic_cast behind your back. +// +// Example usage: +// #if EASTL_RTTI_ENABLED +// pChildClass = dynamic_cast(pParentClass); +// #endif +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_RTTI_ENABLED + // The VC++ default Standard Library (Dinkumware) disables major parts of RTTI + // (e.g. type_info) if exceptions are disabled, even if RTTI itself is enabled. + // _HAS_EXCEPTIONS is defined by Dinkumware to 0 or 1 (disabled or enabled). + #if defined(EA_COMPILER_NO_RTTI) || (defined(_MSC_VER) && defined(EA_HAVE_DINKUMWARE_CPP_LIBRARY) && !(defined(_HAS_EXCEPTIONS) && _HAS_EXCEPTIONS)) + #define EASTL_RTTI_ENABLED 0 + #else + #define EASTL_RTTI_ENABLED 1 + #endif +#endif + + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_EXCEPTIONS_ENABLED +// +// Defined as 0 or 1. Default is to follow what the compiler settings are. +// The user can predefine EASTL_EXCEPTIONS_ENABLED to 0 or 1; however, if the +// compiler is set to disable exceptions then EASTL_EXCEPTIONS_ENABLED is +// forced to a value of 0 regardless of the user predefine. +// +// Note that we do not enable EASTL exceptions by default if the compiler +// has exceptions enabled. To enable EASTL_EXCEPTIONS_ENABLED you need to +// manually set it to 1. +// +/////////////////////////////////////////////////////////////////////////////// + +#if !defined(EASTL_EXCEPTIONS_ENABLED) || ((EASTL_EXCEPTIONS_ENABLED == 1) && defined(EA_COMPILER_NO_EXCEPTIONS)) + #define EASTL_EXCEPTIONS_ENABLED 0 +#endif + + + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_STRING_OPT_XXXX +// +// Enables some options / optimizations options that cause the string class +// to behave slightly different from the C++ standard basic_string. These are +// options whereby you can improve performance by avoiding operations that +// in practice may never occur for you. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_STRING_OPT_EXPLICIT_CTORS + // Defined as 0 or 1. Default is 0. + // Defines if we should implement explicity in constructors where the C++ + // standard string does not. The advantage of enabling explicit constructors + // is that you can do this: string s = "hello"; in addition to string s("hello"); + // The disadvantage of enabling explicity constructors is that there can be + // silent conversions done which impede performance if the user isn't paying + // attention. + // C++ standard string ctors are not explicit. + #define EASTL_STRING_OPT_EXPLICIT_CTORS 0 +#endif + +#ifndef EASTL_STRING_OPT_LENGTH_ERRORS + // Defined as 0 or 1. Default is equal to EASTL_EXCEPTIONS_ENABLED. + // Defines if we check for string values going beyond kMaxSize + // (a very large value) and throw exections if so. + // C++ standard strings are expected to do such checks. 
+ #define EASTL_STRING_OPT_LENGTH_ERRORS EASTL_EXCEPTIONS_ENABLED +#endif + +#ifndef EASTL_STRING_OPT_RANGE_ERRORS + // Defined as 0 or 1. Default is equal to EASTL_EXCEPTIONS_ENABLED. + // Defines if we check for out-of-bounds references to string + // positions and throw exceptions if so. Well-behaved code shouldn't + // refence out-of-bounds positions and so shouldn't need these checks. + // C++ standard strings are expected to do such range checks. + #define EASTL_STRING_OPT_RANGE_ERRORS EASTL_EXCEPTIONS_ENABLED +#endif + +#ifndef EASTL_STRING_OPT_ARGUMENT_ERRORS + // Defined as 0 or 1. Default is 0. + // Defines if we check for NULL ptr arguments passed to string + // functions by the user and throw exceptions if so. Well-behaved code + // shouldn't pass bad arguments and so shouldn't need these checks. + // Also, some users believe that strings should check for NULL pointers + // in all their arguments and do no-ops if so. This is very debatable. + // C++ standard strings are not required to check for such argument errors. + #define EASTL_STRING_OPT_ARGUMENT_ERRORS 0 +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_BITSET_SIZE_T +// +// Defined as 0 or 1. Default is 1. +// Controls whether bitset uses size_t or eastl_size_t. +// +#ifndef EASTL_BITSET_SIZE_T + #define EASTL_BITSET_SIZE_T 1 +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_INT128_SUPPORTED +// +// Defined as 0 or 1. +// +#ifndef EASTL_INT128_SUPPORTED + #if defined(__SIZEOF_INT128__) || (defined(EA_COMPILER_INTMAX_SIZE) && (EA_COMPILER_INTMAX_SIZE >= 16)) + #define EASTL_INT128_SUPPORTED 1 + #else + #define EASTL_INT128_SUPPORTED 0 + #endif +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_DEFAULT_ALLOCATOR_ALIGNED_ALLOCATIONS_SUPPORTED +// +// Defined as 0 or 1. +// Tells if you can use the default EASTL allocator to do aligned allocations, +// which for most uses tells if you can store aligned objects in containers +// that use default allocators. It turns out that when built as a DLL for +// some platforms, EASTL doesn't have a way to do aligned allocations, as it +// doesn't have a heap that supports it. There is a way to work around this +// with dynamically defined allocators, but that's currently a to-do. +// +#ifndef EASTL_DEFAULT_ALLOCATOR_ALIGNED_ALLOCATIONS_SUPPORTED + #if EASTL_DLL + #define EASTL_DEFAULT_ALLOCATOR_ALIGNED_ALLOCATIONS_SUPPORTED 0 + #else + #define EASTL_DEFAULT_ALLOCATOR_ALIGNED_ALLOCATIONS_SUPPORTED 1 + #endif +#endif + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_INT128_DEFINED +// +// Defined as 0 or 1. +// Specifies whether eastl_int128_t/eastl_uint128_t have been typedef'd yet. +// +#ifndef EASTL_INT128_DEFINED + #if EASTL_INT128_SUPPORTED + #define EASTL_INT128_DEFINED 1 + + #if defined(__SIZEOF_INT128__) || defined(EA_COMPILER_GNUC) || defined(EA_COMPILER_CLANG) + typedef __int128_t eastl_int128_t; + typedef __uint128_t eastl_uint128_t; + #else + typedef int128_t eastl_int128_t; // The EAStdC package defines an EA::StdC::int128_t and uint128_t type, + typedef uint128_t eastl_uint128_t; // though they are currently within the EA::StdC namespace. 
+ #endif + #endif +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_BITSET_WORD_TYPE_DEFAULT / EASTL_BITSET_WORD_SIZE_DEFAULT +// +// Defined as an integral power of two type, usually uint32_t or uint64_t. +// Specifies the word type that bitset should use internally to implement +// storage. By default this is the platform register word size, but there +// may be reasons to use a different value. +// +// Defines the integral data type used by bitset by default. +// You can override this default on a bitset-by-bitset case by supplying a +// custom bitset WordType template parameter. +// +// The C++ standard specifies that the std::bitset word type be unsigned long, +// but that isn't necessarily the most efficient data type for the given platform. +// We can follow the standard and be potentially less efficient or we can do what +// is more efficient but less like the C++ std::bitset. +// +#if !defined(EASTL_BITSET_WORD_TYPE_DEFAULT) + #if defined(EASTL_BITSET_WORD_SIZE) // EASTL_BITSET_WORD_SIZE is deprecated, but we temporarily support the ability for the user to specify it. Use EASTL_BITSET_WORD_TYPE_DEFAULT instead. + #if (EASTL_BITSET_WORD_SIZE == 4) + #define EASTL_BITSET_WORD_TYPE_DEFAULT uint32_t + #define EASTL_BITSET_WORD_SIZE_DEFAULT 4 + #else + #define EASTL_BITSET_WORD_TYPE_DEFAULT uint64_t + #define EASTL_BITSET_WORD_SIZE_DEFAULT 8 + #endif + #elif (EA_PLATFORM_WORD_SIZE == 16) // EA_PLATFORM_WORD_SIZE is defined in EABase. + #define EASTL_BITSET_WORD_TYPE_DEFAULT uint128_t + #define EASTL_BITSET_WORD_SIZE_DEFAULT 16 + #elif (EA_PLATFORM_WORD_SIZE == 8) + #define EASTL_BITSET_WORD_TYPE_DEFAULT uint64_t + #define EASTL_BITSET_WORD_SIZE_DEFAULT 8 + #elif (EA_PLATFORM_WORD_SIZE == 4) + #define EASTL_BITSET_WORD_TYPE_DEFAULT uint32_t + #define EASTL_BITSET_WORD_SIZE_DEFAULT 4 + #else + #define EASTL_BITSET_WORD_TYPE_DEFAULT uint16_t + #define EASTL_BITSET_WORD_SIZE_DEFAULT 2 + #endif +#endif + + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_LIST_SIZE_CACHE +// +// Defined as 0 or 1. Default is 1. Changed from 0 in version 1.16.01. +// If defined as 1, the list and slist containers (and possibly any additional +// containers as well) keep a member mSize (or similar) variable which allows +// the size() member function to execute in constant time (a.k.a. O(1)). +// There are debates on both sides as to whether it is better to have this +// cached value or not, as having it entails some cost (memory and code). +// To consider: Make list size caching an optional template parameter. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_LIST_SIZE_CACHE + #define EASTL_LIST_SIZE_CACHE 1 +#endif + +#ifndef EASTL_SLIST_SIZE_CACHE + #define EASTL_SLIST_SIZE_CACHE 1 +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_MAX_STACK_USAGE +// +// Defined as an integer greater than zero. Default is 4000. +// There are some places in EASTL where temporary objects are put on the +// stack. A common example of this is in the implementation of container +// swap functions whereby a temporary copy of the container is made. +// There is a problem, however, if the size of the item created on the stack +// is very large. This can happen with fixed-size containers, for example. 
+// The EASTL_MAX_STACK_USAGE define specifies the maximum amount of memory +// (in bytes) that the given platform/compiler will safely allow on the stack. +// Platforms such as Windows will generally allow larger values than embedded +// systems or console machines, but it is usually a good idea to stick with +// a max usage value that is portable across all platforms, lest the user be +// surprised when something breaks as it is ported to another platform. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_MAX_STACK_USAGE + #define EASTL_MAX_STACK_USAGE 4000 +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_VA_COPY_ENABLED +// +// Defined as 0 or 1. Default is 1 for compilers that need it, 0 for others. +// Some compilers on some platforms implement va_list whereby its contents +// are destroyed upon usage, even if passed by value to another function. +// With these compilers you can use va_copy to save and restore a va_list. +// Known compiler/platforms that destroy va_list contents upon usage include: +// CodeWarrior on PowerPC +// GCC on x86-64 +// However, va_copy is part of the C99 standard and not part of earlier C and +// C++ standards. So not all compilers support it. VC++ doesn't support va_copy, +// but it turns out that VC++ doesn't usually need it on the platforms it supports, +// and va_copy can usually be implemented via memcpy(va_list, va_list) with VC++. +// +// Example usage: +// void Function(va_list arguments) +// { +// #if EASTL_VA_COPY_ENABLED +// va_list argumentsCopy; +// va_copy(argumentsCopy, arguments); +// #endif +// +// #if EASTL_VA_COPY_ENABLED +// va_end(argumentsCopy); +// #endif +// } +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_VA_COPY_ENABLED + #if ((defined(__GNUC__) && (__GNUC__ >= 3)) || defined(__clang__)) && (!defined(__i386__) || defined(__x86_64__)) && !defined(__ppc__) && !defined(__PPC__) && !defined(__PPC64__) + #define EASTL_VA_COPY_ENABLED 1 + #else + #define EASTL_VA_COPY_ENABLED 0 + #endif +#endif + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_OPERATOR_EQUALS_OTHER_ENABLED +// +// Defined as 0 or 1. Default is 0 until such day that it's deemed safe. +// When enabled, enables operator= for other char types, e.g. for code +// like this: +// eastl::string8 s8; +// eastl::string16 s16; +// s8 = s16; +// This option is considered experimental, and may exist as such for an +// indefinite amount of time. +// +#if !defined(EASTL_OPERATOR_EQUALS_OTHER_ENABLED) + #define EASTL_OPERATOR_EQUALS_OTHER_ENABLED 0 +#endif +/////////////////////////////////////////////////////////////////////////////// + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_LIST_PROXY_ENABLED +// +#if !defined(EASTL_LIST_PROXY_ENABLED) + // GCC with -fstrict-aliasing has bugs (or undocumented functionality in their + // __may_alias__ implementation. The compiler gets confused about function signatures. + // VC8 (1400) doesn't need the proxy because it has built-in smart debugging capabilities. 
+ #if defined(EASTL_DEBUG) && !defined(__GNUC__) && (!defined(_MSC_VER) || (_MSC_VER < 1400)) + #define EASTL_LIST_PROXY_ENABLED 1 + #define EASTL_LIST_PROXY_MAY_ALIAS EASTL_MAY_ALIAS + #else + #define EASTL_LIST_PROXY_ENABLED 0 + #define EASTL_LIST_PROXY_MAY_ALIAS + #endif +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_STD_ITERATOR_CATEGORY_ENABLED +// +// Defined as 0 or 1. Default is 0. +// If defined as non-zero, EASTL iterator categories (iterator.h's input_iterator_tag, +// forward_iterator_tag, etc.) are defined to be those from std C++ in the std +// namespace. The reason for wanting to enable such a feature is that it allows +// EASTL containers and algorithms to work with std STL containes and algorithms. +// The default value was changed from 1 to 0 in EASL 1.13.03, January 11, 2012. +// The reason for the change was that almost nobody was taking advantage of it and +// it was slowing down compile times for some compilers quite a bit due to them +// having a lot of headers behind . +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_STD_ITERATOR_CATEGORY_ENABLED + #define EASTL_STD_ITERATOR_CATEGORY_ENABLED 0 +#endif + +#if EASTL_STD_ITERATOR_CATEGORY_ENABLED + #define EASTL_ITC_NS std +#else + #define EASTL_ITC_NS eastl +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_VALIDATION_ENABLED +// +// Defined as an integer >= 0. Default is to be equal to EASTL_DEBUG. +// If nonzero, then a certain amount of automatic runtime validation is done. +// Runtime validation is not considered the same thing as asserting that user +// input values are valid. Validation refers to internal consistency checking +// of the validity of containers and their iterators. Validation checking is +// something that often involves significantly more than basic assertion +// checking, and it may sometimes be desirable to disable it. +// This macro would generally be used internally by EASTL. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_VALIDATION_ENABLED + #define EASTL_VALIDATION_ENABLED EASTL_DEBUG +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_VALIDATE_COMPARE +// +// Defined as EASTL_ASSERT or defined away. Default is EASTL_ASSERT if EASTL_VALIDATION_ENABLED is enabled. +// This is used to validate user-supplied comparison functions, particularly for sorting purposes. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_VALIDATE_COMPARE_ENABLED + #define EASTL_VALIDATE_COMPARE_ENABLED EASTL_VALIDATION_ENABLED +#endif + +#if EASTL_VALIDATE_COMPARE_ENABLED + #define EASTL_VALIDATE_COMPARE EASTL_ASSERT +#else + #define EASTL_VALIDATE_COMPARE(expression) +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_VALIDATE_INTRUSIVE_LIST +// +// Defined as an integral value >= 0. Controls the amount of automatic validation +// done by intrusive_list. A value of 0 means no automatic validation is done. +// As of this writing, EASTL_VALIDATE_INTRUSIVE_LIST defaults to 0, as it makes +// the intrusive_list_node become a non-POD, which may be an issue for some code. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_VALIDATE_INTRUSIVE_LIST + #define EASTL_VALIDATE_INTRUSIVE_LIST 0 +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_FORCE_INLINE +// +// Defined as a "force inline" expression or defined away. +// You generally don't need to use forced inlining with the Microsoft and +// Metrowerks compilers, but you may need it with the GCC compiler (any version). +// +// Example usage: +// template +// EASTL_FORCE_INLINE typename vector::size_type +// vector::size() const +// { return mpEnd - mpBegin; } +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_FORCE_INLINE + #define EASTL_FORCE_INLINE EA_FORCE_INLINE +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_MAY_ALIAS +// +// Defined as a macro that wraps the GCC may_alias attribute. This attribute +// has no significance for VC++ because VC++ doesn't support the concept of +// strict aliasing. Users should avoid writing code that breaks strict +// aliasing rules; EASTL_MAY_ALIAS is for cases with no alternative. +// +// Example usage: +// uint32_t value EASTL_MAY_ALIAS; +// +// Example usage: +// typedef uint32_t EASTL_MAY_ALIAS value_type; +// value_type value; +// +#if defined(__GNUC__) && (((__GNUC__ * 100) + __GNUC_MINOR__) >= 303) && !defined(EA_COMPILER_RVCT) + #define EASTL_MAY_ALIAS __attribute__((__may_alias__)) +#else + #define EASTL_MAY_ALIAS +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_LIKELY / EASTL_UNLIKELY +// +// Defined as a macro which gives a hint to the compiler for branch +// prediction. GCC gives you the ability to manually give a hint to +// the compiler about the result of a comparison, though it's often +// best to compile shipping code with profiling feedback under both +// GCC (-fprofile-arcs) and VC++ (/LTCG:PGO, etc.). However, there +// are times when you feel very sure that a boolean expression will +// usually evaluate to either true or false and can help the compiler +// by using an explicity directive... +// +// Example usage: +// if(EASTL_LIKELY(a == 0)) // Tell the compiler that a will usually equal 0. +// { ... } +// +// Example usage: +// if(EASTL_UNLIKELY(a == 0)) // Tell the compiler that a will usually not equal 0. +// { ... } +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_LIKELY + #if defined(__GNUC__) && (__GNUC__ >= 3) + #define EASTL_LIKELY(x) __builtin_expect(!!(x), true) + #define EASTL_UNLIKELY(x) __builtin_expect(!!(x), false) + #else + #define EASTL_LIKELY(x) (x) + #define EASTL_UNLIKELY(x) (x) + #endif +#endif + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_STD_TYPE_TRAITS_AVAILABLE +// +// Defined as 0 or 1; default is based on auto-detection. +// Specifies whether Standard C++11 support exists. +// Sometimes the auto-detection below fails to work properly and the +// user needs to override it. Does not define whether the compiler provides +// built-in compiler type trait support (e.g. __is_abstract()), as some +// compilers will EASTL_STD_TYPE_TRAITS_AVAILABLE = 0, but have built +// in type trait support. +// +#ifndef EASTL_STD_TYPE_TRAITS_AVAILABLE + /* Disabled because we don't currently need it. 
+ #if defined(_MSC_VER) && (_MSC_VER >= 1500) // VS2008 or later + #pragma warning(push, 0) + #include + #pragma warning(pop) + #if ((defined(_HAS_TR1) && _HAS_TR1) || _MSC_VER >= 1700) // VS2012 (1700) and later has built-in type traits support. + #define EASTL_STD_TYPE_TRAITS_AVAILABLE 1 + #include + #else + #define EASTL_STD_TYPE_TRAITS_AVAILABLE 0 + #endif + + #elif defined(EA_COMPILER_CLANG) || (defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4003) && !defined(__GCCXML__)) && !defined(EA_COMPILER_NO_STANDARD_CPP_LIBRARY) + #include // This will define __GLIBCXX__ if using GNU's libstdc++ and _LIBCPP_VERSION if using clang's libc++. + + #if defined(EA_COMPILER_CLANG) && !defined(EA_PLATFORM_APPLE) // As of v3.0.0, Apple's clang doesn't support type traits. + // http://clang.llvm.org/docs/LanguageExtensions.html#checking_type_traits + // Clang has some built-in compiler trait support. This support doesn't currently + // directly cover all our type_traits, though the C++ Standard Library that's used + // with clang could fill that in. + #define EASTL_STD_TYPE_TRAITS_AVAILABLE 1 + #endif + + #if !defined(EASTL_STD_TYPE_TRAITS_AVAILABLE) + #if defined(_LIBCPP_VERSION) // This is defined by clang's libc++. + #include + + #elif defined(__GLIBCXX__) && (__GLIBCXX__ >= 20090124) // It's not clear if this is the oldest version that has type traits; probably it isn't. + #define EASTL_STD_TYPE_TRAITS_AVAILABLE 1 + + #if defined(__GXX_EXPERIMENTAL_CXX0X__) // To do: Update this test to include conforming C++11 implementations. + #include + #else + #include + #endif + #else + #define EASTL_STD_TYPE_TRAITS_AVAILABLE 0 + #endif + #endif + + #elif defined(__MSL_CPP__) && (__MSL_CPP__ >= 0x8000) // CodeWarrior compiler. + #define EASTL_STD_TYPE_TRAITS_AVAILABLE 0 + // To do: Implement support for this (via modifying the EASTL type + // traits headers, as CodeWarrior provides this. + #else + #define EASTL_STD_TYPE_TRAITS_AVAILABLE 0 + #endif + */ +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE +// +// Defined as 0 or 1; default is based on auto-detection. +// Specifies whether the compiler provides built-in compiler type trait support +// (e.g. __is_abstract()). Does not specify any details about which traits +// are available or what their standards-compliance is. Nevertheless this is a +// useful macro identifier for our type traits implementation. +// +#ifndef EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE + #if defined(_MSC_VER) && (_MSC_VER >= 1500) // VS2008 or later + #pragma warning(push, 0) + #include + #pragma warning(pop) + #if ((defined(_HAS_TR1) && _HAS_TR1) || _MSC_VER >= 1700) // VS2012 (1700) and later has built-in type traits support. + #define EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE 1 + #else + #define EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE 0 + #endif + #elif defined(EA_COMPILER_CLANG) && defined(__APPLE__) && defined(_CXXCONFIG) // Apple clang but with GCC's libstdc++. + #define EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE 0 + #elif defined(EA_COMPILER_CLANG) + #define EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE 1 + #elif defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4003) && !defined(__GCCXML__) + #define EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE 1 + #elif defined(__MSL_CPP__) && (__MSL_CPP__ >= 0x8000) // CodeWarrior compiler. 
+ #define EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE 1 + #else + #define EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE 0 + #endif +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_RESET_ENABLED +// +// Defined as 0 or 1; default is 1 for the time being. +// The reset_lose_memory function works the same as reset, as described below. +// +// Specifies whether the container reset functionality is enabled. If enabled +// then ::reset forgets its memory, otherwise it acts as the clear +// function. The reset function is potentially dangerous, as it (by design) +// causes containers to not free their memory. +// This option has no applicability to the bitset::reset function, as bitset +// isn't really a container. Also it has no applicability to the smart pointer +// wrappers (e.g. intrusive_ptr). +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_RESET_ENABLED + #define EASTL_RESET_ENABLED 0 +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_MINMAX_ENABLED +// +// Defined as 0 or 1; default is 1. +// Specifies whether the min and max algorithms are available. +// It may be useful to disable the min and max algorithms because sometimes +// #defines for min and max exist which would collide with EASTL min and max. +// Note that there are already alternative versions of min and max in EASTL +// with the min_alt and max_alt functions. You can use these without colliding +// with min/max macros that may exist. +// +/////////////////////////////////////////////////////////////////////////////// +#ifndef EASTL_MINMAX_ENABLED + #define EASTL_MINMAX_ENABLED 1 +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_NOMINMAX +// +// Defined as 0 or 1; default is 1. +// MSVC++ has #defines for min/max which collide with the min/max algorithm +// declarations. If EASTL_NOMINMAX is defined as 1, then we undefine min and +// max if they are #defined by an external library. This allows our min and +// max definitions in algorithm.h to work as expected. An alternative to +// the enabling of EASTL_NOMINMAX is to #define NOMINMAX in your project +// settings if you are compiling for Windows. +// Note that this does not control the availability of the EASTL min and max +// algorithms; the EASTL_MINMAX_ENABLED configuration parameter does that. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_NOMINMAX + #define EASTL_NOMINMAX 1 +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_STD_CPP_ONLY +// +// Defined as 0 or 1; default is 0. +// Disables the use of compiler language extensions. We use compiler language +// extensions only in the case that they provide some benefit that can't be +// had any other practical way. But sometimes the compiler is set to disable +// language extensions or sometimes one compiler's preprocesor is used to generate +// code for another compiler, and so it's necessary to disable language extension usage. +// +// Example usage: +// #if defined(_MSC_VER) && !EASTL_STD_CPP_ONLY +// enum : size_type { npos = container_type::npos }; // Microsoft extension which results in significantly smaller debug symbols. 
+// #else +// static const size_type npos = container_type::npos; +// #endif +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_STD_CPP_ONLY + #define EASTL_STD_CPP_ONLY 0 +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_NO_RVALUE_REFERENCES +// +// Defined as 0 or 1. +// This is the same as EABase EA_COMPILER_NO_RVALUE_REFERENCES except that it +// follows the convention of being always defined, as 0 or 1. +/////////////////////////////////////////////////////////////////////////////// +#if !defined(EASTL_NO_RVALUE_REFERENCES) + #if defined(EA_COMPILER_NO_RVALUE_REFERENCES) + #define EASTL_NO_RVALUE_REFERENCES 1 + #else + #define EASTL_NO_RVALUE_REFERENCES 0 + #endif +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_MOVE_SEMANTICS_ENABLED +// +// Defined as 0 or 1. +// If enabled then C++11-like functionality with rvalue references and move +// operations is enabled. +/////////////////////////////////////////////////////////////////////////////// +#if !defined(EASTL_MOVE_SEMANTICS_ENABLED) + #if EASTL_NO_RVALUE_REFERENCES // If the compiler doesn't support rvalue references or EASTL is configured to disable them... + #define EASTL_MOVE_SEMANTICS_ENABLED 0 + #else + #define EASTL_MOVE_SEMANTICS_ENABLED 1 + #endif +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_VARIADIC_TEMPLATES_ENABLED +// +// Defined as 0 or 1. +// If enabled then C++11-like functionality with variadic templates is enabled. +/////////////////////////////////////////////////////////////////////////////// +#if !defined(EASTL_VARIADIC_TEMPLATES_ENABLED) + #if defined(EA_COMPILER_NO_VARIADIC_TEMPLATES) // If the compiler doesn't support variadic templates + #define EASTL_VARIADIC_TEMPLATES_ENABLED 0 + #else + #define EASTL_VARIADIC_TEMPLATES_ENABLED 1 + #endif +#endif + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_VARIABLE_TEMPLATES_ENABLED +// +// Defined as 0 or 1. +// If enabled then C++11-like functionality with variable templates is enabled. +/////////////////////////////////////////////////////////////////////////////// +#if !defined(EASTL_VARIABLE_TEMPLATES_ENABLED) + #if((EABASE_VERSION_N < 20605) || defined(EA_COMPILER_NO_VARIABLE_TEMPLATES)) + #define EASTL_VARIABLE_TEMPLATES_ENABLED 0 + #else + #define EASTL_VARIABLE_TEMPLATES_ENABLED 1 + #endif +#endif + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_INLINE_VARIABLE_ENABLED +// +// Defined as 0 or 1. +// If enabled then C++17-like functionality with inline variable is enabled. 
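Referring back to the EASTL_MOVE_SEMANTICS_ENABLED and EASTL_VARIADIC_TEMPLATES_ENABLED switches above, a small sketch of the pattern they enable in container-style code; the single-slot Holder class is purely illustrative:

    #include <EASTL/utility.h>   // eastl::move, eastl::forward

    template <typename T>
    class Holder
    {
    public:
        void set(const T& value) { mValue = value; }            // always compiled

    #if EASTL_MOVE_SEMANTICS_ENABLED
        void set(T&& value) { mValue = eastl::move(value); }    // rvalue overload only when moves are enabled
    #endif

    #if EASTL_MOVE_SEMANTICS_ENABLED && EASTL_VARIADIC_TEMPLATES_ENABLED
        template <typename... Args>
        void emplace(Args&&... args)                            // construct the stored value in place
            { mValue = T(eastl::forward<Args>(args)...); }
    #endif

    private:
        T mValue;
    };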
+/////////////////////////////////////////////////////////////////////////////// +#if !defined(EASTL_INLINE_VARIABLE_ENABLED) + #if((EABASE_VERSION_N < 20707) || defined(EA_COMPILER_NO_INLINE_VARIABLES)) + #define EASTL_INLINE_VARIABLE_ENABLED 0 + #else + #define EASTL_INLINE_VARIABLE_ENABLED 1 + #endif +#endif + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_CPP17_INLINE_VARIABLE +// +// Used to prefix a variable as inline when C++17 inline variables are available +// Usage: EASTL_CPP17_INLINE_VARIABLE constexpr bool type_trait_v = type_trait::value +/////////////////////////////////////////////////////////////////////////////// +#if !defined(EASTL_CPP17_INLINE_VARIABLE) + #if EASTL_INLINE_VARIABLE_ENABLED + #define EASTL_CPP17_INLINE_VARIABLE inline + #else + #define EASTL_CPP17_INLINE_VARIABLE + #endif +#endif + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_HAVE_CPP11_TYPE_TRAITS +// +// Defined as 0 or 1. +// This is the same as EABase EA_HAVE_CPP11_TYPE_TRAITS except that it +// follows the convention of being always defined, as 0 or 1. Note that this +// identifies if the Standard Library has C++11 type traits and not if EASTL +// has its equivalents to C++11 type traits. +/////////////////////////////////////////////////////////////////////////////// +#if !defined(EASTL_HAVE_CPP11_TYPE_TRAITS) + // To do: Change this to use the EABase implementation once we have a few months of testing + // of this and we are sure it works right. Do this at some point after ~January 2014. + #if defined(EA_HAVE_DINKUMWARE_CPP_LIBRARY) && (_CPPLIB_VER >= 540) // Dinkumware. VS2012+ + #define EASTL_HAVE_CPP11_TYPE_TRAITS 1 + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(EA_HAVE_LIBSTDCPP_LIBRARY) && defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4007) // Prior versions of libstdc++ have incomplete support for C++11 type traits. + #define EASTL_HAVE_CPP11_TYPE_TRAITS 1 + #elif defined(EA_HAVE_LIBCPP_LIBRARY) && (_LIBCPP_VERSION >= 1) + #define EASTL_HAVE_CPP11_TYPE_TRAITS 1 + #else + #define EASTL_HAVE_CPP11_TYPE_TRAITS 0 + #endif +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EA_COMPILER_NO_FUNCTION_TEMPLATE_DEFAULT_ARGS undef +// +// We need revise this macro to be undefined in some cases, in case the user +// isn't using an updated EABase. +/////////////////////////////////////////////////////////////////////////////// +#if defined(__EDG_VERSION__) && (__EDG_VERSION__ >= 403) // It may in fact be supported by 4.01 or 4.02 but we don't have compilers to test with. + #if defined(EA_COMPILER_NO_FUNCTION_TEMPLATE_DEFAULT_ARGS) + #undef EA_COMPILER_NO_FUNCTION_TEMPLATE_DEFAULT_ARGS + #endif +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_NO_RANGE_BASED_FOR_LOOP +// +// Defined as 0 or 1. +// This is the same as EABase EA_COMPILER_NO_RANGE_BASED_FOR_LOOP except that it +// follows the convention of being always defined, as 0 or 1. +/////////////////////////////////////////////////////////////////////////////// +#if !defined(EASTL_NO_RANGE_BASED_FOR_LOOP) + #if defined(EA_COMPILER_NO_RANGE_BASED_FOR_LOOP) + #define EASTL_NO_RANGE_BASED_FOR_LOOP 1 + #else + #define EASTL_NO_RANGE_BASED_FOR_LOOP 0 + #endif +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_ALIGN_OF +// +// Determines the alignment of a type. 
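Referring back to EASTL_CPP17_INLINE_VARIABLE above, a minimal sketch of the _v variable-template pattern from its usage note; is_small is a hypothetical trait:

    template <typename T>
    struct is_small
        { static const bool value = (sizeof(T) <= sizeof(void*)); };

    #if EASTL_VARIABLE_TEMPLATES_ENABLED
        // With C++17 inline variables this definition is 'inline' and can live in a
        // header without ODR trouble; on older compilers the keyword simply drops out.
        template <typename T>
        EASTL_CPP17_INLINE_VARIABLE constexpr bool is_small_v = is_small<T>::value;
    #endif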
+// +// Example usage: +// size_t alignment = EASTL_ALIGN_OF(int); +// +/////////////////////////////////////////////////////////////////////////////// +#ifndef EASTL_ALIGN_OF + #define EASTL_ALIGN_OF alignof +#endif + + + + +/////////////////////////////////////////////////////////////////////////////// +// eastl_size_t +// +// Defined as an unsigned integer type, usually either size_t or uint32_t. +// Defaults to size_t to match std STL unless the user specifies to use +// uint32_t explicitly via the EASTL_SIZE_T_32BIT define +// +// Example usage: +// eastl_size_t n = intVector.size(); +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_SIZE_T_32BIT // Defines whether EASTL_SIZE_T uses uint32_t/int32_t as opposed to size_t/ssize_t. + #define EASTL_SIZE_T_32BIT 0 // This makes a difference on 64 bit platforms because they use a 64 bit size_t. +#endif // By default we do the same thing as std STL and use size_t. + +#ifndef EASTL_SIZE_T + #if (EASTL_SIZE_T_32BIT == 0) || (EA_PLATFORM_WORD_SIZE == 4) + #include + #define EASTL_SIZE_T size_t + #define EASTL_SSIZE_T intptr_t + #else + #define EASTL_SIZE_T uint32_t + #define EASTL_SSIZE_T int32_t + #endif +#endif + +typedef EASTL_SIZE_T eastl_size_t; // Same concept as std::size_t. +typedef EASTL_SSIZE_T eastl_ssize_t; // Signed version of eastl_size_t. Concept is similar to Posix's ssize_t. + + + + +/////////////////////////////////////////////////////////////////////////////// +// AddRef / Release +// +// AddRef and Release are used for "intrusive" reference counting. By the term +// "intrusive", we mean that the reference count is maintained by the object +// and not by the user of the object. Given that an object implements referencing +// counting, the user of the object needs to be able to increment and decrement +// that reference count. We do that via the venerable AddRef and Release functions +// which the object must supply. These defines here allow us to specify the name +// of the functions. They could just as well be defined to addref and delref or +// IncRef and DecRef. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTLAddRef + #define EASTLAddRef AddRef +#endif + +#ifndef EASTLRelease + #define EASTLRelease Release +#endif + + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_ALLOCATOR_EXPLICIT_ENABLED +// +// Defined as 0 or 1. Default is 0 for now but ideally would be changed to +// 1 some day. It's 0 because setting it to 1 breaks some existing code. +// This option enables the allocator ctor to be explicit, which avoids +// some undesirable silent conversions, especially with the string class. +// +// Example usage: +// class allocator +// { +// public: +// EASTL_ALLOCATOR_EXPLICIT allocator(const char* pName); +// }; +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_ALLOCATOR_EXPLICIT_ENABLED + #define EASTL_ALLOCATOR_EXPLICIT_ENABLED 0 +#endif + +#if EASTL_ALLOCATOR_EXPLICIT_ENABLED + #define EASTL_ALLOCATOR_EXPLICIT explicit +#else + #define EASTL_ALLOCATOR_EXPLICIT +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_ALLOCATOR_MIN_ALIGNMENT +// +// Defined as an integral power-of-2 that's >= 1. +// Identifies the minimum alignment that EASTL should assume its allocators +// use. 
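Referring back to the AddRef / Release section above, a small sketch of the intrusive-counting contract those configurable names describe; RefCounted and the acquire/release helpers are hypothetical:

    // The object owns its reference count and exposes whatever functions the
    // EASTLAddRef / EASTLRelease macros expand to ("AddRef" / "Release" by default).
    class RefCounted
    {
    public:
        RefCounted() : mRefCount(0) {}
        virtual ~RefCounted() {}

        int AddRef()  { return ++mRefCount; }
        int Release() { const int count = --mRefCount; if(count == 0) delete this; return count; }

    private:
        int mRefCount;
    };

    // Wrapper code (in the spirit of an intrusive smart pointer) calls through the
    // macros, so a codebase that prefers IncRef/DecRef only has to redefine the two
    // defines rather than touch every call site.
    template <typename T> void acquire(T* p) { if(p) p->EASTLAddRef();  }
    template <typename T> void release(T* p) { if(p) p->EASTLRelease(); }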
There is code within EASTL that decides whether to do a Malloc or +// MallocAligned call and it's typically better if it can use the Malloc call. +// But this requires knowing what the minimum possible alignment is. +#if !defined(EASTL_ALLOCATOR_MIN_ALIGNMENT) + #define EASTL_ALLOCATOR_MIN_ALIGNMENT EA_PLATFORM_MIN_MALLOC_ALIGNMENT +#endif + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_SYSTEM_ALLOCATOR_MIN_ALIGNMENT +// +// Identifies the minimum alignment that EASTL should assume system allocations +// from malloc and new will have. +#if !defined(EASTL_SYSTEM_ALLOCATOR_MIN_ALIGNMENT) + #if defined(EA_PLATFORM_MICROSOFT) || defined(EA_PLATFORM_APPLE) + #define EASTL_SYSTEM_ALLOCATOR_MIN_ALIGNMENT 16 + #else + #define EASTL_SYSTEM_ALLOCATOR_MIN_ALIGNMENT (EA_PLATFORM_PTR_SIZE * 2) + #endif +#endif + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL allocator +// +// The EASTL allocator system allows you to redefine how memory is allocated +// via some defines that are set up here. In the container code, memory is +// allocated via macros which expand to whatever the user has them set to +// expand to. Given that there are multiple allocator systems available, +// this system allows you to configure it to use whatever system you want, +// provided your system meets the requirements of this library. +// The requirements are: +// +// - Must be constructable via a const char* (name) parameter. +// Some uses of allocators won't require this, however. +// - Allocate a block of memory of size n and debug name string. +// - Allocate a block of memory of size n, debug name string, +// alignment a, and offset o. +// - Free memory allocated via either of the allocation functions above. +// - Provide a default allocator instance which can be used if the user +// doesn't provide a specific one. +// +/////////////////////////////////////////////////////////////////////////////// + +// namespace eastl +// { +// class allocator +// { +// allocator(const char* pName = NULL); +// +// void* allocate(size_t n, int flags = 0); +// void* allocate(size_t n, size_t alignment, size_t offset, int flags = 0); +// void deallocate(void* p, size_t n); +// +// const char* get_name() const; +// void set_name(const char* pName); +// }; +// +// allocator* GetDefaultAllocator(); // This is used for anonymous allocations. +// } + +#ifndef EASTLAlloc // To consider: Instead of calling through pAllocator, just go directly to operator new, since that's what allocator does. + #define EASTLAlloc(allocator, n) (allocator).allocate(n); +#endif + +#ifndef EASTLAllocFlags // To consider: Instead of calling through pAllocator, just go directly to operator new, since that's what allocator does. + #define EASTLAllocFlags(allocator, n, flags) (allocator).allocate(n, flags); +#endif + +#ifndef EASTLAllocAligned + #define EASTLAllocAligned(allocator, n, alignment, offset) (allocator).allocate((n), (alignment), (offset)) +#endif + +#ifndef EASTLAllocAlignedFlags + #define EASTLAllocAlignedFlags(allocator, n, alignment, offset, flags) (allocator).allocate((n), (alignment), (offset), (flags)) +#endif + +#ifndef EASTLFree + #define EASTLFree(allocator, p, size) (allocator).deallocate((void*)(p), (size)) // Important to cast to void* as p may be non-const. 
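As a concrete companion to the allocator requirements listed above, a minimal sketch of a conforming allocator and of how container-style code reaches it through the EASTLAlloc / EASTLFree macros; MyAllocator is hypothetical and its aligned overload is deliberately simplified:

    #include <stddef.h>
    #include <stdlib.h>

    class MyAllocator
    {
    public:
        MyAllocator(const char* pName = "MyAllocator") : mpName(pName) {}

        void* allocate(size_t n, int /*flags*/ = 0)
            { return malloc(n); }

        // Alignment-aware overload; a real implementation would honor alignment and
        // offset instead of relying on the default malloc alignment.
        void* allocate(size_t n, size_t /*alignment*/, size_t /*offset*/, int /*flags*/ = 0)
            { return malloc(n); }

        void deallocate(void* p, size_t /*n*/)
            { free(p); }

        const char* get_name() const            { return mpName; }
        void        set_name(const char* pName) { mpName = pName; }

    private:
        const char* mpName;
    };

    // Call sites go through the macros so the whole allocation system can be swapped
    // by redefining them. Note that EASTLAlloc already supplies the trailing ';'.
    inline void* AllocateNode(MyAllocator& a, size_t n)
    {
        void* p = EASTLAlloc(a, n)
        return p;
    }

    inline void FreeNode(MyAllocator& a, void* p, size_t n)
    {
        EASTLFree(a, p, n);
    }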
+#endif + +#ifndef EASTLAllocatorType + #define EASTLAllocatorType eastl::allocator +#endif + +#ifndef EASTLDummyAllocatorType + #define EASTLDummyAllocatorType eastl::dummy_allocator +#endif + +#ifndef EASTLAllocatorDefault + // EASTLAllocatorDefault returns the default allocator instance. This is not a global + // allocator which implements all container allocations but is the allocator that is + // used when EASTL needs to allocate memory internally. There are very few cases where + // EASTL allocates memory internally, and in each of these it is for a sensible reason + // that is documented to behave as such. + #define EASTLAllocatorDefault eastl::GetDefaultAllocator +#endif + + +/// EASTL_ALLOCATOR_DEFAULT_NAME +/// +/// Defines a default allocator name in the absence of a user-provided name. +/// +#ifndef EASTL_ALLOCATOR_DEFAULT_NAME + #define EASTL_ALLOCATOR_DEFAULT_NAME EASTL_DEFAULT_NAME_PREFIX // Unless the user overrides something, this is "EASTL". +#endif + +/// EASTL_USE_FORWARD_WORKAROUND +/// +/// This is to workaround a compiler bug that we found in VS2013. Update 1 did not fix it. +/// This should be fixed in a future release of VS2013 http://accentuable4.rssing.com/browser.php?indx=3511740&item=15696 +/// +#ifndef EASTL_USE_FORWARD_WORKAROUND + #if defined(_MSC_FULL_VER) && _MSC_FULL_VER == 180021005 || (defined(__EDG_VERSION__) && (__EDG_VERSION__ < 405))// VS2013 initial release + #define EASTL_USE_FORWARD_WORKAROUND 1 + #else + #define EASTL_USE_FORWARD_WORKAROUND 0 + #endif +#endif + + +/// EASTL_TUPLE_ENABLED +/// EASTL tuple implementation depends on variadic template support +#if EASTL_VARIADIC_TEMPLATES_ENABLED && !defined(EA_COMPILER_NO_TEMPLATE_ALIASES) + #define EASTL_TUPLE_ENABLED 1 +#else + #define EASTL_TUPLE_ENABLED 0 +#endif + + +/// EASTL_FUNCTION_ENABLED +/// +#ifndef EASTL_FUNCTION_ENABLED + #define EASTL_FUNCTION_ENABLED 1 +#endif + + +/// EASTL_USER_LITERALS_ENABLED +#ifndef EASTL_USER_LITERALS_ENABLED + #if defined(EA_COMPILER_CPP14_ENABLED) + #define EASTL_USER_LITERALS_ENABLED 1 + + // Disabling the Clang/GCC/MSVC warning about using user defined literals without a leading '_' as they are + // reserved for standard libary usage. + EA_DISABLE_CLANG_WARNING(-Wuser-defined-literals) + EA_DISABLE_CLANG_WARNING(-Wreserved-user-defined-literal) + EA_DISABLE_GCC_WARNING(-Wliteral-suffix) + #ifdef _MSC_VER + #pragma warning(disable: 4455) // disable warning C4455: literal suffix identifiers that do not start with an underscore are reserved + #endif + + #else + #define EASTL_USER_LITERALS_ENABLED 0 + #endif +#endif + + +/// EASTL_INLINE_NAMESPACES_ENABLED +#ifndef EASTL_INLINE_NAMESPACES_ENABLED + #if defined(EA_COMPILER_CPP14_ENABLED) + #define EASTL_INLINE_NAMESPACES_ENABLED 1 + #else + #define EASTL_INLINE_NAMESPACES_ENABLED 0 + #endif +#endif + + +/// EASTL_CORE_ALLOCATOR_ENABLED +#ifndef EASTL_CORE_ALLOCATOR_ENABLED + #define EASTL_CORE_ALLOCATOR_ENABLED 0 +#endif + +/// EASTL_OPENSOURCE +/// This is enabled when EASTL is building built in an "open source" mode. Which is a mode that eliminates code +/// dependencies on other technologies that have not been released publically. +/// EASTL_OPENSOURCE = 0, is the default. +/// EASTL_OPENSOURCE = 1, utilizes technologies that not publically available. 
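Referring back to EASTLAllocatorDefault above, a small sketch of the internal-allocation pattern it exists for, assuming the default eastl::allocator interface (allocate/deallocate) shown earlier in this file:

    #include <stddef.h>

    // EASTL code that must allocate on its own behalf (rather than through a
    // container's allocator) goes through the configured default allocator.
    inline void* AllocateScratch(size_t n)
    {
        return EASTLAllocatorDefault()->allocate(n);
    }

    inline void FreeScratch(void* p, size_t n)
    {
        EASTLAllocatorDefault()->deallocate(p, n);
    }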
+/// +#ifndef EASTL_OPENSOURCE + #define EASTL_OPENSOURCE 0 +#endif + + +/// EASTL_OPTIONAL_ENABLED +#if defined(EA_COMPILER_MSVC_2012) + #define EASTL_OPTIONAL_ENABLED 0 +#elif defined(EA_COMPILER_MSVC_2013) + #define EASTL_OPTIONAL_ENABLED 0 +#elif defined(EA_COMPILER_MSVC_2015) + #define EASTL_OPTIONAL_ENABLED 1 +#elif EASTL_VARIADIC_TEMPLATES_ENABLED && !defined(EA_COMPILER_NO_TEMPLATE_ALIASES) && !defined(EA_COMPILER_NO_DEFAULTED_FUNCTIONS) && defined(EA_COMPILER_CPP11_ENABLED) + #define EASTL_OPTIONAL_ENABLED 1 +#else + #define EASTL_OPTIONAL_ENABLED 0 +#endif + + +/// EASTL_HAS_UNIQUE_OBJECT_REPRESENTATIONS_AVAILABLE +#if defined(_MSC_VER) && (_MSC_VER >= 1913) // VS2017+ + #define EASTL_HAS_UNIQUE_OBJECT_REPRESENTATIONS_AVAILABLE 1 +#elif defined(EA_COMPILER_CLANG) + #if !__is_identifier(__has_unique_object_representations) + #define EASTL_HAS_UNIQUE_OBJECT_REPRESENTATIONS_AVAILABLE 1 + #else + #define EASTL_HAS_UNIQUE_OBJECT_REPRESENTATIONS_AVAILABLE 0 + #endif +#else + #define EASTL_HAS_UNIQUE_OBJECT_REPRESENTATIONS_AVAILABLE 0 +#endif + + +/// EASTL_ENABLE_PAIR_FIRST_ELEMENT_CONSTRUCTOR +/// This feature define allows users to toggle the problematic eastl::pair implicit +/// single element constructor. +#ifndef EASTL_ENABLE_PAIR_FIRST_ELEMENT_CONSTRUCTOR + #define EASTL_ENABLE_PAIR_FIRST_ELEMENT_CONSTRUCTOR 0 +#endif + +/// EASTL_SYSTEM_BIG_ENDIAN_STATEMENT +/// EASTL_SYSTEM_LITTLE_ENDIAN_STATEMENT +/// These macros allow you to write endian specific macros as statements. +/// This allows endian specific code to be macro expanded from within other macros +/// +#if defined(EA_SYSTEM_BIG_ENDIAN) + #define EASTL_SYSTEM_BIG_ENDIAN_STATEMENT(...) __VA_ARGS__ +#else + #define EASTL_SYSTEM_BIG_ENDIAN_STATEMENT(...) +#endif + +#if defined(EA_SYSTEM_LITTLE_ENDIAN) + #define EASTL_SYSTEM_LITTLE_ENDIAN_STATEMENT(...) __VA_ARGS__ +#else + #define EASTL_SYSTEM_LITTLE_ENDIAN_STATEMENT(...) +#endif + + +#endif // Header include guard diff --git a/libkram/eastl/include/EASTL/internal/copy_help.h b/libkram/eastl/include/EASTL/internal/copy_help.h new file mode 100644 index 00000000..e5fb2abd --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/copy_help.h @@ -0,0 +1,215 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_INTERNAL_COPY_HELP_H +#define EASTL_INTERNAL_COPY_HELP_H + + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + +#include +#include +#include +#include // memcpy, memcmp, memmove + + +namespace eastl +{ + /// move / move_n / move_backward + /// copy / copy_n / copy_backward + /// + /// We want to optimize move, move_n, move_backward, copy, copy_backward, copy_n to do memmove operations + /// when possible. + /// + /// We could possibly use memcpy, though it has stricter overlap requirements than the move and copy + /// algorithms and would require a runtime if/else to choose it over memmove. In particular, memcpy + /// allows no range overlap at all, whereas move/copy allow output end overlap and move_backward/copy_backward + /// allow output begin overlap. Despite this it might be useful to use memcpy for any platforms where + /// memcpy is significantly faster than memmove, and since in most cases the copy/move operation in fact + /// doesn't target overlapping memory and so memcpy would be usable. 
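Referring back to the EASTL_SYSTEM_*_ENDIAN_STATEMENT macros at the end of config.h above, a small sketch of the pattern they enable: endian-specific statements that expand in place, with no #if block at the call site. SwapBytes32 and ReadLittleEndian32 are hypothetical:

    #include <stdint.h>

    inline uint32_t SwapBytes32(uint32_t v)
    {
        return (v >> 24) | ((v >> 8) & 0x0000FF00u) |
               ((v << 8) & 0x00FF0000u) | (v << 24);
    }

    // Reads a value stored as little-endian; the byte swap is emitted only when the
    // target is big-endian, and vanishes entirely on little-endian targets.
    inline uint32_t ReadLittleEndian32(const uint32_t* p)
    {
        uint32_t value = *p;
        EASTL_SYSTEM_BIG_ENDIAN_STATEMENT(value = SwapBytes32(value);)
        return value;
    }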
+ /// + /// We can use memmove/memcpy if the following hold true: + /// InputIterator and OutputIterator are of the same type. + /// InputIterator and OutputIterator are of type contiguous_iterator_tag or simply are pointers (the two are virtually synonymous). + /// is_trivially_copyable::value is true. i.e. the constructor T(const T& t) (or T(T&& t) if present) can be replaced by memmove(this, &t, sizeof(T)) + /// + /// copy normally differs from move, but there is a case where copy is the same as move: when copy is + /// used with a move_iterator. We handle that case here by detecting that copy is being done with a + /// move_iterator and redirect it to move (which can take advantage of memmove/memcpy). + /// + /// The generic_iterator class is typically used for wrapping raw memory pointers so they can act like + /// formal iterators. Since pointers provide an opportunity for memmove/memcpy operations, we can + /// detect a generic iterator and use it's wrapped type as a pointer if it happens to be one. + + // Implementation moving copying both trivial and non-trivial data via a lesser iterator than random-access. + template + struct move_and_copy_helper + { + template + static OutputIterator move_or_copy(InputIterator first, InputIterator last, OutputIterator result) + { + for(; first != last; ++result, ++first) + *result = *first; + return result; + } + }; + + // Specialization for copying non-trivial data via a random-access iterator. It's theoretically faster because the compiler can see the count when its a compile-time const. + // This specialization converts the random access InputIterator last-first to an integral type. There's simple way for us to take advantage of a random access output iterator, + // as the range is specified by the input instead of the output, and distance(first, last) for a non-random-access iterator is potentially slow. + template <> + struct move_and_copy_helper + { + template + static OutputIterator move_or_copy(InputIterator first, InputIterator last, OutputIterator result) + { + typedef typename eastl::iterator_traits::difference_type difference_type; + + for(difference_type n = (last - first); n > 0; --n, ++first, ++result) + *result = *first; + + return result; + } + }; + + // Specialization for moving non-trivial data via a lesser iterator than random-access. + template + struct move_and_copy_helper + { + template + static OutputIterator move_or_copy(InputIterator first, InputIterator last, OutputIterator result) + { + for(; first != last; ++result, ++first) + *result = eastl::move(*first); + return result; + } + }; + + // Specialization for moving non-trivial data via a random-access iterator. It's theoretically faster because the compiler can see the count when its a compile-time const. + template <> + struct move_and_copy_helper + { + template + static OutputIterator move_or_copy(InputIterator first, InputIterator last, OutputIterator result) + { + typedef typename eastl::iterator_traits::difference_type difference_type; + + for(difference_type n = (last - first); n > 0; --n, ++first, ++result) + *result = eastl::move(*first); + + return result; + } + }; + + // Specialization for when we can use memmove/memcpy. See the notes above for what conditions allow this. + template + struct move_and_copy_helper + { + template + static T* move_or_copy(const T* first, const T* last, T* result) + { + if (EASTL_UNLIKELY(first == last)) + return result; + + // We could use memcpy here if there's no range overlap, but memcpy is rarely much faster than memmove. 
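As a standalone illustration of the dispatch conditions described above (not the EASTL entry points themselves): trivially copyable elements addressed through raw pointers collapse to a single memmove, while everything else falls back to an element-by-element loop. The example uses std type traits purely to stay self-contained:

    #include <string.h>
    #include <type_traits>

    // Generic path: per-element assignment, works for any copy-assignable type.
    template <typename T>
    T* copy_range(const T* first, const T* last, T* result, std::false_type /*trivial*/)
    {
        for(; first != last; ++first, ++result)
            *result = *first;
        return result;
    }

    // Trivial path: one memmove covers the whole range, including the overlap cases
    // that copy/move are required to support.
    template <typename T>
    T* copy_range(const T* first, const T* last, T* result, std::true_type /*trivial*/)
    {
        const size_t byteCount = (size_t)((const char*)last - (const char*)first);
        if(byteCount)
            memmove(result, first, byteCount);
        return result + (last - first);
    }

    template <typename T>
    T* copy_range(const T* first, const T* last, T* result)
    {
        typedef typename std::is_trivially_copyable<T>::type trivial_tag;
        return copy_range(first, last, result, trivial_tag());
    }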
+ return (T*)memmove(result, first, (size_t)((uintptr_t)last - (uintptr_t)first)) + (last - first); + } + }; + + + + template + inline OutputIterator move_and_copy_chooser(InputIterator first, InputIterator last, OutputIterator result) + { + typedef typename eastl::iterator_traits::iterator_category IIC; + typedef typename eastl::iterator_traits::iterator_category OIC; + typedef typename eastl::iterator_traits::value_type value_type_input; + typedef typename eastl::iterator_traits::value_type value_type_output; + + const bool canBeMemmoved = eastl::is_trivially_copyable::value && + eastl::is_same::value && + (eastl::is_pointer::value || eastl::is_same::value) && + (eastl::is_pointer::value || eastl::is_same::value); + + return eastl::move_and_copy_helper::move_or_copy(first, last, result); // Need to chose based on the input iterator tag and not the output iterator tag, because containers accept input ranges of iterator types different than self. + } + + + // We have a second layer of unwrap_iterator calls because the original iterator might be something like move_iterator > (i.e. doubly-wrapped). + template + inline OutputIterator move_and_copy_unwrapper(InputIterator first, InputIterator last, OutputIterator result) + { + return OutputIterator(eastl::move_and_copy_chooser(eastl::unwrap_iterator(first), eastl::unwrap_iterator(last), eastl::unwrap_iterator(result))); // Have to convert to OutputIterator because result.base() could be a T* + } + + + /// move + /// + /// After this operation the elements in the moved-from range will still contain valid values of the + /// appropriate type, but not necessarily the same values as before the move. + /// Returns the end of the result range. + /// Note: When moving between containers, the dest range must be valid; this function doesn't resize containers. + /// Note: if result is within [first, last), move_backward must be used instead of move. + /// + /// Example usage: + /// eastl::move(myArray.begin(), myArray.end(), myDestArray.begin()); + /// + /// Reference implementation: + /// template + /// OutputIterator move(InputIterator first, InputIterator last, OutputIterator result) + /// { + /// while(first != last) + /// *result++ = eastl::move(*first++); + /// return result; + /// } + + template + inline OutputIterator move(InputIterator first, InputIterator last, OutputIterator result) + { + return eastl::move_and_copy_unwrapper(eastl::unwrap_iterator(first), eastl::unwrap_iterator(last), result); + } + + + /// copy + /// + /// Effects: Copies elements in the range [first, last) into the range [result, result + (last - first)) + /// starting from first and proceeding to last. For each nonnegative integer n < (last - first), + /// performs *(result + n) = *(first + n). + /// + /// Returns: result + (last - first). That is, returns the end of the result. Note that this + /// is different from how memmove/memcpy work, as they return the beginning of the result. + /// + /// Requires: result shall not be in the range [first, last). But the end of the result range + /// may in fact be within the input rante. + /// + /// Complexity: Exactly 'last - first' assignments. 
+ /// + template + inline OutputIterator copy(InputIterator first, InputIterator last, OutputIterator result) + { + const bool isMove = eastl::is_move_iterator::value; EA_UNUSED(isMove); + + return eastl::move_and_copy_unwrapper(eastl::unwrap_iterator(first), eastl::unwrap_iterator(last), result); + } +} // namespace eastl + +#endif // Header include guard + + + + + + + + + + + + + + + diff --git a/libkram/eastl/include/EASTL/internal/enable_shared.h b/libkram/eastl/include/EASTL/internal/enable_shared.h new file mode 100644 index 00000000..ac5f0729 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/enable_shared.h @@ -0,0 +1,83 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_INTERNAL_ENABLE_SHARED_H +#define EASTL_INTERNAL_ENABLE_SHARED_H + + +#include +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + +namespace eastl +{ + + /// enable_shared_from_this + /// + /// This is a helper mixin class that allows you to make any class + /// export a shared_ptr instance that is associated with the class + /// instance. Any class that inherits from this class gets two functions: + /// shared_ptr shared_from_this(); + /// shared_ptr shared_from_this() const; + /// If you call shared_from_this, you get back a shared_ptr that + /// refers to the class. A second call to shared_from_this returns + /// another shared_ptr that is shared with the first one. + /// + /// The trick that happens which is not so obvious here (and which is + /// not mentioned at all in the Boost documentation of their version + /// of this) is that the shared_ptr constructor detects that the + /// class has an enable_shared_from_this mixin and sets up this system + /// automatically for the user. This is done with template tricks. + /// + /// For some additional explanation, see the Boost documentation for + /// their description of their version of enable_shared_from_this. + /// + template + class enable_shared_from_this + { + public: + shared_ptr shared_from_this() + { return shared_ptr(mWeakPtr); } + + shared_ptr shared_from_this() const + { return shared_ptr(mWeakPtr); } + + weak_ptr weak_from_this() + { return mWeakPtr; } + + weak_ptr weak_from_this() const + { return mWeakPtr; } + + public: // This is public because the alternative fails on some compilers that we need to support. + mutable weak_ptr mWeakPtr; + + protected: + template friend class shared_ptr; + + EA_CONSTEXPR enable_shared_from_this() EA_NOEXCEPT + { } + + enable_shared_from_this(const enable_shared_from_this&) EA_NOEXCEPT + { } + + enable_shared_from_this& operator=(const enable_shared_from_this&) EA_NOEXCEPT + { return *this; } + + ~enable_shared_from_this() + { } + + }; // enable_shared_from_this + +} // namespace eastl + + +#endif // Header include guard + + + + + + diff --git a/libkram/eastl/include/EASTL/internal/fill_help.h b/libkram/eastl/include/EASTL/internal/fill_help.h new file mode 100644 index 00000000..235a24ee --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/fill_help.h @@ -0,0 +1,484 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
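A minimal usage sketch of enable_shared_from_this as described above; the Widget class and the free function are illustrative:

    #include <EASTL/shared_ptr.h>

    class Widget : public eastl::enable_shared_from_this<Widget>
    {
    public:
        eastl::shared_ptr<Widget> self()
        {
            // Only valid once the object is already owned by a shared_ptr; that
            // owning shared_ptr's constructor is what populates mWeakPtr.
            return shared_from_this();
        }
    };

    void example()
    {
        eastl::shared_ptr<Widget> a(new Widget);
        eastl::shared_ptr<Widget> b = a->self();   // shares ownership with 'a'
        // a and b now both keep the same Widget alive.
    }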
+///////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_INTERNAL_FILL_HELP_H +#define EASTL_INTERNAL_FILL_HELP_H + + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + +#include + +#if defined(EA_COMPILER_MICROSOFT) && (defined(EA_PROCESSOR_X86) || defined(EA_PROCESSOR_X86_64)) +#include +#endif + +namespace eastl +{ + // fill + // + // We implement some fill helper functions in order to allow us to optimize it + // where possible. + // + template + struct fill_imp + { + template + static void do_fill(ForwardIterator first, ForwardIterator last, const T& value) + { + // The C++ standard doesn't specify whether we need to create a temporary + // or not, but all std STL implementations are written like what we have here. + for(; first != last; ++first) + *first = value; + } + }; + + template <> + struct fill_imp + { + template + static void do_fill(ForwardIterator first, ForwardIterator last, const T& value) + { + typedef typename eastl::iterator_traits::value_type value_type; + // We create a temp and fill from that because value might alias to the + // destination range and so the compiler would be forced into generating + // less efficient code. + for(const T temp = value; first != last; ++first) + { + EA_UNUSED(temp); + *first = static_cast(temp); + } + } + }; + + /// fill + /// + /// fill is like memset in that it assigns a single value repeatedly to a + /// destination range. It allows for any type of iterator (not just an array) + /// and the source value can be any type, not just a byte. + /// Note that the source value (which is a reference) can come from within + /// the destination range. + /// + /// Effects: Assigns value through all the iterators in the range [first, last). + /// + /// Complexity: Exactly 'last - first' assignments. + /// + /// Note: The C++ standard doesn't specify anything about the value parameter + /// coming from within the first-last range. All std STL implementations act + /// as if the standard specifies that value must not come from within this range. + /// + template + inline void fill(ForwardIterator first, ForwardIterator last, const T& value) + { + eastl::fill_imp< is_scalar::value >::do_fill(first, last, value); + + // Possibly better implementation, as it will deal with small PODs as well as scalars: + // bEasyCopy is true if the type has a trivial constructor (e.g. is a POD) and if + // it is small. Thus any built-in type or any small user-defined struct will qualify. 
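A short illustration of the case the scalar fill_imp specialization above is written around: the fill value is a reference into the destination range itself, so the implementation snapshots it into a temporary first:

    #include <EASTL/algorithm.h>
    #include <EASTL/vector.h>

    void example()
    {
        eastl::vector<int> v;
        for(int i = 0; i < 8; ++i)
            v.push_back(i);

        // v[3] lives inside [begin, end). The temporary made by fill_imp lets the
        // compiler keep the source value in a register instead of assuming it can
        // change every time an element of v is written.
        eastl::fill(v.begin(), v.end(), v[3]);
        // Every element is now 3.
    }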
+ //const bool bEasyCopy = eastl::type_and::value, + // eastl::integral_constant::value; + //eastl::fill_imp::do_fill(first, last, value); + + } + + #if(defined(EA_COMPILER_GNUC) || defined(EA_COMPILER_CLANG)) && (defined(EA_PROCESSOR_X86) || defined(EA_PROCESSOR_X86_64)) + #if defined(EA_PROCESSOR_X86_64) + template + inline void fill(uint64_t* first, uint64_t* last, Value c) + { + uintptr_t count = (uintptr_t)(last - first); + uint64_t value = (uint64_t)(c); + + __asm__ __volatile__ ("cld\n\t" + "rep stosq\n\t" + : "+c" (count), "+D" (first), "=m" (first) + : "a" (value) + : "cc" ); + } + + + template + inline void fill(int64_t* first, int64_t* last, Value c) + { + uintptr_t count = (uintptr_t)(last - first); + int64_t value = (int64_t)(c); + + __asm__ __volatile__ ("cld\n\t" + "rep stosq\n\t" + : "+c" (count), "+D" (first), "=m" (first) + : "a" (value) + : "cc" ); + } + #endif + + template + inline void fill(uint32_t* first, uint32_t* last, Value c) + { + uintptr_t count = (uintptr_t)(last - first); + uint32_t value = (uint32_t)(c); + + __asm__ __volatile__ ("cld\n\t" + "rep stosl\n\t" + : "+c" (count), "+D" (first), "=m" (first) + : "a" (value) + : "cc" ); + } + + + template + inline void fill(int32_t* first, int32_t* last, Value c) + { + uintptr_t count = (uintptr_t)(last - first); + int32_t value = (int32_t)(c); + + __asm__ __volatile__ ("cld\n\t" + "rep stosl\n\t" + : "+c" (count), "+D" (first), "=m" (first) + : "a" (value) + : "cc" ); + } + + + template + inline void fill(uint16_t* first, uint16_t* last, Value c) + { + uintptr_t count = (uintptr_t)(last - first); + uint16_t value = (uint16_t)(c); + + __asm__ __volatile__ ("cld\n\t" + "rep stosw\n\t" + : "+c" (count), "+D" (first), "=m" (first) + : "a" (value) + : "cc" ); + } + + + template + inline void fill(int16_t* first, int16_t* last, Value c) + { + uintptr_t count = (uintptr_t)(last - first); + int16_t value = (int16_t)(c); + + __asm__ __volatile__ ("cld\n\t" + "rep stosw\n\t" + : "+c" (count), "+D" (first), "=m" (first) + : "a" (value) + : "cc" ); + } + + #elif defined(EA_COMPILER_MICROSOFT) && (defined(EA_PROCESSOR_X86) || defined(EA_PROCESSOR_X86_64)) + #if defined(EA_PROCESSOR_X86_64) + template + inline void fill(uint64_t* first, uint64_t* last, Value c) + { + __stosq(first, (uint64_t)c, (size_t)(last - first)); + } + + template + inline void fill(int64_t* first, int64_t* last, Value c) + { + __stosq((uint64_t*)first, (uint64_t)c, (size_t)(last - first)); + } + #endif + + template + inline void fill(uint32_t* first, uint32_t* last, Value c) + { + __stosd((unsigned long*)first, (unsigned long)c, (size_t)(last - first)); + } + + template + inline void fill(int32_t* first, int32_t* last, Value c) + { + __stosd((unsigned long*)first, (unsigned long)c, (size_t)(last - first)); + } + + template + inline void fill(uint16_t* first, uint16_t* last, Value c) + { + __stosw(first, (uint16_t)c, (size_t)(last - first)); + } + + template + inline void fill(int16_t* first, int16_t* last, Value c) + { + __stosw((uint16_t*)first, (uint16_t)c, (size_t)(last - first)); + } + #endif + + + inline void fill(char* first, char* last, const char& c) // It's debateable whether we should use 'char& c' or 'char c' here. + { + memset(first, (unsigned char)c, (size_t)(last - first)); + } + + inline void fill(char* first, char* last, const int c) // This is used for cases like 'fill(first, last, 0)'. 
+ { + memset(first, (unsigned char)c, (size_t)(last - first)); + } + + inline void fill(unsigned char* first, unsigned char* last, const unsigned char& c) + { + memset(first, (unsigned char)c, (size_t)(last - first)); + } + + inline void fill(unsigned char* first, unsigned char* last, const int c) + { + memset(first, (unsigned char)c, (size_t)(last - first)); + } + + inline void fill(signed char* first, signed char* last, const signed char& c) + { + memset(first, (unsigned char)c, (size_t)(last - first)); + } + + inline void fill(signed char* first, signed char* last, const int c) + { + memset(first, (unsigned char)c, (size_t)(last - first)); + } + + #if defined(_MSC_VER) || defined(__BORLANDC__) || defined(__ICL) // ICL = Intel compiler + inline void fill(bool* first, bool* last, const bool& b) + { + memset(first, (char)b, (size_t)(last - first)); + } + #endif + + + + + // fill_n + // + // We implement some fill helper functions in order to allow us to optimize it + // where possible. + // + template + struct fill_n_imp + { + template + static OutputIterator do_fill(OutputIterator first, Size n, const T& value) + { + for(; n-- > 0; ++first) + *first = value; + return first; + } + }; + + template <> + struct fill_n_imp + { + template + static OutputIterator do_fill(OutputIterator first, Size n, const T& value) + { + typedef typename eastl::iterator_traits::value_type value_type; + + // We create a temp and fill from that because value might alias to + // the destination range and so the compiler would be forced into + // generating less efficient code. + for(const T temp = value; n-- > 0; ++first) + *first = static_cast(temp); + return first; + } + }; + + /// fill_n + /// + /// The fill_n function is very much like memset in that a copies a source value + /// n times into a destination range. The source value may come from within + /// the destination range. + /// + /// Effects: Assigns value through all the iterators in the range [first, first + n). + /// + /// Complexity: Exactly n assignments. + /// + template + OutputIterator fill_n(OutputIterator first, Size n, const T& value) + { + return eastl::fill_n_imp::value>::do_fill(first, n, value); + } + + template + inline char* fill_n(char* first, Size n, const char& c) + { + return (char*)memset(first, (char)c, (size_t)n) + n; + } + + template + inline unsigned char* fill_n(unsigned char* first, Size n, const unsigned char& c) + { + return (unsigned char*)memset(first, (unsigned char)c, (size_t)n) + n; + } + + template + inline signed char* fill_n(signed char* first, Size n, const signed char& c) + { + return (signed char*)memset(first, (signed char)c, n) + (size_t)n; + } + + #if defined(_MSC_VER) || defined(__BORLANDC__) || defined(__ICL) // ICL = Intel compiler + template + inline bool* fill_n(bool* first, Size n, const bool& b) + { + return (bool*)memset(first, (char)b, n) + (size_t)n; + } + #endif + + #if(defined(EA_COMPILER_GNUC) || defined(EA_COMPILER_CLANG)) && (defined(EA_PROCESSOR_X86) || defined(EA_PROCESSOR_X86_64)) + #if defined(EA_PROCESSOR_X86_64) + template + inline uint64_t* fill_n(uint64_t* first, Size n, Value c) + { + uintptr_t count = (uintptr_t)(n); + uint64_t value = (uint64_t)(c); + + __asm__ __volatile__ ("cld\n\t" + "rep stosq\n\t" + : "+c" (count), "+D" (first), "=m" (first) + : "a" (value) + : "cc" ); + return first; // first is updated by the code above. 
+ } + + + template + inline int64_t* fill_n(int64_t* first, Size n, Value c) + { + uintptr_t count = (uintptr_t)(n); + int64_t value = (int64_t)(c); + + __asm__ __volatile__ ("cld\n\t" + "rep stosq\n\t" + : "+c" (count), "+D" (first), "=m" (first) + : "a" (value) + : "cc" ); + return first; // first is updated by the code above. + } + #endif + + template + inline uint32_t* fill_n(uint32_t* first, Size n, Value c) + { + uintptr_t count = (uintptr_t)(n); + uint32_t value = (uint32_t)(c); + + __asm__ __volatile__ ("cld\n\t" + "rep stosl\n\t" + : "+c" (count), "+D" (first), "=m" (first) + : "a" (value) + : "cc" ); + return first; // first is updated by the code above. + } + + + template + inline int32_t* fill_n(int32_t* first, Size n, Value c) + { + uintptr_t count = (uintptr_t)(n); + int32_t value = (int32_t)(c); + + __asm__ __volatile__ ("cld\n\t" + "rep stosl\n\t" + : "+c" (count), "+D" (first), "=m" (first) + : "a" (value) + : "cc" ); + return first; // first is updated by the code above. + } + + + template + inline uint16_t* fill_n(uint16_t* first, Size n, Value c) + { + uintptr_t count = (uintptr_t)(n); + uint16_t value = (uint16_t)(c); + + __asm__ __volatile__ ("cld\n\t" + "rep stosw\n\t" + : "+c" (count), "+D" (first), "=m" (first) + : "a" (value) + : "cc" ); + return first; // first is updated by the code above. + } + + + template + inline int16_t* fill_n(int16_t* first, Size n, Value c) + { + uintptr_t count = (uintptr_t)(n); + int16_t value = (int16_t)(c); + + __asm__ __volatile__ ("cld\n\t" + "rep stosw\n\t" + : "+c" (count), "+D" (first), "=m" (first) + : "a" (value) + : "cc" ); + return first; // first is updated by the code above. + } + + #elif defined(EA_COMPILER_MICROSOFT) && (defined(EA_PROCESSOR_X86) || defined(EA_PROCESSOR_X86_64)) + #if defined(EA_PROCESSOR_X86_64) + template + inline uint64_t* fill_n(uint64_t* first, Size n, Value c) + { + __stosq(first, (uint64_t)c, (size_t)n); + return first + n; + } + + template + inline int64_t* fill_n(int64_t* first, Size n, Value c) + { + __stosq((uint64_t*)first, (uint64_t)c, (size_t)n); + return first + n; + } + #endif + + template + inline uint32_t* fill_n(uint32_t* first, Size n, Value c) + { + __stosd((unsigned long*)first, (unsigned long)c, (size_t)n); + return first + n; + } + + template + inline int32_t* fill_n(int32_t* first, Size n, Value c) + { + __stosd((unsigned long*)first, (unsigned long)c, (size_t)n); + return first + n; + } + + template + inline uint16_t* fill_n(uint16_t* first, Size n, Value c) + { + __stosw(first, (uint16_t)c, (size_t)n); + return first + n; + } + + template + inline int16_t* fill_n(int16_t* first, Size n, Value c) + { + __stosw((uint16_t*)first, (uint16_t)c, (size_t)n); + return first + n; + } + #endif + +} // namespace eastl + +#endif // Header include guard + + + + + + + + + + + + + + + diff --git a/libkram/eastl/include/EASTL/internal/fixed_pool.h b/libkram/eastl/include/EASTL/internal/fixed_pool.h new file mode 100644 index 00000000..5a380046 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/fixed_pool.h @@ -0,0 +1,1631 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// This file implements the following +// aligned_buffer +// fixed_pool_base +// fixed_pool +// fixed_pool_with_overflow +// fixed_hashtable_allocator +// fixed_vector_allocator +// fixed_swap +// +/////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_INTERNAL_FIXED_POOL_H +#define EASTL_INTERNAL_FIXED_POOL_H + + +#include +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + +#include +#include +#include +#include +#include + + +EA_DISABLE_ALL_VC_WARNINGS(); +#include +EA_RESTORE_ALL_VC_WARNINGS(); + +// 4275 - non dll-interface class used as base for DLL-interface classkey 'identifier' +EA_DISABLE_VC_WARNING(4275); + + +namespace eastl +{ + + /// EASTL_FIXED_POOL_DEFAULT_NAME + /// + /// Defines a default allocator name in the absence of a user-provided name. + /// + #ifndef EASTL_FIXED_POOL_DEFAULT_NAME + #define EASTL_FIXED_POOL_DEFAULT_NAME EASTL_DEFAULT_NAME_PREFIX " fixed_pool" // Unless the user overrides something, this is "EASTL fixed_pool". + #endif + + + + /////////////////////////////////////////////////////////////////////////// + // aligned_buffer + /////////////////////////////////////////////////////////////////////////// + + /// aligned_buffer + /// + /// This is useful for creating a buffer of the same size and alignment + /// of a given struct or class. This is useful for creating memory pools + /// that support both size and alignment requirements of stored objects + /// but without wasting space in over-allocating. + /// + /// Note that we implement this via struct specializations, as some + /// compilers such as VC++ do not support specification of alignments + /// in any way other than via an integral constant. + /// + /// Example usage: + /// struct Widget{ }; // This class has a given size and alignment. + /// + /// Declare a char buffer of equal size and alignment to Widget. + /// aligned_buffer mWidgetBuffer; + /// + /// Declare an array this time. 
+ /// aligned_buffer mWidgetArray[15]; + /// + typedef char EASTL_MAY_ALIAS aligned_buffer_char; + + template + struct aligned_buffer { aligned_buffer_char buffer[size]; }; + + template + struct aligned_buffer { EA_PREFIX_ALIGN(2) aligned_buffer_char buffer[size] EA_POSTFIX_ALIGN(2); }; + + template + struct aligned_buffer { EA_PREFIX_ALIGN(4) aligned_buffer_char buffer[size] EA_POSTFIX_ALIGN(4); }; + + template + struct aligned_buffer { EA_PREFIX_ALIGN(8) aligned_buffer_char buffer[size] EA_POSTFIX_ALIGN(8); }; + + template + struct aligned_buffer { EA_PREFIX_ALIGN(16) aligned_buffer_char buffer[size] EA_POSTFIX_ALIGN(16); }; + + template + struct aligned_buffer { EA_PREFIX_ALIGN(32) aligned_buffer_char buffer[size] EA_POSTFIX_ALIGN(32); }; + + template + struct aligned_buffer { EA_PREFIX_ALIGN(64) aligned_buffer_char buffer[size] EA_POSTFIX_ALIGN(64); }; + + template + struct aligned_buffer { EA_PREFIX_ALIGN(128) aligned_buffer_char buffer[size] EA_POSTFIX_ALIGN(128); }; + + template + struct aligned_buffer { EA_PREFIX_ALIGN(256) aligned_buffer_char buffer[size] EA_POSTFIX_ALIGN(256); }; + + template + struct aligned_buffer { EA_PREFIX_ALIGN(512) aligned_buffer_char buffer[size] EA_POSTFIX_ALIGN(512); }; + + template + struct aligned_buffer { EA_PREFIX_ALIGN(1024) aligned_buffer_char buffer[size] EA_POSTFIX_ALIGN(1024); }; + + template + struct aligned_buffer { EA_PREFIX_ALIGN(2048) aligned_buffer_char buffer[size] EA_POSTFIX_ALIGN(2048); }; + + template + struct aligned_buffer { EA_PREFIX_ALIGN(4096) aligned_buffer_char buffer[size] EA_POSTFIX_ALIGN(4096); }; + + + + + /////////////////////////////////////////////////////////////////////////// + // fixed_pool_base + /////////////////////////////////////////////////////////////////////////// + + /// fixed_pool_base + /// + /// This is a base class for the implementation of fixed-size pools. + /// In particular, the fixed_pool and fixed_pool_with_overflow classes + /// are based on fixed_pool_base. + /// + struct fixed_pool_base + { + public: + /// fixed_pool_base + /// + fixed_pool_base(void* pMemory = NULL) + : mpHead((Link*)pMemory) + , mpNext((Link*)pMemory) + , mpCapacity((Link*)pMemory) + , mnNodeSize(0) // This is normally set in the init function. + { + #if EASTL_FIXED_SIZE_TRACKING_ENABLED + mnCurrentSize = 0; + mnPeakSize = 0; + #endif + } + + + /// fixed_pool_base + /// + // Disabled because the default is sufficient. While it normally makes no sense to deep copy + // this data, our usage of this class is such that this is OK and wanted. + // + // fixed_pool_base(const fixed_pool_base& x) + // { + // } + + + /// operator= + /// + fixed_pool_base& operator=(const fixed_pool_base&) + { + // By design we do nothing. We don't attempt to deep-copy member data. + return *this; + } + + + /// init + /// + /// Initializes a fixed_pool with a given set of parameters. + /// You cannot call this function twice else the resulting + /// behaviour will be undefined. You can only call this function + /// after constructing the fixed_pool with the default constructor. + /// + EASTL_API void init(void* pMemory, size_t memorySize, size_t nodeSize, + size_t alignment, size_t alignmentOffset = 0); + + + /// peak_size + /// + /// Returns the maximum number of outstanding allocations there have been + /// at any one time. This represents a high water mark for the allocation count. 
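Expanding slightly on the aligned_buffer usage notes above, a small sketch with a hypothetical Widget type; the buffer member provides raw storage of the right size and alignment, and no constructor runs until placement new is used:

    #include <new>

    struct Widget { float x, y, z; int id; };   // hypothetical stored type

    // Raw storage matching Widget's size and alignment, with no Widget constructed yet.
    eastl::aligned_buffer<sizeof(Widget), EASTL_ALIGN_OF(Widget)> gWidgetStorage;

    Widget* CreateBufferedWidget()
    {
        return new(gWidgetStorage.buffer) Widget();   // construct in place when needed
    }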
+ /// + size_t peak_size() const + { + #if EASTL_FIXED_SIZE_TRACKING_ENABLED + return mnPeakSize; + #else + return 0; + #endif + } + + + /// can_allocate + /// + /// Returns true if there are any free links. + /// + bool can_allocate() const + { + return (mpHead != NULL) || (mpNext != mpCapacity); + } + + public: + /// Link + /// Implements a singly-linked list. + struct Link + { + Link* mpNext; + }; + + Link* mpHead; + Link* mpNext; + Link* mpCapacity; + size_t mnNodeSize; + + #if EASTL_FIXED_SIZE_TRACKING_ENABLED + uint32_t mnCurrentSize; /// Current number of allocated nodes. + uint32_t mnPeakSize; /// Max number of allocated nodes at any one time. + #endif + + }; // fixed_pool_base + + + + + + /////////////////////////////////////////////////////////////////////////// + // fixed_pool + /////////////////////////////////////////////////////////////////////////// + + /// fixed_pool + /// + /// Implements a simple fixed pool allocator for use by fixed-size containers. + /// This is not a generic eastl allocator which can be plugged into an arbitrary + /// eastl container, as it simplifies some functions are arguments for the + /// purpose of efficiency. + /// + class EASTL_API fixed_pool : public fixed_pool_base + { + public: + /// fixed_pool + /// + /// Default constructor. User usually will want to call init() after + /// constructing via this constructor. The pMemory argument is for the + /// purposes of temporarily storing a pointer to the buffer to be used. + /// Even though init may have a pMemory argument, this arg is useful + /// for temporary storage, as per copy construction. + /// + fixed_pool(void* pMemory = NULL) + : fixed_pool_base(pMemory) + { + } + + + /// fixed_pool + /// + /// Constructs a fixed_pool with a given set of parameters. + /// + fixed_pool(void* pMemory, size_t memorySize, size_t nodeSize, + size_t alignment, size_t alignmentOffset = 0) + { + init(pMemory, memorySize, nodeSize, alignment, alignmentOffset); + } + + + /// fixed_pool + /// + // Disabled because the default is sufficient. While it normally makes no sense to deep copy + // this data, our usage of this class is such that this is OK and wanted. + // + // fixed_pool(const fixed_pool& x) + // { + // } + + + /// operator= + /// + fixed_pool& operator=(const fixed_pool&) + { + // By design we do nothing. We don't attempt to deep-copy member data. + return *this; + } + + + /// allocate + /// + /// Allocates a new object of the size specified upon class initialization. + /// Returns NULL if there is no more memory. + /// + void* allocate() + { + Link* pLink = mpHead; + + if(pLink) // If we have space... + { + #if EASTL_FIXED_SIZE_TRACKING_ENABLED + if(++mnCurrentSize > mnPeakSize) + mnPeakSize = mnCurrentSize; + #endif + + mpHead = pLink->mpNext; + return pLink; + } + else + { + // If there's no free node in the free list, just + // allocate another from the reserved memory area + + if(mpNext != mpCapacity) + { + pLink = mpNext; + + mpNext = reinterpret_cast(reinterpret_cast(mpNext) + mnNodeSize); + + #if EASTL_FIXED_SIZE_TRACKING_ENABLED + if(++mnCurrentSize > mnPeakSize) + mnPeakSize = mnCurrentSize; + #endif + + return pLink; + } + + return NULL; + } + } + + void* allocate(size_t /*alignment*/, size_t /*offset*/) + { + return allocate(); + } + + /// deallocate + /// + /// Frees the given object which was allocated by allocate(). + /// If the given node was not allocated by allocate() then the behaviour + /// is undefined. 
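// [Editor's illustrative sketch -- not part of this patch] A full round trip against
// the pool sketched above; fixed_pool hands out raw node-sized blocks, so object
// lifetime is handled with placement new and an explicit destructor call. Widget is
// the same hypothetical type as before.
//
//     void* mem = pool.allocate();        // returns NULL once all 16 nodes are taken
//     if (mem) {
//         Widget* w = new (mem) Widget(); // construct in place
//         // ... use w ...
//         w->~Widget();
//         pool.deallocate(w);             // node goes back on the free list
//     }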
+ /// + void deallocate(void* p) + { + #if EASTL_FIXED_SIZE_TRACKING_ENABLED + --mnCurrentSize; + #endif + + ((Link*)p)->mpNext = mpHead; + mpHead = ((Link*)p); + } + + + using fixed_pool_base::can_allocate; + + + const char* get_name() const + { + return EASTL_FIXED_POOL_DEFAULT_NAME; + } + + + void set_name(const char*) + { + // Nothing to do. We don't allocate memory. + } + + }; // fixed_pool + + + + + + /////////////////////////////////////////////////////////////////////////// + // fixed_pool_with_overflow + /////////////////////////////////////////////////////////////////////////// + + /// fixed_pool_with_overflow + /// + template + class fixed_pool_with_overflow : public fixed_pool_base + { + public: + typedef OverflowAllocator overflow_allocator_type; + + + fixed_pool_with_overflow(void* pMemory = NULL) + : fixed_pool_base(pMemory), + mOverflowAllocator(EASTL_FIXED_POOL_DEFAULT_NAME) + { + // Leave mpPoolBegin, mpPoolEnd uninitialized. + } + + + fixed_pool_with_overflow(void* pMemory, const overflow_allocator_type& allocator) + : fixed_pool_base(pMemory), + mOverflowAllocator(allocator) + { + // Leave mpPoolBegin, mpPoolEnd uninitialized. + } + + + fixed_pool_with_overflow(void* pMemory, size_t memorySize, size_t nodeSize, + size_t alignment, size_t alignmentOffset = 0) + : mOverflowAllocator(EASTL_FIXED_POOL_DEFAULT_NAME) + { + fixed_pool_base::init(pMemory, memorySize, nodeSize, alignment, alignmentOffset); + + mpPoolBegin = pMemory; + } + + + fixed_pool_with_overflow(void* pMemory, size_t memorySize, size_t nodeSize, + size_t alignment, size_t alignmentOffset, + const overflow_allocator_type& allocator) + : mOverflowAllocator(allocator) + { + fixed_pool_base::init(pMemory, memorySize, nodeSize, alignment, alignmentOffset); + + mpPoolBegin = pMemory; + } + + + // Disabled because the default is sufficient. While it normally makes no sense to deep copy + // this data, our usage of this class is such that this is OK and wanted. + // + //fixed_pool_with_overflow(const fixed_pool_with_overflow& x) + //{ + // ... 
+ //} + + + fixed_pool_with_overflow& operator=(const fixed_pool_with_overflow& x) + { + #if EASTL_ALLOCATOR_COPY_ENABLED + mOverflowAllocator = x.mOverflowAllocator; + #else + (void)x; + #endif + + return *this; + } + + + void init(void* pMemory, size_t memorySize, size_t nodeSize, + size_t alignment, size_t alignmentOffset = 0) + { + fixed_pool_base::init(pMemory, memorySize, nodeSize, alignment, alignmentOffset); + + mpPoolBegin = pMemory; + } + + + void* allocate() + { + void* p = NULL; + Link* pLink = mpHead; + + if(pLink) + { + // Unlink from chain + p = pLink; + mpHead = pLink->mpNext; + } + else + { + // If there's no free node in the free list, just + // allocate another from the reserved memory area + + if(mpNext != mpCapacity) + { + p = pLink = mpNext; + mpNext = reinterpret_cast(reinterpret_cast(mpNext) + mnNodeSize); + } + else + p = mOverflowAllocator.allocate(mnNodeSize); + } + + #if EASTL_FIXED_SIZE_TRACKING_ENABLED + if(p && (++mnCurrentSize > mnPeakSize)) + mnPeakSize = mnCurrentSize; + #endif + + return p; + } + + + void* allocate(size_t alignment, size_t alignmentOffset) + { + void* p = NULL; + Link* pLink = mpHead; + + if (pLink) + { + // Unlink from chain + p = pLink; + mpHead = pLink->mpNext; + } + else + { + // If there's no free node in the free list, just + // allocate another from the reserved memory area + + if (mpNext != mpCapacity) + { + p = pLink = mpNext; + mpNext = reinterpret_cast(reinterpret_cast(mpNext)+mnNodeSize); + } + else + { + p = allocate_memory(mOverflowAllocator, mnNodeSize, alignment, alignmentOffset); + EASTL_ASSERT_MSG(p != nullptr, "the behaviour of eastl::allocators that return nullptr is not defined."); + } + + } + + #if EASTL_FIXED_SIZE_TRACKING_ENABLED + if (p && (++mnCurrentSize > mnPeakSize)) + mnPeakSize = mnCurrentSize; + #endif + + return p; + } + + void deallocate(void* p) + { + #if EASTL_FIXED_SIZE_TRACKING_ENABLED + --mnCurrentSize; + #endif + + if((p >= mpPoolBegin) && (p < mpCapacity)) + { + ((Link*)p)->mpNext = mpHead; + mpHead = ((Link*)p); + } + else + mOverflowAllocator.deallocate(p, (size_t)mnNodeSize); + } + + + using fixed_pool_base::can_allocate; + + + const char* get_name() const + { + return mOverflowAllocator.get_name(); + } + + + void set_name(const char* pName) + { + mOverflowAllocator.set_name(pName); + } + + + const overflow_allocator_type& get_overflow_allocator() const + { + return mOverflowAllocator; + } + + + overflow_allocator_type& get_overflow_allocator() + { + return mOverflowAllocator; + } + + + void set_overflow_allocator(const overflow_allocator_type& overflowAllocator) + { + mOverflowAllocator = overflowAllocator; + } + public: + OverflowAllocator mOverflowAllocator; + void* mpPoolBegin; // Ideally we wouldn't need this member variable. he problem is that the information about the pool buffer and object size is stored in the owning container and we can't have access to it without increasing the amount of code we need and by templating more code. It may turn out that simply storing data here is smaller in the end. + + }; // fixed_pool_with_overflow + + + + + + /////////////////////////////////////////////////////////////////////////// + // fixed_node_allocator + /////////////////////////////////////////////////////////////////////////// + + /// fixed_node_allocator + /// + /// Note: This class was previously named fixed_node_pool, but was changed because this name + /// was inconsistent with the other allocators here which ended with _allocator. 
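// [Editor's illustrative sketch -- not part of this patch] fixed_pool_with_overflow
// behaves like fixed_pool until the local buffer is exhausted, then falls back to its
// overflow allocator; deallocate() routes each pointer to the right owner by checking
// whether it lies inside the local buffer. The eastl::allocator template argument and
// the sizes below are assumptions for illustration only.
//
//     eastl::aligned_buffer<64, 16> nodes[4];
//     eastl::fixed_pool_with_overflow<eastl::allocator> pool(nodes, sizeof(nodes), 64, 16);
//
//     void* a = pool.allocate();   // served from the local buffer
//     void* b = pool.allocate();
//     void* c = pool.allocate();
//     void* d = pool.allocate();
//     void* e = pool.allocate();   // local nodes exhausted -> overflow allocator
//     pool.deallocate(e);          // freed through the overflow allocator
//     pool.deallocate(a);          // returned to the local free list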
+ /// + /// Implements a fixed_pool with a given node count, alignment, and alignment offset. + /// fixed_node_allocator is like fixed_pool except it is templated on the node type instead + /// of being a generic allocator. All it does is pass allocations through to + /// the fixed_pool base. This functionality is separate from fixed_pool because there + /// are other uses for fixed_pool. + /// + /// We template on kNodeSize instead of node_type because the former allows for the + /// two different node_types of the same size to use the same template implementation. + /// + /// Template parameters: + /// nodeSize The size of the object to allocate. + /// nodeCount The number of objects the pool contains. + /// nodeAlignment The alignment of the objects to allocate. + /// nodeAlignmentOffset The alignment offset of the objects to allocate. + /// bEnableOverflow Whether or not we should use the overflow heap if our object pool is exhausted. + /// OverflowAllocator Overflow allocator, which is only used if bEnableOverflow == true. Defaults to the global heap. + /// + template + class fixed_node_allocator + { + public: + typedef typename type_select, fixed_pool>::type pool_type; + typedef fixed_node_allocator this_type; + typedef OverflowAllocator overflow_allocator_type; + + enum + { + kNodeSize = nodeSize, + kNodeCount = nodeCount, + kNodesSize = nodeCount * nodeSize, // Note that the kBufferSize calculation assumes that the compiler sets sizeof(T) to be a multiple alignof(T), and so sizeof(T) is always >= alignof(T). + kBufferSize = kNodesSize + ((nodeAlignment > 1) ? nodeSize-1 : 0) + nodeAlignmentOffset, + kNodeAlignment = nodeAlignment, + kNodeAlignmentOffset = nodeAlignmentOffset + }; + + public: + pool_type mPool; + + public: + //fixed_node_allocator(const char* pName) + //{ + // mPool.set_name(pName); + //} + + + fixed_node_allocator(void* pNodeBuffer) + : mPool(pNodeBuffer, kNodesSize, kNodeSize, kNodeAlignment, kNodeAlignmentOffset) + { + } + + + fixed_node_allocator(void* pNodeBuffer, const overflow_allocator_type& allocator) + : mPool(pNodeBuffer, kNodesSize, kNodeSize, kNodeAlignment, kNodeAlignmentOffset, allocator) + { + } + + + /// fixed_node_allocator + /// + /// Note that we are copying x.mpHead to our own fixed_pool. This at first may seem + /// broken, as fixed pools cannot take over ownership of other fixed pools' memory. + /// However, we declare that this copy ctor can only ever be safely called when + /// the user has intentionally pre-seeded the source with the destination pointer. + /// This is somewhat playing with fire, but it allows us to get around chicken-and-egg + /// problems with containers being their own allocators, without incurring any memory + /// costs or extra code costs. There's another reason for this: we very strongly want + /// to avoid full copying of instances of fixed_pool around, especially via the stack. + /// Larger pools won't even be able to fit on many machine's stacks. So this solution + /// is also a mechanism to prevent that situation from existing and being used. + /// Perhaps some day we'll find a more elegant yet costless way around this. 
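// [Editor's worked example -- not part of this patch] How the enum above sizes the
// backing buffer. For nodeSize = 16, nodeCount = 8, nodeAlignment = 8,
// nodeAlignmentOffset = 0:
//
//     kNodesSize  = 8 * 16             = 128
//     kBufferSize = 128 + (16 - 1) + 0 = 143
//
// Because nodeAlignment > 1, an extra nodeSize-1 bytes of slack are reserved so the
// first node can be placed at a correctly aligned address inside the raw buffer
// (the code relies on sizeof(T) >= alignof(T), as noted in the enum comment).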
+ /// + fixed_node_allocator(const this_type& x) + : mPool(x.mPool.mpNext, kNodesSize, kNodeSize, kNodeAlignment, kNodeAlignmentOffset, x.mPool.mOverflowAllocator) + { + } + + + this_type& operator=(const this_type& x) + { + mPool = x.mPool; + return *this; + } + + + void* allocate(size_t n, int /*flags*/ = 0) + { + (void)n; + EASTL_ASSERT(n == kNodeSize); + return mPool.allocate(); + } + + + void* allocate(size_t n, size_t alignment, size_t offset, int /*flags*/ = 0) + { + (void)n; + EASTL_ASSERT(n == kNodeSize); + return mPool.allocate(alignment, offset); + } + + + void deallocate(void* p, size_t) + { + mPool.deallocate(p); + } + + + /// can_allocate + /// + /// Returns true if there are any free links. + /// + bool can_allocate() const + { + return mPool.can_allocate(); + } + + + /// reset + /// + /// This function unilaterally resets the fixed pool back to a newly initialized + /// state. This is useful for using in tandem with container reset functionality. + /// + void reset(void* pNodeBuffer) + { + mPool.init(pNodeBuffer, kBufferSize, kNodeSize, kNodeAlignment, kNodeAlignmentOffset); + } + + + const char* get_name() const + { + return mPool.get_name(); + } + + + void set_name(const char* pName) + { + mPool.set_name(pName); + } + + + const overflow_allocator_type& get_overflow_allocator() const EA_NOEXCEPT + { + return mPool.mOverflowAllocator; + } + + + overflow_allocator_type& get_overflow_allocator() EA_NOEXCEPT + { + return mPool.mOverflowAllocator; + } + + + void set_overflow_allocator(const overflow_allocator_type& allocator) + { + mPool.mOverflowAllocator = allocator; + } + + + void copy_overflow_allocator(const this_type& x) // This function exists so we can write generic code that works for allocators that do and don't have overflow allocators. + { + mPool.mOverflowAllocator = x.mPool.mOverflowAllocator; + } + + }; // fixed_node_allocator + + + // This is a near copy of the code above, with the only difference being + // the 'false' bEnableOverflow template parameter, the pool_type and this_type typedefs, + // and the get_overflow_allocator / set_overflow_allocator functions. + template + class fixed_node_allocator + { + public: + typedef fixed_pool pool_type; + typedef fixed_node_allocator this_type; + typedef OverflowAllocator overflow_allocator_type; + + enum + { + kNodeSize = nodeSize, + kNodeCount = nodeCount, + kNodesSize = nodeCount * nodeSize, // Note that the kBufferSize calculation assumes that the compiler sets sizeof(T) to be a multiple alignof(T), and so sizeof(T) is always >= alignof(T). + kBufferSize = kNodesSize + ((nodeAlignment > 1) ? nodeSize-1 : 0) + nodeAlignmentOffset, + kNodeAlignment = nodeAlignment, + kNodeAlignmentOffset = nodeAlignmentOffset + }; + + public: + pool_type mPool; + + public: + fixed_node_allocator(void* pNodeBuffer) + : mPool(pNodeBuffer, kNodesSize, kNodeSize, kNodeAlignment, kNodeAlignmentOffset) + { + } + + + fixed_node_allocator(void* pNodeBuffer, const overflow_allocator_type& /*allocator*/) // allocator is unused because bEnableOverflow is false in this specialization. + : mPool(pNodeBuffer, kNodesSize, kNodeSize, kNodeAlignment, kNodeAlignmentOffset) + { + } + + + /// fixed_node_allocator + /// + /// Note that we are copying x.mpHead to our own fixed_pool. This at first may seem + /// broken, as fixed pools cannot take over ownership of other fixed pools' memory. + /// However, we declare that this copy ctor can only ever be safely called when + /// the user has intentionally pre-seeded the source with the destination pointer. 
+ /// This is somewhat playing with fire, but it allows us to get around chicken-and-egg + /// problems with containers being their own allocators, without incurring any memory + /// costs or extra code costs. There's another reason for this: we very strongly want + /// to avoid full copying of instances of fixed_pool around, especially via the stack. + /// Larger pools won't even be able to fit on many machine's stacks. So this solution + /// is also a mechanism to prevent that situation from existing and being used. + /// Perhaps some day we'll find a more elegant yet costless way around this. + /// + fixed_node_allocator(const this_type& x) // No need to copy the overflow allocator, because bEnableOverflow is false in this specialization. + : mPool(x.mPool.mpNext, kNodesSize, kNodeSize, kNodeAlignment, kNodeAlignmentOffset) + { + } + + + this_type& operator=(const this_type& x) + { + mPool = x.mPool; + return *this; + } + + + void* allocate(size_t n, int /*flags*/ = 0) + { + (void)n; + EASTL_ASSERT(n == kNodeSize); + return mPool.allocate(); + } + + + void* allocate(size_t n, size_t alignment, size_t offset, int /*flags*/ = 0) + { + (void)n; + EASTL_ASSERT(n == kNodeSize); + return mPool.allocate(alignment, offset); + } + + + void deallocate(void* p, size_t) + { + mPool.deallocate(p); + } + + + bool can_allocate() const + { + return mPool.can_allocate(); + } + + + void reset(void* pNodeBuffer) + { + mPool.init(pNodeBuffer, kBufferSize, kNodeSize, kNodeAlignment, kNodeAlignmentOffset); + } + + + const char* get_name() const + { + return mPool.get_name(); + } + + + void set_name(const char* pName) + { + mPool.set_name(pName); + } + + + const overflow_allocator_type& get_overflow_allocator() const EA_NOEXCEPT + { + EASTL_ASSERT(false); + overflow_allocator_type* pNULL = NULL; + return *pNULL; // This is not pretty, but it should never execute. This is here only to allow this to compile. + } + + + overflow_allocator_type& get_overflow_allocator() EA_NOEXCEPT + { + EASTL_ASSERT(false); + overflow_allocator_type* pNULL = NULL; + return *pNULL; // This is not pretty, but it should never execute. This is here only to allow this to compile. + } + + + void set_overflow_allocator(const overflow_allocator_type& /*allocator*/) + { + // We don't have an overflow allocator. + EASTL_ASSERT(false); + } + + + void copy_overflow_allocator(const this_type&) // This function exists so we can write generic code that works for allocators that do and don't have overflow allocators. + { + // We don't have an overflow allocator. + } + + }; // fixed_node_allocator + + + + + /////////////////////////////////////////////////////////////////////// + // global operators + /////////////////////////////////////////////////////////////////////// + + template + inline bool operator==(const fixed_node_allocator& a, + const fixed_node_allocator& b) + { + return (&a == &b); // They are only equal if they are the same object. + } + + + template + inline bool operator!=(const fixed_node_allocator& a, + const fixed_node_allocator& b) + { + return (&a != &b); // They are only equal if they are the same object. + } + + + + + + + /////////////////////////////////////////////////////////////////////////// + // fixed_hashtable_allocator + /////////////////////////////////////////////////////////////////////////// + + /// fixed_hashtable_allocator + /// + /// Provides a base class for fixed hashtable allocations. + /// To consider: Have this inherit from fixed_node_allocator. 
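// [Editor's illustrative sketch -- not part of this patch] fixed_node_allocator is
// normally instantiated by the fixed_* containers, but it can be exercised directly;
// the sizes below are hypothetical and assume the template parameter order
// <nodeSize, nodeCount, nodeAlignment, nodeAlignmentOffset, bEnableOverflow>. Note
// that two distinct instances never compare equal (see operator== above), since each
// one owns its own node buffer.
//
//     constexpr size_t kSize = 32, kCount = 8, kAlign = 8, kOffset = 0;
//     using FixedAlloc = eastl::fixed_node_allocator<kSize, kCount, kAlign, kOffset, false>;
//
//     alignas(kAlign) char buffer[FixedAlloc::kBufferSize];
//     FixedAlloc alloc(buffer);
//
//     void* node = alloc.allocate(kSize);  // asserts that the request matches kNodeSize
//     alloc.deallocate(node, kSize);
//     alloc.reset(buffer);                 // rewind to a freshly initialized pool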
+ /// + /// Template parameters: + /// bucketCount The fixed number of hashtable buckets to provide. + /// nodeCount The number of objects the pool contains. + /// nodeAlignment The alignment of the objects to allocate. + /// nodeAlignmentOffset The alignment offset of the objects to allocate. + /// bEnableOverflow Whether or not we should use the overflow heap if our object pool is exhausted. + /// OverflowAllocator Overflow allocator, which is only used if bEnableOverflow == true. Defaults to the global heap. + /// + template + class fixed_hashtable_allocator + { + public: + typedef typename type_select, fixed_pool>::type pool_type; + typedef fixed_hashtable_allocator this_type; + typedef OverflowAllocator overflow_allocator_type; + + enum + { + kBucketCount = bucketCount + 1, // '+1' because the hash table needs a null terminating bucket. + kBucketsSize = bucketCount * sizeof(void*), + kNodeSize = nodeSize, + kNodeCount = nodeCount, + kNodesSize = nodeCount * nodeSize, // Note that the kBufferSize calculation assumes that the compiler sets sizeof(T) to be a multiple alignof(T), and so sizeof(T) is always >= alignof(T). + kBufferSize = kNodesSize + ((nodeAlignment > 1) ? nodeSize-1 : 0) + nodeAlignmentOffset, // Don't need to include kBucketsSize in this calculation, as fixed_hash_xxx containers have a separate buffer for buckets. + kNodeAlignment = nodeAlignment, + kNodeAlignmentOffset = nodeAlignmentOffset, + kAllocFlagBuckets = 0x00400000 // Flag to allocator which indicates that we are allocating buckets and not nodes. + }; + + protected: + pool_type mPool; + void* mpBucketBuffer; + + public: + // Disabled because it causes compile conflicts. + //fixed_hashtable_allocator(const char* pName) + //{ + // mPool.set_name(pName); + //} + + fixed_hashtable_allocator(void* pNodeBuffer) + : mPool(pNodeBuffer, kBufferSize, kNodeSize, kNodeAlignment, kNodeAlignmentOffset), + mpBucketBuffer(NULL) + { + // EASTL_ASSERT(false); // As it stands now, this is not supposed to be called. + } + + + fixed_hashtable_allocator(void* pNodeBuffer, const overflow_allocator_type& allocator) + : mPool(pNodeBuffer, kBufferSize, kNodeSize, kNodeAlignment, kNodeAlignmentOffset, allocator), + mpBucketBuffer(NULL) + { + // EASTL_ASSERT(false); // As it stands now, this is not supposed to be called. + } + + + fixed_hashtable_allocator(void* pNodeBuffer, void* pBucketBuffer) + : mPool(pNodeBuffer, kBufferSize, kNodeSize, kNodeAlignment, kNodeAlignmentOffset), + mpBucketBuffer(pBucketBuffer) + { + } + + + fixed_hashtable_allocator(void* pNodeBuffer, void* pBucketBuffer, const overflow_allocator_type& allocator) + : mPool(pNodeBuffer, kBufferSize, kNodeSize, kNodeAlignment, kNodeAlignmentOffset, allocator), + mpBucketBuffer(pBucketBuffer) + { + } + + + /// fixed_hashtable_allocator + /// + /// Note that we are copying x.mpHead and mpBucketBuffer to our own fixed_pool. + /// See the discussion above in fixed_node_allocator for important information about this. + /// + fixed_hashtable_allocator(const this_type& x) + : mPool(x.mPool.mpHead, kBufferSize, kNodeSize, kNodeAlignment, kNodeAlignmentOffset, x.mPool.mOverflowAllocator), + mpBucketBuffer(x.mpBucketBuffer) + { + } + + + fixed_hashtable_allocator& operator=(const fixed_hashtable_allocator& x) + { + mPool = x.mPool; + return *this; + } + + + void* allocate(size_t n, int flags = 0) + { + // We expect that the caller uses kAllocFlagBuckets when it wants us to allocate buckets instead of nodes. 
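// [Editor's note -- not part of this patch] Two call patterns reach this function,
// distinguished only by the flags argument; bucketBytes below stands for whatever
// bucket-array size the hashtable requests:
//     alloc.allocate(kNodeSize);                        // node path
//     alloc.allocate(bucketBytes, kAllocFlagBuckets);   // bucket path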
+ EASTL_CT_ASSERT(kAllocFlagBuckets == 0x00400000); // Currently we expect this to be so, because the hashtable has a copy of this enum. + + if((flags & kAllocFlagBuckets) == 0) // If we are allocating nodes and (probably) not buckets... + { + EASTL_ASSERT(n == kNodeSize); EA_UNUSED(n); + return mPool.allocate(); + } + + // If bucket size no longer fits within local buffer... + if ((flags & kAllocFlagBuckets) == kAllocFlagBuckets && (n > kBucketsSize)) + return get_overflow_allocator().allocate(n); + + EASTL_ASSERT(n <= kBucketsSize); + return mpBucketBuffer; + } + + + void* allocate(size_t n, size_t alignment, size_t offset, int flags = 0) + { + // We expect that the caller uses kAllocFlagBuckets when it wants us to allocate buckets instead of nodes. + if ((flags & kAllocFlagBuckets) == 0) // If we are allocating nodes and (probably) not buckets... + { + EASTL_ASSERT(n == kNodeSize); EA_UNUSED(n); + return mPool.allocate(alignment, offset); + } + + // If bucket size no longer fits within local buffer... + if ((flags & kAllocFlagBuckets) == kAllocFlagBuckets && (n > kBucketsSize)) + return get_overflow_allocator().allocate(n, alignment, offset); + + EASTL_ASSERT(n <= kBucketsSize); + return mpBucketBuffer; + } + + + void deallocate(void* p, size_t) + { + if(p != mpBucketBuffer) // If we are freeing a node and not buckets... + mPool.deallocate(p); + } + + + bool can_allocate() const + { + return mPool.can_allocate(); + } + + + void reset(void* pNodeBuffer) + { + // No need to modify mpBucketBuffer, as that is constant. + mPool.init(pNodeBuffer, kBufferSize, kNodeSize, kNodeAlignment, kNodeAlignmentOffset); + } + + + const char* get_name() const + { + return mPool.get_name(); + } + + + void set_name(const char* pName) + { + mPool.set_name(pName); + } + + + const overflow_allocator_type& get_overflow_allocator() const + { + return mPool.mOverflowAllocator; + } + + + overflow_allocator_type& get_overflow_allocator() + { + return mPool.mOverflowAllocator; + } + + + void set_overflow_allocator(const overflow_allocator_type& allocator) + { + mPool.mOverflowAllocator = allocator; + } + + + void copy_overflow_allocator(const this_type& x) // This function exists so we can write generic code that works for allocators that do and don't have overflow allocators. + { + mPool.mOverflowAllocator = x.mPool.mOverflowAllocator; + } + + }; // fixed_hashtable_allocator + + + // This is a near copy of the code above, with the only difference being + // the 'false' bEnableOverflow template parameter, the pool_type and this_type typedefs, + // and the get_overflow_allocator / set_overflow_allocator functions. + template + class fixed_hashtable_allocator + { + public: + typedef fixed_pool pool_type; + typedef fixed_hashtable_allocator this_type; + typedef OverflowAllocator overflow_allocator_type; + + enum + { + kBucketCount = bucketCount + 1, // '+1' because the hash table needs a null terminating bucket. + kBucketsSize = bucketCount * sizeof(void*), + kNodeSize = nodeSize, + kNodeCount = nodeCount, + kNodesSize = nodeCount * nodeSize, // Note that the kBufferSize calculation assumes that the compiler sets sizeof(T) to be a multiple alignof(T), and so sizeof(T) is always >= alignof(T). + kBufferSize = kNodesSize + ((nodeAlignment > 1) ? nodeSize-1 : 0) + nodeAlignmentOffset, // Don't need to include kBucketsSize in this calculation, as fixed_hash_xxx containers have a separate buffer for buckets. 
+ kNodeAlignment = nodeAlignment, + kNodeAlignmentOffset = nodeAlignmentOffset, + kAllocFlagBuckets = 0x00400000 // Flag to allocator which indicates that we are allocating buckets and not nodes. + }; + + protected: + pool_type mPool; + void* mpBucketBuffer; + + public: + // Disabled because it causes compile conflicts. + //fixed_hashtable_allocator(const char* pName) + //{ + // mPool.set_name(pName); + //} + + fixed_hashtable_allocator(void* pNodeBuffer) + : mPool(pNodeBuffer, kBufferSize, kNodeSize, kNodeAlignment, kNodeAlignmentOffset), + mpBucketBuffer(NULL) + { + // EASTL_ASSERT(false); // As it stands now, this is not supposed to be called. + } + + fixed_hashtable_allocator(void* pNodeBuffer, const overflow_allocator_type& /*allocator*/) // allocator is unused because bEnableOverflow is false in this specialization. + : mPool(pNodeBuffer, kBufferSize, kNodeSize, kNodeAlignment, kNodeAlignmentOffset), + mpBucketBuffer(NULL) + { + // EASTL_ASSERT(false); // As it stands now, this is not supposed to be called. + } + + + fixed_hashtable_allocator(void* pNodeBuffer, void* pBucketBuffer) + : mPool(pNodeBuffer, kBufferSize, kNodeSize, kNodeAlignment, kNodeAlignmentOffset), + mpBucketBuffer(pBucketBuffer) + { + } + + + fixed_hashtable_allocator(void* pNodeBuffer, void* pBucketBuffer, const overflow_allocator_type& /*allocator*/) // allocator is unused because bEnableOverflow is false in this specialization. + : mPool(pNodeBuffer, kBufferSize, kNodeSize, kNodeAlignment, kNodeAlignmentOffset), + mpBucketBuffer(pBucketBuffer) + { + } + + + /// fixed_hashtable_allocator + /// + /// Note that we are copying x.mpHead and mpBucketBuffer to our own fixed_pool. + /// See the discussion above in fixed_node_allocator for important information about this. + /// + fixed_hashtable_allocator(const this_type& x) // No need to copy the overflow allocator, because bEnableOverflow is false in this specialization. + : mPool(x.mPool.mpHead, kBufferSize, kNodeSize, kNodeAlignment, kNodeAlignmentOffset), + mpBucketBuffer(x.mpBucketBuffer) + { + } + + + fixed_hashtable_allocator& operator=(const fixed_hashtable_allocator& x) + { + mPool = x.mPool; + return *this; + } + + + void* allocate(size_t n, int flags = 0) + { + // We expect that the caller uses kAllocFlagBuckets when it wants us to allocate buckets instead of nodes. + EASTL_CT_ASSERT(kAllocFlagBuckets == 0x00400000); // Currently we expect this to be so, because the hashtable has a copy of this enum. + if((flags & kAllocFlagBuckets) == 0) // If we are allocating nodes and (probably) not buckets... + { + EASTL_ASSERT(n == kNodeSize); (void)n; // Make unused var warning go away. + return mPool.allocate(); + } + + // Don't allow hashtable buckets to overflow in this case. + EASTL_ASSERT(n <= kBucketsSize); + return mpBucketBuffer; + } + + + void* allocate(size_t n, size_t alignment, size_t offset, int flags = 0) + { + // We expect that the caller uses kAllocFlagBuckets when it wants us to allocate buckets instead of nodes. + if((flags & kAllocFlagBuckets) == 0) // If we are allocating nodes and (probably) not buckets... + { + EASTL_ASSERT(n == kNodeSize); (void)n; // Make unused var warning go away. + return mPool.allocate(alignment, offset); + } + + // Don't allow hashtable buckets to overflow in this case. + EASTL_ASSERT(n <= kBucketsSize); + return mpBucketBuffer; + } + + + void deallocate(void* p, size_t) + { + if(p != mpBucketBuffer) // If we are freeing a node and not buckets... 
+ mPool.deallocate(p); + } + + + bool can_allocate() const + { + return mPool.can_allocate(); + } + + + void reset(void* pNodeBuffer) + { + // No need to modify mpBucketBuffer, as that is constant. + mPool.init(pNodeBuffer, kBufferSize, kNodeSize, kNodeAlignment, kNodeAlignmentOffset); + } + + + const char* get_name() const + { + return mPool.get_name(); + } + + + void set_name(const char* pName) + { + mPool.set_name(pName); + } + + + const overflow_allocator_type& get_overflow_allocator() const EA_NOEXCEPT + { + EASTL_ASSERT(false); + overflow_allocator_type* pNULL = NULL; + return *pNULL; // This is not pretty, but it should never execute. This is here only to allow this to compile. + } + + + overflow_allocator_type& get_overflow_allocator() EA_NOEXCEPT + { + EASTL_ASSERT(false); + overflow_allocator_type* pNULL = NULL; + return *pNULL; // This is not pretty, but it should never execute. This is here only to allow this to compile. + } + + void set_overflow_allocator(const overflow_allocator_type& /*allocator*/) + { + // We don't have an overflow allocator. + EASTL_ASSERT(false); + } + + void copy_overflow_allocator(const this_type&) // This function exists so we can write generic code that works for allocators that do and don't have overflow allocators. + { + // We don't have an overflow allocator. + } + + }; // fixed_hashtable_allocator + + + /////////////////////////////////////////////////////////////////////// + // global operators + /////////////////////////////////////////////////////////////////////// + + template + inline bool operator==(const fixed_hashtable_allocator& a, + const fixed_hashtable_allocator& b) + { + return (&a == &b); // They are only equal if they are the same object. + } + + + template + inline bool operator!=(const fixed_hashtable_allocator& a, + const fixed_hashtable_allocator& b) + { + return (&a != &b); // They are only equal if they are the same object. + } + + + + + + + /////////////////////////////////////////////////////////////////////////// + // fixed_vector_allocator + /////////////////////////////////////////////////////////////////////////// + + /// fixed_vector_allocator + /// + /// Template parameters: + /// nodeSize The size of individual objects. + /// nodeCount The number of objects the pool contains. + /// nodeAlignment The alignment of the objects to allocate. + /// nodeAlignmentOffset The alignment offset of the objects to allocate. + /// bEnableOverflow Whether or not we should use the overflow heap if our object pool is exhausted. + /// OverflowAllocator Overflow allocator, which is only used if bEnableOverflow == true. Defaults to the global heap. + /// + template + class fixed_vector_allocator + { + public: + typedef fixed_vector_allocator this_type; + typedef OverflowAllocator overflow_allocator_type; + + enum + { + kNodeSize = nodeSize, + kNodeCount = nodeCount, + kNodesSize = nodeCount * nodeSize, // Note that the kBufferSize calculation assumes that the compiler sets sizeof(T) to be a multiple alignof(T), and so sizeof(T) is always >= alignof(T). + kBufferSize = kNodesSize + ((nodeAlignment > 1) ? nodeSize-1 : 0) + nodeAlignmentOffset, + kNodeAlignment = nodeAlignment, + kNodeAlignmentOffset = nodeAlignmentOffset + }; + + public: + overflow_allocator_type mOverflowAllocator; + void* mpPoolBegin; // To consider: Find some way to make this data unnecessary, without increasing template proliferation. + + public: + // Disabled because it causes compile conflicts. 
+ //fixed_vector_allocator(const char* pName = NULL) + //{ + // mOverflowAllocator.set_name(pName); + //} + + fixed_vector_allocator(void* pNodeBuffer = nullptr) + : mpPoolBegin(pNodeBuffer) + { + } + + fixed_vector_allocator(void* pNodeBuffer, const overflow_allocator_type& allocator) + : mOverflowAllocator(allocator), mpPoolBegin(pNodeBuffer) + { + } + + // Disabled because the default is sufficient. + //fixed_vector_allocator(const fixed_vector_allocator& x) + //{ + // mpPoolBegin = x.mpPoolBegin; + // mOverflowAllocator = x.mOverflowAllocator; + //} + + fixed_vector_allocator& operator=(const fixed_vector_allocator& x) + { + // We leave our mpPoolBegin variable alone. + + #if EASTL_ALLOCATOR_COPY_ENABLED + mOverflowAllocator = x.mOverflowAllocator; + #else + (void)x; + #endif + + return *this; + } + + void* allocate(size_t n, int flags = 0) + { + return mOverflowAllocator.allocate(n, flags); + } + + void* allocate(size_t n, size_t alignment, size_t offset, int flags = 0) + { + return mOverflowAllocator.allocate(n, alignment, offset, flags); + } + + void deallocate(void* p, size_t n) + { + if(p != mpPoolBegin) + mOverflowAllocator.deallocate(p, n); // Can't do this to our own allocation. + } + + const char* get_name() const + { + return mOverflowAllocator.get_name(); + } + + void set_name(const char* pName) + { + mOverflowAllocator.set_name(pName); + } + + const overflow_allocator_type& get_overflow_allocator() const EA_NOEXCEPT + { + return mOverflowAllocator; + } + + overflow_allocator_type& get_overflow_allocator() EA_NOEXCEPT + { + return mOverflowAllocator; + } + + void set_overflow_allocator(const overflow_allocator_type& allocator) + { + mOverflowAllocator = allocator; + } + + void copy_overflow_allocator(const this_type& x) // This function exists so we can write generic code that works for allocators that do and don't have overflow allocators. + { + mOverflowAllocator = x.mOverflowAllocator; + } + + }; // fixed_vector_allocator + + + template + class fixed_vector_allocator + { + public: + typedef fixed_vector_allocator this_type; + typedef OverflowAllocator overflow_allocator_type; + + enum + { + kNodeSize = nodeSize, + kNodeCount = nodeCount, + kNodesSize = nodeCount * nodeSize, // Note that the kBufferSize calculation assumes that the compiler sets sizeof(T) to be a multiple alignof(T), and so sizeof(T) is always >= alignof(T). + kBufferSize = kNodesSize + ((nodeAlignment > 1) ? nodeSize-1 : 0) + nodeAlignmentOffset, + kNodeAlignment = nodeAlignment, + kNodeAlignmentOffset = nodeAlignmentOffset + }; + + // Disabled because it causes compile conflicts. + //fixed_vector_allocator(const char* = NULL) // This char* parameter is present so that this class can be like the other version. + //{ + //} + + fixed_vector_allocator() + { + } + + fixed_vector_allocator(void* /*pNodeBuffer*/) + { + } + + fixed_vector_allocator(void* /*pNodeBuffer*/, const overflow_allocator_type& /*allocator*/) // allocator is unused because bEnableOverflow is false in this specialization. + { + } + + /// fixed_vector_allocator + /// + // Disabled because there is nothing to do. No member data. And the default for this is sufficient. + // fixed_vector_allocator(const fixed_vector_allocator&) + // { + // } + + // Disabled because there is nothing to do. No member data. 
+ //fixed_vector_allocator& operator=(const fixed_vector_allocator& x) + //{ + // return *this; + //} + + void* allocate(size_t /*n*/, int /*flags*/ = 0) + { + EASTL_ASSERT(false); // A fixed_vector should not reallocate, else the user has exhausted its space. + return NULL; + } + + void* allocate(size_t /*n*/, size_t /*alignment*/, size_t /*offset*/, int /*flags*/ = 0) + { + EASTL_ASSERT(false); + return NULL; + } + + void deallocate(void* /*p*/, size_t /*n*/) + { + } + + const char* get_name() const + { + return EASTL_FIXED_POOL_DEFAULT_NAME; + } + + void set_name(const char* /*pName*/) + { + } + + const overflow_allocator_type& get_overflow_allocator() const EA_NOEXCEPT + { + EASTL_ASSERT(false); + overflow_allocator_type* pNULL = NULL; + return *pNULL; // This is not pretty, but it should never execute. This is here only to allow this to compile. + } + + overflow_allocator_type& get_overflow_allocator() EA_NOEXCEPT + { + EASTL_ASSERT(false); + overflow_allocator_type* pNULL = NULL; + return *pNULL; // This is not pretty, but it should never execute. This is here only to allow this to compile. + } + + void set_overflow_allocator(const overflow_allocator_type& /*allocator*/) + { + // We don't have an overflow allocator. + EASTL_ASSERT(false); + } + + void copy_overflow_allocator(const this_type&) // This function exists so we can write generic code that works for allocators that do and don't have overflow allocators. + { + // We don't have an overflow allocator. + } + + }; // fixed_vector_allocator + + + /////////////////////////////////////////////////////////////////////// + // global operators + /////////////////////////////////////////////////////////////////////// + + template + inline bool operator==(const fixed_vector_allocator& a, + const fixed_vector_allocator& b) + { + return (&a == &b); // They are only equal if they are the same object. + } + + + template + inline bool operator!=(const fixed_vector_allocator& a, + const fixed_vector_allocator& b) + { + return (&a != &b); // They are only equal if they are the same object. + } + + + + + + /////////////////////////////////////////////////////////////////////////// + // fixed_swap + /////////////////////////////////////////////////////////////////////////// + + /// fixed_swap + /// + /// This function implements a swap suitable for fixed containers. + /// This is an issue because the size of fixed containers can be very + /// large, due to their having the container buffer within themselves. + /// Note that we are referring to sizeof(container) and not the total + /// sum of memory allocated by the container from the heap. + /// + /// + /// This implementation switches at compile time whether or not the + /// temporary is allocated on the stack or the heap as some compilers + /// will allocate the (large) stack frame regardless of which code + /// path is picked. + template + class fixed_swap_impl + { + public: + static void swap(Container& a, Container& b); + }; + + + template + class fixed_swap_impl + { + public: + static void swap(Container& a, Container& b) + { + Container temp(EASTL_MOVE(a)); // Can't use global swap because that could + a = EASTL_MOVE(b); // itself call this swap function in return. 
+ b = EASTL_MOVE(temp); + } + }; + + + template + class fixed_swap_impl + { + public: + static void swap(Container& a, Container& b) + { + EASTLAllocatorType allocator(*EASTLAllocatorDefault(), EASTL_TEMP_DEFAULT_NAME); + void* const pMemory = allocator.allocate(sizeof(a)); + + if(pMemory) + { + Container* pTemp = ::new(pMemory) Container(EASTL_MOVE(a)); + a = EASTL_MOVE(b); + b = EASTL_MOVE(*pTemp); + + pTemp->~Container(); + allocator.deallocate(pMemory, sizeof(a)); + } + } + }; + + + template + void fixed_swap(Container& a, Container& b) + { + return fixed_swap_impl= EASTL_MAX_STACK_USAGE>::swap(a, b); + } + + + +} // namespace eastl + + +EA_RESTORE_VC_WARNING(); + + +#endif // Header include guard diff --git a/libkram/eastl/include/EASTL/internal/function.h b/libkram/eastl/include/EASTL/internal/function.h new file mode 100644 index 00000000..6e857f0b --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/function.h @@ -0,0 +1,161 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_FUNCTION_H +#define EASTL_FUNCTION_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + +#include + +namespace eastl +{ + + /// EASTL_FUNCTION_DEFAULT_CAPTURE_SSO_SIZE + /// + /// Defines the size of the SSO buffer which is used to hold the specified capture state of the callable. + /// + #ifndef EASTL_FUNCTION_DEFAULT_CAPTURE_SSO_SIZE + #define EASTL_FUNCTION_DEFAULT_CAPTURE_SSO_SIZE (2 * sizeof(void*)) + #endif + + static_assert(EASTL_FUNCTION_DEFAULT_CAPTURE_SSO_SIZE >= sizeof(void*), "functor storage must be able to hold at least a pointer!"); + + template + class function; + + template + class function : public internal::function_detail + { + private: + using Base = internal::function_detail; + public: + using typename Base::result_type; + + function() EA_NOEXCEPT = default; + function(std::nullptr_t p) EA_NOEXCEPT + : Base(p) + { + } + + function(const function& other) + : Base(other) + { + } + + function(function&& other) + : Base(eastl::move(other)) + { + } + + template + function(Functor functor) + : Base(eastl::move(functor)) + { + } + + ~function() EA_NOEXCEPT = default; + + function& operator=(const function& other) + { + Base::operator=(other); + return *this; + } + + function& operator=(function&& other) + { + Base::operator=(eastl::move(other)); + return *this; + } + + function& operator=(std::nullptr_t p) EA_NOEXCEPT + { + Base::operator=(p); + return *this; + } + + template + function& operator=(Functor&& functor) + { + Base::operator=(eastl::forward(functor)); + return *this; + } + + template + function& operator=(eastl::reference_wrapper f) EA_NOEXCEPT + { + Base::operator=(f); + return *this; + } + + void swap(function& other) EA_NOEXCEPT + { + Base::swap(other); + } + + explicit operator bool() const EA_NOEXCEPT + { + return Base::operator bool(); + } + + R operator ()(Args... 
args) const + { + return Base::operator ()(eastl::forward(args)...); + } + + #if EASTL_RTTI_ENABLED + const std::type_info& target_type() const EA_NOEXCEPT + { + return Base::target_type(); + } + + template + Functor* target() EA_NOEXCEPT + { + return Base::target(); + } + + template + const Functor* target() const EA_NOEXCEPT + { + return Base::target(); + } + #endif // EASTL_RTTI_ENABLED + }; + + template + bool operator==(const function& f, std::nullptr_t) EA_NOEXCEPT + { + return !f; + } + + template + bool operator==(std::nullptr_t, const function& f) EA_NOEXCEPT + { + return !f; + } + + template + bool operator!=(const function& f, std::nullptr_t) EA_NOEXCEPT + { + return !!f; + } + + template + bool operator!=(std::nullptr_t, const function& f) EA_NOEXCEPT + { + return !!f; + } + + template + void swap(function& lhs, function& rhs) + { + lhs.swap(rhs); + } + +} // namespace eastl + +#endif // EASTL_FUNCTION_H diff --git a/libkram/eastl/include/EASTL/internal/function_detail.h b/libkram/eastl/include/EASTL/internal/function_detail.h new file mode 100644 index 00000000..dc18b631 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/function_detail.h @@ -0,0 +1,673 @@ +/////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_FUNCTION_DETAIL_H +#define EASTL_FUNCTION_DETAIL_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include + +#if EASTL_RTTI_ENABLED + #include +#endif + +#if EASTL_EXCEPTIONS_ENABLED + EA_DISABLE_ALL_VC_WARNINGS() + #include + #include + EA_RESTORE_ALL_VC_WARNINGS() +#endif + +namespace eastl +{ + #if EASTL_EXCEPTIONS_ENABLED + class bad_function_call : public std::exception + { + public: + bad_function_call() EA_NOEXCEPT = default; + + const char* what() const EA_NOEXCEPT EA_OVERRIDE + { + return "bad function_detail call"; + } + }; + #endif + + namespace internal + { + class unused_class {}; + + union functor_storage_alignment + { + void (*unused_func_ptr)(void); + void (unused_class::*unused_func_mem_ptr)(void); + void* unused_ptr; + }; + + template + struct functor_storage + { + static_assert(SIZE_IN_BYTES >= 0, "local buffer storage cannot have a negative size!"); + template + Ret& GetStorageTypeRef() const + { + return *reinterpret_cast(const_cast(&storage[0])); + } + + union + { + functor_storage_alignment align; + char storage[SIZE_IN_BYTES]; + }; + }; + + template <> + struct functor_storage<0> + { + template + Ret& GetStorageTypeRef() const + { + return *reinterpret_cast(const_cast(&storage[0])); + } + + union + { + functor_storage_alignment align; + char storage[sizeof(functor_storage_alignment)]; + }; + }; + + template + struct is_functor_inplace_allocatable + { + static constexpr bool value = + sizeof(Functor) <= sizeof(functor_storage) && + (eastl::alignment_of_v> % eastl::alignment_of_v) == 0; + }; + + + /// function_base_detail + /// + template + class function_base_detail + { + public: + using FunctorStorageType = functor_storage; + FunctorStorageType mStorage; + + enum ManagerOperations : int + { + MGROPS_DESTRUCT_FUNCTOR = 0, + MGROPS_COPY_FUNCTOR = 1, + MGROPS_MOVE_FUNCTOR = 2, + #if EASTL_RTTI_ENABLED + MGROPS_GET_TYPE_INFO = 3, + MGROPS_GET_FUNC_PTR = 4, + #endif + }; + + // Functor can be allocated inplace + template + class function_manager_base + 
{ + public: + + static Functor* GetFunctorPtr(const FunctorStorageType& storage) EA_NOEXCEPT + { + return &(storage.template GetStorageTypeRef()); + } + + template + static void CreateFunctor(FunctorStorageType& storage, T&& functor) + { + ::new (GetFunctorPtr(storage)) Functor(eastl::forward(functor)); + } + + static void DestructFunctor(FunctorStorageType& storage) + { + GetFunctorPtr(storage)->~Functor(); + } + + static void CopyFunctor(FunctorStorageType& to, const FunctorStorageType& from) + { + ::new (GetFunctorPtr(to)) Functor(*GetFunctorPtr(from)); + } + + static void MoveFunctor(FunctorStorageType& to, FunctorStorageType& from) EA_NOEXCEPT + { + ::new (GetFunctorPtr(to)) Functor(eastl::move(*GetFunctorPtr(from))); + } + + static void* Manager(void* to, void* from, typename function_base_detail::ManagerOperations ops) EA_NOEXCEPT + { + switch (ops) + { + case MGROPS_DESTRUCT_FUNCTOR: + { + DestructFunctor(*static_cast(to)); + } + break; + case MGROPS_COPY_FUNCTOR: + { + CopyFunctor(*static_cast(to), + *static_cast(from)); + } + break; + case MGROPS_MOVE_FUNCTOR: + { + MoveFunctor(*static_cast(to), *static_cast(from)); + DestructFunctor(*static_cast(from)); + } + break; + default: + break; + } + return nullptr; + } + }; + + // Functor is allocated on the heap + template + class function_manager_base::value>::type> + { + public: + static Functor* GetFunctorPtr(const FunctorStorageType& storage) EA_NOEXCEPT + { + return storage.template GetStorageTypeRef(); + } + + static Functor*& GetFunctorPtrRef(const FunctorStorageType& storage) EA_NOEXCEPT + { + return storage.template GetStorageTypeRef(); + } + + template + static void CreateFunctor(FunctorStorageType& storage, T&& functor) + { + auto& allocator = *EASTLAllocatorDefault(); + Functor* func = static_cast(allocator.allocate(sizeof(Functor), alignof(Functor), 0)); + + #if EASTL_EXCEPTIONS_ENABLED + if (!func) + { + throw std::bad_alloc(); + } + #else + EASTL_ASSERT_MSG(func != nullptr, "Allocation failed!"); + #endif + + ::new (static_cast(func)) Functor(eastl::forward(functor)); + GetFunctorPtrRef(storage) = func; + } + + static void DestructFunctor(FunctorStorageType& storage) + { + Functor* func = GetFunctorPtr(storage); + if (func) + { + auto& allocator = *EASTLAllocatorDefault(); + func->~Functor(); + allocator.deallocate(static_cast(func), sizeof(Functor)); + } + } + + static void CopyFunctor(FunctorStorageType& to, const FunctorStorageType& from) + { + auto& allocator = *EASTLAllocatorDefault(); + Functor* func = static_cast(allocator.allocate(sizeof(Functor), alignof(Functor), 0)); + #if EASTL_EXCEPTIONS_ENABLED + if (!func) + { + throw std::bad_alloc(); + } + #else + EASTL_ASSERT_MSG(func != nullptr, "Allocation failed!"); + #endif + ::new (static_cast(func)) Functor(*GetFunctorPtr(from)); + GetFunctorPtrRef(to) = func; + } + + static void MoveFunctor(FunctorStorageType& to, FunctorStorageType& from) EA_NOEXCEPT + { + Functor* func = GetFunctorPtr(from); + GetFunctorPtrRef(to) = func; + GetFunctorPtrRef(from) = nullptr; + } + + static void* Manager(void* to, void* from, typename function_base_detail::ManagerOperations ops) EA_NOEXCEPT + { + switch (ops) + { + case MGROPS_DESTRUCT_FUNCTOR: + { + DestructFunctor(*static_cast(to)); + } + break; + case MGROPS_COPY_FUNCTOR: + { + CopyFunctor(*static_cast(to), + *static_cast(from)); + } + break; + case MGROPS_MOVE_FUNCTOR: + { + MoveFunctor(*static_cast(to), *static_cast(from)); + // Moved ptr, no need to destruct ourselves + } + break; + default: + break; + } + return nullptr; + 
} + }; + + template + class function_manager final : public function_manager_base + { + public: + using Base = function_manager_base; + + #if EASTL_RTTI_ENABLED + static void* GetTypeInfo() EA_NOEXCEPT + { + return reinterpret_cast(const_cast(&typeid(Functor))); + } + + static void* Manager(void* to, void* from, typename function_base_detail::ManagerOperations ops) EA_NOEXCEPT + { + switch (ops) + { + case MGROPS_GET_TYPE_INFO: + { + return GetTypeInfo(); + } + break; + case MGROPS_GET_FUNC_PTR: + { + return static_cast(Base::GetFunctorPtr(*static_cast(to))); + } + break; + default: + { + return Base::Manager(to, from, ops); + } + break; + } + } + #endif // EASTL_RTTI_ENABLED + + /** + * NOTE: + * + * The order of arguments here is vital to the call optimization. Let's dig into why and look at some asm. + * We have two invoker signatures to consider: + * R Invoker(const FunctorStorageType& functor, Args... args) + * R Invoker(Args... args, const FunctorStorageType& functor) + * + * Assume we are using the Windows x64 Calling Convention where the first 4 arguments are passed into + * RCX, RDX, R8, R9. This optimization works for any Calling Convention, we are just using Windows x64 for + * this example. + * + * Given the following member function: void TestMemberFunc(int a, int b) + * RCX == this + * RDX == a + * R8 == b + * + * All three arguments to the function including the hidden this pointer, which in C++ is always the first argument + * are passed into the first three registers. + * The function call chain for eastl::function<>() is as follows: + * operator ()(this, Args... args) -> Invoker(Args... args, this->mStorage) -> StoredFunction(Args... arg) + * + * Let's look at what is happening at the asm level with the different Invoker function signatures and why. + * + * You will notice that operator ()() and Invoker() have the arguments reversed. operator ()() just directly calls + * to Invoker(), it is a tail call, so we force inline the call operator to ensure we directly call to the Invoker(). + * Most compilers always inline it anyways by default; have been instances where it doesn't even though the asm ends + * up being cheaper. + * call -> call -> call versus call -> call + * + * eastl::function = FunctionPointer + * + * Assume we have the above eastl::function object that holds a pointer to a function as the internal callable. + * + * Invoker(this->mStorage, Args... args) is called with the follow arguments in registers: + * RCX = this | RDX = a | R8 = b + * + * Inside Invoker() we use RCX to deference into the eastl::function object and get the function pointer to call. + * This function to call has signature Func(int, int) and thus requires its arguments in registers RCX and RDX. + * The compiler must shift all the arguments towards the left. The full asm looks something as follows. + * + * Calling Invoker: Inside Invoker: + * + * mov rcx, this mov rax, [rcx] + * mov rdx, a mov rcx, rdx + * mov r8, b mov rdx, r8 + * call [rcx + offset to Invoker] jmp [rax] + * + * Notice how the compiler shifts all the arguments before calling the callable and also we only use the this pointer + * to access the internal storage inside the eastl::function object. + * + * Invoker(Args... args, this->mStorage) is called with the following arguments in registers: + * RCX = a | RDX = b | R8 = this + * + * You can see we no longer have to shift the arguments down when going to call the internal stored callable. 
+ * + * Calling Invoker: Inside Invoker: + * + * mov rcx, a mov rax, [r8] + * mov rdx, b jmp [rax] + * mov r8, this + * call [r8 + offset to Invoker] + * + * The generated asm does a straight tail jmp to the loaded function pointer. The arguments are already in the correct + * registers. + * + * For Functors or Lambdas with no captures, this gives us another free register to use to pass arguments since the this + * is at the end, it can be passed onto the stack if we run out of registers. Since the callable has no captures; inside + * the Invoker(), we won't ever need to touch this thus we can just call the operator ()() or let the compiler inline it. + * + * For a callable with captures there is no perf hit since the callable in the common case is inlined and the pointer to the callable + * buffer is passed in a register which the compiler can use to access the captures. + * + * For eastl::function that a holds a pointer to member function. The this pointers is implicitly + * the first argument in the argument list, const T&, and the member function pointer will be called on that object. + * This prevents any argument shifting since the this for the member function pointer is already in RCX. + * + * This is why having this at the end of the argument list is important for generating efficient Invoker() thunks. + */ + static R Invoker(Args... args, const FunctorStorageType& functor) + { + return eastl::invoke(*Base::GetFunctorPtr(functor), eastl::forward(args)...); + } + }; + + function_base_detail() EA_NOEXCEPT = default; + ~function_base_detail() EA_NOEXCEPT = default; + }; + + #define EASTL_INTERNAL_FUNCTION_VALID_FUNCTION_ARGS(FUNCTOR, RET, ARGS, BASE, MYSELF) \ + typename eastl::enable_if_t && \ + !eastl::is_base_of_v> && \ + !eastl::is_same_v, MYSELF>> + + #define EASTL_INTERNAL_FUNCTION_DETAIL_VALID_FUNCTION_ARGS(FUNCTOR, RET, ARGS, MYSELF) \ + EASTL_INTERNAL_FUNCTION_VALID_FUNCTION_ARGS(FUNCTOR, RET, ARGS, MYSELF, MYSELF) + + + /// function_detail + /// + template + class function_detail; + + template + class function_detail : public function_base_detail + { + public: + using result_type = R; + + protected: + using Base = function_base_detail; + using FunctorStorageType = typename function_base_detail::FunctorStorageType; + using Base::mStorage; + + public: + function_detail() EA_NOEXCEPT = default; + function_detail(std::nullptr_t) EA_NOEXCEPT {} + + function_detail(const function_detail& other) + { + if (this != &other) + { + Copy(other); + } + } + + function_detail(function_detail&& other) + { + if (this != &other) + { + Move(eastl::move(other)); + } + } + + template + function_detail(Functor functor) + { + CreateForwardFunctor(eastl::move(functor)); + } + + ~function_detail() EA_NOEXCEPT + { + Destroy(); + } + + function_detail& operator=(const function_detail& other) + { + if (this != &other) + { + Destroy(); + Copy(other); + } + + return *this; + } + + function_detail& operator=(function_detail&& other) + { + if(this != &other) + { + Destroy(); + Move(eastl::move(other)); + } + + return *this; + } + + function_detail& operator=(std::nullptr_t) EA_NOEXCEPT + { + Destroy(); + mMgrFuncPtr = nullptr; + mInvokeFuncPtr = &DefaultInvoker; + + return *this; + } + + template + function_detail& operator=(Functor&& functor) + { + Destroy(); + CreateForwardFunctor(eastl::forward(functor)); + return *this; + } + + template + function_detail& operator=(eastl::reference_wrapper f) EA_NOEXCEPT + { + Destroy(); + CreateForwardFunctor(f); + return *this; + } + + void swap(function_detail& 
other) EA_NOEXCEPT + { + if(this == &other) + return; + + FunctorStorageType tempStorage; + if (other.HaveManager()) + { + (void)(*other.mMgrFuncPtr)(static_cast(&tempStorage), static_cast(&other.mStorage), + Base::ManagerOperations::MGROPS_MOVE_FUNCTOR); + } + + if (HaveManager()) + { + (void)(*mMgrFuncPtr)(static_cast(&other.mStorage), static_cast(&mStorage), + Base::ManagerOperations::MGROPS_MOVE_FUNCTOR); + } + + if (other.HaveManager()) + { + (void)(*other.mMgrFuncPtr)(static_cast(&mStorage), static_cast(&tempStorage), + Base::ManagerOperations::MGROPS_MOVE_FUNCTOR); + } + + eastl::swap(mMgrFuncPtr, other.mMgrFuncPtr); + eastl::swap(mInvokeFuncPtr, other.mInvokeFuncPtr); + } + + explicit operator bool() const EA_NOEXCEPT + { + return HaveManager(); + } + + EASTL_FORCE_INLINE R operator ()(Args... args) const + { + return (*mInvokeFuncPtr)(eastl::forward(args)..., this->mStorage); + } + + #if EASTL_RTTI_ENABLED + const std::type_info& target_type() const EA_NOEXCEPT + { + if (HaveManager()) + { + void* ret = (*mMgrFuncPtr)(nullptr, nullptr, Base::ManagerOperations::MGROPS_GET_TYPE_INFO); + return *(static_cast(ret)); + } + return typeid(void); + } + + template + Functor* target() EA_NOEXCEPT + { + if (HaveManager() && target_type() == typeid(Functor)) + { + void* ret = (*mMgrFuncPtr)(static_cast(&mStorage), nullptr, + Base::ManagerOperations::MGROPS_GET_FUNC_PTR); + return ret ? static_cast(ret) : nullptr; + } + return nullptr; + } + + template + const Functor* target() const EA_NOEXCEPT + { + if (HaveManager() && target_type() == typeid(Functor)) + { + void* ret = (*mMgrFuncPtr)(static_cast(&mStorage), nullptr, + Base::ManagerOperations::MGROPS_GET_FUNC_PTR); + return ret ? static_cast(ret) : nullptr; + } + return nullptr; + } + #endif // EASTL_RTTI_ENABLED + + private: + bool HaveManager() const EA_NOEXCEPT + { + return (mMgrFuncPtr != nullptr); + } + + void Destroy() EA_NOEXCEPT + { + if (HaveManager()) + { + (void)(*mMgrFuncPtr)(static_cast(&mStorage), nullptr, + Base::ManagerOperations::MGROPS_DESTRUCT_FUNCTOR); + } + } + + void Copy(const function_detail& other) + { + if (other.HaveManager()) + { + (void)(*other.mMgrFuncPtr)(static_cast(&mStorage), + const_cast(static_cast(&other.mStorage)), + Base::ManagerOperations::MGROPS_COPY_FUNCTOR); + } + + mMgrFuncPtr = other.mMgrFuncPtr; + mInvokeFuncPtr = other.mInvokeFuncPtr; + } + + void Move(function_detail&& other) + { + if (other.HaveManager()) + { + (void)(*other.mMgrFuncPtr)(static_cast(&mStorage), static_cast(&other.mStorage), + Base::ManagerOperations::MGROPS_MOVE_FUNCTOR); + } + + mMgrFuncPtr = other.mMgrFuncPtr; + mInvokeFuncPtr = other.mInvokeFuncPtr; + other.mMgrFuncPtr = nullptr; + other.mInvokeFuncPtr = &DefaultInvoker; + } + + template + void CreateForwardFunctor(Functor&& functor) + { + using DecayedFunctorType = typename eastl::decay::type; + using FunctionManagerType = typename Base::template function_manager; + + if (internal::is_null(functor)) + { + mMgrFuncPtr = nullptr; + mInvokeFuncPtr = &DefaultInvoker; + } + else + { + mMgrFuncPtr = &FunctionManagerType::Manager; + mInvokeFuncPtr = &FunctionManagerType::Invoker; + FunctionManagerType::CreateFunctor(mStorage, eastl::forward(functor)); + } + } + + private: + typedef void* (*ManagerFuncPtr)(void*, void*, typename Base::ManagerOperations); + typedef R (*InvokeFuncPtr)(Args..., const FunctorStorageType&); + + EA_DISABLE_GCC_WARNING(-Wreturn-type); + EA_DISABLE_CLANG_WARNING(-Wreturn-type); + EA_DISABLE_VC_WARNING(4716); // 'function' must return a value + // We 
cannot assume that R is default constructible. + // This function is called only when the function object CANNOT be called because it is empty, + // it will always throw or assert so we never use the return value anyways and neither should the caller. + static R DefaultInvoker(Args... /*args*/, const FunctorStorageType& /*functor*/) + { + #if EASTL_EXCEPTIONS_ENABLED + throw eastl::bad_function_call(); + #else + EASTL_ASSERT_MSG(false, "function_detail call on an empty function_detail"); + #endif + }; + EA_RESTORE_VC_WARNING(); + EA_RESTORE_CLANG_WARNING(); + EA_RESTORE_GCC_WARNING(); + + + ManagerFuncPtr mMgrFuncPtr = nullptr; + InvokeFuncPtr mInvokeFuncPtr = &DefaultInvoker; + }; + + } // namespace internal + +} // namespace eastl + +#endif // EASTL_FUNCTION_DETAIL_H diff --git a/libkram/eastl/include/EASTL/internal/function_help.h b/libkram/eastl/include/EASTL/internal/function_help.h new file mode 100644 index 00000000..04481d37 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/function_help.h @@ -0,0 +1,51 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_INTERNAL_FUNCTION_HELP_H +#define EASTL_INTERNAL_FUNCTION_HELP_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + +#include +#include + +namespace eastl +{ + namespace internal + { + + ////////////////////////////////////////////////////////////////////// + // is_null + // + template + bool is_null(const T&) + { + return false; + } + + template + bool is_null(Result (*const& function_pointer)(Arguments...)) + { + return function_pointer == nullptr; + } + + template + bool is_null(Result (Class::*const& function_pointer)(Arguments...)) + { + return function_pointer == nullptr; + } + + template + bool is_null(Result (Class::*const& function_pointer)(Arguments...) const) + { + return function_pointer == nullptr; + } + + } // namespace internal +} // namespace eastl + +#endif // Header include guard + diff --git a/libkram/eastl/include/EASTL/internal/functional_base.h b/libkram/eastl/include/EASTL/internal/functional_base.h new file mode 100644 index 00000000..a7d2dc91 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/functional_base.h @@ -0,0 +1,389 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_INTERNAL_FUNCTIONAL_BASE_H +#define EASTL_INTERNAL_FUNCTIONAL_BASE_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + +#include +#include +#include +#include + +namespace eastl +{ + // foward declaration for swap + template + inline void swap(T& a, T& b) + EA_NOEXCEPT_IF(eastl::is_nothrow_move_constructible::value&& eastl::is_nothrow_move_assignable::value); + + + /// invoke + /// + /// invoke is a generalized function-call operator which works on function pointers, member function + /// pointers, callable objects and member pointers. + /// + /// For (member/non-member) function pointers and callable objects, it returns the result of calling + /// the function/object with the specified arguments. For member data pointers, it simply returns + /// the member. 
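For reference, a minimal sketch of the invoke semantics described above — member function pointers are called on the object passed as the first argument, member data pointers simply return the member, and anything else is a plain call. This is illustrative only (not part of the vendored header); it assumes the EASTL headers added by this patch are on the include path and a C++14 toolchain, since eastl::invoke uses decltype(auto).

    #include <EASTL/functional.h>   // pulls in internal/functional_base.h added above
    #include <cassert>

    struct Point
    {
        int x;
        int DoubleX() const { return x * 2; }
    };

    int Sum(int a, int b) { return a + b; }

    int main()
    {
        Point p{21};

        assert(eastl::invoke(&Point::DoubleX, p) == 42);               // member function pointer: p.DoubleX()
        assert(eastl::invoke(&Point::x, p) == 21);                     // member data pointer: returns p.x
        assert(eastl::invoke(Sum, 20, 22) == 42);                      // free function: Sum(20, 22)
        assert(eastl::invoke([](int a) { return a + 1; }, 41) == 42);  // callable object / lambda
        return 0;
    }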
+ /// + /// Note that there are also reference_wrapper specializations of invoke, which need to be defined + /// later since reference_wrapper uses invoke in its implementation. Those are defined immediately + /// after the definition of reference_wrapper. + /// + /// http://en.cppreference.com/w/cpp/utility/functional/invoke + /// + template + auto invoke_impl(R C::*func, T&& obj, Args&&... args) -> + typename enable_if>::value, + decltype((eastl::forward(obj).*func)(eastl::forward(args)...))>::type + { + return (eastl::forward(obj).*func)(eastl::forward(args)...); + } + + template + auto invoke_impl(F&& func, Args&&... args) -> decltype(eastl::forward(func)(eastl::forward(args)...)) + { + return eastl::forward(func)(eastl::forward(args)...); + } + + + template + auto invoke_impl(R C::*func, T&& obj, Args&&... args) -> decltype(((*eastl::forward(obj)).*func)(eastl::forward(args)...)) + { + return ((*eastl::forward(obj)).*func)(eastl::forward(args)...); + } + + template + auto invoke_impl(M C::*member, T&& obj) -> + typename enable_if< + is_base_of>::value, + decltype(obj.*member) + >::type + { + return obj.*member; + } + + template + auto invoke_impl(M C::*member, T&& obj) -> decltype((*eastl::forward(obj)).*member) + { + return (*eastl::forward(obj)).*member; + } + + template + inline decltype(auto) invoke(F&& func, Args&&... args) + { + return invoke_impl(eastl::forward(func), eastl::forward(args)...); + } + + template + struct invoke_result_impl { + }; + + template + struct invoke_result_impl>(), eastl::declval()...))>, Args...> + { + typedef decltype(invoke_impl(eastl::declval>(), eastl::declval()...)) type; + }; + + template + struct invoke_result : public invoke_result_impl {}; + + #if !defined(EA_COMPILER_NO_TEMPLATE_ALIASES) + template + using invoke_result_t = typename invoke_result::type; + #endif + + template + struct is_invocable_impl : public eastl::false_type {}; + + template + struct is_invocable_impl::type>, Args...> : public eastl::true_type {}; + + template + struct is_invocable : public is_invocable_impl {}; + + template + struct is_invocable_r_impl : public eastl::false_type {}; + + template + struct is_invocable_r_impl::type>, Args...> + : public is_convertible::type, R> {}; + + template + struct is_invocable_r : public is_invocable_r_impl {}; + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EASTL_CPP17_INLINE_VARIABLE EA_CONSTEXPR bool is_invocable_v = is_invocable::value; + + template + EASTL_CPP17_INLINE_VARIABLE EA_CONSTEXPR bool is_invocable_r_v = is_invocable_r::value; + #endif + + /// allocator_arg_t + /// + /// allocator_arg_t is an empty class type used to disambiguate the overloads of + /// constructors and member functions of allocator-aware objects, including tuple, + /// function, promise, and packaged_task. + /// http://en.cppreference.com/w/cpp/memory/allocator_arg_t + /// + struct allocator_arg_t + {}; + + + /// allocator_arg + /// + /// allocator_arg is a constant of type allocator_arg_t used to disambiguate, at call site, + /// the overloads of the constructors and member functions of allocator-aware objects, + /// such as tuple, function, promise, and packaged_task. 
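Looking back at the invoke_result and is_invocable traits defined just above, a compile-time sketch of how they answer "what type does invoking this produce?" and "can this be invoked with these arguments?". The declarations here are illustrative placeholders, not part of the original header.

    #include <EASTL/functional.h>
    #include <EASTL/type_traits.h>

    int  Parse(const char*);   // declarations only; used purely for type computations
    void Log(int);

    // invoke_result_t names the type produced by eastl::invoke(F, Args...)
    static_assert(eastl::is_same<eastl::invoke_result_t<decltype(&Parse), const char*>, int>::value,
                  "invoking Parse with a const char* yields int");

    // is_invocable / is_invocable_r check callability (and result convertibility)
    static_assert( eastl::is_invocable<decltype(&Log), int>::value,          "Log is callable with an int");
    static_assert(!eastl::is_invocable<decltype(&Log), const char*>::value,  "no conversion from const char* to int");
    static_assert( eastl::is_invocable_r<long, decltype(&Parse), const char*>::value, "int converts to long");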
+ /// http://en.cppreference.com/w/cpp/memory/allocator_arg + /// + #if !defined(EA_COMPILER_NO_CONSTEXPR) + EA_CONSTEXPR allocator_arg_t allocator_arg = allocator_arg_t(); + #endif + + + template + struct unary_function + { + typedef Argument argument_type; + typedef Result result_type; + }; + + + template + struct binary_function + { + typedef Argument1 first_argument_type; + typedef Argument2 second_argument_type; + typedef Result result_type; + }; + + + /// less + template + struct less : public binary_function + { + EA_CPP14_CONSTEXPR bool operator()(const T& a, const T& b) const + { return a < b; } + }; + + // http://en.cppreference.com/w/cpp/utility/functional/less_void + template <> + struct less + { + template + EA_CPP14_CONSTEXPR auto operator()(A&& a, B&& b) const + -> decltype(eastl::forward(a) < eastl::forward(b)) + { return eastl::forward(a) < eastl::forward(b); } + }; + + + /// reference_wrapper + template + class reference_wrapper + { + public: + typedef T type; + + reference_wrapper(T&) EA_NOEXCEPT; + reference_wrapper(T&&) = delete; + reference_wrapper(const reference_wrapper& x) EA_NOEXCEPT; + + reference_wrapper& operator=(const reference_wrapper& x) EA_NOEXCEPT; + + operator T& () const EA_NOEXCEPT; + T& get() const EA_NOEXCEPT; + + template + typename eastl::result_of::type operator() (ArgTypes&&...) const; + + private: + T* val; + }; + + template + reference_wrapper::reference_wrapper(T &v) EA_NOEXCEPT + : val(eastl::addressof(v)) + {} + + template + reference_wrapper::reference_wrapper(const reference_wrapper& other) EA_NOEXCEPT + : val(other.val) + {} + + template + reference_wrapper& reference_wrapper::operator=(const reference_wrapper& other) EA_NOEXCEPT + { + val = other.val; + return *this; + } + + template + reference_wrapper::operator T&() const EA_NOEXCEPT + { + return *val; + } + + template + T& reference_wrapper::get() const EA_NOEXCEPT + { + return *val; + } + + template + template + typename eastl::result_of::type reference_wrapper::operator() (ArgTypes&&... args) const + { + return eastl::invoke(*val, eastl::forward(args)...); + } + + // reference_wrapper-specific utilties + template + reference_wrapper ref(T& t) EA_NOEXCEPT + { + return eastl::reference_wrapper(t); + } + + template + void ref(const T&&) = delete; + + template + reference_wrapper ref(reference_wrappert) EA_NOEXCEPT + { + return eastl::ref(t.get()); + } + + template + reference_wrapper cref(const T& t) EA_NOEXCEPT + { + return eastl::reference_wrapper(t); + } + + template + void cref(const T&&) = delete; + + template + reference_wrapper cref(reference_wrapper t) EA_NOEXCEPT + { + return eastl::cref(t.get()); + } + + + // reference_wrapper-specific type traits + template + struct is_reference_wrapper_helper + : public eastl::false_type {}; + + template + struct is_reference_wrapper_helper > + : public eastl::true_type {}; + + template + struct is_reference_wrapper + : public eastl::is_reference_wrapper_helper::type> {}; + + + // Helper which adds a reference to a type when given a reference_wrapper of that type. 
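A short sketch of the reference_wrapper, ref and cref utilities defined above: assigning to a wrapper rebinds it rather than writing through it, get() exposes the referenced object, and invoking a wrapper of a callable forwards to the wrapped callable via eastl::invoke. Illustrative only.

    #include <EASTL/functional.h>
    #include <cassert>

    int main()
    {
        int a = 1, b = 2;

        eastl::reference_wrapper<int> r = eastl::ref(a);
        r.get() = 10;                  // writes through to a
        assert(a == 10);

        r = eastl::ref(b);             // rebinds the wrapper; a is untouched
        assert(a == 10 && r.get() == 2);

        auto addOne  = [](int x) { return x + 1; };
        auto wrapped = eastl::ref(addOne);
        assert(wrapped(41) == 42);     // operator() forwards to the wrapped callable

        eastl::reference_wrapper<const int> cr = eastl::cref(a);
        assert(cr.get() == 10);
        return 0;
    }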
+ template + struct remove_reference_wrapper + { typedef T type; }; + + template + struct remove_reference_wrapper< eastl::reference_wrapper > + { typedef T& type; }; + + template + struct remove_reference_wrapper< const eastl::reference_wrapper > + { typedef T& type; }; + + // reference_wrapper specializations of invoke + // These have to come after reference_wrapper is defined, but reference_wrapper needs to have a + // definition of invoke, so these specializations need to come after everything else has been defined. + template + auto invoke_impl(R (C::*func)(Args...), T&& obj, Args&&... args) -> + typename enable_if::type>::value, + decltype((obj.get().*func)(eastl::forward(args)...))>::type + { + return (obj.get().*func)(eastl::forward(args)...); + } + + template + auto invoke_impl(M(C::*member), T&& obj) -> + typename enable_if::type>::value, + decltype(obj.get().*member)>::type + { + return obj.get().*member; + } + + + /////////////////////////////////////////////////////////////////////// + // bind + /////////////////////////////////////////////////////////////////////// + + /// bind1st + /// + template + class binder1st : public unary_function + { + protected: + typename Operation::first_argument_type value; + Operation op; + + public: + binder1st(const Operation& x, const typename Operation::first_argument_type& y) + : value(y), op(x) { } + + typename Operation::result_type operator()(const typename Operation::second_argument_type& x) const + { return op(value, x); } + + typename Operation::result_type operator()(typename Operation::second_argument_type& x) const + { return op(value, x); } + }; + + + template + inline binder1st bind1st(const Operation& op, const T& x) + { + typedef typename Operation::first_argument_type value; + return binder1st(op, value(x)); + } + + + /// bind2nd + /// + template + class binder2nd : public unary_function + { + protected: + Operation op; + typename Operation::second_argument_type value; + + public: + binder2nd(const Operation& x, const typename Operation::second_argument_type& y) + : op(x), value(y) { } + + typename Operation::result_type operator()(const typename Operation::first_argument_type& x) const + { return op(x, value); } + + typename Operation::result_type operator()(typename Operation::first_argument_type& x) const + { return op(x, value); } + }; + + + template + inline binder2nd bind2nd(const Operation& op, const T& x) + { + typedef typename Operation::second_argument_type value; + return binder2nd(op, value(x)); + } + +} // namespace eastl + +#endif // Header include guard diff --git a/libkram/eastl/include/EASTL/internal/generic_iterator.h b/libkram/eastl/include/EASTL/internal/generic_iterator.h new file mode 100644 index 00000000..b32998a8 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/generic_iterator.h @@ -0,0 +1,208 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// Implements a generic iterator from a given iteratable type, such as a pointer. +// We cannot put this file into our own iterator.h file because we need to +// still be able to use this file when we have our iterator.h disabled. 
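Before the generic_iterator header continues: the legacy bind1st/bind2nd adaptors defined at the end of functional_base.h above bind one argument of a binary functor, as in this sketch (lambdas or eastl::bind are the modern replacement; this example is not part of the patched sources).

    #include <EASTL/functional.h>
    #include <EASTL/algorithm.h>
    #include <cassert>

    int main()
    {
        const int values[] = { 1, 5, 10, 20 };

        // bind2nd fixes the second argument: pred(x) == less<int>()(x, 10)
        assert(eastl::count_if(values, values + 4, eastl::bind2nd(eastl::less<int>(), 10)) == 2);

        // bind1st fixes the first argument: pred(x) == less<int>()(10, x)
        assert(eastl::count_if(values, values + 4, eastl::bind1st(eastl::less<int>(), 10)) == 1);
        return 0;
    }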
+// +/////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_INTERNAL_GENERIC_ITERATOR_H +#define EASTL_INTERNAL_GENERIC_ITERATOR_H + + +#include +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + +#include +#include +#include + +// There is no warning number 'number'. +// Member template functions cannot be used for copy-assignment or copy-construction. +EA_DISABLE_VC_WARNING(4619 4217); + + +namespace eastl +{ + + /// generic_iterator + /// + /// Converts something which can be iterated into a formal iterator. + /// While this class' primary purpose is to allow the conversion of + /// a pointer to an iterator, you can convert anything else to an + /// iterator by defining an iterator_traits<> specialization for that + /// object type. See EASTL iterator.h for this. + /// + /// Example usage: + /// typedef generic_iterator IntArrayIterator; + /// typedef generic_iterator IntArrayIteratorOther; + /// + template + class generic_iterator + { + protected: + Iterator mIterator; + + public: + typedef typename eastl::iterator_traits::iterator_category iterator_category; + typedef typename eastl::iterator_traits::value_type value_type; + typedef typename eastl::iterator_traits::difference_type difference_type; + typedef typename eastl::iterator_traits::reference reference; + typedef typename eastl::iterator_traits::pointer pointer; + typedef Iterator iterator_type; + typedef iterator_type wrapped_iterator_type; // This is not in the C++ Standard; it's used by use to identify it as a wrapping iterator type. + typedef Container container_type; + typedef generic_iterator this_type; + + generic_iterator() + : mIterator(iterator_type()) { } + + explicit generic_iterator(const iterator_type& x) + : mIterator(x) { } + + this_type& operator=(const iterator_type& x) + { mIterator = x; return *this; } + + template + generic_iterator(const generic_iterator& x) + : mIterator(x.base()) { } + + reference operator*() const + { return *mIterator; } + + pointer operator->() const + { return mIterator; } + + this_type& operator++() + { ++mIterator; return *this; } + + this_type operator++(int) + { return this_type(mIterator++); } + + this_type& operator--() + { --mIterator; return *this; } + + this_type operator--(int) + { return this_type(mIterator--); } + + reference operator[](const difference_type& n) const + { return mIterator[n]; } + + this_type& operator+=(const difference_type& n) + { mIterator += n; return *this; } + + this_type operator+(const difference_type& n) const + { return this_type(mIterator + n); } + + this_type& operator-=(const difference_type& n) + { mIterator -= n; return *this; } + + this_type operator-(const difference_type& n) const + { return this_type(mIterator - n); } + + const iterator_type& base() const + { return mIterator; } + + }; // class generic_iterator + + + template + inline bool operator==(const generic_iterator& lhs, const generic_iterator& rhs) + { return lhs.base() == rhs.base(); } + + template + inline bool operator==(const generic_iterator& lhs, const generic_iterator& rhs) + { return lhs.base() == rhs.base(); } + + template + inline bool operator!=(const generic_iterator& lhs, const generic_iterator& rhs) + { return lhs.base() != rhs.base(); } + + template + inline bool operator!=(const generic_iterator& lhs, const generic_iterator& rhs) + { return lhs.base() != rhs.base(); } + + template + inline bool operator<(const generic_iterator& lhs, const generic_iterator& rhs) + { return lhs.base() < rhs.base(); } + + 
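A usage sketch for generic_iterator: wrapping a raw pointer so it behaves as a class-type random-access iterator. Purely illustrative; the single-argument form relies on the defaulted Container parameter.

    #include <EASTL/internal/generic_iterator.h>
    #include <cassert>

    int main()
    {
        int data[] = { 3, 1, 4, 1, 5 };

        typedef eastl::generic_iterator<int*> IntPtrIterator;
        IntPtrIterator first(data);
        IntPtrIterator last(data + 5);

        int sum = 0;
        for (IntPtrIterator it = first; it != last; ++it)
            sum += *it;                      // operator* forwards to the wrapped pointer

        assert(sum == 14);
        assert(last - first == 5);           // random-access arithmetic also forwards
        assert(first.base() == data);        // base() exposes the wrapped iterator
        return 0;
    }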
template + inline bool operator<(const generic_iterator& lhs, const generic_iterator& rhs) + { return lhs.base() < rhs.base(); } + + template + inline bool operator>(const generic_iterator& lhs, const generic_iterator& rhs) + { return lhs.base() > rhs.base(); } + + template + inline bool operator>(const generic_iterator& lhs, const generic_iterator& rhs) + { return lhs.base() > rhs.base(); } + + template + inline bool operator<=(const generic_iterator& lhs, const generic_iterator& rhs) + { return lhs.base() <= rhs.base(); } + + template + inline bool operator<=(const generic_iterator& lhs, const generic_iterator& rhs) + { return lhs.base() <= rhs.base(); } + + template + inline bool operator>=(const generic_iterator& lhs, const generic_iterator& rhs) + { return lhs.base() >= rhs.base(); } + + template + inline bool operator>=(const generic_iterator& lhs, const generic_iterator& rhs) + { return lhs.base() >= rhs.base(); } + + template + inline typename generic_iterator::difference_type + operator-(const generic_iterator& lhs, const generic_iterator& rhs) + { return lhs.base() - rhs.base(); } + + template + inline generic_iterator + operator+(typename generic_iterator::difference_type n, const generic_iterator& x) + { return generic_iterator(x.base() + n); } + + + + /// is_generic_iterator + /// + /// Tells if an iterator is one of these generic_iterators. This is useful if you want to + /// write code that uses miscellaneous iterators but wants to tell if they are generic_iterators. + /// A primary reason to do so is that you can get at the pointer within the generic_iterator. + /// + template + struct is_generic_iterator : public false_type { }; + + template + struct is_generic_iterator > : public true_type { }; + + + /// unwrap_generic_iterator + /// + /// Returns Iterator::get_base() if it's a generic_iterator, else returns Iterator as-is. + /// + /// Example usage: + /// vector intVector; + /// eastl::generic_iterator::iterator> genericIterator(intVector.begin()); + /// vector::iterator it = unwrap_generic_iterator(genericIterator); + /// + template + inline typename eastl::is_iterator_wrapper_helper::value>::iterator_type unwrap_generic_iterator(Iterator it) + { return eastl::is_iterator_wrapper_helper::value>::get_base(it); } + + +} // namespace eastl + + +EA_RESTORE_VC_WARNING(); + + +#endif // Header include guard diff --git a/libkram/eastl/include/EASTL/internal/hashtable.h b/libkram/eastl/include/EASTL/internal/hashtable.h new file mode 100644 index 00000000..bb6d27eb --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/hashtable.h @@ -0,0 +1,3222 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// This file implements a hashtable, much like the C++11 unordered_set/unordered_map. +// proposed classes. +// The primary distinctions between this hashtable and C++11 unordered containers are: +// - hashtable is savvy to an environment that doesn't have exception handling, +// as is sometimes the case with console or embedded environments. +// - hashtable is slightly more space-efficient than a conventional std hashtable +// implementation on platforms with 64 bit size_t. This is +// because std STL uses size_t (64 bits) in data structures whereby 32 bits +// of data would be fine. 
+// - hashtable can contain objects with alignment requirements. TR1 hash tables +// cannot do so without a bit of tedious non-portable effort. +// - hashtable supports debug memory naming natively. +// - hashtable provides a find function that lets you specify a type that is +// different from the hash table key type. This is particularly useful for +// the storing of string objects but finding them by char pointers. +// - hashtable provides a lower level insert function which lets the caller +// specify the hash code and optionally the node instance. +/////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_INTERNAL_HASHTABLE_H +#define EASTL_INTERNAL_HASHTABLE_H + + +#include +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +EA_DISABLE_ALL_VC_WARNINGS() + #include + #include +EA_RESTORE_ALL_VC_WARNINGS() + +// 4512 - 'class' : assignment operator could not be generated. +// 4530 - C++ exception handler used, but unwind semantics are not enabled. Specify /EHsc +// 4571 - catch(...) semantics changed since Visual C++ 7.1; structured exceptions (SEH) are no longer caught. +EA_DISABLE_VC_WARNING(4512 4530 4571); + + +namespace eastl +{ + + /// EASTL_HASHTABLE_DEFAULT_NAME + /// + /// Defines a default container name in the absence of a user-provided name. + /// + #ifndef EASTL_HASHTABLE_DEFAULT_NAME + #define EASTL_HASHTABLE_DEFAULT_NAME EASTL_DEFAULT_NAME_PREFIX " hashtable" // Unless the user overrides something, this is "EASTL hashtable". + #endif + + + /// EASTL_HASHTABLE_DEFAULT_ALLOCATOR + /// + #ifndef EASTL_HASHTABLE_DEFAULT_ALLOCATOR + #define EASTL_HASHTABLE_DEFAULT_ALLOCATOR allocator_type(EASTL_HASHTABLE_DEFAULT_NAME) + #endif + + + /// kHashtableAllocFlagBuckets + /// Flag to allocator which indicates that we are allocating buckets and not nodes. + enum { kHashtableAllocFlagBuckets = 0x00400000 }; + + + /// gpEmptyBucketArray + /// + /// A shared representation of an empty hash table. This is present so that + /// a new empty hashtable allocates no memory. It has two entries, one for + /// the first lone empty (NULL) bucket, and one for the non-NULL trailing sentinel. + /// + extern EASTL_API void* gpEmptyBucketArray[2]; + + + /// EASTL_MACRO_SWAP + /// + /// Use EASTL_MACRO_SWAP because GCC (at least v4.6-4.8) has a bug where it fails to compile eastl::swap(mpBucketArray, x.mpBucketArray). + /// + #define EASTL_MACRO_SWAP(Type, a, b) \ + { Type temp = a; a = b; b = temp; } + + + /// hash_node + /// + /// A hash_node stores an element in a hash table, much like a + /// linked list node stores an element in a linked list. + /// A hash_node additionally can, via template parameter, + /// store a hash code in the node to speed up hash calculations + /// and comparisons in some cases. + /// + template + struct hash_node; + + EA_DISABLE_VC_WARNING(4625 4626) // "copy constructor / assignment operator could not be generated because a base class copy constructor is inaccessible or deleted" + #ifdef EA_COMPILER_MSVC_2015 + EA_DISABLE_VC_WARNING(5026) // disable warning: "move constructor was implicitly defined as deleted" + #endif + template + struct hash_node + { + hash_node() = default; + hash_node(const hash_node&) = default; + hash_node(hash_node&&) = default; + + Value mValue; + hash_node* mpNext; + eastl_size_t mnHashCode; // See config.h for the definition of eastl_size_t, which defaults to size_t. 
+ } EASTL_MAY_ALIAS; + + template + struct hash_node + { + hash_node() = default; + hash_node(const hash_node&) = default; + hash_node(hash_node&&) = default; + + Value mValue; + hash_node* mpNext; + } EASTL_MAY_ALIAS; + + #ifdef EA_COMPILER_MSVC_2015 + EA_RESTORE_VC_WARNING() + #endif + EA_RESTORE_VC_WARNING() + + + // has_hashcode_member + // + // Custom type-trait that checks for the existence of a class data member 'mnHashCode'. + // + // In order to explicitly instantiate the hashtable without error we need to SFINAE away the functions that will + // fail to compile based on if the 'hash_node' contains a 'mnHashCode' member dictated by the hashtable template + // parameters. The hashtable support this level of configuration to allow users to choose which between the space vs. + // time optimization. + // + namespace Internal + { + template + struct has_hashcode_member + { + private: + template static eastl::no_type test(...); + template static eastl::yes_type test(decltype(U::mnHashCode)* = 0); + public: + static const bool value = sizeof(test(0)) == sizeof(eastl::yes_type); + }; + } + + static_assert(Internal::has_hashcode_member>::value, "contains a mnHashCode member"); + static_assert(!Internal::has_hashcode_member>::value, "doesn't contain a mnHashCode member"); + + // convenience macros to increase the readability of the code paths that must SFINAE on if the 'hash_node' + // contains the cached hashed value or not. + #define ENABLE_IF_HAS_HASHCODE(T, RT) typename eastl::enable_if::value, RT>::type* + #define ENABLE_IF_HASHCODE_EASTLSIZET(T, RT) typename eastl::enable_if::value, RT>::type + #define ENABLE_IF_TRUETYPE(T) typename eastl::enable_if::type* + #define DISABLE_IF_TRUETYPE(T) typename eastl::enable_if::type* + + + /// node_iterator_base + /// + /// Node iterators iterate nodes within a given bucket. + /// + /// We define a base class here because it is shared by both const and + /// non-const iterators. + /// + template + struct node_iterator_base + { + typedef hash_node node_type; + + node_type* mpNode; + + node_iterator_base(node_type* pNode) + : mpNode(pNode) { } + + void increment() + { mpNode = mpNode->mpNext; } + }; + + + + /// node_iterator + /// + /// Node iterators iterate nodes within a given bucket. + /// + /// The bConst parameter defines if the iterator is a const_iterator + /// or an iterator. + /// + template + struct node_iterator : public node_iterator_base + { + public: + typedef node_iterator_base base_type; + typedef node_iterator this_type; + typedef typename base_type::node_type node_type; + typedef Value value_type; + typedef typename type_select::type pointer; + typedef typename type_select::type reference; + typedef ptrdiff_t difference_type; + typedef EASTL_ITC_NS::forward_iterator_tag iterator_category; + + public: + explicit node_iterator(node_type* pNode = NULL) + : base_type(pNode) { } + + node_iterator(const node_iterator& x) + : base_type(x.mpNode) { } + + reference operator*() const + { return base_type::mpNode->mValue; } + + pointer operator->() const + { return &(base_type::mpNode->mValue); } + + node_iterator& operator++() + { base_type::increment(); return *this; } + + node_iterator operator++(int) + { node_iterator temp(*this); base_type::increment(); return temp; } + + }; // node_iterator + + + + /// hashtable_iterator_base + /// + /// A hashtable_iterator iterates the entire hash table and not just + /// nodes within a single bucket. 
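The mnHashCode caching discussed above is surfaced to users through the bCacheHashCode template parameter of the containers built on this hashtable. A sketch with eastl::hash_map, assuming the application supplies the operator new[] overloads EASTL's default allocator expects:

    #include <EASTL/hash_map.h>
    #include <EASTL/string.h>

    // bCacheHashCode == true: each hash_node also stores mnHashCode, so rehashing never
    // re-hashes stored keys, and lookups can reject non-matching nodes by comparing the
    // cached code before calling the (relatively expensive) string equality predicate.
    typedef eastl::hash_map<eastl::string, int,
                            eastl::hash<eastl::string>,
                            eastl::equal_to<eastl::string>,
                            EASTLAllocatorType,
                            /*bCacheHashCode*/ true> CachedStringMap;

    // Default bCacheHashCode == false: nodes are one eastl_size_t smaller, but stored keys
    // are re-hashed whenever the table rehashes.
    typedef eastl::hash_map<eastl::string, int> LeanStringMap;

    void Sketch()
    {
        CachedStringMap cached;
        cached["answer"] = 42;

        LeanStringMap lean;
        lean["answer"] = 42;
    }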
Users in general will use a hash + /// table iterator much more often, as it is much like other container + /// iterators (e.g. vector::iterator). + /// + /// We define a base class here because it is shared by both const and + /// non-const iterators. + /// + template + struct hashtable_iterator_base + { + public: + typedef hashtable_iterator_base this_type; + typedef hash_node node_type; + + protected: + template + friend class hashtable; + + template + friend struct hashtable_iterator; + + template + friend bool operator==(const hashtable_iterator_base&, const hashtable_iterator_base&); + + template + friend bool operator!=(const hashtable_iterator_base&, const hashtable_iterator_base&); + + node_type* mpNode; // Current node within current bucket. + node_type** mpBucket; // Current bucket. + + public: + hashtable_iterator_base(node_type* pNode, node_type** pBucket) + : mpNode(pNode), mpBucket(pBucket) { } + + void increment_bucket() + { + ++mpBucket; + while(*mpBucket == NULL) // We store an extra bucket with some non-NULL value at the end + ++mpBucket; // of the bucket array so that finding the end of the bucket + mpNode = *mpBucket; // array is quick and simple. + } + + void increment() + { + mpNode = mpNode->mpNext; + + while(mpNode == NULL) + mpNode = *++mpBucket; + } + + }; // hashtable_iterator_base + + + + + /// hashtable_iterator + /// + /// A hashtable_iterator iterates the entire hash table and not just + /// nodes within a single bucket. Users in general will use a hash + /// table iterator much more often, as it is much like other container + /// iterators (e.g. vector::iterator). + /// + /// The bConst parameter defines if the iterator is a const_iterator + /// or an iterator. + /// + template + struct hashtable_iterator : public hashtable_iterator_base + { + public: + typedef hashtable_iterator_base base_type; + typedef hashtable_iterator this_type; + typedef hashtable_iterator this_type_non_const; + typedef typename base_type::node_type node_type; + typedef Value value_type; + typedef typename type_select::type pointer; + typedef typename type_select::type reference; + typedef ptrdiff_t difference_type; + typedef EASTL_ITC_NS::forward_iterator_tag iterator_category; + + public: + hashtable_iterator(node_type* pNode = NULL, node_type** pBucket = NULL) + : base_type(pNode, pBucket) { } + + hashtable_iterator(node_type** pBucket) + : base_type(*pBucket, pBucket) { } + + hashtable_iterator(const this_type_non_const& x) + : base_type(x.mpNode, x.mpBucket) { } + + reference operator*() const + { return base_type::mpNode->mValue; } + + pointer operator->() const + { return &(base_type::mpNode->mValue); } + + hashtable_iterator& operator++() + { base_type::increment(); return *this; } + + hashtable_iterator operator++(int) + { hashtable_iterator temp(*this); base_type::increment(); return temp; } + + const node_type* get_node() const + { return base_type::mpNode; } + + }; // hashtable_iterator + + + + + /// ht_distance + /// + /// This function returns the same thing as distance() for + /// forward iterators but returns zero for input iterators. + /// The reason why is that input iterators can only be read + /// once, and calling distance() on an input iterator destroys + /// the ability to read it. This ht_distance is used only for + /// optimization and so the code will merely work better with + /// forward iterators that input iterators. 
+ /// + template + inline typename eastl::iterator_traits::difference_type + distance_fw_impl(Iterator /*first*/, Iterator /*last*/, EASTL_ITC_NS::input_iterator_tag) + { + return 0; + } + + template + inline typename eastl::iterator_traits::difference_type + distance_fw_impl(Iterator first, Iterator last, EASTL_ITC_NS::forward_iterator_tag) + { return eastl::distance(first, last); } + + template + inline typename eastl::iterator_traits::difference_type + ht_distance(Iterator first, Iterator last) + { + typedef typename eastl::iterator_traits::iterator_category IC; + return distance_fw_impl(first, last, IC()); + } + + + + + /// mod_range_hashing + /// + /// Implements the algorithm for conversion of a number in the range of + /// [0, SIZE_T_MAX] to the range of [0, BucketCount). + /// + struct mod_range_hashing + { + uint32_t operator()(size_t r, uint32_t n) const + { return r % n; } + }; + + + /// default_ranged_hash + /// + /// Default ranged hash function H. In principle it should be a + /// function object composed from objects of type H1 and H2 such that + /// h(k, n) = h2(h1(k), n), but that would mean making extra copies of + /// h1 and h2. So instead we'll just use a tag to tell class template + /// hashtable to do that composition. + /// + struct default_ranged_hash{ }; + + + /// prime_rehash_policy + /// + /// Default value for rehash policy. Bucket size is (usually) the + /// smallest prime that keeps the load factor small enough. + /// + struct EASTL_API prime_rehash_policy + { + public: + float mfMaxLoadFactor; + float mfGrowthFactor; + mutable uint32_t mnNextResize; + + public: + prime_rehash_policy(float fMaxLoadFactor = 1.f) + : mfMaxLoadFactor(fMaxLoadFactor), mfGrowthFactor(2.f), mnNextResize(0) { } + + float GetMaxLoadFactor() const + { return mfMaxLoadFactor; } + + /// Return a bucket count no greater than nBucketCountHint, + /// Don't update member variables while at it. + static uint32_t GetPrevBucketCountOnly(uint32_t nBucketCountHint); + + /// Return a bucket count no greater than nBucketCountHint. + /// This function has a side effect of updating mnNextResize. + uint32_t GetPrevBucketCount(uint32_t nBucketCountHint) const; + + /// Return a bucket count no smaller than nBucketCountHint. + /// This function has a side effect of updating mnNextResize. + uint32_t GetNextBucketCount(uint32_t nBucketCountHint) const; + + /// Return a bucket count appropriate for nElementCount elements. + /// This function has a side effect of updating mnNextResize. + uint32_t GetBucketCount(uint32_t nElementCount) const; + + /// nBucketCount is current bucket count, nElementCount is current element count, + /// and nElementAdd is number of elements to be inserted. Do we need + /// to increase bucket count? If so, return pair(true, n), where + /// n is the new bucket count. If not, return pair(false, 0). + eastl::pair + GetRehashRequired(uint32_t nBucketCount, uint32_t nElementCount, uint32_t nElementAdd) const; + }; + + + + + + /////////////////////////////////////////////////////////////////////// + // Base classes for hashtable. We define these base classes because + // in some cases we want to do different things depending on the + // value of a policy class. In some cases the policy class affects + // which member functions and nested typedefs are defined; we handle that + // by specializing base class templates. 
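A behavioural sketch of prime_rehash_policy as seen through the public container interface: the load factor is size() / bucket_count(), and once an insert would push it past the configured maximum the table grows to the next prime bucket count. The exact bucket counts are implementation details; this example only shows the knobs involved.

    #include <EASTL/hash_set.h>
    #include <cstdio>

    void RehashSketch()
    {
        eastl::hash_set<int> intSet;

        // get/set_max_load_factor come from rehash_base, backed by prime_rehash_policy.
        intSet.set_max_load_factor(1.0f);

        for (int i = 0; i < 1000; ++i)
            intSet.insert(i);  // bucket_count() grows as needed to respect the max load factor

        printf("size=%u buckets=%u load=%.2f max=%.2f\n",
               (unsigned)intSet.size(), (unsigned)intSet.bucket_count(),
               intSet.load_factor(), intSet.get_max_load_factor());
    }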
Several of the base class templates + // need to access other members of class template hashtable, so we use + // the "curiously recurring template pattern" (parent class is templated + // on type of child class) for them. + /////////////////////////////////////////////////////////////////////// + + + /// rehash_base + /// + /// Give hashtable the get_max_load_factor functions if the rehash + /// policy is prime_rehash_policy. + /// + template + struct rehash_base { }; + + template + struct rehash_base + { + // Returns the max load factor, which is the load factor beyond + // which we rebuild the container with a new bucket count. + float get_max_load_factor() const + { + const Hashtable* const pThis = static_cast(this); + return pThis->rehash_policy().GetMaxLoadFactor(); + } + + // If you want to make the hashtable never rehash (resize), + // set the max load factor to be a very high number (e.g. 100000.f). + void set_max_load_factor(float fMaxLoadFactor) + { + Hashtable* const pThis = static_cast(this); + pThis->rehash_policy(prime_rehash_policy(fMaxLoadFactor)); + } + }; + + + + + /// hash_code_base + /// + /// Encapsulates two policy issues that aren't quite orthogonal. + /// (1) The difference between using a ranged hash function and using + /// the combination of a hash function and a range-hashing function. + /// In the former case we don't have such things as hash codes, so + /// we have a dummy type as placeholder. + /// (2) Whether or not we cache hash codes. Caching hash codes is + /// meaningless if we have a ranged hash function. This is because + /// a ranged hash function converts an object directly to its + /// bucket index without ostensibly using a hash code. + /// We also put the key extraction and equality comparison function + /// objects here, for convenience. + /// + template + struct hash_code_base; + + + /// hash_code_base + /// + /// Specialization: ranged hash function, no caching hash codes. + /// H1 and H2 are provided but ignored. We define a dummy hash code type. + /// + template + struct hash_code_base + { + protected: + ExtractKey mExtractKey; // To do: Make this member go away entirely, as it never has any data. + Equal mEqual; // To do: Make this instance use zero space when it is zero size. + H mRangedHash; // To do: Make this instance use zero space when it is zero size + + public: + H1 hash_function() const + { return H1(); } + + Equal equal_function() const // Deprecated. Use key_eq() instead, as key_eq is what the new C++ standard + { return mEqual; } // has specified in its hashtable (unordered_*) proposal. 
+ + const Equal& key_eq() const + { return mEqual; } + + Equal& key_eq() + { return mEqual; } + + protected: + typedef void* hash_code_t; + typedef uint32_t bucket_index_t; + + hash_code_base(const ExtractKey& extractKey, const Equal& eq, const H1&, const H2&, const H& h) + : mExtractKey(extractKey), mEqual(eq), mRangedHash(h) { } + + hash_code_t get_hash_code(const Key& key) const + { + EA_UNUSED(key); + return NULL; + } + + bucket_index_t bucket_index(hash_code_t, uint32_t) const + { return (bucket_index_t)0; } + + bucket_index_t bucket_index(const Key& key, hash_code_t, uint32_t nBucketCount) const + { return (bucket_index_t)mRangedHash(key, nBucketCount); } + + bucket_index_t bucket_index(const hash_node* pNode, uint32_t nBucketCount) const + { return (bucket_index_t)mRangedHash(mExtractKey(pNode->mValue), nBucketCount); } + + bool compare(const Key& key, hash_code_t, hash_node* pNode) const + { return mEqual(key, mExtractKey(pNode->mValue)); } + + void copy_code(hash_node*, const hash_node*) const + { } // Nothing to do. + + void set_code(hash_node* pDest, hash_code_t c) const + { + EA_UNUSED(pDest); + EA_UNUSED(c); + } + + void base_swap(hash_code_base& x) + { + eastl::swap(mExtractKey, x.mExtractKey); + eastl::swap(mEqual, x.mEqual); + eastl::swap(mRangedHash, x.mRangedHash); + } + + }; // hash_code_base + + + + // No specialization for ranged hash function while caching hash codes. + // That combination is meaningless, and trying to do it is an error. + + + /// hash_code_base + /// + /// Specialization: ranged hash function, cache hash codes. + /// This combination is meaningless, so we provide only a declaration + /// and no definition. + /// + template + struct hash_code_base; + + + + /// hash_code_base + /// + /// Specialization: hash function and range-hashing function, + /// no caching of hash codes. H is provided but ignored. + /// Provides typedef and accessor required by TR1. + /// + template + struct hash_code_base + { + protected: + ExtractKey mExtractKey; + Equal mEqual; + H1 m_h1; + H2 m_h2; + + public: + typedef H1 hasher; + + H1 hash_function() const + { return m_h1; } + + Equal equal_function() const // Deprecated. Use key_eq() instead, as key_eq is what the new C++ standard + { return mEqual; } // has specified in its hashtable (unordered_*) proposal. + + const Equal& key_eq() const + { return mEqual; } + + Equal& key_eq() + { return mEqual; } + + protected: + typedef size_t hash_code_t; + typedef uint32_t bucket_index_t; + typedef hash_node node_type; + + hash_code_base(const ExtractKey& ex, const Equal& eq, const H1& h1, const H2& h2, const default_ranged_hash&) + : mExtractKey(ex), mEqual(eq), m_h1(h1), m_h2(h2) { } + + hash_code_t get_hash_code(const Key& key) const + { return (hash_code_t)m_h1(key); } + + bucket_index_t bucket_index(hash_code_t c, uint32_t nBucketCount) const + { return (bucket_index_t)m_h2(c, nBucketCount); } + + bucket_index_t bucket_index(const Key&, hash_code_t c, uint32_t nBucketCount) const + { return (bucket_index_t)m_h2(c, nBucketCount); } + + bucket_index_t bucket_index(const node_type* pNode, uint32_t nBucketCount) const + { return (bucket_index_t)m_h2((hash_code_t)m_h1(mExtractKey(pNode->mValue)), nBucketCount); } + + bool compare(const Key& key, hash_code_t, node_type* pNode) const + { return mEqual(key, mExtractKey(pNode->mValue)); } + + void copy_code(node_type*, const node_type*) const + { } // Nothing to do. + + void set_code(node_type*, hash_code_t) const + { } // Nothing to do. 
+ + void base_swap(hash_code_base& x) + { + eastl::swap(mExtractKey, x.mExtractKey); + eastl::swap(mEqual, x.mEqual); + eastl::swap(m_h1, x.m_h1); + eastl::swap(m_h2, x.m_h2); + } + + }; // hash_code_base + + + + /// hash_code_base + /// + /// Specialization: hash function and range-hashing function, + /// caching hash codes. H is provided but ignored. + /// Provides typedef and accessor required by TR1. + /// + template + struct hash_code_base + { + protected: + ExtractKey mExtractKey; + Equal mEqual; + H1 m_h1; + H2 m_h2; + + public: + typedef H1 hasher; + + H1 hash_function() const + { return m_h1; } + + Equal equal_function() const // Deprecated. Use key_eq() instead, as key_eq is what the new C++ standard + { return mEqual; } // has specified in its hashtable (unordered_*) proposal. + + const Equal& key_eq() const + { return mEqual; } + + Equal& key_eq() + { return mEqual; } + + protected: + typedef uint32_t hash_code_t; + typedef uint32_t bucket_index_t; + typedef hash_node node_type; + + hash_code_base(const ExtractKey& ex, const Equal& eq, const H1& h1, const H2& h2, const default_ranged_hash&) + : mExtractKey(ex), mEqual(eq), m_h1(h1), m_h2(h2) { } + + hash_code_t get_hash_code(const Key& key) const + { return (hash_code_t)m_h1(key); } + + bucket_index_t bucket_index(hash_code_t c, uint32_t nBucketCount) const + { return (bucket_index_t)m_h2(c, nBucketCount); } + + bucket_index_t bucket_index(const Key&, hash_code_t c, uint32_t nBucketCount) const + { return (bucket_index_t)m_h2(c, nBucketCount); } + + bucket_index_t bucket_index(const node_type* pNode, uint32_t nBucketCount) const + { return (bucket_index_t)m_h2((uint32_t)pNode->mnHashCode, nBucketCount); } + + bool compare(const Key& key, hash_code_t c, node_type* pNode) const + { return (pNode->mnHashCode == c) && mEqual(key, mExtractKey(pNode->mValue)); } + + void copy_code(node_type* pDest, const node_type* pSource) const + { pDest->mnHashCode = pSource->mnHashCode; } + + void set_code(node_type* pDest, hash_code_t c) const + { pDest->mnHashCode = c; } + + void base_swap(hash_code_base& x) + { + eastl::swap(mExtractKey, x.mExtractKey); + eastl::swap(mEqual, x.mEqual); + eastl::swap(m_h1, x.m_h1); + eastl::swap(m_h2, x.m_h2); + } + + }; // hash_code_base + + + + + + /////////////////////////////////////////////////////////////////////////// + /// hashtable + /// + /// Key and Value: arbitrary CopyConstructible types. + /// + /// ExtractKey: function object that takes a object of type Value + /// and returns a value of type Key. + /// + /// Equal: function object that takes two objects of type k and returns + /// a bool-like value that is true if the two objects are considered equal. + /// + /// H1: a hash function. A unary function object with argument type + /// Key and result type size_t. Return values should be distributed + /// over the entire range [0, numeric_limits::max()]. + /// + /// H2: a range-hashing function (in the terminology of Tavori and + /// Dreizin). This is a function which takes the output of H1 and + /// converts it to the range of [0, n]. Usually it merely takes the + /// output of H1 and mods it to n. + /// + /// H: a ranged hash function (Tavori and Dreizin). This is merely + /// a class that combines the functionality of H1 and H2 together, + /// possibly in some way that is somehow improved over H1 and H2 + /// It is a binary function whose argument types are Key and size_t + /// and whose result type is uint32_t. Given arguments k and n, the + /// return value is in the range [0, n). 
Default: h(k, n) = h2(h1(k), n). + /// If H is anything other than the default, H1 and H2 are ignored, + /// as H is thus overriding H1 and H2. + /// + /// RehashPolicy: Policy class with three members, all of which govern + /// the bucket count. nBucket(n) returns a bucket count no smaller + /// than n. GetBucketCount(n) returns a bucket count appropriate + /// for an element count of n. GetRehashRequired(nBucketCount, nElementCount, nElementAdd) + /// determines whether, if the current bucket count is nBucket and the + /// current element count is nElementCount, we need to increase the bucket + /// count. If so, returns pair(true, n), where n is the new + /// bucket count. If not, returns pair(false, ). + /// + /// Currently it is hard-wired that the number of buckets never + /// shrinks. Should we allow RehashPolicy to change that? + /// + /// bCacheHashCode: true if we store the value of the hash + /// function along with the value. This is a time-space tradeoff. + /// Storing it may improve lookup speed by reducing the number of + /// times we need to call the Equal function. + /// + /// bMutableIterators: true if hashtable::iterator is a mutable + /// iterator, false if iterator and const_iterator are both const + /// iterators. This is true for hash_map and hash_multimap, + /// false for hash_set and hash_multiset. + /// + /// bUniqueKeys: true if the return value of hashtable::count(k) + /// is always at most one, false if it may be an arbitrary number. + /// This is true for hash_set and hash_map and is false for + /// hash_multiset and hash_multimap. + /// + /////////////////////////////////////////////////////////////////////// + /// Note: + /// If you want to make a hashtable never increase its bucket usage, + /// call set_max_load_factor with a very high value such as 100000.f. + /// + /// find_as + /// In order to support the ability to have a hashtable of strings but + /// be able to do efficiently lookups via char pointers (i.e. so they + /// aren't converted to string objects), we provide the find_as + /// function. This function allows you to do a find with a key of a + /// type other than the hashtable key type. See the find_as function + /// for more documentation on this. + /// + /// find_by_hash + /// In the interest of supporting fast operations wherever possible, + /// we provide a find_by_hash function which finds a node using its + /// hash code. This is useful for cases where the node's hash is + /// already known, allowing us to avoid a redundant hash operation + /// in the normal find path. + /// + template + class hashtable + : public rehash_base >, + public hash_code_base + { + public: + typedef Key key_type; + typedef Value value_type; + typedef typename ExtractKey::result_type mapped_type; + typedef hash_code_base hash_code_base_type; + typedef typename hash_code_base_type::hash_code_t hash_code_t; + typedef Allocator allocator_type; + typedef Equal key_equal; + typedef ptrdiff_t difference_type; + typedef eastl_size_t size_type; // See config.h for the definition of eastl_size_t, which defaults to size_t. 
+ typedef value_type& reference; + typedef const value_type& const_reference; + typedef node_iterator local_iterator; + typedef node_iterator const_local_iterator; + typedef hashtable_iterator iterator; + typedef hashtable_iterator const_iterator; + typedef hash_node node_type; + typedef typename type_select, iterator>::type insert_return_type; + typedef hashtable this_type; + typedef RehashPolicy rehash_policy_type; + typedef ExtractKey extract_key_type; + typedef H1 h1_type; + typedef H2 h2_type; + typedef H h_type; + typedef integral_constant has_unique_keys_type; + + using hash_code_base_type::key_eq; + using hash_code_base_type::hash_function; + using hash_code_base_type::mExtractKey; + using hash_code_base_type::get_hash_code; + using hash_code_base_type::bucket_index; + using hash_code_base_type::compare; + using hash_code_base_type::set_code; + using hash_code_base_type::copy_code; + + static const bool kCacheHashCode = bCacheHashCode; + + enum + { + // This enumeration is deprecated in favor of eastl::kHashtableAllocFlagBuckets. + kAllocFlagBuckets = eastl::kHashtableAllocFlagBuckets // Flag to allocator which indicates that we are allocating buckets and not nodes. + }; + + protected: + node_type** mpBucketArray; + size_type mnBucketCount; + size_type mnElementCount; + RehashPolicy mRehashPolicy; // To do: Use base class optimization to make this go away. + allocator_type mAllocator; // To do: Use base class optimization to make this go away. + + public: + hashtable(size_type nBucketCount, const H1&, const H2&, const H&, const Equal&, const ExtractKey&, + const allocator_type& allocator = EASTL_HASHTABLE_DEFAULT_ALLOCATOR); + + template + hashtable(FowardIterator first, FowardIterator last, size_type nBucketCount, + const H1&, const H2&, const H&, const Equal&, const ExtractKey&, + const allocator_type& allocator = EASTL_HASHTABLE_DEFAULT_ALLOCATOR); + + hashtable(const hashtable& x); + + // initializer_list ctor support is implemented in subclasses (e.g. hash_set). + // hashtable(initializer_list, size_type nBucketCount, const H1&, const H2&, const H&, + // const Equal&, const ExtractKey&, const allocator_type& allocator = EASTL_HASHTABLE_DEFAULT_ALLOCATOR); + + hashtable(this_type&& x); + hashtable(this_type&& x, const allocator_type& allocator); + ~hashtable(); + + const allocator_type& get_allocator() const EA_NOEXCEPT; + allocator_type& get_allocator() EA_NOEXCEPT; + void set_allocator(const allocator_type& allocator); + + this_type& operator=(const this_type& x); + this_type& operator=(std::initializer_list ilist); + this_type& operator=(this_type&& x); + + void swap(this_type& x); + + iterator begin() EA_NOEXCEPT + { + iterator i(mpBucketArray); + if(!i.mpNode) + i.increment_bucket(); + return i; + } + + const_iterator begin() const EA_NOEXCEPT + { + const_iterator i(mpBucketArray); + if(!i.mpNode) + i.increment_bucket(); + return i; + } + + const_iterator cbegin() const EA_NOEXCEPT + { return begin(); } + + iterator end() EA_NOEXCEPT + { return iterator(mpBucketArray + mnBucketCount); } + + const_iterator end() const EA_NOEXCEPT + { return const_iterator(mpBucketArray + mnBucketCount); } + + const_iterator cend() const EA_NOEXCEPT + { return const_iterator(mpBucketArray + mnBucketCount); } + + // Returns an iterator to the first item in bucket n. 
+ local_iterator begin(size_type n) EA_NOEXCEPT + { return local_iterator(mpBucketArray[n]); } + + const_local_iterator begin(size_type n) const EA_NOEXCEPT + { return const_local_iterator(mpBucketArray[n]); } + + const_local_iterator cbegin(size_type n) const EA_NOEXCEPT + { return const_local_iterator(mpBucketArray[n]); } + + // Returns an iterator to the last item in a bucket returned by begin(n). + local_iterator end(size_type) EA_NOEXCEPT + { return local_iterator(NULL); } + + const_local_iterator end(size_type) const EA_NOEXCEPT + { return const_local_iterator(NULL); } + + const_local_iterator cend(size_type) const EA_NOEXCEPT + { return const_local_iterator(NULL); } + + bool empty() const EA_NOEXCEPT + { return mnElementCount == 0; } + + size_type size() const EA_NOEXCEPT + { return mnElementCount; } + + size_type bucket_count() const EA_NOEXCEPT + { return mnBucketCount; } + + size_type bucket_size(size_type n) const EA_NOEXCEPT + { return (size_type)eastl::distance(begin(n), end(n)); } + + //size_type bucket(const key_type& k) const EA_NOEXCEPT + // { return bucket_index(k, (hash code here), (uint32_t)mnBucketCount); } + + // Returns the ratio of element count to bucket count. A return value of 1 means + // there's an optimal 1 bucket for each element. + float load_factor() const EA_NOEXCEPT + { return (float)mnElementCount / (float)mnBucketCount; } + + // Inherited from the base class. + // Returns the max load factor, which is the load factor beyond + // which we rebuild the container with a new bucket count. + // get_max_load_factor comes from rehash_base. + // float get_max_load_factor() const; + + // Inherited from the base class. + // If you want to make the hashtable never rehash (resize), + // set the max load factor to be a very high number (e.g. 100000.f). + // set_max_load_factor comes from rehash_base. + // void set_max_load_factor(float fMaxLoadFactor); + + /// Generalization of get_max_load_factor. This is an extension that's + /// not present in C++ hash tables (unordered containers). + const rehash_policy_type& rehash_policy() const EA_NOEXCEPT + { return mRehashPolicy; } + + /// Generalization of set_max_load_factor. This is an extension that's + /// not present in C++ hash tables (unordered containers). + void rehash_policy(const rehash_policy_type& rehashPolicy); + + template + insert_return_type emplace(Args&&... args); + + template + iterator emplace_hint(const_iterator position, Args&&... args); + + template insert_return_type try_emplace(const key_type& k, Args&&... args); + template insert_return_type try_emplace(key_type&& k, Args&&... args); + template iterator try_emplace(const_iterator position, const key_type& k, Args&&... args); + template iterator try_emplace(const_iterator position, key_type&& k, Args&&... args); + + insert_return_type insert(const value_type& value); + insert_return_type insert(value_type&& otherValue); + iterator insert(const_iterator hint, const value_type& value); + iterator insert(const_iterator hint, value_type&& value); + void insert(std::initializer_list ilist); + template void insert(InputIterator first, InputIterator last); + //insert_return_type insert(node_type&& nh); + //iterator insert(const_iterator hint, node_type&& nh); + + // This overload attempts to mitigate the overhead associated with mismatched cv-quality elements of + // the hashtable pair. It can avoid copy overhead because it will perfect forward the user provided pair types + // until it can constructed in-place in the allocated hashtable node. 
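A usage sketch for the insert family declared above, through eastl::hash_map (which derives from this hashtable); the comment block describing the perfect-forwarding insert overload continues below. Illustrative only, and again assumes the application provides EASTL's allocation hooks.

    #include <EASTL/hash_map.h>
    #include <EASTL/string.h>
    #include <cassert>

    void InsertFamilySketch()
    {
        eastl::hash_map<eastl::string, int> map;

        // insert: no effect if the key already exists; success is reported in .second
        auto result = map.insert(eastl::make_pair(eastl::string("a"), 1));
        assert(result.second);

        // try_emplace: constructs the mapped value only if the key is absent
        map.try_emplace("b", 2);
        map.try_emplace("b", 99);           // "b" already present, value stays 2
        assert(map["b"] == 2);

        // insert_or_assign: inserts, or overwrites the mapped value of an existing key
        map.insert_or_assign("a", 10);
        assert(map["a"] == 10);

        // emplace: forwards its arguments to value_type's constructor
        map.emplace(eastl::string("c"), 3);
        assert(map.size() == 3);
    }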
+ // + // Ideally we would remove this overload as it is deprecated and removed in C++17 but it currently causes + // performance regressions for hashtables with complex keys (keys that allocate resources). + template , key_type> && + #endif + !eastl::is_literal_type_v

&& + eastl::is_constructible_v>> + insert_return_type insert(P&& otherValue); + + // Non-standard extension + template // See comments below for the const value_type& equivalent to this function. + insert_return_type insert(hash_code_t c, node_type* pNodeNew, P&& otherValue); + + // We provide a version of insert which lets the caller directly specify the hash value and + // a potential node to insert if needed. This allows for less thread contention in the case + // of a thread-shared hash table that's accessed during a mutex lock, because the hash calculation + // and node creation is done outside of the lock. If pNodeNew is supplied by the user (i.e. non-NULL) + // then it must be freeable via the hash table's allocator. If the return value is true then this function + // took over ownership of pNodeNew, else pNodeNew is still owned by the caller to free or to pass + // to another call to insert. pNodeNew need not be assigned the value by the caller, as the insert + // function will assign value to pNodeNew upon insertion into the hash table. pNodeNew may be + // created by the user with the allocate_uninitialized_node function, and freed by the free_uninitialized_node function. + insert_return_type insert(hash_code_t c, node_type* pNodeNew, const value_type& value); + + template eastl::pair insert_or_assign(const key_type& k, M&& obj); + template eastl::pair insert_or_assign(key_type&& k, M&& obj); + template iterator insert_or_assign(const_iterator hint, const key_type& k, M&& obj); + template iterator insert_or_assign(const_iterator hint, key_type&& k, M&& obj); + + // Used to allocate and free memory used by insert(const value_type& value, hash_code_t c, node_type* pNodeNew). + node_type* allocate_uninitialized_node(); + void free_uninitialized_node(node_type* pNode); + + iterator erase(const_iterator position); + iterator erase(const_iterator first, const_iterator last); + size_type erase(const key_type& k); + + void clear(); + void clear(bool clearBuckets); // If clearBuckets is true, we free the bucket memory and set the bucket count back to the newly constructed count. + void reset_lose_memory() EA_NOEXCEPT; // This is a unilateral reset to an initially empty state. No destructors are called, no deallocation occurs. + void rehash(size_type nBucketCount); + void reserve(size_type nElementCount); + + iterator find(const key_type& key); + const_iterator find(const key_type& key) const; + + /// Implements a find whereby the user supplies a comparison of a different type + /// than the hashtable value_type. A useful case of this is one whereby you have + /// a container of string objects but want to do searches via passing in char pointers. + /// The problem is that without this kind of find, you need to do the expensive operation + /// of converting the char pointer to a string so it can be used as the argument to the + /// find function. + /// + /// Example usage (namespaces omitted for brevity): + /// hash_set hashSet; + /// hashSet.find_as("hello"); // Use default hash and compare. 
+ /// + /// Example usage (note that the predicate uses string as first type and char* as second): + /// hash_set hashSet; + /// hashSet.find_as("hello", hash(), equal_to_2()); + /// + template + iterator find_as(const U& u, UHash uhash, BinaryPredicate predicate); + + template + const_iterator find_as(const U& u, UHash uhash, BinaryPredicate predicate) const; + + template + iterator find_as(const U& u); + + template + const_iterator find_as(const U& u) const; + + // Note: find_by_hash and find_range_by_hash both perform a search based on a hash value. + // It is important to note that multiple hash values may map to the same hash bucket, so + // it would be incorrect to assume all items returned match the hash value that + // was searched for. + + /// Implements a find whereby the user supplies the node's hash code. + /// It returns an iterator to the first element that matches the given hash. However, there may be multiple elements that match the given hash. + + template + ENABLE_IF_HASHCODE_EASTLSIZET(HashCodeT, iterator) find_by_hash(HashCodeT c) + { + EASTL_CT_ASSERT_MSG(bCacheHashCode, + "find_by_hash(hash_code_t c) is designed to avoid recomputing hashes, " + "so it requires cached hash codes. Consider setting template parameter " + "bCacheHashCode to true or using find_by_hash(const key_type& k, hash_code_t c) instead."); + + const size_type n = (size_type)bucket_index(c, (uint32_t)mnBucketCount); + + node_type* const pNode = DoFindNode(mpBucketArray[n], c); + + return pNode ? iterator(pNode, mpBucketArray + n) : + iterator(mpBucketArray + mnBucketCount); // iterator(mpBucketArray + mnBucketCount) == end() + } + + template + ENABLE_IF_HASHCODE_EASTLSIZET(HashCodeT, const_iterator) find_by_hash(HashCodeT c) const + { + EASTL_CT_ASSERT_MSG(bCacheHashCode, + "find_by_hash(hash_code_t c) is designed to avoid recomputing hashes, " + "so it requires cached hash codes. Consider setting template parameter " + "bCacheHashCode to true or using find_by_hash(const key_type& k, hash_code_t c) instead."); + + const size_type n = (size_type)bucket_index(c, (uint32_t)mnBucketCount); + + node_type* const pNode = DoFindNode(mpBucketArray[n], c); + + return pNode ? + const_iterator(pNode, mpBucketArray + n) : + const_iterator(mpBucketArray + mnBucketCount); // iterator(mpBucketArray + mnBucketCount) == end() + } + + iterator find_by_hash(const key_type& k, hash_code_t c) + { + const size_type n = (size_type)bucket_index(c, (uint32_t)mnBucketCount); + + node_type* const pNode = DoFindNode(mpBucketArray[n], k, c); + return pNode ? iterator(pNode, mpBucketArray + n) : iterator(mpBucketArray + mnBucketCount); // iterator(mpBucketArray + mnBucketCount) == end() + } + + const_iterator find_by_hash(const key_type& k, hash_code_t c) const + { + const size_type n = (size_type)bucket_index(c, (uint32_t)mnBucketCount); + + node_type* const pNode = DoFindNode(mpBucketArray[n], k, c); + return pNode ? const_iterator(pNode, mpBucketArray + n) : const_iterator(mpBucketArray + mnBucketCount); // iterator(mpBucketArray + mnBucketCount) == end() + } + + // Returns a pair that allows iterating over all nodes in a hash bucket + // first in the pair returned holds the iterator for the beginning of the bucket, + // second in the pair returned holds the iterator for the end of the bucket, + // If no bucket is found, both values in the pair are set to end(). + // + // See also the note above. 
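A sketch of the find_by_hash extension documented above: hash the key once (for example outside a lock, or when probing several tables with the same key) and pass both the key and its hash code so the table does not recompute it. Illustrative only.

    #include <EASTL/hash_set.h>
    #include <EASTL/string.h>
    #include <cassert>

    void FindByHashSketch()
    {
        eastl::hash_set<eastl::string> names;
        names.insert("alice");
        names.insert("bob");

        const eastl::string key("alice");

        // Hash once with the container's own hash functor...
        const size_t hashCode = names.hash_function()(key);

        // ...then search without re-hashing. The key is still required, because
        // several distinct keys can share a hash code (and a bucket).
        auto it = names.find_by_hash(key, hashCode);
        assert(it != names.end() && *it == "alice");
    }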
+ eastl::pair find_range_by_hash(hash_code_t c); + eastl::pair find_range_by_hash(hash_code_t c) const; + + size_type count(const key_type& k) const EA_NOEXCEPT; + + eastl::pair equal_range(const key_type& k); + eastl::pair equal_range(const key_type& k) const; + + bool validate() const; + int validate_iterator(const_iterator i) const; + + protected: + // We must remove one of the 'DoGetResultIterator' overloads from the overload-set (via SFINAE) because both can + // not compile successfully at the same time. The 'bUniqueKeys' template parameter chooses at compile-time the + // type of 'insert_return_type' between a pair and a raw iterator. We must pick between the two + // overloads that unpacks the iterator from the pair or simply passes the provided iterator to the caller based + // on the class template parameter. + template + iterator DoGetResultIterator(BoolConstantT, + const insert_return_type& irt, + ENABLE_IF_TRUETYPE(BoolConstantT) = nullptr) const EA_NOEXCEPT + { + return irt.first; + } + + template + iterator DoGetResultIterator(BoolConstantT, + const insert_return_type& irt, + DISABLE_IF_TRUETYPE(BoolConstantT) = nullptr) const EA_NOEXCEPT + { + return irt; + } + + node_type* DoAllocateNodeFromKey(const key_type& key); + node_type* DoAllocateNodeFromKey(key_type&& key); + void DoFreeNode(node_type* pNode); + void DoFreeNodes(node_type** pBucketArray, size_type); + + node_type** DoAllocateBuckets(size_type n); + void DoFreeBuckets(node_type** pBucketArray, size_type n); + + template + eastl::pair DoInsertValue(BoolConstantT, Args&&... args); + + template + iterator DoInsertValue(BoolConstantT, Args&&... args); + + + template + eastl::pair DoInsertValueExtra(BoolConstantT, + const key_type& k, + hash_code_t c, + node_type* pNodeNew, + value_type&& value, + ENABLE_IF_TRUETYPE(BoolConstantT) = nullptr); + + template + eastl::pair DoInsertValue(BoolConstantT, + value_type&& value, + ENABLE_IF_TRUETYPE(BoolConstantT) = nullptr); + + template + iterator DoInsertValueExtra(BoolConstantT, + const key_type& k, + hash_code_t c, + node_type* pNodeNew, + value_type&& value, + DISABLE_IF_TRUETYPE(BoolConstantT) = nullptr); + + template + iterator DoInsertValue(BoolConstantT, value_type&& value, DISABLE_IF_TRUETYPE(BoolConstantT) = nullptr); + + + template + eastl::pair DoInsertValueExtra(BoolConstantT, + const key_type& k, + hash_code_t c, + node_type* pNodeNew, + const value_type& value, + ENABLE_IF_TRUETYPE(BoolConstantT) = nullptr); + + template + eastl::pair DoInsertValue(BoolConstantT, + const value_type& value, + ENABLE_IF_TRUETYPE(BoolConstantT) = nullptr); + + template + iterator DoInsertValueExtra(BoolConstantT, + const key_type& k, + hash_code_t c, + node_type* pNodeNew, + const value_type& value, + DISABLE_IF_TRUETYPE(BoolConstantT) = nullptr); + + template + iterator DoInsertValue(BoolConstantT, const value_type& value, DISABLE_IF_TRUETYPE(BoolConstantT) = nullptr); + + template + node_type* DoAllocateNode(Args&&... args); + node_type* DoAllocateNode(value_type&& value); + node_type* DoAllocateNode(const value_type& value); + + // DoInsertKey is supposed to get hash_code_t c = get_hash_code(key). 
+ // it is done in case application has it's own hashset/hashmap-like containter, where hash code is for some reason known prior the insert + // this allows to save some performance, especially with heavy hash functions + eastl::pair DoInsertKey(true_type, const key_type& key, hash_code_t c); + iterator DoInsertKey(false_type, const key_type& key, hash_code_t c); + eastl::pair DoInsertKey(true_type, key_type&& key, hash_code_t c); + iterator DoInsertKey(false_type, key_type&& key, hash_code_t c); + + // We keep DoInsertKey overload without third parameter, for compatibility with older revisions of EASTL (3.12.07 and earlier) + // It used to call get_hash_code as a first call inside the DoInsertKey. + eastl::pair DoInsertKey(true_type, const key_type& key) { return DoInsertKey(true_type(), key, get_hash_code(key)); } + iterator DoInsertKey(false_type, const key_type& key) { return DoInsertKey(false_type(), key, get_hash_code(key)); } + eastl::pair DoInsertKey(true_type, key_type&& key) { return DoInsertKey(true_type(), eastl::move(key), get_hash_code(key)); } + iterator DoInsertKey(false_type, key_type&& key) { return DoInsertKey(false_type(), eastl::move(key), get_hash_code(key)); } + + void DoRehash(size_type nBucketCount); + node_type* DoFindNode(node_type* pNode, const key_type& k, hash_code_t c) const; + + template + ENABLE_IF_HAS_HASHCODE(T, node_type) DoFindNode(T* pNode, hash_code_t c) const + { + for (; pNode; pNode = pNode->mpNext) + { + if (pNode->mnHashCode == c) + return pNode; + } + return NULL; + } + + template + node_type* DoFindNodeT(node_type* pNode, const U& u, BinaryPredicate predicate) const; + + }; // class hashtable + + + + + + /////////////////////////////////////////////////////////////////////// + // node_iterator_base + /////////////////////////////////////////////////////////////////////// + + template + inline bool operator==(const node_iterator_base& a, const node_iterator_base& b) + { return a.mpNode == b.mpNode; } + + template + inline bool operator!=(const node_iterator_base& a, const node_iterator_base& b) + { return a.mpNode != b.mpNode; } + + + + + /////////////////////////////////////////////////////////////////////// + // hashtable_iterator_base + /////////////////////////////////////////////////////////////////////// + + template + inline bool operator==(const hashtable_iterator_base& a, const hashtable_iterator_base& b) + { return a.mpNode == b.mpNode; } + + template + inline bool operator!=(const hashtable_iterator_base& a, const hashtable_iterator_base& b) + { return a.mpNode != b.mpNode; } + + + + + /////////////////////////////////////////////////////////////////////// + // hashtable + /////////////////////////////////////////////////////////////////////// + + template + hashtable + ::hashtable(size_type nBucketCount, const H1& h1, const H2& h2, const H& h, + const Eq& eq, const EK& ek, const allocator_type& allocator) + : rehash_base(), + hash_code_base(ek, eq, h1, h2, h), + mnBucketCount(0), + mnElementCount(0), + mRehashPolicy(), + mAllocator(allocator) + { + if(nBucketCount < 2) // If we are starting in an initially empty state, with no memory allocation done. + reset_lose_memory(); + else // Else we are creating a potentially non-empty hashtable... + { + EASTL_ASSERT(nBucketCount < 10000000); + mnBucketCount = (size_type)mRehashPolicy.GetNextBucketCount((uint32_t)nBucketCount); + mpBucketArray = DoAllocateBuckets(mnBucketCount); // mnBucketCount will always be at least 2. 
+ } + } + + + + template + template + hashtable::hashtable(FowardIterator first, FowardIterator last, size_type nBucketCount, + const H1& h1, const H2& h2, const H& h, + const Eq& eq, const EK& ek, const allocator_type& allocator) + : rehash_base(), + hash_code_base(ek, eq, h1, h2, h), + //mnBucketCount(0), // This gets re-assigned below. + mnElementCount(0), + mRehashPolicy(), + mAllocator(allocator) + { + if(nBucketCount < 2) + { + const size_type nElementCount = (size_type)eastl::ht_distance(first, last); + mnBucketCount = (size_type)mRehashPolicy.GetBucketCount((uint32_t)nElementCount); + } + else + { + EASTL_ASSERT(nBucketCount < 10000000); + mnBucketCount = nBucketCount; + } + + mpBucketArray = DoAllocateBuckets(mnBucketCount); // mnBucketCount will always be at least 2. + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + for(; first != last; ++first) + insert(*first); + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + clear(); + DoFreeBuckets(mpBucketArray, mnBucketCount); + throw; + } + #endif + } + + + + template + hashtable::hashtable(const this_type& x) + : rehash_base(x), + hash_code_base(x), + mnBucketCount(x.mnBucketCount), + mnElementCount(x.mnElementCount), + mRehashPolicy(x.mRehashPolicy), + mAllocator(x.mAllocator) + { + if(mnElementCount) // If there is anything to copy... + { + mpBucketArray = DoAllocateBuckets(mnBucketCount); // mnBucketCount will be at least 2. + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + for(size_type i = 0; i < x.mnBucketCount; ++i) + { + node_type* pNodeSource = x.mpBucketArray[i]; + node_type** ppNodeDest = mpBucketArray + i; + + while(pNodeSource) + { + *ppNodeDest = DoAllocateNode(pNodeSource->mValue); + copy_code(*ppNodeDest, pNodeSource); + ppNodeDest = &(*ppNodeDest)->mpNext; + pNodeSource = pNodeSource->mpNext; + } + } + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + clear(); + DoFreeBuckets(mpBucketArray, mnBucketCount); + throw; + } + #endif + } + else + { + // In this case, instead of allocate memory and copy nothing from x, + // we reset ourselves to a zero allocation state. + reset_lose_memory(); + } + } + + + template + hashtable::hashtable(this_type&& x) + : rehash_base(x), + hash_code_base(x), + mnBucketCount(0), + mnElementCount(0), + mRehashPolicy(x.mRehashPolicy), + mAllocator(x.mAllocator) + { + reset_lose_memory(); // We do this here the same as we do it in the default ctor because it puts the container in a proper initial empty state. This code would be cleaner if we could rely on being able to use C++11 delegating constructors and just call the default ctor here. + swap(x); + } + + + template + hashtable::hashtable(this_type&& x, const allocator_type& allocator) + : rehash_base(x), + hash_code_base(x), + mnBucketCount(0), + mnElementCount(0), + mRehashPolicy(x.mRehashPolicy), + mAllocator(allocator) + { + reset_lose_memory(); // We do this here the same as we do it in the default ctor because it puts the container in a proper initial empty state. This code would be cleaner if we could rely on being able to use C++11 delegating constructors and just call the default ctor here. + swap(x); // swap will directly or indirectly handle the possibility that mAllocator != x.mAllocator. 
+ } + + + template + inline const typename hashtable::allocator_type& + hashtable::get_allocator() const EA_NOEXCEPT + { + return mAllocator; + } + + + + template + inline typename hashtable::allocator_type& + hashtable::get_allocator() EA_NOEXCEPT + { + return mAllocator; + } + + + + template + inline void hashtable::set_allocator(const allocator_type& allocator) + { + mAllocator = allocator; + } + + + + template + inline typename hashtable::this_type& + hashtable::operator=(const this_type& x) + { + if(this != &x) + { + clear(); + + #if EASTL_ALLOCATOR_COPY_ENABLED + mAllocator = x.mAllocator; + #endif + + insert(x.begin(), x.end()); + } + return *this; + } + + + template + inline typename hashtable::this_type& + hashtable::operator=(this_type&& x) + { + if(this != &x) + { + clear(); // To consider: Are we really required to clear here? x is going away soon and will clear itself in its dtor. + swap(x); // member swap handles the case that x has a different allocator than our allocator by doing a copy. + } + return *this; + } + + + template + inline typename hashtable::this_type& + hashtable::operator=(std::initializer_list ilist) + { + // The simplest means of doing this is to clear and insert. There probably isn't a generic + // solution that's any more efficient without having prior knowledge of the ilist contents. + clear(); + insert(ilist.begin(), ilist.end()); + return *this; + } + + + + template + inline hashtable::~hashtable() + { + clear(); + DoFreeBuckets(mpBucketArray, mnBucketCount); + } + + + template + typename hashtable::node_type* + hashtable::DoAllocateNodeFromKey(const key_type& key) + { + node_type* const pNode = (node_type*)allocate_memory(mAllocator, sizeof(node_type), EASTL_ALIGN_OF(value_type), 0); + EASTL_ASSERT_MSG(pNode != nullptr, "the behaviour of eastl::allocators that return nullptr is not defined."); + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + ::new(eastl::addressof(pNode->mValue)) value_type(pair_first_construct, key); + pNode->mpNext = NULL; + return pNode; + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + EASTLFree(mAllocator, pNode, sizeof(node_type)); + throw; + } + #endif + } + + + template + typename hashtable::node_type* + hashtable::DoAllocateNodeFromKey(key_type&& key) + { + node_type* const pNode = (node_type*)allocate_memory(mAllocator, sizeof(node_type), EASTL_ALIGN_OF(value_type), 0); + EASTL_ASSERT_MSG(pNode != nullptr, "the behaviour of eastl::allocators that return nullptr is not defined."); + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + ::new(eastl::addressof(pNode->mValue)) value_type(pair_first_construct, eastl::move(key)); + pNode->mpNext = NULL; + return pNode; + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + EASTLFree(mAllocator, pNode, sizeof(node_type)); + throw; + } + #endif + } + + + template + inline void hashtable::DoFreeNode(node_type* pNode) + { + pNode->~node_type(); + EASTLFree(mAllocator, pNode, sizeof(node_type)); + } + + + + template + void hashtable::DoFreeNodes(node_type** pNodeArray, size_type n) + { + for(size_type i = 0; i < n; ++i) + { + node_type* pNode = pNodeArray[i]; + while(pNode) + { + node_type* const pTempNode = pNode; + pNode = pNode->mpNext; + DoFreeNode(pTempNode); + } + pNodeArray[i] = NULL; + } + } + + + + template + typename hashtable::node_type** + hashtable::DoAllocateBuckets(size_type n) + { + // We allocate one extra bucket to hold a sentinel, an arbitrary + // non-null pointer. Iterator increment relies on this. 
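+ // Put differently, the returned allocation holds n + 1 pointers:
+ //     [bucket 0][bucket 1] ... [bucket n-1][non-null sentinel]
+ // The sentinel lets iterator increment scan forward for a non-empty bucket and stop at the
+ // end of the array without consulting the bucket count.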
+ EASTL_ASSERT(n > 1); // We reserve an mnBucketCount of 1 for the shared gpEmptyBucketArray. + EASTL_CT_ASSERT(kHashtableAllocFlagBuckets == 0x00400000); // Currently we expect this to be so, because the allocator has a copy of this enum. + node_type** const pBucketArray = (node_type**)EASTLAllocAlignedFlags(mAllocator, (n + 1) * sizeof(node_type*), EASTL_ALIGN_OF(node_type*), 0, kHashtableAllocFlagBuckets); + //eastl::fill(pBucketArray, pBucketArray + n, (node_type*)NULL); + memset(pBucketArray, 0, n * sizeof(node_type*)); + pBucketArray[n] = reinterpret_cast((uintptr_t)~0); + return pBucketArray; + } + + + + template + inline void hashtable::DoFreeBuckets(node_type** pBucketArray, size_type n) + { + // If n <= 1, then pBucketArray is from the shared gpEmptyBucketArray. We don't test + // for pBucketArray == &gpEmptyBucketArray because one library have a different gpEmptyBucketArray + // than another but pass a hashtable to another. So we go by the size. + if(n > 1) + EASTLFree(mAllocator, pBucketArray, (n + 1) * sizeof(node_type*)); // '+1' because DoAllocateBuckets allocates nBucketCount + 1 buckets in order to have a NULL sentinel at the end. + } + + + template + void hashtable::swap(this_type& x) + { + hash_code_base::base_swap(x); // hash_code_base has multiple implementations, so we let them handle the swap. + eastl::swap(mRehashPolicy, x.mRehashPolicy); + EASTL_MACRO_SWAP(node_type**, mpBucketArray, x.mpBucketArray); + eastl::swap(mnBucketCount, x.mnBucketCount); + eastl::swap(mnElementCount, x.mnElementCount); + + if (mAllocator != x.mAllocator) // If allocators are not equivalent... + { + eastl::swap(mAllocator, x.mAllocator); + } + } + + + template + inline void hashtable::rehash_policy(const rehash_policy_type& rehashPolicy) + { + mRehashPolicy = rehashPolicy; + + const size_type nBuckets = rehashPolicy.GetBucketCount((uint32_t)mnElementCount); + + if(nBuckets > mnBucketCount) + DoRehash(nBuckets); + } + + + + template + inline typename hashtable::iterator + hashtable::find(const key_type& k) + { + const hash_code_t c = get_hash_code(k); + const size_type n = (size_type)bucket_index(k, c, (uint32_t)mnBucketCount); + + node_type* const pNode = DoFindNode(mpBucketArray[n], k, c); + return pNode ? iterator(pNode, mpBucketArray + n) : iterator(mpBucketArray + mnBucketCount); // iterator(mpBucketArray + mnBucketCount) == end() + } + + + + template + inline typename hashtable::const_iterator + hashtable::find(const key_type& k) const + { + const hash_code_t c = get_hash_code(k); + const size_type n = (size_type)bucket_index(k, c, (uint32_t)mnBucketCount); + + node_type* const pNode = DoFindNode(mpBucketArray[n], k, c); + return pNode ? const_iterator(pNode, mpBucketArray + n) : const_iterator(mpBucketArray + mnBucketCount); // iterator(mpBucketArray + mnBucketCount) == end() + } + + + + template + template + inline typename hashtable::iterator + hashtable::find_as(const U& other, UHash uhash, BinaryPredicate predicate) + { + const hash_code_t c = (hash_code_t)uhash(other); + const size_type n = (size_type)(c % mnBucketCount); // This assumes we are using the mod range policy. + + node_type* const pNode = DoFindNodeT(mpBucketArray[n], other, predicate); + return pNode ? 
iterator(pNode, mpBucketArray + n) : iterator(mpBucketArray + mnBucketCount); // iterator(mpBucketArray + mnBucketCount) == end() + } + + + + template + template + inline typename hashtable::const_iterator + hashtable::find_as(const U& other, UHash uhash, BinaryPredicate predicate) const + { + const hash_code_t c = (hash_code_t)uhash(other); + const size_type n = (size_type)(c % mnBucketCount); // This assumes we are using the mod range policy. + + node_type* const pNode = DoFindNodeT(mpBucketArray[n], other, predicate); + return pNode ? const_iterator(pNode, mpBucketArray + n) : const_iterator(mpBucketArray + mnBucketCount); // iterator(mpBucketArray + mnBucketCount) == end() + } + + + /// hashtable_find + /// + /// Helper function that defaults to using hash and equal_to_2. + /// This makes it so that by default you don't need to provide these. + /// Note that the default hash functions may not be what you want, though. + /// + /// Example usage. Instead of this: + /// hash_set hashSet; + /// hashSet.find("hello", hash(), equal_to_2()); + /// + /// You can use this: + /// hash_set hashSet; + /// hashtable_find(hashSet, "hello"); + /// + template + inline typename H::iterator hashtable_find(H& hashTable, U u) + { return hashTable.find_as(u, eastl::hash(), eastl::equal_to_2()); } + + template + inline typename H::const_iterator hashtable_find(const H& hashTable, U u) + { return hashTable.find_as(u, eastl::hash(), eastl::equal_to_2()); } + + + + template + template + inline typename hashtable::iterator + hashtable::find_as(const U& other) + { return eastl::hashtable_find(*this, other); } + // VC++ doesn't appear to like the following, though it seems correct to me. + // So we implement the workaround above until we can straighten this out. + //{ return find_as(other, eastl::hash(), eastl::equal_to_2()); } + + + template + template + inline typename hashtable::const_iterator + hashtable::find_as(const U& other) const + { return eastl::hashtable_find(*this, other); } + // VC++ doesn't appear to like the following, though it seems correct to me. + // So we implement the workaround above until we can straighten this out. 
+ //{ return find_as(other, eastl::hash(), eastl::equal_to_2()); } + + + + template + eastl::pair::const_iterator, + typename hashtable::const_iterator> + hashtable::find_range_by_hash(hash_code_t c) const + { + const size_type start = (size_type)bucket_index(c, (uint32_t)mnBucketCount); + node_type* const pNodeStart = mpBucketArray[start]; + + if (pNodeStart) + { + eastl::pair pair(const_iterator(pNodeStart, mpBucketArray + start), + const_iterator(pNodeStart, mpBucketArray + start)); + pair.second.increment_bucket(); + return pair; + } + + return eastl::pair(const_iterator(mpBucketArray + mnBucketCount), + const_iterator(mpBucketArray + mnBucketCount)); + } + + + + template + eastl::pair::iterator, + typename hashtable::iterator> + hashtable::find_range_by_hash(hash_code_t c) + { + const size_type start = (size_type)bucket_index(c, (uint32_t)mnBucketCount); + node_type* const pNodeStart = mpBucketArray[start]; + + if (pNodeStart) + { + eastl::pair pair(iterator(pNodeStart, mpBucketArray + start), + iterator(pNodeStart, mpBucketArray + start)); + pair.second.increment_bucket(); + return pair; + + } + + return eastl::pair(iterator(mpBucketArray + mnBucketCount), + iterator(mpBucketArray + mnBucketCount)); + } + + + + template + typename hashtable::size_type + hashtable::count(const key_type& k) const EA_NOEXCEPT + { + const hash_code_t c = get_hash_code(k); + const size_type n = (size_type)bucket_index(k, c, (uint32_t)mnBucketCount); + size_type result = 0; + + // To do: Make a specialization for bU (unique keys) == true and take + // advantage of the fact that the count will always be zero or one in that case. + for(node_type* pNode = mpBucketArray[n]; pNode; pNode = pNode->mpNext) + { + if(compare(k, c, pNode)) + ++result; + } + return result; + } + + + + template + eastl::pair::iterator, + typename hashtable::iterator> + hashtable::equal_range(const key_type& k) + { + const hash_code_t c = get_hash_code(k); + const size_type n = (size_type)bucket_index(k, c, (uint32_t)mnBucketCount); + node_type** head = mpBucketArray + n; + node_type* pNode = DoFindNode(*head, k, c); + + if(pNode) + { + node_type* p1 = pNode->mpNext; + + for(; p1; p1 = p1->mpNext) + { + if(!compare(k, c, p1)) + break; + } + + iterator first(pNode, head); + iterator last(p1, head); + + if(!p1) + last.increment_bucket(); + + return eastl::pair(first, last); + } + + return eastl::pair(iterator(mpBucketArray + mnBucketCount), // iterator(mpBucketArray + mnBucketCount) == end() + iterator(mpBucketArray + mnBucketCount)); + } + + + + + template + eastl::pair::const_iterator, + typename hashtable::const_iterator> + hashtable::equal_range(const key_type& k) const + { + const hash_code_t c = get_hash_code(k); + const size_type n = (size_type)bucket_index(k, c, (uint32_t)mnBucketCount); + node_type** head = mpBucketArray + n; + node_type* pNode = DoFindNode(*head, k, c); + + if(pNode) + { + node_type* p1 = pNode->mpNext; + + for(; p1; p1 = p1->mpNext) + { + if(!compare(k, c, p1)) + break; + } + + const_iterator first(pNode, head); + const_iterator last(p1, head); + + if(!p1) + last.increment_bucket(); + + return eastl::pair(first, last); + } + + return eastl::pair(const_iterator(mpBucketArray + mnBucketCount), // iterator(mpBucketArray + mnBucketCount) == end() + const_iterator(mpBucketArray + mnBucketCount)); + } + + + + template + inline typename hashtable::node_type* + hashtable::DoFindNode(node_type* pNode, const key_type& k, hash_code_t c) const + { + for(; pNode; pNode = pNode->mpNext) + { + if(compare(k, c, pNode)) + 
return pNode; + } + return NULL; + } + + + + template + template + inline typename hashtable::node_type* + hashtable::DoFindNodeT(node_type* pNode, const U& other, BinaryPredicate predicate) const + { + for(; pNode; pNode = pNode->mpNext) + { + if(predicate(mExtractKey(pNode->mValue), other)) // Intentionally compare with key as first arg and other as second arg. + return pNode; + } + return NULL; + } + + + + template + template + eastl::pair::iterator, bool> + hashtable::DoInsertValue(BoolConstantT, Args&&... args) // true_type means bUniqueKeys is true. + { + // Adds the value to the hash table if not already present. + // If already present then the existing value is returned via an iterator/bool pair. + + // We have a chicken-and-egg problem here. In order to know if and where to insert the value, we need to get the + // hashtable key for the value. But we don't explicitly have a value argument, we have a templated Args&&... argument. + // We need the value_type in order to proceed, but that entails getting an instance of a value_type from the args. + // And it may turn out that the value is already present in the hashtable and we need to cancel the insertion, + // despite having obtained a value_type to put into the hashtable. We have mitigated this problem somewhat by providing + // specializations of the insert function for const value_type& and value_type&&, and so the only time this function + // should get called is when args refers to arguments to construct a value_type. + + node_type* const pNodeNew = DoAllocateNode(eastl::forward(args)...); + const key_type& k = mExtractKey(pNodeNew->mValue); + const hash_code_t c = get_hash_code(k); + size_type n = (size_type)bucket_index(k, c, (uint32_t)mnBucketCount); + node_type* const pNode = DoFindNode(mpBucketArray[n], k, c); + + if(pNode == NULL) // If value is not present... add it. + { + const eastl::pair bRehash = mRehashPolicy.GetRehashRequired((uint32_t)mnBucketCount, (uint32_t)mnElementCount, (uint32_t)1); + + set_code(pNodeNew, c); // This is a no-op for most hashtables. + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + if(bRehash.first) + { + n = (size_type)bucket_index(k, c, (uint32_t)bRehash.second); + DoRehash(bRehash.second); + } + + EASTL_ASSERT((uintptr_t)mpBucketArray != (uintptr_t)&gpEmptyBucketArray[0]); + pNodeNew->mpNext = mpBucketArray[n]; + mpBucketArray[n] = pNodeNew; + ++mnElementCount; + + return eastl::pair(iterator(pNodeNew, mpBucketArray + n), true); + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + DoFreeNode(pNodeNew); + throw; + } + #endif + } + else + { + // To do: We have an inefficiency to deal with here. We allocated a node above but we are freeing it here because + // it turned out it wasn't needed. But we needed to create the node in order to get the hashtable key for + // the node. One possible resolution is to create specializations: DoInsertValue(true_type, value_type&&) and + // DoInsertValue(true_type, const value_type&) which don't need to create a node up front in order to get the + // hashtable key. Probably most users would end up using these pathways instead of this Args... pathway. + // While we should considering handling this to-do item, a lot of the performance limitations of maps and sets + // in practice is with finding elements rather than adding (potentially redundant) new elements. 
+ DoFreeNode(pNodeNew); + } + + return eastl::pair(iterator(pNode, mpBucketArray + n), false); + } + + + template + template + typename hashtable::iterator + hashtable::DoInsertValue(BoolConstantT, Args&&... args) // false_type means bUniqueKeys is false. + { + const eastl::pair bRehash = mRehashPolicy.GetRehashRequired((uint32_t)mnBucketCount, (uint32_t)mnElementCount, (uint32_t)1); + + if(bRehash.first) + DoRehash(bRehash.second); + + node_type* pNodeNew = DoAllocateNode(eastl::forward(args)...); + const key_type& k = mExtractKey(pNodeNew->mValue); + const hash_code_t c = get_hash_code(k); + const size_type n = (size_type)bucket_index(k, c, (uint32_t)mnBucketCount); + + set_code(pNodeNew, c); // This is a no-op for most hashtables. + + // To consider: Possibly make this insertion not make equal elements contiguous. + // As it stands now, we insert equal values contiguously in the hashtable. + // The benefit is that equal_range can work in a sensible manner and that + // erase(value) can more quickly find equal values. The downside is that + // this insertion operation taking some extra time. How important is it to + // us that equal_range span all equal items? + node_type* const pNodePrev = DoFindNode(mpBucketArray[n], k, c); + + if(pNodePrev == NULL) + { + EASTL_ASSERT((void**)mpBucketArray != &gpEmptyBucketArray[0]); + pNodeNew->mpNext = mpBucketArray[n]; + mpBucketArray[n] = pNodeNew; + } + else + { + pNodeNew->mpNext = pNodePrev->mpNext; + pNodePrev->mpNext = pNodeNew; + } + + ++mnElementCount; + + return iterator(pNodeNew, mpBucketArray + n); + } + + + template + template + typename hashtable::node_type* + hashtable::DoAllocateNode(Args&&... args) + { + node_type* const pNode = (node_type*)allocate_memory(mAllocator, sizeof(node_type), EASTL_ALIGN_OF(value_type), 0); + EASTL_ASSERT_MSG(pNode != nullptr, "the behaviour of eastl::allocators that return nullptr is not defined."); + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + ::new(eastl::addressof(pNode->mValue)) value_type(eastl::forward(args)...); + pNode->mpNext = NULL; + return pNode; + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + EASTLFree(mAllocator, pNode, sizeof(node_type)); + throw; + } + #endif + } + + + //////////////////////////////////////////////////////////////////////////////////////////////////// + // Note: The following insertion-related functions are nearly copies of the above three functions, + // but are for value_type&& and const value_type& arguments. It's useful for us to have the functions + // below, even when using a fully compliant C++11 compiler that supports the above functions. + // The reason is because the specializations below are slightly more efficient because they can delay + // the creation of a node until it's known that it will be needed. + //////////////////////////////////////////////////////////////////////////////////////////////////// + + template + template + eastl::pair::iterator, bool> + hashtable::DoInsertValueExtra(BoolConstantT, const key_type& k, + hash_code_t c, node_type* pNodeNew, value_type&& value, ENABLE_IF_TRUETYPE(BoolConstantT)) // true_type means bUniqueKeys is true. + { + // Adds the value to the hash table if not already present. + // If already present then the existing value is returned via an iterator/bool pair. + size_type n = (size_type)bucket_index(k, c, (uint32_t)mnBucketCount); + node_type* const pNode = DoFindNode(mpBucketArray[n], k, c); + + if(pNode == NULL) // If value is not present... add it. 
+ { + const eastl::pair bRehash = mRehashPolicy.GetRehashRequired((uint32_t)mnBucketCount, (uint32_t)mnElementCount, (uint32_t)1); + + // Allocate the new node before doing the rehash so that we don't + // do a rehash if the allocation throws. + #if EASTL_EXCEPTIONS_ENABLED + bool nodeAllocated; // If exceptions are enabled then we we need to track if we allocated the node so we can free it in the catch block. + #endif + + if(pNodeNew) + { + ::new(eastl::addressof(pNodeNew->mValue)) value_type(eastl::move(value)); // It's expected that pNodeNew was allocated with allocate_uninitialized_node. + #if EASTL_EXCEPTIONS_ENABLED + nodeAllocated = false; + #endif + } + else + { + pNodeNew = DoAllocateNode(eastl::move(value)); + #if EASTL_EXCEPTIONS_ENABLED + nodeAllocated = true; + #endif + } + + set_code(pNodeNew, c); // This is a no-op for most hashtables. + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + if(bRehash.first) + { + n = (size_type)bucket_index(k, c, (uint32_t)bRehash.second); + DoRehash(bRehash.second); + } + + EASTL_ASSERT((uintptr_t)mpBucketArray != (uintptr_t)&gpEmptyBucketArray[0]); + pNodeNew->mpNext = mpBucketArray[n]; + mpBucketArray[n] = pNodeNew; + ++mnElementCount; + + return eastl::pair(iterator(pNodeNew, mpBucketArray + n), true); + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + if(nodeAllocated) // If we allocated the node within this function, free it. Else let the caller retain ownership of it. + DoFreeNode(pNodeNew); + throw; + } + #endif + } + // Else the value is already present, so don't add a new node. And don't free pNodeNew. + + return eastl::pair(iterator(pNode, mpBucketArray + n), false); + } + + + template + template + eastl::pair::iterator, bool> + hashtable::DoInsertValue(BoolConstantT, value_type&& value, ENABLE_IF_TRUETYPE(BoolConstantT)) // true_type means bUniqueKeys is true. + { + const key_type& k = mExtractKey(value); + const hash_code_t c = get_hash_code(k); + + return DoInsertValueExtra(true_type(), k, c, NULL, eastl::move(value)); + } + + + template + template + typename hashtable::iterator + hashtable::DoInsertValueExtra(BoolConstantT, const key_type& k, hash_code_t c, node_type* pNodeNew, value_type&& value, + DISABLE_IF_TRUETYPE(BoolConstantT)) // false_type means bUniqueKeys is false. + { + const eastl::pair bRehash = mRehashPolicy.GetRehashRequired((uint32_t)mnBucketCount, (uint32_t)mnElementCount, (uint32_t)1); + + if(bRehash.first) + DoRehash(bRehash.second); // Note: We don't need to wrap this call with try/catch because there's nothing we would need to do in the catch. + + const size_type n = (size_type)bucket_index(k, c, (uint32_t)mnBucketCount); + + if(pNodeNew) + ::new(eastl::addressof(pNodeNew->mValue)) value_type(eastl::move(value)); // It's expected that pNodeNew was allocated with allocate_uninitialized_node. + else + pNodeNew = DoAllocateNode(eastl::move(value)); + + set_code(pNodeNew, c); // This is a no-op for most hashtables. + + // To consider: Possibly make this insertion not make equal elements contiguous. + // As it stands now, we insert equal values contiguously in the hashtable. + // The benefit is that equal_range can work in a sensible manner and that + // erase(value) can more quickly find equal values. The downside is that + // this insertion operation taking some extra time. How important is it to + // us that equal_range span all equal items? 
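+ // DoFindNode returns an existing node with an equal key, if any. When such a node exists,
+ // the new node is linked immediately after it (keeping equal keys adjacent in the bucket
+ // chain); otherwise the new node is pushed onto the front of the bucket.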
+ node_type* const pNodePrev = DoFindNode(mpBucketArray[n], k, c); + + if(pNodePrev == NULL) + { + EASTL_ASSERT((void**)mpBucketArray != &gpEmptyBucketArray[0]); + pNodeNew->mpNext = mpBucketArray[n]; + mpBucketArray[n] = pNodeNew; + } + else + { + pNodeNew->mpNext = pNodePrev->mpNext; + pNodePrev->mpNext = pNodeNew; + } + + ++mnElementCount; + + return iterator(pNodeNew, mpBucketArray + n); + } + + + template + template + typename hashtable::iterator + hashtable::DoInsertValue(BoolConstantT, value_type&& value, DISABLE_IF_TRUETYPE(BoolConstantT)) // false_type means bUniqueKeys is false. + { + const key_type& k = mExtractKey(value); + const hash_code_t c = get_hash_code(k); + + return DoInsertValueExtra(false_type(), k, c, NULL, eastl::move(value)); + } + + + template + typename hashtable::node_type* + hashtable::DoAllocateNode(value_type&& value) + { + node_type* const pNode = (node_type*)allocate_memory(mAllocator, sizeof(node_type), EASTL_ALIGN_OF(value_type), 0); + EASTL_ASSERT_MSG(pNode != nullptr, "the behaviour of eastl::allocators that return nullptr is not defined."); + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + ::new(eastl::addressof(pNode->mValue)) value_type(eastl::move(value)); + pNode->mpNext = NULL; + return pNode; + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + EASTLFree(mAllocator, pNode, sizeof(node_type)); + throw; + } + #endif + } + + + template + template + eastl::pair::iterator, bool> + hashtable::DoInsertValueExtra(BoolConstantT, const key_type& k, hash_code_t c, node_type* pNodeNew, const value_type& value, + ENABLE_IF_TRUETYPE(BoolConstantT)) // true_type means bUniqueKeys is true. + { + // Adds the value to the hash table if not already present. + // If already present then the existing value is returned via an iterator/bool pair. + size_type n = (size_type)bucket_index(k, c, (uint32_t)mnBucketCount); + node_type* const pNode = DoFindNode(mpBucketArray[n], k, c); + + if(pNode == NULL) // If value is not present... add it. + { + const eastl::pair bRehash = mRehashPolicy.GetRehashRequired((uint32_t)mnBucketCount, (uint32_t)mnElementCount, (uint32_t)1); + + // Allocate the new node before doing the rehash so that we don't + // do a rehash if the allocation throws. + #if EASTL_EXCEPTIONS_ENABLED + bool nodeAllocated; // If exceptions are enabled then we we need to track if we allocated the node so we can free it in the catch block. + #endif + + if(pNodeNew) + { + ::new(eastl::addressof(pNodeNew->mValue)) value_type(value); // It's expected that pNodeNew was allocated with allocate_uninitialized_node. + #if EASTL_EXCEPTIONS_ENABLED + nodeAllocated = false; + #endif + } + else + { + pNodeNew = DoAllocateNode(value); + #if EASTL_EXCEPTIONS_ENABLED + nodeAllocated = true; + #endif + } + + set_code(pNodeNew, c); // This is a no-op for most hashtables. + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + if(bRehash.first) + { + n = (size_type)bucket_index(k, c, (uint32_t)bRehash.second); + DoRehash(bRehash.second); + } + + EASTL_ASSERT((uintptr_t)mpBucketArray != (uintptr_t)&gpEmptyBucketArray[0]); + pNodeNew->mpNext = mpBucketArray[n]; + mpBucketArray[n] = pNodeNew; + ++mnElementCount; + + return eastl::pair(iterator(pNodeNew, mpBucketArray + n), true); + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + if(nodeAllocated) // If we allocated the node within this function, free it. Else let the caller retain ownership of it. + DoFreeNode(pNodeNew); + throw; + } + #endif + } + // Else the value is already present, so don't add a new node. 
And don't free pNodeNew. + + return eastl::pair(iterator(pNode, mpBucketArray + n), false); + } + + + template + template + eastl::pair::iterator, bool> + hashtable::DoInsertValue(BoolConstantT, const value_type& value, ENABLE_IF_TRUETYPE(BoolConstantT)) // true_type means bUniqueKeys is true. + { + const key_type& k = mExtractKey(value); + const hash_code_t c = get_hash_code(k); + + return DoInsertValueExtra(true_type(), k, c, NULL, value); + } + + + template + template + typename hashtable::iterator + hashtable::DoInsertValueExtra(BoolConstantT, const key_type& k, hash_code_t c, node_type* pNodeNew, const value_type& value, + DISABLE_IF_TRUETYPE(BoolConstantT)) // false_type means bUniqueKeys is false. + { + const eastl::pair bRehash = mRehashPolicy.GetRehashRequired((uint32_t)mnBucketCount, (uint32_t)mnElementCount, (uint32_t)1); + + if(bRehash.first) + DoRehash(bRehash.second); // Note: We don't need to wrap this call with try/catch because there's nothing we would need to do in the catch. + + const size_type n = (size_type)bucket_index(k, c, (uint32_t)mnBucketCount); + + if(pNodeNew) + ::new(eastl::addressof(pNodeNew->mValue)) value_type(value); // It's expected that pNodeNew was allocated with allocate_uninitialized_node. + else + pNodeNew = DoAllocateNode(value); + + set_code(pNodeNew, c); // This is a no-op for most hashtables. + + // To consider: Possibly make this insertion not make equal elements contiguous. + // As it stands now, we insert equal values contiguously in the hashtable. + // The benefit is that equal_range can work in a sensible manner and that + // erase(value) can more quickly find equal values. The downside is that + // this insertion operation taking some extra time. How important is it to + // us that equal_range span all equal items? + node_type* const pNodePrev = DoFindNode(mpBucketArray[n], k, c); + + if(pNodePrev == NULL) + { + EASTL_ASSERT((void**)mpBucketArray != &gpEmptyBucketArray[0]); + pNodeNew->mpNext = mpBucketArray[n]; + mpBucketArray[n] = pNodeNew; + } + else + { + pNodeNew->mpNext = pNodePrev->mpNext; + pNodePrev->mpNext = pNodeNew; + } + + ++mnElementCount; + + return iterator(pNodeNew, mpBucketArray + n); + } + + + template + template + typename hashtable::iterator + hashtable::DoInsertValue(BoolConstantT, const value_type& value, DISABLE_IF_TRUETYPE(BoolConstantT)) // false_type means bUniqueKeys is false. + { + const key_type& k = mExtractKey(value); + const hash_code_t c = get_hash_code(k); + + return DoInsertValueExtra(false_type(), k, c, NULL, value); + } + + + template + typename hashtable::node_type* + hashtable::DoAllocateNode(const value_type& value) + { + node_type* const pNode = (node_type*)allocate_memory(mAllocator, sizeof(node_type), EASTL_ALIGN_OF(value_type), 0); + EASTL_ASSERT_MSG(pNode != nullptr, "the behaviour of eastl::allocators that return nullptr is not defined."); + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + ::new(eastl::addressof(pNode->mValue)) value_type(value); + pNode->mpNext = NULL; + return pNode; + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + EASTLFree(mAllocator, pNode, sizeof(node_type)); + throw; + } + #endif + } + + + template + typename hashtable::node_type* + hashtable::allocate_uninitialized_node() + { + // We don't wrap this in try/catch because users of this function are expected to do that themselves as needed. 
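+ // Illustrative call pattern for the insert(hash_code_t, node_type*, ...) overloads documented
+ // above, assuming a unique-key container; 'table', 'value' and the locking are placeholders:
+ //     node_type* pNode = table.allocate_uninitialized_node(); // allocate outside the lock
+ //     hash_code_t c = ...;                                    // hash computed outside the lock
+ //     bool consumed;
+ //     {
+ //         // ... acquire the shared lock ...
+ //         consumed = table.insert(c, pNode, value).second;
+ //     }
+ //     if(!consumed)
+ //         table.free_uninitialized_node(pNode); // or keep it for a later insert attempt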
+ node_type* const pNode = (node_type*)allocate_memory(mAllocator, sizeof(node_type), EASTL_ALIGN_OF(value_type), 0); + EASTL_ASSERT_MSG(pNode != nullptr, "the behaviour of eastl::allocators that return nullptr is not defined."); + // Leave pNode->mValue uninitialized. + pNode->mpNext = NULL; + return pNode; + } + + + template + void hashtable::free_uninitialized_node(node_type* pNode) + { + // pNode->mValue is expected to be uninitialized. + EASTLFree(mAllocator, pNode, sizeof(node_type)); + } + + + template + eastl::pair::iterator, bool> + hashtable::DoInsertKey(true_type, const key_type& key, const hash_code_t c) // true_type means bUniqueKeys is true. + { + size_type n = (size_type)bucket_index(key, c, (uint32_t)mnBucketCount); + node_type* const pNode = DoFindNode(mpBucketArray[n], key, c); + + if(pNode == NULL) + { + const eastl::pair bRehash = mRehashPolicy.GetRehashRequired((uint32_t)mnBucketCount, (uint32_t)mnElementCount, (uint32_t)1); + + // Allocate the new node before doing the rehash so that we don't + // do a rehash if the allocation throws. + node_type* const pNodeNew = DoAllocateNodeFromKey(key); + set_code(pNodeNew, c); // This is a no-op for most hashtables. + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + if(bRehash.first) + { + n = (size_type)bucket_index(key, c, (uint32_t)bRehash.second); + DoRehash(bRehash.second); + } + + EASTL_ASSERT((void**)mpBucketArray != &gpEmptyBucketArray[0]); + pNodeNew->mpNext = mpBucketArray[n]; + mpBucketArray[n] = pNodeNew; + ++mnElementCount; + + return eastl::pair(iterator(pNodeNew, mpBucketArray + n), true); + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + DoFreeNode(pNodeNew); + throw; + } + #endif + } + + return eastl::pair(iterator(pNode, mpBucketArray + n), false); + } + + + + template + typename hashtable::iterator + hashtable::DoInsertKey(false_type, const key_type& key, const hash_code_t c) // false_type means bUniqueKeys is false. + { + const eastl::pair bRehash = mRehashPolicy.GetRehashRequired((uint32_t)mnBucketCount, (uint32_t)mnElementCount, (uint32_t)1); + + if(bRehash.first) + DoRehash(bRehash.second); + + const size_type n = (size_type)bucket_index(key, c, (uint32_t)mnBucketCount); + + node_type* const pNodeNew = DoAllocateNodeFromKey(key); + set_code(pNodeNew, c); // This is a no-op for most hashtables. + + // To consider: Possibly make this insertion not make equal elements contiguous. + // As it stands now, we insert equal values contiguously in the hashtable. + // The benefit is that equal_range can work in a sensible manner and that + // erase(value) can more quickly find equal values. The downside is that + // this insertion operation taking some extra time. How important is it to + // us that equal_range span all equal items? + node_type* const pNodePrev = DoFindNode(mpBucketArray[n], key, c); + + if(pNodePrev == NULL) + { + EASTL_ASSERT((void**)mpBucketArray != &gpEmptyBucketArray[0]); + pNodeNew->mpNext = mpBucketArray[n]; + mpBucketArray[n] = pNodeNew; + } + else + { + pNodeNew->mpNext = pNodePrev->mpNext; + pNodePrev->mpNext = pNodeNew; + } + + ++mnElementCount; + + return iterator(pNodeNew, mpBucketArray + n); + } + + + template + eastl::pair::iterator, bool> + hashtable::DoInsertKey(true_type, key_type&& key, const hash_code_t c) // true_type means bUniqueKeys is true. 
+ { + size_type n = (size_type)bucket_index(key, c, (uint32_t)mnBucketCount); + node_type* const pNode = DoFindNode(mpBucketArray[n], key, c); + + if(pNode == NULL) + { + const eastl::pair bRehash = mRehashPolicy.GetRehashRequired((uint32_t)mnBucketCount, (uint32_t)mnElementCount, (uint32_t)1); + + // Allocate the new node before doing the rehash so that we don't + // do a rehash if the allocation throws. + node_type* const pNodeNew = DoAllocateNodeFromKey(eastl::move(key)); + set_code(pNodeNew, c); // This is a no-op for most hashtables. + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + if(bRehash.first) + { + n = (size_type)bucket_index(key, c, (uint32_t)bRehash.second); + DoRehash(bRehash.second); + } + + EASTL_ASSERT((void**)mpBucketArray != &gpEmptyBucketArray[0]); + pNodeNew->mpNext = mpBucketArray[n]; + mpBucketArray[n] = pNodeNew; + ++mnElementCount; + + return eastl::pair(iterator(pNodeNew, mpBucketArray + n), true); + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + DoFreeNode(pNodeNew); + throw; + } + #endif + } + + return eastl::pair(iterator(pNode, mpBucketArray + n), false); + } + + + template + typename hashtable::iterator + hashtable::DoInsertKey(false_type, key_type&& key, const hash_code_t c) // false_type means bUniqueKeys is false. + { + const eastl::pair bRehash = mRehashPolicy.GetRehashRequired((uint32_t)mnBucketCount, (uint32_t)mnElementCount, (uint32_t)1); + + if(bRehash.first) + DoRehash(bRehash.second); + + const size_type n = (size_type)bucket_index(key, c, (uint32_t)mnBucketCount); + + node_type* const pNodeNew = DoAllocateNodeFromKey(eastl::move(key)); + set_code(pNodeNew, c); // This is a no-op for most hashtables. + + // To consider: Possibly make this insertion not make equal elements contiguous. + // As it stands now, we insert equal values contiguously in the hashtable. + // The benefit is that equal_range can work in a sensible manner and that + // erase(value) can more quickly find equal values. The downside is that + // this insertion operation taking some extra time. How important is it to + // us that equal_range span all equal items? + node_type* const pNodePrev = DoFindNode(mpBucketArray[n], key, c); + + if(pNodePrev == NULL) + { + EASTL_ASSERT((void**)mpBucketArray != &gpEmptyBucketArray[0]); + pNodeNew->mpNext = mpBucketArray[n]; + mpBucketArray[n] = pNodeNew; + } + else + { + pNodeNew->mpNext = pNodePrev->mpNext; + pNodePrev->mpNext = pNodeNew; + } + + ++mnElementCount; + + return iterator(pNodeNew, mpBucketArray + n); + } + + + template + template + typename hashtable::insert_return_type + hashtable::emplace(Args&&... args) + { + return DoInsertValue(has_unique_keys_type(), eastl::forward(args)...); // Need to use forward instead of move because Args&& is a "universal reference" instead of an rvalue reference. + } + + template + template + typename hashtable::iterator + hashtable::emplace_hint(const_iterator, Args&&... args) + { + // We currently ignore the iterator argument as a hint. + insert_return_type result = DoInsertValue(has_unique_keys_type(), eastl::forward(args)...); + return DoGetResultIterator(has_unique_keys_type(), result); + } + + template + template + // inline eastl::pair::iterator, bool> + inline typename hashtable::insert_return_type + hashtable::try_emplace(const key_type& key, Args&&... 
args) + { + return DoInsertValue(has_unique_keys_type(), piecewise_construct, eastl::forward_as_tuple(key), + eastl::forward_as_tuple(eastl::forward(args)...)); + } + + template + template + // inline eastl::pair::iterator, bool> + inline typename hashtable::insert_return_type + hashtable::try_emplace(key_type&& key, Args&&... args) + { + return DoInsertValue(has_unique_keys_type(), piecewise_construct, eastl::forward_as_tuple(eastl::move(key)), + eastl::forward_as_tuple(eastl::forward(args)...)); + } + + template + template + inline typename hashtable::iterator + hashtable::try_emplace(const_iterator, const key_type& key, Args&&... args) + { + insert_return_type result = DoInsertValue( + has_unique_keys_type(), + value_type(piecewise_construct, eastl::forward_as_tuple(key), eastl::forward_as_tuple(eastl::forward(args)...))); + + return DoGetResultIterator(has_unique_keys_type(), result); + } + + template + template + inline typename hashtable::iterator + hashtable::try_emplace(const_iterator, key_type&& key, Args&&... args) + { + insert_return_type result = + DoInsertValue(has_unique_keys_type(), value_type(piecewise_construct, eastl::forward_as_tuple(eastl::move(key)), + eastl::forward_as_tuple(eastl::forward(args)...))); + + return DoGetResultIterator(has_unique_keys_type(), result); + } + + template + typename hashtable::insert_return_type + hashtable::insert(value_type&& otherValue) + { + return DoInsertValue(has_unique_keys_type(), eastl::move(otherValue)); + } + + + template + template + typename hashtable::insert_return_type + hashtable::insert(hash_code_t c, node_type* pNodeNew, P&& otherValue) + { + // pNodeNew->mValue is expected to be uninitialized. + value_type value(eastl::forward
(otherValue)); // Need to use forward instead of move because P&& is a "universal reference" instead of an rvalue reference. + const key_type& k = mExtractKey(value); + return DoInsertValueExtra(has_unique_keys_type(), k, c, pNodeNew, eastl::move(value)); + } + + + template + typename hashtable::iterator + hashtable::insert(const_iterator, value_type&& value) + { + // We currently ignore the iterator argument as a hint. + insert_return_type result = DoInsertValue(has_unique_keys_type(), value_type(eastl::move(value))); + return DoGetResultIterator(has_unique_keys_type(), result); + } + + + template + typename hashtable::insert_return_type + hashtable::insert(const value_type& value) + { + return DoInsertValue(has_unique_keys_type(), value); + } + + + template + typename hashtable::insert_return_type + hashtable::insert(hash_code_t c, node_type* pNodeNew, const value_type& value) + { + // pNodeNew->mValue is expected to be uninitialized. + const key_type& k = mExtractKey(value); + return DoInsertValueExtra(has_unique_keys_type(), k, c, pNodeNew, value); + } + + + template + template + typename hashtable::insert_return_type + hashtable::insert(P&& otherValue) + { + return emplace(eastl::forward
(otherValue)); + } + + + template + typename hashtable::iterator + hashtable::insert(const_iterator, const value_type& value) + { + // We ignore the first argument (hint iterator). It's not likely to be useful for hashtable containers. + insert_return_type result = DoInsertValue(has_unique_keys_type(), value); + return DoGetResultIterator(has_unique_keys_type(), result); + } + + + template + void hashtable::insert(std::initializer_list ilist) + { + insert(ilist.begin(), ilist.end()); + } + + + template + template + void + hashtable::insert(InputIterator first, InputIterator last) + { + const uint32_t nElementAdd = (uint32_t)eastl::ht_distance(first, last); + const eastl::pair bRehash = mRehashPolicy.GetRehashRequired((uint32_t)mnBucketCount, (uint32_t)mnElementCount, nElementAdd); + + if(bRehash.first) + DoRehash(bRehash.second); + + for(; first != last; ++first) + DoInsertValue(has_unique_keys_type(), *first); + } + + + template + template + eastl::pair::iterator, bool> + hashtable::insert_or_assign(const key_type& k, M&& obj) + { + auto iter = find(k); + if(iter == end()) + { + return insert(value_type(piecewise_construct, eastl::forward_as_tuple(k), eastl::forward_as_tuple(eastl::forward(obj)))); + } + else + { + iter->second = eastl::forward(obj); + return {iter, false}; + } + } + + template + template + eastl::pair::iterator, bool> + hashtable::insert_or_assign(key_type&& k, M&& obj) + { + auto iter = find(k); + if(iter == end()) + { + return insert(value_type(piecewise_construct, eastl::forward_as_tuple(eastl::move(k)), eastl::forward_as_tuple(eastl::forward(obj)))); + } + else + { + iter->second = eastl::forward(obj); + return {iter, false}; + } + } + + template + template + typename hashtable::iterator + hashtable::insert_or_assign(const_iterator, const key_type& k, M&& obj) + { + return insert_or_assign(k, eastl::forward(obj)).first; // we ignore the iterator hint + } + + template + template + typename hashtable::iterator + hashtable::insert_or_assign(const_iterator, key_type&& k, M&& obj) + { + return insert_or_assign(eastl::move(k), eastl::forward(obj)).first; // we ignore the iterator hint + } + + + template + typename hashtable::iterator + hashtable::erase(const_iterator i) + { + iterator iNext(i.mpNode, i.mpBucket); // Convert from const_iterator to iterator while constructing. + ++iNext; + + node_type* pNode = i.mpNode; + node_type* pNodeCurrent = *i.mpBucket; + + if(pNodeCurrent == pNode) + *i.mpBucket = pNodeCurrent->mpNext; + else + { + // We have a singly-linked list, so we have no choice but to + // walk down it till we find the node before the node at 'i'. + node_type* pNodeNext = pNodeCurrent->mpNext; + + while(pNodeNext != pNode) + { + pNodeCurrent = pNodeNext; + pNodeNext = pNodeCurrent->mpNext; + } + + pNodeCurrent->mpNext = pNodeNext->mpNext; + } + + DoFreeNode(pNode); + --mnElementCount; + + return iNext; + } + + + + template + inline typename hashtable::iterator + hashtable::erase(const_iterator first, const_iterator last) + { + while(first != last) + first = erase(first); + return iterator(first.mpNode, first.mpBucket); + } + + + + template + typename hashtable::size_type + hashtable::erase(const key_type& k) + { + // To do: Reimplement this function to do a single loop and not try to be + // smart about element contiguity. The mechanism here is only a benefit if the + // buckets are heavily overloaded; otherwise this mechanism may be slightly slower. 
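+ // The loops below first skip any leading nodes in the bucket that do not match, then unlink
+ // the contiguous run of matching nodes; this relies on equal keys being inserted adjacently
+ // (see the insertion routines above). The return value is the number of elements removed.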
+ + const hash_code_t c = get_hash_code(k); + const size_type n = (size_type)bucket_index(k, c, (uint32_t)mnBucketCount); + const size_type nElementCountSaved = mnElementCount; + + node_type** pBucketArray = mpBucketArray + n; + + while(*pBucketArray && !compare(k, c, *pBucketArray)) + pBucketArray = &(*pBucketArray)->mpNext; + + while(*pBucketArray && compare(k, c, *pBucketArray)) + { + node_type* const pNode = *pBucketArray; + *pBucketArray = pNode->mpNext; + DoFreeNode(pNode); + --mnElementCount; + } + + return nElementCountSaved - mnElementCount; + } + + + + template + inline void hashtable::clear() + { + DoFreeNodes(mpBucketArray, mnBucketCount); + mnElementCount = 0; + } + + + + template + inline void hashtable::clear(bool clearBuckets) + { + DoFreeNodes(mpBucketArray, mnBucketCount); + if(clearBuckets) + { + DoFreeBuckets(mpBucketArray, mnBucketCount); + reset_lose_memory(); + } + mnElementCount = 0; + } + + + + template + inline void hashtable::reset_lose_memory() EA_NOEXCEPT + { + // The reset function is a special extension function which unilaterally + // resets the container to an empty state without freeing the memory of + // the contained objects. This is useful for very quickly tearing down a + // container built into scratch memory. + mnBucketCount = 1; + + #ifdef _MSC_VER + mpBucketArray = (node_type**)&gpEmptyBucketArray[0]; + #else + void* p = &gpEmptyBucketArray[0]; + memcpy(&mpBucketArray, &p, sizeof(mpBucketArray)); // Other compilers implement strict aliasing and casting is thus unsafe. + #endif + + mnElementCount = 0; + mRehashPolicy.mnNextResize = 0; + } + + + template + inline void hashtable::reserve(size_type nElementCount) + { + rehash(mRehashPolicy.GetBucketCount(uint32_t(nElementCount))); + } + + + + template + inline void hashtable::rehash(size_type nBucketCount) + { + // Note that we unilaterally use the passed in bucket count; we do not attempt migrate it + // up to the next prime number. We leave it at the user's discretion to do such a thing. + DoRehash(nBucketCount); + } + + + + template + void hashtable::DoRehash(size_type nNewBucketCount) + { + node_type** const pBucketArray = DoAllocateBuckets(nNewBucketCount); // nNewBucketCount should always be >= 2. + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + node_type* pNode; + + for(size_type i = 0; i < mnBucketCount; ++i) + { + while((pNode = mpBucketArray[i]) != NULL) // Using '!=' disables compiler warnings. + { + const size_type nNewBucketIndex = (size_type)bucket_index(pNode, (uint32_t)nNewBucketCount); + + mpBucketArray[i] = pNode->mpNext; + pNode->mpNext = pBucketArray[nNewBucketIndex]; + pBucketArray[nNewBucketIndex] = pNode; + } + } + + DoFreeBuckets(mpBucketArray, mnBucketCount); + mnBucketCount = nNewBucketCount; + mpBucketArray = pBucketArray; + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + // A failure here means that a hash function threw an exception. + // We can't restore the previous state without calling the hash + // function again, so the only sensible recovery is to delete everything. + DoFreeNodes(pBucketArray, nNewBucketCount); + DoFreeBuckets(pBucketArray, nNewBucketCount); + DoFreeNodes(mpBucketArray, mnBucketCount); + mnElementCount = 0; + throw; + } + #endif + } + + + template + inline bool hashtable::validate() const + { + // Verify our empty bucket array is unmodified. + if(gpEmptyBucketArray[0] != NULL) + return false; + + if(gpEmptyBucketArray[1] != (void*)uintptr_t(~0)) + return false; + + // Verify that we have at least one bucket. 
Calculations can + // trigger division by zero exceptions otherwise. + if(mnBucketCount == 0) + return false; + + // Verify that gpEmptyBucketArray is used correctly. + // gpEmptyBucketArray is only used when initially empty. + if((void**)mpBucketArray == &gpEmptyBucketArray[0]) + { + if(mnElementCount) // gpEmptyBucketArray is used only for empty hash tables. + return false; + + if(mnBucketCount != 1) // gpEmptyBucketArray is used exactly an only for mnBucketCount == 1. + return false; + } + else + { + if(mnBucketCount < 2) // Small bucket counts *must* use gpEmptyBucketArray. + return false; + } + + // Verify that the element count matches mnElementCount. + size_type nElementCount = 0; + + for(const_iterator temp = begin(), tempEnd = end(); temp != tempEnd; ++temp) + ++nElementCount; + + if(nElementCount != mnElementCount) + return false; + + // To do: Verify that individual elements are in the expected buckets. + + return true; + } + + + template + int hashtable::validate_iterator(const_iterator i) const + { + // To do: Come up with a more efficient mechanism of doing this. + + for(const_iterator temp = begin(), tempEnd = end(); temp != tempEnd; ++temp) + { + if(temp == i) + return (isf_valid | isf_current | isf_can_dereference); + } + + if(i == end()) + return (isf_valid | isf_current); + + return isf_none; + } + + + + /////////////////////////////////////////////////////////////////////// + // global operators + /////////////////////////////////////////////////////////////////////// + + // operator==, != have been moved to the specific container subclasses (e.g. hash_map). + + // The following comparison operators are deprecated and will likely be removed in a + // future version of this package. + // + // Comparing hash tables for less-ness is an odd thing to do. We provide it for + // completeness, though the user is advised to be wary of how they use this. + // + template + inline bool operator<(const hashtable& a, + const hashtable& b) + { + // This requires hash table elements to support operator<. Since the hash table + // doesn't compare elements via less (it does so via equals), we must use the + // globally defined operator less for the elements. + return eastl::lexicographical_compare(a.begin(), a.end(), b.begin(), b.end()); + } + + + template + inline bool operator>(const hashtable& a, + const hashtable& b) + { + return b < a; + } + + + template + inline bool operator<=(const hashtable& a, + const hashtable& b) + { + return !(b < a); + } + + + template + inline bool operator>=(const hashtable& a, + const hashtable& b) + { + return !(a < b); + } + + + template + inline void swap(const hashtable& a, + const hashtable& b) + { + a.swap(b); + } + + +} // namespace eastl + + +EA_RESTORE_VC_WARNING(); + + +#endif // Header include guard diff --git a/libkram/eastl/include/EASTL/internal/in_place_t.h b/libkram/eastl/include/EASTL/internal/in_place_t.h new file mode 100644 index 00000000..79acd184 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/in_place_t.h @@ -0,0 +1,82 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_INTERNAL_IN_PLACE_T_H +#define EASTL_INTERNAL_IN_PLACE_T_H + + +#include +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + +namespace eastl +{ + namespace Internal + { + struct in_place_tag {}; + template struct in_place_type_tag {}; + template struct in_place_index_tag {}; + } + + /////////////////////////////////////////////////////////////////////////////// + /// in_place_tag + /// + /// http://en.cppreference.com/w/cpp/utility/in_place_tag + /// + struct in_place_tag + { + in_place_tag() = delete; + + private: + explicit in_place_tag(Internal::in_place_tag) {} + friend inline in_place_tag Internal_ConstructInPlaceTag(); + }; + + // internal factory function for in_place_tag + inline in_place_tag Internal_ConstructInPlaceTag() { return in_place_tag(Internal::in_place_tag{}); } + + + /////////////////////////////////////////////////////////////////////////////// + /// in_place_t / in_place_type_t / in_place_index_t + /// + /// used to disambiguate overloads that take arguments (possibly a parameter + /// pack) for in-place construction of some value. + /// + /// http://en.cppreference.com/w/cpp/utility/optional/in_place_t + /// + using in_place_t = in_place_tag(&)(Internal::in_place_tag); + + template + using in_place_type_t = in_place_tag(&)(Internal::in_place_type_tag); + + template + using in_place_index_t = in_place_tag(&)(Internal::in_place_index_tag); + + + /////////////////////////////////////////////////////////////////////////////// + /// in_place / in_place / in_place + /// + /// http://en.cppreference.com/w/cpp/utility/in_place + /// + inline in_place_tag in_place(Internal::in_place_tag) { return Internal_ConstructInPlaceTag(); } + + template + inline in_place_tag in_place(Internal::in_place_type_tag) { return Internal_ConstructInPlaceTag(); } + + template + inline in_place_tag in_place(Internal::in_place_index_tag) { return Internal_ConstructInPlaceTag(); } + + +} // namespace eastl + + +#endif // Header include guard + + + + + + diff --git a/libkram/eastl/include/EASTL/internal/integer_sequence.h b/libkram/eastl/include/EASTL/internal/integer_sequence.h new file mode 100644 index 00000000..88cf1b1b --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/integer_sequence.h @@ -0,0 +1,74 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
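
// A sketch (not part of this patch) of how the in_place tag defined just above is
// typically consumed: eastl::optional forwards the trailing arguments straight to the
// payload's constructor instead of copying a prebuilt object. Point is a hypothetical
// payload type; eastl::optional is assumed to accept an in_place_t first argument.
#include <EASTL/optional.h>

struct Point
{
    int x, y;
    Point(int x_, int y_) : x(x_), y(y_) {}
};

eastl::optional<Point> make_point()
{
    // eastl::in_place disambiguates "construct from (1, 2)" from "copy an existing Point".
    return eastl::optional<Point>(eastl::in_place, 1, 2);
}
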
+///////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_INTEGER_SEQUENCE_H +#define EASTL_INTEGER_SEQUENCE_H + +#include +#include +#include + +namespace eastl +{ + +#if EASTL_VARIADIC_TEMPLATES_ENABLED && !defined(EA_COMPILER_NO_TEMPLATE_ALIASES) + +// integer_sequence +template +class integer_sequence +{ +public: + typedef T value_type; + static_assert(is_integral::value, "eastl::integer_sequence can only be instantiated with an integral type"); + static EA_CONSTEXPR size_t size() EA_NOEXCEPT { return sizeof...(Ints); } +}; + +template +struct make_index_sequence_impl; + +template +struct make_index_sequence_impl> +{ + typedef typename make_index_sequence_impl>::type type; +}; + +template +struct make_index_sequence_impl<0, integer_sequence> +{ + typedef integer_sequence type; +}; + +template +using index_sequence = integer_sequence; + +template +using make_index_sequence = typename make_index_sequence_impl>::type; + +template +struct integer_sequence_convert_impl; + +template +struct integer_sequence_convert_impl> +{ + typedef integer_sequence type; +}; + +template +struct make_integer_sequence_impl +{ + typedef typename integer_sequence_convert_impl>::type type; +}; + +template +using make_integer_sequence = typename make_integer_sequence_impl::type; + +// Helper alias template that converts any type parameter pack into an index sequence of the same length +template +using index_sequence_for = make_index_sequence; + +#endif // EASTL_VARIADIC_TEMPLATES_ENABLED + +} // namespace eastl + +#endif // EASTL_INTEGER_SEQUENCE_H diff --git a/libkram/eastl/include/EASTL/internal/intrusive_hashtable.h b/libkram/eastl/include/EASTL/internal/intrusive_hashtable.h new file mode 100644 index 00000000..dccca5b1 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/intrusive_hashtable.h @@ -0,0 +1,989 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// This file implements an intrusive hash table, which is a hash table whereby +// the container nodes are the hash table objects themselves. This has benefits +// primarily in terms of memory management. There are some minor limitations +// that result from this. +// +/////////////////////////////////////////////////////////////////////////////// + + + +#ifndef EASTL_INTERNAL_INTRUSIVE_HASHTABLE_H +#define EASTL_INTERNAL_INTRUSIVE_HASHTABLE_H + + +#include +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + +#include +#include +#include +#include +#include +#include +#include + +EA_DISABLE_ALL_VC_WARNINGS(); +#include +#include +#include +EA_RESTORE_ALL_VC_WARNINGS(); + + +namespace eastl +{ + + /// intrusive_hash_node + /// + /// A hash_node stores an element in a hash table, much like a + /// linked list node stores an element in a linked list. + /// An intrusive_hash_node additionally can, via template parameter, + /// store a hash code in the node to speed up hash calculations + /// and comparisons in some cases. + /// + /// To consider: Make a version of intrusive_hash_node which is + /// templated on the container type. This would allow for the + /// mpNext pointer to be the container itself and thus allow + /// for easier debugging. + /// + /// Example usage: + /// struct Widget : public intrusive_hash_node{ ... 
}; + /// + /// struct Dagget : public intrusive_hash_node_key{ ... }; + /// + struct intrusive_hash_node + { + intrusive_hash_node* mpNext; + }; + + + template + struct intrusive_hash_node_key : public intrusive_hash_node + { + typedef Key key_type; + Key mKey; + }; + + + + /// intrusive_node_iterator + /// + /// Node iterators iterate nodes within a given bucket. + /// + /// The bConst parameter defines if the iterator is a const_iterator + /// or an iterator. + /// + template + struct intrusive_node_iterator + { + public: + typedef intrusive_node_iterator this_type; + typedef Value value_type; + typedef Value node_type; + typedef ptrdiff_t difference_type; + typedef typename type_select::type pointer; + typedef typename type_select::type reference; + typedef EASTL_ITC_NS::forward_iterator_tag iterator_category; + + public: + node_type* mpNode; + + public: + intrusive_node_iterator() + : mpNode(NULL) { } + + explicit intrusive_node_iterator(value_type* pNode) + : mpNode(pNode) { } + + intrusive_node_iterator(const intrusive_node_iterator& x) + : mpNode(x.mpNode) { } + + reference operator*() const + { return *mpNode; } + + pointer operator->() const + { return mpNode; } + + this_type& operator++() + { mpNode = static_cast(mpNode->mpNext); return *this; } + + this_type operator++(int) + { this_type temp(*this); mpNode = static_cast(mpNode->mpNext); return temp; } + + }; // intrusive_node_iterator + + + + + /// intrusive_hashtable_iterator_base + /// + /// An intrusive_hashtable_iterator_base iterates the entire hash table and + /// not just nodes within a single bucket. Users in general will use a hash + /// table iterator much more often, as it is much like other container + /// iterators (e.g. vector::iterator). + /// + /// We define a base class here because it is shared by both const and + /// non-const iterators. + /// + template + struct intrusive_hashtable_iterator_base + { + public: + typedef Value value_type; + + protected: + template + friend class intrusive_hashtable; + + template + friend struct intrusive_hashtable_iterator; + + template + friend bool operator==(const intrusive_hashtable_iterator_base&, const intrusive_hashtable_iterator_base&); + + template + friend bool operator!=(const intrusive_hashtable_iterator_base&, const intrusive_hashtable_iterator_base&); + + value_type* mpNode; // Current node within current bucket. + value_type** mpBucket; // Current bucket. + + public: + intrusive_hashtable_iterator_base(value_type* pNode, value_type** pBucket) + : mpNode(pNode), mpBucket(pBucket) { } + + void increment_bucket() + { + ++mpBucket; + while(*mpBucket == NULL) // We store an extra bucket with some non-NULL value at the end + ++mpBucket; // of the bucket array so that finding the end of the bucket + mpNode = *mpBucket; // array is quick and simple. + } + + void increment() + { + mpNode = static_cast(mpNode->mpNext); + + while(mpNode == NULL) + mpNode = *++mpBucket; + } + + }; // intrusive_hashtable_iterator_base + + + + + /// intrusive_hashtable_iterator + /// + /// An intrusive_hashtable_iterator iterates the entire hash table and not + /// just nodes within a single bucket. Users in general will use a hash + /// table iterator much more often, as it is much like other container + /// iterators (e.g. vector::iterator). + /// + /// The bConst parameter defines if the iterator is a const_iterator + /// or an iterator. 
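
// A sketch (not part of this patch) of the intrusive pattern described above: the
// element itself derives from intrusive_hash_node_key and therefore carries its own
// bucket linkage. Widget, and the <Key, T, bucketCount> shape of the companion
// eastl::intrusive_hash_map container, are assumptions for illustration only.
#include <EASTL/intrusive_hash_map.h>

struct Widget : public eastl::intrusive_hash_node_key<int>
{
    explicit Widget(int id) { mKey = id; }
};

void intrusive_node_sketch()
{
    eastl::intrusive_hash_map<int, Widget, 37> table; // 37 buckets, fixed at compile time

    Widget a(1), b(2);   // nodes are owned by the caller, never allocated by the container
    table.insert(a);
    table.insert(b);

    table.remove(a);     // O(1) removal by value, since the node holds its own linkage
}
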
+ /// + template + struct intrusive_hashtable_iterator : public intrusive_hashtable_iterator_base + { + public: + typedef intrusive_hashtable_iterator_base base_type; + typedef intrusive_hashtable_iterator this_type; + typedef intrusive_hashtable_iterator this_type_non_const; + typedef typename base_type::value_type value_type; + typedef typename type_select::type pointer; + typedef typename type_select::type reference; + typedef ptrdiff_t difference_type; + typedef EASTL_ITC_NS::forward_iterator_tag iterator_category; + + public: + intrusive_hashtable_iterator() + : base_type(NULL, NULL) { } + + explicit intrusive_hashtable_iterator(value_type* pNode, value_type** pBucket) + : base_type(pNode, pBucket) { } + + explicit intrusive_hashtable_iterator(value_type** pBucket) + : base_type(*pBucket, pBucket) { } + + intrusive_hashtable_iterator(const this_type_non_const& x) + : base_type(x.mpNode, x.mpBucket) { } + + reference operator*() const + { return *base_type::mpNode; } + + pointer operator->() const + { return base_type::mpNode; } + + this_type& operator++() + { base_type::increment(); return *this; } + + this_type operator++(int) + { this_type temp(*this); base_type::increment(); return temp; } + + }; // intrusive_hashtable_iterator + + + + /// use_intrusive_key + /// + /// operator()(x) returns x.mKey. Used in maps, as opposed to sets. + /// This is a template policy implementation; it is an alternative to + /// the use_self template implementation, which is used for sets. + /// + template + struct use_intrusive_key // : public unary_function // Perhaps we want to make it a subclass of unary_function. + { + typedef Key result_type; + + const result_type& operator()(const Node& x) const + { return x.mKey; } + }; + + + + /////////////////////////////////////////////////////////////////////////// + /// intrusive_hashtable + /// + template + class intrusive_hashtable + { + public: + typedef intrusive_hashtable this_type; + typedef Key key_type; + typedef Value value_type; + typedef Value mapped_type; + typedef Value node_type; + typedef uint32_t hash_code_t; + typedef Equal key_equal; + typedef ptrdiff_t difference_type; + typedef eastl_size_t size_type; // See config.h for the definition of eastl_size_t, which defaults to size_t. + typedef value_type& reference; + typedef const value_type& const_reference; + typedef intrusive_node_iterator local_iterator; + typedef intrusive_node_iterator const_local_iterator; + typedef intrusive_hashtable_iterator iterator; + typedef intrusive_hashtable_iterator const_iterator; + typedef typename type_select, iterator>::type insert_return_type; + typedef typename type_select, + eastl::use_intrusive_key >::type extract_key; + + enum + { + kBucketCount = bucketCount + }; + + protected: + node_type* mBucketArray[kBucketCount + 1]; // '+1' because we have an end bucket which is non-NULL so iterators always stop on it. + size_type mnElementCount; + Hash mHash; // To do: Use base class optimization to make this go away when it is of zero size. + Equal mEqual; // To do: Use base class optimization to make this go away when it is of zero size. 
+ + public: + intrusive_hashtable(const Hash&, const Equal&); + + void swap(this_type& x); + + iterator begin() EA_NOEXCEPT + { + iterator i(mBucketArray); + if(!i.mpNode) + i.increment_bucket(); + return i; + } + + const_iterator begin() const EA_NOEXCEPT + { + const_iterator i(const_cast(mBucketArray)); + if(!i.mpNode) + i.increment_bucket(); + return i; + } + + const_iterator cbegin() const EA_NOEXCEPT + { + return begin(); + } + + iterator end() EA_NOEXCEPT + { return iterator(mBucketArray + kBucketCount); } + + const_iterator end() const EA_NOEXCEPT + { return const_iterator(const_cast(mBucketArray) + kBucketCount); } + + const_iterator cend() const EA_NOEXCEPT + { return const_iterator(const_cast(mBucketArray) + kBucketCount); } + + local_iterator begin(size_type n) EA_NOEXCEPT + { return local_iterator(mBucketArray[n]); } + + const_local_iterator begin(size_type n) const EA_NOEXCEPT + { return const_local_iterator(mBucketArray[n]); } + + const_local_iterator cbegin(size_type n) const EA_NOEXCEPT + { return const_local_iterator(mBucketArray[n]); } + + local_iterator end(size_type) EA_NOEXCEPT + { return local_iterator(NULL); } + + const_local_iterator end(size_type) const EA_NOEXCEPT + { return const_local_iterator(NULL); } + + const_local_iterator cend(size_type) const EA_NOEXCEPT + { return const_local_iterator(NULL); } + + size_type size() const EA_NOEXCEPT + { return mnElementCount; } + + bool empty() const EA_NOEXCEPT + { return mnElementCount == 0; } + + size_type bucket_count() const EA_NOEXCEPT // This function is unnecessary, as the user can directly reference + { return kBucketCount; } // intrusive_hashtable::kBucketCount as a constant. + + size_type bucket_size(size_type n) const EA_NOEXCEPT + { return (size_type)eastl::distance(begin(n), end(n)); } + + size_type bucket(const key_type& k) const EA_NOEXCEPT + { return (size_type)(mHash(k) % kBucketCount); } + + public: + float load_factor() const EA_NOEXCEPT + { return (float)mnElementCount / (float)kBucketCount; } + + public: + insert_return_type insert(value_type& value) + { return DoInsertValue(value, integral_constant()); } + + insert_return_type insert(const_iterator, value_type& value) + { return insert(value); } // To consider: We might be able to use the iterator argument to specify a specific insertion location. + + template + void insert(InputIterator first, InputIterator last); + + public: + iterator erase(const_iterator position); + iterator erase(const_iterator first, const_iterator last); + size_type erase(const key_type& k); + iterator remove(value_type& value); // Removes by value instead of by iterator. This is an O(1) operation, due to this hashtable being 'intrusive'. + + void clear(); + + public: + iterator find(const key_type& k); + const_iterator find(const key_type& k) const; + + /// Implements a find whereby the user supplies a comparison of a different type + /// than the hashtable value_type. A useful case of this is one whereby you have + /// a container of string objects but want to do searches via passing in char pointers. + /// The problem is that without this kind of find, you need to do the expensive operation + /// of converting the char pointer to a string so it can be used as the argument to the + /// find function. + /// + /// Example usage: + /// hash_set hashSet; + /// hashSet.find_as("hello"); // Use default hash and compare. 
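
// A sketch (not part of this patch) of the heterogeneous lookup that find_as enables:
// probing a set of strings with a plain char pointer, so no temporary string has to be
// constructed just to perform the search. Shown with the non-intrusive eastl::hash_set
// for brevity; the intrusive containers built on this header follow the same pattern.
#include <EASTL/hash_set.h>
#include <EASTL/string.h>

bool contains_name(const eastl::hash_set<eastl::string>& names, const char* name)
{
    auto it = names.find_as(name,
                            eastl::hash<const char*>(),
                            eastl::equal_to_2<eastl::string, const char*>());
    return it != names.end();
}
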
+ /// + /// Example usage (namespaces omitted for brevity): + /// hash_set hashSet; + /// hashSet.find_as("hello", hash(), equal_to_2()); + /// + template + iterator find_as(const U& u, UHash uhash, BinaryPredicate predicate); + + template + const_iterator find_as(const U& u, UHash uhash, BinaryPredicate predicate) const; + + template + iterator find_as(const U& u); + + template + const_iterator find_as(const U& u) const; + + size_type count(const key_type& k) const; + + // The use for equal_range in a hash_table seems somewhat questionable. + // The primary reason for its existence is to replicate the interface of set/map. + eastl::pair equal_range(const key_type& k); + eastl::pair equal_range(const key_type& k) const; + + public: + bool validate() const; + int validate_iterator(const_iterator i) const; + + public: + Hash hash_function() const + { return mHash; } + + Equal equal_function() const // Deprecated. Use key_eq() instead, as key_eq is what the new C++ standard + { return mEqual; } // has specified in its hashtable (unordered_*) proposal. + + const key_equal& key_eq() const + { return mEqual; } + + key_equal& key_eq() + { return mEqual; } + + protected: + eastl::pair DoInsertValue(value_type&, true_type); // true_type means bUniqueKeys is true. + iterator DoInsertValue(value_type&, false_type); // false_type means bUniqueKeys is false. + + node_type* DoFindNode(node_type* pNode, const key_type& k) const; + + template + node_type* DoFindNode(node_type* pNode, const U& u, BinaryPredicate predicate) const; + + }; // class intrusive_hashtable + + + + + + /////////////////////////////////////////////////////////////////////// + // node_iterator_base + /////////////////////////////////////////////////////////////////////// + + template + inline bool operator==(const intrusive_node_iterator& a, + const intrusive_node_iterator& b) + { return a.mpNode == b.mpNode; } + + template + inline bool operator!=(const intrusive_node_iterator& a, + const intrusive_node_iterator& b) + { return a.mpNode != b.mpNode; } + + + + + /////////////////////////////////////////////////////////////////////// + // hashtable_iterator_base + /////////////////////////////////////////////////////////////////////// + + template + inline bool operator==(const intrusive_hashtable_iterator_base& a, + const intrusive_hashtable_iterator_base& b) + { return a.mpNode == b.mpNode; } + + + template + inline bool operator!=(const intrusive_hashtable_iterator_base& a, + const intrusive_hashtable_iterator_base& b) + { return a.mpNode != b.mpNode; } + + + + + /////////////////////////////////////////////////////////////////////// + // intrusive_hashtable + /////////////////////////////////////////////////////////////////////// + + template + inline intrusive_hashtable::intrusive_hashtable(const H& h, const Eq& eq) + : mnElementCount(0), + mHash(h), + mEqual(eq) + { + memset(mBucketArray, 0, kBucketCount * sizeof(mBucketArray[0])); + mBucketArray[kBucketCount] = reinterpret_cast((uintptr_t)~0); + } + + + template + void intrusive_hashtable::swap(this_type& x) + { + for(size_t i = 0; i < kBucketCount; i++) + eastl::swap(mBucketArray[i], x.mBucketArray[i]); + + eastl::swap(mnElementCount, x.mnElementCount); + eastl::swap(mHash, x.mHash); + eastl::swap(mEqual, x.mEqual); + } + + + template + inline typename intrusive_hashtable::iterator + intrusive_hashtable::find(const key_type& k) + { + const size_type n = (size_type)(mHash(k) % kBucketCount); + node_type* const pNode = DoFindNode(mBucketArray[n], k); + return pNode ? 
iterator(pNode, mBucketArray + n) : iterator(mBucketArray + kBucketCount); + } + + + template + inline typename intrusive_hashtable::const_iterator + intrusive_hashtable::find(const key_type& k) const + { + const size_type n = (size_type)(mHash(k) % kBucketCount); + node_type* const pNode = DoFindNode(mBucketArray[n], k); + return pNode ? const_iterator(pNode, const_cast(mBucketArray) + n) : const_iterator(const_cast(mBucketArray) + kBucketCount); + } + + + template + template + inline typename intrusive_hashtable::iterator + intrusive_hashtable::find_as(const U& other, UHash uhash, BinaryPredicate predicate) + { + const size_type n = (size_type)(uhash(other) % kBucketCount); + node_type* const pNode = DoFindNode(mBucketArray[n], other, predicate); + return pNode ? iterator(pNode, mBucketArray + n) : iterator(mBucketArray + kBucketCount); + } + + + template + template + inline typename intrusive_hashtable::const_iterator + intrusive_hashtable::find_as(const U& other, UHash uhash, BinaryPredicate predicate) const + { + const size_type n = (size_type)(uhash(other) % kBucketCount); + node_type* const pNode = DoFindNode(mBucketArray[n], other, predicate); + return pNode ? const_iterator(pNode, const_cast(mBucketArray) + n) : const_iterator(const_cast(mBucketArray) + kBucketCount); + } + + + /// intrusive_hashtable_find + /// + /// Helper function that defaults to using hash and equal_to_2. + /// This makes it so that by default you don't need to provide these. + /// Note that the default hash functions may not be what you want, though. + /// + /// Example usage. Instead of this: + /// hash_set hashSet; + /// hashSet.find("hello", hash(), equal_to_2()); + /// + /// You can use this: + /// hash_set hashSet; + /// hashtable_find(hashSet, "hello"); + /// + template + inline typename H::iterator intrusive_hashtable_find(H& hashTable, const U& u) + { return hashTable.find_as(u, eastl::hash(), eastl::equal_to_2()); } + + template + inline typename H::const_iterator intrusive_hashtable_find(const H& hashTable, const U& u) + { return hashTable.find_as(u, eastl::hash(), eastl::equal_to_2()); } + + + + template + template + inline typename intrusive_hashtable::iterator + intrusive_hashtable::find_as(const U& other) + { return eastl::intrusive_hashtable_find(*this, other); } + // VC++ doesn't appear to like the following, though it seems correct to me. + // So we implement the workaround above until we can straighten this out. + //{ return find_as(other, eastl::hash(), eastl::equal_to_2()); } + + + template + template + inline typename intrusive_hashtable::const_iterator + intrusive_hashtable::find_as(const U& other) const + { return eastl::intrusive_hashtable_find(*this, other); } + // VC++ doesn't appear to like the following, though it seems correct to me. + // So we implement the workaround above until we can straighten this out. + //{ return find_as(other, eastl::hash(), eastl::equal_to_2()); } + + + template + typename intrusive_hashtable::size_type + intrusive_hashtable::count(const key_type& k) const + { + const size_type n = (size_type)(mHash(k) % kBucketCount); + size_type result = 0; + extract_key extractKey; // extract_key is empty and thus this ctor is a no-op. + + // To do: Make a specialization for bU (unique keys) == true and take + // advantage of the fact that the count will always be zero or one in that case. 
+ for(node_type* pNode = mBucketArray[n]; pNode; pNode = static_cast(pNode->mpNext)) + { + if(mEqual(k, extractKey(*pNode))) + ++result; + } + return result; + } + + + template + eastl::pair::iterator, + typename intrusive_hashtable::iterator> + intrusive_hashtable::equal_range(const key_type& k) + { + const size_type n = (size_type)(mHash(k) % kBucketCount); + node_type** head = mBucketArray + n; + node_type* pNode = DoFindNode(*head, k); + extract_key extractKey; // extract_key is empty and thus this ctor is a no-op. + + if(pNode) + { + node_type* p1 = static_cast(pNode->mpNext); + + for(; p1; p1 = static_cast(p1->mpNext)) + { + if(!mEqual(k, extractKey(*p1))) + break; + } + + iterator first(pNode, head); + iterator last(p1, head); + + if(!p1) + last.increment_bucket(); + + return eastl::pair(first, last); + } + + return eastl::pair(iterator(mBucketArray + kBucketCount), + iterator(mBucketArray + kBucketCount)); + } + + + + + template + eastl::pair::const_iterator, + typename intrusive_hashtable::const_iterator> + intrusive_hashtable::equal_range(const key_type& k) const + { + const size_type n = (size_type)(mHash(k) % kBucketCount); + node_type** head = const_cast(mBucketArray + n); + node_type* pNode = DoFindNode(*head, k); + extract_key extractKey; // extract_key is empty and thus this ctor is a no-op. + + if(pNode) + { + node_type* p1 = static_cast(pNode->mpNext); + + for(; p1; p1 = static_cast(p1->mpNext)) + { + if(!mEqual(k, extractKey(*p1))) + break; + } + + const_iterator first(pNode, head); + const_iterator last(p1, head); + + if(!p1) + last.increment_bucket(); + + return eastl::pair(first, last); + } + + return eastl::pair(const_iterator(const_cast(mBucketArray) + kBucketCount), + const_iterator(const_cast(mBucketArray) + kBucketCount)); + } + + + template + inline typename intrusive_hashtable::node_type* + intrusive_hashtable::DoFindNode(node_type* pNode, const key_type& k) const + { + extract_key extractKey; // extract_key is empty and thus this ctor is a no-op. + + for(; pNode; pNode = static_cast(pNode->mpNext)) + { + if(mEqual(k, extractKey(*pNode))) + return pNode; + } + return NULL; + } + + + template + template + inline typename intrusive_hashtable::node_type* + intrusive_hashtable::DoFindNode(node_type* pNode, const U& other, BinaryPredicate predicate) const + { + extract_key extractKey; // extract_key is empty and thus this ctor is a no-op. + + for(; pNode; pNode = static_cast(pNode->mpNext)) + { + if(predicate(extractKey(*pNode), other)) // Intentionally compare with key as first arg and other as second arg. + return pNode; + } + return NULL; + } + + + template + eastl::pair::iterator, bool> + intrusive_hashtable::DoInsertValue(value_type& value, true_type) // true_type means bUniqueKeys is true. + { + // For sets (as opposed to maps), one could argue that all insertions are successful, + // as all elements are unique. However, the equal function might not think so. + extract_key extractKey; // extract_key is empty and thus this ctor is a no-op. 
+ const size_type n = (size_type)(mHash(extractKey(value)) % kBucketCount); + node_type* const pNode = DoFindNode(mBucketArray[n], extractKey(value)); + + if(pNode == NULL) + { + value.mpNext = mBucketArray[n]; + mBucketArray[n] = &value; + ++mnElementCount; + + return eastl::pair(iterator(&value, mBucketArray + n), true); + } + + return eastl::pair(iterator(pNode, mBucketArray + n), false); + } + + + template + typename intrusive_hashtable::iterator + intrusive_hashtable::DoInsertValue(value_type& value, false_type) // false_type means bUniqueKeys is false. + { + extract_key extractKey; // extract_key is empty and thus this ctor is a no-op. + const size_type n = (size_type)(mHash(extractKey(value)) % kBucketCount); + node_type* const pNodePrev = DoFindNode(mBucketArray[n], extractKey(value)); + + if(pNodePrev == NULL) + { + value.mpNext = mBucketArray[n]; + mBucketArray[n] = &value; + } + else + { + value.mpNext = pNodePrev->mpNext; + pNodePrev->mpNext = &value; + } + + ++mnElementCount; + + return iterator(&value, mBucketArray + n); + } + + + + template + template + inline void intrusive_hashtable::insert(InputIterator first, InputIterator last) + { + for(; first != last; ++first) + insert(*first); + } + + + template + typename intrusive_hashtable::iterator + intrusive_hashtable::erase(const_iterator i) + { + iterator iNext(i.mpNode, i.mpBucket); + ++iNext; + + node_type* pNode = i.mpNode; + node_type* pNodeCurrent = *i.mpBucket; + + if(pNodeCurrent == pNode) + *i.mpBucket = static_cast(pNodeCurrent->mpNext); + else + { + // We have a singly-linked list, so we have no choice but to + // walk down it till we find the node before the node at 'i'. + node_type* pNodeNext = static_cast(pNodeCurrent->mpNext); + + while(pNodeNext != pNode) + { + pNodeCurrent = pNodeNext; + pNodeNext = static_cast(pNodeCurrent->mpNext); + } + + pNodeCurrent->mpNext = static_cast(pNodeNext->mpNext); + } + + // To consider: In debug builds set the node mpNext to NULL. + --mnElementCount; + + return iNext; + } + + + template + inline typename intrusive_hashtable::iterator + intrusive_hashtable::erase(const_iterator first, const_iterator last) + { + while(first != last) + first = erase(first); + return iterator(first.mpNode, first.mpBucket); + } + + + template + typename intrusive_hashtable::size_type + intrusive_hashtable::erase(const key_type& k) + { + const size_type n = (size_type)(mHash(k) % kBucketCount); + const size_type nElementCountSaved = mnElementCount; + node_type*& pNodeBase = mBucketArray[n]; + extract_key extractKey; // extract_key is empty and thus this ctor is a no-op. + + // Note by Paul Pedriana: + // We have two loops here, and I'm not finding any easy way to having just one + // loop without changing the requirements of the hashtable node definition. + // It's a problem of taking an address of a variable and converting it to the + // address of another type without knowing what that type is. Perhaps I'm a + // little overly tired, so if there is a simple solution I am probably missing it. + + while(pNodeBase && mEqual(k, extractKey(*pNodeBase))) + { + pNodeBase = static_cast(pNodeBase->mpNext); + --mnElementCount; + } + + node_type* pNodePrev = pNodeBase; + + if(pNodePrev) + { + node_type* pNodeCur; + + while((pNodeCur = static_cast(pNodePrev->mpNext)) != NULL) + { + if(mEqual(k, extractKey(*pNodeCur))) + { + pNodePrev->mpNext = static_cast(pNodeCur->mpNext); + --mnElementCount; // To consider: In debug builds set the node mpNext to NULL. 
+ } + else + pNodePrev = static_cast(pNodePrev->mpNext); + } + } + + return nElementCountSaved - mnElementCount; + } + + + template + inline typename intrusive_hashtable::iterator + intrusive_hashtable::remove(value_type& value) + { + extract_key extractKey; // extract_key is empty and thus this ctor is a no-op. + const size_type n = (size_type)(mHash(extractKey(value)) % kBucketCount); + + return erase(iterator(&value, &mBucketArray[n])); + } + + + template + inline void intrusive_hashtable::clear() + { + // To consider: In debug builds set the node mpNext to NULL. + memset(mBucketArray, 0, kBucketCount * sizeof(mBucketArray[0])); + mnElementCount = 0; + } + + + template + inline bool intrusive_hashtable::validate() const + { + // Verify that the element count matches mnElementCount. + size_type nElementCount = 0; + + for(const_iterator temp = begin(), tempEnd = end(); temp != tempEnd; ++temp) + ++nElementCount; + + if(nElementCount != mnElementCount) + return false; + + // To do: Verify that individual elements are in the expected buckets. + + return true; + } + + + template + int intrusive_hashtable::validate_iterator(const_iterator i) const + { + // To do: Come up with a more efficient mechanism of doing this. + + for(const_iterator temp = begin(), tempEnd = end(); temp != tempEnd; ++temp) + { + if(temp == i) + return (isf_valid | isf_current | isf_can_dereference); + } + + if(i == end()) + return (isf_valid | isf_current); + + return isf_none; + } + + + + /////////////////////////////////////////////////////////////////////// + // global operators + /////////////////////////////////////////////////////////////////////// + + template + inline bool operator==(const intrusive_hashtable& a, + const intrusive_hashtable& b) + { + return (a.size() == b.size()) && eastl::equal(a.begin(), a.end(), b.begin()); + } + + + template + inline bool operator!=(const intrusive_hashtable& a, + const intrusive_hashtable& b) + { + return !(a == b); + } + + + // Comparing hash tables for less-ness is an odd thing to do. We provide it for + // completeness, though the user is advised to be wary of how they use this. + template + inline bool operator<(const intrusive_hashtable& a, + const intrusive_hashtable& b) + { + // This requires hash table elements to support operator<. Since the hash table + // doesn't compare elements via less (it does so via equals), we must use the + // globally defined operator less for the elements. + return eastl::lexicographical_compare(a.begin(), a.end(), b.begin(), b.end()); + } + + + template + inline bool operator>(const intrusive_hashtable& a, + const intrusive_hashtable& b) + { + return b < a; + } + + + template + inline bool operator<=(const intrusive_hashtable& a, + const intrusive_hashtable& b) + { + return !(b < a); + } + + + template + inline bool operator>=(const intrusive_hashtable& a, + const intrusive_hashtable& b) + { + return !(a < b); + } + + + template + inline void swap(const intrusive_hashtable& a, + const intrusive_hashtable& b) + { + a.swap(b); + } + + +} // namespace eastl + + + +#endif // Header include guard diff --git a/libkram/eastl/include/EASTL/internal/mem_fn.h b/libkram/eastl/include/EASTL/internal/mem_fn.h new file mode 100644 index 00000000..1d3e7b3f --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/mem_fn.h @@ -0,0 +1,304 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
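
// A sketch (not part of this patch): with unique keys, insert() on the intrusive
// hashtable above returns pair<iterator, bool>, and a second node carrying an equal key
// is rejected rather than linked in. Entry and the <Key, T, bucketCount> shape of
// eastl::intrusive_hash_map are illustration-only assumptions.
#include <EASTL/intrusive_hash_map.h>

struct Entry : public eastl::intrusive_hash_node_key<int>
{
    explicit Entry(int id) { mKey = id; }
};

void unique_insert_sketch()
{
    eastl::intrusive_hash_map<int, Entry, 17> table;

    Entry first(7), duplicate(7);
    bool added      = table.insert(first).second;     // true: key 7 was new
    bool addedAgain = table.insert(duplicate).second; // false: key 7 is already present
    (void)added; (void)addedAgain;
}
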
+///////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_INTERNAL_MEM_FN_H +#define EASTL_INTERNAL_MEM_FN_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) +#pragma once +#endif + +//////////////////////////////////////////////////////////////////////////////// +// The code in this file is a modification of the libcxx implementation. We copy +// the license information here as required. +// +// We implement only enough of mem_fn to implement eastl::function. +//////////////////////////////////////////////////////////////////////////////// + +//===------------------------ functional ----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + + +namespace eastl +{ + // + // apply_cv + // + template ::type>::value, + bool = is_volatile::type>::value> + struct apply_cv { typedef U type; }; + + template struct apply_cv { typedef const U type; }; + template struct apply_cv { typedef volatile U type; }; + template struct apply_cv { typedef const volatile U type; }; + template struct apply_cv { typedef U& type; }; + template struct apply_cv { typedef const U& type; }; + template struct apply_cv { typedef volatile U& type; }; + template struct apply_cv { typedef const volatile U& type; }; + + + + // + // has_result_type + // + template + struct has_result_type + { + private: + template + static eastl::no_type test(...); + + template + static eastl::yes_type test(typename U::result_type* = 0); + + public: + static const bool value = sizeof(test(0)) == sizeof(eastl::yes_type); + }; + + + + // + // derives_from_unary_function + // derives_from_binary_function + // + template + struct derives_from_unary_function + { + private: + static eastl::no_type test(...); + + template + static unary_function test(const volatile unary_function*); + + public: + static const bool value = !is_same::value; + typedef decltype(test((T*)0)) type; + }; + + template + struct derives_from_binary_function + { + private: + static eastl::no_type test(...); + template + static binary_function test(const volatile binary_function*); + + public: + static const bool value = !is_same::value; + typedef decltype(test((T*)0)) type; + }; + + + + // + // maybe_derives_from_unary_function + // maybe_derives_from_binary_function + // + template ::value> + struct maybe_derive_from_unary_function // bool is true + : public derives_from_unary_function::type { }; + + template + struct maybe_derive_from_unary_function { }; + + template ::value> + struct maybe_derive_from_binary_function // bool is true + : public derives_from_binary_function::type { }; + + template + struct maybe_derive_from_binary_function { }; + + + + // + // weak_result_type_imp + // + template ::value> + struct weak_result_type_imp // bool is true + : public maybe_derive_from_unary_function, + public maybe_derive_from_binary_function + { + typedef typename T::result_type result_type; + }; + + template + struct weak_result_type_imp : public maybe_derive_from_unary_function, + public maybe_derive_from_binary_function { }; + + + + // + // weak_result_type + // + template + struct weak_result_type : public weak_result_type_imp { }; + + // 0 argument case + template struct weak_result_type { typedef R result_type; }; + template struct weak_result_type { typedef R result_type; }; + template struct 
weak_result_type { typedef R result_type; }; + + // 1 argument case + template struct weak_result_type : public unary_function { }; + template struct weak_result_type : public unary_function { }; + template struct weak_result_type : public unary_function { }; + template struct weak_result_type : public unary_function { }; + template struct weak_result_type : public unary_function { }; + template struct weak_result_type : public unary_function { }; + template struct weak_result_type : public unary_function { }; + + // 2 argument case + template struct weak_result_type : public binary_function { }; + template struct weak_result_type : public binary_function { }; + template struct weak_result_type : public binary_function { }; + template struct weak_result_type : public binary_function { }; + template struct weak_result_type : public binary_function { }; + template struct weak_result_type : public binary_function { }; + template struct weak_result_type : public binary_function { }; + + // 3 or more arguments +#if EASTL_VARIADIC_TEMPLATES_ENABLED + template struct weak_result_type { typedef R result_type; }; + template struct weak_result_type { typedef R result_type; }; + template struct weak_result_type { typedef R result_type; }; + template struct weak_result_type { typedef R result_type; }; + template struct weak_result_type { typedef R result_type; }; + template struct weak_result_type { typedef R result_type; }; + template struct weak_result_type { typedef R result_type; }; +#endif + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // mem_fn_impl + // + template + class mem_fn_impl +#if defined(_MSC_VER) && (_MSC_VER >= 1900) // VS2015 or later + // Due to a (seemingly random) internal compiler error on VS2013 we disable eastl::unary_function and + // binary_function support for eastl::mem_fn as its not widely (if at all) used. If you require this support + // on VS2013 or below please contact us. + : public weak_result_type +#endif + { + public: + typedef T type; + + private: + type func; + + public: + EASTL_FORCE_INLINE mem_fn_impl(type _func) : func(_func) {} + +#if EASTL_VARIADIC_TEMPLATES_ENABLED + template + typename invoke_result::type operator()(ArgTypes&&... 
args) const + { + return invoke(func, eastl::forward(args)...); + } +#else + typename invoke_result::type operator()() const { return invoke_impl(func); } + + template + typename invoke_result0::type operator()(A0& a0) const + { + return invoke(func, a0); + } + + template + typename invoke_result1::type operator()(A0& a0, A1& a1) const + { + return invoke(func, a0, a1); + } + + template + typename invoke_result2::type operator()(A0& a0, A1& a1, A2& a2) const + { + return invoke(func, a0, a1, a2); + } +#endif + }; // mem_fn_impl + + + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // mem_fn -> mem_fn_impl adapters + // + template + EASTL_FORCE_INLINE mem_fn_impl mem_fn(R T::*pm) + { return mem_fn_impl(pm); } + + template + EASTL_FORCE_INLINE mem_fn_impl mem_fn(R (T::*pm)()) + { return mem_fn_impl(pm); } + + template + EASTL_FORCE_INLINE mem_fn_impl mem_fn(R (T::*pm)(A0)) + { return mem_fn_impl(pm); } + + template + EASTL_FORCE_INLINE mem_fn_impl mem_fn(R (T::*pm)(A0, A1)) + { return mem_fn_impl(pm); } + + template + EASTL_FORCE_INLINE mem_fn_impl mem_fn(R (T::*pm)(A0, A1, A2)) + { return mem_fn_impl(pm); } + + template + EASTL_FORCE_INLINE mem_fn_impl mem_fn(R (T::*pm)() const) + { return mem_fn_impl(pm); } + + template + EASTL_FORCE_INLINE mem_fn_impl mem_fn(R (T::*pm)(A0) const) + { return mem_fn_impl(pm); } + + template + EASTL_FORCE_INLINE mem_fn_impl mem_fn(R (T::*pm)(A0, A1) const) + { return mem_fn_impl(pm); } + + template + EASTL_FORCE_INLINE mem_fn_impl mem_fn(R (T::*pm)(A0, A1, A2) const) + { return mem_fn_impl(pm); } + + template + EASTL_FORCE_INLINE mem_fn_impl mem_fn(R (T::*pm)() volatile) + { return mem_fn_impl(pm); } + + template + EASTL_FORCE_INLINE mem_fn_impl mem_fn(R (T::*pm)(A0) volatile) + { return mem_fn_impl(pm); } + + template + EASTL_FORCE_INLINE mem_fn_impl mem_fn(R (T::*pm)(A0, A1) volatile) + { return mem_fn_impl(pm); } + + template + EASTL_FORCE_INLINE mem_fn_impl mem_fn(R (T::*pm)(A0, A1, A2) volatile) + { return mem_fn_impl(pm); } + + template + EASTL_FORCE_INLINE mem_fn_impl mem_fn(R (T::*pm)() const volatile) + { return mem_fn_impl(pm); } + + template + EASTL_FORCE_INLINE mem_fn_impl mem_fn(R (T::*pm)(A0) const volatile) + { return mem_fn_impl(pm); } + + template + EASTL_FORCE_INLINE mem_fn_impl mem_fn(R (T::*pm)(A0, A1) const volatile) + { return mem_fn_impl(pm); } + + template + EASTL_FORCE_INLINE mem_fn_impl mem_fn(R (T::*pm)(A0, A1, A2) const volatile) + { return mem_fn_impl(pm); } + +} // namespace eastl + +#endif // EASTL_INTERNAL_MEM_FN_H diff --git a/libkram/eastl/include/EASTL/internal/memory_base.h b/libkram/eastl/include/EASTL/internal/memory_base.h new file mode 100644 index 00000000..b1c3490b --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/memory_base.h @@ -0,0 +1,37 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_INTERNAL_MEMORY_BASE_H +#define EASTL_INTERNAL_MEMORY_BASE_H + +#include + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. 
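
// A sketch (not part of this patch) of the eastl::mem_fn adapters defined in the
// mem_fn.h section above: a pointer-to-member-function is wrapped so it can be invoked
// like an ordinary callable, which is how eastl::function consumes it. Counter is a
// hypothetical type; <EASTL/functional.h> is assumed to pull in the internal mem_fn header.
#include <EASTL/functional.h>

struct Counter
{
    int value = 0;
    int add(int amount) { return value += amount; }
};

int mem_fn_sketch()
{
    auto add = eastl::mem_fn(&Counter::add);

    Counter counter;
    return add(counter, 5); // equivalent to counter.add(5)
}
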
+#endif + + +//////////////////////////////////////////////////////////////////////////////////////////// +// This file contains basic functionality found in the standard library 'memory' header that +// have limited or no dependencies. This allows us to utilize these utilize these functions +// in other EASTL code while avoid circular dependencies. +//////////////////////////////////////////////////////////////////////////////////////////// + +namespace eastl +{ + /// addressof + /// + /// From the C++11 Standard, section 20.6.12.1 + /// Returns the actual address of the object or function referenced by r, even in the presence of an overloaded operator&. + /// + template + T* addressof(T& value) EA_NOEXCEPT + { + return reinterpret_cast(&const_cast(reinterpret_cast(value))); + } + +} // namespace eastl + +#endif // EASTL_INTERNAL_MEMORY_BASE_H + diff --git a/libkram/eastl/include/EASTL/internal/move_help.h b/libkram/eastl/include/EASTL/internal/move_help.h new file mode 100644 index 00000000..97990df6 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/move_help.h @@ -0,0 +1,162 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_INTERNAL_MOVE_HELP_H +#define EASTL_INTERNAL_MOVE_HELP_H + + +#include +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + +#include +#include + + +// C++11's rvalue references aren't supported by earlier versions of C++. +// It turns out that in a number of cases under earlier C++ versions we can +// write code that uses rvalues similar to lvalues. We have macros below for +// such cases. For example, eastl::move (same as std::move) can be treated +// as a no-op under C++03, though with the consequence that move functionality +// isn't taken advantage of. + + +/// EASTL_MOVE +/// Acts like eastl::move when possible. Same as C++11 std::move. +/// +/// EASTL_MOVE_INLINE +/// Acts like eastl::move but is implemented inline instead of a function call. +/// This allows code to be faster in debug builds in particular. +/// Depends on C++ compiler decltype support or a similar extension. +/// +/// EASTL_FORWARD +/// Acts like eastl::forward when possible. Same as C++11 std::forward. +/// +/// EASTL_FORWARD_INLINE +/// Acts like eastl::forward but is implemented inline instead of a function call. +/// This allows code to be faster in debug builds in particular. +/// +#define EASTL_MOVE(x) eastl::move(x) +#if !defined(EA_COMPILER_NO_DECLTYPE) + #define EASTL_MOVE_INLINE(x) static_cast::type&&>(x) +#elif defined(__GNUC__) + #define EASTL_MOVE_INLINE(x) static_cast::type&&>(x) +#else + #define EASTL_MOVE_INLINE(x) eastl::move(x) +#endif + +#define EASTL_FORWARD(T, x) eastl::forward(x) +#define EASTL_FORWARD_INLINE(T, x) eastl::forward(x) // Need to investigate how to properly make a macro for this. (eastl::is_reference::value ? static_cast(static_cast(x)) : static_cast(x)) + + + + +/// EASTL_MOVE_RANGE +/// Acts like the eastl::move algorithm when possible. Same as C++11 std::move. 
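
// A sketch (not part of this patch) of why the eastl::addressof function in the
// memory_base.h section above exists: it recovers the true address of an object even
// when the type overloads operator&. Awkward is a hypothetical type; <EASTL/memory.h>
// is assumed to expose eastl::addressof.
#include <EASTL/memory.h>

struct Awkward
{
    int value = 0;
    Awkward* operator&() { return nullptr; } // hostile overload defeats plain '&obj'
};

void addressof_sketch()
{
    Awkward a;
    Awkward* real = eastl::addressof(a); // bypasses the overloaded operator&
    (void)real;
}
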
+/// Note to be confused with the single argument move: (typename remove_reference::type&& move(T&& x)) +/// http://en.cppreference.com/w/cpp/algorithm/move +/// http://en.cppreference.com/w/cpp/algorithm/move_backward +/// +#define EASTL_MOVE_RANGE(first, last, result) eastl::move(first, last, result) +#define EASTL_MOVE_BACKWARD_RANGE(first, last, resultEnd) eastl::move_backward(first, last, resultEnd) + + +namespace eastl +{ + // forward + // + // forwards the argument to another function exactly as it was passed to the calling function. + // Not to be confused with move, this is specifically for echoing templated argument types + // to another function. move is specifically about making a type be an rvalue reference (i.e. movable) type. + // + // Example usage: + // template + // void WrapperFunction(T&& arg) + // { foo(eastl::forward(arg)); } + // + // template + // void WrapperFunction(Args&&... args) + // { foo(eastl::forward(args)...); } + // + // See the C++ Standard, section 20.2.3 + // http://en.cppreference.com/w/cpp/utility/forward + // + template + EA_CPP14_CONSTEXPR T&& forward(typename eastl::remove_reference::type& x) EA_NOEXCEPT + { + return static_cast(x); + } + + + template + EA_CPP14_CONSTEXPR T&& forward(typename eastl::remove_reference::type&& x) EA_NOEXCEPT + { + static_assert(!is_lvalue_reference::value, "forward T isn't lvalue reference"); + return static_cast(x); + } + + + // move + // + // move obtains an rvalue reference to its argument and converts it to an xvalue. + // Returns, by definition: static_cast::type&&>(t). + // The primary use of this is to pass a move'd type to a function which takes T&&, + // and thus select that function instead of (e.g.) a function which takes T or T&. + // See the C++ Standard, section 20.2.3 + // http://en.cppreference.com/w/cpp/utility/move + // + template + EA_CPP14_CONSTEXPR typename eastl::remove_reference::type&& + move(T&& x) EA_NOEXCEPT + { + return static_cast::type&&>(x); + } + + + // move_if_noexcept + // + // Returns T&& if move-constructing T throws no exceptions. Instead returns const T& if + // move-constructing T throws exceptions or has no accessible copy constructor. + // The purpose of this is to use automatically use copy construction instead of move + // construction when the move may possible throw an exception. + // See the C++ Standard, section 20.2.3 + // http://en.cppreference.com/w/cpp/utility/move_if_noexcept + // + #if EASTL_EXCEPTIONS_ENABLED + template + EA_CPP14_CONSTEXPR typename eastl::conditional::value && + eastl::is_copy_constructible::value, const T&, T&&>::type + move_if_noexcept(T& x) EA_NOEXCEPT + { + return eastl::move(x); + } + #else + template + EA_CPP14_CONSTEXPR T&& + move_if_noexcept(T& x) EA_NOEXCEPT + { + return eastl::move(x); + } + #endif + +} // namespace eastl + +#endif // Header include guard + + + + + + + + + + + + + + + diff --git a/libkram/eastl/include/EASTL/internal/pair_fwd_decls.h b/libkram/eastl/include/EASTL/internal/pair_fwd_decls.h new file mode 100644 index 00000000..a716482d --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/pair_fwd_decls.h @@ -0,0 +1,16 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
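
// A sketch (not part of this patch) of the perfect-forwarding pattern that the
// move_help.h comments above describe: eastl::forward preserves the value category of
// each argument, and eastl::move turns a named value into an rvalue. Named and
// make_named are hypothetical, illustration-only names.
#include <EASTL/string.h>
#include <EASTL/utility.h>

struct Named
{
    eastl::string name;
    explicit Named(eastl::string n) : name(eastl::move(n)) {} // take by value, then move into place
};

template <typename... Args>
Named make_named(Args&&... args)
{
    // Lvalue arguments are forwarded as lvalues, rvalues as rvalues.
    return Named(eastl::forward<Args>(args)...);
}
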
+///////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_PAIR_FWD_DECLS_H +#define EASTL_PAIR_FWD_DECLS_H + +#include + +namespace eastl +{ + template + struct pair; +} + +#endif // EASTL_PAIR_FWD_DECLS_H diff --git a/libkram/eastl/include/EASTL/internal/piecewise_construct_t.h b/libkram/eastl/include/EASTL/internal/piecewise_construct_t.h new file mode 100644 index 00000000..d853f0ea --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/piecewise_construct_t.h @@ -0,0 +1,46 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_INTERNAL_PIECEWISE_CONSTRUCT_T_H +#define EASTL_INTERNAL_PIECEWISE_CONSTRUCT_T_H + + +#include +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + +namespace eastl +{ + /////////////////////////////////////////////////////////////////////////////// + /// piecewise_construct_t + /// + /// http://en.cppreference.com/w/cpp/utility/piecewise_construct_t + /// + struct piecewise_construct_t + { + explicit piecewise_construct_t() = default; + }; + + + /////////////////////////////////////////////////////////////////////////////// + /// piecewise_construct + /// + /// A tag type used to disambiguate between function overloads that take two tuple arguments. + /// + /// http://en.cppreference.com/w/cpp/utility/piecewise_construct + /// + EA_CONSTEXPR piecewise_construct_t piecewise_construct = eastl::piecewise_construct_t(); + +} // namespace eastl + + +#endif // Header include guard + + + + + + diff --git a/libkram/eastl/include/EASTL/internal/red_black_tree.h b/libkram/eastl/include/EASTL/internal/red_black_tree.h new file mode 100644 index 00000000..7448bd42 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/red_black_tree.h @@ -0,0 +1,2400 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_RED_BLACK_TREE_H +#define EASTL_RED_BLACK_TREE_H + + +#include +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + +#include +#include +#include +#include +#include +#include +#include +#include + +EA_DISABLE_ALL_VC_WARNINGS() +#include +#include +EA_RESTORE_ALL_VC_WARNINGS() + + +// 4512 - 'class' : assignment operator could not be generated +// 4530 - C++ exception handler used, but unwind semantics are not enabled. Specify /EHsc +// 4571 - catch(...) semantics changed since Visual C++ 7.1; structured exceptions (SEH) are no longer caught. +EA_DISABLE_VC_WARNING(4512 4530 4571); + + +namespace eastl +{ + + /// EASTL_RBTREE_DEFAULT_NAME + /// + /// Defines a default container name in the absence of a user-provided name. + /// + #ifndef EASTL_RBTREE_DEFAULT_NAME + #define EASTL_RBTREE_DEFAULT_NAME EASTL_DEFAULT_NAME_PREFIX " rbtree" // Unless the user overrides something, this is "EASTL rbtree". 
+ #endif + + + /// EASTL_RBTREE_DEFAULT_ALLOCATOR + /// + #ifndef EASTL_RBTREE_DEFAULT_ALLOCATOR + #define EASTL_RBTREE_DEFAULT_ALLOCATOR allocator_type(EASTL_RBTREE_DEFAULT_NAME) + #endif + + + /// EASTL_RBTREE_LEGACY_SWAP_BEHAVIOUR_REQUIRES_COPY_CTOR + /// + #ifndef EASTL_RBTREE_LEGACY_SWAP_BEHAVIOUR_REQUIRES_COPY_CTOR + #define EASTL_RBTREE_LEGACY_SWAP_BEHAVIOUR_REQUIRES_COPY_CTOR 0 + #endif + + + /// RBTreeColor + /// + enum RBTreeColor + { + kRBTreeColorRed, + kRBTreeColorBlack + }; + + + + /// RBTreeColor + /// + enum RBTreeSide + { + kRBTreeSideLeft, + kRBTreeSideRight + }; + + + + /// rbtree_node_base + /// + /// We define a rbtree_node_base separately from rbtree_node (below), because it + /// allows us to have non-templated operations, and it makes it so that the + /// rbtree anchor node doesn't carry a T with it, which would waste space and + /// possibly lead to surprising the user due to extra Ts existing that the user + /// didn't explicitly create. The downside to all of this is that it makes debug + /// viewing of an rbtree harder, given that the node pointers are of type + /// rbtree_node_base and not rbtree_node. + /// + struct rbtree_node_base + { + typedef rbtree_node_base this_type; + + public: + this_type* mpNodeRight; // Declared first because it is used most often. + this_type* mpNodeLeft; + this_type* mpNodeParent; + char mColor; // We only need one bit here, would be nice if we could stuff that bit somewhere else. + }; + + + /// rbtree_node + /// + template + struct rbtree_node : public rbtree_node_base + { + Value mValue; // For set and multiset, this is the user's value, for map and multimap, this is a pair of key/value. + + // This type is never constructed, so to avoid a MSVC warning we "delete" the copy constructor. + // + // Potentially we could provide a constructor that would satisfy the compiler and change the code to use this constructor + // instead of constructing mValue in place within an unconstructed rbtree_node. + #if defined(_MSC_VER) + rbtree_node(const rbtree_node&) = delete; + #endif + }; + + + + + // rbtree_node_base functions + // + // These are the fundamental functions that we use to maintain the + // tree. The bulk of the work of the tree maintenance is done in + // these functions. + // + EASTL_API rbtree_node_base* RBTreeIncrement (const rbtree_node_base* pNode); + EASTL_API rbtree_node_base* RBTreeDecrement (const rbtree_node_base* pNode); + EASTL_API rbtree_node_base* RBTreeGetMinChild (const rbtree_node_base* pNode); + EASTL_API rbtree_node_base* RBTreeGetMaxChild (const rbtree_node_base* pNode); + EASTL_API size_t RBTreeGetBlackCount(const rbtree_node_base* pNodeTop, + const rbtree_node_base* pNodeBottom); + EASTL_API void RBTreeInsert ( rbtree_node_base* pNode, + rbtree_node_base* pNodeParent, + rbtree_node_base* pNodeAnchor, + RBTreeSide insertionSide); + EASTL_API void RBTreeErase ( rbtree_node_base* pNode, + rbtree_node_base* pNodeAnchor); + + + + + + + + /// rbtree_iterator + /// + template + struct rbtree_iterator + { + typedef rbtree_iterator this_type; + typedef rbtree_iterator iterator; + typedef rbtree_iterator const_iterator; + typedef eastl_size_t size_type; // See config.h for the definition of eastl_size_t, which defaults to size_t. 
+ typedef ptrdiff_t difference_type; + typedef T value_type; + typedef rbtree_node_base base_node_type; + typedef rbtree_node node_type; + typedef Pointer pointer; + typedef Reference reference; + typedef EASTL_ITC_NS::bidirectional_iterator_tag iterator_category; + + public: + node_type* mpNode; + + public: + rbtree_iterator(); + explicit rbtree_iterator(const node_type* pNode); + rbtree_iterator(const iterator& x); + + reference operator*() const; + pointer operator->() const; + + rbtree_iterator& operator++(); + rbtree_iterator operator++(int); + + rbtree_iterator& operator--(); + rbtree_iterator operator--(int); + + }; // rbtree_iterator + + + /////////////////////////////////////////////////////////////////////////////// + // rb_base_compare_ebo + // + // Utilizes the "empty base-class optimization" to reduce the size of the rbtree + // when its Compare template argument is an empty class. + /////////////////////////////////////////////////////////////////////////////// + + template ::value> + struct rb_base_compare_ebo + { + protected: + rb_base_compare_ebo() : mCompare() {} + rb_base_compare_ebo(const Compare& compare) : mCompare(compare) {} + + Compare& get_compare() { return mCompare; } + const Compare& get_compare() const { return mCompare; } + + template + bool compare(const T& lhs, const T& rhs) + { + return mCompare(lhs, rhs); + } + + template + bool compare(const T& lhs, const T& rhs) const + { + return mCompare(lhs, rhs); + } + + private: + Compare mCompare; + }; + + template + struct rb_base_compare_ebo : private Compare + { + protected: + rb_base_compare_ebo() {} + rb_base_compare_ebo(const Compare& compare) : Compare(compare) {} + + Compare& get_compare() { return *this; } + const Compare& get_compare() const { return *this; } + + template + bool compare(const T& lhs, const T& rhs) + { + return Compare::operator()(lhs, rhs); + } + + template + bool compare(const T& lhs, const T& rhs) const + { + return Compare::operator()(lhs, rhs); + } + }; + + + + /////////////////////////////////////////////////////////////////////////////// + // rb_base + // + // This class allows us to use a generic rbtree as the basis of map, multimap, + // set, and multiset transparently. The vital template parameters for this are + // the ExtractKey and the bUniqueKeys parameters. + // + // If the rbtree has a value type of the form pair (i.e. it is a map or + // multimap and not a set or multiset) and a key extraction policy that returns + // the first part of the pair, the rbtree gets a mapped_type typedef. + // If it satisfies those criteria and also has unique keys, then it also gets an + // operator[] (which only map and set have and multimap and multiset don't have). + // + /////////////////////////////////////////////////////////////////////////////// + + + + /// rb_base + /// This specialization is used for 'set'. In this case, Key and Value + /// will be the same as each other and ExtractKey will be eastl::use_self. + /// + template + struct rb_base : public rb_base_compare_ebo + { + typedef ExtractKey extract_key; + + protected: + using rb_base_compare_ebo::compare; + using rb_base_compare_ebo::get_compare; + + public: + rb_base() {} + rb_base(const Compare& compare) : rb_base_compare_ebo(compare) {} + }; + + + /// rb_base + /// This class is used for 'multiset'. + /// In this case, Key and Value will be the same as each + /// other and ExtractKey will be eastl::use_self. 
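
// A sketch (not part of this patch) of the empty base class optimization that
// rb_base_compare_ebo above relies on: an empty comparator adds no storage when it is
// inherited from, whereas holding it as a member costs at least one byte plus padding.
// The exact sizes printed are implementation-dependent.
#include <cstdio>

struct EmptyLess
{
    template <typename T>
    bool operator()(const T& a, const T& b) const { return a < b; }
};

struct CompareAsMember { EmptyLess mCompare; void* mpRoot; }; // typically 2 * sizeof(void*)
struct CompareAsBase : private EmptyLess { void* mpRoot; };   // typically sizeof(void*)

void ebo_size_sketch()
{
    std::printf("member: %zu, base: %zu\n", sizeof(CompareAsMember), sizeof(CompareAsBase));
}
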
+ /// + template + struct rb_base : public rb_base_compare_ebo + { + typedef ExtractKey extract_key; + + protected: + using rb_base_compare_ebo::compare; + using rb_base_compare_ebo::get_compare; + + public: + rb_base() {} + rb_base(const Compare& compare) : rb_base_compare_ebo(compare) {} + }; + + + /// rb_base + /// This specialization is used for 'map'. + /// + template + struct rb_base, true, RBTree> : public rb_base_compare_ebo + { + typedef eastl::use_first extract_key; + + using rb_base_compare_ebo::compare; + using rb_base_compare_ebo::get_compare; + + public: + rb_base() {} + rb_base(const Compare& compare) : rb_base_compare_ebo(compare) {} + }; + + + /// rb_base + /// This specialization is used for 'multimap'. + /// + template + struct rb_base, false, RBTree> : public rb_base_compare_ebo + { + typedef eastl::use_first extract_key; + + using rb_base_compare_ebo::compare; + using rb_base_compare_ebo::get_compare; + + public: + rb_base() {} + rb_base(const Compare& compare) : rb_base_compare_ebo(compare) {} + }; + + + /// rbtree + /// + /// rbtree is the red-black tree basis for the map, multimap, set, and multiset + /// containers. Just about all the work of those containers is done here, and + /// they are merely a shell which sets template policies that govern the code + /// generation for this rbtree. + /// + /// This rbtree implementation is pretty much the same as all other modern + /// rbtree implementations, as the topic is well known and researched. We may + /// choose to implement a "relaxed balancing" option at some point in the + /// future if it is deemed worthwhile. Most rbtree implementations don't do this. + /// + /// The primary rbtree member variable is mAnchor, which is a node_type and + /// acts as the end node. However, like any other node, it has mpNodeLeft, + /// mpNodeRight, and mpNodeParent members. We do the conventional trick of + /// assigning begin() (left-most rbtree node) to mpNodeLeft, assigning + /// 'end() - 1' (a.k.a. rbegin()) to mpNodeRight, and assigning the tree root + /// node to mpNodeParent. + /// + /// Compare (functor): This is a comparison class which defaults to 'less'. + /// It is a common STL thing which takes two arguments and returns true if + /// the first is less than the second. + /// + /// ExtractKey (functor): This is a class which gets the key from a stored + /// node. With map and set, the node is a pair, whereas with set and multiset + /// the node is just the value. ExtractKey will be either eastl::use_first (map and multimap) + /// or eastl::use_self (set and multiset). + /// + /// bMutableIterators (bool): true if rbtree::iterator is a mutable + /// iterator, false if iterator and const_iterator are both const iterators. + /// It will be true for map and multimap and false for set and multiset. + /// + /// bUniqueKeys (bool): true if the keys are to be unique, and false if there + /// can be multiple instances of a given key. It will be true for set and map + /// and false for multiset and multimap. + /// + /// To consider: Add an option for relaxed tree balancing. This could result + /// in performance improvements but would require a more complicated implementation. + /// + /////////////////////////////////////////////////////////////////////// + /// find_as + /// In order to support the ability to have a tree of strings but + /// be able to do efficiently lookups via char pointers (i.e. so they + /// aren't converted to string objects), we provide the find_as + /// function. 
This function allows you to do a find with a key of a + /// type other than the tree's key type. See the find_as function + /// for more documentation on this. + /// + template + class rbtree + : public rb_base > + { + public: + typedef ptrdiff_t difference_type; + typedef eastl_size_t size_type; // See config.h for the definition of eastl_size_t, which defaults to size_t. + typedef Key key_type; + typedef Value value_type; + typedef rbtree_node node_type; + typedef value_type& reference; + typedef const value_type& const_reference; + typedef value_type* pointer; + typedef const value_type* const_pointer; + + typedef typename type_select, + rbtree_iterator >::type iterator; + typedef rbtree_iterator const_iterator; + typedef eastl::reverse_iterator reverse_iterator; + typedef eastl::reverse_iterator const_reverse_iterator; + + typedef Allocator allocator_type; + typedef Compare key_compare; + typedef typename type_select, iterator>::type insert_return_type; // map/set::insert return a pair, multimap/multiset::iterator return an iterator. + typedef rbtree this_type; + typedef rb_base base_type; + typedef integral_constant has_unique_keys_type; + typedef typename base_type::extract_key extract_key; + + protected: + using base_type::compare; + using base_type::get_compare; + + public: + rbtree_node_base mAnchor; /// This node acts as end() and its mpLeft points to begin(), and mpRight points to rbegin() (the last node on the right). + size_type mnSize; /// Stores the count of nodes in the tree (not counting the anchor node). + allocator_type mAllocator; // To do: Use base class optimization to make this go away. + + public: + // ctor/dtor + rbtree(); + rbtree(const allocator_type& allocator); + rbtree(const Compare& compare, const allocator_type& allocator = EASTL_RBTREE_DEFAULT_ALLOCATOR); + rbtree(const this_type& x); + rbtree(this_type&& x); + rbtree(this_type&& x, const allocator_type& allocator); + + template + rbtree(InputIterator first, InputIterator last, const Compare& compare, const allocator_type& allocator = EASTL_RBTREE_DEFAULT_ALLOCATOR); + + ~rbtree(); + + public: + // properties + const allocator_type& get_allocator() const EA_NOEXCEPT; + allocator_type& get_allocator() EA_NOEXCEPT; + void set_allocator(const allocator_type& allocator); + + const key_compare& key_comp() const { return get_compare(); } + key_compare& key_comp() { return get_compare(); } + + this_type& operator=(const this_type& x); + this_type& operator=(std::initializer_list ilist); + this_type& operator=(this_type&& x); + + void swap(this_type& x); + + public: + // iterators + iterator begin() EA_NOEXCEPT; + const_iterator begin() const EA_NOEXCEPT; + const_iterator cbegin() const EA_NOEXCEPT; + + iterator end() EA_NOEXCEPT; + const_iterator end() const EA_NOEXCEPT; + const_iterator cend() const EA_NOEXCEPT; + + reverse_iterator rbegin() EA_NOEXCEPT; + const_reverse_iterator rbegin() const EA_NOEXCEPT; + const_reverse_iterator crbegin() const EA_NOEXCEPT; + + reverse_iterator rend() EA_NOEXCEPT; + const_reverse_iterator rend() const EA_NOEXCEPT; + const_reverse_iterator crend() const EA_NOEXCEPT; + + public: + bool empty() const EA_NOEXCEPT; + size_type size() const EA_NOEXCEPT; + + template + insert_return_type emplace(Args&&... args); + + template + iterator emplace_hint(const_iterator position, Args&&... args); + + template eastl::pair try_emplace(const key_type& k, Args&&... args); + template eastl::pair try_emplace(key_type&& k, Args&&... 
args); + template iterator try_emplace(const_iterator position, const key_type& k, Args&&... args); + template iterator try_emplace(const_iterator position, key_type&& k, Args&&... args); + + // Standard conversion overload to avoid the overhead of mismatched 'pair' types. + template ::value>::type> + insert_return_type insert(P&& otherValue); + + // Currently limited to value_type instead of P because it collides with insert(InputIterator, InputIterator). + // To allow this to work with templated P we need to implement a compile-time specialization for the + // case that P&& is const_iterator and have that specialization handle insert(InputIterator, InputIterator) + // instead of insert(InputIterator, InputIterator). Curiously, neither libstdc++ nor libc++ + // implement this function either, which suggests they ran into the same problem I did here + // and haven't yet resolved it (at least as of March 2014, GCC 4.8.1). + iterator insert(const_iterator hint, value_type&& value); + + /// map::insert and set::insert return a pair, while multimap::insert and + /// multiset::insert return an iterator. + insert_return_type insert(const value_type& value); + + // C++ standard: inserts value if and only if there is no element with + // key equivalent to the key of t in containers with unique keys; always + // inserts value in containers with equivalent keys. Always returns the + // iterator pointing to the element with key equivalent to the key of value. + // iterator position is a hint pointing to where the insert should start + // to search. However, there is a potential defect/improvement report on this behaviour: + // LWG issue #233 (http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2005/n1780.html) + // We follow the same approach as SGI STL/STLPort and use the position as + // a forced insertion position for the value when possible. + iterator insert(const_iterator position, const value_type& value); + + void insert(std::initializer_list ilist); + + template + void insert(InputIterator first, InputIterator last); + + // TODO(rparolin): + // insert_return_type insert(node_type&& nh); + // iterator insert(const_iterator hint, node_type&& nh); + + template pair insert_or_assign(const key_type& k, M&& obj); + template pair insert_or_assign(key_type&& k, M&& obj); + template iterator insert_or_assign(const_iterator hint, const key_type& k, M&& obj); + template iterator insert_or_assign(const_iterator hint, key_type&& k, M&& obj); + + iterator erase(const_iterator position); + iterator erase(const_iterator first, const_iterator last); + reverse_iterator erase(const_reverse_iterator position); + reverse_iterator erase(const_reverse_iterator first, const_reverse_iterator last); + + // For some reason, multiple STL versions make a specialization + // for erasing an array of key_types. I'm pretty sure we don't + // need this, but just to be safe we will follow suit. + // The implementation is trivial. Returns void because the values + // could well be randomly distributed throughout the tree and thus + // a return value would be nearly meaningless. + void erase(const key_type* first, const key_type* last); + + void clear(); + void reset_lose_memory(); // This is a unilateral reset to an initially empty state. No destructors are called, no deallocation occurs. + + iterator find(const key_type& key); + const_iterator find(const key_type& key) const; + + /// Implements a find whereby the user supplies a comparison of a different type + /// than the tree's value_type. 
A useful case of this is one whereby you have + /// a container of string objects but want to do searches via passing in char pointers. + /// The problem is that without this kind of find, you need to do the expensive operation + /// of converting the char pointer to a string so it can be used as the argument to the + /// find function. + /// + /// Example usage (note that the compare uses string as first type and char* as second): + /// set strings; + /// strings.find_as("hello", less_2()); + /// + template iterator find_as(const U& u, Compare2 compare2); + template const_iterator find_as(const U& u, Compare2 compare2) const; + + iterator lower_bound(const key_type& key); + const_iterator lower_bound(const key_type& key) const; + + iterator upper_bound(const key_type& key); + const_iterator upper_bound(const key_type& key) const; + + bool validate() const; + int validate_iterator(const_iterator i) const; + + protected: + node_type* DoAllocateNode(); + void DoFreeNode(node_type* pNode); + + node_type* DoCreateNodeFromKey(const key_type& key); + + template + node_type* DoCreateNode(Args&&... args); + node_type* DoCreateNode(const value_type& value); + node_type* DoCreateNode(value_type&& value); + node_type* DoCreateNode(const node_type* pNodeSource, node_type* pNodeParent); + + node_type* DoCopySubtree(const node_type* pNodeSource, node_type* pNodeDest); + void DoNukeSubtree(node_type* pNode); + + template + eastl::pair DoInsertValue(true_type, Args&&... args); + + template + iterator DoInsertValue(false_type, Args&&... args); + + eastl::pair DoInsertValue(true_type, value_type&& value); + iterator DoInsertValue(false_type, value_type&& value); + + template + iterator DoInsertValueImpl(node_type* pNodeParent, bool bForceToLeft, const key_type& key, Args&&... args); + iterator DoInsertValueImpl(node_type* pNodeParent, bool bForceToLeft, const key_type& key, node_type* pNodeNew); + + eastl::pair DoInsertKey(true_type, const key_type& key); + iterator DoInsertKey(false_type, const key_type& key); + + template + iterator DoInsertValueHint(true_type, const_iterator position, Args&&... args); + + template + iterator DoInsertValueHint(false_type, const_iterator position, Args&&... args); + + iterator DoInsertValueHint(true_type, const_iterator position, value_type&& value); + iterator DoInsertValueHint(false_type, const_iterator position, value_type&& value); + + iterator DoInsertKey(true_type, const_iterator position, const key_type& key); // By design we return iterator and not a pair. 
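+ // Note: the true_type/false_type first parameter on these Do* overloads is a
+ // compile-time tag taken from has_unique_keys_type; a call such as
+ //     DoInsertKey(has_unique_keys_type(), key);
+ // resolves to the unique-key overload for map/set and to the non-unique overload
+ // for multimap/multiset.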
+ iterator DoInsertKey(false_type, const_iterator position, const key_type& key); + iterator DoInsertKeyImpl(node_type* pNodeParent, bool bForceToLeft, const key_type& key); + + node_type* DoGetKeyInsertionPositionUniqueKeys(bool& canInsert, const key_type& key); + node_type* DoGetKeyInsertionPositionNonuniqueKeys(const key_type& key); + + node_type* DoGetKeyInsertionPositionUniqueKeysHint(const_iterator position, bool& bForceToLeft, const key_type& key); + node_type* DoGetKeyInsertionPositionNonuniqueKeysHint(const_iterator position, bool& bForceToLeft, const key_type& key); + + }; // rbtree + + + + + + /////////////////////////////////////////////////////////////////////// + // rbtree_node_base functions + /////////////////////////////////////////////////////////////////////// + + EASTL_API inline rbtree_node_base* RBTreeGetMinChild(const rbtree_node_base* pNodeBase) + { + while(pNodeBase->mpNodeLeft) + pNodeBase = pNodeBase->mpNodeLeft; + return const_cast(pNodeBase); + } + + EASTL_API inline rbtree_node_base* RBTreeGetMaxChild(const rbtree_node_base* pNodeBase) + { + while(pNodeBase->mpNodeRight) + pNodeBase = pNodeBase->mpNodeRight; + return const_cast(pNodeBase); + } + + // The rest of the functions are non-trivial and are found in + // the corresponding .cpp file to this file. + + + + /////////////////////////////////////////////////////////////////////// + // rbtree_iterator functions + /////////////////////////////////////////////////////////////////////// + + template + rbtree_iterator::rbtree_iterator() + : mpNode(NULL) { } + + + template + rbtree_iterator::rbtree_iterator(const node_type* pNode) + : mpNode(static_cast(const_cast(pNode))) { } + + + template + rbtree_iterator::rbtree_iterator(const iterator& x) + : mpNode(x.mpNode) { } + + + template + typename rbtree_iterator::reference + rbtree_iterator::operator*() const + { return mpNode->mValue; } + + + template + typename rbtree_iterator::pointer + rbtree_iterator::operator->() const + { return &mpNode->mValue; } + + + template + typename rbtree_iterator::this_type& + rbtree_iterator::operator++() + { + mpNode = static_cast(RBTreeIncrement(mpNode)); + return *this; + } + + + template + typename rbtree_iterator::this_type + rbtree_iterator::operator++(int) + { + this_type temp(*this); + mpNode = static_cast(RBTreeIncrement(mpNode)); + return temp; + } + + + template + typename rbtree_iterator::this_type& + rbtree_iterator::operator--() + { + mpNode = static_cast(RBTreeDecrement(mpNode)); + return *this; + } + + + template + typename rbtree_iterator::this_type + rbtree_iterator::operator--(int) + { + this_type temp(*this); + mpNode = static_cast(RBTreeDecrement(mpNode)); + return temp; + } + + + // The C++ defect report #179 requires that we support comparisons between const and non-const iterators. + // Thus we provide additional template paremeters here to support this. The defect report does not + // require us to support comparisons between reverse_iterators and const_reverse_iterators. + template + inline bool operator==(const rbtree_iterator& a, + const rbtree_iterator& b) + { + return a.mpNode == b.mpNode; + } + + + template + inline bool operator!=(const rbtree_iterator& a, + const rbtree_iterator& b) + { + return a.mpNode != b.mpNode; + } + + + // We provide a version of operator!= for the case where the iterators are of the + // same type. This helps prevent ambiguity errors in the presence of rel_ops. 
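+ // For illustration (hypothetical snippet): given two non-const iterators into the
+ // same tree,
+ //     if(it1 != it2) { ... }
+ // could otherwise be ambiguous between a rel_ops-provided operator!= and the
+ // mixed-type overload above; the same-type overload below is the better match.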
+ template + inline bool operator!=(const rbtree_iterator& a, + const rbtree_iterator& b) + { + return a.mpNode != b.mpNode; + } + + + + + /////////////////////////////////////////////////////////////////////// + // rbtree functions + /////////////////////////////////////////////////////////////////////// + + template + inline rbtree::rbtree() + : mAnchor(), + mnSize(0), + mAllocator(EASTL_RBTREE_DEFAULT_NAME) + { + reset_lose_memory(); + } + + + template + inline rbtree::rbtree(const allocator_type& allocator) + : mAnchor(), + mnSize(0), + mAllocator(allocator) + { + reset_lose_memory(); + } + + + template + inline rbtree::rbtree(const C& compare, const allocator_type& allocator) + : base_type(compare), + mAnchor(), + mnSize(0), + mAllocator(allocator) + { + reset_lose_memory(); + } + + + template + inline rbtree::rbtree(const this_type& x) + : base_type(x.get_compare()), + mAnchor(), + mnSize(0), + mAllocator(x.mAllocator) + { + reset_lose_memory(); + + if(x.mAnchor.mpNodeParent) // mAnchor.mpNodeParent is the rb_tree root node. + { + mAnchor.mpNodeParent = DoCopySubtree((const node_type*)x.mAnchor.mpNodeParent, (node_type*)&mAnchor); + mAnchor.mpNodeRight = RBTreeGetMaxChild(mAnchor.mpNodeParent); + mAnchor.mpNodeLeft = RBTreeGetMinChild(mAnchor.mpNodeParent); + mnSize = x.mnSize; + } + } + + + template + inline rbtree::rbtree(this_type&& x) + : base_type(x.get_compare()), + mAnchor(), + mnSize(0), + mAllocator(x.mAllocator) + { + reset_lose_memory(); + swap(x); + } + + template + inline rbtree::rbtree(this_type&& x, const allocator_type& allocator) + : base_type(x.get_compare()), + mAnchor(), + mnSize(0), + mAllocator(allocator) + { + reset_lose_memory(); + swap(x); // swap will directly or indirectly handle the possibility that mAllocator != x.mAllocator. + } + + + template + template + inline rbtree::rbtree(InputIterator first, InputIterator last, const C& compare, const allocator_type& allocator) + : base_type(compare), + mAnchor(), + mnSize(0), + mAllocator(allocator) + { + reset_lose_memory(); + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + for(; first != last; ++first) + insert(*first); + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + clear(); + throw; + } + #endif + } + + + template + inline rbtree::~rbtree() + { + // Erase the entire tree. DoNukeSubtree is not a + // conventional erase function, as it does no rebalancing. 
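+ // No rebalancing is needed here: every node is about to be destroyed, and the
+ // anchor itself is a plain member of the tree, so it is never freed.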
+ DoNukeSubtree((node_type*)mAnchor.mpNodeParent); + } + + + template + inline const typename rbtree::allocator_type& + rbtree::get_allocator() const EA_NOEXCEPT + { + return mAllocator; + } + + + template + inline typename rbtree::allocator_type& + rbtree::get_allocator() EA_NOEXCEPT + { + return mAllocator; + } + + + template + inline void rbtree::set_allocator(const allocator_type& allocator) + { + mAllocator = allocator; + } + + + template + inline typename rbtree::size_type + rbtree::size() const EA_NOEXCEPT + { return mnSize; } + + + template + inline bool rbtree::empty() const EA_NOEXCEPT + { return (mnSize == 0); } + + + template + inline typename rbtree::iterator + rbtree::begin() EA_NOEXCEPT + { return iterator(static_cast(mAnchor.mpNodeLeft)); } + + + template + inline typename rbtree::const_iterator + rbtree::begin() const EA_NOEXCEPT + { return const_iterator(static_cast(const_cast(mAnchor.mpNodeLeft))); } + + + template + inline typename rbtree::const_iterator + rbtree::cbegin() const EA_NOEXCEPT + { return const_iterator(static_cast(const_cast(mAnchor.mpNodeLeft))); } + + + template + inline typename rbtree::iterator + rbtree::end() EA_NOEXCEPT + { return iterator(static_cast(&mAnchor)); } + + + template + inline typename rbtree::const_iterator + rbtree::end() const EA_NOEXCEPT + { return const_iterator(static_cast(const_cast(&mAnchor))); } + + + template + inline typename rbtree::const_iterator + rbtree::cend() const EA_NOEXCEPT + { return const_iterator(static_cast(const_cast(&mAnchor))); } + + + template + inline typename rbtree::reverse_iterator + rbtree::rbegin() EA_NOEXCEPT + { return reverse_iterator(end()); } + + + template + inline typename rbtree::const_reverse_iterator + rbtree::rbegin() const EA_NOEXCEPT + { return const_reverse_iterator(end()); } + + + template + inline typename rbtree::const_reverse_iterator + rbtree::crbegin() const EA_NOEXCEPT + { return const_reverse_iterator(end()); } + + + template + inline typename rbtree::reverse_iterator + rbtree::rend() EA_NOEXCEPT + { return reverse_iterator(begin()); } + + + template + inline typename rbtree::const_reverse_iterator + rbtree::rend() const EA_NOEXCEPT + { return const_reverse_iterator(begin()); } + + + template + inline typename rbtree::const_reverse_iterator + rbtree::crend() const EA_NOEXCEPT + { return const_reverse_iterator(begin()); } + + + template + inline typename rbtree::this_type& + rbtree::operator=(const this_type& x) + { + if(this != &x) + { + clear(); + + #if EASTL_ALLOCATOR_COPY_ENABLED + mAllocator = x.mAllocator; + #endif + + get_compare() = x.get_compare(); + + if(x.mAnchor.mpNodeParent) // mAnchor.mpNodeParent is the rb_tree root node. + { + mAnchor.mpNodeParent = DoCopySubtree((const node_type*)x.mAnchor.mpNodeParent, (node_type*)&mAnchor); + mAnchor.mpNodeRight = RBTreeGetMaxChild(mAnchor.mpNodeParent); + mAnchor.mpNodeLeft = RBTreeGetMinChild(mAnchor.mpNodeParent); + mnSize = x.mnSize; + } + } + return *this; + } + + template + inline typename rbtree::this_type& + rbtree::operator=(this_type&& x) + { + if(this != &x) + { + clear(); // To consider: Are we really required to clear here? x is going away soon and will clear itself in its dtor. + swap(x); // member swap handles the case that x has a different allocator than our allocator by doing a copy. + } + return *this; + } + + template + inline typename rbtree::this_type& + rbtree::operator=(std::initializer_list ilist) + { + // The simplest means of doing this is to clear and insert. 
There probably isn't a generic + // solution that's any more efficient without having prior knowledge of the ilist contents. + clear(); + + for(typename std::initializer_list::iterator it = ilist.begin(), itEnd = ilist.end(); it != itEnd; ++it) + DoInsertValue(has_unique_keys_type(), eastl::move(*it)); + + return *this; + } + + + template + void rbtree::swap(this_type& x) + { + #if EASTL_RBTREE_LEGACY_SWAP_BEHAVIOUR_REQUIRES_COPY_CTOR + if(mAllocator == x.mAllocator) // If allocators are equivalent... + #endif + { + // Most of our members can be exchaged by a basic swap: + // We leave mAllocator as-is. + eastl::swap(mnSize, x.mnSize); + eastl::swap(get_compare(), x.get_compare()); + #if !EASTL_RBTREE_LEGACY_SWAP_BEHAVIOUR_REQUIRES_COPY_CTOR + eastl::swap(mAllocator, x.mAllocator); + #endif + + + // However, because our anchor node is a part of our class instance and not + // dynamically allocated, we can't do a swap of it but must do a more elaborate + // procedure. This is the downside to having the mAnchor be like this, but + // otherwise we consider it a good idea to avoid allocating memory for a + // nominal container instance. + + // We optimize for the expected most common case: both pointers being non-null. + if(mAnchor.mpNodeParent && x.mAnchor.mpNodeParent) // If both pointers are non-null... + { + eastl::swap(mAnchor.mpNodeRight, x.mAnchor.mpNodeRight); + eastl::swap(mAnchor.mpNodeLeft, x.mAnchor.mpNodeLeft); + eastl::swap(mAnchor.mpNodeParent, x.mAnchor.mpNodeParent); + + // We need to fix up the anchors to point to themselves (we can't just swap them). + mAnchor.mpNodeParent->mpNodeParent = &mAnchor; + x.mAnchor.mpNodeParent->mpNodeParent = &x.mAnchor; + } + else if(mAnchor.mpNodeParent) + { + x.mAnchor.mpNodeRight = mAnchor.mpNodeRight; + x.mAnchor.mpNodeLeft = mAnchor.mpNodeLeft; + x.mAnchor.mpNodeParent = mAnchor.mpNodeParent; + x.mAnchor.mpNodeParent->mpNodeParent = &x.mAnchor; + + // We need to fix up our anchor to point it itself (we can't have it swap with x). + mAnchor.mpNodeRight = &mAnchor; + mAnchor.mpNodeLeft = &mAnchor; + mAnchor.mpNodeParent = NULL; + } + else if(x.mAnchor.mpNodeParent) + { + mAnchor.mpNodeRight = x.mAnchor.mpNodeRight; + mAnchor.mpNodeLeft = x.mAnchor.mpNodeLeft; + mAnchor.mpNodeParent = x.mAnchor.mpNodeParent; + mAnchor.mpNodeParent->mpNodeParent = &mAnchor; + + // We need to fix up x's anchor to point it itself (we can't have it swap with us). + x.mAnchor.mpNodeRight = &x.mAnchor; + x.mAnchor.mpNodeLeft = &x.mAnchor; + x.mAnchor.mpNodeParent = NULL; + } // Else both are NULL and there is nothing to do. + } + #if EASTL_RBTREE_LEGACY_SWAP_BEHAVIOUR_REQUIRES_COPY_CTOR + else + { + const this_type temp(*this); // Can't call eastl::swap because that would + *this = x; // itself call this member swap function. + x = temp; + } + #endif + } + + + template + template + inline typename rbtree::insert_return_type // map/set::insert return a pair, multimap/multiset::iterator return an iterator. + rbtree::emplace(Args&&... args) + { + return DoInsertValue(has_unique_keys_type(), eastl::forward(args)...); + } + + template + template + typename rbtree::iterator + rbtree::emplace_hint(const_iterator position, Args&&... args) + { + return DoInsertValueHint(has_unique_keys_type(), position, eastl::forward(args)...); + } + + template + template + inline eastl::pair::iterator, bool> + rbtree::try_emplace(const key_type& key, Args&&... 
args) + { + return DoInsertValue(has_unique_keys_type(), piecewise_construct, eastl::forward_as_tuple(key), eastl::forward_as_tuple(eastl::forward(args)...)); + } + + template + template + inline eastl::pair::iterator, bool> + rbtree::try_emplace(key_type&& key, Args&&... args) + { + return DoInsertValue(has_unique_keys_type(), piecewise_construct, eastl::forward_as_tuple(eastl::move(key)), eastl::forward_as_tuple(eastl::forward(args)...)); + } + + template + template + inline typename rbtree::iterator + rbtree::try_emplace(const_iterator position, const key_type& key, Args&&... args) + { + return DoInsertValueHint( + has_unique_keys_type(), position, + piecewise_construct, eastl::forward_as_tuple(key), eastl::forward_as_tuple(eastl::forward(args)...)); + } + + template + template + inline typename rbtree::iterator + rbtree::try_emplace(const_iterator position, key_type&& key, Args&&... args) + { + return DoInsertValueHint( + has_unique_keys_type(), position, + piecewise_construct, eastl::forward_as_tuple(eastl::move(key)), eastl::forward_as_tuple(eastl::forward(args)...)); + } + + + template + template + inline typename rbtree::insert_return_type // map/set::insert return a pair, multimap/multiset::iterator return an iterator. + rbtree::insert(P&& otherValue) + { + // Need to use forward instead of move because P&& is a "universal reference" instead of an rvalue reference. + return emplace(eastl::forward
(otherValue)); + } + + + template + inline typename rbtree::iterator + rbtree::insert(const_iterator position, value_type&& value) + { + return DoInsertValueHint(has_unique_keys_type(), position, eastl::move(value)); + } + + + template + inline typename rbtree::insert_return_type // map/set::insert return a pair, multimap/multiset::iterator return an iterator. + rbtree::insert(const value_type& value) + { + return DoInsertValue(has_unique_keys_type(), value); + } + + + template + typename rbtree::iterator + rbtree::insert(const_iterator position, const value_type& value) + { + return DoInsertValueHint(has_unique_keys_type(), position, value); + } + + + template + template + eastl::pair::iterator, bool> + rbtree::insert_or_assign(const key_type& k, M&& obj) + { + auto iter = find(k); + + if(iter == end()) + { + return insert(value_type(piecewise_construct, eastl::forward_as_tuple(k), eastl::forward_as_tuple(eastl::forward(obj)))); + } + else + { + iter->second = eastl::forward(obj); + return {iter, false}; + } + } + + template + template + eastl::pair::iterator, bool> + rbtree::insert_or_assign(key_type&& k, M&& obj) + { + auto iter = find(k); + + if(iter == end()) + { + return insert(value_type(piecewise_construct, eastl::forward_as_tuple(eastl::move(k)), eastl::forward_as_tuple(eastl::forward(obj)))); + } + else + { + iter->second = eastl::forward(obj); + return {iter, false}; + } + } + + template + template + typename rbtree::iterator + rbtree::insert_or_assign(const_iterator hint, const key_type& k, M&& obj) + { + auto iter = find(k); + + if(iter == end()) + { + return insert(hint, value_type(piecewise_construct, eastl::forward_as_tuple(k), eastl::forward_as_tuple(eastl::forward(obj)))); + } + else + { + iter->second = eastl::forward(obj); + return iter; + } + } + + template + template + typename rbtree::iterator + rbtree::insert_or_assign(const_iterator hint, key_type&& k, M&& obj) + { + auto iter = find(k); + + if(iter == end()) + { + return insert(hint, value_type(piecewise_construct, eastl::forward_as_tuple(eastl::move(k)), eastl::forward_as_tuple(eastl::forward(obj)))); + } + else + { + iter->second = eastl::forward(obj); + return iter; + } + } + + template + typename rbtree::node_type* + rbtree::DoGetKeyInsertionPositionUniqueKeys(bool& canInsert, const key_type& key) + { + // This code is essentially a slightly modified copy of the the rbtree::insert + // function whereby this version takes a key and not a full value_type. + extract_key extractKey; + + node_type* pCurrent = (node_type*)mAnchor.mpNodeParent; // Start with the root node. + node_type* pLowerBound = (node_type*)&mAnchor; // Set it to the container end for now. + node_type* pParent; // This will be where we insert the new node. + + bool bValueLessThanNode = true; // If the tree is empty, this will result in an insertion at the front. + + // Find insertion position of the value. This will either be a position which + // already contains the value, a position which is greater than the value or + // end(), which we treat like a position which is greater than the value. + while(EASTL_LIKELY(pCurrent)) // Do a walk down the tree. + { + bValueLessThanNode = compare(key, extractKey(pCurrent->mValue)); + pLowerBound = pCurrent; + + if(bValueLessThanNode) + { + EASTL_VALIDATE_COMPARE(!compare(extractKey(pCurrent->mValue), key)); // Validate that the compare function is sane. 
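+ // key compares less than this node's key, so the insertion position must lie in the left subtree.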
+ pCurrent = (node_type*)pCurrent->mpNodeLeft; + } + else + pCurrent = (node_type*)pCurrent->mpNodeRight; + } + + pParent = pLowerBound; // pLowerBound is actually upper bound right now (i.e. it is > value instead of <=), but we will make it the lower bound below. + + if(bValueLessThanNode) // If we ended up on the left side of the last parent node... + { + if(EASTL_LIKELY(pLowerBound != (node_type*)mAnchor.mpNodeLeft)) // If the tree was empty or if we otherwise need to insert at the very front of the tree... + { + // At this point, pLowerBound points to a node which is > than value. + // Move it back by one, so that it points to a node which is <= value. + pLowerBound = (node_type*)RBTreeDecrement(pLowerBound); + } + else + { + canInsert = true; + return pLowerBound; + } + } + + // Since here we require values to be unique, we will do nothing if the value already exists. + if(compare(extractKey(pLowerBound->mValue), key)) // If the node is < the value (i.e. if value is >= the node)... + { + EASTL_VALIDATE_COMPARE(!compare(key, extractKey(pLowerBound->mValue))); // Validate that the compare function is sane. + canInsert = true; + return pParent; + } + + // The item already exists (as found by the compare directly above), so return false. + canInsert = false; + return pLowerBound; + } + + + template + typename rbtree::node_type* + rbtree::DoGetKeyInsertionPositionNonuniqueKeys(const key_type& key) + { + // This is the pathway for insertion of non-unique keys (multimap and multiset, but not map and set). + node_type* pCurrent = (node_type*)mAnchor.mpNodeParent; // Start with the root node. + node_type* pRangeEnd = (node_type*)&mAnchor; // Set it to the container end for now. + extract_key extractKey; + + while(pCurrent) + { + pRangeEnd = pCurrent; + + if(compare(key, extractKey(pCurrent->mValue))) + { + EASTL_VALIDATE_COMPARE(!compare(extractKey(pCurrent->mValue), key)); // Validate that the compare function is sane. + pCurrent = (node_type*)pCurrent->mpNodeLeft; + } + else + pCurrent = (node_type*)pCurrent->mpNodeRight; + } + + return pRangeEnd; + } + + + template + eastl::pair::iterator, bool> + rbtree::DoInsertValue(true_type, value_type&& value) + { + extract_key extractKey; + key_type key(extractKey(value)); + bool canInsert; + node_type* pPosition = DoGetKeyInsertionPositionUniqueKeys(canInsert, key); + + if(canInsert) + { + const iterator itResult(DoInsertValueImpl(pPosition, false, key, eastl::move(value))); + return pair(itResult, true); + } + + return pair(iterator(pPosition), false); + } + + + template + typename rbtree::iterator + rbtree::DoInsertValue(false_type, value_type&& value) + { + extract_key extractKey; + key_type key(extractKey(value)); + node_type* pPosition = DoGetKeyInsertionPositionNonuniqueKeys(key); + + return DoInsertValueImpl(pPosition, false, key, eastl::move(value)); + } + + + template + template + eastl::pair::iterator, bool> + rbtree::DoInsertValue(true_type, Args&&... args) // true_type means keys are unique. + { + // This is the pathway for insertion of unique keys (map and set, but not multimap and multiset). + // Note that we return a pair and not an iterator. This is because the C++ standard for map + // and set is to return a pair and not just an iterator. + + node_type* pNodeNew = DoCreateNode(eastl::forward(args)...); // Note that pNodeNew->mpLeft, mpRight, mpParent, will be uninitialized. 
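+ // The node is constructed first so its key can be read from the in-place value;
+ // if the key turns out to already exist, the node is freed again below.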
+ const key_type& key = extract_key{}(pNodeNew->mValue); + + bool canInsert; + node_type* pPosition = DoGetKeyInsertionPositionUniqueKeys(canInsert, key); + + if(canInsert) + { + iterator itResult(DoInsertValueImpl(pPosition, false, key, pNodeNew)); + return pair(itResult, true); + } + + DoFreeNode(pNodeNew); + return pair(iterator(pPosition), false); + } + + + template + template + typename rbtree::iterator + rbtree::DoInsertValue(false_type, Args&&... args) // false_type means keys are not unique. + { + // We have a problem here if sizeof(value_type) is too big for the stack. We may want to consider having a specialization for large value_types. + // To do: Change this so that we call DoCreateNode(eastl::forward(args)...) here and use the value from the resulting pNode to get the + // key, and make DoInsertValueImpl take that node as an argument. That way there is no value created on the stack. + + node_type* const pNodeNew = DoCreateNode(eastl::forward(args)...); // Note that pNodeNew->mpLeft, mpRight, mpParent, will be uninitialized. + const key_type& key = extract_key{}(pNodeNew->mValue); + + node_type* pPosition = DoGetKeyInsertionPositionNonuniqueKeys(key); + + return DoInsertValueImpl(pPosition, false, key, pNodeNew); + } + + + template + template + typename rbtree::iterator + rbtree::DoInsertValueImpl(node_type* pNodeParent, bool bForceToLeft, const key_type& key, Args&&... args) + { + node_type* const pNodeNew = DoCreateNode(eastl::forward(args)...); // Note that pNodeNew->mpLeft, mpRight, mpParent, will be uninitialized. + + return DoInsertValueImpl(pNodeParent, bForceToLeft, key, pNodeNew); + } + + + template + typename rbtree::iterator + rbtree::DoInsertValueImpl(node_type* pNodeParent, bool bForceToLeft, const key_type& key, node_type* pNodeNew) + { + EASTL_ASSERT_MSG(pNodeNew != nullptr, "node to insert to the rbtree must not be null"); + + RBTreeSide side; + extract_key extractKey; + + // The reason we may want to have bForceToLeft == true is that pNodeParent->mValue and value may be equal. + // In that case it doesn't matter what side we insert on, except that the C++ LWG #233 improvement report + // suggests that we should use the insert hint position to force an ordering. So that's what we do. + if(bForceToLeft || (pNodeParent == &mAnchor) || compare(key, extractKey(pNodeParent->mValue))) + side = kRBTreeSideLeft; + else + side = kRBTreeSideRight; + + RBTreeInsert(pNodeNew, pNodeParent, &mAnchor, side); + mnSize++; + + return iterator(pNodeNew); + } + + + template + eastl::pair::iterator, bool> + rbtree::DoInsertKey(true_type, const key_type& key) // true_type means keys are unique. + { + // This is the pathway for insertion of unique keys (map and set, but not multimap and multiset). + // Note that we return a pair and not an iterator. This is because the C++ standard for map + // and set is to return a pair and not just an iterator. + bool canInsert; + node_type* pPosition = DoGetKeyInsertionPositionUniqueKeys(canInsert, key); + + if(canInsert) + { + const iterator itResult(DoInsertKeyImpl(pPosition, false, key)); + return pair(itResult, true); + } + + return pair(iterator(pPosition), false); + } + + + template + typename rbtree::iterator + rbtree::DoInsertKey(false_type, const key_type& key) // false_type means keys are not unique. 
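+ // With non-unique keys the insertion cannot fail, so an iterator (rather than a
+ // pair of iterator and bool) is returned.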
+ { + node_type* pPosition = DoGetKeyInsertionPositionNonuniqueKeys(key); + + return DoInsertKeyImpl(pPosition, false, key); + } + + + + template + typename rbtree::node_type* + rbtree::DoGetKeyInsertionPositionUniqueKeysHint(const_iterator position, bool& bForceToLeft, const key_type& key) + { + extract_key extractKey; + + if((position.mpNode != mAnchor.mpNodeRight) && (position.mpNode != &mAnchor)) // If the user specified a specific insertion position... + { + iterator itNext(position.mpNode); + ++itNext; + + // To consider: Change this so that 'position' specifies the position after + // where the insertion goes and not the position before where the insertion goes. + // Doing so would make this more in line with user expectations and with LWG #233. + const bool bPositionLessThanValue = compare(extractKey(position.mpNode->mValue), key); + + if(bPositionLessThanValue) // If (value > *position)... + { + EASTL_VALIDATE_COMPARE(!compare(key, extractKey(position.mpNode->mValue))); // Validate that the compare function is sane. + + const bool bValueLessThanNext = compare(key, extractKey(itNext.mpNode->mValue)); + + if(bValueLessThanNext) // If value < *itNext... + { + EASTL_VALIDATE_COMPARE(!compare(extractKey(itNext.mpNode->mValue), key)); // Validate that the compare function is sane. + + if(position.mpNode->mpNodeRight) + { + bForceToLeft = true; // Specifically insert in front of (to the left of) itNext (and thus after 'position'). + return itNext.mpNode; + } + + bForceToLeft = false; + return position.mpNode; + } + } + + bForceToLeft = false; + return NULL; // The above specified hint was not useful, then we do a regular insertion. + } + + if(mnSize && compare(extractKey(((node_type*)mAnchor.mpNodeRight)->mValue), key)) + { + EASTL_VALIDATE_COMPARE(!compare(key, extractKey(((node_type*)mAnchor.mpNodeRight)->mValue))); // Validate that the compare function is sane. + bForceToLeft = false; + return (node_type*)mAnchor.mpNodeRight; + } + + bForceToLeft = false; + return NULL; // The caller can do a default insert. + } + + + template + typename rbtree::node_type* + rbtree::DoGetKeyInsertionPositionNonuniqueKeysHint(const_iterator position, bool& bForceToLeft, const key_type& key) + { + extract_key extractKey; + + if((position.mpNode != mAnchor.mpNodeRight) && (position.mpNode != &mAnchor)) // If the user specified a specific insertion position... + { + iterator itNext(position.mpNode); + ++itNext; + + // To consider: Change this so that 'position' specifies the position after + // where the insertion goes and not the position before where the insertion goes. + // Doing so would make this more in line with user expectations and with LWG #233. + if(!compare(key, extractKey(position.mpNode->mValue)) && // If value >= *position && + !compare(extractKey(itNext.mpNode->mValue), key)) // if value <= *itNext... + { + if(position.mpNode->mpNodeRight) // If there are any nodes to the right... [this expression will always be true as long as we aren't at the end()] + { + bForceToLeft = true; // Specifically insert in front of (to the left of) itNext (and thus after 'position'). + return itNext.mpNode; + } + + bForceToLeft = false; + return position.mpNode; + } + + bForceToLeft = false; + return NULL; // The above specified hint was not useful, then we do a regular insertion. + } + + // This pathway shouldn't be commonly executed, as the user shouldn't be calling + // this hinted version of insert if the user isn't providing a useful hint. 
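+ // The one case that is expected, and handled below, is a hint at or just before
+ // end() while keys arrive in non-decreasing order; the value is then appended to
+ // the right of the current maximum without a full walk from the root.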
+ if(mnSize && !compare(key, extractKey(((node_type*)mAnchor.mpNodeRight)->mValue))) // If we are non-empty and the value is >= the last node... + { + bForceToLeft =false; + return (node_type*)mAnchor.mpNodeRight; + } + + bForceToLeft = false; + return NULL; + } + + template + template + typename rbtree::iterator + rbtree::DoInsertValueHint(true_type, const_iterator position, Args&&... args) // true_type means keys are unique. + { + // This is the pathway for insertion of unique keys (map and set, but not multimap and multiset). + // + // We follow the same approach as SGI STL/STLPort and use the position as + // a forced insertion position for the value when possible. + + node_type* pNodeNew = DoCreateNode(eastl::forward(args)...); // Note that pNodeNew->mpLeft, mpRight, mpParent, will be uninitialized. + const key_type& key(extract_key{}(pNodeNew->mValue)); + + bool bForceToLeft; + node_type* pPosition = DoGetKeyInsertionPositionUniqueKeysHint(position, bForceToLeft, key); + + if (!pPosition) + { + bool canInsert; + pPosition = DoGetKeyInsertionPositionUniqueKeys(canInsert, key); + + if (!canInsert) + { + DoFreeNode(pNodeNew); + return iterator(pPosition); + } + + bForceToLeft = false; + } + + return DoInsertValueImpl(pPosition, bForceToLeft, key, pNodeNew); + } + + + template + template + typename rbtree::iterator + rbtree::DoInsertValueHint(false_type, const_iterator position, Args&&... args) // false_type means keys are not unique. + { + // This is the pathway for insertion of non-unique keys (multimap and multiset, but not map and set). + // + // We follow the same approach as SGI STL/STLPort and use the position as + // a forced insertion position for the value when possible. + + node_type* pNodeNew = DoCreateNode(eastl::forward(args)...); // Note that pNodeNew->mpLeft, mpRight, mpParent, will be uninitialized. + const key_type& key(extract_key{}(pNodeNew->mValue)); + + bool bForceToLeft; + node_type* pPosition = DoGetKeyInsertionPositionNonuniqueKeysHint(position, bForceToLeft, key); + + if (!pPosition) + { + pPosition = DoGetKeyInsertionPositionNonuniqueKeys(key); + bForceToLeft = false; + } + + return DoInsertValueImpl(pPosition, bForceToLeft, key, pNodeNew); + } + + + template + typename rbtree::iterator + rbtree::DoInsertValueHint(true_type, const_iterator position, value_type&& value) // true_type means keys are unique. + { + // This is the pathway for insertion of unique keys (map and set, but not multimap and multiset). + // + // We follow the same approach as SGI STL/STLPort and use the position as + // a forced insertion position for the value when possible. + + extract_key extractKey; + key_type key(extractKey(value)); + bool bForceToLeft; + node_type* pPosition = DoGetKeyInsertionPositionUniqueKeysHint(position, bForceToLeft, key); + + if(pPosition) + return DoInsertValueImpl(pPosition, bForceToLeft, key, eastl::move(value)); + else + return DoInsertValue(has_unique_keys_type(), eastl::move(value)).first; + } + + + template + typename rbtree::iterator + rbtree::DoInsertValueHint(false_type, const_iterator position, value_type&& value) // false_type means keys are not unique. + { + // This is the pathway for insertion of non-unique keys (multimap and multiset, but not map and set). + // + // We follow the same approach as SGI STL/STLPort and use the position as + // a forced insertion position for the value when possible. 
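+ // Illustrative sketch (hypothetical usage): this is what makes hinted, in-order
+ // filling cheap, e.g.
+ //     eastl::multiset<int> ms;
+ //     eastl::multiset<int>::iterator hint = ms.end();
+ //     for(int i = 0; i < 100; ++i)
+ //         hint = ms.insert(hint, i);    // each key >= the previous, so the hint is honored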
+ extract_key extractKey; + key_type key(extractKey(value)); + bool bForceToLeft; + node_type* pPosition = DoGetKeyInsertionPositionNonuniqueKeysHint(position, bForceToLeft, key); + + if(pPosition) + return DoInsertValueImpl(pPosition, bForceToLeft, key, eastl::move(value)); + else + return DoInsertValue(has_unique_keys_type(), eastl::move(value)); + } + + + template + typename rbtree::iterator + rbtree::DoInsertKey(true_type, const_iterator position, const key_type& key) // true_type means keys are unique. + { + bool bForceToLeft; + node_type* pPosition = DoGetKeyInsertionPositionUniqueKeysHint(position, bForceToLeft, key); + + if(pPosition) + return DoInsertKeyImpl(pPosition, bForceToLeft, key); + else + return DoInsertKey(has_unique_keys_type(), key).first; + } + + + template + typename rbtree::iterator + rbtree::DoInsertKey(false_type, const_iterator position, const key_type& key) // false_type means keys are not unique. + { + // This is the pathway for insertion of non-unique keys (multimap and multiset, but not map and set). + // + // We follow the same approach as SGI STL/STLPort and use the position as + // a forced insertion position for the value when possible. + bool bForceToLeft; + node_type* pPosition = DoGetKeyInsertionPositionNonuniqueKeysHint(position, bForceToLeft, key); + + if(pPosition) + return DoInsertKeyImpl(pPosition, bForceToLeft, key); + else + return DoInsertKey(has_unique_keys_type(), key); // We are empty or we are inserting at the end. + } + + + template + typename rbtree::iterator + rbtree::DoInsertKeyImpl(node_type* pNodeParent, bool bForceToLeft, const key_type& key) + { + RBTreeSide side; + extract_key extractKey; + + // The reason we may want to have bForceToLeft == true is that pNodeParent->mValue and value may be equal. + // In that case it doesn't matter what side we insert on, except that the C++ LWG #233 improvement report + // suggests that we should use the insert hint position to force an ordering. So that's what we do. + if(bForceToLeft || (pNodeParent == &mAnchor) || compare(key, extractKey(pNodeParent->mValue))) + side = kRBTreeSideLeft; + else + side = kRBTreeSideRight; + + node_type* const pNodeNew = DoCreateNodeFromKey(key); // Note that pNodeNew->mpLeft, mpRight, mpParent, will be uninitialized. + RBTreeInsert(pNodeNew, pNodeParent, &mAnchor, side); + mnSize++; + + return iterator(pNodeNew); + } + + + template + void rbtree::insert(std::initializer_list ilist) + { + for(typename std::initializer_list::iterator it = ilist.begin(), itEnd = ilist.end(); it != itEnd; ++it) + DoInsertValue(has_unique_keys_type(), eastl::move(*it)); + } + + + template + template + void rbtree::insert(InputIterator first, InputIterator last) + { + for( ; first != last; ++first) + DoInsertValue(has_unique_keys_type(), *first); // Or maybe we should call 'insert(end(), *first)' instead. If the first-last range was sorted then this might make some sense. + } + + + template + inline void rbtree::clear() + { + // Erase the entire tree. DoNukeSubtree is not a + // conventional erase function, as it does no rebalancing. + DoNukeSubtree((node_type*)mAnchor.mpNodeParent); + reset_lose_memory(); + } + + + template + inline void rbtree::reset_lose_memory() + { + // The reset_lose_memory function is a special extension function which unilaterally + // resets the container to an empty state without freeing the memory of + // the contained objects. This is useful for very quickly tearing down a + // container built into scratch memory. 
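+ // After this, the anchor's left and right pointers refer back to the anchor itself,
+ // so begin() == end() for the now-empty tree.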
+ mAnchor.mpNodeRight = &mAnchor; + mAnchor.mpNodeLeft = &mAnchor; + mAnchor.mpNodeParent = NULL; + mAnchor.mColor = kRBTreeColorRed; + mnSize = 0; + } + + + template + inline typename rbtree::iterator + rbtree::erase(const_iterator position) + { + const iterator iErase(position.mpNode); + --mnSize; // Interleave this between the two references to itNext. We expect no exceptions to occur during the code below. + ++position; + RBTreeErase(iErase.mpNode, &mAnchor); + DoFreeNode(iErase.mpNode); + return iterator(position.mpNode); + } + + + template + typename rbtree::iterator + rbtree::erase(const_iterator first, const_iterator last) + { + // We expect that if the user means to clear the container, they will call clear. + if(EASTL_LIKELY((first.mpNode != mAnchor.mpNodeLeft) || (last.mpNode != &mAnchor))) // If (first != begin or last != end) ... + { + // Basic implementation: + while(first != last) + first = erase(first); + return iterator(first.mpNode); + + // Inlined implementation: + //size_type n = 0; + //while(first != last) + //{ + // const iterator itErase(first); + // ++n; + // ++first; + // RBTreeErase(itErase.mpNode, &mAnchor); + // DoFreeNode(itErase.mpNode); + //} + //mnSize -= n; + //return first; + } + + clear(); + return iterator((node_type*)&mAnchor); // Same as: return end(); + } + + + template + inline typename rbtree::reverse_iterator + rbtree::erase(const_reverse_iterator position) + { + return reverse_iterator(erase((++position).base())); + } + + + template + typename rbtree::reverse_iterator + rbtree::erase(const_reverse_iterator first, const_reverse_iterator last) + { + // Version which erases in order from first to last. + // difference_type i(first.base() - last.base()); + // while(i--) + // first = erase(first); + // return first; + + // Version which erases in order from last to first, but is slightly more efficient: + return reverse_iterator(erase((++last).base(), (++first).base())); + } + + + template + inline void rbtree::erase(const key_type* first, const key_type* last) + { + // We have no choice but to run a loop like this, as the first/last range could + // have values that are discontiguously located in the tree. And some may not + // even be in the tree. + while(first != last) + erase(*first++); + } + + + template + typename rbtree::iterator + rbtree::find(const key_type& key) + { + // To consider: Implement this instead via calling lower_bound and + // inspecting the result. The following is an implementation of this: + // const iterator it(lower_bound(key)); + // return ((it.mpNode == &mAnchor) || compare(key, extractKey(it.mpNode->mValue))) ? iterator(&mAnchor) : it; + // We don't currently implement the above because in practice people tend to call + // find a lot with trees, but very uncommonly call lower_bound. + extract_key extractKey; + + node_type* pCurrent = (node_type*)mAnchor.mpNodeParent; // Start with the root node. + node_type* pRangeEnd = (node_type*)&mAnchor; // Set it to the container end for now. + + while(EASTL_LIKELY(pCurrent)) // Do a walk down the tree. + { + if(EASTL_LIKELY(!compare(extractKey(pCurrent->mValue), key))) // If pCurrent is >= key... + { + pRangeEnd = pCurrent; + pCurrent = (node_type*)pCurrent->mpNodeLeft; + } + else + { + EASTL_VALIDATE_COMPARE(!compare(key, extractKey(pCurrent->mValue))); // Validate that the compare function is sane. 
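+ // This node's key is less than the search key, so any match lies in the right subtree.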
+ pCurrent = (node_type*)pCurrent->mpNodeRight; + } + } + + if(EASTL_LIKELY((pRangeEnd != &mAnchor) && !compare(key, extractKey(pRangeEnd->mValue)))) + return iterator(pRangeEnd); + return iterator((node_type*)&mAnchor); + } + + + template + inline typename rbtree::const_iterator + rbtree::find(const key_type& key) const + { + typedef rbtree rbtree_type; + return const_iterator(const_cast(this)->find(key)); + } + + + template + template + typename rbtree::iterator + rbtree::find_as(const U& u, Compare2 compare2) + { + extract_key extractKey; + + node_type* pCurrent = (node_type*)mAnchor.mpNodeParent; // Start with the root node. + node_type* pRangeEnd = (node_type*)&mAnchor; // Set it to the container end for now. + + while(EASTL_LIKELY(pCurrent)) // Do a walk down the tree. + { + if(EASTL_LIKELY(!compare2(extractKey(pCurrent->mValue), u))) // If pCurrent is >= u... + { + pRangeEnd = pCurrent; + pCurrent = (node_type*)pCurrent->mpNodeLeft; + } + else + { + EASTL_VALIDATE_COMPARE(!compare2(u, extractKey(pCurrent->mValue))); // Validate that the compare function is sane. + pCurrent = (node_type*)pCurrent->mpNodeRight; + } + } + + if(EASTL_LIKELY((pRangeEnd != &mAnchor) && !compare2(u, extractKey(pRangeEnd->mValue)))) + return iterator(pRangeEnd); + return iterator((node_type*)&mAnchor); + } + + + template + template + inline typename rbtree::const_iterator + rbtree::find_as(const U& u, Compare2 compare2) const + { + typedef rbtree rbtree_type; + return const_iterator(const_cast(this)->find_as(u, compare2)); + } + + + template + typename rbtree::iterator + rbtree::lower_bound(const key_type& key) + { + extract_key extractKey; + + node_type* pCurrent = (node_type*)mAnchor.mpNodeParent; // Start with the root node. + node_type* pRangeEnd = (node_type*)&mAnchor; // Set it to the container end for now. + + while(EASTL_LIKELY(pCurrent)) // Do a walk down the tree. + { + if(EASTL_LIKELY(!compare(extractKey(pCurrent->mValue), key))) // If pCurrent is >= key... + { + pRangeEnd = pCurrent; + pCurrent = (node_type*)pCurrent->mpNodeLeft; + } + else + { + EASTL_VALIDATE_COMPARE(!compare(key, extractKey(pCurrent->mValue))); // Validate that the compare function is sane. + pCurrent = (node_type*)pCurrent->mpNodeRight; + } + } + + return iterator(pRangeEnd); + } + + + template + inline typename rbtree::const_iterator + rbtree::lower_bound(const key_type& key) const + { + typedef rbtree rbtree_type; + return const_iterator(const_cast(this)->lower_bound(key)); + } + + + template + typename rbtree::iterator + rbtree::upper_bound(const key_type& key) + { + extract_key extractKey; + + node_type* pCurrent = (node_type*)mAnchor.mpNodeParent; // Start with the root node. + node_type* pRangeEnd = (node_type*)&mAnchor; // Set it to the container end for now. + + while(EASTL_LIKELY(pCurrent)) // Do a walk down the tree. + { + if(EASTL_LIKELY(compare(key, extractKey(pCurrent->mValue)))) // If key is < pCurrent... + { + EASTL_VALIDATE_COMPARE(!compare(extractKey(pCurrent->mValue), key)); // Validate that the compare function is sane. + pRangeEnd = pCurrent; + pCurrent = (node_type*)pCurrent->mpNodeLeft; + } + else + pCurrent = (node_type*)pCurrent->mpNodeRight; + } + + return iterator(pRangeEnd); + } + + + template + inline typename rbtree::const_iterator + rbtree::upper_bound(const key_type& key) const + { + typedef rbtree rbtree_type; + return const_iterator(const_cast(this)->upper_bound(key)); + } + + + // To do: Move this validate function entirely to a template-less implementation. 
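+ // Illustrative sketch (hypothetical usage): validate() is intended for debug-time
+ // sanity checks, e.g.
+ //     eastl::map<int, int> m;
+ //     // ... mutate m ...
+ //     EASTL_ASSERT(m.validate());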
+ template + bool rbtree::validate() const + { + // Red-black trees have the following canonical properties which we validate here: + // 1 Every node is either red or black. + // 2 Every leaf (NULL) is black by defintion. Any number of black nodes may appear in a sequence. + // 3 If a node is red, then both its children are black. Thus, on any path from + // the root to a leaf, red nodes must not be adjacent. + // 4 Every simple path from a node to a descendant leaf contains the same number of black nodes. + // 5 The mnSize member of the tree must equal the number of nodes in the tree. + // 6 The tree is sorted as per a conventional binary tree. + // 7 The comparison function is sane; it obeys strict weak ordering. If compare(a,b) is true, then compare(b,a) must be false. Both cannot be true. + + extract_key extractKey; + + if(mnSize) + { + // Verify basic integrity. + //if(!mAnchor.mpNodeParent || (mAnchor.mpNodeLeft == mAnchor.mpNodeRight)) + // return false; // Fix this for case of empty tree. + + if(mAnchor.mpNodeLeft != RBTreeGetMinChild(mAnchor.mpNodeParent)) + return false; + + if(mAnchor.mpNodeRight != RBTreeGetMaxChild(mAnchor.mpNodeParent)) + return false; + + const size_t nBlackCount = RBTreeGetBlackCount(mAnchor.mpNodeParent, mAnchor.mpNodeLeft); + size_type nIteratedSize = 0; + + for(const_iterator it = begin(); it != end(); ++it, ++nIteratedSize) + { + const node_type* const pNode = (const node_type*)it.mpNode; + const node_type* const pNodeRight = (const node_type*)pNode->mpNodeRight; + const node_type* const pNodeLeft = (const node_type*)pNode->mpNodeLeft; + + // Verify #7 above. + if(pNodeRight && compare(extractKey(pNodeRight->mValue), extractKey(pNode->mValue)) && compare(extractKey(pNode->mValue), extractKey(pNodeRight->mValue))) // Validate that the compare function is sane. + return false; + + // Verify #7 above. + if(pNodeLeft && compare(extractKey(pNodeLeft->mValue), extractKey(pNode->mValue)) && compare(extractKey(pNode->mValue), extractKey(pNodeLeft->mValue))) // Validate that the compare function is sane. + return false; + + // Verify item #1 above. + if((pNode->mColor != kRBTreeColorRed) && (pNode->mColor != kRBTreeColorBlack)) + return false; + + // Verify item #3 above. + if(pNode->mColor == kRBTreeColorRed) + { + if((pNodeRight && (pNodeRight->mColor == kRBTreeColorRed)) || + (pNodeLeft && (pNodeLeft->mColor == kRBTreeColorRed))) + return false; + } + + // Verify item #6 above. + if(pNodeRight && compare(extractKey(pNodeRight->mValue), extractKey(pNode->mValue))) + return false; + + if(pNodeLeft && compare(extractKey(pNode->mValue), extractKey(pNodeLeft->mValue))) + return false; + + if(!pNodeRight && !pNodeLeft) // If we are at a bottom node of the tree... + { + // Verify item #4 above. + if(RBTreeGetBlackCount(mAnchor.mpNodeParent, pNode) != nBlackCount) + return false; + } + } + + // Verify item #5 above. + if(nIteratedSize != mnSize) + return false; + + return true; + } + else + { + if((mAnchor.mpNodeLeft != &mAnchor) || (mAnchor.mpNodeRight != &mAnchor)) + return false; + } + + return true; + } + + + template + inline int rbtree::validate_iterator(const_iterator i) const + { + // To do: Come up with a more efficient mechanism of doing this. 
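+ // Illustrative sketch (hypothetical usage, assuming the usual eastl isf_* iterator
+ // status flags): the result can be tested with a bitwise and, e.g.
+ //     if((tree.validate_iterator(it) & eastl::isf_can_dereference) == 0)
+ //         { /* 'it' is end() or is not an iterator of 'tree' */ }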
+ + for(const_iterator temp = begin(), tempEnd = end(); temp != tempEnd; ++temp) + { + if(temp == i) + return (isf_valid | isf_current | isf_can_dereference); + } + + if(i == end()) + return (isf_valid | isf_current); + + return isf_none; + } + + + template + inline typename rbtree::node_type* + rbtree::DoAllocateNode() + { + auto* pNode = (node_type*)allocate_memory(mAllocator, sizeof(node_type), EASTL_ALIGN_OF(node_type), 0); + EASTL_ASSERT_MSG(pNode != nullptr, "the behaviour of eastl::allocators that return nullptr is not defined."); + + return pNode; + } + + + template + inline void rbtree::DoFreeNode(node_type* pNode) + { + pNode->~node_type(); + EASTLFree(mAllocator, pNode, sizeof(node_type)); + } + + + template + typename rbtree::node_type* + rbtree::DoCreateNodeFromKey(const key_type& key) + { + // Note that this function intentionally leaves the node pointers uninitialized. + // The caller would otherwise just turn right around and modify them, so there's + // no point in us initializing them to anything (except in a debug build). + node_type* const pNode = DoAllocateNode(); + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + ::new (eastl::addressof(pNode->mValue)) value_type(pair_first_construct, key); + + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + DoFreeNode(pNode); + throw; + } + #endif + + #if EASTL_DEBUG + pNode->mpNodeRight = NULL; + pNode->mpNodeLeft = NULL; + pNode->mpNodeParent = NULL; + pNode->mColor = kRBTreeColorBlack; + #endif + + return pNode; + } + + + template + typename rbtree::node_type* + rbtree::DoCreateNode(const value_type& value) + { + // Note that this function intentionally leaves the node pointers uninitialized. + // The caller would otherwise just turn right around and modify them, so there's + // no point in us initializing them to anything (except in a debug build). + node_type* const pNode = DoAllocateNode(); + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + ::new(eastl::addressof(pNode->mValue)) value_type(value); + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + DoFreeNode(pNode); + throw; + } + #endif + + #if EASTL_DEBUG + pNode->mpNodeRight = NULL; + pNode->mpNodeLeft = NULL; + pNode->mpNodeParent = NULL; + pNode->mColor = kRBTreeColorBlack; + #endif + + return pNode; + } + + + template + typename rbtree::node_type* + rbtree::DoCreateNode(value_type&& value) + { + // Note that this function intentionally leaves the node pointers uninitialized. + // The caller would otherwise just turn right around and modify them, so there's + // no point in us initializing them to anything (except in a debug build). + node_type* const pNode = DoAllocateNode(); + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + ::new(eastl::addressof(pNode->mValue)) value_type(eastl::move(value)); + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + DoFreeNode(pNode); + throw; + } + #endif + + #if EASTL_DEBUG + pNode->mpNodeRight = NULL; + pNode->mpNodeLeft = NULL; + pNode->mpNodeParent = NULL; + pNode->mColor = kRBTreeColorBlack; + #endif + + return pNode; + } + + + template + template + typename rbtree::node_type* + rbtree::DoCreateNode(Args&&... args) + { + // Note that this function intentionally leaves the node pointers uninitialized. + // The caller would otherwise just turn right around and modify them, so there's + // no point in us initializing them to anything (except in a debug build). 
+ node_type* const pNode = DoAllocateNode(); + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + ::new(eastl::addressof(pNode->mValue)) value_type(eastl::forward(args)...); + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + DoFreeNode(pNode); + throw; + } + #endif + + #if EASTL_DEBUG + pNode->mpNodeRight = NULL; + pNode->mpNodeLeft = NULL; + pNode->mpNodeParent = NULL; + pNode->mColor = kRBTreeColorBlack; + #endif + + return pNode; + } + + + template + typename rbtree::node_type* + rbtree::DoCreateNode(const node_type* pNodeSource, node_type* pNodeParent) + { + node_type* const pNode = DoCreateNode(pNodeSource->mValue); + + pNode->mpNodeRight = NULL; + pNode->mpNodeLeft = NULL; + pNode->mpNodeParent = pNodeParent; + pNode->mColor = pNodeSource->mColor; + + return pNode; + } + + + template + typename rbtree::node_type* + rbtree::DoCopySubtree(const node_type* pNodeSource, node_type* pNodeDest) + { + node_type* const pNewNodeRoot = DoCreateNode(pNodeSource, pNodeDest); + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + // Copy the right side of the tree recursively. + if(pNodeSource->mpNodeRight) + pNewNodeRoot->mpNodeRight = DoCopySubtree((const node_type*)pNodeSource->mpNodeRight, pNewNodeRoot); + + node_type* pNewNodeLeft; + + for(pNodeSource = (node_type*)pNodeSource->mpNodeLeft, pNodeDest = pNewNodeRoot; + pNodeSource; + pNodeSource = (node_type*)pNodeSource->mpNodeLeft, pNodeDest = pNewNodeLeft) + { + pNewNodeLeft = DoCreateNode(pNodeSource, pNodeDest); + + pNodeDest->mpNodeLeft = pNewNodeLeft; + + // Copy the right side of the tree recursively. + if(pNodeSource->mpNodeRight) + pNewNodeLeft->mpNodeRight = DoCopySubtree((const node_type*)pNodeSource->mpNodeRight, pNewNodeLeft); + } + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + DoNukeSubtree(pNewNodeRoot); + throw; + } + #endif + + return pNewNodeRoot; + } + + + template + void rbtree::DoNukeSubtree(node_type* pNode) + { + while(pNode) // Recursively traverse the tree and destroy items as we go. + { + DoNukeSubtree((node_type*)pNode->mpNodeRight); + + node_type* const pNodeLeft = (node_type*)pNode->mpNodeLeft; + DoFreeNode(pNode); + pNode = pNodeLeft; + } + } + + + + /////////////////////////////////////////////////////////////////////// + // global operators + /////////////////////////////////////////////////////////////////////// + + template + inline bool operator==(const rbtree& a, const rbtree& b) + { + return (a.size() == b.size()) && eastl::equal(a.begin(), a.end(), b.begin()); + } + + + // Note that in operator< we do comparisons based on the tree value_type with operator<() of the + // value_type instead of the tree's Compare function. For set/multiset, the value_type is T, while + // for map/multimap the value_type is a pair. operator< for pair can be seen by looking + // utility.h, but it basically is uses the operator< for pair.first and pair.second. The C++ standard + // appears to require this behaviour, whether intentionally or not. If anything, a good reason to do + // this is for consistency. A map and a vector that contain the same items should compare the same. 
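+    // Editorial illustration (not from the original EASTL sources): given two map<int, char>
+    // containers a = {(1,'a'), (2,'b')} and b = {(1,'a'), (2,'c')}, the operator< below compares
+    // the stored pairs lexicographically, so (a < b) is true because 'b' < 'c' -- the maps'
+    // Compare template parameter plays no part in this comparison.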
+ template + inline bool operator<(const rbtree& a, const rbtree& b) + { + return eastl::lexicographical_compare(a.begin(), a.end(), b.begin(), b.end()); + } + + + template + inline bool operator!=(const rbtree& a, const rbtree& b) + { + return !(a == b); + } + + + template + inline bool operator>(const rbtree& a, const rbtree& b) + { + return b < a; + } + + + template + inline bool operator<=(const rbtree& a, const rbtree& b) + { + return !(b < a); + } + + + template + inline bool operator>=(const rbtree& a, const rbtree& b) + { + return !(a < b); + } + + + template + inline void swap(rbtree& a, rbtree& b) + { + a.swap(b); + } + + +} // namespace eastl + + +EA_RESTORE_VC_WARNING(); + + +#endif // Header include guard diff --git a/libkram/eastl/include/EASTL/internal/smart_ptr.h b/libkram/eastl/include/EASTL/internal/smart_ptr.h new file mode 100644 index 00000000..f1d52e1b --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/smart_ptr.h @@ -0,0 +1,264 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_INTERNAL_SMART_PTR_H +#define EASTL_INTERNAL_SMART_PTR_H + + +#include +#include +#include +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +namespace eastl +{ + + namespace Internal + { + // Tells if the Deleter type has a typedef for pointer to T. If so then return it, + // else return T*. The large majority of the time the pointer type will be T*. + // The C++11 Standard requires that scoped_ptr let the deleter define the pointer type. + // + // Example usage: + // typedef typename unique_pointer_type::type pointer + // + template + class unique_pointer_type + { + template + static typename U::pointer test(typename U::pointer*); + + template + static T* test(...); + + public: + typedef decltype(test::type>(0)) type; + }; + + + /////////////////////////////////////////////////////////////////////// + // is_array_cv_convertible + // + // Tells if the array pointer P1 is cv-convertible to array pointer P2. + // The two types have two be equivalent pointer types and be convertible + // when you consider const/volatile properties of them. + // + // Example usage: + // is_array_cv_convertible::value => false + // is_array_cv_convertible::value => false + // is_array_cv_convertible::value => false + // is_array_cv_convertible::value => false + // is_array_cv_convertible::value => false + // is_array_cv_convertible::value => true + // is_array_cv_convertible::value => true + // is_array_cv_convertible::value => true + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_is_array_cv_convertible_CONFORMANCE 1 + + template ::element_type>, + eastl::remove_cv_t::element_type>>> + struct is_array_cv_convertible_impl + : public eastl::is_convertible {}; // Return true if P1 is convertible to P2. + + template + struct is_array_cv_convertible_impl + : public eastl::false_type {}; // P1's underlying type is not the same as P2's, so it can't be converted, even if P2 refers to a subclass of P1. Parent == Child, but Parent[] != Child[] + + template && !eastl::is_pointer_v> + struct is_array_cv_convertible + : public is_array_cv_convertible_impl {}; + + template + struct is_array_cv_convertible + : public eastl::false_type {}; // P1 is scalar not a pointer, so it can't be converted to a pointer. 
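+
+        // Editorial compile-time sketch (not part of the original EASTL sources). These checks only
+        // restate what follows from the definitions above: two pointer types are "array
+        // cv-convertible" when they point at the same underlying type modulo const/volatile,
+        // and a non-pointer scalar is always rejected.
+        static_assert( is_array_cv_convertible<int*, const int*>::value, "adding const to the pointee is allowed");
+        static_assert(!is_array_cv_convertible<int*, double*>::value,    "unrelated pointee types are rejected");
+        static_assert(!is_array_cv_convertible<int, int*>::value,        "a non-pointer scalar is rejected");
+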
+ + + /////////////////////////////////////////////////////////////////////// + // is_derived + // + // Given two (possibly identical) types Base and Derived, is_base_of::value == true + // if and only if Base is a direct or indirect base class of Derived. This is like is_base_of + // but returns false if Derived is the same as Base. So is_derived is true only if Derived is actually a subclass + // of Base and not Base itself. + // + // is_derived may only be applied to complete types. + // + // Example usage: + // is_derived::value => false + // is_derived::value => false + // is_derived::value => true + // is_derived::value => false + /////////////////////////////////////////////////////////////////////// + + #if EASTL_TYPE_TRAIT_is_base_of_CONFORMANCE + #define EASTL_TYPE_TRAIT_is_derived_CONFORMANCE 1 + + template + struct is_derived : public eastl::integral_constant::value && !eastl::is_same::type, typename eastl::remove_cv::type>::value> {}; + #else + #define EASTL_TYPE_TRAIT_is_derived_CONFORMANCE 0 + + template // This returns true if Derived is unrelated to Base. That's a wrong answer, but is better for us than returning false for compilers that don't support is_base_of. + struct is_derived : public eastl::integral_constant::type, typename eastl::remove_cv::type>::value> {}; + #endif + + + /////////////////////////////////////////////////////////////////////// + // is_safe_array_conversion + // + // Say you have two array types: T* t and U* u. You want to assign the u to t but only if + // that's a safe thing to do. As shown in the logic below, the array conversion + // is safe if U* and T* are convertible, if U is an array, and if either U or T is not + // a pointer or U is not derived from T. + // + // Note: Usage of this class could be replaced with is_array_cv_convertible usage. + // To do: Do this replacement and test it. + // + /////////////////////////////////////////////////////////////////////// + + template + struct is_safe_array_conversion : public eastl::integral_constant::value && + eastl::is_array::value && + (!eastl::is_pointer::value || !is_pointer::value || !Internal::is_derived::type>::value)> {}; + + } // namespace Internal + + + + + + + + /// default_delete + /// + /// C++11 smart pointer default delete function class. + /// + /// Provides a default way to delete an object. This default is simply to call delete on the + /// object pointer. You can provide an alternative to this class or you can override this on + /// a class-by-class basis like the following: + /// template <> + /// struct smart_ptr_deleter + /// { + /// void operator()(MyClass* p) const + /// { SomeCustomFunction(p); } + /// }; + /// + template + struct default_delete + { + #if defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION <= 4006) // GCC prior to 4.7 has a bug with noexcept here. + EA_CONSTEXPR default_delete() = default; + #else + EA_CONSTEXPR default_delete() EA_NOEXCEPT = default; + #endif + + template // Enable if T* can be constructed with U* (i.e. U* is convertible to T*). + default_delete(const default_delete&, typename eastl::enable_if::value>::type* = 0) EA_NOEXCEPT {} + + void operator()(T* p) const EA_NOEXCEPT + { delete p; } + }; + + + template + struct default_delete // Specialization for arrays. + { + #if defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION <= 4006) // GCC prior to 4.7 has a bug with noexcept here. 
+ EA_CONSTEXPR default_delete() = default; + #else + EA_CONSTEXPR default_delete() EA_NOEXCEPT = default; + #endif + + template // This ctor is enabled if T is equal to or a base of U, and if U is less or equal const/volatile-qualified than T. + default_delete(const default_delete&, typename eastl::enable_if::value>::type* = 0) EA_NOEXCEPT {} + + void operator()(T* p) const EA_NOEXCEPT + { delete[] p; } + }; + + + + + /// smart_ptr_deleter + /// + /// Deprecated in favor of the C++11 name: default_delete + /// + template + struct smart_ptr_deleter + { + typedef T value_type; + + void operator()(const value_type* p) const // We use a const argument type in order to be most flexible with what types we accept. + { delete const_cast(p); } + }; + + template <> + struct smart_ptr_deleter + { + typedef void value_type; + + void operator()(const void* p) const + { delete[] (char*)p; } // We don't seem to have much choice but to cast to a scalar type. + }; + + template <> + struct smart_ptr_deleter + { + typedef void value_type; + + void operator()(const void* p) const + { delete[] (char*)p; } // We don't seem to have much choice but to cast to a scalar type. + }; + + + + /// smart_array_deleter + /// + /// Deprecated in favor of the C++11 name: default_delete + /// + template + struct smart_array_deleter + { + typedef T value_type; + + void operator()(const value_type* p) const // We use a const argument type in order to be most flexible with what types we accept. + { delete[] const_cast(p); } + }; + + template <> + struct smart_array_deleter + { + typedef void value_type; + + void operator()(const void* p) const + { delete[] (char*)p; } // We don't seem to have much choice but to cast to a scalar type. + }; + + +} // namespace eastl + + +#endif // Header include guard + + + + + + + + + + + + + + + + + diff --git a/libkram/eastl/include/EASTL/internal/thread_support.h b/libkram/eastl/include/EASTL/internal/thread_support.h new file mode 100644 index 00000000..80386d20 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/thread_support.h @@ -0,0 +1,244 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_INTERNAL_THREAD_SUPPORT_H +#define EASTL_INTERNAL_THREAD_SUPPORT_H + + +#include +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif +#include + +///////////////////////////////////////////////////////////////////////////////////////////////////// +// NOTE(rparolin): We need a fallback mutex implementation because the Microsoft implementation +// of std::mutex can not be included in managed-cpp code. +// +// fatal error C1189: is not supported when compiling with /clr or /clr:pure +///////////////////////////////////////////////////////////////////////////////////////////////////// +#if defined(EA_HAVE_CPP11_MUTEX) && !defined(EA_COMPILER_MANAGED_CPP) + #define EASTL_CPP11_MUTEX_ENABLED 1 +#else + #define EASTL_CPP11_MUTEX_ENABLED 0 +#endif + +#if EASTL_CPP11_MUTEX_ENABLED + EA_DISABLE_ALL_VC_WARNINGS() + #include + EA_RESTORE_ALL_VC_WARNINGS() +#endif + +#if defined(EA_PLATFORM_MICROSOFT) + // Cannot include Windows headers in our headers, as they kill builds with their #defines. +#elif defined(EA_PLATFORM_POSIX) + #include +#endif + +// copy constructor could not be generated because a base class copy constructor is inaccessible or deleted. 
+// assignment operator could not be generated because a base class assignment operator is inaccessible or deleted. +// non dll-interface class used as base for DLL-interface classkey 'identifier'. +EA_DISABLE_VC_WARNING(4625 4626 4275); + + +#if defined(EA_PLATFORM_MICROSOFT) + #if defined(EA_PROCESSOR_POWERPC) + extern "C" long __stdcall _InterlockedIncrement(long volatile* Addend); + #pragma intrinsic (_InterlockedIncrement) + + extern "C" long __stdcall _InterlockedDecrement(long volatile* Addend); + #pragma intrinsic (_InterlockedDecrement) + + extern "C" long __stdcall _InterlockedCompareExchange(long volatile* Dest, long Exchange, long Comp); + #pragma intrinsic (_InterlockedCompareExchange) + #else + extern "C" long _InterlockedIncrement(long volatile* Addend); + #pragma intrinsic (_InterlockedIncrement) + + extern "C" long _InterlockedDecrement(long volatile* Addend); + #pragma intrinsic (_InterlockedDecrement) + + extern "C" long _InterlockedCompareExchange(long volatile* Dest, long Exchange, long Comp); + #pragma intrinsic (_InterlockedCompareExchange) + #endif +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_THREAD_SUPPORT_AVAILABLE +// +// Defined as 0 or 1, based on existing support. +// Identifies if thread support (e.g. atomics, mutexes) is available for use. +// The large majority of EASTL doesn't use thread support, but a few parts +// of it (e.g. shared_ptr) do. +/////////////////////////////////////////////////////////////////////////////// + +#if !defined(EASTL_THREAD_SUPPORT_AVAILABLE) + #if defined(EA_COMPILER_CLANG) || (defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4003)) + #define EASTL_THREAD_SUPPORT_AVAILABLE 1 + #elif defined(EA_COMPILER_MSVC) + #define EASTL_THREAD_SUPPORT_AVAILABLE 1 + #else + #define EASTL_THREAD_SUPPORT_AVAILABLE 0 + #endif +#endif + + +namespace eastl +{ + namespace Internal + { + /// atomic_increment + /// Returns the new value. + inline int32_t atomic_increment(int32_t* p32) EA_NOEXCEPT + { + #if defined(EA_COMPILER_CLANG) || (defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4003)) + return __sync_add_and_fetch(p32, 1); + #elif defined(EA_COMPILER_MSVC) + static_assert(sizeof(long) == sizeof(int32_t), "unexpected size"); + return _InterlockedIncrement((volatile long*)p32); + #elif defined(EA_COMPILER_GNUC) + int32_t result; + __asm__ __volatile__ ("lock; xaddl %0, %1" + : "=r" (result), "=m" (*p32) + : "0" (1), "m" (*p32) + : "memory" + ); + return result + 1; + #else + EASTL_FAIL_MSG("EASTL thread safety is not implemented yet. See EAThread for how to do this for the given platform."); + return ++*p32; + #endif + } + + /// atomic_decrement + /// Returns the new value. + inline int32_t atomic_decrement(int32_t* p32) EA_NOEXCEPT + { + #if defined(EA_COMPILER_CLANG) || (defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4003)) + return __sync_add_and_fetch(p32, -1); + #elif defined(EA_COMPILER_MSVC) + return _InterlockedDecrement((volatile long*)p32); // volatile long cast is OK because int32_t == long on Microsoft platforms. + #elif defined(EA_COMPILER_GNUC) + int32_t result; + __asm__ __volatile__ ("lock; xaddl %0, %1" + : "=r" (result), "=m" (*p32) + : "0" (-1), "m" (*p32) + : "memory" + ); + return result - 1; + #else + EASTL_FAIL_MSG("EASTL thread safety is not implemented yet. 
See EAThread for how to do this for the given platform."); + return --*p32; + #endif + } + + + /// atomic_compare_and_swap + /// Safely sets the value to a new value if the original value is equal to + /// a condition value. Returns true if the condition was met and the + /// assignment occurred. The comparison and value setting are done as + /// an atomic operation and thus another thread cannot intervene between + /// the two as would be the case with simple C code. + inline bool atomic_compare_and_swap(int32_t* p32, int32_t newValue, int32_t condition) + { + #if defined(EA_COMPILER_CLANG) || (defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4003)) + return __sync_bool_compare_and_swap(p32, condition, newValue); + #elif defined(EA_COMPILER_MSVC) + return ((int32_t)_InterlockedCompareExchange((volatile long*)p32, (long)newValue, (long)condition) == condition); + #elif defined(EA_COMPILER_GNUC) + // GCC Inline ASM Constraints + // r <--> Any general purpose register + // a <--> The a register. + // 1 <--> The constraint '1' for operand 2 says that it must occupy the same location as operand 1. + // =a <--> output registers + // =r <--> output registers + + int32_t result; + __asm__ __volatile__( + "lock; cmpxchgl %3, (%1) \n" // Test *p32 against EAX, if same, then *p32 = newValue + : "=a" (result), "=r" (p32) // outputs + : "a" (condition), "r" (newValue), "1" (p32) // inputs + : "memory" // clobbered + ); + return result == condition; + #else + EASTL_FAIL_MSG("EASTL thread safety is not implemented yet. See EAThread for how to do this for the given platform."); + if(*p32 == condition) + { + *p32 = newValue; + return true; + } + return false; + #endif + } + + + // mutex + #if EASTL_CPP11_MUTEX_ENABLED + using std::mutex; + #else + class EASTL_API mutex + { + public: + mutex(); + ~mutex(); + + void lock(); + void unlock(); + + protected: + #if defined(EA_PLATFORM_MICROSOFT) + #if defined(_WIN64) + uint64_t mMutexBuffer[40 / sizeof(uint64_t)]; // CRITICAL_SECTION is 40 bytes on Win64. + #elif defined(_WIN32) + uint32_t mMutexBuffer[24 / sizeof(uint32_t)]; // CRITICAL_SECTION is 24 bytes on Win32. + #endif + #elif defined(EA_PLATFORM_POSIX) + pthread_mutex_t mMutex; + #endif + }; + #endif + + + // auto_mutex + class EASTL_API auto_mutex + { + public: + EA_FORCE_INLINE auto_mutex(mutex& mutex) : pMutex(&mutex) + { pMutex->lock(); } + + EA_FORCE_INLINE ~auto_mutex() + { pMutex->unlock(); } + + protected: + mutex* pMutex; + + auto_mutex(const auto_mutex&) = delete; + void operator=(const auto_mutex&) = delete; + }; + + + // shared_ptr_auto_mutex + class EASTL_API shared_ptr_auto_mutex : public auto_mutex + { + public: + shared_ptr_auto_mutex(const void* pSharedPtr); + + shared_ptr_auto_mutex(const shared_ptr_auto_mutex&) = delete; + void operator=(shared_ptr_auto_mutex&&) = delete; + }; + + + } // namespace Internal + +} // namespace eastl + + +EA_RESTORE_VC_WARNING(); + + +#endif // Header include guard diff --git a/libkram/eastl/include/EASTL/internal/tuple_fwd_decls.h b/libkram/eastl/include/EASTL/internal/tuple_fwd_decls.h new file mode 100644 index 00000000..a2c773cd --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/tuple_fwd_decls.h @@ -0,0 +1,56 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_TUPLE_FWD_DECLS_H +#define EASTL_TUPLE_FWD_DECLS_H + +#include + +#if EASTL_TUPLE_ENABLED + +namespace eastl +{ + template + class tuple; + + template + class tuple_size; + + template + class tuple_element; + + template + using tuple_element_t = typename tuple_element::type; + + // const typename for tuple_element_t, for when tuple or TupleImpl cannot itself be const + template + using const_tuple_element_t = typename conditional< + is_lvalue_reference>::value, + add_lvalue_reference_t>>, + const tuple_element_t + >::type; + + // get + template + tuple_element_t>& get(tuple& t); + + template + const_tuple_element_t>& get(const tuple& t); + + template + tuple_element_t>&& get(tuple&& t); + + template + T& get(tuple& t); + + template + const T& get(const tuple& t); + + template + T&& get(tuple&& t); +} + +#endif // EASTL_VARIADIC_TEMPLATES_ENABLED + +#endif // EASTL_TUPLE_FWD_DECLS_H diff --git a/libkram/eastl/include/EASTL/internal/type_compound.h b/libkram/eastl/include/EASTL/internal/type_compound.h new file mode 100644 index 00000000..178a7342 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/type_compound.h @@ -0,0 +1,800 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_INTERNAL_TYPE_COMPOUND_H +#define EASTL_INTERNAL_TYPE_COMPOUND_H + + +#include +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +// Until we revise the code below to handle EDG warnings, we don't have much choice but to disable them. +#if defined(__EDG_VERSION__) + #pragma diag_suppress=1931 // operand of sizeof is not a type, variable, or dereferenced pointer expression +#endif + + +namespace eastl +{ + + /////////////////////////////////////////////////////////////////////// + // extent + // + // extent::value is an integral type representing the number of + // elements in the Ith dimension of array type T. + // + // For a given array type T[N], extent::value == N. + // For a given multi-dimensional array type T[M][N], extent::value == N. + // For a given multi-dimensional array type T[M][N], extent::value == M. + // For a given array type T and a given dimension I where I >= rank::value, extent::value == 0. + // For a given array type of unknown extent T[], extent::value == 0. + // For a given non-array type T and an arbitrary dimension I, extent::value == 0. + // + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_extent_CONFORMANCE 1 // extent is conforming. + + template + struct extent_help : public eastl::integral_constant {}; + + template + struct extent_help : public eastl::integral_constant {}; + + template + struct extent_help : public eastl::extent_help { }; + + template + struct extent_help : public eastl::extent_help {}; + + template // extent uses unsigned instead of size_t. + struct extent : public eastl::extent_help { }; + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR auto extent_v = extent::value; + #endif + + + /////////////////////////////////////////////////////////////////////// + // is_array + // + // is_array::value == true if and only if T is an array type, + // including unbounded array types. 
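+    // Example usage (editorial sketch; the values follow from the specializations below):
+    //     is_array<int[5]>::value  => true
+    //     is_array<int[]>::value   => true      (arrays of unknown bound count as arrays)
+    //     is_array<int*>::value    => false
+    //     extent<int[]>::value     => 0         (see extent above: an unknown bound reports 0)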
+ // + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_is_array_CONFORMANCE 1 // is_array is conforming; doesn't make mistakes. + + template + struct is_array : public eastl::false_type {}; + + template + struct is_array : public eastl::true_type {}; + + template + struct is_array : public eastl::true_type {}; + + #if !defined(EA_COMPILER_NO_TEMPLATE_ALIASES) + template + EA_CONSTEXPR bool is_array_v = is_array::value; + #endif + + + /////////////////////////////////////////////////////////////////////// + // is_array_of_known_bounds + // + // Not part of the C++11 Standard. + // is_array_of_known_bounds::value is true if T is an array and is + // of known bounds. is_array_of_unknown_bounds::value == true, + // while is_array_of_unknown_bounds::value = false. + // + /////////////////////////////////////////////////////////////////////// + + template + struct is_array_of_known_bounds + : public eastl::integral_constant::value != 0> {}; + + + /////////////////////////////////////////////////////////////////////// + // is_array_of_unknown_bounds + // + // Not part of the C++11 Standard. + // is_array_of_unknown_bounds::value is true if T is an array but is + // of unknown bounds. is_array_of_unknown_bounds::value == false, + // while is_array_of_unknown_bounds::value = true. + // + /////////////////////////////////////////////////////////////////////// + + template + struct is_array_of_unknown_bounds + : public eastl::integral_constant::value && (eastl::extent::value == 0)> {}; + + + /////////////////////////////////////////////////////////////////////// + // is_member_function_pointer + // + // is_member_function_pointer::value == true if and only if T is a + // pointer to member function type. + // + /////////////////////////////////////////////////////////////////////// + // We detect member functions with 0 to N arguments. We can extend this + // for additional arguments if necessary. + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_is_member_function_pointer_CONFORMANCE 1 // is_member_function_pointer is conforming; doesn't make mistakes. + + // To do: Revise this to support C++11 variadic templates when possible. + // To do: We can probably also use remove_cv to simply the multitude of types below. 
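+
+    // Example usage (editorial sketch, using a hypothetical class Widget):
+    //     is_member_function_pointer<void (Widget::*)()>::value        => true
+    //     is_member_function_pointer<void (Widget::*)() const>::value  => true
+    //     is_member_function_pointer<void (*)()>::value                => false   (non-member function pointer)
+    //     is_member_function_pointer<int Widget::*>::value             => false   (pointer to data member)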
+ + template struct is_mem_fun_pointer_value : public false_type{}; + + template struct is_mem_fun_pointer_value : public true_type{}; + template struct is_mem_fun_pointer_value : public true_type{}; + template struct is_mem_fun_pointer_value : public true_type{}; + template struct is_mem_fun_pointer_value : public true_type{}; + + template struct is_mem_fun_pointer_value : public true_type{}; + template struct is_mem_fun_pointer_value : public true_type{}; + template struct is_mem_fun_pointer_value : public true_type{}; + template struct is_mem_fun_pointer_value : public true_type{}; + + template struct is_mem_fun_pointer_value : public true_type{}; + template struct is_mem_fun_pointer_value : public true_type{}; + template struct is_mem_fun_pointer_value : public true_type{}; + template struct is_mem_fun_pointer_value : public true_type{}; + + template struct is_mem_fun_pointer_value : public true_type{}; + template struct is_mem_fun_pointer_value : public true_type{}; + template struct is_mem_fun_pointer_value : public true_type{}; + template struct is_mem_fun_pointer_value : public true_type{}; + + template struct is_mem_fun_pointer_value : public true_type{}; + template struct is_mem_fun_pointer_value : public true_type{}; + template struct is_mem_fun_pointer_value : public true_type{}; + template struct is_mem_fun_pointer_value : public true_type{}; + + template struct is_mem_fun_pointer_value : public true_type{}; + template struct is_mem_fun_pointer_value : public true_type{}; + template struct is_mem_fun_pointer_value : public true_type{}; + template struct is_mem_fun_pointer_value : public true_type{}; + + template struct is_mem_fun_pointer_value : public true_type{}; + template struct is_mem_fun_pointer_value : public true_type{}; + template struct is_mem_fun_pointer_value : public true_type{}; + template struct is_mem_fun_pointer_value : public true_type{}; + + template struct is_mem_fun_pointer_value : public true_type{}; + template struct is_mem_fun_pointer_value : public true_type{}; + template struct is_mem_fun_pointer_value : public true_type{}; + template struct is_mem_fun_pointer_value : public true_type{}; + + template struct is_mem_fun_pointer_value : public true_type{}; + template struct is_mem_fun_pointer_value : public true_type{}; + template struct is_mem_fun_pointer_value : public true_type{}; + template struct is_mem_fun_pointer_value : public true_type{}; + + template + struct is_member_function_pointer : public integral_constant::value>{}; + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_member_function_pointer_v = is_member_function_pointer::value; + #endif + + + /////////////////////////////////////////////////////////////////////// + // is_member_pointer + // + // is_member_pointer::value == true if and only if: + // is_member_object_pointer::value == true, or + // is_member_function_pointer::value == true + // + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_is_member_pointer_CONFORMANCE 1 // is_member_pointer is conforming; doesn't make mistakes. 
+ + template + struct is_member_pointer + : public eastl::integral_constant::value>{}; + + template + struct is_member_pointer + : public eastl::true_type{}; + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_member_pointer_v = is_member_pointer::value; + #endif + + + /////////////////////////////////////////////////////////////////////// + // is_member_object_pointer + // + // is_member_object_pointer::value == true if and only if T is a + // pointer to data member type. + // + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_is_member_object_pointer_CONFORMANCE 1 // is_member_object_pointer is conforming; doesn't make mistakes. + + template + struct is_member_object_pointer : public eastl::integral_constant::value && + !eastl::is_member_function_pointer::value + > {}; + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_member_object_pointer_v = is_member_object_pointer::value; + #endif + + + /////////////////////////////////////////////////////////////////////// + // is_pointer + // + // is_pointer::value == true if and only if T is a pointer type. + // This category includes function pointer types, but not pointer to + // member types. + // + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_is_pointer_CONFORMANCE 1 // is_pointer is conforming; doesn't make mistakes. + + template struct is_pointer_helper : public false_type{}; + + template struct is_pointer_helper : public true_type{}; + template struct is_pointer_helper : public true_type{}; + template struct is_pointer_helper : public true_type{}; + template struct is_pointer_helper : public true_type{}; + + template + struct is_pointer_value : public type_and::value, type_not::value>::value> {}; + + template + struct is_pointer : public integral_constant::value>{}; + + #if !defined(EA_COMPILER_NO_TEMPLATE_ALIASES) + template + EA_CONSTEXPR bool is_pointer_v = is_pointer::value; + #endif + + + + /////////////////////////////////////////////////////////////////////// + // is_convertible + // + // Given two (possible identical) types From and To, is_convertible::value == true + // if and only if an lvalue of type From can be implicitly converted to type To, + // or is_void::value == true + // + // An instance of the type predicate holds true if the expression To to = from;, where from is an object of type From, is well-formed. + // + // is_convertible may only be applied to complete types. + // Type To may not be an abstract type. + // If the conversion is ambiguous, the program is ill-formed. + // If either or both of From and To are class types, and the conversion would invoke + // non-public member functions of either From or To (such as a private constructor of To, + // or a private conversion operator of From), the program is ill-formed. + // + // Note that without compiler help, both is_convertible and is_base + // can produce compiler errors if the conversion is ambiguous. + // Example: + // struct A {}; + // struct B : A {}; + // struct C : A {}; + // struct D : B, C {}; + // is_convertible::value; // Generates compiler error. + /////////////////////////////////////////////////////////////////////// + + #if EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE && (defined(_MSC_VER) || (defined(EA_COMPILER_CLANG) && EA_COMPILER_HAS_FEATURE(is_convertible_to))) + #define EASTL_TYPE_TRAIT_is_convertible_CONFORMANCE 1 // is_convertible is conforming. 
+ + // Problem: VC++ reports that int is convertible to short, yet if you construct a short from an int then VC++ generates a warning: + // warning C4242: 'initializing' : conversion from 'int' to 'short', possible loss of data. We can deal with this by making + // is_convertible be false for conversions that could result in loss of data. Or we could make another trait called is_lossless_convertible + // and use that appropriately in our code. Or we could put the onus on the user to work around such warnings. + template + struct is_convertible : public integral_constant{}; + + #else + #define EASTL_TYPE_TRAIT_is_convertible_CONFORMANCE 1 + + template::value || eastl::is_function::value || eastl::is_array::value > + struct is_convertible_helper // Anything is convertible to void. Nothing is convertible to a function or an array. + { static const bool value = eastl::is_void::value; }; + + template + class is_convertible_helper + { + template + static void ToFunction(To1); // We try to call this function with an instance of From. It is valid if From can be converted to To. + + template + static eastl::no_type is(...); + + template + static decltype(ToFunction(eastl::declval()), eastl::yes_type()) is(int); + + public: + static const bool value = sizeof(is(0)) == 1; + }; + + template + struct is_convertible + : public integral_constant::value> {}; + + #endif + + #if !defined(EA_COMPILER_NO_TEMPLATE_ALIASES) + template + EA_CONSTEXPR bool is_convertible_v = is_convertible::value; + #endif + + + /////////////////////////////////////////////////////////////////////// + // is_nothrow_convertible + // + // https://en.cppreference.com/w/cpp/types/is_convertible + // + // template + // struct is_explicitly_convertible + // : public is_constructible {}; + /////////////////////////////////////////////////////////////////////// + // TODO(rparolin): implement type-trait + + + + /////////////////////////////////////////////////////////////////////// + // is_explicitly_convertible + // + // This sometime-seen extension trait is the same as is_constructible + // and so we don't define it. + // + // template + // struct is_explicitly_convertible + // : public is_constructible {}; + /////////////////////////////////////////////////////////////////////// + + + + /////////////////////////////////////////////////////////////////////// + // is_union + // + // is_union::value == true if and only if T is a union type. + // + // There is no way to tell if a type is a union without compiler help. + // As of this writing, only Metrowerks v8+ supports such functionality + // via 'msl::is_union::value'. The user can force something to be + // evaluated as a union via EASTL_DECLARE_UNION. + /////////////////////////////////////////////////////////////////////// + #if EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE && (defined(_MSC_VER) || defined(EA_COMPILER_GNUC) || (defined(EA_COMPILER_CLANG) && EA_COMPILER_HAS_FEATURE(is_union))) + #define EASTL_TYPE_TRAIT_is_union_CONFORMANCE 1 // is_union is conforming. + + template + struct is_union : public integral_constant{}; + #else + #define EASTL_TYPE_TRAIT_is_union_CONFORMANCE 0 // is_union is not fully conforming. 
+ + template struct is_union : public false_type{}; + #endif + + #define EASTL_DECLARE_UNION(T) namespace eastl{ template <> struct is_union : public true_type{}; template <> struct is_union : public true_type{}; } + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_union_v = is_union::value; + #endif + + + + /////////////////////////////////////////////////////////////////////// + // is_class + // + // is_class::value == true if and only if T is a class or struct + // type (and not a union type). + // + // Without specific compiler help, it is not possible to + // distinguish between unions and classes. As a result, is_class + // will erroneously evaluate to true for union types. + /////////////////////////////////////////////////////////////////////// + #if EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE && (defined(_MSC_VER) || defined(EA_COMPILER_GNUC) || (defined(EA_COMPILER_CLANG) && EA_COMPILER_HAS_FEATURE(is_class))) + #define EASTL_TYPE_TRAIT_is_class_CONFORMANCE 1 // is_class is conforming. + + template + struct is_class : public integral_constant{}; + #elif defined(__EDG__) + #define EASTL_TYPE_TRAIT_is_class_CONFORMANCE EASTL_TYPE_TRAIT_is_union_CONFORMANCE + + typedef char yes_array_type[1]; + typedef char no_array_type[2]; + template static yes_array_type& is_class_helper(void (U::*)()); + template static no_array_type& is_class_helper(...); + + template + struct is_class : public integral_constant(0)) == sizeof(yes_array_type) && !is_union::value + >{}; + #elif !defined(__GNUC__) || (((__GNUC__ * 100) + __GNUC_MINOR__) >= 304) // Not GCC or GCC 3.4+ + #define EASTL_TYPE_TRAIT_is_class_CONFORMANCE EASTL_TYPE_TRAIT_is_union_CONFORMANCE + + template static yes_type is_class_helper(void (U::*)()); + template static no_type is_class_helper(...); + + template + struct is_class : public integral_constant(0)) == sizeof(yes_type) && !is_union::value + >{}; + #else + #define EASTL_TYPE_TRAIT_is_class_CONFORMANCE 0 // is_class is not fully conforming. + + // GCC 2.x version, due to GCC being broken. + template + struct is_class : public false_type{}; + #endif + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_class_v = is_class::value; + #endif + + + /////////////////////////////////////////////////////////////////////// + // is_enum + // + // is_enum::value == true if and only if T is an enumeration type. + // + /////////////////////////////////////////////////////////////////////// + + #if EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE && (defined(_MSC_VER) || defined(EA_COMPILER_GNUC) || (defined(EA_COMPILER_CLANG) && EA_COMPILER_HAS_FEATURE(is_enum))) + #define EASTL_TYPE_TRAIT_is_enum_CONFORMANCE 1 // is_enum is conforming. + + template + struct is_enum : public integral_constant{}; + #else + #define EASTL_TYPE_TRAIT_is_enum_CONFORMANCE 1 // is_enum is conforming. 
+ + struct int_convertible{ int_convertible(int); }; + + template + struct is_enum_helper { template struct nest : public is_convertible{}; }; + + template <> + struct is_enum_helper { template struct nest : public false_type {}; }; + + template + struct is_enum_helper2 + { + typedef type_or::value, is_reference::value, is_class::value> selector; + typedef is_enum_helper helper_t; + typedef typename add_reference::type ref_t; + typedef typename helper_t::template nest result; + }; + + template + struct is_enum : public integral_constant::result::value>{}; + + template <> struct is_enum : public false_type {}; + template <> struct is_enum : public false_type {}; + template <> struct is_enum : public false_type {}; + template <> struct is_enum : public false_type {}; + #endif + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_enum_v = is_enum::value; + #endif + + #define EASTL_DECLARE_ENUM(T) namespace eastl{ template <> struct is_enum : public true_type{}; template <> struct is_enum : public true_type{}; } + + + + + + /////////////////////////////////////////////////////////////////////// + // is_polymorphic + // + // is_polymorphic::value == true if and only if T is a class or struct + // that declares or inherits a virtual function. is_polymorphic may only + // be applied to complete types. + // + /////////////////////////////////////////////////////////////////////// + + #if EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE && (defined(_MSC_VER) || defined(EA_COMPILER_GNUC) || (defined(EA_COMPILER_CLANG) && EA_COMPILER_HAS_FEATURE(is_polymorphic))) + #define EASTL_TYPE_TRAIT_is_polymorphic_CONFORMANCE 1 // is_polymorphic is conforming. + + template + struct is_polymorphic : public integral_constant{}; + #else + #define EASTL_TYPE_TRAIT_is_polymorphic_CONFORMANCE 1 // is_polymorphic is conforming. + + template + struct is_polymorphic_imp1 + { + typedef typename remove_cv::type t; + + struct helper_1 : public t + { + helper_1(); + ~helper_1() throw(); + char pad[64]; + }; + + struct helper_2 : public t + { + helper_2(); + virtual ~helper_2() throw(); + #ifndef _MSC_VER + virtual void foo(); + #endif + char pad[64]; + }; + + static const bool value = (sizeof(helper_1) == sizeof(helper_2)); + }; + + template + struct is_polymorphic_imp2{ static const bool value = false; }; + + template + struct is_polymorphic_selector{ template struct rebind{ typedef is_polymorphic_imp2 type; }; }; + + template <> + struct is_polymorphic_selector{ template struct rebind{ typedef is_polymorphic_imp1 type; }; }; + + template + struct is_polymorphic_value{ + typedef is_polymorphic_selector::value> selector; + typedef typename selector::template rebind binder; + typedef typename binder::type imp_type; + static const bool value = imp_type::value; + }; + + template + struct is_polymorphic : public integral_constant::value>{}; + #endif + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_polymorphic_v = is_polymorphic::value; + #endif + + + + + /////////////////////////////////////////////////////////////////////// + // is_object + // + // is_object::value == true if and only if: + // is_reference::value == false, and + // is_function::value == false, and + // is_void::value == false + // + // The C++ standard, section 3.9p9, states: "An object type is a + // (possibly cv-qualified) type that is not a function type, not a + // reference type, and not incomplete (except for an incompletely + // defined object type). 
+ /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_is_object_CONFORMANCE (EASTL_TYPE_TRAIT_is_reference_CONFORMANCE && EASTL_TYPE_TRAIT_is_void_CONFORMANCE && EASTL_TYPE_TRAIT_is_function_CONFORMANCE) + + template + struct is_object : public integral_constant::value && !is_void::value && !is_function::value + >{}; + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_object_v = is_object::value; + #endif + + + /////////////////////////////////////////////////////////////////////// + // is_scalar + // + // is_scalar::value == true if and only if: + // is_arithmetic::value == true, or + // is_enum::value == true, or + // is_pointer::value == true, or + // is_member_pointer::value == true, or + // is_null_pointer::value == true + // + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_is_scalar_CONFORMANCE 1 // is_scalar is conforming. + + template + struct is_scalar : public integral_constant::value || is_enum::value || is_pointer::value || + is_member_pointer::value || + is_null_pointer::value> {}; + + template struct is_scalar : public true_type {}; + template struct is_scalar : public true_type {}; + template struct is_scalar : public true_type {}; + template struct is_scalar : public true_type {}; + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_scalar_v = is_scalar::value; + #endif + + + /////////////////////////////////////////////////////////////////////// + // is_compound + // + // Compound means anything but fundamental. See C++ standard, section 3.9.2. + // + // is_compound::value == true if and only if: + // is_fundamental::value == false + // + // Thus, is_compound::value == true if and only if: + // is_floating_point::value == false, and + // is_integral::value == false, and + // is_void::value == false + // + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_is_compound_CONFORMANCE EASTL_TYPE_TRAIT_is_fundamental_CONFORMANCE + + template + struct is_compound : public integral_constant::value>{}; + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_compound_v = is_compound::value; + #endif + + + + /////////////////////////////////////////////////////////////////////// + // decay + // + // Converts the type T to its decayed equivalent. That means doing + // lvalue to rvalue, array to pointer, function to pointer conversions, + // and removal of const and volatile. + // This is the type conversion silently applied by the compiler to + // all function arguments when passed by value. + + #define EASTL_TYPE_TRAIT_decay_CONFORMANCE 1 // decay is conforming. + + template + struct decay + { + typedef typename eastl::remove_reference::type U; + + typedef typename eastl::conditional< + eastl::is_array::value, + typename eastl::remove_extent::type*, + typename eastl::conditional< + eastl::is_function::value, + typename eastl::add_pointer::type, + typename eastl::remove_cv::type + >::type + >::type type; + }; + + + // decay_t is the C++14 using typedef for typename decay::type, though + // it requires only C++11 compiler functionality to implement. + // We provide a backwards-compatible means to access it through a macro for pre-C++11 compilers. 
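+
+    // Example usage (editorial sketch; each result follows from the conditional chain above):
+    //     decay<const int&>::type  => int             (reference removed, then top-level cv)
+    //     decay<int[4]>::type      => int*            (array-to-pointer conversion)
+    //     decay<void (int)>::type  => void (*)(int)   (function-to-pointer conversion)
+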
+ #if defined(EA_COMPILER_NO_TEMPLATE_ALIASES) + #define EASTL_DECAY_T(T) typename decay::type + #else + template + using decay_t = typename decay::type; + #define EASTL_DECAY_T(T) decay_t + #endif + + + /////////////////////////////////////////////////////////////////////// + // common_type + // + // Determines the common type among all types T..., that is the type all T... + // can be implicitly converted to. + // + // It is intended that this be specialized by the user for cases where it + // is useful to do so. Example specialization: + // template + // struct common_type{ typedef MyBaseClassB type; }; + // + // The member typedef type shall be defined as set out in 20.9.7.6,p3. All types in + // the parameter pack T shall be complete or (possibly cv) void. A program may + // specialize this trait if at least one template parameter in the specialization + // is a user-defined type. Note: Such specializations are needed when only + // explicit conversions are desired among the template arguments. + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_common_type_CONFORMANCE 1 // common_type is conforming. + + template + struct common_type; + + template + struct common_type + { typedef decay_t type; }; // Question: Should we use T or decay_t here? The C++11 Standard specifically (20.9.7.6,p3) specifies that it be without decay, but libc++ uses decay. + + template + struct common_type + { + typedef decay_t() : declval())> type; // The type of a tertiary expression is set by the compiler to be the common type of the two result types. + }; + + template + struct common_type + { typedef typename common_type::type, V...>::type type; }; + + + // common_type_t is the C++14 using typedef for typename common_type::type. + // We provide a backwards-compatible means to access it through a macro for pre-C++11 compilers. + #if defined(EA_COMPILER_NO_TEMPLATE_ALIASES) + #define EASTL_COMMON_TYPE_T(...) typename common_type<__VA_ARGS__>::type + #else + template + using common_type_t = typename common_type::type; + #define EASTL_COMMON_TYPE_T(...) 
common_type_t<__VA_ARGS__> + #endif + + /////////////////////////////////////////////////////////////////////// + // is_final + /////////////////////////////////////////////////////////////////////// + #if EA_COMPILER_HAS_FEATURE(is_final) + template + struct is_final : public integral_constant {}; + #else + // no compiler support so we always return false + template + struct is_final : public false_type {}; + #endif + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_final_v = is_final::value; + #endif + + + /////////////////////////////////////////////////////////////////////// + // is_aggregate + // + // https://en.cppreference.com/w/cpp/language/aggregate_initialization + // + // An aggregate is one of the following types: + // * array type + // * class type (typically, struct or union), that has + // * no private or protected non-static data members + // * no user-provided constructors (explicitly defaulted or deleted constructors are allowed) + // * no user-provided, inherited, or explicit constructors + // * (explicitly defaulted or deleted constructors are allowed) + // * no virtual, private, or protected (since C++17) base classes + // * no virtual member functions + // * no default member initializers + // + /////////////////////////////////////////////////////////////////////// + #if EA_COMPILER_HAS_FEATURE(is_aggregate) || defined(_MSC_VER) && (_MSC_VER >= 1916) // VS2017 15.9+ + #define EASTL_TYPE_TRAIT_is_aggregate_CONFORMANCE 1 + + template + struct is_aggregate : public integral_constant {}; + #else + #define EASTL_TYPE_TRAIT_is_aggregate_CONFORMANCE 0 + + // no compiler support so we always return false + template + struct is_aggregate : public false_type {}; + #endif + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_aggregate_v = is_aggregate::value; + #endif +} // namespace eastl + + +#endif // Header include guard + + + + diff --git a/libkram/eastl/include/EASTL/internal/type_fundamental.h b/libkram/eastl/include/EASTL/internal/type_fundamental.h new file mode 100644 index 00000000..950d15e3 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/type_fundamental.h @@ -0,0 +1,289 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_INTERNAL_TYPE_FUNDAMENTAL_H +#define EASTL_INTERNAL_TYPE_FUNDAMENTAL_H + + +#include +#include + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + +namespace eastl +{ + + + /////////////////////////////////////////////////////////////////////// + // is_void + // + // is_void::value == true if and only if T is one of the following types: + // [const][volatile] void + // + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_is_void_CONFORMANCE 1 // is_void is conforming. + + template struct is_void : public false_type{}; + + template <> struct is_void : public true_type{}; + template <> struct is_void : public true_type{}; + template <> struct is_void : public true_type{}; + template <> struct is_void : public true_type{}; + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_void_v = is_void::value; + #endif + + + + /////////////////////////////////////////////////////////////////////// + // has_void_arg + // + // utility which identifies if any of the given template arguments is void. 
+ // + // TODO(rparolin): refactor with fold expressions when C++17 compilers are widely available. + /////////////////////////////////////////////////////////////////////// + + template + struct has_void_arg; + + template <> + struct has_void_arg<> + : public eastl::false_type {}; + + template + struct has_void_arg + { static const bool value = (eastl::is_void::value || eastl::has_void_arg::value); }; + + + /////////////////////////////////////////////////////////////////////// + // is_null_pointer + // + // C++14 type trait. Refers only to nullptr_t and not NULL (0). + // eastl::is_null_pointer::value == true + // eastl::is_null_pointer::value == true + // eastl::is_null_pointer::value == false + // eastl::is_null_pointer::value == [cannot compile] + // + /////////////////////////////////////////////////////////////////////// + + #if defined(EA_COMPILER_CPP11_ENABLED) && !defined(EA_COMPILER_NO_DECLTYPE) && !defined(_MSC_VER) // VC++'s handling of decltype(nullptr) is broken. + #define EASTL_TYPE_TRAIT_is_null_pointer_CONFORMANCE 1 + + template + struct is_null_pointer : public eastl::is_same::type, decltype(nullptr)> {}; // A C++11 compiler defines nullptr, but you need a C++11 standard library to declare std::nullptr_t. So it's safer to compare against decltype(nullptr) than to use std::nullptr_t, because we may have a C++11 compiler but C++98 library (happens with Apple frequently). + #else + #define EASTL_TYPE_TRAIT_is_null_pointer_CONFORMANCE 1 + + template + struct is_null_pointer : public eastl::is_same::type, std::nullptr_t> {}; + #endif + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_null_pointer_v = is_null_pointer::value; + #endif + + + + /////////////////////////////////////////////////////////////////////// + // is_integral + // + // is_integral::value == true if and only if T is one of the following types: + // [const] [volatile] bool + // [const] [volatile] char + // [const] [volatile] signed char + // [const] [volatile] unsigned char + // [const] [volatile] wchar_t + // [const] [volatile] short + // [const] [volatile] int + // [const] [volatile] long + // [const] [volatile] long long + // [const] [volatile] unsigned short + // [const] [volatile] unsigned int + // [const] [volatile] unsigned long + // [const] [volatile] unsigned long long + // + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_is_integral_CONFORMANCE 1 // is_integral is conforming. 
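+
+    // Example usage (editorial sketch):
+    //     is_integral<int>::value         => true
+    //     is_integral<const bool>::value  => true     (cv-qualifiers are stripped via remove_cv below)
+    //     is_integral<float>::value       => false
+    //     is_integral<int*>::value        => false
+    // A user-defined integer-like type can opt in with the EASTL_DECLARE_INTEGRAL(T) macro defined
+    // below, which specializes is_integral for T and its cv-qualified forms.
+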
+ + template struct is_integral_helper : public false_type{}; + + template <> struct is_integral_helper : public true_type{}; + template <> struct is_integral_helper : public true_type{}; + template <> struct is_integral_helper : public true_type{}; + template <> struct is_integral_helper : public true_type{}; + template <> struct is_integral_helper : public true_type{}; + + template <> struct is_integral_helper : public true_type{}; + template <> struct is_integral_helper : public true_type{}; + template <> struct is_integral_helper : public true_type{}; + template <> struct is_integral_helper : public true_type{}; + template <> struct is_integral_helper : public true_type{}; + + template <> struct is_integral_helper : public true_type{}; + template <> struct is_integral_helper : public true_type{}; + #if defined(EA_CHAR16_NATIVE) && EA_CHAR16_NATIVE + template <> struct is_integral_helper : public true_type{}; + #endif + #if defined(EA_CHAR32_NATIVE) && EA_CHAR32_NATIVE + template <> struct is_integral_helper : public true_type{}; + #endif + #ifndef EA_WCHAR_T_NON_NATIVE // If wchar_t is a native type instead of simply a define to an existing type which is already handled above... + template <> struct is_integral_helper : public true_type{}; + #endif + #if EASTL_INT128_SUPPORTED && (defined(EA_COMPILER_GNUC) || defined(EA_COMPILER_CLANG)) + template <> struct is_integral_helper<__int128_t> : public true_type{}; + template <> struct is_integral_helper<__uint128_t> : public true_type{}; + #endif + + template + struct is_integral : public eastl::is_integral_helper::type>{}; + + #define EASTL_DECLARE_INTEGRAL(T) \ + namespace eastl{ \ + template <> struct is_integral : public true_type{}; \ + template <> struct is_integral : public true_type{}; \ + template <> struct is_integral : public true_type{}; \ + template <> struct is_integral : public true_type{}; \ + } + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_integral_v = is_integral::value; + #endif + + + /////////////////////////////////////////////////////////////////////// + // is_floating_point + // + // is_floating_point::value == true if and only if T is one of the following types: + // [const] [volatile] float + // [const] [volatile] double + // [const] [volatile] long double + // + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_is_floating_point_CONFORMANCE 1 // is_floating_point is conforming. 
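The EASTL_DECLARE_INTEGRAL macro shown above is the escape hatch for integer-like types that the built-in specializations do not cover. A minimal sketch, using a hypothetical MyInt128 struct as a stand-in for such a compiler-extension type:

    #include <EASTL/type_traits.h>

    // Hypothetical stand-in for a compiler-specific extended integer type.
    struct MyInt128 { long long hi; unsigned long long lo; };

    // Must be used at global scope: the macro opens namespace eastl and
    // specializes is_integral for T, const T, volatile T and const volatile T.
    EASTL_DECLARE_INTEGRAL(MyInt128)

    static_assert(eastl::is_integral<MyInt128>::value, "declared integral");
    static_assert(eastl::is_integral<const volatile MyInt128>::value, "cv variants too");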
+ + template struct is_floating_point_helper : public false_type{}; + + template <> struct is_floating_point_helper : public true_type{}; + template <> struct is_floating_point_helper : public true_type{}; + template <> struct is_floating_point_helper : public true_type{}; + + template + struct is_floating_point : public eastl::is_floating_point_helper::type>{}; + + #define EASTL_DECLARE_FLOATING_POINT(T) \ + namespace eastl{ \ + template <> struct is_floating_point : public true_type{}; \ + template <> struct is_floating_point : public true_type{}; \ + template <> struct is_floating_point : public true_type{}; \ + template <> struct is_floating_point : public true_type{}; \ + } + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_floating_point_v = is_floating_point::value; + #endif + + + + /////////////////////////////////////////////////////////////////////// + // is_arithmetic + // + // is_arithmetic::value == true if and only if: + // is_floating_point::value == true, or + // is_integral::value == true + // + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_is_arithmetic_CONFORMANCE 1 // is_arithmetic is conforming. + + template + struct is_arithmetic + : public integral_constant::value || is_floating_point::value> {}; + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_arithmetic_v = is_arithmetic::value; + #endif + + + /////////////////////////////////////////////////////////////////////// + // is_fundamental + // + // is_fundamental::value == true if and only if: + // is_floating_point::value == true, or + // is_integral::value == true, or + // is_void::value == true + // is_null_pointer::value == true + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_is_fundamental_CONFORMANCE 1 // is_fundamental is conforming. + + template + struct is_fundamental + : public bool_constant || is_integral_v || is_floating_point_v || is_null_pointer_v> {}; + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_fundamental_v = is_fundamental::value; + #endif + + + /////////////////////////////////////////////////////////////////////// + // is_hat_type + // + // is_hat_type::value == true if and only if: + // underlying type is a C++/CX '^' type such as: Foo^ + // meaning the type is heap allocated and ref-counted + /////////////////////////////////////////////////////////////////////// + + template struct is_hat_type_helper : public false_type {}; + + #if (EABASE_VERSION_N > 20607 && defined(EA_COMPILER_WINRTCX_ENABLED)) || defined(__cplusplus_winrt) + template struct is_hat_type_helper : public true_type{}; + #endif + + template + struct is_hat_type : public eastl::is_hat_type_helper {}; + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_hat_type_v = is_hat_type::value; + #endif + +} // namespace eastl + + +#endif // Header include guard + + + + + + + + + + + + + + + + + + + + + diff --git a/libkram/eastl/include/EASTL/internal/type_pod.h b/libkram/eastl/include/EASTL/internal/type_pod.h new file mode 100644 index 00000000..8726a7e6 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/type_pod.h @@ -0,0 +1,1945 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_INTERNAL_TYPE_POD_H +#define EASTL_INTERNAL_TYPE_POD_H + + +#include +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + +#include +#include + +namespace eastl +{ + /////////////////////////////////////////////////////////////////////// + // is_empty + // + // is_empty::value == true if and only if T is an empty class or struct. + // is_empty may only be applied to complete types. + // + // is_empty cannot be used with union types until is_union can be made to work. + /////////////////////////////////////////////////////////////////////// + #if EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE && (defined(_MSC_VER) || defined(EA_COMPILER_GNUC) || (defined(EA_COMPILER_CLANG) && EA_COMPILER_HAS_FEATURE(is_empty))) + #define EASTL_TYPE_TRAIT_is_empty_CONFORMANCE 1 // is_empty is conforming. + + template + struct is_empty : public integral_constant{}; + #else + #define EASTL_TYPE_TRAIT_is_empty_CONFORMANCE 1 // is_empty is fully conforming. + + template + struct is_empty_helper_t1 : public T { char m[64]; }; + struct is_empty_helper_t2 { char m[64]; }; + + // The inheritance in empty_helper_t1 will not work with non-class types + template + struct is_empty_helper : public eastl::false_type{}; + + template + struct is_empty_helper : public eastl::integral_constant) == sizeof(is_empty_helper_t2) + >{}; + + template + struct is_empty_helper2 + { + typedef typename eastl::remove_cv::type _T; + typedef eastl::is_empty_helper<_T, eastl::is_class<_T>::value> type; + }; + + template + struct is_empty : public eastl::is_empty_helper2::type {}; + #endif + + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_empty_v = is_empty::value; + #endif + + + /////////////////////////////////////////////////////////////////////// + // is_pod + // + // is_pod::value == true if and only if, for a given type T: + // - is_scalar::value == true, or + // - T is a class or struct that has no user-defined copy assignment + // operator or destructor, and T has no non-static data members M for + // which is_pod::value == false, and no members of reference type, or + // - T is the type of an array of objects E for which is_pod::value == true + // + // is_pod may only be applied to complete types. + // + // Without some help from the compiler or user, is_pod will not report + // that a struct or class is a POD, but will correctly report that + // built-in types such as int are PODs. The user can help the compiler + // by using the EASTL_DECLARE_POD macro on a class. + /////////////////////////////////////////////////////////////////////// + + #if defined(EA_COMPILER_MSVC) + #define EASTL_TYPE_TRAIT_is_pod_CONFORMANCE 1 // is_pod is conforming. Actually as of VS2008 it is apparently not fully conforming, as it flags the following as a non-pod: struct Pod{ Pod(){} }; + + EA_DISABLE_VC_WARNING(4647) + template // We check for has_trivial_constructor only because the VC++ is_pod does. Is it due to some compiler bug? + struct is_pod : public eastl::integral_constant::value) || eastl::is_void::value || eastl::is_scalar::value>{}; + EA_RESTORE_VC_WARNING() + + #elif EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE && (defined(EA_COMPILER_GNUC) || (defined(EA_COMPILER_CLANG) && EA_COMPILER_HAS_FEATURE(is_pod))) + #define EASTL_TYPE_TRAIT_is_pod_CONFORMANCE 1 // is_pod is conforming. 
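To make the is_empty/is_pod behavior concrete, here is a small sketch (hypothetical Tag and Point types, C++11 static_assert assumed). On compilers without the intrinsic, is_pod can return a false negative for a user struct, which is exactly what the EASTL_DECLARE_POD helper mentioned above is for:

    #include <EASTL/type_traits.h>

    struct Tag {};                      // no non-static data members: empty
    struct Point { float x, y, z; };    // scalars only, no user ctor/dtor

    static_assert(eastl::is_empty<Tag>::value, "empty class");
    static_assert(eastl::is_pod<int>::value, "scalars are always PODs");

    // Assert POD-ness explicitly so non-conforming compilers agree.
    EASTL_DECLARE_POD(Point)

    static_assert(eastl::is_pod<Point>::value, "declared POD");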
+ + template + struct is_pod : public eastl::integral_constant::value || eastl::is_scalar::value>{}; + #else + #define EASTL_TYPE_TRAIT_is_pod_CONFORMANCE 0 // is_pod is not conforming. Can return false negatives. + + template // There's not much we can do here without some compiler extension. + struct is_pod : public eastl::integral_constant::value || eastl::is_scalar::type>::value>{}; + #endif + + template + struct is_pod : public is_pod{}; + + template + struct is_POD : public is_pod{}; // Backwards compatibility. + + #define EASTL_DECLARE_IS_POD(T, isPod) \ + namespace eastl { \ + template <> struct is_pod : public eastl::integral_constant { }; \ + template <> struct is_pod : public eastl::integral_constant { }; \ + template <> struct is_pod : public eastl::integral_constant { }; \ + template <> struct is_pod : public eastl::integral_constant { }; \ + } + + // Old style macro, for bacwards compatibility: + #define EASTL_DECLARE_POD(T) namespace eastl{ template <> struct is_pod : public true_type{}; template <> struct is_pod : public true_type{}; } + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_pod_v = is_pod::value; + #endif + + + /////////////////////////////////////////////////////////////////////// + // is_standard_layout + // + #if EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE && ((defined(EA_COMPILER_MSVC) && (_MSC_VER >= 1700)) || (defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4006)) || (defined(EA_COMPILER_CLANG) && EA_COMPILER_HAS_FEATURE(is_standard_layout))) + #define EASTL_TYPE_TRAIT_is_standard_layout_CONFORMANCE 1 // is_standard_layout is conforming. + + template + struct is_standard_layout : public eastl::integral_constant::value || eastl::is_scalar::value>{}; + #else + #define EASTL_TYPE_TRAIT_is_standard_layout_CONFORMANCE 0 // is_standard_layout is not conforming. Can return false negatives. + + template // There's not much we can do here without some compiler extension. + struct is_standard_layout : public eastl::integral_constant::value || is_scalar::value>{}; + #endif + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_standard_layout_v = is_standard_layout::value; + #endif + + #define EASTL_DECLARE_IS_STANDARD_LAYOUT(T, isStandardLayout) \ + namespace eastl { \ + template <> struct is_standard_layout : public eastl::integral_constant { }; \ + template <> struct is_standard_layout : public eastl::integral_constant { }; \ + template <> struct is_standard_layout : public eastl::integral_constant { }; \ + template <> struct is_standard_layout : public eastl::integral_constant { }; \ + } + + // Old style macro, for bacwards compatibility: + #define EASTL_DECLARE_STANDARD_LAYOUT(T) namespace eastl{ template <> struct is_standard_layout : public true_type{}; template <> struct is_standard_layout : public true_type{}; } + + + + /////////////////////////////////////////////////////////////////////// + // has_trivial_constructor + // + // has_trivial_constructor::value == true if and only if T is a class + // or struct that has a trivial constructor. 
A constructor is trivial if + // - it is implicitly defined by the compiler, and + // - is_polymorphic::value == false, and + // - T has no virtual base classes, and + // - for every direct base class of T, has_trivial_constructor::value == true, + // where B is the type of the base class, and + // - for every nonstatic data member of T that has class type or array + // of class type, has_trivial_constructor::value == true, + // where M is the type of the data member + // + // has_trivial_constructor may only be applied to complete types. + // + // Without from the compiler or user, has_trivial_constructor will not + // report that a class or struct has a trivial constructor. + // The user can use EASTL_DECLARE_TRIVIAL_CONSTRUCTOR to help the compiler. + // + // A default constructor for a class X is a constructor of class X that + // can be called without an argument. + /////////////////////////////////////////////////////////////////////// + + #if defined(_MSC_VER) && (_MSC_VER >= 1600) // VS2010+ + #define EASTL_TYPE_TRAIT_has_trivial_constructor_CONFORMANCE 1 // has_trivial_constructor is conforming. + + template + struct has_trivial_constructor : public eastl::integral_constant::value) && !eastl::is_hat_type::value>{}; + #elif EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE && (defined(_MSC_VER) || defined(EA_COMPILER_GNUC) || defined(EA_COMPILER_CLANG)) + #define EASTL_TYPE_TRAIT_has_trivial_constructor_CONFORMANCE 1 // has_trivial_constructor is conforming. + + template + struct has_trivial_constructor : public eastl::integral_constant::value>{}; + #else + #define EASTL_TYPE_TRAIT_has_trivial_constructor_CONFORMANCE 0 // has_trivial_constructor is not fully conforming. Can return false negatives. + + // With current compilers, this is all we can do. + template + struct has_trivial_constructor : public eastl::is_pod {}; + #endif + + #define EASTL_DECLARE_HAS_TRIVIAL_CONSTRUCTOR(T, hasTrivialConstructor) \ + namespace eastl { \ + template <> struct has_trivial_constructor : public eastl::integral_constant { }; \ + } + + // Old style macro, for bacwards compatibility: + #define EASTL_DECLARE_TRIVIAL_CONSTRUCTOR(T) namespace eastl{ template <> struct has_trivial_constructor : public true_type{}; template <> struct has_trivial_constructor : public true_type{}; } + + + + + /////////////////////////////////////////////////////////////////////// + // has_trivial_copy + // + // has_trivial_copy::value == true if and only if T is a class or + // struct that has a trivial copy constructor. A copy constructor is + // trivial if + // - it is implicitly defined by the compiler, and + // - is_polymorphic::value == false, and + // - T has no virtual base classes, and + // - for every direct base class of T, has_trivial_copy::value == true, + // where B is the type of the base class, and + // - for every nonstatic data member of T that has class type or array + // of class type, has_trivial_copy::value == true, where M is the + // type of the data member + // + // has_trivial_copy may only be applied to complete types. + // + // Another way of looking at this is: + // A copy constructor for class X is trivial if it is implicitly + // declared and if all the following are true: + // - Class X has no virtual functions (10.3) and no virtual base classes (10.1). + // - Each direct base class of X has a trivial copy constructor. 
+ // - For all the nonstatic data members of X that are of class type + // (or array thereof), each such class type has a trivial copy constructor; + // otherwise the copy constructor is nontrivial. + // + // Without help from the compiler or user, has_trivial_copy will not report + // that a class or struct has a trivial copy constructor. The user can + // use EASTL_DECLARE_TRIVIAL_COPY to help the compiler. + /////////////////////////////////////////////////////////////////////// + + #if defined(_MSC_VER) + #define EASTL_TYPE_TRAIT_has_trivial_copy_CONFORMANCE 1 // has_trivial_copy is conforming. + + template + struct has_trivial_copy : public eastl::integral_constant::value) && !eastl::is_volatile::value && !eastl::is_hat_type::value>{}; + #elif EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE && (defined(EA_COMPILER_GNUC) || defined(EA_COMPILER_CLANG)) + #define EASTL_TYPE_TRAIT_has_trivial_copy_CONFORMANCE 1 // has_trivial_copy is conforming. + + template + struct has_trivial_copy : public eastl::integral_constant::value) && (!eastl::is_volatile::value && !eastl::is_reference::value)>{}; + #else + #define EASTL_TYPE_TRAIT_has_trivial_copy_CONFORMANCE 0 // has_trivial_copy is not fully conforming. Can return false negatives. + + template + struct has_trivial_copy : public eastl::integral_constant::value && !eastl::is_volatile::value>{}; + #endif + + #define EASTL_DECLARE_HAS_TRIVIAL_COPY(T, hasTrivialCopy) \ + namespace eastl { \ + template <> struct has_trivial_copy : public eastl::integral_constant { }; \ + } + + // Old style macro, for bacwards compatibility: + #define EASTL_DECLARE_TRIVIAL_COPY(T) namespace eastl{ template <> struct has_trivial_copy : public true_type{}; template <> struct has_trivial_copy : public true_type{}; } + + + + + /////////////////////////////////////////////////////////////////////// + // has_trivial_assign + // + // has_trivial_assign::value == true if and only if T is a class or + // struct that has a trivial copy assignment operator. A copy assignment + // operator is trivial if: + // - it is implicitly defined by the compiler, and + // - is_polymorphic::value == false, and + // - T has no virtual base classes, and + // - for every direct base class of T, has_trivial_assign::value == true, + // where B is the type of the base class, and + // - for every nonstatic data member of T that has class type or array + // of class type, has_trivial_assign::value == true, where M is + // the type of the data member. + // + // has_trivial_assign may only be applied to complete types. + // + // Without from the compiler or user, has_trivial_assign will not + // report that a class or struct has trivial assignment. The user + // can use EASTL_DECLARE_TRIVIAL_ASSIGN to help the compiler. + /////////////////////////////////////////////////////////////////////// + + #if defined(_MSC_VER) && (_MSC_VER >= 1600) + #define EASTL_TYPE_TRAIT_has_trivial_assign_CONFORMANCE 1 // has_trivial_assign is conforming. + + template + struct has_trivial_assign : public integral_constant::value) && !eastl::is_const::value && !eastl::is_volatile::value && !eastl::is_hat_type::value>{}; + #elif EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE && (defined(_MSC_VER) || defined(EA_COMPILER_GNUC) || defined(EA_COMPILER_CLANG)) + #define EASTL_TYPE_TRAIT_has_trivial_assign_CONFORMANCE 1 // has_trivial_assign is conforming. 
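As a sketch of how the trivial-copy machinery above is used in practice (Color is a hypothetical aggregate): scalars are always reported as trivially copyable, and the old-style declaration macro covers user types on the non-intrinsic fallback path:

    #include <EASTL/type_traits.h>

    struct Color { unsigned char r, g, b, a; };   // plain aggregate

    static_assert(eastl::has_trivial_copy<int>::value, "scalars copy trivially");

    // The fallback implementation may report a false negative for Color,
    // so state the property explicitly at global scope.
    EASTL_DECLARE_TRIVIAL_COPY(Color)

    static_assert(eastl::has_trivial_copy<Color>::value, "declared trivially copyable");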
+ + template + struct has_trivial_assign : public integral_constant::value) && !eastl::is_const::value && !eastl::is_volatile::value>{}; + #else + #define EASTL_TYPE_TRAIT_has_trivial_assign_CONFORMANCE 0 // is_pod is not fully conforming. Can return false negatives. + + template + struct has_trivial_assign : public integral_constant::value && !is_const::value && !is_volatile::value + >{}; + #endif + + #define EASTL_DECLARE_HAS_TRIVIAL_ASSIGN(T, hasTrivialAssign) \ + namespace eastl { \ + template <> struct has_trivial_assign : public eastl::integral_constant { }; \ + } + + // Old style macro, for bacwards compatibility: + #define EASTL_DECLARE_TRIVIAL_ASSIGN(T) namespace eastl{ template <> struct has_trivial_assign : public true_type{}; template <> struct has_trivial_assign : public true_type{}; } + + + + + /////////////////////////////////////////////////////////////////////// + // has_trivial_destructor + // + // has_trivial_destructor::value == true if and only if T is a class + // or struct that has a trivial destructor. A destructor is trivial if + // - it is implicitly defined by the compiler, and + // - for every direct base class of T, has_trivial_destructor::value == true, + // where B is the type of the base class, and + // - for every nonstatic data member of T that has class type or + // array of class type, has_trivial_destructor::value == true, + // where M is the type of the data member + // + // has_trivial_destructor may only be applied to complete types. + // + // Without from the compiler or user, has_trivial_destructor will not + // report that a class or struct has a trivial destructor. + // The user can use EASTL_DECLARE_TRIVIAL_DESTRUCTOR to help the compiler. + /////////////////////////////////////////////////////////////////////// + + #if defined(_MSC_VER) && (_MSC_VER >= 1600) + #define EASTL_TYPE_TRAIT_has_trivial_destructor_CONFORMANCE 1 // has_trivial_destructor is conforming. + + template + struct has_trivial_destructor : public eastl::integral_constant::value) && !eastl::is_hat_type::value>{}; + #elif EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE && (defined(_MSC_VER) || defined(EA_COMPILER_GNUC) || defined(EA_COMPILER_CLANG)) + #define EASTL_TYPE_TRAIT_has_trivial_destructor_CONFORMANCE 1 // has_trivial_destructor is conforming. + + template + struct has_trivial_destructor : public eastl::integral_constant::value>{}; + #else + #define EASTL_TYPE_TRAIT_has_trivial_destructor_CONFORMANCE 0 // is_pod is not fully conforming. Can return false negatives. + + // With current compilers, this is all we can do. + template + struct has_trivial_destructor : public eastl::is_pod{}; + #endif + + #define EASTL_DECLARE_HAS_TRIVIAL_DESTRUCTOR(T, hasTrivialDestructor) \ + namespace eastl { \ + template <> struct has_trivial_destructor : public eastl::integral_constant { }; \ + } + + // Old style macro, for bacwards compatibility: + #define EASTL_DECLARE_TRIVIAL_DESTRUCTOR(T) namespace eastl{ template <> struct has_trivial_destructor : public true_type{}; template <> struct has_trivial_destructor : public true_type{}; } + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool has_trivial_destructor_v = has_trivial_destructor::value; + #endif + + + /////////////////////////////////////////////////////////////////////// + // has_trivial_relocate + // + // This is an EA extension to the type traits standard. 
+ // This trait is deprecated under conforming C++11 compilers, as C++11 + // move functionality supercedes this functionality and we want to + // migrate away from it in the future. + // + // A trivially relocatable object is one that can be safely memmove'd + // to uninitialized memory. construction, assignment, and destruction + // properties are not addressed by this trait. A type that has the + // is_fundamental trait would always have the has_trivial_relocate trait. + // A type that has the has_trivial_constructor, has_trivial_copy or + // has_trivial_assign traits would usally have the has_trivial_relocate + // trait, but this is not strictly guaranteed. + // + // The user can use EASTL_DECLARE_TRIVIAL_RELOCATE to help the compiler. + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_has_trivial_relocate_CONFORMANCE 0 // is_pod is not fully conforming. Can return false negatives. + + template + struct has_trivial_relocate : public eastl::bool_constant && !eastl::is_volatile_v> {}; + + #define EASTL_DECLARE_TRIVIAL_RELOCATE(T) namespace eastl{ template <> struct has_trivial_relocate : public true_type{}; template <> struct has_trivial_relocate : public true_type{}; } + + + + + /////////////////////////////////////////////////////////////////////// + // has_nothrow_constructor + // + // has_nothrow_constructor::value == true if and only if T is a + // class or struct whose default constructor has an empty throw specification. + // + // has_nothrow_constructor may only be applied to complete types. + // + /////////////////////////////////////////////////////////////////////// + + #if EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE && (defined(EA_COMPILER_GNUC) || defined(EA_COMPILER_CLANG)) + #define EASTL_TYPE_TRAIT_has_nothrow_constructor_CONFORMANCE 1 + + template + struct has_nothrow_constructor + : public eastl::integral_constant{}; + + #elif EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE && defined(_MSC_VER) + // Microsoft's implementation of __has_nothrow_constructor is crippled and returns true only if T is a class that has an explicit constructor. + // "Returns true if the default constructor has an empty exception specification." + #define EASTL_TYPE_TRAIT_has_nothrow_constructor_CONFORMANCE 0 + + template // This is mistakenly returning true for an unbounded array of scalar type. + struct has_nothrow_constructor : public eastl::integral_constant::type>::value || eastl::is_reference::value>{}; + + #else + #define EASTL_TYPE_TRAIT_has_nothrow_constructor_CONFORMANCE 0 // has_nothrow_constructor is not fully conforming. Can return false negatives. + + template + struct has_nothrow_constructor // To do: Improve this to include other types that can work. + { static const bool value = eastl::is_scalar::type>::value || eastl::is_reference::value; }; + #endif + + #define EASTL_DECLARE_HAS_NOTHROW_CONSTRUCTOR(T, hasNothrowConstructor) \ + namespace eastl { \ + template <> struct has_nothrow_constructor : public eastl::integral_constant { }; \ + } + + + + /////////////////////////////////////////////////////////////////////// + // has_nothrow_copy + // + // has_nothrow_copy::value == true if and only if T is a class or + // struct whose copy constructor has an empty throw specification. + // + // has_nothrow_copy may only be applied to complete types. 
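A sketch of the has_trivial_relocate extension described above, using a hypothetical Blob type: it owns heap memory (so it is not a POD), but it stores no pointers into itself, so moving its bytes to uninitialized memory is safe and the type can be opted in explicitly:

    #include <EASTL/type_traits.h>
    #include <cstddef>

    class Blob
    {
    public:
        Blob() : mData(nullptr), mSize(0) {}
        ~Blob() { delete[] mData; }
        // copy/move handling omitted for brevity
    private:
        unsigned char* mData;   // heap allocation, no self-references
        size_t         mSize;
    };

    EASTL_DECLARE_TRIVIAL_RELOCATE(Blob)

    static_assert(eastl::has_trivial_relocate<Blob>::value, "declared relocatable");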
+ // + /////////////////////////////////////////////////////////////////////// + + #if EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE && (defined(EA_COMPILER_GNUC) || defined(EA_COMPILER_CLANG)) + #define EASTL_TYPE_TRAIT_has_nothrow_copy_CONFORMANCE 1 + + template + struct has_nothrow_copy : public eastl::integral_constant{}; + + #elif EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE && defined(_MSC_VER) + // Microsoft's implementation of __has_nothrow_copy is crippled and returns true only if T is a class that has a copy constructor. + // "Returns true if the copy constructor has an empty exception specification." + #define EASTL_TYPE_TRAIT_has_nothrow_copy_CONFORMANCE 0 + + template + struct has_nothrow_copy : public eastl::integral_constant::type>::value || eastl::is_reference::value>{}; + + #else + #define EASTL_TYPE_TRAIT_has_nothrow_copy_CONFORMANCE 0 // has_nothrow_copy is not fully conforming. Can return false negatives. + + template + struct has_nothrow_copy // To do: Improve this to include other types that can work. + { static const bool value = eastl::is_scalar::type>::value || eastl::is_reference::value; }; + #endif + + #define EASTL_DECLARE_HAS_NOTHROW_COPY(T, hasNothrowCopy) \ + namespace eastl { \ + template <> struct has_nothrow_copy : public eastl::integral_constant { }; \ + } + + + + /////////////////////////////////////////////////////////////////////// + // has_nothrow_assign + // + // has_nothrow_assign::value == true if and only if T is a class or + // struct whose copy assignment operator has an empty throw specification. + // + // has_nothrow_assign may only be applied to complete types. + // + /////////////////////////////////////////////////////////////////////// + + #if EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE && (defined(EA_COMPILER_GNUC) || defined(EA_COMPILER_CLANG)) + #define EASTL_TYPE_TRAIT_has_nothrow_assign_CONFORMANCE 1 + + template + struct has_nothrow_assign : public eastl::integral_constant{}; + + #elif EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE && defined(_MSC_VER) + // Microsoft's implementation of __has_nothrow_assign is crippled and returns true only if T is a class that has an assignment operator. + // "Returns true if a copy assignment operator has an empty exception specification." + #define EASTL_TYPE_TRAIT_has_nothrow_assign_CONFORMANCE 0 + + template // This is mistakenly returning true for an unbounded array of scalar type. + struct has_nothrow_assign : public eastl::integral_constant::type>::value || eastl::is_reference::value>{}; + #else + #define EASTL_TYPE_TRAIT_has_nothrow_assign_CONFORMANCE 0 // has_nothrow_assign is not fully conforming. Can return false negatives. + + template + struct has_nothrow_assign // To do: Improve this to include other types that can work. + { static const bool value = eastl::is_scalar::type>::value || eastl::is_reference::value; } ; + #endif + + #define EASTL_DECLARE_HAS_NOTHROW_ASSIGN(T, hasNothrowAssign) \ + namespace eastl { \ + template <> struct has_nothrow_assign : public eastl::integral_constant { }; \ + } + + + + /////////////////////////////////////////////////////////////////////// + // has_virtual_destructor + // + // has_virtual_destructor::value == true if and only if T is a class + // or struct with a virtual destructor. + // + // has_virtual_destructor may only be applied to complete types. 
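For the has_nothrow_* traits above, scalars are reported as nothrow on every implementation path, while user types may need the declaration macros on the non-conforming paths. A small sketch with a hypothetical Handle type:

    #include <EASTL/type_traits.h>

    static_assert(eastl::has_nothrow_assign<int>::value,  "scalar assignment cannot throw");
    static_assert(eastl::has_nothrow_copy<double>::value, "scalar copy cannot throw");

    // Handle's copy constructor is known not to throw; state it explicitly
    // so the fallback implementations agree.
    struct Handle { int id; };

    EASTL_DECLARE_HAS_NOTHROW_COPY(Handle, true)

    static_assert(eastl::has_nothrow_copy<Handle>::value, "declared nothrow-copyable");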
+ // + /////////////////////////////////////////////////////////////////////// + + #if EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE && (defined(_MSC_VER) || defined(EA_COMPILER_GNUC) || defined(EA_COMPILER_CLANG)) + #define EASTL_TYPE_TRAIT_has_virtual_destructor_CONFORMANCE 1 + + template + struct has_virtual_destructor : public eastl::integral_constant{}; + #else + #define EASTL_TYPE_TRAIT_has_virtual_destructor_CONFORMANCE 0 // has_virtual_destructor is not fully conforming. Can return false negatives. + + template + struct has_virtual_destructor : public eastl::false_type{}; + #endif + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool has_virtual_destructor_v = has_virtual_destructor::value; + #endif + + #define EASTL_DECLARE_HAS_VIRTUAL_DESTRUCTOR(T, hasVirtualDestructor) \ + namespace eastl { \ + template <> struct has_virtual_destructor : public eastl::integral_constant { }; \ + template <> struct has_virtual_destructor : public eastl::integral_constant { }; \ + template <> struct has_virtual_destructor : public eastl::integral_constant { }; \ + template <> struct has_virtual_destructor : public eastl::integral_constant { }; \ + } + + + /////////////////////////////////////////////////////////////////////// + // is_literal_type + // + // See the C++11 Standard, section 2.9,p10. + // A type is a literal type if it is: + // - a scalar type; or + // - a reference type referring to a literal type; or + // - an array of literal type; or + // - a class type (Clause 9) that has all of the following properties: + // - it has a trivial destructor, + // - every constructor call and full-expression in the brace-or-equal-initializer s for non-static data members (if any) is a constant expression (5.19), + // - it is an aggregate type (8.5.1) or has at least one constexpr constructor or constructor template that is not a copy or move constructor, and + // - all of its non-static data members and base classes are of literal types. + // + /////////////////////////////////////////////////////////////////////// + + #if EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE && (defined(EA_COMPILER_CLANG) && EA_COMPILER_HAS_FEATURE(is_literal)) + #define EASTL_TYPE_TRAIT_is_literal_type_CONFORMANCE 1 + + template + struct is_literal_type : public eastl::integral_constant{}; + + #elif EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE && ((defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4006)) || (defined(_MSC_VER) && (_MSC_VER >= 1700))) // VS2012+ + #if defined(EA_COMPILER_GNUC) && (!defined(EA_COMPILER_CPP11_ENABLED) || (EA_COMPILER_VERSION < 4007)) + #define EASTL_TYPE_TRAIT_is_literal_type_CONFORMANCE 0 // It seems that in this case GCC supports the compiler intrinsic but reports it as false when it's true. + #else + #define EASTL_TYPE_TRAIT_is_literal_type_CONFORMANCE 1 + #endif + + template + struct is_literal_type : public eastl::integral_constant{}; + + #else + #define EASTL_TYPE_TRAIT_is_literal_type_CONFORMANCE 0 + + // It's not clear if this trait can be fully implemented without explicit compiler support. + // For now we assume that it can't be but implement something that gets it right at least + // some of the time. Recall that partial positives and false negatives are OK (though not ideal), + // while false positives are not OK for us to generate. + + template // This is not a complete implementation and will be true for only some literal types (the basic ones). 
+ struct is_literal_type : public eastl::integral_constant::type>::type>::value>{}; + #endif + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_literal_type_v = is_literal_type::value; + #endif + + + + /////////////////////////////////////////////////////////////////////// + // is_abstract + // + // is_abstract::value == true if and only if T is a class or struct + // that has at least one pure virtual function. is_abstract may only + // be applied to complete types. + // + /////////////////////////////////////////////////////////////////////// + + #if EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE && (defined(_MSC_VER) || defined(EA_COMPILER_GNUC) || (defined(EA_COMPILER_CLANG) && EA_COMPILER_HAS_FEATURE(is_abstract))) + #define EASTL_TYPE_TRAIT_is_abstract_CONFORMANCE 1 // is_abstract is conforming. + + template + struct is_abstract : public integral_constant{}; + #else + #define EASTL_TYPE_TRAIT_is_abstract_CONFORMANCE 0 + + template::value> + class is_abstract_helper + { + template + static eastl::yes_type test(...); + + template + static eastl::no_type test(T1(*)[1]); // The following: 'typedef SomeAbstractClass (*SomeFunctionType)[1];' is invalid (can't have an array of abstract types) and thus doesn't choose this path. + + public: + static const bool value = (sizeof(test(NULL)) == sizeof(eastl::yes_type)); + }; + + template + struct is_abstract_helper + { static const bool value = false; }; + + template + struct is_abstract + : public integral_constant::value> { }; + + #endif + + #define EASTL_DECLARE_IS_ABSTRACT(T, isAbstract) \ + namespace eastl { \ + template <> struct is_abstract : public eastl::integral_constant { }; \ + template <> struct is_abstract : public eastl::integral_constant { }; \ + template <> struct is_abstract : public eastl::integral_constant { }; \ + template <> struct is_abstract : public eastl::integral_constant { }; \ + } + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_abstract_v = is_abstract::value; + #endif + + + /////////////////////////////////////////////////////////////////////// + // is_trivially_copyable + // + // T is a trivially copyable type (3.9) T shall be a complete type, + // (possibly cv-qualified) void, or an array of unknown bound. + // + // 3.9,p3: For any trivially copyable type T, if two pointers to T + // point to distinct T objects obj1 and obj2, where neither obj1 nor + // obj2 is a base-class subobject, if the underlying bytes making + // up obj1 are copied into obj2, obj2 shall subsequently hold the + // same value as obj1. In other words, you can memcpy/memmove it. + /////////////////////////////////////////////////////////////////////// + #if EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE && ((defined(_MSC_VER) && (_MSC_VER >= 1700)) || (defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 5003)) || (defined(EA_COMPILER_CLANG) && EA_COMPILER_HAS_FEATURE(is_trivially_copyable))) + #define EASTL_TYPE_TRAIT_is_trivially_copyable_CONFORMANCE 1 + + // https://connect.microsoft.com/VisualStudio/feedback/details/808827/c-std-is-trivially-copyable-produces-wrong-result-for-arrays + // + // From Microsoft: + // We're working on fixing this. When overhauling in VC 2013, I incorrectly believed that is_trivially_copyable was a synonym + // for is_trivially_copy_constructible. I've asked the compiler team to provide a compiler hook with 100% accurate answers. 
(Currently, the + // compiler hook has incorrect answers for volatile scalars, volatile data members, and various scenarios for defaulted/deleted/private + // special member functions - I wrote an exhaustive test case to exercise the complicated Standardese.) When the compiler hook is fixed, + // I'll change to invoke it. + // + // Microsoft broken VS2013 STL implementation: + // template + // struct is_trivially_copyable + // : is_trivially_copy_constructible<_Ty>::type + // { // determine whether _Ty has a trivial copy constructor + // }; + // + + template + struct is_trivially_copyable { static const bool value = __is_trivially_copyable(T); }; + + #elif EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE && (defined(EA_COMPILER_MSVC) || defined(EA_COMPILER_GNUC)) + #define EASTL_TYPE_TRAIT_is_trivially_copyable_CONFORMANCE 1 + + // Micrsoft (prior to VS2012) and GCC have __has_trivial_copy, but it may not be identical with the goals of this type trait. + template + struct is_trivially_copyable : public integral_constant::type>::value) && (!eastl::is_void::value && !eastl::is_volatile::value && !eastl::is_reference::value)>{}; + #else + #define EASTL_TYPE_TRAIT_is_trivially_copyable_CONFORMANCE 0 // Generates false negatives. + + template + struct is_trivially_copyable { static const bool value = eastl::is_scalar::type>::value; }; + #endif + + #define EASTL_DECLARE_IS_TRIVIALLY_COPYABLE(T, isTriviallyCopyable) \ + namespace eastl { \ + template <> struct is_trivially_copyable : public eastl::integral_constant { }; \ + template <> struct is_trivially_copyable : public eastl::integral_constant { }; \ + template <> struct is_trivially_copyable : public eastl::integral_constant { }; \ + template <> struct is_trivially_copyable : public eastl::integral_constant { }; \ + } + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_trivially_copyable_v = is_trivially_copyable::value; + #endif + + + /////////////////////////////////////////////////////////////////////// + // is_constructible + // + // See the C++11 Standard, section 20.9.4.3,p6. + // + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_is_constructible_CONFORMANCE 1 + + #if EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE && (defined(_MSC_VER) || (defined(EA_COMPILER_CLANG) && EA_COMPILER_HAS_FEATURE(is_constructible))) + template + struct is_constructible : public bool_constant<__is_constructible(T, Args...) > {}; + #else + // We implement a copy of move here has move_internal. We are currently stuck doing this because our move + // implementation is in and currently #includes us, and so we have a header + // chicken-and-egg problem. To do: Resolve this, probably by putting eastl::move somewhere else. + template + inline typename eastl::remove_reference::type&& move_internal(T&& x) EA_NOEXCEPT + { return ((typename eastl::remove_reference::type&&)x); } + + template + typename first_type_select()...)))>::type is(T&&, Args&& ...); + + template + struct can_construct_scalar_helper + { + static eastl::true_type can(T); + static eastl::false_type can(...); + }; + + template + eastl::false_type is(argument_sink, Args&& ...); + + // Except for scalars and references (handled below), check for constructibility via decltype. 
+ template + struct is_constructible_helper_2 // argument_sink will catch all T that is not constructible from the Args and denote false_type + : public eastl::identity(), eastl::declval()...))>::type {}; + + template + struct is_constructible_helper_2 + : public eastl::is_scalar {}; + + template // We handle the case of multiple arguments below (by disallowing them). + struct is_constructible_helper_2 + : public eastl::identity::can(eastl::declval()))>::type {}; + + // Scalars and references can be constructed only with 0 or 1 argument. e.g the following is an invalid expression: int(17, 23) + template + struct is_constructible_helper_2 + : public eastl::false_type {}; + + template + struct is_constructible_helper_1 + : public is_constructible_helper_2::value || eastl::is_reference::value, T, Args...> {}; + + // Unilaterally dismiss void, abstract, unknown bound arrays, and function types as not constructible. + template + struct is_constructible_helper_1 + : public false_type {}; + + // is_constructible + template + struct is_constructible + : public is_constructible_helper_1<(eastl::is_abstract::type>::value || + eastl::is_array_of_unknown_bounds::value || + eastl::is_function::type>::value || + eastl::has_void_arg::value), + T, Args...> {}; + + // Array types are constructible if constructed with no arguments and if their element type is default-constructible + template + struct is_constructible_helper_2 + : public eastl::is_constructible::type> {}; + + // Arrays with arguments are not constructible. e.g. the following is an invalid expression: int[3](37, 34, 12) + template + struct is_constructible_helper_2 + : public eastl::false_type {}; + + #endif + + + // You need to manually declare const/volatile variants individually if you want them. + #define EASTL_DECLARE_IS_CONSTRUCTIBLE(T, U, isConstructible) \ + namespace eastl { \ + template <> struct is_constructible : public eastl::integral_constant { }; \ + } + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_constructible_v = is_constructible::value; + #endif + + + /////////////////////////////////////////////////////////////////////// + // is_trivially_constructible + // + // is_constructible::value is true and the variable definition + // for is_constructible, as defined below, is known to call no operation + // that is not trivial (3.9, 12). T and all types in the parameter pack + // Args shall be complete types, (possibly cv-qualified) void, or arrays + // of unknown bound. + // + // Note: + // C++11's is_trivially_constructible sounds the same as the pre-standard + // has_trivial_constructor type trait (which we also support here). However, + // the definition of has_trivial_constructor has never been formally standardized + // and so we can't just blindly equate the two to each other. Since we are + // moving forward with C++11 and deprecating the old type traits, we leave + // the old ones as-is, though we defer to them in cases where we don't seem + // to have a good alternative. + // + /////////////////////////////////////////////////////////////////////// + + #if defined(EA_COMPILER_NO_VARIADIC_TEMPLATES) + + #define EASTL_TYPE_TRAIT_is_trivially_constructible_CONFORMANCE 0 + + // In this version we allow only zero or one argument (Arg). We can add more arguments + // by creating a number of extra specializations. It's probably not possible to + // simplify the implementation with recursive templates because ctor argument + // presence is specific. 
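A short sketch of is_constructible in use (Meters is a hypothetical type constructible only from int; C++11 assumed). The scalar and reference cases at the end are the ones handled by the dedicated specializations above:

    #include <EASTL/type_traits.h>

    struct Meters { explicit Meters(int) {} };

    static_assert( eastl::is_constructible<Meters, int>::value,   "int -> Meters");
    static_assert(!eastl::is_constructible<Meters>::value,        "no default constructor");
    static_assert(!eastl::is_constructible<Meters, void*>::value, "no pointer overload");

    // Scalars and references accept at most one argument, and an int&
    // cannot bind to a prvalue int.
    static_assert( eastl::is_constructible<int, int>::value,  "scalar copy-init");
    static_assert(!eastl::is_constructible<int&, int>::value, "int& from a prvalue");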
+ // + // To consider: we can fold the two implementations below by making a macro that's defined + // has __is_trivially_constructible(T) or eastl::has_trivial_copy::value, depending on + // whether the __is_trivially_constructible compiler intrinsic is available. + + // If the compiler has this trait built-in (which ideally all compilers would have since it's necessary for full conformance) use it. + #if EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE && (defined(EA_COMPILER_CLANG) && EA_COMPILER_HAS_FEATURE(is_trivially_constructible)) + + template + struct is_trivially_constructible + : public eastl::false_type {}; + + template + struct is_trivially_constructible + : public eastl::integral_constant {}; + + template + struct is_trivially_constructible + : public eastl::integral_constant {}; + + template + struct is_trivially_constructible + : public eastl::integral_constant {}; + + template + struct is_trivially_constructible + : public eastl::integral_constant {}; + + template + struct is_trivially_constructible + : public eastl::integral_constant {}; + + template + struct is_trivially_constructible + : public eastl::integral_constant {}; + + #else + + template + struct is_trivially_constructible + : public eastl::false_type {}; + + template + struct is_trivially_constructible + : public eastl::integral_constant::value && eastl::has_trivial_constructor::type>::value> {}; + + template + struct is_trivially_constructible + : public eastl::integral_constant::value && eastl::has_trivial_copy::value> {}; + + template + struct is_trivially_constructible + : public eastl::integral_constant::value && eastl::has_trivial_copy::value> {}; + + template + struct is_trivially_constructible + : public eastl::integral_constant::value && eastl::has_trivial_copy::value> {}; + + template + struct is_trivially_constructible + : public eastl::integral_constant::value && eastl::has_trivial_copy::value> {}; + + template + struct is_trivially_constructible + : public eastl::integral_constant::value && eastl::has_trivial_copy::value> {}; + + #endif + + #else + + // If the compiler has this trait built-in (which ideally all compilers would have since it's necessary for full conformance) use it. + #if EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE && (defined(EA_COMPILER_CLANG) && EA_COMPILER_HAS_FEATURE(is_trivially_constructible)) + #define EASTL_TYPE_TRAIT_is_trivially_constructible_CONFORMANCE 1 + + // We have a problem with clang here as of clang 3.4: __is_trivially_constructible(int[]) is false, yet I believe it should be true. + // Until it gets resolved, what we do is check for is_constructible along with __is_trivially_constructible(). + template + struct is_trivially_constructible + : public eastl::integral_constant::value && __is_trivially_constructible(T, Args...)> {}; + + #else + + #define EASTL_TYPE_TRAIT_is_trivially_constructible_CONFORMANCE 0 // This is 0 but in fact it will work for most real-world cases due to the has_trivial_constructor specialization below. + + template + struct is_trivially_constructible + : public eastl::false_type {}; + + template + struct is_trivially_constructible + : public eastl::integral_constant::value && eastl::has_trivial_constructor::type>::value> {}; + + // It's questionable whether we can use has_trivial_copy here, as it could theoretically create a false-positive. 
+ template + struct is_trivially_constructible + : public eastl::integral_constant::value && eastl::has_trivial_copy::value> {}; + + template + struct is_trivially_constructible + : public eastl::integral_constant::value && eastl::has_trivial_copy::value> {}; + + template + struct is_trivially_constructible + : public eastl::integral_constant::value && eastl::has_trivial_copy::value> {}; + + template + struct is_trivially_constructible + : public eastl::integral_constant::value && eastl::has_trivial_copy::value> {}; + + template + struct is_trivially_constructible + : public eastl::integral_constant::value && eastl::has_trivial_copy::value> {}; + + template + struct is_trivially_constructible + : public eastl::integral_constant::value && eastl::has_trivial_copy::value> {}; + + #endif + + #endif + + + #define EASTL_DECLARE_IS_TRIVIALLY_CONSTRUCTIBLE(T, isTriviallyConstructible) \ + namespace eastl { \ + template <> struct is_trivially_constructible : public eastl::integral_constant { }; \ + template <> struct is_trivially_constructible : public eastl::integral_constant { }; \ + template <> struct is_trivially_constructible : public eastl::integral_constant { }; \ + template <> struct is_trivially_constructible : public eastl::integral_constant { }; \ + } + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_trivially_constructible_v = is_trivially_constructible::value; + #endif + + + /////////////////////////////////////////////////////////////////////// + // is_trivially_default_constructible + // + // is_trivially_constructible::value is true. + // This is thus identical to is_trivially_constructible. + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_is_trivially_default_constructible_CONFORMANCE EASTL_TYPE_TRAIT_is_trivially_constructible_CONFORMANCE + + template + struct is_trivially_default_constructible + : public eastl::is_trivially_constructible {}; + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_trivially_default_constructible_v = is_trivially_default_constructible::value; + #endif + + + /////////////////////////////////////////////////////////////////////// + // is_trivial + // + // is_trivial::value == true if T is a scalar type, a trivially copyable + // class with a trivial default constructor, or array of such type/class, + // possibly cv-qualified), provides the member constant value equal true. + // + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_is_trivial_CONFORMANCE ((EASTL_TYPE_TRAIT_is_trivially_default_constructible_CONFORMANCE && EASTL_TYPE_TRAIT_is_trivially_copyable_CONFORMANCE) ? 
1 : 0) + + #if defined(_MSC_VER) && _MSC_VER == 1800 + template + struct is_trivial_helper + : public eastl::integral_constant::value && eastl::is_trivially_default_constructible::value>{}; + + template + struct is_trivial_helper + : public false_type{}; + + template + struct is_trivial + : public is_trivial_helper<(EA_ALIGN_OF(T) > EA_PLATFORM_MIN_MALLOC_ALIGNMENT), T>::type{}; + #else + // All other compilers seem to be able to handle aligned types passed as value + template + struct is_trivial + : public eastl::integral_constant::value && eastl::is_trivially_default_constructible::value> {}; + #endif + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_trivial_v = is_trivial::value; + #endif + + + + /////////////////////////////////////////////////////////////////////// + // is_nothrow_constructible + // + // is_constructible::value is true and the variable definition + // for is_constructible, as defined below, is known not to throw any + // exceptions (5.3.7). T and all types in the parameter pack Args shall + // be complete types, (possibly cv-qualified) void, or arrays of unknown bound. + // + /////////////////////////////////////////////////////////////////////// + #if defined(EA_COMPILER_NO_NOEXCEPT) + + #define EASTL_TYPE_TRAIT_is_nothrow_constructible_CONFORMANCE 0 + + template + struct is_nothrow_constructible + : public eastl::false_type {}; + + template + struct is_nothrow_constructible + : public eastl::integral_constant::value> {}; + + template + struct is_nothrow_constructible + : public eastl::integral_constant::value> {}; + + template + struct is_nothrow_constructible + : public eastl::integral_constant::value> {}; + + template + struct is_nothrow_constructible + : public eastl::integral_constant::value> {}; + + template + struct is_nothrow_constructible + : public eastl::integral_constant::value> {}; + + #else + #if defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION < 4008) + #define EASTL_TYPE_TRAIT_is_nothrow_constructible_CONFORMANCE 0 // GCC up to v4.7's noexcept is broken and fails to generate true for the case of compiler-generated constructors. + #else + #define EASTL_TYPE_TRAIT_is_nothrow_constructible_CONFORMANCE EASTL_TYPE_TRAIT_is_constructible_CONFORMANCE + #endif + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // *_noexcept_wrapper implements a workaround for VS2015 preview. A standards conforming noexcept operator allows variadic template expansion. + // There appears to be an issue with VS2015 preview that prevents variadic template expansion into a noexcept operator that is passed directly + // to a template parameter. + // + // The fix hoists the noexcept expression into a separate struct and caches the result of the expression. This result is then passed to integral_constant. 
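A minimal sketch of the hoisting pattern just described (the identifiers here are illustrative, not the ones EASTL uses): the noexcept operator is evaluated inside a helper struct, and only the cached boolean result is handed to integral_constant. The real trait only reaches this point once is_constructible has already been verified:

    // Illustrative only; assumes C++11 noexcept and variadic templates.
    template <typename T, typename... Args>
    struct nothrow_ctor_test
    {
        // Evaluate noexcept(...) here, outside any template argument list.
        static const bool value = noexcept(T(eastl::declval<Args>()...));
    };

    template <typename T, typename... Args>
    struct my_is_nothrow_constructible
        : public eastl::integral_constant<bool, nothrow_ctor_test<T, Args...>::value> {};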
+ // + // Example code from Clang libc++ + // template + // struct __libcpp_is_nothrow_constructible<[>is constructible*/true, /*is reference<]false, _Tp, _Args...> + // : public integral_constant()...))> { }; + // + + template + struct is_nothrow_constructible_helper_noexcept_wrapper + { static const bool value = noexcept(T(eastl::declval()...)); }; + + template + struct is_nothrow_constructible_helper; + + template + struct is_nothrow_constructible_helper + : public eastl::integral_constant::value> {}; + + template + struct is_nothrow_constructible_helper + : public eastl::integral_constant()))> {}; + + template + struct is_nothrow_constructible_helper + : public eastl::integral_constant {}; + + template + struct is_nothrow_constructible_helper + : public eastl::false_type {}; + + template + struct is_nothrow_constructible + : public eastl::is_nothrow_constructible_helper::value, T, Args...> {}; + + template + struct is_nothrow_constructible + : public eastl::is_nothrow_constructible_helper::value, T> {}; + #endif + + #define EASTL_DECLARE_IS_NOTHROW_CONSTRUCTIBLE(T, isNothrowConstructible) \ + namespace eastl{ \ + template <> struct is_nothrow_constructible : public eastl::integral_constant { }; \ + } + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_nothrow_constructible_v = is_nothrow_constructible::value; + #endif + + + /////////////////////////////////////////////////////////////////////// + // is_default_constructible + // + // is_constructible::value is true. + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_is_default_constructible_CONFORMANCE EASTL_TYPE_TRAIT_is_constructible_CONFORMANCE + + template + struct is_default_constructible + : public eastl::is_constructible {}; + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_default_constructible_v = is_default_constructible::value; + #endif + + + + /////////////////////////////////////////////////////////////////////// + // is_nothrow_default_constructible + /////////////////////////////////////////////////////////////////////// + // TODO(rparolin): implement type-trait + + + + /////////////////////////////////////////////////////////////////////// + // is_copy_constructible + // + // is_constructible::value is true. + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_is_copy_constructible_CONFORMANCE EASTL_TYPE_TRAIT_is_constructible_CONFORMANCE + + template + struct is_copy_constructible + : public eastl::is_constructible::type>::type> {}; + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_copy_constructible_v = is_copy_constructible::value; + #endif + + + /////////////////////////////////////////////////////////////////////// + // is_trivially_copy_constructible + // + // is_trivially_constructible::value is true. + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_is_trivially_copy_constructible_CONFORMANCE EASTL_TYPE_TRAIT_is_trivially_constructible_CONFORMANCE + + template + struct is_trivially_copy_constructible + : public eastl::is_trivially_constructible::type>::type> {}; + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_trivially_copy_constructible_v = is_trivially_copy_constructible::value; + #endif + + + /////////////////////////////////////////////////////////////////////// + // is_nothrow_copy_constructible + // + // is_nothrow_-constructible::value is true. 
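To illustrate the convenience wrappers above (NoCopy is a hypothetical type; deleted functions require C++11):

    #include <EASTL/type_traits.h>

    struct NoCopy
    {
        NoCopy() {}
        NoCopy(const NoCopy&) = delete;
    };

    static_assert( eastl::is_default_constructible<NoCopy>::value, "default ctor present");
    static_assert(!eastl::is_copy_constructible<NoCopy>::value,    "copy ctor deleted");

    // For scalars all of the copy-construction wrappers agree.
    static_assert(eastl::is_copy_constructible<int>::value,           "copyable");
    static_assert(eastl::is_trivially_copy_constructible<int>::value, "trivially copyable");
    static_assert(eastl::is_nothrow_copy_constructible<int>::value,   "nothrow copyable");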
+ /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_is_nothrow_copy_constructible_CONFORMANCE EASTL_TYPE_TRAIT_is_nothrow_constructible_CONFORMANCE + + template + struct is_nothrow_copy_constructible + : public is_nothrow_constructible::type>::type> {}; + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_nothrow_copy_constructible_v = is_nothrow_copy_constructible::value; + #endif + + + /////////////////////////////////////////////////////////////////////// + // is_move_constructible + // + // is_constructible::value is true. + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_is_move_constructible_CONFORMANCE EASTL_TYPE_TRAIT_is_constructible_CONFORMANCE + + template + struct is_move_constructible + : public eastl::is_constructible::type> {}; + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_move_constructible_v = is_move_constructible::value; + #endif + + + /////////////////////////////////////////////////////////////////////// + // is_trivially_move_constructible + // + // is_trivially_constructible::value is true. + // T shall be a complete type, (possibly cv-qualified) void, or an + // array of unknown bound. + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_is_trivially_move_constructible_CONFORMANCE EASTL_TYPE_TRAIT_is_trivially_constructible_CONFORMANCE + + template + struct is_trivially_move_constructible + : public eastl::is_trivially_constructible::type> {}; + + #define EASTL_DECLARE_IS_TRIVIALLY_MOVE_CONSTRUCTIBLE(T, isTrivallyMoveConstructible) \ + namespace eastl{ \ + template <> struct is_trivially_move_constructible : public eastl::integral_constant { }; \ + } + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_trivially_move_constructible_v = is_trivially_move_constructible::value; + #endif + + + + /////////////////////////////////////////////////////////////////////// + // is_assignable + // + // The expression declval() = declval() is well-formed when treated as an unevaluated operand. + // Access checking is performed as if in a context unrelated to T and U. Only the validity of + // the immediate context of the assignment expression is considered. The compilation of the expression + // can result in side effects such as the instantiation of class template specializations and function + // template specializations, the generation of implicitly-defined functions, and so on. Such side + // effects are not in the "immediate context" and can result in the program being ill-formed. + // + // Note: + // This type trait has a misleading and counter-intuitive name. It does not indicate whether an instance + // of U can be assigned to an instance of T (e.g. t = u). Instead it indicates whether the assignment can be + // done after adding rvalue references to both, as in add_rvalue_reference::type = add_rvalue_reference::type. + // A counterintuitive result of this is that is_assignable::value == false. The is_copy_assignable + // trait indicates if a type can be assigned to its own type, though there isn't a standard C++ way to tell + // if an arbitrary type is assignable to another type. + // http://stackoverflow.com/questions/19920213/why-is-stdis-assignable-counter-intuitive + // + // Note: + // A true is_assignable value doesn't guarantee that the expression is compile-able, the compiler checks + // only that the assignment matches before compilation. 
In particular, if you have templated operator= + // for a class, the compiler will always say is_assignable is true, regardless of what's being tested + // on the right hand side of the expression. It may actually turn out during compilation that the + // templated operator= fails to compile because in practice it doesn't accept every possible type for + // the right hand side of the expression. + // + // Expected results: + // is_assignable::value == false + // is_assignable::value == true + // is_assignable::value == false + // is_assignable::value == false + // is_assignable::value == false + // is_assignable::value == false + // is_assignable::value == false + // is_assignable::value == false + // is_assignable::value == false + // is_assignable::value == false + // is_assignable::value == false + // is_assignable::value == true + // is_assignable::value == false + // + // Note: + // Our implementation here yields different results than does the std::is_assignable from Dinkumware-based Standard + // Libraries, but yields similar results to the std::is_assignable from GCC's libstdc++ and clang's libc++. It may + // possibly be that the Dinkumware results are intentionally different for some practical purpose or because they + // represent the spirit or the Standard but not the letter of the Standard. + // + /////////////////////////////////////////////////////////////////////// + #define EASTL_TYPE_TRAIT_is_assignable_CONFORMANCE 1 + + template + struct is_assignable_helper + { + template + static eastl::no_type is(...); + + template + static decltype(eastl::declval() = eastl::declval(), eastl::yes_type()) is(int); + + static const bool value = (sizeof(is(0)) == sizeof(eastl::yes_type)); + }; + + template + struct is_assignable : + public eastl::integral_constant::value> {}; + + // The main purpose of this function is to help the non-conforming case above. + // Note: We don't handle const/volatile variations here, as we expect the user to + // manually specify any such variations via this macro. + // Example usage: + // EASTL_DECLARE_IS_ASSIGNABLE(int, int, false) + // + #define EASTL_DECLARE_IS_ASSIGNABLE(T, U, isAssignable) \ + namespace eastl { \ + template <> struct is_assignable : public eastl::integral_constant { }; \ + } + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_assignable_v = is_assignable::value; + #endif + + + + /////////////////////////////////////////////////////////////////////// + // is_lvalue_assignable + // + // This is an EASTL extension function which is like is_assignable but + // works for arbitrary assignments and not just rvalue assignments. + // This function provides an intuitive assignability test, as opposed + // to is_assignable. 
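Before continuing, a concrete illustration of the distinction just described: is_assignable tests assignment to declval<T>(), which is an rvalue for non-reference T, while the "intuitive" test adds lvalue references first. The sketch below mirrors the declval/sizeof detection of is_assignable_helper using standard-library names; the nested yes_type/no_type and the namespace are illustrative stand-ins for the EASTL equivalents:

    #include <type_traits>
    #include <utility>

    namespace sketch
    {
        template <typename T, typename U>
        struct is_assignable_helper
        {
            typedef char yes_type;                       // sizeof == 1
            struct no_type { char padding[8]; };         // sizeof != sizeof(yes_type)

            template <typename, typename>
            static no_type is(...);                      // fallback: not assignable

            template <typename T1, typename U1>          // chosen when T1 = U1 is well-formed
            static decltype(std::declval<T1>() = std::declval<U1>(), yes_type()) is(int);

            static const bool value = (sizeof(is<T, U>(0)) == sizeof(yes_type));
        };

        template <typename T, typename U>
        struct is_assignable
            : public std::integral_constant<bool, is_assignable_helper<T, U>::value> {};
    }

    // declval<int>() is an rvalue, and a scalar rvalue cannot be assigned to,
    // so is_assignable<int, int> is false...
    static_assert(!sketch::is_assignable<int, int>::value, "");
    // ...while an int lvalue can be assigned from an int, which is what the
    // lvalue/copy-assignable traits test by adding lvalue references first.
    static_assert(sketch::is_assignable<int&, int>::value, "");
    static_assert(std::is_assignable<int&, int>::value && !std::is_assignable<int, int>::value, "");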
+ // + // Note: is_lvalue_assignable === is_copy_assignable + // + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_is_lvalue_assignable_CONFORMANCE EASTL_TYPE_TRAIT_is_assignable_CONFORMANCE + + template + struct is_lvalue_assignable + : public eastl::is_assignable::type, + typename eastl::add_lvalue_reference::type>::type> {}; + + #define EASTL_DECLARE_IS_LVALUE_ASSIGNABLE(T, U, isLvalueAssignable) \ + namespace eastl { \ + template <> struct is_lvalue_assignable : public eastl::integral_constant { }; \ + } + + + + /////////////////////////////////////////////////////////////////////// + // is_trivially_assignable + // + // is_assignable::value is true and the assignment, as defined by + // is_assignable, is known to call no operation that is not trivial (3.9, 12). + // T and U shall be complete types, (possibly cv-qualified) void, or + // arrays of unknown bound + /////////////////////////////////////////////////////////////////////// + + #if EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE && (defined(EA_COMPILER_CLANG) && EA_COMPILER_HAS_FEATURE(is_trivially_assignable)) + #define EASTL_TYPE_TRAIT_is_trivially_assignable_CONFORMANCE 1 + + template + struct is_trivially_assignable + : eastl::integral_constant {}; + + #elif EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE && (defined(_MSC_VER) && (_MSC_VER >= 1800)) + #define EASTL_TYPE_TRAIT_is_trivially_assignable_CONFORMANCE EASTL_TYPE_TRAIT_is_assignable_CONFORMANCE + + // This code path is attempting to work around the issue with VS2013 __is_trivially_assignable compiler intrinsic documented in the link + // below. todo: Re-evaluate in VS2014. + // + // https://connect.microsoft.com/VisualStudio/feedback/details/806233/std-is-trivially-copyable-const-int-n-and-std-is-trivially-copyable-int-n-incorrect + + template + struct is_trivially_assignable_helper; + + template + struct is_trivially_assignable_helper : eastl::integral_constant{}; + + template + struct is_trivially_assignable_helper : false_type{}; + + template + struct is_trivially_assignable + : eastl::integral_constant::value, T, U >::value> {}; + + #elif EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE && (defined(EA_COMPILER_MSVC) || defined(EA_COMPILER_GNUC)) + #define EASTL_TYPE_TRAIT_is_trivially_assignable_CONFORMANCE EASTL_TYPE_TRAIT_is_assignable_CONFORMANCE + + // Micrsoft (up till at least VS2012) and GCC have __has_trivial_assign, but it may not be identical with the goals of this type trait. + // The Microsoft type trait headers suggest that a future version of VS will have a __is_trivially_assignable intrinsic, but we + // need to come up with something in the meantime. To do: Re-evalulate this for VS2013+ when it becomes available. + template + struct is_trivially_assignable + : eastl::integral_constant::value && + (eastl::is_pod::type>::value || __has_trivial_assign(typename eastl::remove_reference::type))> {}; + #else + + #define EASTL_TYPE_TRAIT_is_trivially_assignable_CONFORMANCE 0 // Generates false negatives. 
+ + template + struct is_trivially_assignable + : public eastl::false_type {}; + + template + struct is_trivially_assignable + : public eastl::integral_constant::value> {}; + + template + struct is_trivially_assignable + : public eastl::integral_constant::value> {}; + + template + struct is_trivially_assignable + : public eastl::integral_constant::value> {}; + + template + struct is_trivially_assignable + : public eastl::integral_constant::value> {}; + + #endif + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_trivially_assignable_v = is_trivially_assignable::value; + #endif + + // The main purpose of this function is to help the non-conforming case above. + // Note: We don't handle const/volatile variations here, as we expect the user to + // manually specify any such variations via this macro. + // Example usage: + // EASTL_DECLARE_IS_TRIVIALLY_ASSIGNABLE(int, int, false) + // + #define EASTL_DECLARE_IS_TRIVIALLY_ASSIGNABLE(T, U, isTriviallyAssignable) \ + namespace eastl { \ + template <> struct is_trivially_assignable : public eastl::integral_constant { }; \ + } + + + + /////////////////////////////////////////////////////////////////////// + // is_nothrow_assignable + // + // is_assignable::value is true and the assignment is known + // not to throw any exceptions (5.3.7). T and U shall be complete + // types, (possibly cv-qualified) void, or arrays of unknown bound. + // + /////////////////////////////////////////////////////////////////////// + + #if defined(_MSC_VER) && (_MSC_VER >= 1800) // VS2013+ + #define EASTL_TYPE_TRAIT_is_nothrow_assignable_CONFORMANCE 1 + + template + struct is_nothrow_assignable + : eastl::integral_constant {}; + + #elif defined(EA_COMPILER_NO_NOEXCEPT) || defined(__EDG_VERSION__) // EDG mis-compiles the conforming code below and so must be placed here. + #define EASTL_TYPE_TRAIT_is_nothrow_assignable_CONFORMANCE 0 + + template + struct is_nothrow_assignable + : public false_type {}; + + // Note that the following are crippled in that they support only assignment of T types to other T types. + template + struct is_nothrow_assignable + : public eastl::integral_constant::value> {}; + + template + struct is_nothrow_assignable + : public eastl::integral_constant::value> {}; + + template + struct is_nothrow_assignable + : public eastl::integral_constant::value> {}; + + #else + #define EASTL_TYPE_TRAIT_is_nothrow_assignable_CONFORMANCE 1 + + template + struct is_nothrow_assignable_helper; + + template + struct is_nothrow_assignable_helper + : public false_type {}; + + template + struct is_nothrow_assignable_helper // Set to true if the assignment (same as is_assignable) cannot generate an exception. 
+ : public eastl::integral_constant() = eastl::declval()) > + { + }; + + template + struct is_nothrow_assignable + : public eastl::is_nothrow_assignable_helper::value, T, U> + { + }; + #endif + + #define EASTL_DECLARE_IS_NOTHROW_ASSIGNABLE(T, isNothrowAssignable) \ + namespace eastl{ \ + template <> struct is_nothrow_assignable : public eastl::integral_constant { }; \ + template <> struct is_nothrow_assignable : public eastl::integral_constant { }; \ + template <> struct is_nothrow_assignable : public eastl::integral_constant { }; \ + template <> struct is_nothrow_assignable : public eastl::integral_constant { }; \ + } + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_nothrow_assignable_v = is_nothrow_assignable::value; + #endif + + + + /////////////////////////////////////////////////////////////////////// + // is_copy_assignable + // + // is_assignable::value is true. T shall be a complete type, + // (possibly cv -qualified) void, or an array of unknown bound. + // + // This (and not is_assignable) is the type trait you use to tell if you + // can do an arbitrary assignment. is_assignable tells if you can do an + // assignment specifically to an rvalue and not in general. + // http://stackoverflow.com/a/19921030/725009 + // + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_is_copy_assignable_CONFORMANCE EASTL_TYPE_TRAIT_is_assignable_CONFORMANCE + + template + struct is_copy_assignable + : public eastl::is_assignable::type, + typename eastl::add_lvalue_reference::type>::type> {}; + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_copy_assignable_v = is_copy_assignable::value; + #endif + + + /////////////////////////////////////////////////////////////////////// + // is_trivially_copy_assignable + // + // is_trivially_assignable::value is true. T shall be a + // complete type, (possibly cv-qualified) void, or an array of unknown bound. 
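For reference, the conforming is_nothrow_assignable path above reduces to gating a noexcept test on is_assignable. A minimal sketch under that reading, using standard-library spellings rather than the EASTL internals:

    #include <type_traits>
    #include <utility>

    namespace sketch
    {
        // Not assignable at all, therefore not nothrow assignable.
        template <bool /*isAssignable*/, typename T, typename U>
        struct is_nothrow_assignable_helper : public std::false_type {};

        // Only evaluated when the assignment is known to be well-formed; reports
        // whether declval<T>() = declval<U>() is a non-throwing expression.
        template <typename T, typename U>
        struct is_nothrow_assignable_helper<true, T, U>
            : public std::integral_constant<bool, noexcept(std::declval<T>() = std::declval<U>())> {};

        template <typename T, typename U>
        struct is_nothrow_assignable
            : public is_nothrow_assignable_helper<std::is_assignable<T, U>::value, T, U> {};
    }

    static_assert(sketch::is_nothrow_assignable<int&, int>::value, "");
    static_assert(!sketch::is_nothrow_assignable<int, int>::value, "");   // not assignable at all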
+ // + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_is_trivially_copy_assignable_CONFORMANCE EASTL_TYPE_TRAIT_is_trivially_assignable_CONFORMANCE + +#if EASTL_TYPE_TRAIT_is_trivially_copy_assignable_CONFORMANCE + template + struct is_trivially_copy_assignable + : public eastl::is_trivially_assignable::type, + typename eastl::add_lvalue_reference::type>::type> {}; +#else + template + struct is_trivially_copy_assignable + : public integral_constant::value || eastl::is_pod::value || eastl::is_trivially_assignable::type, typename eastl::add_lvalue_reference::type>::type>::value + > {}; +#endif + + #define EASTL_DECLARE_IS_TRIVIALLY_COPY_ASSIGNABLE(T, isTriviallyCopyAssignable) \ + namespace eastl { \ + template <> struct is_trivially_copy_assignable : public eastl::integral_constant { }; \ + } + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_trivially_copy_assignable_v = is_trivially_copy_assignable::value; + #endif + + /////////////////////////////////////////////////////////////////////// + // is_nothrow_copy_assignable + // + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_is_nothrow_copy_assignable_CONFORMANCE EASTL_TYPE_TRAIT_is_nothrow_assignable_CONFORMANCE + + template + struct is_nothrow_copy_assignable + : public eastl::is_nothrow_assignable::type, + typename eastl::add_lvalue_reference::type>::type> {}; + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_nothrow_copy_assignable_v = is_nothrow_copy_assignable::value; + #endif + + + /////////////////////////////////////////////////////////////////////// + // is_move_assignable + // + // is_assignable::value is true. T shall be a complete type, + // (possibly cv -qualified) void, or an array of unknown bound. + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_is_move_assignable_CONFORMANCE EASTL_TYPE_TRAIT_is_assignable_CONFORMANCE + + template + struct is_move_assignable + : public eastl::is_assignable::type, + typename eastl::add_rvalue_reference::type> {}; + + #define EASTL_DECLARE_IS_MOVE_ASSIGNABLE(T, isMoveAssignable) \ + namespace eastl{ \ + template <> struct is_move_assignable : public eastl::integral_constant { }; \ + template <> struct is_move_assignable : public eastl::integral_constant { }; \ + template <> struct is_move_assignable : public eastl::integral_constant { }; \ + template <> struct is_move_assignable : public eastl::integral_constant { }; \ + } + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_move_assignable_v = is_move_assignable::value; + #endif + + + + /////////////////////////////////////////////////////////////////////// + // is_trivially_move_assignable + // + // is_trivially_-assignable::value is true. T shall be a complete type, + // (possibly cv-qualified) void, or an array of unknown bound. 
+ // + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_is_trivially_move_assignable_CONFORMANCE EASTL_TYPE_TRAIT_is_trivially_assignable_CONFORMANCE + + template + struct is_trivially_move_assignable + : public eastl::is_trivially_assignable::type, + typename eastl::add_rvalue_reference::type> {}; + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_trivially_move_assignable_v = is_trivially_move_assignable::value; + #endif + + + /////////////////////////////////////////////////////////////////////// + // is_nothrow_move_assignable + // + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_is_nothrow_move_assignable_CONFORMANCE EASTL_TYPE_TRAIT_is_nothrow_assignable_CONFORMANCE + + template + struct is_nothrow_move_assignable + : public eastl::is_nothrow_assignable::type, + typename eastl::add_rvalue_reference::type> {}; + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_nothrow_move_assignable_v = is_nothrow_move_assignable::value; + #endif + + /////////////////////////////////////////////////////////////////////// + // is_destructible + // + // For a complete type T and given + // template + // struct test { U u; }; + // test::~test() is not deleted (C++11 "= delete"). + // T shall be a complete type, (possibly cv-qualified) void, or an array of unknown bound. + // + /////////////////////////////////////////////////////////////////////// + + #if 0 // defined(_MSC_VER) && (_MSC_VER >= 1800) // VS2013+ -- Disabled due to __is_destructible being broken in VC++ versions up to at least VS2013. A ticket will be submitted for this + #define EASTL_TYPE_TRAIT_is_destructible_CONFORMANCE 1 + + template + struct is_destructible + : integral_constant {}; + + #elif defined(EA_COMPILER_NO_DECLTYPE) || defined(EA_COMPILER_NO_FUNCTION_TEMPLATE_DEFAULT_ARGS) || defined(_MSC_VER) || defined(__EDG_VERSION__) // VS2012 and EDG mis-compile the conforming code below and so must be placed here. + #define EASTL_TYPE_TRAIT_is_destructible_CONFORMANCE 0 + + // This implementation works for almost all cases, with the primary exception being the + // case that the user declared the destructor as deleted. To deal with that case the + // user needs to use EASTL_DECLARE_IS_NOT_DESTRUCTIBLE to cause is_destructible::value + // to be false. + + template + struct is_destructible + : public eastl::integral_constant::value && + !eastl::is_void::value && + !eastl::is_function::value && + !eastl::is_abstract::value> {}; + #else + #define EASTL_TYPE_TRAIT_is_destructible_CONFORMANCE 1 + + template + struct destructible_test_helper{ U u; }; + + template + eastl::false_type destructible_test_function(...); + + template >().~destructible_test_helper())> + eastl::true_type destructible_test_function(int); + + template ::value || // Exclude these types from being considered destructible. + eastl::is_void::value || + eastl::is_function::value || + eastl::is_abstract::value> + struct is_destructible_helper + : public eastl::identity(0))>::type {}; // Need to wrap decltype with identity because some compilers otherwise don't like the bare decltype usage. 
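A condensed, standalone version of this detection, mirroring destructible_test_helper / destructible_test_function above but omitting the extra exclusions (arrays, void, functions, abstract types) that the full helper applies; NoDtor and the namespace are illustrative only:

    #include <type_traits>
    #include <utility>

    namespace sketch
    {
        // Wrapping U as a data member means the wrapper's destructor is implicitly
        // deleted whenever U's destructor is deleted or inaccessible, which the
        // decltype default template argument then detects via SFINAE.
        template <typename U> struct destructible_test_helper { U u; };

        template <typename>
        std::false_type destructible_test(...);

        template <typename U,
                  typename = decltype(std::declval<destructible_test_helper<U>&>().~destructible_test_helper<U>())>
        std::true_type destructible_test(int);

        template <typename T>
        struct is_destructible : public decltype(destructible_test<T>(0)) {};
    }

    struct NoDtor { ~NoDtor() = delete; };
    static_assert(sketch::is_destructible<int>::value, "");
    static_assert(!sketch::is_destructible<NoDtor>::value, "");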
+ + template + struct is_destructible_helper + : public eastl::false_type {}; + + template + struct is_destructible + : public is_destructible_helper {}; + + #endif + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_destructible_v = is_destructible::value; + #endif + + #define EASTL_DECLARE_IS_DESTRUCTIBLE(T, isDestructible) \ + namespace eastl{ \ + template <> struct is_destructible : public eastl::integral_constant{}; \ + template <> struct is_destructible : public eastl::integral_constant{}; \ + template <> struct is_destructible : public eastl::integral_constant{}; \ + template <> struct is_destructible : public eastl::integral_constant{}; \ + } + + + + /////////////////////////////////////////////////////////////////////// + // is_trivially_destructible + // + // is_destructible::value is true and the indicated destructor is + // known to be trivial. T shall be a complete type, (possibly cv-qualified) + // void, or an array of unknown bound. + // + // A destructor is trivial if it is not user-provided and if: + // - the destructor is not virtual, + // - all of the direct base classes of its class have trivial destructors, and + // - for all of the non-static data members of its class that are of + // class type (or array thereof), each such class has a trivial destructor. + // + /////////////////////////////////////////////////////////////////////// + + #if 0 // defined(_MSC_VER) && (_MSC_VER >= 1800) // VS2013+ -- Disabled due to __is_trivially_destructible being broken in VC++ versions up to at least VS2013. A ticket will be submitted for this + #define EASTL_TYPE_TRAIT_is_trivially_destructible_CONFORMANCE 1 + + template + struct is_trivially_destructible + : integral_constant {}; + + #elif EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE && (defined(_MSC_VER) || defined(EA_COMPILER_GNUC) || defined(EA_COMPILER_CLANG)) + #define EASTL_TYPE_TRAIT_is_trivially_destructible_CONFORMANCE EASTL_TYPE_TRAIT_is_destructible_CONFORMANCE + + template + struct is_trivially_destructible // Can't use just __has_trivial_destructor(T) because some compilers give it slightly different meaning, and are just plain broken, such as VC++'s __has_trivial_destructor, which says false for fundamental types. 
+ : public integral_constant::value && ((__has_trivial_destructor(T) && !eastl::is_hat_type::value)|| eastl::is_scalar::type>::value)> {}; + + #else + #define EASTL_TYPE_TRAIT_is_trivially_destructible_CONFORMANCE 0 + + template + struct is_trivially_destructible_helper + : public integral_constant::value || eastl::is_scalar::value || eastl::is_reference::value) && !eastl::is_void::value> {}; + + template + struct is_trivially_destructible + : public eastl::is_trivially_destructible_helper::type> {}; + #endif + + #define EASTL_DECLARE_IS_TRIVIALLY_DESTRUCTIBLE(T, isTriviallyDestructible) \ + namespace eastl{ \ + template <> struct is_trivially_destructible : public eastl::integral_constant{}; \ + template <> struct is_trivially_destructible : public eastl::integral_constant{}; \ + template <> struct is_trivially_destructible : public eastl::integral_constant{}; \ + template <> struct is_trivially_destructible : public eastl::integral_constant{}; \ + } + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_trivially_destructible_v = is_trivially_destructible::value; + #endif + + + + + /////////////////////////////////////////////////////////////////////// + // is_nothrow_destructible + // + // is_destructible::value is true and the indicated destructor is + // known not to throw any exceptions (5.3.7). T shall be a complete type, + // (possibly cv-qualified) void, or an array of unknown bound. + // + /////////////////////////////////////////////////////////////////////// + + #if 0 // defined(_MSC_VER) && (_MSC_VER >= 1800) // VS2013+ -- Disabled due to __is_nothrow_destructible being broken in VC++ versions up to at least VS2013. A ticket will be submitted for this + #define EASTL_TYPE_TRAIT_is_nothrow_destructible_CONFORMANCE ((_MSC_VER >= 1900) ? 1 : 0) // VS2013 (1800) doesn't support noexcept and so can't support all usage of this properly (in particular default exception specifications defined in [C++11 Standard, 15.4 paragraph 14]. + + template + struct is_nothrow_destructible + : integral_constant {}; + + #elif defined(EA_COMPILER_NO_NOEXCEPT) + #define EASTL_TYPE_TRAIT_is_nothrow_destructible_CONFORMANCE 0 + + template + struct is_nothrow_destructible_helper + : public eastl::integral_constant::value || eastl::is_reference::value> {}; + + template + struct is_nothrow_destructible + : public eastl::is_nothrow_destructible_helper::type> {}; + + #else + #if defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION < 4008) + #define EASTL_TYPE_TRAIT_is_nothrow_destructible_CONFORMANCE 0 // GCC up to v4.7's noexcept is broken and fails to generate true for the case of compiler-generated destructors. + #else + #define EASTL_TYPE_TRAIT_is_nothrow_destructible_CONFORMANCE EASTL_TYPE_TRAIT_is_destructible_CONFORMANCE + #endif + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // *_noexcept_wrapper implements a workaround for VS2015. A standards conforming noexcept operator allows variadic template expansion. + // There appears to be an issue with VS2015 that prevents variadic template expansion into a noexcept operator that is passed directly + // to a template parameter. + // + // The fix hoists the noexcept expression into a separate struct and caches the result of the expression. This result is then passed to integral_constant. 
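The same wrapper trick applied to destruction looks like the following sketch, which parallels the constructible sketch earlier: the noexcept result is cached in a helper struct and only consulted for types that are destructible in the first place. Standard-library names are used; ThrowingDtor is illustrative:

    #include <type_traits>
    #include <utility>

    namespace sketch
    {
        // Cache whether calling T's destructor is a non-throwing expression.
        template <typename T>
        struct nothrow_dtor_noexcept_wrapper
        { static const bool value = noexcept(std::declval<T&>().~T()); };

        template <bool /*isDestructible*/, typename T>
        struct is_nothrow_destructible_helper : public std::false_type {};

        template <typename T>
        struct is_nothrow_destructible_helper<true, T>
            : public std::integral_constant<bool, nothrow_dtor_noexcept_wrapper<T>::value> {};

        template <typename T>
        struct is_nothrow_destructible
            : public is_nothrow_destructible_helper<std::is_destructible<T>::value, T> {};
    }

    struct ThrowingDtor { ~ThrowingDtor() noexcept(false) {} };
    static_assert(sketch::is_nothrow_destructible<int>::value, "");
    static_assert(!sketch::is_nothrow_destructible<ThrowingDtor>::value, "");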
+ // + // Example code from Clang libc++ + // template + // struct __libcpp_is_nothrow_constructible<[>is constructible*/true, /*is reference<]false, _Tp, _Args...> + // : public integral_constant()...))> { }; + // + + template + struct is_nothrow_destructible_helper_noexcept_wrapper + { static const bool value = noexcept(eastl::declval().~T()); }; + + template + struct is_nothrow_destructible_helper; + + template + struct is_nothrow_destructible_helper + : public eastl::false_type {}; + + template + struct is_nothrow_destructible_helper // If the expression T::~T is a noexcept expression then it's nothrow. + : public eastl::integral_constant::value > {}; + + template + struct is_nothrow_destructible // A type needs to at least be destructible before it could be nothrow destructible. + : public eastl::is_nothrow_destructible_helper::value> {}; + + template // An array is nothrow destructible if its element type is nothrow destructible. + struct is_nothrow_destructible // To consider: Replace this with a remove_all_extents pathway. + : public eastl::is_nothrow_destructible {}; + + template + struct is_nothrow_destructible // A reference type cannot throw while being destructed. It's just a reference. + : public eastl::true_type {}; + + template + struct is_nothrow_destructible // An rvalue reference type cannot throw while being destructed. + : public eastl::true_type {}; + + #endif + + #define EASTL_DECLARE_IS_NOTHROW_DESTRUCTIBLE(T, isNoThrowDestructible) \ + namespace eastl{ \ + template <> struct is_nothrow_destructible { static const bool value = isNoThrowDestructible; }; \ + template <> struct is_nothrow_destructible { static const bool value = isNoThrowDestructible; }; \ + template <> struct is_nothrow_destructible { static const bool value = isNoThrowDestructible; }; \ + template <> struct is_nothrow_destructible { static const bool value = isNoThrowDestructible; }; \ + } + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_nothrow_destructible_v = is_nothrow_destructible::value; + #endif + + + + /////////////////////////////////////////////////////////////////////// + // is_nothrow_default_constructible + // + /////////////////////////////////////////////////////////////////////// + #define EASTL_TYPE_TRAIT_is_nothrow_default_constructible_CONFORMANCE EASTL_TYPE_TRAIT_is_nothrow_constructible_CONFORMANCE + + template + struct is_nothrow_default_constructible + : public eastl::is_nothrow_constructible {}; + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_nothrow_default_constructible_v = is_nothrow_default_constructible::value; + #endif + + + /////////////////////////////////////////////////////////////////////// + // is_nothrow_move_constructible + // + /////////////////////////////////////////////////////////////////////// + #define EASTL_TYPE_TRAIT_is_nothrow_move_constructible_CONFORMANCE EASTL_TYPE_TRAIT_is_nothrow_constructible_CONFORMANCE + + template + struct is_nothrow_move_constructible + : public eastl::is_nothrow_constructible::type> {}; + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_nothrow_move_constructible_v = is_nothrow_move_constructible::value; + #endif + + +} // namespace eastl + + +#endif // Header include guard diff --git a/libkram/eastl/include/EASTL/internal/type_properties.h b/libkram/eastl/include/EASTL/internal/type_properties.h new file mode 100644 index 00000000..5276f878 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/type_properties.h @@ -0,0 +1,380 @@ 
+///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_INTERNAL_TYPE_PROPERTIES_H +#define EASTL_INTERNAL_TYPE_PROPERTIES_H + + +#include +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + +#include +#include + + +namespace eastl +{ + + + /////////////////////////////////////////////////////////////////////// + // underlying_type + // + // Defines a member typedef type of type that is the underlying type for the enumeration T. + // Requires explicit compiler support to implement. + // + /////////////////////////////////////////////////////////////////////// + + #if EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE && ((defined(_MSC_VER) && (_MSC_VER >= 1700)) || (defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4007)) || defined(EA_COMPILER_CLANG)) // VS2012+ + #define EASTL_TYPE_TRAIT_underlying_type_CONFORMANCE 1 // underlying_type is conforming. + + template + struct underlying_type{ typedef __underlying_type(T) type; }; + + #else + #define EASTL_TYPE_TRAIT_underlying_type_CONFORMANCE 0 + + template + struct underlying_type{ typedef int type; }; // This is of course wrong, but we emulate libstdc++ and typedef it as int. + #endif + + #if !defined(EA_COMPILER_NO_TEMPLATE_ALIASES) + template + using underlying_type_t = typename underlying_type::type; + #endif + + + /////////////////////////////////////////////////////////////////////// + // has_unique_object_representations + // + // If T is TriviallyCopyable and if any two objects of type T with the same + // value have the same object representation, value is true. For any other + // type, value is false. + // + // http://en.cppreference.com/w/cpp/types/has_unique_object_representations + /////////////////////////////////////////////////////////////////////// + #if EASTL_HAS_UNIQUE_OBJECT_REPRESENTATIONS_AVAILABLE + #define EASTL_TYPE_TRAIT_has_unique_object_representations_CONFORMANCE 1 + + template + struct has_unique_object_representations + : public integral_constant>)> + { + }; + + #else + #define EASTL_TYPE_TRAIT_has_unique_object_representations_CONFORMANCE 0 + + template + struct has_unique_object_representations + : public integral_constant>>> // only integral types (floating point types excluded). + { + }; + + #endif + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR auto has_unique_object_representations_v = has_unique_object_representations::value; + #endif + + + /////////////////////////////////////////////////////////////////////// + // is_signed + // + // is_signed::value == true if and only if T is one of the following types: + // [const] [volatile] char (maybe) + // [const] [volatile] signed char + // [const] [volatile] short + // [const] [volatile] int + // [const] [volatile] long + // [const] [volatile] long long + // [const] [volatile] float + // [const] [volatile] double + // [const] [volatile] long double + // + // Used to determine if a integral type is signed or unsigned. + // Given that there are some user-made classes which emulate integral + // types, we provide the EASTL_DECLARE_SIGNED macro to allow you to + // set a given class to be identified as a signed type. + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_is_signed_CONFORMANCE 1 // is_signed is conforming. 
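A condensed sketch of the helper-specialization pattern this section uses: one specialization per signed fundamental type, with remove_cv applied so const/volatile qualified types are detected as well. The full version above also covers char and wchar_t depending on the platform's signedness; this sketch covers only the always-signed types:

    #include <type_traits>

    namespace sketch
    {
        template <typename T> struct is_signed_helper               : public std::false_type {};
        template <> struct is_signed_helper<signed char>            : public std::true_type {};
        template <> struct is_signed_helper<short>                  : public std::true_type {};
        template <> struct is_signed_helper<int>                    : public std::true_type {};
        template <> struct is_signed_helper<long>                   : public std::true_type {};
        template <> struct is_signed_helper<long long>              : public std::true_type {};
        template <> struct is_signed_helper<float>                  : public std::true_type {};
        template <> struct is_signed_helper<double>                 : public std::true_type {};
        template <> struct is_signed_helper<long double>            : public std::true_type {};

        // remove_cv ensures "const int" and "volatile int" also report as signed.
        template <typename T>
        struct is_signed : public is_signed_helper<typename std::remove_cv<T>::type> {};
    }

    static_assert(sketch::is_signed<const int>::value, "");
    static_assert(!sketch::is_signed<unsigned int>::value, "");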
+ + template struct is_signed_helper : public false_type{}; + + template <> struct is_signed_helper : public true_type{}; + template <> struct is_signed_helper : public true_type{}; + template <> struct is_signed_helper : public true_type{}; + template <> struct is_signed_helper : public true_type{}; + template <> struct is_signed_helper : public true_type{}; + template <> struct is_signed_helper : public true_type{}; + template <> struct is_signed_helper : public true_type{}; + template <> struct is_signed_helper : public true_type{}; + + #if (CHAR_MAX == SCHAR_MAX) + template <> struct is_signed_helper : public true_type{}; + #endif + #ifndef EA_WCHAR_T_NON_NATIVE // If wchar_t is a native type instead of simply a define to an existing type... + #if defined(__WCHAR_MAX__) && ((__WCHAR_MAX__ == 2147483647) || (__WCHAR_MAX__ == 32767)) // GCC defines __WCHAR_MAX__ for most platforms. + template <> struct is_signed_helper : public true_type{}; + #endif + #endif + + template + struct is_signed : public eastl::is_signed_helper::type>{}; + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_signed_v = is_signed::value; + #endif + + #define EASTL_DECLARE_SIGNED(T) \ + namespace eastl{ \ + template <> struct is_signed : public true_type{}; \ + template <> struct is_signed : public true_type{}; \ + template <> struct is_signed : public true_type{}; \ + template <> struct is_signed : public true_type{}; \ + } + + + + /////////////////////////////////////////////////////////////////////// + // is_unsigned + // + // is_unsigned::value == true if and only if T is one of the following types: + // [const] [volatile] char (maybe) + // [const] [volatile] unsigned char + // [const] [volatile] unsigned short + // [const] [volatile] unsigned int + // [const] [volatile] unsigned long + // [const] [volatile] unsigned long long + // + // Used to determine if a integral type is signed or unsigned. + // Given that there are some user-made classes which emulate integral + // types, we provide the EASTL_DECLARE_UNSIGNED macro to allow you to + // set a given class to be identified as an unsigned type. + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_is_unsigned_CONFORMANCE 1 // is_unsigned is conforming. + + template struct is_unsigned_helper : public false_type{}; + + template <> struct is_unsigned_helper : public true_type{}; + template <> struct is_unsigned_helper : public true_type{}; + template <> struct is_unsigned_helper : public true_type{}; + template <> struct is_unsigned_helper : public true_type{}; + template <> struct is_unsigned_helper : public true_type{}; + + #if (CHAR_MAX == UCHAR_MAX) + template <> struct is_unsigned_helper : public true_type{}; + #endif + #ifndef EA_WCHAR_T_NON_NATIVE // If wchar_t is a native type instead of simply a define to an existing type... + #if defined(_MSC_VER) || (defined(__WCHAR_MAX__) && ((__WCHAR_MAX__ == 4294967295U) || (__WCHAR_MAX__ == 65535))) // GCC defines __WCHAR_MAX__ for most platforms. 
+ template <> struct is_unsigned_helper : public true_type{}; + #endif + #endif + + template + struct is_unsigned : public eastl::is_unsigned_helper::type>{}; + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_unsigned_v = is_unsigned::value; + #endif + + #define EASTL_DECLARE_UNSIGNED(T) \ + namespace eastl{ \ + template <> struct is_unsigned : public true_type{}; \ + template <> struct is_unsigned : public true_type{}; \ + template <> struct is_unsigned : public true_type{}; \ + template <> struct is_unsigned : public true_type{}; \ + } + + + + /////////////////////////////////////////////////////////////////////// + // alignment_of + // + // alignment_of::value is an integral value representing, in bytes, + // the memory alignment of objects of type T. + // + // alignment_of may only be applied to complete types. + // + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_alignment_of_CONFORMANCE 1 // alignment_of is conforming. + + template + struct alignment_of_value{ static const size_t value = EASTL_ALIGN_OF(T); }; + + template + struct alignment_of : public integral_constant::value>{}; + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR size_t alignment_of_v = alignment_of::value; + #endif + + + /////////////////////////////////////////////////////////////////////// + // is_aligned + // + // Defined as true if the type has alignment requirements greater + // than default alignment, which is taken to be 8. This allows for + // doing specialized object allocation and placement for such types. + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_is_aligned_CONFORMANCE 1 // is_aligned is conforming. + + template + struct is_aligned_value{ static const bool value = (EASTL_ALIGN_OF(T) > 8); }; + + template + struct is_aligned : public integral_constant::value>{}; + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR size_t is_aligned_v = is_aligned::value; + #endif + + + + /////////////////////////////////////////////////////////////////////// + // rank + // + // rank::value is an integral value representing the number of + // dimensions possessed by an array type. For example, given a + // multi-dimensional array type T[M][N], std::tr1::rank::value == 2. + // For a given non-array type T, std::tr1::rank::value == 0. + // + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_rank_CONFORMANCE 1 // rank is conforming. + + template + struct rank : public eastl::integral_constant {}; + + template + struct rank : public eastl::integral_constant::value + 1> {}; + + template + struct rank : public eastl::integral_constant::value + 1> {}; + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR auto rank_v = rank::value; + #endif + + + /////////////////////////////////////////////////////////////////////// + // is_base_of + // + // Given two (possibly identical) types Base and Derived, is_base_of::value == true + // if and only if Base is a direct or indirect base class of Derived, + // or Base and Derived are the same type. + // + // is_base_of may only be applied to complete types. 
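Returning to alignment_of and rank above: alignment_of simply surfaces the compiler's alignment query as an integral_constant, and rank counts array dimensions by peeling one extent per partial specialization. A compact standalone sketch (standard-library spellings, illustrative only):

    #include <cstddef>
    #include <type_traits>

    namespace sketch
    {
        // alignment_of reports the alignment of T, in bytes.
        template <typename T>
        struct alignment_of : public std::integral_constant<std::size_t, alignof(T)> {};

        // rank counts array dimensions by stripping one extent per recursion step.
        template <typename T>
        struct rank : public std::integral_constant<std::size_t, 0> {};

        template <typename T>
        struct rank<T[]> : public std::integral_constant<std::size_t, rank<T>::value + 1> {};

        template <typename T, std::size_t N>
        struct rank<T[N]> : public std::integral_constant<std::size_t, rank<T>::value + 1> {};
    }

    static_assert(sketch::rank<int>::value == 0, "");
    static_assert(sketch::rank<int[3][4]>::value == 2, "");
    static_assert(sketch::alignment_of<double>::value == alignof(double), "");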
+ // + /////////////////////////////////////////////////////////////////////// + + #if EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE && (defined(_MSC_VER) || defined(EA_COMPILER_GNUC) || (defined(EA_COMPILER_CLANG) && EA_COMPILER_HAS_FEATURE(is_base_of))) + #define EASTL_TYPE_TRAIT_is_base_of_CONFORMANCE 1 // is_base_of is conforming. + + template + struct is_base_of : public eastl::integral_constant::value>{}; + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EASTL_CPP17_INLINE_VARIABLE EA_CONSTEXPR bool is_base_of_v = is_base_of::value; + #endif + #else + // Not implemented yet. + // This appears to be implementable. + #define EASTL_TYPE_TRAIT_is_base_of_CONFORMANCE 0 + #endif + + + + /////////////////////////////////////////////////////////////////////// + // is_lvalue_reference + // + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_is_lvalue_reference_CONFORMANCE 1 // is_lvalue_reference is conforming. + + template struct is_lvalue_reference : public eastl::false_type {}; + template struct is_lvalue_reference : public eastl::true_type {}; + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_lvalue_reference_v = is_lvalue_reference::value; + #endif + + + /////////////////////////////////////////////////////////////////////// + // is_rvalue_reference + // + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_is_rvalue_reference_CONFORMANCE 1 // is_rvalue_reference is conforming. + + template struct is_rvalue_reference : public eastl::false_type {}; + template struct is_rvalue_reference : public eastl::true_type {}; + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_rvalue_reference_v = is_rvalue_reference::value; + #endif + + + /////////////////////////////////////////////////////////////////////// + // result_of + // + /////////////////////////////////////////////////////////////////////// + #define EASTL_TYPE_TRAIT_result_of_CONFORMANCE 1 // result_of is conforming. + + template struct result_of; + + template + struct result_of + { typedef decltype(eastl::declval()(eastl::declval()...)) type; }; + + + // result_of_t is the C++14 using typedef for typename result_of::type. + // We provide a backwards-compatible means to access it through a macro for pre-C++11 compilers. + #if defined(EA_COMPILER_NO_TEMPLATE_ALIASES) + #define EASTL_RESULT_OF_T(T) typename result_of::type + #else + template + using result_of_t = typename result_of::type; + #define EASTL_RESULT_OF_T(T) result_of_t + #endif + + + /////////////////////////////////////////////////////////////////////// + // has_equality + // + // Determines if the specified type can be tested for equality. 
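Both result_of and has_equality are built from unevaluated call expressions; has_equality in particular is the void_t detection idiom, where the partial specialization is viable only when the comparison expression is well-formed. A standalone sketch under that reading (Doubler and NoEq are illustrative placeholder types; a local void_t alias stands in for the one defined later in type_transformations.h):

    #include <type_traits>
    #include <utility>

    namespace sketch
    {
        template <typename...> using void_t = void;

        // result_of: the type of invoking F with Args, taken from the call
        // expression itself in an unevaluated context.
        template <typename> struct result_of;

        template <typename F, typename... Args>
        struct result_of<F(Args...)>
        { typedef decltype(std::declval<F>()(std::declval<Args>()...)) type; };

        // has_equality: true only when declval<T>() == declval<T>() compiles.
        template <typename, typename = void>
        struct has_equality : std::false_type {};

        template <typename T>
        struct has_equality<T, void_t<decltype(std::declval<T>() == std::declval<T>())>> : std::true_type {};
    }

    struct Doubler { double operator()(int x) const { return x * 2.0; } };
    struct NoEq {};
    static_assert(std::is_same<sketch::result_of<Doubler(int)>::type, double>::value, "");
    static_assert(sketch::has_equality<int>::value, "");
    static_assert(!sketch::has_equality<NoEq>::value, "");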
+ // + /////////////////////////////////////////////////////////////////////// + template > + struct has_equality : eastl::false_type {}; + + template + struct has_equality() == eastl::declval())>> : eastl::true_type + { + }; + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR auto has_equality_v = has_equality::value; + #endif + +} // namespace eastl + + +#endif // Header include guard diff --git a/libkram/eastl/include/EASTL/internal/type_transformations.h b/libkram/eastl/include/EASTL/internal/type_transformations.h new file mode 100644 index 00000000..cffa65e5 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/type_transformations.h @@ -0,0 +1,606 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_INTERNAL_TYPE_TRANFORMATIONS_H +#define EASTL_INTERNAL_TYPE_TRANFORMATIONS_H + + +#include +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + +#include + + +namespace eastl +{ + + /////////////////////////////////////////////////////////////////////// + // add_const + // + // Add const to a type. + // + // Tor a given type T, add_const::type is equivalent to T + // const if is_const::value == false, and + // - is_void::value == true, or + // - is_object::value == true. + // + // Otherwise, add_const::type is equivalent to T. + // + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_add_const_CONFORMANCE 1 // add_const is conforming. + + template ::value || eastl::is_reference::value || eastl::is_function::value> + struct add_const_helper + { typedef T type; }; + + template + struct add_const_helper + { typedef const T type; }; + + template + struct add_const + { typedef typename eastl::add_const_helper::type type; }; + + // add_const_t is the C++17 using typedef for typename add_const::type. + // We provide a backwards-compatible means to access it through a macro for pre-C++11 compilers. + #if defined(EA_COMPILER_NO_TEMPLATE_ALIASES) + #define EASTL_ADD_CONST_T(T) typename add_const::type + #else + template + using add_const_t = typename add_const::type; + #define EASTL_ADD_CONST_T(T) add_const_t + #endif + + + /////////////////////////////////////////////////////////////////////// + // add_volatile + // + // Add volatile to a type. + // + // For a given type T, add_volatile::type is equivalent to T volatile + // if is_volatile::value == false, and + // - is_void::value == true, or + // - is_object::value == true. + // + // Otherwise, add_volatile::type is equivalent to T. + // + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_add_volatile_CONFORMANCE 1 // add_volatile is conforming. + + template ::value || eastl::is_reference::value || eastl::is_function::value> + struct add_volatile_helper + { typedef T type; }; + + template + struct add_volatile_helper + { typedef volatile T type; }; + + template struct add_volatile + { typedef typename eastl::add_volatile_helper::type type; }; + + template using add_volatile_t = typename add_volatile::type; + + + /////////////////////////////////////////////////////////////////////// + // add_cv + // + // The add_cv transformation trait adds const and volatile qualification + // to the type to which it is applied. For a given type T, + // add_volatile::type is equivalent to add_const::type>::type. 
+ // + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_add_cv_CONFORMANCE 1 // add_cv is conforming. + + template + struct add_cv + { + typedef typename add_const::type>::type type; + }; + + template using add_cv_t = typename add_cv::type; + + + /////////////////////////////////////////////////////////////////////// + // make_signed + // + // Used to convert an integral type to its signed equivalent, if not already. + // T shall be a (possibly const and/or volatile-qualified) integral type + // or enumeration but not a bool type.; + // + // The user can define their own make_signed overrides for their own + // types by making a template specialization like done below and adding + // it to the user's code. + /////////////////////////////////////////////////////////////////////// + + // To do: This implementation needs to be updated to support C++11 conformance (recognition of enums) and + // to support volatile-qualified types. It will probably be useful to have it fail for unsupported types. + #define EASTL_TYPE_TRAIT_make_signed_CONFORMANCE 0 // make_signed is only partially conforming. + + template struct make_signed { typedef T type; }; + + template <> struct make_signed { typedef signed char type; }; + template <> struct make_signed { typedef const signed char type; }; + template <> struct make_signed { typedef signed short type; }; + template <> struct make_signed { typedef const signed short type; }; + template <> struct make_signed { typedef signed int type; }; + template <> struct make_signed { typedef const signed int type; }; + template <> struct make_signed { typedef signed long type; }; + template <> struct make_signed { typedef const signed long type; }; + template <> struct make_signed { typedef signed long long type; }; + template <> struct make_signed { typedef const signed long long type; }; + + #if (defined(CHAR_MAX) && defined(UCHAR_MAX) && (CHAR_MAX == UCHAR_MAX)) // If char is unsigned, we convert char to signed char. However, if char is signed then make_signed returns char itself and not signed char. + template <> struct make_signed { typedef signed char type; }; + template <> struct make_signed { typedef signed char type; }; + #endif + + #ifndef EA_WCHAR_T_NON_NATIVE // If wchar_t is a native type instead of simply a define to an existing type... + #if (defined(__WCHAR_MAX__) && (__WCHAR_MAX__ == 4294967295U)) // If wchar_t is a 32 bit unsigned value... + template<> + struct make_signed + { typedef int32_t type; }; + #elif (defined(__WCHAR_MAX__) && (__WCHAR_MAX__ == 65535)) // If wchar_t is a 16 bit unsigned value... + template<> + struct make_signed + { typedef int16_t type; }; + #elif (defined(__WCHAR_MAX__) && (__WCHAR_MAX__ == 255)) // If wchar_t is an 8 bit unsigned value... + template<> + struct make_signed + { typedef int8_t type; }; + #endif + #endif + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + using make_signed_t = typename make_signed::type; + #endif + + + /////////////////////////////////////////////////////////////////////// + // add_signed + // + // This is not a C++11 type trait, and is here for backwards compatibility + // only. Use the C++11 make_unsigned type trait instead. 
+ /////////////////////////////////////////////////////////////////////// + + template + struct add_signed : public make_signed + { typedef typename eastl::make_signed::type type; }; + + + + + /////////////////////////////////////////////////////////////////////// + // make_unsigned + // + // Used to convert an integral type to its signed equivalent, if not already. + // T shall be a (possibly const and/or volatile-qualified) integral type + // or enumeration but not a bool type.; + // + // The user can define their own make_signed overrides for their own + // types by making a template specialization like done below and adding + // it to the user's code. + /////////////////////////////////////////////////////////////////////// + + // To do: This implementation needs to be updated to support C++11 conformance (recognition of enums) and + // to support volatile-qualified types. It will probably be useful to have it fail for unsupported types. + #define EASTL_TYPE_TRAIT_make_unsigned_CONFORMANCE 0 // make_unsigned is only partially conforming. + + template struct make_unsigned { typedef T type; }; + + template <> struct make_unsigned { typedef unsigned char type; }; + template <> struct make_unsigned { typedef const unsigned char type; }; + template <> struct make_unsigned { typedef unsigned short type; }; + template <> struct make_unsigned { typedef const unsigned short type; }; + template <> struct make_unsigned { typedef unsigned int type; }; + template <> struct make_unsigned { typedef const unsigned int type; }; + template <> struct make_unsigned { typedef unsigned long type; }; + template <> struct make_unsigned { typedef const unsigned long type; }; + template <> struct make_unsigned { typedef unsigned long long type; }; + template <> struct make_unsigned { typedef const unsigned long long type; }; + + #if (CHAR_MIN < 0) // If char is signed, we convert char to unsigned char. However, if char is unsigned then make_unsigned returns char itself and not unsigned char. + template <> struct make_unsigned { typedef unsigned char type; }; + template <> struct make_unsigned { typedef unsigned char type; }; + #endif + + #ifndef EA_WCHAR_T_NON_NATIVE // If wchar_t is a native type instead of simply a define to an existing type... + #if (defined(__WCHAR_MAX__) && (__WCHAR_MAX__ != 4294967295U)) // If wchar_t is a 32 bit signed value... + template<> + struct make_unsigned + { typedef uint32_t type; }; + #elif (defined(__WCHAR_MAX__) && (__WCHAR_MAX__ != 65535)) // If wchar_t is a 16 bit signed value... + template<> + struct make_unsigned + { typedef uint16_t type; }; + #elif (defined(__WCHAR_MAX__) && (__WCHAR_MAX__ != 255)) // If wchar_t is an 8 bit signed value... + template<> + struct make_unsigned + { typedef uint8_t type; }; + #endif + #endif + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + using make_unsigned_t = typename make_unsigned::type; + #endif + + + + /////////////////////////////////////////////////////////////////////// + // add_unsigned + // + // This is not a C++11 type trait, and is here for backwards compatibility + // only. Use the C++11 make_unsigned type trait instead. + // + // Adds unsigned-ness to the given type. + // Modifies only integral values; has no effect on others. 
+ // add_unsigned::type is unsigned int + // add_unsigned::type is unsigned int + // + /////////////////////////////////////////////////////////////////////// + + template + struct add_unsigned : public make_unsigned + { typedef typename eastl::make_signed::type type; }; + + + + /////////////////////////////////////////////////////////////////////// + // remove_pointer + // + // Remove pointer from a type. + // + // The remove_pointer transformation trait removes top-level indirection + // by pointer (if any) from the type to which it is applied. Pointers to + // members are not affected. For a given type T, remove_pointer::type + // is equivalent to T. + // + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_remove_pointer_CONFORMANCE 1 + + template struct remove_pointer { typedef T type; }; + template struct remove_pointer { typedef T type; }; + template struct remove_pointer { typedef T type; }; + template struct remove_pointer { typedef T type; }; + template struct remove_pointer { typedef T type; }; + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + using remove_pointer_t = typename remove_pointer::type; + #endif + + + /////////////////////////////////////////////////////////////////////// + // add_pointer + // + // Add pointer to a type. + // Provides the member typedef type which is the type T*. If T is a + // reference type, then type is a pointer to the referred type. + // + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_add_pointer_CONFORMANCE 1 + + template + struct add_pointer { typedef typename eastl::remove_reference::type* type; }; + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + using add_pointer_t = typename add_pointer::type; + #endif + + + + /////////////////////////////////////////////////////////////////////// + // remove_extent + // + // The remove_extent transformation trait removes a dimension from an array. + // For a given non-array type T, remove_extent::type is equivalent to T. + // For a given array type T[N], remove_extent::type is equivalent to T. + // For a given array type const T[N], remove_extent::type is equivalent to const T. + // For example, given a multi-dimensional array type T[M][N], remove_extent::type is equivalent to T[N]. + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_remove_extent_CONFORMANCE 1 // remove_extent is conforming. + + template struct remove_extent { typedef T type; }; + template struct remove_extent { typedef T type; }; + template struct remove_extent { typedef T type; }; + + #if !defined(EA_COMPILER_NO_TEMPLATE_ALIASES) + template + using remove_extent_t = typename remove_extent::type; + #endif + + + /////////////////////////////////////////////////////////////////////// + // remove_all_extents + // + // The remove_all_extents transformation trait removes all dimensions from an array. + // For a given non-array type T, remove_all_extents::type is equivalent to T. + // For a given array type T[N], remove_all_extents::type is equivalent to T. + // For a given array type const T[N], remove_all_extents::type is equivalent to const T. + // For example, given a multi-dimensional array type T[M][N], remove_all_extents::type is equivalent to T. + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_remove_all_extents_CONFORMANCE 1 // remove_all_extents is conforming. 
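As a compact illustration of the two extent-removal traits described above, remove_extent strips exactly one array dimension while remove_all_extents recurses until none remain; const and volatile on the element type are preserved. A standalone sketch with standard-library spellings:

    #include <cstddef>
    #include <type_traits>

    namespace sketch
    {
        template <typename T>                struct remove_extent       { typedef T type; };
        template <typename T>                struct remove_extent<T[]>  { typedef T type; };
        template <typename T, std::size_t N> struct remove_extent<T[N]> { typedef T type; };

        template <typename T>                struct remove_all_extents       { typedef T type; };
        template <typename T>                struct remove_all_extents<T[]>  { typedef typename remove_all_extents<T>::type type; };
        template <typename T, std::size_t N> struct remove_all_extents<T[N]> { typedef typename remove_all_extents<T>::type type; };
    }

    static_assert(std::is_same<sketch::remove_extent<int[2][3]>::type, int[3]>::value, "");
    static_assert(std::is_same<sketch::remove_all_extents<const int[2][3]>::type, const int>::value, "");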
+ + template struct remove_all_extents { typedef T type; }; + template struct remove_all_extents { typedef typename eastl::remove_all_extents::type type; }; + template struct remove_all_extents { typedef typename eastl::remove_all_extents::type type; }; + + #if !defined(EA_COMPILER_NO_TEMPLATE_ALIASES) + template + using remove_all_extents_t = typename remove_all_extents::type; + #endif + + + + /////////////////////////////////////////////////////////////////////// + // aligned_storage + // + // The aligned_storage transformation trait provides a type that is + // suitably aligned to store an object whose size is does not exceed length + // and whose alignment is a divisor of alignment. When using aligned_storage, + // length must be non-zero, and alignment must >= alignment_of::value + // for some type T. We require the alignment value to be a power-of-two. + // + // GCC versions prior to 4.4 don't properly support this with stack-based + // variables. The EABase EA_ALIGN_MAX_AUTOMATIC define identifies the + // extent to which stack (automatic) variables can be aligned for the + // given compiler/platform combination. + // + // Example usage: + // aligned_storage::type widget; + // Widget* pWidget = new(&widget) Widget; + // + // aligned_storage::type widgetAlignedTo64; + // Widget* pWidget = new(&widgetAlignedTo64) Widget; + // + // aligned_storage::type widgetArray[37]; + // Widget* pWidgetArray = new(widgetArray) Widget[37]; + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_aligned_storage_CONFORMANCE 1 // aligned_storage is conforming. + + #if defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4008) + // New versions of GCC do not support using 'alignas' with a value greater than 128. + // However, this code using the GNU standard alignment attribute works properly. + template + struct aligned_storage + { + struct type { unsigned char mCharData[N]; } EA_ALIGN(Align); + }; + #elif (EABASE_VERSION_N >= 20040) && !defined(EA_COMPILER_NO_ALIGNAS) // If C++11 alignas is supported... + template + struct aligned_storage + { + typedef struct { + alignas(Align) unsigned char mCharData[N]; + } type; + }; + + #elif defined(EA_COMPILER_MSVC) || (defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION < 4007)) || defined(EA_COMPILER_EDG) // At some point GCC fixed their attribute(align) to support non-literals, though it's not clear what version aside from being no later than 4.7 and no earlier than 4.2. + // Some compilers don't allow you to to use EA_ALIGNED with anything by a numeric literal, + // so we can't use the simpler code like we do further below for other compilers. We support + // only up to so much of an alignment value here. 
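Before the per-alignment specializations below, here is a usage sketch of the pattern shown in the aligned_storage comment above, written against the C++11 alignas path. Widget is a placeholder type, and the simplified aligned_storage here is an assumption standing in for the EASTL version:

    #include <cstddef>
    #include <new>

    namespace sketch
    {
        // The alignas-based path: an N-byte POD buffer aligned to Align.
        template <std::size_t N, std::size_t Align>
        struct aligned_storage
        {
            struct type { alignas(Align) unsigned char mCharData[N]; };
        };
    }

    struct Widget { int x; float y; };

    int main()
    {
        // Raw storage suitably sized and aligned for a Widget.
        sketch::aligned_storage<sizeof(Widget), alignof(Widget)>::type storage;

        // Construct the object in place, use it, then destroy it manually
        // before the storage goes out of scope.
        Widget* pWidget = new (&storage) Widget{1, 2.0f};
        int value = pWidget->x;
        pWidget->~Widget();
        return value;
    }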
+ template + struct aligned_storage_helper { struct type{ unsigned char mCharData[N]; }; }; + + template struct aligned_storage_helper { struct EA_ALIGN( 2) type{ unsigned char mCharData[N]; }; }; + template struct aligned_storage_helper { struct EA_ALIGN( 4) type{ unsigned char mCharData[N]; }; }; + template struct aligned_storage_helper { struct EA_ALIGN( 8) type{ unsigned char mCharData[N]; }; }; + template struct aligned_storage_helper { struct EA_ALIGN( 16) type{ unsigned char mCharData[N]; }; }; + template struct aligned_storage_helper { struct EA_ALIGN( 32) type{ unsigned char mCharData[N]; }; }; + template struct aligned_storage_helper { struct EA_ALIGN( 64) type{ unsigned char mCharData[N]; }; }; + template struct aligned_storage_helper { struct EA_ALIGN( 128) type{ unsigned char mCharData[N]; }; }; + template struct aligned_storage_helper { struct EA_ALIGN( 256) type{ unsigned char mCharData[N]; }; }; + template struct aligned_storage_helper { struct EA_ALIGN( 512) type{ unsigned char mCharData[N]; }; }; + template struct aligned_storage_helper { struct EA_ALIGN(1024) type{ unsigned char mCharData[N]; }; }; + template struct aligned_storage_helper { struct EA_ALIGN(2048) type{ unsigned char mCharData[N]; }; }; + template struct aligned_storage_helper { struct EA_ALIGN(4096) type{ unsigned char mCharData[N]; }; }; + + template + struct aligned_storage + { + typedef typename aligned_storage_helper::type type; + }; + + #else + template + struct aligned_storage + { + union type + { + unsigned char mCharData[N]; + struct EA_ALIGN(Align) mStruct{ }; + }; + }; + #endif + + #if defined(EA_COMPILER_NO_TEMPLATE_ALIASES) + #define EASTL_ALIGNED_STORAGE_T(N, Align) typename eastl::aligned_storage_t::type + #else + template + using aligned_storage_t = typename aligned_storage::type; + #define EASTL_ALIGNED_STORAGE_T(N, Align) eastl::aligned_storage_t + #endif + + + + /////////////////////////////////////////////////////////////////////// + // aligned_union + // + // The member typedef type shall be a POD type suitable for use as + // uninitialized storage for any object whose type is listed in Types; + // its size shall be at least Len. The static member alignment_value + // shall be an integral constant of type std::size_t whose value is + // the strictest alignment of all types listed in Types. + // Note that the resulting type is not a C/C++ union, but simply memory + // block (of pod type) that can be used to placement-new an actual + // C/C++ union of the types. The actual union you declare can be a non-POD union. + // + // Example usage: + // union MyUnion { + // char c; + // int i; + // float f; + // + // MyUnion(float fValue) : f(fValue) {} + // }; + // + // aligned_union::type myUnionStorage; + // MyUnion* pMyUnion = new(&myUnionStorage) MyUnion(21.4f); + // pMyUnion->i = 37; + // + /////////////////////////////////////////////////////////////////////// + + #if defined(EA_COMPILER_NO_VARIADIC_TEMPLATES) || !EASTL_TYPE_TRAIT_static_max_CONFORMANCE + #define EASTL_TYPE_TRAIT_aligned_union_CONFORMANCE 0 // aligned_union is not conforming, as it supports only a two-member unions. + + // To consider: Expand this to include more possible types. We may want to convert this to be a recursive + // template instead of like below. 
+ template + struct aligned_union + { + static const size_t size0 = eastl::static_max::value; + static const size_t size1 = eastl::static_max::value; + static const size_t size2 = eastl::static_max::value; + static const size_t size = eastl::static_max::value; + + static const size_t alignment0 = eastl::static_max::value; + static const size_t alignment1 = eastl::static_max::value; + static const size_t alignment_value = eastl::static_max::value; + + typedef typename eastl::aligned_storage::type type; + }; + + #if defined(EA_COMPILER_NO_TEMPLATE_ALIASES) + // To do: define macro. + #else + template + using aligned_union_t = typename aligned_union::type; + #endif + #else + #define EASTL_TYPE_TRAIT_aligned_union_CONFORMANCE 1 // aligned_union is conforming. + + template + struct aligned_union + { + static const size_t size = eastl::static_max::value; + static const size_t alignment_value = eastl::static_max::value; + + typedef typename eastl::aligned_storage::type type; + }; + + #if defined(EA_COMPILER_NO_TEMPLATE_ALIASES) + // To do: define macro. + #else + template + using aligned_union_t = typename aligned_union::type; + #endif + + #endif + + + /////////////////////////////////////////////////////////////////////// + // union_cast + // + // Safely converts between unrelated types that have a binary equivalency. + // This appoach is required by strictly conforming C++ compilers because + // directly using a C or C++ cast between unrelated types is fraught with + // the possibility of undefined runtime behavior due to type aliasing. + // The Source and Dest types must be POD types due to the use of a union + // in C++ versions prior to C++11. C++11 relaxes the definition of a POD + // such that it allows a classes with trivial default constructors whereas + // previous versions did not, so beware of this when writing portable code. + // + // Example usage: + // float f32 = 1.234f; + // uint32_t n32 = union_cast(f32); + // + // Example possible mis-usage: + // The following is valid only if you are aliasing the pointer value and + // not what it points to. Most of the time the user intends the latter, + // which isn't strictly possible. + // Widget* pWidget = CreateWidget(); + // Foo* pFoo = union_cast(pWidget); + /////////////////////////////////////////////////////////////////////// + + template + DestType union_cast(SourceType sourceValue) + { + EASTL_CT_ASSERT((sizeof(DestType) == sizeof(SourceType)) && + (EA_ALIGN_OF(DestType) == EA_ALIGN_OF(SourceType))); // To support differening alignments, we would need to use a memcpy-based solution or find a way to make the two union members align with each other. + //EASTL_CT_ASSERT(is_pod::value && is_pod::value); // Disabled because we don't want to restrict what the user can do, as some compiler's definitions of is_pod aren't up to C++11 Standards. + //EASTL_CT_ASSERT(!is_pointer::value && !is_pointer::value); // Disabled because it's valid to alias pointers as long as you are aliasong the pointer value and not what it points to. + + union { + SourceType sourceValue; + DestType destValue; + } u; + u.sourceValue = sourceValue; + + return u.destValue; + } + + + + /////////////////////////////////////////////////////////////////////// + // void_t + // + // Maps a sequence of any types to void. This utility class is used in + // template meta programming to simplify compile time reflection mechanisms + // required by the standard library. 
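+	// Illustrative note (an assumed reconstruction, not upstream documentation): the alias itself is
+	// simply
+	//     template <typename...> using void_t = void;
+	// which lets a partial specialization be chosen only when the expressions it inspects are
+	// well-formed, as the is_iterable example below shows.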
+ // + // http://en.cppreference.com/w/cpp/types/void_t + // + // Example: + // template + // struct is_iterable : false_type {}; + // + // template + // struct is_iterable().begin()), + // decltype(declval().end())>> : true_type {}; + // + /////////////////////////////////////////////////////////////////////// + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + using void_t = void; + #endif + + +} // namespace eastl + + +#endif // Header include guard + + + + + + + + + + + + + + + + + + + + + diff --git a/libkram/eastl/include/EASTL/intrusive_hash_map.h b/libkram/eastl/include/EASTL/intrusive_hash_map.h new file mode 100644 index 00000000..37f16188 --- /dev/null +++ b/libkram/eastl/include/EASTL/intrusive_hash_map.h @@ -0,0 +1,98 @@ +/////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_INTRUSIVE_HASH_MAP_H +#define EASTL_INTRUSIVE_HASH_MAP_H + + +#include +#include +#include +#include + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + + + +namespace eastl +{ + + /// intrusive_hash_map + /// + /// Template parameters: + /// Key The key object (key in the key/value pair). T must contain a member of type Key named mKey. + /// T The type of object the map holds (a.k.a. value). + /// bucketCount The number of buckets to use. Best if it's a prime number. + /// Hash Hash function. See functional.h for examples of hash functions. + /// Equal Equality testing predicate; tells if two elements are equal. + /// + template , typename Equal = eastl::equal_to > + class intrusive_hash_map : public intrusive_hashtable + { + public: + typedef intrusive_hashtable base_type; + typedef intrusive_hash_map this_type; + + public: + explicit intrusive_hash_map(const Hash& h = Hash(), const Equal& eq = Equal()) + : base_type(h, eq) + { + // Empty + } + + // To consider: Is this feasible, given how initializer_list works by creating a temporary array? Even if it is feasible, is it a good idea? + //intrusive_hash_map(std::initializer_list ilist); + + }; // intrusive_hash_map + + + + + /// intrusive_hash_multimap + /// + /// Implements a intrusive_hash_multimap, which is the same thing as a intrusive_hash_map + /// except that contained elements need not be unique. See the documentation + /// for intrusive_hash_map for details. + /// + /// Template parameters: + /// Key The key object (key in the key/value pair). T must contain a member of type Key named mKey. + /// T The type of object the map holds (a.k.a. value). + /// bucketCount The number of buckets to use. Best if it's a prime number. + /// Hash Hash function. See functional.h for examples of hash functions. + /// Equal Equality testing predicate; tells if two elements are equal. + /// + template , typename Equal = eastl::equal_to > + class intrusive_hash_multimap : public intrusive_hashtable + { + public: + typedef intrusive_hashtable base_type; + typedef intrusive_hash_multimap this_type; + + public: + explicit intrusive_hash_multimap(const Hash& h = Hash(), const Equal& eq = Equal()) + : base_type(h, eq) + { + // Empty + } + + // To consider: Is this feasible, given how initializer_list works by creating a temporary array? Even if it is feasible, is it a good idea? 
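+
+		// Illustrative sketch (hypothetical Widget type; an assumption, not upstream documentation):
+		// an element stored in these intrusive hash containers typically derives from
+		// eastl::intrusive_hash_node_key<Key>, which supplies the required mKey member, e.g.
+		//     struct Widget : public eastl::intrusive_hash_node_key<uint32_t> { /* payload */ };
+		//     eastl::intrusive_hash_multimap<uint32_t, Widget, 37> widgetMap;
+		//     Widget w; w.mKey = 12; widgetMap.insert(w);   // no allocation; w must outlive its entry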
+ //intrusive_hash_multimap(std::initializer_list ilist); + + }; // intrusive_hash_multimap + + + + +} // namespace eastl + + +#endif // Header include guard + + + + + + diff --git a/libkram/eastl/include/EASTL/intrusive_hash_set.h b/libkram/eastl/include/EASTL/intrusive_hash_set.h new file mode 100644 index 00000000..a25d03a6 --- /dev/null +++ b/libkram/eastl/include/EASTL/intrusive_hash_set.h @@ -0,0 +1,100 @@ +/////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_INTRUSIVE_HASH_SET_H +#define EASTL_INTRUSIVE_HASH_SET_H + + +#include +#include +#include +#include + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + + + +namespace eastl +{ + + /// intrusive_hash_set + /// + /// Template parameters: + /// T The type of object the set holds (a.k.a. value). + /// bucketCount The number of buckets to use. Best if it's a prime number. + /// Hash Hash function. See functional.h for examples of hash functions. + /// Equal Equality testing predicate; tells if two elements are equal. + /// + template , typename Equal = eastl::equal_to > + class intrusive_hash_set : public intrusive_hashtable + { + public: + typedef intrusive_hashtable base_type; + typedef intrusive_hash_set this_type; + + public: + explicit intrusive_hash_set(const Hash& h = Hash(), const Equal& eq = Equal()) + : base_type(h, eq) + { + // Empty + } + + // To consider: Is this feasible, given how initializer_list works by creating a temporary array? Even if it is feasible, is it a good idea? + //intrusive_hash_set(std::initializer_list ilist); + + }; // intrusive_hash_set + + + + + /// intrusive_hash_multiset + /// + /// Implements a intrusive_hash_multiset, which is the same thing as a intrusive_hash_set + /// except that contained elements need not be unique. See the documentation + /// for intrusive_hash_set for details. + /// + /// Template parameters: + /// T The type of object the set holds (a.k.a. value). + /// bucketCount The number of buckets to use. Best if it's a prime number. + /// Hash Hash function. See functional.h for examples of hash functions. + /// Equal Equality testing predicate; tells if two elements are equal. + /// + template , typename Equal = eastl::equal_to > + class intrusive_hash_multiset : public intrusive_hashtable + { + public: + typedef intrusive_hashtable base_type; + typedef intrusive_hash_multiset this_type; + + public: + explicit intrusive_hash_multiset(const Hash& h = Hash(), const Equal& eq = Equal()) + : base_type(h, eq) + { + // Empty + } + + // To consider: Is this feasible, given how initializer_list works by creating a temporary array? Even if it is feasible, is it a good idea? + //intrusive_hash_multiset(std::initializer_list ilist); + + }; // intrusive_hash_multiset + + +} // namespace eastl + + +#endif // Header include guard + + + + + + + + + + + + diff --git a/libkram/eastl/include/EASTL/intrusive_list.h b/libkram/eastl/include/EASTL/intrusive_list.h new file mode 100644 index 00000000..18d7e93a --- /dev/null +++ b/libkram/eastl/include/EASTL/intrusive_list.h @@ -0,0 +1,1315 @@ +/////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+/////////////////////////////////////////////////////////////////////////////// + + +/////////////////////////////////////////////////////////////////////////////// +// The intrusive list container is similar to a list, with the primary +// different being that intrusive lists allow you to control memory +// allocation. +// +// * Intrusive lists store the nodes directly in the data items. This +// is done by deriving the object from intrusive_list_node. +// +// * The container does no memory allocation -- it works entirely with +// the submitted nodes. This does mean that it is the client's job to +// free the nodes in an intrusive list, though. +// +// * Valid node pointers can be converted back to iterators in O(1). +// This is because objects in the list are also nodes in the list. +// +// * intrusive_list does not support copy construction or assignment; +// the push, pop, and insert operations take ownership of the +// passed object. +// +// Usage notes: +// +// * You can use an intrusive_list directly with the standard nodes +// if you have some other way of converting the node pointer back +// to your data pointer. +// +// * Remember that the list destructor doesn't deallocate nodes -- it can't. +// +// * The size is not cached; this makes size() linear time but splice() is +// constant time. This does mean that you can remove() an element without +// having to figure out which list it is in, however. +// +// * You can insert a node into multiple intrusive_lists. One way to do so +// is to (ab)use inheritance: +// +// struct NodeA : public intrusive_list_node {}; +// struct NodeB : public intrusive_list_node {}; +// struct Object : public NodeA, nodeB {}; +// +// intrusive_list listA; +// intrusive_list listB; +// +// listA.push_back(obj); +// listB.push_back(obj); +// +// * find() vs. locate() +// The find(v) algorithm returns an iterator p such that *p == v; intrusive_list::locate(v) +// returns an iterator p such that &*p == &v. intrusive_list<> doesn't have find() mainly +// because list<> doesn't have it either, but there's no reason it couldn't. intrusive_list +// uses the name 'find' because: +// - So as not to confuse the member function with the well-defined free function from algorithm.h. +// - Because it is not API-compatible with eastl::find(). +// - Because it simply locates an object within the list based on its node entry and doesn't perform before any value-based searches or comparisons. +// +// Differences between intrusive_list and std::list: +// +// Issue std::list intrusive_list +// -------------------------------------------------------------- +// Automatic node ctor/dtor Yes No +// Can memmove() container Maybe* No +// Same item in list twice Yes(copy/byref) No +// Can store non-copyable items No Yes +// size() O(1) or O(n) O(n) +// clear() O(n) O(1) +// erase(range) O(n) O(1) +// splice(range) O(1) or O(n) O(1) +// Convert reference to iterator No O(1) +// Remove without container No O(1) +// Nodes in mixed allocators No Yes +// +// *) Not required by standard but can be done with some STL implementations. +// +/////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_INTRUSIVE_LIST_H +#define EASTL_INTRUSIVE_LIST_H + + +#include +#include +#include + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. 
+#endif + + + +namespace eastl +{ + + /// intrusive_list_node + /// + /// By design this must be a POD, as user structs will be inheriting from + /// it and they may wish to remain POD themselves. However, if the + /// EASTL_VALIDATE_INTRUSIVE_LIST option is enabled + /// + struct intrusive_list_node + { + intrusive_list_node* mpNext; + intrusive_list_node* mpPrev; + + #if EASTL_VALIDATE_INTRUSIVE_LIST + intrusive_list_node() // Implemented inline because GCC can't deal with member functions + { // of may-alias classes being defined outside the declaration. + mpNext = mpPrev = NULL; + } + + ~intrusive_list_node() + { + #if EASTL_ASSERT_ENABLED + if(mpNext || mpPrev) + EASTL_FAIL_MSG("~intrusive_list_node(): List is non-empty."); + #endif + } + #endif + } EASTL_MAY_ALIAS; // It's not clear if this really should be needed. An old GCC compatible compiler is generating some crashing optimized code when strict aliasing is enabled, but analysis of it seems to blame the compiler. However, this topic can be tricky. + + + + /// intrusive_list_iterator + /// + template + class intrusive_list_iterator + { + public: + typedef intrusive_list_iterator this_type; + typedef intrusive_list_iterator iterator; + typedef intrusive_list_iterator const_iterator; + typedef T value_type; + typedef T node_type; + typedef ptrdiff_t difference_type; + typedef Pointer pointer; + typedef Reference reference; + typedef EASTL_ITC_NS::bidirectional_iterator_tag iterator_category; + + public: + pointer mpNode; // Needs to be public for operator==() to work + + public: + intrusive_list_iterator(); + explicit intrusive_list_iterator(pointer pNode); // Note that you can also construct an iterator from T via this, since value_type == node_type. + intrusive_list_iterator(const iterator& x); + + reference operator*() const; + pointer operator->() const; + + intrusive_list_iterator& operator++(); + intrusive_list_iterator& operator--(); + + intrusive_list_iterator operator++(int); + intrusive_list_iterator operator--(int); + + }; // class intrusive_list_iterator + + + + /// intrusive_list_base + /// + class intrusive_list_base + { + public: + typedef eastl_size_t size_type; // See config.h for the definition of this, which defaults to size_t. + typedef ptrdiff_t difference_type; + + protected: + intrusive_list_node mAnchor; ///< Sentinel node (end). All data nodes are linked in a ring from this node. + + public: + intrusive_list_base(); + ~intrusive_list_base(); + + bool empty() const EA_NOEXCEPT; + eastl_size_t size() const EA_NOEXCEPT; ///< Returns the number of elements in the list; O(n). + void clear() EA_NOEXCEPT; ///< Clears the list; O(1). No deallocation occurs. + void pop_front(); ///< Removes an element from the front of the list; O(1). The element must exist, but is not deallocated. + void pop_back(); ///< Removes an element from the back of the list; O(1). The element must exist, but is not deallocated. + EASTL_API void reverse() EA_NOEXCEPT; ///< Reverses a list so that front and back are swapped; O(n). + + EASTL_API bool validate() const; ///< Scans a list for linkage inconsistencies; O(n) time, O(1) space. Returns false if errors are detected, such as loops or branching. 
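+
+		// Note (a summary of the implementation that follows, not additional API): mAnchor links to
+		// itself when the list is empty, so empty() is a single pointer comparison, while size()
+		// must walk the ring; this is why size() is documented as O(n) and clear() as O(1).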
+ + }; // class intrusive_list_base + + + + /// intrusive_list + /// + /// Example usage: + /// struct IntNode : public eastl::intrusive_list_node { + /// int mX; + /// IntNode(int x) : mX(x) { } + /// }; + /// + /// IntNode nodeA(0); + /// IntNode nodeB(1); + /// + /// intrusive_list intList; + /// intList.push_back(nodeA); + /// intList.push_back(nodeB); + /// intList.remove(nodeA); + /// + template + class intrusive_list : public intrusive_list_base + { + public: + typedef intrusive_list this_type; + typedef intrusive_list_base base_type; + typedef T node_type; + typedef T value_type; + typedef typename base_type::size_type size_type; + typedef typename base_type::difference_type difference_type; + typedef T& reference; + typedef const T& const_reference; + typedef T* pointer; + typedef const T* const_pointer; + typedef intrusive_list_iterator iterator; + typedef intrusive_list_iterator const_iterator; + typedef eastl::reverse_iterator reverse_iterator; + typedef eastl::reverse_iterator const_reverse_iterator; + + public: + intrusive_list(); ///< Creates an empty list. + intrusive_list(const this_type& x); ///< Creates an empty list; ignores the argument. + //intrusive_list(std::initializer_list ilist); To consider: Is this feasible, given how initializer_list works by creating a temporary array? Even if it is feasible, is it a good idea? + + this_type& operator=(const this_type& x); ///< Clears the list; ignores the argument. + void swap(this_type&); ///< Swaps the contents of two intrusive lists; O(1). + + iterator begin() EA_NOEXCEPT; ///< Returns an iterator pointing to the first element in the list. + const_iterator begin() const EA_NOEXCEPT; ///< Returns a const_iterator pointing to the first element in the list. + const_iterator cbegin() const EA_NOEXCEPT; ///< Returns a const_iterator pointing to the first element in the list. + + iterator end() EA_NOEXCEPT; ///< Returns an iterator pointing one-after the last element in the list. + const_iterator end() const EA_NOEXCEPT; ///< Returns a const_iterator pointing one-after the last element in the list. + const_iterator cend() const EA_NOEXCEPT; ///< Returns a const_iterator pointing one-after the last element in the list. + + reverse_iterator rbegin() EA_NOEXCEPT; ///< Returns a reverse_iterator pointing at the end of the list (start of the reverse sequence). + const_reverse_iterator rbegin() const EA_NOEXCEPT; ///< Returns a const_reverse_iterator pointing at the end of the list (start of the reverse sequence). + const_reverse_iterator crbegin() const EA_NOEXCEPT; ///< Returns a const_reverse_iterator pointing at the end of the list (start of the reverse sequence). + + reverse_iterator rend() EA_NOEXCEPT; ///< Returns a reverse_iterator pointing at the start of the list (end of the reverse sequence). + const_reverse_iterator rend() const EA_NOEXCEPT; ///< Returns a const_reverse_iterator pointing at the start of the list (end of the reverse sequence). + const_reverse_iterator crend() const EA_NOEXCEPT; ///< Returns a const_reverse_iterator pointing at the start of the list (end of the reverse sequence). + + reference front(); ///< Returns a reference to the first element. The list must be non-empty. + const_reference front() const; ///< Returns a const reference to the first element. The list must be non-empty. + reference back(); ///< Returns a reference to the last element. The list must be non-empty. + const_reference back() const; ///< Returns a const reference to the last element. The list must be non-empty. 
+ + void push_front(value_type& x); ///< Adds an element to the front of the list; O(1). The element is not copied. The element must not be in any other list. + void push_back(value_type& x); ///< Adds an element to the back of the list; O(1). The element is not copied. The element must not be in any other list. + + bool contains(const value_type& x) const; ///< Returns true if the given element is in the list; O(n). Equivalent to (locate(x) != end()). + + iterator locate(value_type& x); ///< Converts a reference to an object in the list back to an iterator, or returns end() if it is not part of the list. O(n) + const_iterator locate(const value_type& x) const; ///< Converts a const reference to an object in the list back to a const iterator, or returns end() if it is not part of the list. O(n) + + iterator insert(const_iterator pos, value_type& x); ///< Inserts an element before the element pointed to by the iterator. O(1) + iterator erase(const_iterator pos); ///< Erases the element pointed to by the iterator. O(1) + iterator erase(const_iterator pos, const_iterator last); ///< Erases elements within the iterator range [pos, last). O(1) + + reverse_iterator erase(const_reverse_iterator pos); + reverse_iterator erase(const_reverse_iterator pos, const_reverse_iterator last); + + static void remove(value_type& value); ///< Erases an element from a list; O(1). Note that this is static so you don't need to know which list the element, although it must be in some list. + + void splice(const_iterator pos, value_type& x); + ///< Moves the given element into this list before the element pointed to by pos; O(1). + ///< Required: x must be in some list or have first/next pointers that point it itself. + + void splice(const_iterator pos, intrusive_list& x); + ///< Moves the contents of a list into this list before the element pointed to by pos; O(1). + ///< Required: &x != this (same as std::list). + + void splice(const_iterator pos, intrusive_list& x, const_iterator i); + ///< Moves the given element pointed to i within the list x into the current list before + ///< the element pointed to by pos; O(1). + + void splice(const_iterator pos, intrusive_list& x, const_iterator first, const_iterator last); + ///< Moves the range of elements [first, last) from list x into the current list before + ///< the element pointed to by pos; O(1). + ///< Required: pos must not be in [first, last). (same as std::list). + + public: + // Sorting functionality + // This is independent of the global sort algorithms, as lists are + // linked nodes and can be sorted more efficiently by moving nodes + // around in ways that global sort algorithms aren't privy to. + + void merge(this_type& x); + + template + void merge(this_type& x, Compare compare); + + void unique(); + + template + void unique(BinaryPredicate); + + void sort(); + + template + void sort(Compare compare); + + public: + // bool validate() const; // Inherited from parent. + int validate_iterator(const_iterator i) const; + + }; // intrusive_list + + + + + /////////////////////////////////////////////////////////////////////// + // intrusive_list_node + /////////////////////////////////////////////////////////////////////// + + // Moved to be inline within the class because the may-alias attribute is + // triggering what appears to be a bug in GCC that effectively requires + // may-alias structs to implement inline member functions within the class + // declaration. 
We don't have a .cpp file for + // #if EASTL_VALIDATE_INTRUSIVE_LIST + // inline intrusive_list_node::intrusive_list_node() + // { + // mpNext = mpPrev = NULL; + // } + // + // inline intrusive_list_node::~intrusive_list_node() + // { + // #if EASTL_ASSERT_ENABLED + // if(mpNext || mpPrev) + // EASTL_FAIL_MSG("~intrusive_list_node(): List is non-empty."); + // #endif + // } + // #endif + + + /////////////////////////////////////////////////////////////////////// + // intrusive_list_iterator + /////////////////////////////////////////////////////////////////////// + + template + inline intrusive_list_iterator::intrusive_list_iterator() + { + #if EASTL_DEBUG + mpNode = NULL; + #endif + } + + + template + inline intrusive_list_iterator::intrusive_list_iterator(pointer pNode) + : mpNode(pNode) + { + // Empty + } + + + template + inline intrusive_list_iterator::intrusive_list_iterator(const iterator& x) + : mpNode(x.mpNode) + { + // Empty + } + + + template + inline typename intrusive_list_iterator::reference + intrusive_list_iterator::operator*() const + { + return *mpNode; + } + + + template + inline typename intrusive_list_iterator::pointer + intrusive_list_iterator::operator->() const + { + return mpNode; + } + + + template + inline typename intrusive_list_iterator::this_type& + intrusive_list_iterator::operator++() + { + mpNode = static_cast(mpNode->mpNext); + return *this; + } + + + template + inline typename intrusive_list_iterator::this_type + intrusive_list_iterator::operator++(int) + { + intrusive_list_iterator it(*this); + mpNode = static_cast(mpNode->mpNext); + return it; + } + + + template + inline typename intrusive_list_iterator::this_type& + intrusive_list_iterator::operator--() + { + mpNode = static_cast(mpNode->mpPrev); + return *this; + } + + + template + inline typename intrusive_list_iterator::this_type + intrusive_list_iterator::operator--(int) + { + intrusive_list_iterator it(*this); + mpNode = static_cast(mpNode->mpPrev); + return it; + } + + + // The C++ defect report #179 requires that we support comparisons between const and non-const iterators. + // Thus we provide additional template paremeters here to support this. The defect report does not + // require us to support comparisons between reverse_iterators and const_reverse_iterators. + template + inline bool operator==(const intrusive_list_iterator& a, + const intrusive_list_iterator& b) + { + return a.mpNode == b.mpNode; + } + + + template + inline bool operator!=(const intrusive_list_iterator& a, + const intrusive_list_iterator& b) + { + return a.mpNode != b.mpNode; + } + + + // We provide a version of operator!= for the case where the iterators are of the + // same type. This helps prevent ambiguity errors in the presence of rel_ops. 
+ template + inline bool operator!=(const intrusive_list_iterator& a, + const intrusive_list_iterator& b) + { + return a.mpNode != b.mpNode; + } + + + + + /////////////////////////////////////////////////////////////////////// + // intrusive_list_base + /////////////////////////////////////////////////////////////////////// + + inline intrusive_list_base::intrusive_list_base() + { + mAnchor.mpNext = mAnchor.mpPrev = &mAnchor; + } + + inline intrusive_list_base::~intrusive_list_base() + { + #if EASTL_VALIDATE_INTRUSIVE_LIST + clear(); + mAnchor.mpNext = mAnchor.mpPrev = NULL; + #endif + } + + + inline bool intrusive_list_base::empty() const EA_NOEXCEPT + { + return mAnchor.mpPrev == &mAnchor; + } + + + inline intrusive_list_base::size_type intrusive_list_base::size() const EA_NOEXCEPT + { + const intrusive_list_node* p = &mAnchor; + size_type n = (size_type)-1; + + do { + ++n; + p = p->mpNext; + } while(p != &mAnchor); + + return n; + } + + + inline void intrusive_list_base::clear() EA_NOEXCEPT + { + #if EASTL_VALIDATE_INTRUSIVE_LIST + // Need to clear out all the next/prev pointers in the elements; + // this makes this operation O(n) instead of O(1). + intrusive_list_node* pNode = mAnchor.mpNext; + + while(pNode != &mAnchor) + { + intrusive_list_node* const pNextNode = pNode->mpNext; + pNode->mpNext = pNode->mpPrev = NULL; + pNode = pNextNode; + } + #endif + + mAnchor.mpNext = mAnchor.mpPrev = &mAnchor; + } + + + inline void intrusive_list_base::pop_front() + { + #if EASTL_VALIDATE_INTRUSIVE_LIST + intrusive_list_node* const pNode = mAnchor.mpNext; + #endif + + mAnchor.mpNext->mpNext->mpPrev = &mAnchor; + mAnchor.mpNext = mAnchor.mpNext->mpNext; + + #if EASTL_VALIDATE_INTRUSIVE_LIST + if(pNode != &mAnchor) + pNode->mpNext = pNode->mpPrev = NULL; + #if EASTL_ASSERT_ENABLED + else + EASTL_FAIL_MSG("intrusive_list::pop_front(): empty list."); + #endif + #endif + } + + + inline void intrusive_list_base::pop_back() + { + #if EASTL_VALIDATE_INTRUSIVE_LIST + intrusive_list_node* const pNode = mAnchor.mpPrev; + #endif + + mAnchor.mpPrev->mpPrev->mpNext = &mAnchor; + mAnchor.mpPrev = mAnchor.mpPrev->mpPrev; + + #if EASTL_VALIDATE_INTRUSIVE_LIST + if(pNode != &mAnchor) + pNode->mpNext = pNode->mpPrev = NULL; + #if EASTL_ASSERT_ENABLED + else + EASTL_FAIL_MSG("intrusive_list::pop_back(): empty list."); + #endif + #endif + } + + + + + /////////////////////////////////////////////////////////////////////// + // intrusive_list + /////////////////////////////////////////////////////////////////////// + + template + inline intrusive_list::intrusive_list() + { + } + + + template + inline intrusive_list::intrusive_list(const this_type& /*x*/) + : intrusive_list_base() + { + // We intentionally ignore argument x. + // To consider: Shouldn't this function simply not exist? Is there a useful purpose for having this function? + // There should be a comment here about it, though my first guess is that this exists to quell VC++ level 4/-Wall compiler warnings. + } + + + template + inline typename intrusive_list::this_type& intrusive_list::operator=(const this_type& /*x*/) + { + // We intentionally ignore argument x. + // See notes above in the copy constructor about questioning the existence of this function. 
+ return *this; + } + + + template + inline typename intrusive_list::iterator intrusive_list::begin() EA_NOEXCEPT + { + return iterator(static_cast(mAnchor.mpNext)); + } + + + template + inline typename intrusive_list::const_iterator intrusive_list::begin() const EA_NOEXCEPT + { + return const_iterator(static_cast(mAnchor.mpNext)); + } + + + template + inline typename intrusive_list::const_iterator intrusive_list::cbegin() const EA_NOEXCEPT + { + return const_iterator(static_cast(mAnchor.mpNext)); + } + + + template + inline typename intrusive_list::iterator intrusive_list::end() EA_NOEXCEPT + { + return iterator(static_cast(&mAnchor)); + } + + + template + inline typename intrusive_list::const_iterator intrusive_list::end() const EA_NOEXCEPT + { + return const_iterator(static_cast(&mAnchor)); + } + + + template + inline typename intrusive_list::const_iterator intrusive_list::cend() const EA_NOEXCEPT + { + return const_iterator(static_cast(&mAnchor)); + } + + + template + inline typename intrusive_list::reverse_iterator intrusive_list::rbegin() EA_NOEXCEPT + { + return reverse_iterator(iterator(static_cast(&mAnchor))); + } + + + template + inline typename intrusive_list::const_reverse_iterator intrusive_list::rbegin() const EA_NOEXCEPT + { + return const_reverse_iterator(const_iterator(static_cast(&mAnchor))); + } + + + template + inline typename intrusive_list::const_reverse_iterator intrusive_list::crbegin() const EA_NOEXCEPT + { + return const_reverse_iterator(const_iterator(static_cast(&mAnchor))); + } + + + template + inline typename intrusive_list::reverse_iterator intrusive_list::rend() EA_NOEXCEPT + { + return reverse_iterator(iterator(static_cast(mAnchor.mpNext))); + } + + + template + inline typename intrusive_list::const_reverse_iterator intrusive_list::rend() const EA_NOEXCEPT + { + return const_reverse_iterator(const_iterator(static_cast(mAnchor.mpNext))); + } + + + template + inline typename intrusive_list::const_reverse_iterator intrusive_list::crend() const EA_NOEXCEPT + { + return const_reverse_iterator(const_iterator(static_cast(mAnchor.mpNext))); + } + + + template + inline typename intrusive_list::reference intrusive_list::front() + { + #if EASTL_VALIDATE_INTRUSIVE_LIST && EASTL_ASSERT_ENABLED + if(mAnchor.mpNext == &mAnchor) + EASTL_FAIL_MSG("intrusive_list::front(): empty list."); + #endif + + return *static_cast(mAnchor.mpNext); + } + + + template + inline typename intrusive_list::const_reference intrusive_list::front() const + { + #if EASTL_VALIDATE_INTRUSIVE_LIST && EASTL_ASSERT_ENABLED + if(mAnchor.mpNext == &mAnchor) + EASTL_FAIL_MSG("intrusive_list::front(): empty list."); + #endif + + return *static_cast(mAnchor.mpNext); + } + + + template + inline typename intrusive_list::reference intrusive_list::back() + { + #if EASTL_VALIDATE_INTRUSIVE_LIST && EASTL_ASSERT_ENABLED + if(mAnchor.mpNext == &mAnchor) + EASTL_FAIL_MSG("intrusive_list::back(): empty list."); + #endif + + return *static_cast(mAnchor.mpPrev); + } + + + template + inline typename intrusive_list::const_reference intrusive_list::back() const + { + #if EASTL_VALIDATE_INTRUSIVE_LIST && EASTL_ASSERT_ENABLED + if(mAnchor.mpNext == &mAnchor) + EASTL_FAIL_MSG("intrusive_list::back(): empty list."); + #endif + + return *static_cast(mAnchor.mpPrev); + } + + + template + inline void intrusive_list::push_front(value_type& x) + { + #if EASTL_VALIDATE_INTRUSIVE_LIST && EASTL_ASSERT_ENABLED + if(x.mpNext || x.mpPrev) + EASTL_FAIL_MSG("intrusive_list::push_front(): element already on a list."); + #endif + + 
x.mpNext = mAnchor.mpNext; + x.mpPrev = &mAnchor; + mAnchor.mpNext = &x; + x.mpNext->mpPrev = &x; + } + + + template + inline void intrusive_list::push_back(value_type& x) + { + #if EASTL_VALIDATE_INTRUSIVE_LIST && EASTL_ASSERT_ENABLED + if(x.mpNext || x.mpPrev) + EASTL_FAIL_MSG("intrusive_list::push_back(): element already on a list."); + #endif + + x.mpPrev = mAnchor.mpPrev; + x.mpNext = &mAnchor; + mAnchor.mpPrev = &x; + x.mpPrev->mpNext = &x; + } + + + template + inline bool intrusive_list::contains(const value_type& x) const + { + for(const intrusive_list_node* p = mAnchor.mpNext; p != &mAnchor; p = p->mpNext) + { + if(p == &x) + return true; + } + + return false; + } + + + template + inline typename intrusive_list::iterator intrusive_list::locate(value_type& x) + { + for(intrusive_list_node* p = (T*)mAnchor.mpNext; p != &mAnchor; p = p->mpNext) + { + if(p == &x) + return iterator(static_cast(p)); + } + + return iterator((T*)&mAnchor); + } + + + template + inline typename intrusive_list::const_iterator intrusive_list::locate(const value_type& x) const + { + for(const intrusive_list_node* p = mAnchor.mpNext; p != &mAnchor; p = p->mpNext) + { + if(p == &x) + return const_iterator(static_cast(p)); + } + + return const_iterator((T*)&mAnchor); + } + + + template + inline typename intrusive_list::iterator intrusive_list::insert(const_iterator pos, value_type& x) + { + #if EASTL_VALIDATE_INTRUSIVE_LIST && EASTL_ASSERT_ENABLED + if(x.mpNext || x.mpPrev) + EASTL_FAIL_MSG("intrusive_list::insert(): element already on a list."); + #endif + + intrusive_list_node& next = *const_cast(pos.mpNode); + intrusive_list_node& prev = *static_cast(next.mpPrev); + prev.mpNext = next.mpPrev = &x; + x.mpPrev = &prev; + x.mpNext = &next; + + return iterator(&x); + } + + + template + inline typename intrusive_list::iterator + intrusive_list::erase(const_iterator pos) + { + intrusive_list_node& prev = *static_cast(pos.mpNode->mpPrev); + intrusive_list_node& next = *static_cast(pos.mpNode->mpNext); + prev.mpNext = &next; + next.mpPrev = &prev; + + #if EASTL_VALIDATE_INTRUSIVE_LIST + iterator ii(const_cast(pos.mpNode)); + ii.mpNode->mpPrev = ii.mpNode->mpNext = NULL; + #endif + + return iterator(static_cast(&next)); + } + + + template + inline typename intrusive_list::iterator + intrusive_list::erase(const_iterator first, const_iterator last) + { + intrusive_list_node& prev = *static_cast(first.mpNode->mpPrev); + intrusive_list_node& next = *const_cast(last.mpNode); + + #if EASTL_VALIDATE_INTRUSIVE_LIST + // need to clear out all the next/prev pointers in the elements; + // this makes this operation O(n) instead of O(1), sadly, although + // it's technically amortized O(1) since you could count yourself + // as paying this cost with each insert. + intrusive_list_node* pCur = const_cast(first.mpNode); + + while(pCur != &next) + { + intrusive_list_node* const pCurNext = pCur->mpNext; + pCur->mpPrev = pCur->mpNext = NULL; + pCur = pCurNext; + } + #endif + + prev.mpNext = &next; + next.mpPrev = &prev; + + return iterator(const_cast(last.mpNode)); + } + + + template + inline typename intrusive_list::reverse_iterator + intrusive_list::erase(const_reverse_iterator position) + { + return reverse_iterator(erase((++position).base())); + } + + + template + inline typename intrusive_list::reverse_iterator + intrusive_list::erase(const_reverse_iterator first, const_reverse_iterator last) + { + // Version which erases in order from first to last. 
+ // difference_type i(first.base() - last.base()); + // while(i--) + // first = erase(first); + // return first; + + // Version which erases in order from last to first, but is slightly more efficient: + return reverse_iterator(erase((++last).base(), (++first).base())); + } + + + template + void intrusive_list::swap(intrusive_list& x) + { + // swap anchors + intrusive_list_node temp(mAnchor); + mAnchor = x.mAnchor; + x.mAnchor = temp; + + // Fixup node pointers into the anchor, since the addresses of + // the anchors must stay the same with each list. + if(mAnchor.mpNext == &x.mAnchor) + mAnchor.mpNext = mAnchor.mpPrev = &mAnchor; + else + mAnchor.mpNext->mpPrev = mAnchor.mpPrev->mpNext = &mAnchor; + + if(x.mAnchor.mpNext == &mAnchor) + x.mAnchor.mpNext = x.mAnchor.mpPrev = &x.mAnchor; + else + x.mAnchor.mpNext->mpPrev = x.mAnchor.mpPrev->mpNext = &x.mAnchor; + + #if EASTL_VALIDATE_INTRUSIVE_LIST + temp.mpPrev = temp.mpNext = NULL; + #endif + } + + + template + void intrusive_list::splice(const_iterator pos, value_type& value) + { + // Note that splice(pos, x, pos) and splice(pos+1, x, pos) + // are valid and need to be handled correctly. + + if(pos.mpNode != &value) + { + // Unlink item from old list. + intrusive_list_node& oldNext = *value.mpNext; + intrusive_list_node& oldPrev = *value.mpPrev; + oldNext.mpPrev = &oldPrev; + oldPrev.mpNext = &oldNext; + + // Relink item into new list. + intrusive_list_node& newNext = *const_cast(pos.mpNode); + intrusive_list_node& newPrev = *newNext.mpPrev; + + newPrev.mpNext = &value; + newNext.mpPrev = &value; + value.mpPrev = &newPrev; + value.mpNext = &newNext; + } + } + + + template + void intrusive_list::splice(const_iterator pos, intrusive_list& x) + { + // Note: &x == this is prohibited, so self-insertion is not a problem. + if(x.mAnchor.mpNext != &x.mAnchor) // If the list 'x' isn't empty... + { + intrusive_list_node& next = *const_cast(pos.mpNode); + intrusive_list_node& prev = *static_cast(next.mpPrev); + intrusive_list_node& insertPrev = *static_cast(x.mAnchor.mpNext); + intrusive_list_node& insertNext = *static_cast(x.mAnchor.mpPrev); + + prev.mpNext = &insertPrev; + insertPrev.mpPrev = &prev; + insertNext.mpNext = &next; + next.mpPrev = &insertNext; + x.mAnchor.mpPrev = x.mAnchor.mpNext = &x.mAnchor; + } + } + + + template + void intrusive_list::splice(const_iterator pos, intrusive_list& /*x*/, const_iterator i) + { + // Note: &x == this is prohibited, so self-insertion is not a problem. + + // Note that splice(pos, x, pos) and splice(pos + 1, x, pos) + // are valid and need to be handled correctly. + + // We don't need to check if the source list is empty, because + // this function expects a valid iterator from the source list, + // and thus the list cannot be empty in such a situation. + + iterator ii(const_cast(i.mpNode)); // Make a temporary non-const version. + + if(pos != ii) + { + // Unlink item from old list. + intrusive_list_node& oldNext = *ii.mpNode->mpNext; + intrusive_list_node& oldPrev = *ii.mpNode->mpPrev; + oldNext.mpPrev = &oldPrev; + oldPrev.mpNext = &oldNext; + + // Relink item into new list. 
+ intrusive_list_node& newNext = *const_cast(pos.mpNode); + intrusive_list_node& newPrev = *newNext.mpPrev; + + newPrev.mpNext = ii.mpNode; + newNext.mpPrev = ii.mpNode; + ii.mpNode->mpPrev = &newPrev; + ii.mpNode->mpNext = &newNext; + } + } + + + template + void intrusive_list::splice(const_iterator pos, intrusive_list& /*x*/, const_iterator first, const_iterator last) + { + // Note: &x == this is prohibited, so self-insertion is not a problem. + if(first != last) + { + intrusive_list_node& insertPrev = *const_cast(first.mpNode); + intrusive_list_node& insertNext = *static_cast(last.mpNode->mpPrev); + + // remove from old list + insertNext.mpNext->mpPrev = insertPrev.mpPrev; + insertPrev.mpPrev->mpNext = insertNext.mpNext; + + // insert into this list + intrusive_list_node& next = *const_cast(pos.mpNode); + intrusive_list_node& prev = *static_cast(next.mpPrev); + + prev.mpNext = &insertPrev; + insertPrev.mpPrev = &prev; + insertNext.mpNext = &next; + next.mpPrev = &insertNext; + } + } + + + template + inline void intrusive_list::remove(value_type& value) + { + intrusive_list_node& prev = *value.mpPrev; + intrusive_list_node& next = *value.mpNext; + prev.mpNext = &next; + next.mpPrev = &prev; + + #if EASTL_VALIDATE_INTRUSIVE_LIST + value.mpPrev = value.mpNext = NULL; + #endif + } + + + template + void intrusive_list::merge(this_type& x) + { + if(this != &x) + { + iterator first(begin()); + iterator firstX(x.begin()); + const iterator last(end()); + const iterator lastX(x.end()); + + while((first != last) && (firstX != lastX)) + { + if(*firstX < *first) + { + iterator next(firstX); + + splice(first, x, firstX, ++next); + firstX = next; + } + else + ++first; + } + + if(firstX != lastX) + splice(last, x, firstX, lastX); + } + } + + + template + template + void intrusive_list::merge(this_type& x, Compare compare) + { + if(this != &x) + { + iterator first(begin()); + iterator firstX(x.begin()); + const iterator last(end()); + const iterator lastX(x.end()); + + while((first != last) && (firstX != lastX)) + { + if(compare(*firstX, *first)) + { + iterator next(firstX); + + splice(first, x, firstX, ++next); + firstX = next; + } + else + ++first; + } + + if(firstX != lastX) + splice(last, x, firstX, lastX); + } + } + + + template + void intrusive_list::unique() + { + iterator first(begin()); + const iterator last(end()); + + if(first != last) + { + iterator next(first); + + while(++next != last) + { + if(*first == *next) + erase(next); + else + first = next; + next = first; + } + } + } + + + template + template + void intrusive_list::unique(BinaryPredicate predicate) + { + iterator first(begin()); + const iterator last(end()); + + if(first != last) + { + iterator next(first); + + while(++next != last) + { + if(predicate(*first, *next)) + erase(next); + else + first = next; + next = first; + } + } + } + + + template + void intrusive_list::sort() + { + // We implement the algorithm employed by Chris Caulfield whereby we use recursive + // function calls to sort the list. The sorting of a very large list may fail due to stack overflow + // if the stack is exhausted. The limit depends on the platform and the avaialble stack space. + + // Easier-to-understand version of the 'if' statement: + // iterator i(begin()); + // if((i != end()) && (++i != end())) // If the size is >= 2 (without calling the more expensive size() function)... 
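+		// Descriptive note: this is a top-down recursive merge sort. Each call splits the ring at its
+		// midpoint (found by walking two iterators toward each other, since size() is O(n)), sorts the
+		// halves recursively and merges them back; that gives O(n log n) comparisons and roughly
+		// log2(n) levels of recursion, which is the stack usage the warning above refers to.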
+ + // Faster, more inlinable version of the 'if' statement: + if((static_cast(mAnchor.mpNext) != &mAnchor) && + (static_cast(mAnchor.mpNext) != static_cast(mAnchor.mpPrev))) + { + // Split the array into 2 roughly equal halves. + this_type leftList; // This should cause no memory allocation. + this_type rightList; + + // We find an iterator which is in the middle of the list. The fastest way to do + // this is to iterate from the base node both forwards and backwards with two + // iterators and stop when they meet each other. Recall that our size() function + // is not O(1) but is instead O(n), at least when EASTL_LIST_SIZE_CACHE is disabled. + #if EASTL_LIST_SIZE_CACHE + iterator mid(begin()); + eastl::advance(mid, size() / 2); + #else + iterator mid(begin()), tail(end()); + + while((mid != tail) && (++mid != tail)) + --tail; + #endif + + // Move the left half of this into leftList and the right half into rightList. + leftList.splice(leftList.begin(), *this, begin(), mid); + rightList.splice(rightList.begin(), *this); + + // Sort the sub-lists. + leftList.sort(); + rightList.sort(); + + // Merge the two halves into this list. + splice(begin(), leftList); + merge(rightList); + } + } + + + template + template + void intrusive_list::sort(Compare compare) + { + // We implement the algorithm employed by Chris Caulfield whereby we use recursive + // function calls to sort the list. The sorting of a very large list may fail due to stack overflow + // if the stack is exhausted. The limit depends on the platform and the avaialble stack space. + + // Easier-to-understand version of the 'if' statement: + // iterator i(begin()); + // if((i != end()) && (++i != end())) // If the size is >= 2 (without calling the more expensive size() function)... + + // Faster, more inlinable version of the 'if' statement: + if((static_cast(mAnchor.mpNext) != &mAnchor) && + (static_cast(mAnchor.mpNext) != static_cast(mAnchor.mpPrev))) + { + // Split the array into 2 roughly equal halves. + this_type leftList; // This should cause no memory allocation. + this_type rightList; + + // We find an iterator which is in the middle of the list. The fastest way to do + // this is to iterate from the base node both forwards and backwards with two + // iterators and stop when they meet each other. Recall that our size() function + // is not O(1) but is instead O(n), at least when EASTL_LIST_SIZE_CACHE is disabled. + #if EASTL_LIST_SIZE_CACHE + iterator mid(begin()); + eastl::advance(mid, size() / 2); + #else + iterator mid(begin()), tail(end()); + + while((mid != tail) && (++mid != tail)) + --tail; + #endif + + // Move the left half of this into leftList and the right half into rightList. + leftList.splice(leftList.begin(), *this, begin(), mid); + rightList.splice(rightList.begin(), *this); + + // Sort the sub-lists. + leftList.sort(compare); + rightList.sort(compare); + + // Merge the two halves into this list. + splice(begin(), leftList); + merge(rightList, compare); + } + } + + + template + inline int intrusive_list::validate_iterator(const_iterator i) const + { + // To do: Come up with a more efficient mechanism of doing this. 
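+		// Walk the list: if i refers to a live element it is valid, current and dereferenceable;
+		// end() is valid and current but not dereferenceable; anything else reports isf_none.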
+ + for(const_iterator temp = begin(), tempEnd = end(); temp != tempEnd; ++temp) + { + if(temp == i) + return (isf_valid | isf_current | isf_can_dereference); + } + + if(i == end()) + return (isf_valid | isf_current); + + return isf_none; + } + + + + /////////////////////////////////////////////////////////////////////// + // global operators + /////////////////////////////////////////////////////////////////////// + + template + bool operator==(const intrusive_list& a, const intrusive_list& b) + { + // If we store an mSize member for intrusive_list, we want to take advantage of it here. + typename intrusive_list::const_iterator ia = a.begin(); + typename intrusive_list::const_iterator ib = b.begin(); + typename intrusive_list::const_iterator enda = a.end(); + typename intrusive_list::const_iterator endb = b.end(); + + while((ia != enda) && (ib != endb) && (*ia == *ib)) + { + ++ia; + ++ib; + } + return (ia == enda) && (ib == endb); + } + + template + bool operator!=(const intrusive_list& a, const intrusive_list& b) + { + return !(a == b); + } + + template + bool operator<(const intrusive_list& a, const intrusive_list& b) + { + return eastl::lexicographical_compare(a.begin(), a.end(), b.begin(), b.end()); + } + + template + bool operator>(const intrusive_list& a, const intrusive_list& b) + { + return b < a; + } + + template + bool operator<=(const intrusive_list& a, const intrusive_list& b) + { + return !(b < a); + } + + template + bool operator>=(const intrusive_list& a, const intrusive_list& b) + { + return !(a < b); + } + + template + void swap(intrusive_list& a, intrusive_list& b) + { + a.swap(b); + } + + +} // namespace eastl + + +#endif // Header include guard + + + + + + + + + + + + + + + diff --git a/libkram/eastl/include/EASTL/intrusive_ptr.h b/libkram/eastl/include/EASTL/intrusive_ptr.h new file mode 100644 index 00000000..af4e686f --- /dev/null +++ b/libkram/eastl/include/EASTL/intrusive_ptr.h @@ -0,0 +1,426 @@ +/////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +/////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_INTRUSIVE_PTR_H +#define EASTL_INTRUSIVE_PTR_H + + +#include +#include + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + + + +namespace eastl +{ + // We provide default implementations of AddRef and Release in the eastl namespace. + // The user can override these on a per-class basis by defining their own specialized + // intrusive_ptr_add_ref and intrusive_ptr_release functions. User-defined specializations + // do not need to exist in the eastl namespace, but should preferably be in the namespace + // of the templated class T. + template + void intrusive_ptr_add_ref(T* p) + { + p->AddRef(); + } + + template + void intrusive_ptr_release(T* p) + { + p->Release(); + } + + + ////////////////////////////////////////////////////////////////////////////// + /// intrusive_ptr + /// + /// This is a class that acts like the C++ auto_ptr class except that instead + /// of deleting its member data when it goes out of scope, it releases its + /// member data when it goes out of scope. This class thus requires that the + /// templated data type have an AddRef and Release function (or whatever is + /// configured to be the two refcount functions). 
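+	///
+	/// Illustrative sketch (hypothetical Widget type; an assumption, not upstream documentation):
+	/// a minimal, non-thread-safe class usable with intrusive_ptr just maintains its own count:
+	///     class Widget {
+	///         int32_t mRefCount;
+	///     public:
+	///         Widget() : mRefCount(0) {}
+	///         void AddRef()  { ++mRefCount; }
+	///         void Release() { if(--mRefCount <= 0) delete this; }
+	///     };
+	///     eastl::intrusive_ptr<Widget> pWidget(new Widget);   // the constructor calls AddRef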
+ /// + /// This class is useful for automatically releasing an object when this + /// class goes out of scope. See below for some usage. + /// You should be careful about putting instances of this class as members of + /// another class. If you do so, then the intrusive_ptr destructor will only + /// be called if the object that owns it is destructed. This creates a potential + /// chicken-and-egg situation. What if the intrusive_ptr member contains a + /// pointer to an object that has a reference on the object that owns the + /// intrusive_ptr member? The answer is that the neither object can ever be + /// destructed. The solution is to: + /// 1) Be very careful about what objects you put into member intrusive_ptr objects. + /// 2) Clear out your intrusive_ptr members in your shutdown function. + /// 3) Simply don't use intrusive_ptr objects as class members. + /// + /// Example usage: + /// intrusive_ptr pWidget = new Widget; + /// pWidget = new Widget; + /// pWidget->Reset(); + /// + template + class intrusive_ptr + { + protected: + // Friend declarations. + template friend class intrusive_ptr; + typedef intrusive_ptr this_type; + + T* mpObject; + + public: + /// element_type + /// This typedef is present for consistency with the C++ standard library + /// auto_ptr template. It allows users to refer to the templated type via + /// a typedef. This is sometimes useful to be able to do. + /// + /// Example usage: + /// intrusive_ptr ip; + /// void DoSomething(intrusive_ptr::element_type someType); + /// + typedef T element_type; + + /// intrusive_ptr + /// Default constructor. The member object is set to NULL. + intrusive_ptr() + : mpObject(NULL) + { + // Empty + } + + /// intrusive_ptr + /// Provides a constructor which takes ownership of a pointer. + /// The incoming pointer is AddRefd. + /// + /// Example usage: + /// intrusive_ptr pWidget(new Widget); + intrusive_ptr(T* p, bool bAddRef = true) + : mpObject(p) + { + if(mpObject && bAddRef) + intrusive_ptr_add_ref(mpObject); // Intentionally do not prefix the call with eastl:: but instead allow namespace lookup to resolve the namespace. + } + + /// intrusive_ptr + /// Construction from self type. + intrusive_ptr(const intrusive_ptr& ip) + : mpObject(ip.mpObject) + { + if(mpObject) + intrusive_ptr_add_ref(mpObject); + } + + + /// intrusive_ptr + /// move constructor + intrusive_ptr(intrusive_ptr&& ip) + : mpObject(nullptr) + { + swap(ip); + } + + /// intrusive_ptr + /// Provides a constructor which copies a pointer from another intrusive_ptr. + /// The incoming pointer is AddRefd. The source intrusive_ptr object maintains + /// its AddRef on the pointer. + /// + /// Example usage: + /// intrusive_ptr pWidget1; + /// intrusive_ptr pWidget2(pWidget1); + template + intrusive_ptr(const intrusive_ptr& ip) + : mpObject(ip.mpObject) + { + if(mpObject) + intrusive_ptr_add_ref(mpObject); + } + + /// intrusive_ptr + /// Releases the owned pointer. + ~intrusive_ptr() + { + if(mpObject) + intrusive_ptr_release(mpObject); + } + + + /// operator= + /// Assignment to self type. + intrusive_ptr& operator=(const intrusive_ptr& ip) + { + return operator=(ip.mpObject); + } + + + /// operator= + /// Move assignment operator + intrusive_ptr& operator=(intrusive_ptr&& ip) + { + swap(ip); + return *this; + } + + + /// operator = + /// Assigns an intrusive_ptr object to this intrusive_ptr object. + /// The incoming pointer is AddRefd. The source intrusive_ptr object + /// maintains its AddRef on the pointer. 
If there is an existing member + /// pointer, it is Released before the incoming pointer is assigned. + /// If the incoming pointer is equal to the existing pointer, no + /// action is taken. The incoming pointer is AddRefd before any + /// member pointer is Released. + template + intrusive_ptr& operator=(const intrusive_ptr& ip) + { + return operator=(ip.mpObject); + } + + /// operator= + /// Assigns an intrusive_ptr object to this intrusive_ptr object. + /// The incoming pointer is AddRefd. If there is an existing member + /// pointer, it is Released before the incoming pointer is assigned. + /// If the incoming pointer is equal to the existing pointer, no + /// action is taken. The incoming pointer is AddRefd before any + /// member pointer is Released. + intrusive_ptr& operator=(T* pObject) + { + if(pObject != mpObject) + { + T* const pTemp = mpObject; // Create temporary to prevent possible problems with re-entrancy. + if(pObject) + intrusive_ptr_add_ref(pObject); + mpObject = pObject; + if(pTemp) + intrusive_ptr_release(pTemp); + } + return *this; + } + + /// operator * + /// Returns a reference to the contained object. + T& operator *() const + { + return *mpObject; + } + + /// operator * + /// Returns a pointer to the contained object, allowing the + /// user to use this container as if it were contained pointer itself. + T* operator ->() const + { + return mpObject; + } + + /// get() + /// Returns a pointer to the contained object. + T* get() const + { + return mpObject; + } + + /// reset + /// Releases the owned object and clears our reference to it. + void reset() + { + T* const pTemp = mpObject; + mpObject = NULL; + if(pTemp) + intrusive_ptr_release(pTemp); + } + + /// swap + /// Exchanges the owned pointer beween two intrusive_ptr objects. + void swap(this_type& ip) + { + T* const pTemp = mpObject; + mpObject = ip.mpObject; + ip.mpObject = pTemp; + } + + /// attach + /// Sets an intrusive_ptr pointer without calling AddRef() on + /// the pointed object. The intrusive_ptr thus eventually only does a + /// Release() on the object. This is useful for assuming a reference + /// that someone else has handed you and making sure it is always + /// released, even if you return in the middle of a function or an + /// exception is thrown. + /// + void attach(T* pObject) + { + T* const pTemp = mpObject; + mpObject = pObject; + if(pTemp) + intrusive_ptr_release(pTemp); + } + + /// detach + /// Surrenders the reference held by an intrusive_ptr pointer -- + /// it returns the current reference and nulls the pointer. If the returned + /// pointer is non-null it must be released. This is useful in functions + /// that must return a reference while possibly being aborted by a return + /// or thrown exception: + /// + /// bool GetFoo(T** pp){ + /// intrusive_ptr p(PrivateGetFoo()); + /// if(p->Method()) + /// return false; + /// *pp = p.detach(); + /// return true; + /// } + T* detach() + { + T* const pTemp = mpObject; + mpObject = NULL; + return pTemp; + } + + /// Implicit operator bool + /// Allows for using a intrusive_ptr as a boolean. + /// Example usage: + /// intrusive_ptr ptr = new Widget; + /// if(ptr) + /// ++*ptr; + /// + /// Note that below we do not use operator bool(). The reason for this + /// is that booleans automatically convert up to short, int, float, etc. + /// The result is that this: if(intrusivePtr == 1) would yield true (bad). 
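+		/// (The member-pointer typedef below is the classic pre-C++11 "safe bool" idiom; a C++11
+		/// 'explicit operator bool() const' would be the modern equivalent, but this form presumably
+		/// keeps the header usable on pre-C++11 compilers.)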
+ typedef T* (this_type::*bool_)() const; + operator bool_() const + { + if(mpObject) + return &this_type::get; + return NULL; + } + + /// operator! + /// This returns the opposite of operator bool; it returns true if + /// the owned pointer is null. Some compilers require this and some don't. + /// intrusive_ptr ptr = new Widget; + /// if(!ptr) + /// assert(false); + bool operator!() const + { + return (mpObject == NULL); + } + + }; // class intrusive_ptr + + + /// get_pointer + /// returns intrusive_ptr::get() via the input intrusive_ptr. + template + inline T* get_pointer(const intrusive_ptr& intrusivePtr) + { + return intrusivePtr.get(); + } + + /// swap + /// Exchanges the owned pointer beween two intrusive_ptr objects. + /// This non-member version is useful for compatibility of intrusive_ptr + /// objects with the C++ Standard Library and other libraries. + template + inline void swap(intrusive_ptr& intrusivePtr1, intrusive_ptr& intrusivePtr2) + { + intrusivePtr1.swap(intrusivePtr2); + } + + + template + bool operator==(intrusive_ptr const& iPtr1, intrusive_ptr const& iPtr2) + { + return (iPtr1.get() == iPtr2.get()); + } + + template + bool operator!=(intrusive_ptr const& iPtr1, intrusive_ptr const& iPtr2) + { + return (iPtr1.get() != iPtr2.get()); + } + + template + bool operator==(intrusive_ptr const& iPtr1, T* p) + { + return (iPtr1.get() == p); + } + + template + bool operator!=(intrusive_ptr const& iPtr1, T* p) + { + return (iPtr1.get() != p); + } + + template + bool operator==(T* p, intrusive_ptr const& iPtr2) + { + return (p == iPtr2.get()); + } + + template + bool operator!=(T* p, intrusive_ptr const& iPtr2) + { + return (p != iPtr2.get()); + } + + template + bool operator<(intrusive_ptr const& iPtr1, intrusive_ptr const& iPtr2) + { + return ((uintptr_t)iPtr1.get() < (uintptr_t)iPtr2.get()); + } + + + /// static_pointer_cast + /// Returns an intrusive_ptr static-casted from a intrusive_ptr. + template + intrusive_ptr static_pointer_cast(const intrusive_ptr& intrusivePtr) + { + return static_cast(intrusivePtr.get()); + } + + + #if EASTL_RTTI_ENABLED + + /// dynamic_pointer_cast + /// Returns an intrusive_ptr dynamic-casted from a intrusive_ptr. + template + intrusive_ptr dynamic_pointer_cast(const intrusive_ptr& intrusivePtr) + { + return dynamic_cast(intrusivePtr.get()); + } + + #endif + + +} // namespace eastl + + +#endif // Header include guard + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/libkram/eastl/include/EASTL/iterator.h b/libkram/eastl/include/EASTL/iterator.h new file mode 100644 index 00000000..d2dc8993 --- /dev/null +++ b/libkram/eastl/include/EASTL/iterator.h @@ -0,0 +1,1192 @@ +/////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +/////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ITERATOR_H +#define EASTL_ITERATOR_H + + +#include +#include +#include + +EA_DISABLE_ALL_VC_WARNINGS(); + +#include + +EA_RESTORE_ALL_VC_WARNINGS(); + +// If the user has specified that we use std iterator +// categories instead of EASTL iterator categories, +// then #include . +#if EASTL_STD_ITERATOR_CATEGORY_ENABLED + EA_DISABLE_ALL_VC_WARNINGS(); + + #include + + EA_RESTORE_ALL_VC_WARNINGS(); +#endif + + +EA_DISABLE_VC_WARNING(4619); // There is no warning number 'number'. +EA_DISABLE_VC_WARNING(4217); // Member template functions cannot be used for copy-assignment or copy-construction. 
+ +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + + + +namespace eastl +{ + /// iterator_status_flag + /// + /// Defines the validity status of an iterator. This is primarily used for + /// iterator validation in debug builds. These are implemented as OR-able + /// flags (as opposed to mutually exclusive values) in order to deal with + /// the nature of iterator status. In particular, an iterator may be valid + /// but not dereferencable, as in the case with an iterator to container end(). + /// An iterator may be valid but also dereferencable, as in the case with an + /// iterator to container begin(). + /// + enum iterator_status_flag + { + isf_none = 0x00, /// This is called none and not called invalid because it is not strictly the opposite of invalid. + isf_valid = 0x01, /// The iterator is valid, which means it is in the range of [begin, end]. + isf_current = 0x02, /// The iterator is valid and points to the same element it did when created. For example, if an iterator points to vector::begin() but an element is inserted at the front, the iterator is valid but not current. Modification of elements in place do not make iterators non-current. + isf_can_dereference = 0x04 /// The iterator is dereferencable, which means it is in the range of [begin, end). It may or may not be current. + }; + + + + // The following declarations are taken directly from the C++ standard document. + // input_iterator_tag, etc. + // iterator + // iterator_traits + // reverse_iterator + + // Iterator categories + // Every iterator is defined as belonging to one of the iterator categories that + // we define here. These categories come directly from the C++ standard. + #if !EASTL_STD_ITERATOR_CATEGORY_ENABLED // If we are to use our own iterator category definitions... + struct input_iterator_tag { }; + struct output_iterator_tag { }; + struct forward_iterator_tag : public input_iterator_tag { }; + struct bidirectional_iterator_tag : public forward_iterator_tag { }; + struct random_access_iterator_tag : public bidirectional_iterator_tag { }; + struct contiguous_iterator_tag : public random_access_iterator_tag { }; // Extension to the C++ standard. Contiguous ranges are more than random access, they are physically contiguous. + #endif + + + // struct iterator + template + struct iterator + { + typedef Category iterator_category; + typedef T value_type; + typedef Distance difference_type; + typedef Pointer pointer; + typedef Reference reference; + }; + + + // struct iterator_traits + template + struct iterator_traits + { + typedef typename Iterator::iterator_category iterator_category; + typedef typename Iterator::value_type value_type; + typedef typename Iterator::difference_type difference_type; + typedef typename Iterator::pointer pointer; + typedef typename Iterator::reference reference; + }; + + template + struct iterator_traits + { + typedef EASTL_ITC_NS::random_access_iterator_tag iterator_category; // To consider: Change this to contiguous_iterator_tag for the case that + typedef T value_type; // EASTL_ITC_NS is "eastl" instead of "std". 
+ typedef ptrdiff_t difference_type; + typedef T* pointer; + typedef T& reference; + }; + + template + struct iterator_traits + { + typedef EASTL_ITC_NS::random_access_iterator_tag iterator_category; + typedef T value_type; + typedef ptrdiff_t difference_type; + typedef const T* pointer; + typedef const T& reference; + }; + + + + + /// is_iterator_wrapper + /// + /// Tells if an Iterator type is a wrapper type as opposed to a regular type. + /// Relies on the class declaring a typedef called wrapped_iterator_type. + /// + /// Examples of wrapping iterators: + /// reverse_iterator + /// generic_iterator + /// move_iterator + /// Examples of non-wrapping iterators: + /// iterator + /// list::iterator + /// char* + /// + /// Example behavior: + /// is_iterator_wrapper(int*)::value => false + /// is_iterator_wrapper(eastl::array*)::value => false + /// is_iterator_wrapper(eastl::vector::iterator)::value => false + /// is_iterator_wrapper(eastl::generic_iterator)::value => true + /// is_iterator_wrapper(eastl::move_iterator::iterator>)::value => true + /// + template + class is_iterator_wrapper + { + template + static eastl::no_type test(...); + + template + static eastl::yes_type test(typename U::wrapped_iterator_type*, typename eastl::enable_if::value>::type* = 0); + + public: + EA_DISABLE_VC_WARNING(6334) + static const bool value = (sizeof(test(NULL)) == sizeof(eastl::yes_type)); + EA_RESTORE_VC_WARNING() + }; + + + /// unwrap_iterator + /// + /// Takes a wrapper Iterator (e.g. move_iterator, reverse_iterator, generic_iterator) instance + /// and returns the wrapped iterator type. If Iterator is not a wrapper (including being a pointer), + /// or is not an iterator, then this function returns it as-is. + /// unwrap_iterator unwraps only a single layer of iterator at a time. You need to call it twice, + /// for example, to unwrap two layers of iterators. + /// + /// Example usage: + /// int* pInt = unwrap_iterator(&pIntArray[15]); + /// int* pInt = unwrap_iterator(generic_iterator(&pIntArray[15])); + /// MyVector::iterator it = unwrap_iterator(myVector.begin()); + /// MyVector::iterator it = unwrap_iterator(move_iterator(myVector.begin())); + /// + template + struct is_iterator_wrapper_helper + { + typedef Iterator iterator_type; + + static iterator_type get_base(Iterator it) + { return it; } + }; + + + template + struct is_iterator_wrapper_helper + { + typedef typename Iterator::iterator_type iterator_type; + + static iterator_type get_base(Iterator it) + { return it.base(); } + }; + + template + inline typename is_iterator_wrapper_helper::value>::iterator_type unwrap_iterator(Iterator it) + { return eastl::is_iterator_wrapper_helper::value>::get_base(it); } + + + + /// reverse_iterator + /// + /// From the C++ standard: + /// Bidirectional and random access iterators have corresponding reverse + /// iterator adaptors that iterate through the data structure in the + /// opposite direction. They have the same signatures as the corresponding + /// iterators. The fundamental relation between a reverse iterator and its + /// corresponding iterator i is established by the identity: + /// &*(reverse_iterator(i)) == &*(i - 1). + /// This mapping is dictated by the fact that while there is always a pointer + /// past the end of an array, there might not be a valid pointer before the + /// beginning of an array. 
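+    ///
+    /// Illustrative sketch of the identity above (the int array is assumed purely
+    /// for the example):
+    ///     int a[] = { 1, 2, 3 };
+    ///     eastl::reverse_iterator<int*> rit(a + 3);   // wraps the one-past-the-end pointer
+    ///     // *rit == 3, and &*rit == &*(rit.base() - 1) == &a[2]
+    ///     ++rit;                                      // steps backward, so now *rit == 2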
+ /// + template + class reverse_iterator : public iterator::iterator_category, + typename eastl::iterator_traits::value_type, + typename eastl::iterator_traits::difference_type, + typename eastl::iterator_traits::pointer, + typename eastl::iterator_traits::reference> + { + public: + typedef Iterator iterator_type; + typedef iterator_type wrapped_iterator_type; // This is not in the C++ Standard; it's used by use to identify it as a wrapping iterator type. + typedef typename eastl::iterator_traits::pointer pointer; + typedef typename eastl::iterator_traits::reference reference; + typedef typename eastl::iterator_traits::difference_type difference_type; + + protected: + Iterator mIterator; + + public: + EA_CPP14_CONSTEXPR reverse_iterator() // It's important that we construct mIterator, because if Iterator + : mIterator() { } // is a pointer, there's a difference between doing it and not. + + EA_CPP14_CONSTEXPR explicit reverse_iterator(iterator_type i) + : mIterator(i) { } + + EA_CPP14_CONSTEXPR reverse_iterator(const reverse_iterator& ri) + : mIterator(ri.mIterator) { } + + template + EA_CPP14_CONSTEXPR reverse_iterator(const reverse_iterator& ri) + : mIterator(ri.base()) { } + + // This operator= isn't in the standard, but the the C++ + // library working group has tentatively approved it, as it + // allows const and non-const reverse_iterators to interoperate. + template + EA_CPP14_CONSTEXPR reverse_iterator& operator=(const reverse_iterator& ri) + { mIterator = ri.base(); return *this; } + + EA_CPP14_CONSTEXPR iterator_type base() const + { return mIterator; } + + EA_CPP14_CONSTEXPR reference operator*() const + { + iterator_type i(mIterator); + return *--i; + } + + EA_CPP14_CONSTEXPR pointer operator->() const + { return &(operator*()); } + + EA_CPP14_CONSTEXPR reverse_iterator& operator++() + { --mIterator; return *this; } + + EA_CPP14_CONSTEXPR reverse_iterator operator++(int) + { + reverse_iterator ri(*this); + --mIterator; + return ri; + } + + EA_CPP14_CONSTEXPR reverse_iterator& operator--() + { ++mIterator; return *this; } + + EA_CPP14_CONSTEXPR reverse_iterator operator--(int) + { + reverse_iterator ri(*this); + ++mIterator; + return ri; + } + + EA_CPP14_CONSTEXPR reverse_iterator operator+(difference_type n) const + { return reverse_iterator(mIterator - n); } + + EA_CPP14_CONSTEXPR reverse_iterator& operator+=(difference_type n) + { mIterator -= n; return *this; } + + EA_CPP14_CONSTEXPR reverse_iterator operator-(difference_type n) const + { return reverse_iterator(mIterator + n); } + + EA_CPP14_CONSTEXPR reverse_iterator& operator-=(difference_type n) + { mIterator += n; return *this; } + + // http://cplusplus.github.io/LWG/lwg-defects.html#386, + // http://llvm.org/bugs/show_bug.cgi?id=17883 + // random_access_iterator operator[] is merely required to return something convertible to reference. + // reverse_iterator operator[] can't necessarily know what to return as the underlying iterator + // operator[] may return something other than reference. + EA_CPP14_CONSTEXPR reference operator[](difference_type n) const + { return mIterator[-n - 1]; } + }; + + + // The C++ library working group has tentatively approved the usage of two + // template parameters (Iterator1 and Iterator2) in order to allow reverse_iterators + // and const_reverse iterators to be comparable. This is a similar issue to the + // C++ defect report #179 regarding comparison of container iterators and const_iterators. 
+ // + // libstdc++ reports that std::relops breaks the usage of two iterator types and if we + // want to support relops then we need to also make versions of each of below with + // a single template parameter to placate std::relops. But relops is hardly used due to + // the troubles it causes and so we are avoiding support here until somebody complains about it. + template + EA_CPP14_CONSTEXPR inline bool + operator==(const reverse_iterator& a, const reverse_iterator& b) + { return a.base() == b.base(); } + + + template + EA_CPP14_CONSTEXPR inline bool + operator<(const reverse_iterator& a, const reverse_iterator& b) + { return a.base() > b.base(); } + + + template + EA_CPP14_CONSTEXPR inline bool + operator!=(const reverse_iterator& a, const reverse_iterator& b) + { return a.base() != b.base(); } + + + template + EA_CPP14_CONSTEXPR inline bool + operator>(const reverse_iterator& a, const reverse_iterator& b) + { return a.base() < b.base(); } + + + template + EA_CPP14_CONSTEXPR inline bool + operator<=(const reverse_iterator& a, const reverse_iterator& b) + { return a.base() >= b.base(); } + + + template + EA_CPP14_CONSTEXPR inline bool + operator>=(const reverse_iterator& a, const reverse_iterator& b) + { return a.base() <= b.base(); } + + + template + EA_CPP14_CONSTEXPR inline typename reverse_iterator::difference_type + operator-(const reverse_iterator& a, const reverse_iterator& b) + { return b.base() - a.base(); } + + + template + EA_CPP14_CONSTEXPR inline reverse_iterator + operator+(typename reverse_iterator::difference_type n, const reverse_iterator& a) + { return reverse_iterator(a.base() - n); } + + + /// is_reverse_iterator + /// + /// This is a type traits extension utility. + /// Given an iterator, tells if it's a reverse_iterator vs anything else. + /// If it's a reverse iterator wrapped by another iterator then value is false. + /// To consider: Detect that if it's a move_iterator and unwrap + /// move_iterator so we can detect that underneath it's reverse_iterator. + /// + template + struct is_reverse_iterator + : public eastl::false_type {}; + + template + struct is_reverse_iterator< eastl::reverse_iterator > + : public eastl::true_type {}; + + + + /// unwrap_reverse_iterator + /// + /// Returns Iterator::get_base() if it's a reverse_iterator, else returns Iterator as-is. + /// + /// Example usage: + /// vector intVector; + /// eastl::reverse_iterator::iterator> reverseIterator(intVector.begin()); + /// vector::iterator it = unwrap_reverse_iterator(reverseIterator); + /// + /// Disabled until there is considered a good use for it. + /// template + /// inline typename eastl::is_iterator_wrapper_helper::value>::iterator_type unwrap_reverse_iterator(Iterator it) + /// { return eastl::is_iterator_wrapper_helper::value>::get_base(it); } + + + + /// move_iterator + /// + /// From the C++11 Standard, section 24.5.3.1: + /// Class template move_iterator is an iterator adaptor with the same behavior as the underlying iterator + /// except that its dereference operator implicitly converts the value returned by the underlying iterator's + /// dereference operator to an rvalue reference. Some generic algorithms can be called with move iterators to + /// replace copying with moving. + + template + class move_iterator // Don't inherit from iterator. + { + public: + typedef Iterator iterator_type; + typedef iterator_type wrapped_iterator_type; // This is not in the C++ Standard; it's used by use to identify it as a wrapping iterator type. 
+ typedef iterator_traits traits_type; + typedef typename traits_type::iterator_category iterator_category; + typedef typename traits_type::value_type value_type; + typedef typename traits_type::difference_type difference_type; + typedef Iterator pointer; + typedef value_type&& reference; + + protected: + iterator_type mIterator; + + public: + move_iterator() + : mIterator() + { + } + + explicit move_iterator(iterator_type mi) + : mIterator(mi) { } + + template + move_iterator(const move_iterator& mi) + : mIterator(mi.base()) + { + } + + iterator_type base() const + { return mIterator; } + + reference operator*() const + { return eastl::move(*mIterator); } + + pointer operator->() const + { return mIterator; } + + move_iterator& operator++() + { + ++mIterator; + return *this; + } + + move_iterator operator++(int) + { + move_iterator tempMoveIterator = *this; + ++mIterator; + return tempMoveIterator; + } + + move_iterator& operator--() + { + --mIterator; + return *this; + } + + move_iterator operator--(int) + { + move_iterator tempMoveIterator = *this; + --mIterator; + return tempMoveIterator; + } + + move_iterator operator+(difference_type n) const + { return move_iterator(mIterator + n); } + + move_iterator& operator+=(difference_type n) + { + mIterator += n; + return *this; + } + + move_iterator operator-(difference_type n) const + { return move_iterator(mIterator - n); } + + move_iterator& operator-=(difference_type n) + { + mIterator -= n; + return *this; + } + + reference operator[](difference_type n) const + { return eastl::move(mIterator[n]); } + }; + + template + inline bool + operator==(const move_iterator& a, const move_iterator& b) + { return a.base() == b.base(); } + + + template + inline bool + operator!=(const move_iterator& a, const move_iterator& b) + { return !(a == b); } + + + template + inline bool + operator<(const move_iterator& a, const move_iterator& b) + { return a.base() < b.base(); } + + + template + inline bool + operator<=(const move_iterator& a, const move_iterator& b) + { return !(b < a); } + + + template + inline bool + operator>(const move_iterator& a, const move_iterator& b) + { return b < a; } + + + template + inline bool + operator>=(const move_iterator& a, const move_iterator& b) + { return !(a < b); } + + + template + inline auto + operator-(const move_iterator& a, const move_iterator& b) -> decltype(a.base() - b.base()) + { return a.base() - b.base(); } + + + template + inline move_iterator + operator+(typename move_iterator::difference_type n, const move_iterator& a) + { return a + n; } + + + template + inline move_iterator make_move_iterator(Iterator i) + { return move_iterator(i); } + + + // make_move_if_noexcept_iterator returns move_iterator if the Iterator is of a noexcept type; + // otherwise returns Iterator as-is. The point of this is to be able to avoid moves that can generate exceptions and instead + // fall back to copies or whatever the default IteratorType::operator* returns for use by copy/move algorithms. + // To consider: merge the conditional expression usage here with the one used by move_if_noexcept, as they are the same condition. + #if EASTL_EXCEPTIONS_ENABLED + template ::value_type>::value || + !eastl::is_copy_constructible::value_type>::value, + eastl::move_iterator, Iterator>::type> + inline IteratorType make_move_if_noexcept_iterator(Iterator i) + { return IteratorType(i); } + #else + // Else there are no exceptions and thus we always return a move_iterator. 
+ template + inline eastl::move_iterator make_move_if_noexcept_iterator(Iterator i) + { return eastl::move_iterator(i); } + #endif + + + + /// is_move_iterator + /// + /// This is a type traits extension utility. + /// Given an iterator, tells if it's a move iterator vs anything else. + /// Example usage (though somewhat useless): + /// template + /// bool IsMoveIterator() { return typename eastl::is_move_iterator::value; } + /// + template + struct is_move_iterator + : public eastl::false_type {}; + + template + struct is_move_iterator< eastl::move_iterator > + : public eastl::true_type {}; + + + /// unwrap_move_iterator + /// + /// Returns Iterator::get_base() if it's a move_iterator, else returns Iterator as-is. + /// + /// Example usage: + /// vector intVector; + /// eastl::move_iterator::iterator> moveIterator(intVector.begin()); + /// vector::iterator it = unwrap_move_iterator(moveIterator); + /// + template + inline typename eastl::is_iterator_wrapper_helper::value>::iterator_type unwrap_move_iterator(Iterator it) + { return eastl::is_iterator_wrapper_helper::value>::get_base(it); } + + + + + /// back_insert_iterator + /// + /// A back_insert_iterator is simply a class that acts like an iterator but when you + /// assign a value to it, it calls push_back on the container with the value. + /// + template + class back_insert_iterator : public iterator + { + public: + typedef back_insert_iterator this_type; + typedef Container container_type; + typedef typename Container::const_reference const_reference; + + protected: + Container& container; + + public: + //back_insert_iterator(); // Not valid. Must construct with a Container. + + //back_insert_iterator(const this_type& x) // Compiler-implemented + // : container(x.container) { } + + explicit back_insert_iterator(Container& x) + : container(x) { } + + back_insert_iterator& operator=(const_reference value) + { container.push_back(value); return *this; } + + back_insert_iterator& operator=(typename Container::value_type&& value) + { container.push_back(eastl::move(value)); return *this; } + + back_insert_iterator& operator*() + { return *this; } + + back_insert_iterator& operator++() + { return *this; } // This is by design. + + back_insert_iterator operator++(int) + { return *this; } // This is by design. + + protected: + void operator=(const this_type&){} // Declared to avoid compiler warnings about inability to generate this function. + }; + + + /// back_inserter + /// + /// Creates an instance of a back_insert_iterator. + /// + template + inline back_insert_iterator + back_inserter(Container& x) + { return back_insert_iterator(x); } + + + + + /// front_insert_iterator + /// + /// A front_insert_iterator is simply a class that acts like an iterator but when you + /// assign a value to it, it calls push_front on the container with the value. + /// + template + class front_insert_iterator : public iterator + { + public: + typedef front_insert_iterator this_type; + typedef Container container_type; + typedef typename Container::const_reference const_reference; + + protected: + Container& container; + + public: + //front_insert_iterator(); // Not valid. Must construct with a Container. 
+ + //front_insert_iterator(const this_type& x) // Compiler-implemented + // : container(x.container) { } + + explicit front_insert_iterator(Container& x) + : container(x) { } + + front_insert_iterator& operator=(const_reference value) + { container.push_front(value); return *this; } + + front_insert_iterator& operator*() + { return *this; } + + front_insert_iterator& operator++() + { return *this; } // This is by design. + + front_insert_iterator operator++(int) + { return *this; } // This is by design. + + protected: + void operator=(const this_type&){} // Declared to avoid compiler warnings about inability to generate this function. + }; + + + /// front_inserter + /// + /// Creates an instance of a front_insert_iterator. + /// + template + inline front_insert_iterator + front_inserter(Container& x) + { return front_insert_iterator(x); } + + + + + /// insert_iterator + /// + /// An insert_iterator is like an iterator except that when you assign a value to it, + /// the insert_iterator inserts the value into the container and increments the iterator. + /// + /// insert_iterator is an iterator adaptor that functions as an OutputIterator: + /// assignment through an insert_iterator inserts an object into a container. + /// Specifically, if ii is an insert_iterator, then ii keeps track of a container c and + /// an insertion point p; the expression *ii = x performs the insertion container.insert(p, x). + /// + /// If you assign through an insert_iterator several times, then you will be inserting + /// several elements into the underlying container. In the case of a sequence, they will + /// appear at a particular location in the underlying sequence, in the order in which + /// they were inserted: one of the arguments to insert_iterator's constructor is an + /// iterator p, and the new range will be inserted immediately before p. + /// + template + class insert_iterator : public iterator + { + public: + typedef Container container_type; + typedef typename Container::iterator iterator_type; + typedef typename Container::const_reference const_reference; + + protected: + Container& container; + iterator_type it; + + public: + // This assignment operator is defined more to stop compiler warnings (e.g. VC++ C4512) + // than to be useful. However, it does allow an insert_iterator to be assigned to another + // insert iterator provided that they point to the same container. + insert_iterator& operator=(const insert_iterator& x) + { + EASTL_ASSERT(&x.container == &container); + it = x.it; + return *this; + } + + insert_iterator(Container& x, iterator_type itNew) + : container(x), it(itNew) {} + + insert_iterator& operator=(const_reference value) + { + it = container.insert(it, value); + ++it; + return *this; + } + + insert_iterator& operator*() + { return *this; } + + insert_iterator& operator++() + { return *this; } // This is by design. + + insert_iterator& operator++(int) + { return *this; } // This is by design. + + }; // insert_iterator + + + /// inserter + /// + /// Creates an instance of an insert_iterator. + /// + template + inline eastl::insert_iterator + inserter(Container& x, Iterator i) + { + typedef typename Container::iterator iterator; + return eastl::insert_iterator(x, iterator(i)); + } + + + /// is_insert_iterator + /// + /// This is a type traits extension utility. + /// Given an iterator, tells if it's an insert_iterator vs anything else. + /// If it's a insert_iterator wrapped by another iterator then value is false. 
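+    ///
+    /// Illustrative sketch (intList is an assumed eastl::list<int>):
+    ///     auto ii = eastl::inserter(intList, intList.begin());   // insert_iterator<eastl::list<int>>
+    ///     *ii = 42;                                               // performs intList.insert(intList.begin(), 42)
+    ///     static_assert(eastl::is_insert_iterator<decltype(ii)>::value, "inserter() yields an insert_iterator");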
+ /// + template + struct is_insert_iterator + : public eastl::false_type {}; + + template + struct is_insert_iterator< eastl::insert_iterator > + : public eastl::true_type {}; + + + + + ////////////////////////////////////////////////////////////////////////////////// + /// distance + /// + /// Implements the distance() function. There are two versions, one for + /// random access iterators (e.g. with vector) and one for regular input + /// iterators (e.g. with list). The former is more efficient. + /// + template + EA_CONSTEXPR + inline typename eastl::iterator_traits::difference_type + distance_impl(InputIterator first, InputIterator last, EASTL_ITC_NS::input_iterator_tag) + { + typename eastl::iterator_traits::difference_type n = 0; + + while(first != last) + { + ++first; + ++n; + } + return n; + } + + template + EA_CONSTEXPR + inline typename eastl::iterator_traits::difference_type + distance_impl(RandomAccessIterator first, RandomAccessIterator last, EASTL_ITC_NS::random_access_iterator_tag) + { + return last - first; + } + + // Special version defined so that std C++ iterators can be recognized by + // this function. Unfortunately, this function treats all foreign iterators + // as InputIterators and thus can seriously hamper performance in the case + // of large ranges of bidirectional_iterator_tag iterators. + //template + //inline typename eastl::iterator_traits::difference_type + //distance_impl(InputIterator first, InputIterator last, ...) + //{ + // typename eastl::iterator_traits::difference_type n = 0; + // + // while(first != last) + // { + // ++first; + // ++n; + // } + // return n; + //} + + template + EA_CONSTEXPR + inline typename eastl::iterator_traits::difference_type + distance(InputIterator first, InputIterator last) + { + typedef typename eastl::iterator_traits::iterator_category IC; + + return eastl::distance_impl(first, last, IC()); + } + + + + + ////////////////////////////////////////////////////////////////////////////////// + /// advance + /// + /// Implements the advance() function. There are three versions, one for + /// random access iterators (e.g. with vector), one for bidirectional + /// iterators (list) and one for regular input iterators (e.g. with slist). + /// + template + inline void + advance_impl(InputIterator& i, Distance n, EASTL_ITC_NS::input_iterator_tag) + { + while(n--) + ++i; + } + + template + struct advance_bi_impl + { + template + static void advance_impl(BidirectionalIterator& i, Distance n) // Specialization for unsigned distance type. + { + while(n--) + ++i; + } + }; + + template <> + struct advance_bi_impl + { + template + static void advance_impl(BidirectionalIterator& i, Distance n) // Specialization for signed distance type. + { + if(n > 0) + { + while(n--) + ++i; + } + else + { + while(n++) + --i; + } + } + }; + + template + inline void + advance_impl(BidirectionalIterator& i, Distance n, EASTL_ITC_NS::bidirectional_iterator_tag) + { + advance_bi_impl::value>::advance_impl(i, n); + } + + template + inline void + advance_impl(RandomAccessIterator& i, Distance n, EASTL_ITC_NS::random_access_iterator_tag) + { + i += n; + } + + // Special version defined so that std C++ iterators can be recognized by + // this function. Unfortunately, this function treats all foreign iterators + // as InputIterators and thus can seriously hamper performance in the case + // of large ranges of bidirectional_iterator_tag iterators. + //template + //inline void + //advance_impl(InputIterator& i, Distance n, ...) 
+ //{ + // while(n--) + // ++i; + //} + + template + inline void + advance(InputIterator& i, Distance n) + { + typedef typename eastl::iterator_traits::iterator_category IC; + + eastl::advance_impl(i, n, IC()); + } + + + // eastl::next / eastl::prev + // Return the nth/-nth successor of iterator it. + // + // http://en.cppreference.com/w/cpp/iterator/next + // + template + inline InputIterator + next(InputIterator it, typename eastl::iterator_traits::difference_type n = 1) + { + eastl::advance(it, n); + return it; + } + + template + inline InputIterator + prev(InputIterator it, typename eastl::iterator_traits::difference_type n = 1) + { + eastl::advance(it, -n); + return it; + } + + +#if defined(EA_COMPILER_CPP11_ENABLED) && EA_COMPILER_CPP11_ENABLED + + // eastl::data + // + // http://en.cppreference.com/w/cpp/iterator/data + // + template + EA_CPP14_CONSTEXPR auto data(Container& c) -> decltype(c.data()) + { return c.data(); } + + template + EA_CPP14_CONSTEXPR auto data(const Container& c) -> decltype(c.data()) + { return c.data(); } + + template + EA_CPP14_CONSTEXPR T* data(T(&array)[N]) EA_NOEXCEPT + { return array; } + + template + EA_CPP14_CONSTEXPR const E* data(std::initializer_list il) EA_NOEXCEPT + { return il.begin(); } + + + // eastl::size + // + // http://en.cppreference.com/w/cpp/iterator/size + // + template + EA_CPP14_CONSTEXPR auto size(const C& c) -> decltype(c.size()) + { return c.size(); } + + template + EA_CPP14_CONSTEXPR size_t size(const T (&)[N]) EA_NOEXCEPT + { return N; } + + + // eastl::ssize + // + // https://en.cppreference.com/w/cpp/iterator/size + // + template + EA_CPP14_CONSTEXPR ptrdiff_t ssize(const T(&)[N]) EA_NOEXCEPT + { return N; } + + template + EA_CPP14_CONSTEXPR auto ssize(const C& c) + -> eastl::common_type_t> + { + using R = eastl::common_type_t>; + return static_cast(c.size()); + } + + + // eastl::empty + // + // http://en.cppreference.com/w/cpp/iterator/empty + // + template + EA_CPP14_CONSTEXPR auto empty(const Container& c) -> decltype(c.empty()) + { return c.empty(); } + + template + EA_CPP14_CONSTEXPR bool empty(const T (&)[N]) EA_NOEXCEPT + { return false; } + + template + EA_CPP14_CONSTEXPR bool empty(std::initializer_list il) EA_NOEXCEPT + { return il.size() == 0; } + +#endif // defined(EA_COMPILER_CPP11_ENABLED) && EA_COMPILER_CPP11_ENABLED + + + // eastl::begin / eastl::end + // http://en.cppreference.com/w/cpp/iterator/begin + // + // In order to enable eastl::begin and eastl::end, the compiler needs to have conforming support + // for argument-dependent lookup if it supports C++11 range-based for loops. The reason for this is + // that in C++11 range-based for loops result in usage of std::begin/std::end, but allow that to + // be overridden by argument-dependent lookup: + // C++11 Standard, section 6.5.4, paragraph 1. + // "otherwise, begin-expr and end-expr are begin(__range) and end(__range), respectively, + // where begin and end are looked up with argument-dependent lookup (3.4.2). For the + // purposes of this name lookup, namespace std is an associated namespace." + // It turns out that one compiler has a problem: GCC 4.6. That version added support for + // range-based for loops but has broken argument-dependent lookup which was fixed in GCC 4.7. 
+ // + #if (defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION == 4006)) + #define EASTL_BEGIN_END_ENABLED 0 + #else + #define EASTL_BEGIN_END_ENABLED 1 + #endif + + #if EASTL_BEGIN_END_ENABLED + template + EA_CPP14_CONSTEXPR inline auto begin(Container& container) -> decltype(container.begin()) + { + return container.begin(); + } + + template + EA_CPP14_CONSTEXPR inline auto begin(const Container& container) -> decltype(container.begin()) + { + return container.begin(); + } + + template + EA_CPP14_CONSTEXPR inline auto cbegin(const Container& container) -> decltype(container.begin()) + { + return container.begin(); + } + + template + EA_CPP14_CONSTEXPR inline auto end(Container& container) -> decltype(container.end()) + { + return container.end(); + } + + template + EA_CPP14_CONSTEXPR inline auto end(const Container& container) -> decltype(container.end()) + { + return container.end(); + } + + template + EA_CPP14_CONSTEXPR inline auto cend(const Container& container) -> decltype(container.end()) + { + return container.end(); + } + + template + EA_CPP14_CONSTEXPR inline auto rbegin(Container& container) -> decltype(container.rbegin()) + { + return container.rbegin(); + } + + template + EA_CPP14_CONSTEXPR inline auto rbegin(const Container& container) -> decltype(container.rbegin()) + { + return container.rbegin(); + } + + template + EA_CPP14_CONSTEXPR inline auto rend(Container& container) -> decltype(container.rend()) + { + return container.rend(); + } + + template + EA_CPP14_CONSTEXPR inline auto rend(const Container& container) -> decltype(container.rend()) + { + return container.rend(); + } + + template + EA_CPP14_CONSTEXPR inline auto crbegin(const Container& container) -> decltype(eastl::rbegin(container)) + { + return container.rbegin(); + } + + template + EA_CPP14_CONSTEXPR inline auto crend(const Container& container) -> decltype(eastl::rend(container)) + { + return container.rend(); + } + + template + EA_CPP14_CONSTEXPR inline T* begin(T (&arrayObject)[arraySize]) + { + return arrayObject; + } + + template + EA_CPP14_CONSTEXPR inline T* end(T (&arrayObject)[arraySize]) + { + return (arrayObject + arraySize); + } + + template + EA_CPP14_CONSTEXPR inline reverse_iterator rbegin(T (&arrayObject)[arraySize]) + { + return reverse_iterator(arrayObject + arraySize); + } + + template + EA_CPP14_CONSTEXPR inline reverse_iterator rend(T (&arrayObject)[arraySize]) + { + return reverse_iterator(arrayObject); + } + + template + EA_CPP14_CONSTEXPR inline reverse_iterator rbegin(std::initializer_list ilist) + { + return eastl::reverse_iterator(ilist.end()); + } + + template + EA_CPP14_CONSTEXPR inline reverse_iterator rend(std::initializer_list ilist) + { + return eastl::reverse_iterator(ilist.begin()); + } + + template + EA_CPP14_CONSTEXPR reverse_iterator make_reverse_iterator(Iterator i) + { return reverse_iterator(i); } + + #endif // EASTL_BEGIN_END_ENABLED + +} // namespace eastl + + + +// Some compilers (e.g. GCC 4.6) support range-based for loops, but have a bug with +// respect to argument-dependent lookup which results on them unilaterally using std::begin/end +// with range-based for loops. To work around this we #include for this case in +// order to make std::begin/end visible to users of , for portability. 
+#if !EASTL_BEGIN_END_ENABLED && !defined(EA_COMPILER_NO_RANGE_BASED_FOR_LOOP) + #include +#endif + + + +EA_RESTORE_VC_WARNING(); +EA_RESTORE_VC_WARNING(); + +#endif // Header include guard diff --git a/libkram/eastl/include/EASTL/linked_array.h b/libkram/eastl/include/EASTL/linked_array.h new file mode 100644 index 00000000..88d99146 --- /dev/null +++ b/libkram/eastl/include/EASTL/linked_array.h @@ -0,0 +1,336 @@ +/////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +/////////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// This class implements a linked_array template, which is an array version +// of linked_ptr. See linked_ptr for detailed documentation. +/////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_LINKED_ARRAY_H +#define EASTL_LINKED_ARRAY_H + + +#include +#include // Defines smart_array_deleter +#include // Defines linked_ptr_base +#include // Definition of ptrdiff_t + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + + + +namespace eastl +{ + + /// class linked_array + /// + /// This class implements a linked_array template, which is an array version + /// of linked_ptr. See linked_ptr for detailed documentation. + /// + template > + class linked_array + { + + protected: + + /// this_type + /// This is an alias for linked_array, this class. + typedef linked_array this_type; + + /// deleter_type + typedef Deleter deleter_type; + + T* mpArray; + mutable const this_type* mpPrev; + mutable const this_type* mpNext; + + void link(const linked_array& linkedArray) + { // This code can only be called when we are in a reset state. + // assert(!mpArray && (mpNext == mpPrev)); + mpNext = linkedArray.mpNext; + mpNext->mpPrev = this; + mpPrev = &linkedArray; + linkedArray.mpNext = this; + } + + public: + /// element_type + /// Synonym for type T, useful for external code to reference the + /// type in a generic way. + typedef T element_type; + + + /// linked_array + /// Takes ownership of the pointer. It is OK if the input pointer is null. + explicit linked_array(T* pArray = NULL) + : mpArray(pArray) + { + mpPrev = mpNext = this; + } + + + /// linked_array + /// Shares ownership of a pointer with another instance of linked_array. + linked_array(const linked_array& linkedArray) + : mpArray(linkedArray.mpArray) + { + if(mpArray) + link(linkedArray); + else + mpPrev = mpNext = this; + } + + + /// ~linked_array + /// Removes this object from the of objects using the shared pointer. + /// If this object is the last owner of the shared pointer, the shared + /// pointer is deleted. + ~linked_array() + { + reset(); + } + + + /// operator= + /// Copies another linked_array to this object. Note that this object + /// may already own a shared pointer with another different pointer + /// (but still of the same type) before this call. In that case, + /// this function removes ownership of the old pointer and takes shared + /// ownership of the new pointer and increments its reference count. 
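+        ///
+        /// Illustrative sketch (the array sizes are assumed for the example; the default
+        /// deleter is expected to delete[] an old array once no owner remains):
+        ///     linked_array<int> a(new int[8]);
+        ///     linked_array<int> b(new int[4]);
+        ///     b = a;   // b's old array is deleted; a and b now share one array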
+ linked_array& operator=(const linked_array& linkedArray) + { + if(linkedArray.mpArray != mpArray) + { + reset(linkedArray.mpArray); + if(linkedArray.mpArray) + link(linkedArray); + } + return *this; + } + + + /// operator= + /// Assigns a new pointer. If the new pointer is equivalent + /// to the current pointer, nothing is done. Otherwise the + /// current pointer is unlinked and possibly destroyed. + /// The new pointer can be NULL. + linked_array& operator=(T* pArray) + { + reset(pArray); + return *this; + } + + + /// reset + /// Releases the owned pointer and takes ownership of the + /// passed in pointer. If the passed in pointer is the same + /// as the owned pointer, nothing is done. The passed in pointer + /// can be null, in which case the use count is set to 1. + void reset(T* pArray = NULL) + { + if(pArray != mpArray) + { + if(unique()) + { + deleter_type del; + del(mpArray); + } + else + { + mpPrev->mpNext = mpNext; + mpNext->mpPrev = mpPrev; + mpPrev = mpNext = this; + } + mpArray = pArray; + } + } + + + /// swap + /// Exchanges the owned pointer beween two linkedArray objects. + /// + /// This function is disabled as it is currently deemed unsafe. + /// The problem is that the only way to implement this function + /// is to transfer pointers between the objects; you cannot + /// transfer the linked list membership between the objects. + /// Thus unless both linked_array objects were 'unique()', the + /// shared pointers would be duplicated amongst containers, + /// resulting in a crash. + //void swap(linked_array& linkedArray) + //{ + // if(linkedArray.mpArray != mpArray) + // { // This is only safe if both linked_arrays are unique(). + // linkedArray::element_type* const pArrayTemp = linkedArray.mpArray; + // linkedArray.reset(mpArray); + // reset(pArrayTemp); + // } + //} + + + /// operator[] + /// Returns a reference to the specified item in the owned pointer array. + T& operator[](ptrdiff_t i) const + { + // assert(mpArray && (i >= 0)); + return mpArray[i]; + } + + + /// operator* + /// Returns the owner pointer dereferenced. + T& operator*() const + { + return *mpArray; + } + + + /// operator-> + /// Allows access to the owned pointer via operator->() + T* operator->() const + { + return mpArray; + } + + + /// get + /// Returns the owned pointer. Note that this class does + /// not provide an operator T() function. This is because such + /// a thing (automatic conversion) is deemed unsafe. + T* get() const + { + return mpArray; + } + + + /// use_count + /// Returns the use count of the shared pointer. + /// The return value is one if the owned pointer is null. + /// This function is provided for compatibility with the + /// proposed C++ standard and for debugging purposes. It is not + /// intended for runtime use given that its execution time is + /// not constant. + int use_count() const + { + int useCount(1); + + for(const linked_ptr_base* pCurrent = this; pCurrent->mpNext != this; pCurrent = pCurrent->mpNext) + ++useCount; + + return useCount; + } + + + /// unique + /// Returns true if the use count of the owned pointer is one. + /// The return value is true if the owned pointer is null. + bool unique() const + { + return (mpNext == this); + } + + + /// Implicit operator bool + /// Allows for using a linked_array as a boolean. + /// Note that below we do not use operator bool(). The reason for this + /// is that booleans automatically convert up to short, int, float, etc. + /// The result is that this: if(linkedArray == 1) would yield true (bad). 
+ typedef T* (this_type::*bool_)() const; + operator bool_() const + { + if(mpArray) + return &this_type::get; + return NULL; + } + + + /// operator! + /// This returns the opposite of operator bool; it returns true if + /// the owned pointer is null. Some compilers require this and some don't. + bool operator!() + { + return (mpArray == NULL); + } + + + /// force_delete + /// Forces deletion of the shared pointer. Fixes all references to the + /// pointer by any other owners to be NULL. + void force_delete() + { + T* const pArray = mpArray; + + this_type* p = this; + do + { + this_type* const pNext = const_cast(p->mpNext); + p->mpArray = NULL; + p->mpNext = p->mpPrev = p; + p = pNext; + } + while(p != this); + + deleter_type del; + del(pArray); + } + + }; // class linked_array + + + + /// get_pointer + /// Returns linked_array::get() via the input linked_array. Provided for compatibility + /// with certain well-known libraries that use this functionality. + template + inline T* get_pointer(const linked_array& linkedArray) + { + return linkedArray.get(); + } + + + /// operator== + /// Compares two linked_array objects for equality. Equality is defined as + /// being true when the pointer shared between two linked_array objects is equal. + template + inline bool operator==(const linked_array& linkedArray1, const linked_array& linkedArray2) + { + return (linkedArray1.get() == linkedArray2.get()); + } + + + /// operator!= + /// Compares two linked_array objects for inequality. Equality is defined as + /// being true when the pointer shared between two linked_array objects is equal. + template + inline bool operator!=(const linked_array& linkedArray1, const linked_array& linkedArray2) + { + return (linkedArray1.get() != linkedArray2.get()); + } + + + /// operator< + /// Returns which linked_array is 'less' than the other. Useful when storing + /// sorted containers of linked_array objects. + template + inline bool operator<(const linked_array& linkedArray1, const linked_array& linkedArray2) + { + return (linkedArray1.get() < linkedArray2.get()); + } + + +} // namespace eastl + + +#endif // Header include guard + + + + + + + + + diff --git a/libkram/eastl/include/EASTL/linked_ptr.h b/libkram/eastl/include/EASTL/linked_ptr.h new file mode 100644 index 00000000..f57681a9 --- /dev/null +++ b/libkram/eastl/include/EASTL/linked_ptr.h @@ -0,0 +1,426 @@ +/////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +/////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_LINKED_PTR_H +#define EASTL_LINKED_PTR_H + + + +#include +#include // Defines smart_ptr_deleter +#include +#include + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + + + +namespace eastl +{ + + /// linked_ptr_base + /// + /// This class allows linked_ptr and linked_ptr to share the same + /// base nodes and thus be in the same linked list. + /// + struct linked_ptr_base + { + mutable linked_ptr_base* mpPrev; + mutable linked_ptr_base* mpNext; + }; + + + /// linked_ptr + /// + /// This class implements a linked_ptr template. A linked_ptr is like the C++ + /// Standard Library auto_ptr except that it allows sharing of pointers between + /// instances of auto_ptr via reference counting. 
linked_ptr objects can safely + /// be copied and can safely be used in C++ Standard Library containers such + /// as std::vector or std::list. This implementation, however, is not thread-safe. + /// you would need to use a separate linked_ptr_mt (multi-threaded) to get + /// thread safety. + /// + /// linked_ptr is a variation of shared_ptr (a.k.a. counted_ptr) which differs + /// in that instead of being implemented by a shared integer stored on the heap, + /// it is implemented by linked list stored within the linked_ptr object itself. + /// The result is that no memory is explicitly allocated from the heap, though + /// the cost of each linked_ptr object is 12 bytes of memory (32 bit machine) + /// instead of 4 bytes for the case of shared_ptr (depending on the heap). + /// + template > + class linked_ptr : public linked_ptr_base + { + protected: + template friend class linked_ptr; + + /// this_type + /// This is an alias for linked_ptr, this class. + typedef linked_ptr this_type; + + /// deleter_type + typedef Deleter deleter_type; + + T* mpValue; /// The owned pointer. + + template + void link(const linked_ptr& linkedPtr) + { // This code can only be called when we are in a reset state. + // assert(!mpValue && (mpNext == mpPrev)); + mpNext = linkedPtr.mpNext; + mpNext->mpPrev = this; + mpPrev = const_cast*>(&linkedPtr); + linkedPtr.mpNext = this; + } + + public: + /// element_type + /// Synonym for type T, useful for external code to reference the + /// type in a generic way. + typedef T element_type; + + + /// linked_ptr + /// Default constructor. + linked_ptr() + : mpValue(NULL) + { + mpPrev = mpNext = this; + } + + + /// linked_ptr + /// Takes ownership of the pointer. It is OK if the input pointer is null. + template + explicit linked_ptr(U* pValue) + : mpValue(pValue) + { + mpPrev = mpNext = this; + } + + + /// linked_ptr + /// Construction with self type. + /// If we want a shared_ptr constructor that is templated on linked_ptr, + /// then we need to make it in addition to this function, as otherwise + /// the compiler will generate this function and things will go wrong. + linked_ptr(const linked_ptr& linkedPtr) + : mpValue(linkedPtr.mpValue) + { + if(mpValue) + link(linkedPtr); + else + mpPrev = mpNext = this; + } + + + /// linked_ptr + /// Shares ownership of a pointer with another instance of linked_ptr. + template + linked_ptr(const linked_ptr& linkedPtr) + : mpValue(linkedPtr.mpValue) + { + if(mpValue) + link(linkedPtr); + else + mpPrev = mpNext = this; + } + + + /// ~linked_ptr + /// Removes this object from the of objects using the shared pointer. + /// If this object is the last owner of the shared pointer, the shared + /// pointer is deleted. + ~linked_ptr() + { + reset(); + } + + + /// operator= + /// If we want a shared_ptr operator= that is templated on linked_ptr, + /// then we need to make it in addition to this function, as otherwise + /// the compiler will generate this function and things will go wrong. + linked_ptr& operator=(const linked_ptr& linkedPtr) + { + if(linkedPtr.mpValue != mpValue) + { + reset(linkedPtr.mpValue); + if(linkedPtr.mpValue) + link(linkedPtr); + } + return *this; + } + + + /// operator= + /// Copies another linked_ptr to this object. Note that this object + /// may already own a shared pointer with another different pointer + /// (but still of the same type) before this call. In that case, + /// this function removes ownership of the old pointer and takes shared + /// ownership of the new pointer and increments its reference count. 
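+        ///
+        /// Illustrative sketch (Widget is an assumed type):
+        ///     linked_ptr<Widget> a(new Widget);
+        ///     linked_ptr<Widget> b;
+        ///     b = a;   // a and b are now linked into a single ownership ring
+        ///     // At this point a.use_count() == 2 and a.unique() == false.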
+ template + linked_ptr& operator=(const linked_ptr& linkedPtr) + { + if(linkedPtr.mpValue != mpValue) + { + reset(linkedPtr.mpValue); + if(linkedPtr.mpValue) + link(linkedPtr); + } + return *this; + } + + + /// operator= + /// Assigns a new pointer. If the new pointer is equivalent + /// to the current pointer, nothing is done. Otherwise the + /// current pointer is unlinked and possibly destroyed. + /// The new pointer can be NULL. + template + linked_ptr& operator=(U* pValue) + { + reset(pValue); + return *this; + } + + + /// reset + /// Releases the owned pointer and takes ownership of the + /// passed in pointer. If the passed in pointer is the same + /// as the owned pointer, nothing is done. The passed in pointer + /// can be NULL, in which case the use count is set to 1. + template + void reset(U* pValue) + { + if(pValue != mpValue) + { + if(unique()) + { + deleter_type del; + del(mpValue); + } + else + { + mpPrev->mpNext = mpNext; + mpNext->mpPrev = mpPrev; + mpPrev = mpNext = this; + } + mpValue = pValue; + } + } + + + /// reset + /// Resets the container with NULL. If the current pointer + /// is non-NULL, it is unlinked and possibly destroyed. + void reset() + { + reset((T*)NULL); + } + + + /// swap + /// Exchanges the owned pointer beween two linkedPtr objects. + /// + /// This function is disabled as it is currently deemed unsafe. + /// The problem is that the only way to implement this function + /// is to transfer pointers between the objects; you cannot + /// transfer the linked list membership between the objects. + /// Thus unless both linked_ptr objects were 'unique()', the + /// shared pointers would be duplicated amongst containers, + /// resulting in a crash. + //template + //void swap(linked_ptr& linkedPtr) + //{ + // if(linkedPtr.mpValue != mpValue) + // { // This is only safe if both linked_ptrs are unique(). + // linkedPtr::element_type* const pValueTemp = linkedPtr.mpValue; + // linkedPtr.reset(mpValue); + // reset(pValueTemp); + // } + //} + + + /// operator* + /// Returns the owner pointer dereferenced. + T& operator*() const + { + return *mpValue; + } + + + /// operator-> + /// Allows access to the owned pointer via operator->() + T* operator->() const + { + return mpValue; + } + + + /// get + /// Returns the owned pointer. Note that this class does + /// not provide an operator T() function. This is because such + /// a thing (automatic conversion) is deemed unsafe. + T* get() const + { + return mpValue; + } + + + /// use_count + /// Returns the use count of the shared pointer. + /// The return value is one if the owned pointer is null. + /// This function is provided for compatibility with the + /// proposed C++ standard and for debugging purposes. It is not + /// intended for runtime use given that its execution time is + /// not constant. + int use_count() const + { + int useCount(1); + + for(const linked_ptr_base* pCurrent = static_cast(this); + pCurrent->mpNext != static_cast(this); pCurrent = pCurrent->mpNext) + ++useCount; + + return useCount; + } + + + /// unique + /// Returns true if the use count of the owned pointer is one. + /// The return value is true if the owned pointer is null. + bool unique() const + { + return (mpNext == static_cast(this)); + } + + + /// Implicit operator bool + /// Allows for using a linked_ptr as a boolean. + /// Note that below we do not use operator bool(). The reason for this + /// is that booleans automatically convert up to short, int, float, etc. 
+ /// The result is that this: if(linkedPtr == 1) would yield true (bad). + typedef T* (this_type::*bool_)() const; + operator bool_() const + { + if(mpValue) + return &this_type::get; + return NULL; + } + + + /// operator! + /// This returns the opposite of operator bool; it returns true if + /// the owned pointer is null. Some compilers require this and some don't. + bool operator!() + { + return (mpValue == NULL); + } + + + /// detach + /// Returns ownership of the pointer to the caller. Fixes all + /// references to the pointer by any other owners to be NULL. + /// This function can work properly only if all entries in the list + /// refer to type T and none refer to any other type (e.g. U). + T* detach() + { + T* const pValue = mpValue; + + linked_ptr_base* p = this; + do + { + linked_ptr_base* const pNext = p->mpNext; + static_cast(p)->mpValue = NULL; + p->mpNext = p->mpPrev = p; + p = pNext; + } + while(p != this); + + return pValue; + } + + /// force_delete + /// Forces deletion of the shared pointer. Fixes all references to the + /// pointer by any other owners to be NULL. + /// This function can work properly only if all entries in the list + /// refer to type T and none refer to any other type (e.g. U). + void force_delete() + { + T* const pValue = detach(); + Deleter del; + del(pValue); + } + + }; // class linked_ptr + + + + /// get_pointer + /// Returns linked_ptr::get() via the input linked_ptr. Provided for compatibility + /// with certain well-known libraries that use this functionality. + template + inline T* get_pointer(const linked_ptr& linkedPtr) + { + return linkedPtr.get(); + } + + + /// operator== + /// Compares two linked_ptr objects for equality. Equality is defined as + /// being true when the pointer shared between two linked_ptr objects is equal. + template + inline bool operator==(const linked_ptr& linkedPtr1, const linked_ptr& linkedPtr2) + { + return (linkedPtr1.get() == linkedPtr2.get()); + } + + + /// operator!= + /// Compares two linked_ptr objects for inequality. Equality is defined as + /// being true when the pointer shared between two linked_ptr objects is equal. + template + inline bool operator!=(const linked_ptr& linkedPtr1, const linked_ptr& linkedPtr2) + { + return (linkedPtr1.get() != linkedPtr2.get()); + } + + + /// operator< + /// Returns which linked_ptr is 'less' than the other. Useful when storing + /// sorted containers of linked_ptr objects. + template + inline bool operator<(const linked_ptr& linkedPtr1, const linked_ptr& linkedPtr2) + { + return (linkedPtr1.get() < linkedPtr2.get()); + } + + +} // namespace eastl + + +#endif // Header include guard + + + + + + + + + + + + + + + + + + + + diff --git a/libkram/eastl/include/EASTL/list.h b/libkram/eastl/include/EASTL/list.h new file mode 100644 index 00000000..680dcad7 --- /dev/null +++ b/libkram/eastl/include/EASTL/list.h @@ -0,0 +1,2168 @@ +/////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +/////////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// This file implements a doubly-linked list, much like the C++ std::list class. +// The primary distinctions between this list and std::list are: +// - list doesn't implement some of the less-frequently used functions +// of std::list. Any required functions can be added at a later time. 
+// - list has a couple extension functions that increase performance. +// - list can contain objects with alignment requirements. std::list cannot +// do so without a bit of tedious non-portable effort. +// - list has optimizations that don't exist in the STL implementations +// supplied by library vendors for our targeted platforms. +// - list supports debug memory naming natively. +// - list::size() by default is not a constant time function, like the list::size +// in some std implementations such as STLPort and SGI STL but unlike the +// list in Dinkumware and Metrowerks. The EASTL_LIST_SIZE_CACHE option can change this. +// - list provides a guaranteed portable node definition that allows users +// to write custom fixed size node allocators that are portable. +// - list is easier to read, debug, and visualize. +// - list is savvy to an environment that doesn't have exception handling, +// as is sometimes the case with console or embedded environments. +// - list has less deeply nested function calls and allows the user to +// enable forced inlining in debug builds in order to reduce bloat. +// - list doesn't keep a member size variable. This means that list is +// smaller than std::list (depends on std::list) and that for most operations +// it is faster than std::list. However, the list::size function is slower. +// - list::size_type is defined as eastl_size_t instead of size_t in order to +// save memory and run faster on 64 bit systems. +/////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_LIST_H +#define EASTL_LIST_H + + +#include +#include +#include +#include +#include +#include +#include + +EA_DISABLE_ALL_VC_WARNINGS() +#include +#include +EA_RESTORE_ALL_VC_WARNINGS() + + +// 4530 - C++ exception handler used, but unwind semantics are not enabled. Specify /EHsc +// 4345 - Behavior change: an object of POD type constructed with an initializer of the form () will be default-initialized +// 4571 - catch(...) semantics changed since Visual C++ 7.1; structured exceptions (SEH) are no longer caught. +// 4623 - default constructor was implicitly defined as deleted +EA_DISABLE_VC_WARNING(4530 4345 4571 4623); + + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + + + +namespace eastl +{ + + /// EASTL_LIST_DEFAULT_NAME + /// + /// Defines a default container name in the absence of a user-provided name. + /// + #ifndef EASTL_LIST_DEFAULT_NAME + #define EASTL_LIST_DEFAULT_NAME EASTL_DEFAULT_NAME_PREFIX " list" // Unless the user overrides something, this is "EASTL list". + #endif + + + /// EASTL_LIST_DEFAULT_ALLOCATOR + /// + #ifndef EASTL_LIST_DEFAULT_ALLOCATOR + #define EASTL_LIST_DEFAULT_ALLOCATOR allocator_type(EASTL_LIST_DEFAULT_NAME) + #endif + + + + /// ListNodeBase + /// + /// We define a ListNodeBase separately from ListNode (below), because it allows + /// us to have non-templated operations such as insert, remove (below), and it + /// makes it so that the list anchor node doesn't carry a T with it, which would + /// waste space and possibly lead to surprising the user due to extra Ts existing + /// that the user didn't explicitly create. The downside to all of this is that + /// it makes debug viewing of a list harder, given that the node pointers are of + /// type ListNodeBase and not ListNode. However, see ListNodeBaseProxy below. 
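// --- Illustrative aside (not part of the EASTL sources in this patch) ---
// The rationale above can be restated with a minimal standalone sketch of the
// same circular, sentinel-anchored linking. An empty ring is a sentinel whose
// mpNext and mpPrev point at itself (see ListBase::DoInit further below), so
// insertion and removal are a handful of pointer writes with no null checks or
// special cases for the empty list. The names SketchNode, sketch_insert_before
// and sketch_remove are invented for illustration only.

struct SketchNode
{
    SketchNode* mpNext;
    SketchNode* mpPrev;
};

// Links pNode into the ring immediately before pNext.
inline void sketch_insert_before(SketchNode* pNode, SketchNode* pNext)
{
    pNode->mpNext = pNext;
    pNode->mpPrev = pNext->mpPrev;
    pNext->mpPrev->mpNext = pNode;
    pNext->mpPrev = pNode;
}

// Unlinks pNode from whatever ring it is in; pNode's own pointers become stale.
inline void sketch_remove(SketchNode* pNode)
{
    pNode->mpNext->mpPrev = pNode->mpPrev;
    pNode->mpPrev->mpNext = pNode->mpNext;
}
// --- End aside ---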
+ /// + struct ListNodeBase + { + ListNodeBase* mpNext; + ListNodeBase* mpPrev; + + void insert(ListNodeBase* pNext) EA_NOEXCEPT; // Inserts this standalone node before the node pNext in pNext's list. + void remove() EA_NOEXCEPT; // Removes this node from the list it's in. Leaves this node's mpNext/mpPrev invalid. + void splice(ListNodeBase* pFirst, ListNodeBase* pLast) EA_NOEXCEPT; // Removes [pFirst,pLast) from the list it's in and inserts it before this in this node's list. + void reverse() EA_NOEXCEPT; // Reverses the order of nodes in the circular list this node is a part of. + static void swap(ListNodeBase& a, ListNodeBase& b) EA_NOEXCEPT; // Swaps the nodes a and b in the lists to which they belong. + + void insert_range(ListNodeBase* pFirst, ListNodeBase* pFinal) EA_NOEXCEPT; // Differs from splice in that first/final aren't in another list. + static void remove_range(ListNodeBase* pFirst, ListNodeBase* pFinal) EA_NOEXCEPT; // + } EASTL_LIST_PROXY_MAY_ALIAS; + + + #if EASTL_LIST_PROXY_ENABLED + + /// ListNodeBaseProxy + /// + /// In debug builds, we define ListNodeBaseProxy to be the same thing as + /// ListNodeBase, except it is templated on the parent ListNode class. + /// We do this because we want users in debug builds to be able to easily + /// view the list's contents in a debugger GUI. We do this only in a debug + /// build for the reasons described above: that ListNodeBase needs to be + /// as efficient as possible and not cause code bloat or extra function + /// calls (inlined or not). + /// + /// ListNodeBaseProxy *must* be separate from its parent class ListNode + /// because the list class must have a member node which contains no T value. + /// It is thus incorrect for us to have one single ListNode class which + /// has mpNext, mpPrev, and mValue. So we do a recursive template trick in + /// the definition and use of SListNodeBaseProxy. + /// + template + struct ListNodeBaseProxy + { + LN* mpNext; + LN* mpPrev; + }; + + template + struct ListNode : public ListNodeBaseProxy< ListNode > + { + T mValue; + }; + + #else + + EA_DISABLE_VC_WARNING(4625 4626) + template + struct ListNode : public ListNodeBase + { + T mValue; + }; + EA_RESTORE_VC_WARNING() + + #endif + + + + + /// ListIterator + /// + template + struct ListIterator + { + typedef ListIterator this_type; + typedef ListIterator iterator; + typedef ListIterator const_iterator; + typedef eastl_size_t size_type; // See config.h for the definition of eastl_size_t, which defaults to size_t. + typedef ptrdiff_t difference_type; + typedef T value_type; + typedef ListNode node_type; + typedef Pointer pointer; + typedef Reference reference; + typedef EASTL_ITC_NS::bidirectional_iterator_tag iterator_category; + + public: + node_type* mpNode; + + public: + ListIterator() EA_NOEXCEPT; + ListIterator(const ListNodeBase* pNode) EA_NOEXCEPT; + ListIterator(const iterator& x) EA_NOEXCEPT; + + this_type next() const EA_NOEXCEPT; + this_type prev() const EA_NOEXCEPT; + + reference operator*() const EA_NOEXCEPT; + pointer operator->() const EA_NOEXCEPT; + + this_type& operator++() EA_NOEXCEPT; + this_type operator++(int) EA_NOEXCEPT; + + this_type& operator--() EA_NOEXCEPT; + this_type operator--(int) EA_NOEXCEPT; + + }; // ListIterator + + + + + /// ListBase + /// + /// See VectorBase (class vector) for an explanation of why we + /// create this separate base class. 
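// (In short, echoing the fuller VectorBase discussion: the base class owns the
// sentinel node and the allocator, and ~ListBase() calls DoClear(), so if a
// derived list constructor throws part-way through populating the container,
// the nodes allocated so far are still released without any try/catch in the
// derived class.)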
+ /// + template + class ListBase + { + public: + typedef T value_type; + typedef Allocator allocator_type; + typedef ListNode node_type; + typedef eastl_size_t size_type; // See config.h for the definition of eastl_size_t, which defaults to size_t. + typedef ptrdiff_t difference_type; + #if EASTL_LIST_PROXY_ENABLED + typedef ListNodeBaseProxy< ListNode > base_node_type; + #else + typedef ListNodeBase base_node_type; // We use ListNodeBase instead of ListNode because we don't want to create a T. + #endif + + protected: + eastl::compressed_pair mNodeAllocator; + #if EASTL_LIST_SIZE_CACHE + size_type mSize; + #endif + + base_node_type& internalNode() EA_NOEXCEPT { return mNodeAllocator.first(); } + base_node_type const& internalNode() const EA_NOEXCEPT { return mNodeAllocator.first(); } + allocator_type& internalAllocator() EA_NOEXCEPT { return mNodeAllocator.second(); } + const allocator_type& internalAllocator() const EA_NOEXCEPT { return mNodeAllocator.second(); } + + public: + const allocator_type& get_allocator() const EA_NOEXCEPT; + allocator_type& get_allocator() EA_NOEXCEPT; + void set_allocator(const allocator_type& allocator); + + protected: + ListBase(); + ListBase(const allocator_type& a); + ~ListBase(); + + node_type* DoAllocateNode(); + void DoFreeNode(node_type* pNode); + + void DoInit() EA_NOEXCEPT; + void DoClear(); + + }; // ListBase + + + + + /// list + /// + /// -- size() is O(n) -- + /// Note that as of this writing, list::size() is an O(n) operation when EASTL_LIST_SIZE_CACHE is disabled. + /// That is, getting the size of the list is not a fast operation, as it requires traversing the list and + /// counting the nodes. We could make list::size() be fast by having a member mSize variable. There are reasons + /// for having such functionality and reasons for not having such functionality. We currently choose + /// to not have a member mSize variable as it would add four bytes to the class, add a tiny amount + /// of processing to functions such as insert and erase, and would only serve to improve the size + /// function, but no others. The alternative argument is that the C++ standard states that std::list + /// should be an O(1) operation (i.e. have a member size variable), most C++ standard library list + /// implementations do so, the size is but an integer which is quick to update, and many users + /// expect to have a fast size function. The EASTL_LIST_SIZE_CACHE option changes this. + /// To consider: Make size caching an optional template parameter. + /// + /// Pool allocation + /// If you want to make a custom memory pool for a list container, your pool + /// needs to contain items of type list::node_type. So if you have a memory + /// pool that has a constructor that takes the size of pool items and the + /// count of pool items, you would do this (assuming that MemoryPool implements + /// the Allocator interface): + /// typedef list WidgetList; // Delare your WidgetList type. + /// MemoryPool myPool(sizeof(WidgetList::node_type), 100); // Make a pool of 100 Widget nodes. + /// WidgetList myList(&myPool); // Create a list that uses the pool. 
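// --- Illustrative aside (not part of the EASTL sources in this patch) ---
// One possible shape for the MemoryPool mentioned above, assuming the usual
// EASTL allocator interface (two allocate overloads, deallocate,
// get_name/set_name, and equality comparison). PoolCore, MemoryPool and every
// detail below are invented for illustration; alignment beyond malloc's
// default, thread safety and error handling are deliberately ignored.

#include <stdlib.h>  // malloc/free, used only by this sketch

// Shared pool state: one buffer carved into fixed-size blocks threaded onto an
// intrusive free list.
struct PoolCore
{
    PoolCore(size_t itemSize, size_t itemCount)
        : mItemSize(itemSize < sizeof(void*) ? sizeof(void*) : itemSize),
          mpStorage(static_cast<char*>(malloc(mItemSize * itemCount))),
          mpFreeList(NULL)
    {
        for(size_t i = 0; i < itemCount; ++i)
        {
            void** ppBlock = reinterpret_cast<void**>(mpStorage + (i * mItemSize));
            *ppBlock = mpFreeList;   // Push block i onto the free list.
            mpFreeList = ppBlock;
        }
    }
    ~PoolCore() { free(mpStorage); }

    size_t mItemSize;
    char*  mpStorage;
    void** mpFreeList;
};

// Copyable allocator handle. Containers hold allocators by value, so the
// handle only points at the shared PoolCore.
class MemoryPool
{
public:
    explicit MemoryPool(PoolCore* pCore, const char* pName = "MemoryPool")
        : mpCore(pCore), mpName(pName) {}

    void* allocate(size_t n, int /*flags*/ = 0)
    {
        if((n > mpCore->mItemSize) || (mpCore->mpFreeList == NULL))
            return NULL;   // A production pool would assert, grow, or fall back.
        void* const pBlock = mpCore->mpFreeList;
        mpCore->mpFreeList = static_cast<void**>(*mpCore->mpFreeList);
        return pBlock;
    }

    void* allocate(size_t n, size_t /*alignment*/, size_t /*offset*/, int flags = 0)
        { return allocate(n, flags); }   // Extra alignment is ignored in this sketch.

    void deallocate(void* p, size_t /*n*/)
    {
        if(p)
        {
            void** ppBlock = static_cast<void**>(p);
            *ppBlock = mpCore->mpFreeList;   // Return the block to the free list.
            mpCore->mpFreeList = ppBlock;
        }
    }

    const char* get_name() const            { return mpName; }
    void        set_name(const char* pName) { mpName = pName; }

    friend bool operator==(const MemoryPool& a, const MemoryPool& b) { return a.mpCore == b.mpCore; }
    friend bool operator!=(const MemoryPool& a, const MemoryPool& b) { return a.mpCore != b.mpCore; }

private:
    PoolCore*   mpCore;
    const char* mpName;
};

// One possible wiring, mirroring the comment above (Widget is hypothetical):
//     typedef eastl::list<Widget, MemoryPool> WidgetList;
//     PoolCore   core(sizeof(WidgetList::node_type), 100);
//     MemoryPool pool(&core);
//     WidgetList widgets(pool);
// --- End aside ---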
+ /// + template + class list : public ListBase + { + typedef ListBase base_type; + typedef list this_type; + + public: + typedef T value_type; + typedef T* pointer; + typedef const T* const_pointer; + typedef T& reference; + typedef const T& const_reference; + typedef ListIterator iterator; + typedef ListIterator const_iterator; + typedef eastl::reverse_iterator reverse_iterator; + typedef eastl::reverse_iterator const_reverse_iterator; + typedef typename base_type::size_type size_type; + typedef typename base_type::difference_type difference_type; + typedef typename base_type::allocator_type allocator_type; + typedef typename base_type::node_type node_type; + typedef typename base_type::base_node_type base_node_type; + + using base_type::mNodeAllocator; + using base_type::DoAllocateNode; + using base_type::DoFreeNode; + using base_type::DoClear; + using base_type::DoInit; + using base_type::get_allocator; + #if EASTL_LIST_SIZE_CACHE + using base_type::mSize; + #endif + using base_type::internalNode; + using base_type::internalAllocator; + + public: + list(); + list(const allocator_type& allocator); + explicit list(size_type n, const allocator_type& allocator = EASTL_LIST_DEFAULT_ALLOCATOR); + list(size_type n, const value_type& value, const allocator_type& allocator = EASTL_LIST_DEFAULT_ALLOCATOR); + list(const this_type& x); + list(const this_type& x, const allocator_type& allocator); + list(this_type&& x); + list(this_type&&, const allocator_type&); + list(std::initializer_list ilist, const allocator_type& allocator = EASTL_LIST_DEFAULT_ALLOCATOR); + + template + list(InputIterator first, InputIterator last); // allocator arg removed because VC7.1 fails on the default arg. To do: Make a second version of this function without a default arg. + + this_type& operator=(const this_type& x); + this_type& operator=(std::initializer_list ilist); + this_type& operator=(this_type&& x); + + // In the case that the two containers' allocators are unequal, swap copies elements instead + // of replacing them in place. In this case swap is an O(n) operation instead of O(1). + void swap(this_type& x); + + void assign(size_type n, const value_type& value); + + template // It turns out that the C++ std::list specifies a two argument + void assign(InputIterator first, InputIterator last); // version of assign that takes (int size, int value). These are not + // iterators, so we need to do a template compiler trick to do the right thing. + void assign(std::initializer_list ilist); + + iterator begin() EA_NOEXCEPT; + const_iterator begin() const EA_NOEXCEPT; + const_iterator cbegin() const EA_NOEXCEPT; + + iterator end() EA_NOEXCEPT; + const_iterator end() const EA_NOEXCEPT; + const_iterator cend() const EA_NOEXCEPT; + + reverse_iterator rbegin() EA_NOEXCEPT; + const_reverse_iterator rbegin() const EA_NOEXCEPT; + const_reverse_iterator crbegin() const EA_NOEXCEPT; + + reverse_iterator rend() EA_NOEXCEPT; + const_reverse_iterator rend() const EA_NOEXCEPT; + const_reverse_iterator crend() const EA_NOEXCEPT; + + bool empty() const EA_NOEXCEPT; + size_type size() const EA_NOEXCEPT; + + void resize(size_type n, const value_type& value); + void resize(size_type n); + + reference front(); + const_reference front() const; + + reference back(); + const_reference back() const; + + template + void emplace_front(Args&&... args); + + template + void emplace_back(Args&&... 
args); + + void push_front(const value_type& value); + void push_front(value_type&& x); + reference push_front(); + void* push_front_uninitialized(); + + void push_back(const value_type& value); + void push_back(value_type&& x); + reference push_back(); + void* push_back_uninitialized(); + + void pop_front(); + void pop_back(); + + template + iterator emplace(const_iterator position, Args&&... args); + + iterator insert(const_iterator position); + iterator insert(const_iterator position, const value_type& value); + iterator insert(const_iterator position, value_type&& x); + iterator insert(const_iterator position, std::initializer_list ilist); + iterator insert(const_iterator position, size_type n, const value_type& value); + + template + iterator insert(const_iterator position, InputIterator first, InputIterator last); + + iterator erase(const_iterator position); + iterator erase(const_iterator first, const_iterator last); + + reverse_iterator erase(const_reverse_iterator position); + reverse_iterator erase(const_reverse_iterator first, const_reverse_iterator last); + + void clear() EA_NOEXCEPT; + void reset_lose_memory() EA_NOEXCEPT; // This is a unilateral reset to an initially empty state. No destructors are called, no deallocation occurs. + + void remove(const T& x); + + template + void remove_if(Predicate); + + void reverse() EA_NOEXCEPT; + + // splice inserts elements in the range [first,last) before position and removes the elements from x. + // In the case that the two containers' allocators are unequal, splice copies elements + // instead of splicing them. In this case elements are not removed from x, and iterators + // into the spliced elements from x continue to point to the original values in x. + void splice(const_iterator position, this_type& x); + void splice(const_iterator position, this_type& x, const_iterator i); + void splice(const_iterator position, this_type& x, const_iterator first, const_iterator last); + void splice(const_iterator position, this_type&& x); + void splice(const_iterator position, this_type&& x, const_iterator i); + void splice(const_iterator position, this_type&& x, const_iterator first, const_iterator last); + + public: + // For merge, see notes for splice regarding the handling of unequal allocators. + void merge(this_type& x); + void merge(this_type&& x); + + template + void merge(this_type& x, Compare compare); + + template + void merge(this_type&& x, Compare compare); + + void unique(); + + template + void unique(BinaryPredicate); + + // Sorting functionality + // This is independent of the global sort algorithms, as lists are + // linked nodes and can be sorted more efficiently by moving nodes + // around in ways that global sort algorithms aren't privy to. + void sort(); + + template + void sort(Compare compare); + + public: + bool validate() const; + int validate_iterator(const_iterator i) const; + + protected: + node_type* DoCreateNode(); + + template + node_type* DoCreateNode(Args&&... args); + + template + void DoAssign(Integer n, Integer value, true_type); + + template + void DoAssign(InputIterator first, InputIterator last, false_type); + + void DoAssignValues(size_type n, const value_type& value); + + template + void DoInsert(ListNodeBase* pNode, Integer n, Integer value, true_type); + + template + void DoInsert(ListNodeBase* pNode, InputIterator first, InputIterator last, false_type); + + void DoInsertValues(ListNodeBase* pNode, size_type n, const value_type& value); + + template + void DoInsertValue(ListNodeBase* pNode, Args&&... 
args); + + void DoErase(ListNodeBase* pNode); + + void DoSwap(this_type& x); + + template + iterator DoSort(iterator i1, iterator end2, size_type n, Compare& compare); + + }; // class list + + + + + + /////////////////////////////////////////////////////////////////////// + // ListNodeBase + /////////////////////////////////////////////////////////////////////// + + // Swaps the nodes a and b in the lists to which they belong. This is similar to + // splicing a into b's list and b into a's list at the same time. + // Works by swapping the members of a and b, and fixes up the lists that a and b + // were part of to point to the new members. + inline void ListNodeBase::swap(ListNodeBase& a, ListNodeBase& b) EA_NOEXCEPT + { + const ListNodeBase temp(a); + a = b; + b = temp; + + if(a.mpNext == &b) + a.mpNext = a.mpPrev = &a; + else + a.mpNext->mpPrev = a.mpPrev->mpNext = &a; + + if(b.mpNext == &a) + b.mpNext = b.mpPrev = &b; + else + b.mpNext->mpPrev = b.mpPrev->mpNext = &b; + } + + + // splices the [first,last) range from its current list into our list before this node. + inline void ListNodeBase::splice(ListNodeBase* first, ListNodeBase* last) EA_NOEXCEPT + { + // We assume that [first, last] are not within our list. + last->mpPrev->mpNext = this; + first->mpPrev->mpNext = last; + this->mpPrev->mpNext = first; + + ListNodeBase* const pTemp = this->mpPrev; + this->mpPrev = last->mpPrev; + last->mpPrev = first->mpPrev; + first->mpPrev = pTemp; + } + + + inline void ListNodeBase::reverse() EA_NOEXCEPT + { + ListNodeBase* pNode = this; + do + { + EA_ANALYSIS_ASSUME(pNode != NULL); + ListNodeBase* const pTemp = pNode->mpNext; + pNode->mpNext = pNode->mpPrev; + pNode->mpPrev = pTemp; + pNode = pNode->mpPrev; + } + while(pNode != this); + } + + + inline void ListNodeBase::insert(ListNodeBase* pNext) EA_NOEXCEPT + { + mpNext = pNext; + mpPrev = pNext->mpPrev; + pNext->mpPrev->mpNext = this; + pNext->mpPrev = this; + } + + + // Removes this node from the list that it's in. Assumes that the + // node is within a list and thus that its prev/next pointers are valid. + inline void ListNodeBase::remove() EA_NOEXCEPT + { + mpNext->mpPrev = mpPrev; + mpPrev->mpNext = mpNext; + } + + + // Inserts the standalone range [pFirst, pFinal] before pPosition. Assumes that the + // range is not within a list and thus that it's prev/next pointers are not valid. + // Assumes that this node is within a list and thus that its prev/next pointers are valid. + inline void ListNodeBase::insert_range(ListNodeBase* pFirst, ListNodeBase* pFinal) EA_NOEXCEPT + { + mpPrev->mpNext = pFirst; + pFirst->mpPrev = mpPrev; + mpPrev = pFinal; + pFinal->mpNext = this; + } + + + // Removes the range [pFirst, pFinal] from the list that it's in. Assumes that the + // range is within a list and thus that its prev/next pointers are valid. + inline void ListNodeBase::remove_range(ListNodeBase* pFirst, ListNodeBase* pFinal) EA_NOEXCEPT + { + pFinal->mpNext->mpPrev = pFirst->mpPrev; + pFirst->mpPrev->mpNext = pFinal->mpNext; + } + + + /////////////////////////////////////////////////////////////////////// + // ListIterator + /////////////////////////////////////////////////////////////////////// + + template + inline ListIterator::ListIterator() EA_NOEXCEPT + : mpNode() // To consider: Do we really need to intialize mpNode? 
+ { + // Empty + } + + + template + inline ListIterator::ListIterator(const ListNodeBase* pNode) EA_NOEXCEPT + : mpNode(static_cast((ListNode*)const_cast(pNode))) // All this casting is in the name of making runtime debugging much easier on the user. + { + // Empty + } + + + template + inline ListIterator::ListIterator(const iterator& x) EA_NOEXCEPT + : mpNode(const_cast(x.mpNode)) + { + // Empty + } + + + template + inline typename ListIterator::this_type + ListIterator::next() const EA_NOEXCEPT + { + return ListIterator(mpNode->mpNext); + } + + + template + inline typename ListIterator::this_type + ListIterator::prev() const EA_NOEXCEPT + { + return ListIterator(mpNode->mpPrev); + } + + + template + inline typename ListIterator::reference + ListIterator::operator*() const EA_NOEXCEPT + { + return mpNode->mValue; + } + + + template + inline typename ListIterator::pointer + ListIterator::operator->() const EA_NOEXCEPT + { + return &mpNode->mValue; + } + + + template + inline typename ListIterator::this_type& + ListIterator::operator++() EA_NOEXCEPT + { + mpNode = static_cast(mpNode->mpNext); + return *this; + } + + + template + inline typename ListIterator::this_type + ListIterator::operator++(int) EA_NOEXCEPT + { + this_type temp(*this); + mpNode = static_cast(mpNode->mpNext); + return temp; + } + + + template + inline typename ListIterator::this_type& + ListIterator::operator--() EA_NOEXCEPT + { + mpNode = static_cast(mpNode->mpPrev); + return *this; + } + + + template + inline typename ListIterator::this_type + ListIterator::operator--(int) EA_NOEXCEPT + { + this_type temp(*this); + mpNode = static_cast(mpNode->mpPrev); + return temp; + } + + + // The C++ defect report #179 requires that we support comparisons between const and non-const iterators. + // Thus we provide additional template paremeters here to support this. The defect report does not + // require us to support comparisons between reverse_iterators and const_reverse_iterators. + template + inline bool operator==(const ListIterator& a, + const ListIterator& b) EA_NOEXCEPT + { + return a.mpNode == b.mpNode; + } + + + template + inline bool operator!=(const ListIterator& a, + const ListIterator& b) EA_NOEXCEPT + { + return a.mpNode != b.mpNode; + } + + + // We provide a version of operator!= for the case where the iterators are of the + // same type. This helps prevent ambiguity errors in the presence of rel_ops. 
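// For example (illustrative; not from the original source):
//     eastl::list<int>::iterator       it  = intList.begin();
//     eastl::list<int>::const_iterator cit = intList.cbegin();
//     bool same = (it == cit);   // mixed const/non-const comparison, per DR #179
// The same-type overload that follows keeps such comparisons unambiguous when a
// generic rel_ops-style operator!= template is also visible.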
+ template + inline bool operator!=(const ListIterator& a, + const ListIterator& b) EA_NOEXCEPT + { + return a.mpNode != b.mpNode; + } + + + + /////////////////////////////////////////////////////////////////////// + // ListBase + /////////////////////////////////////////////////////////////////////// + + template + inline ListBase::ListBase() + : mNodeAllocator(base_node_type(), allocator_type(EASTL_LIST_DEFAULT_NAME)) + #if EASTL_LIST_SIZE_CACHE + , mSize(0) + #endif + { + DoInit(); + } + + template + inline ListBase::ListBase(const allocator_type& allocator) + : mNodeAllocator(base_node_type(), allocator) + #if EASTL_LIST_SIZE_CACHE + , mSize(0) + #endif + { + DoInit(); + } + + + template + inline ListBase::~ListBase() + { + DoClear(); + } + + + template + const typename ListBase::allocator_type& + ListBase::get_allocator() const EA_NOEXCEPT + { + return internalAllocator(); + } + + + template + typename ListBase::allocator_type& + ListBase::get_allocator() EA_NOEXCEPT + { + return internalAllocator(); + } + + + template + inline void ListBase::set_allocator(const allocator_type& allocator) + { + EASTL_ASSERT((internalAllocator() == allocator) || (static_cast(internalNode().mpNext) == &internalNode())); // We can only assign a different allocator if we are empty of elements. + internalAllocator() = allocator; + } + + + template + inline typename ListBase::node_type* + ListBase::DoAllocateNode() + { + node_type* pNode = (node_type*)allocate_memory(internalAllocator(), sizeof(node_type), EASTL_ALIGN_OF(T), 0); + EASTL_ASSERT(pNode != nullptr); + return pNode; + } + + + template + inline void ListBase::DoFreeNode(node_type* p) + { + EASTLFree(internalAllocator(), p, sizeof(node_type)); + } + + + template + inline void ListBase::DoInit() EA_NOEXCEPT + { + internalNode().mpNext = (ListNode*)&internalNode(); + internalNode().mpPrev = (ListNode*)&internalNode(); + } + + + template + inline void ListBase::DoClear() + { + node_type* p = static_cast(internalNode().mpNext); + + while(p != &internalNode()) + { + node_type* const pTemp = p; + p = static_cast(p->mpNext); + pTemp->~node_type(); + EASTLFree(internalAllocator(), pTemp, sizeof(node_type)); + } + } + + + + /////////////////////////////////////////////////////////////////////// + // list + /////////////////////////////////////////////////////////////////////// + + template + inline list::list() + : base_type() + { + // Empty + } + + + template + inline list::list(const allocator_type& allocator) + : base_type(allocator) + { + // Empty + } + + + template + inline list::list(size_type n, const allocator_type& allocator) + : base_type(allocator) + { + DoInsertValues((ListNodeBase*)&internalNode(), n, value_type()); + } + + + template + inline list::list(size_type n, const value_type& value, const allocator_type& allocator) + : base_type(allocator) + { + DoInsertValues((ListNodeBase*)&internalNode(), n, value); + } + + + template + inline list::list(const this_type& x) + : base_type(x.internalAllocator()) + { + DoInsert((ListNodeBase*)&internalNode(), const_iterator((ListNodeBase*)x.internalNode().mpNext), const_iterator((ListNodeBase*)&x.internalNode()), false_type()); + } + + + template + inline list::list(const this_type& x, const allocator_type& allocator) + : base_type(allocator) + { + DoInsert((ListNodeBase*)&internalNode(), const_iterator((ListNodeBase*)x.internalNode().mpNext), const_iterator((ListNodeBase*)&x.internalNode()), false_type()); + } + + + template + inline list::list(this_type&& x) + : 
base_type(eastl::move(x.internalAllocator())) + { + swap(x); + } + + + template + inline list::list(this_type&& x, const allocator_type& allocator) + : base_type(allocator) + { + swap(x); // member swap handles the case that x has a different allocator than our allocator by doing a copy. + } + + + template + inline list::list(std::initializer_list ilist, const allocator_type& allocator) + : base_type(allocator) + { + DoInsert((ListNodeBase*)&internalNode(), ilist.begin(), ilist.end(), false_type()); + } + + + template + template + list::list(InputIterator first, InputIterator last) + : base_type(EASTL_LIST_DEFAULT_ALLOCATOR) + { + //insert(const_iterator((ListNodeBase*)&internalNode()), first, last); + DoInsert((ListNodeBase*)&internalNode(), first, last, is_integral()); + } + + + template + typename list::iterator + inline list::begin() EA_NOEXCEPT + { + return iterator((ListNodeBase*)internalNode().mpNext); + } + + + template + inline typename list::const_iterator + list::begin() const EA_NOEXCEPT + { + return const_iterator((ListNodeBase*)internalNode().mpNext); + } + + + template + inline typename list::const_iterator + list::cbegin() const EA_NOEXCEPT + { + return const_iterator((ListNodeBase*)internalNode().mpNext); + } + + + template + inline typename list::iterator + list::end() EA_NOEXCEPT + { + return iterator((ListNodeBase*)&internalNode()); + } + + + template + inline typename list::const_iterator + list::end() const EA_NOEXCEPT + { + return const_iterator((ListNodeBase*)&internalNode()); + } + + + template + inline typename list::const_iterator + list::cend() const EA_NOEXCEPT + { + return const_iterator((ListNodeBase*)&internalNode()); + } + + + template + inline typename list::reverse_iterator + list::rbegin() EA_NOEXCEPT + { + return reverse_iterator((ListNodeBase*)&internalNode()); + } + + + template + inline typename list::const_reverse_iterator + list::rbegin() const EA_NOEXCEPT + { + return const_reverse_iterator((ListNodeBase*)&internalNode()); + } + + + template + inline typename list::const_reverse_iterator + list::crbegin() const EA_NOEXCEPT + { + return const_reverse_iterator((ListNodeBase*)&internalNode()); + } + + + template + inline typename list::reverse_iterator + list::rend() EA_NOEXCEPT + { + return reverse_iterator((ListNodeBase*)internalNode().mpNext); + } + + + template + inline typename list::const_reverse_iterator + list::rend() const EA_NOEXCEPT + { + return const_reverse_iterator((ListNodeBase*)internalNode().mpNext); + } + + + template + inline typename list::const_reverse_iterator + list::crend() const EA_NOEXCEPT + { + return const_reverse_iterator((ListNodeBase*)internalNode().mpNext); + } + + + template + inline typename list::reference + list::front() + { + #if EASTL_ASSERT_ENABLED && EASTL_EMPTY_REFERENCE_ASSERT_ENABLED + if (EASTL_UNLIKELY(static_cast(internalNode().mpNext) == &internalNode())) + EASTL_FAIL_MSG("list::front -- empty container"); + #else + // We allow the user to reference an empty container. + #endif + + return static_cast(internalNode().mpNext)->mValue; + } + + + template + inline typename list::const_reference + list::front() const + { + #if EASTL_ASSERT_ENABLED && EASTL_EMPTY_REFERENCE_ASSERT_ENABLED + if (EASTL_UNLIKELY(static_cast(internalNode().mpNext) == &internalNode())) + EASTL_FAIL_MSG("list::front -- empty container"); + #else + // We allow the user to reference an empty container. 
+ #endif + + return static_cast(internalNode().mpNext)->mValue; + } + + + template + inline typename list::reference + list::back() + { + #if EASTL_ASSERT_ENABLED && EASTL_EMPTY_REFERENCE_ASSERT_ENABLED + if (EASTL_UNLIKELY(static_cast(internalNode().mpNext) == &internalNode())) + EASTL_FAIL_MSG("list::back -- empty container"); + #else + // We allow the user to reference an empty container. + #endif + + return static_cast(internalNode().mpPrev)->mValue; + } + + + template + inline typename list::const_reference + list::back() const + { + #if EASTL_ASSERT_ENABLED && EASTL_EMPTY_REFERENCE_ASSERT_ENABLED + if (EASTL_UNLIKELY(static_cast(internalNode().mpNext) == &internalNode())) + EASTL_FAIL_MSG("list::back -- empty container"); + #else + // We allow the user to reference an empty container. + #endif + + return static_cast(internalNode().mpPrev)->mValue; + } + + + template + inline bool list::empty() const EA_NOEXCEPT + { + #if EASTL_LIST_SIZE_CACHE + return (mSize == 0); + #else + return static_cast(internalNode().mpNext) == &internalNode(); + #endif + } + + + template + inline typename list::size_type + list::size() const EA_NOEXCEPT + { + #if EASTL_LIST_SIZE_CACHE + return mSize; + #else + #if EASTL_DEBUG + const ListNodeBase* p = (ListNodeBase*)internalNode().mpNext; + size_type n = 0; + while(p != (ListNodeBase*)&internalNode()) + { + ++n; + p = (ListNodeBase*)p->mpNext; + } + return n; + #else + // The following optimizes to slightly better code than the code above. + return (size_type)eastl::distance(const_iterator((ListNodeBase*)internalNode().mpNext), const_iterator((ListNodeBase*)&internalNode())); + #endif + #endif + } + + + template + typename list::this_type& + list::operator=(const this_type& x) + { + if(this != &x) // If not assigning to self... + { + // If (EASTL_ALLOCATOR_COPY_ENABLED == 1) and the current contents are allocated by an + // allocator that's unequal to x's allocator, we need to reallocate our elements with + // our current allocator and reallocate it with x's allocator. If the allocators are + // equal then we can use a more optimal algorithm that doesn't reallocate our elements + // but instead can copy them in place. + + #if EASTL_ALLOCATOR_COPY_ENABLED + bool bSlowerPathwayRequired = (internalAllocator() != x.internalAllocator()); + #else + bool bSlowerPathwayRequired = false; + #endif + + if(bSlowerPathwayRequired) + { + clear(); + + #if EASTL_ALLOCATOR_COPY_ENABLED + internalAllocator() = x.internalAllocator(); + #endif + } + + DoAssign(x.begin(), x.end(), eastl::false_type()); + } + + return *this; + } + + + template + typename list::this_type& + list::operator=(this_type&& x) + { + if(this != &x) + { + clear(); // To consider: Are we really required to clear here? x is going away soon and will clear itself in its dtor. + swap(x); // member swap handles the case that x has a different allocator than our allocator by doing a copy. + } + return *this; + } + + + template + typename list::this_type& + list::operator=(std::initializer_list ilist) + { + DoAssign(ilist.begin(), ilist.end(), false_type()); + return *this; + } + + + template + inline void list::assign(size_type n, const value_type& value) + { + DoAssignValues(n, value); + } + + + // It turns out that the C++ std::list specifies a two argument + // version of assign that takes (int size, int value). These are not + // iterators, so we need to do a template compiler trick to do the right thing. 
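// Concretely (illustrative; not from the original source):
//     eastl::list<int> intList;
//     intList.assign(5, 3);   // intent: five copies of the value 3
// deduces InputIterator as int, so without the dispatch below the arguments 5
// and 3 would be walked as if they were iterators. is_integral<InputIterator>()
// sends integral arguments to DoAssign(n, value, true_type), which forwards to
// DoAssignValues(), while genuine iterator pairs take the false_type path.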
+ template + template + inline void list::assign(InputIterator first, InputIterator last) + { + DoAssign(first, last, is_integral()); + } + + + template + inline void list::assign(std::initializer_list ilist) + { + DoAssign(ilist.begin(), ilist.end(), false_type()); + } + + + template + inline void list::clear() EA_NOEXCEPT + { + DoClear(); + DoInit(); + #if EASTL_LIST_SIZE_CACHE + mSize = 0; + #endif + } + + + template + inline void list::reset_lose_memory() EA_NOEXCEPT + { + // The reset_lose_memory function is a special extension function which unilaterally + // resets the container to an empty state without freeing the memory of + // the contained objects. This is useful for very quickly tearing down a + // container built into scratch memory. + DoInit(); + #if EASTL_LIST_SIZE_CACHE + mSize = 0; + #endif + } + + + template + void list::resize(size_type n, const value_type& value) + { + iterator current((ListNodeBase*)internalNode().mpNext); + size_type i = 0; + + while((current.mpNode != &internalNode()) && (i < n)) + { + ++current; + ++i; + } + if(i == n) + erase(current, (ListNodeBase*)&internalNode()); + else + insert((ListNodeBase*)&internalNode(), n - i, value); + } + + + template + inline void list::resize(size_type n) + { + resize(n, value_type()); + } + + + template + template + void list::emplace_front(Args&&... args) + { + DoInsertValue((ListNodeBase*)internalNode().mpNext, eastl::forward(args)...); + } + + template + template + void list::emplace_back(Args&&... args) + { + DoInsertValue((ListNodeBase*)&internalNode(), eastl::forward(args)...); + } + + + template + inline void list::push_front(const value_type& value) + { + DoInsertValue((ListNodeBase*)internalNode().mpNext, value); + } + + + template + inline void list::push_front(value_type&& value) + { + emplace(begin(), eastl::move(value)); + } + + + template + inline typename list::reference + list::push_front() + { + node_type* const pNode = DoCreateNode(); + ((ListNodeBase*)pNode)->insert((ListNodeBase*)internalNode().mpNext); + #if EASTL_LIST_SIZE_CACHE + ++mSize; + #endif + return static_cast(internalNode().mpNext)->mValue; // Same as return front(); + } + + + template + inline void* list::push_front_uninitialized() + { + node_type* const pNode = DoAllocateNode(); + ((ListNodeBase*)pNode)->insert((ListNodeBase*)internalNode().mpNext); + #if EASTL_LIST_SIZE_CACHE + ++mSize; + #endif + return &pNode->mValue; + } + + + template + inline void list::pop_front() + { + #if EASTL_ASSERT_ENABLED + if(EASTL_UNLIKELY(static_cast(internalNode().mpNext) == &internalNode())) + EASTL_FAIL_MSG("list::pop_front -- empty container"); + #endif + + DoErase((ListNodeBase*)internalNode().mpNext); + } + + + template + inline void list::push_back(const value_type& value) + { + DoInsertValue((ListNodeBase*)&internalNode(), value); + } + + + template + inline void list::push_back(value_type&& value) + { + emplace(end(), eastl::move(value)); + } + + + template + inline typename list::reference + list::push_back() + { + node_type* const pNode = DoCreateNode(); + ((ListNodeBase*)pNode)->insert((ListNodeBase*)&internalNode()); + #if EASTL_LIST_SIZE_CACHE + ++mSize; + #endif + return static_cast(internalNode().mpPrev)->mValue; // Same as return back(); + } + + + template + inline void* list::push_back_uninitialized() + { + node_type* const pNode = DoAllocateNode(); + ((ListNodeBase*)pNode)->insert((ListNodeBase*)&internalNode()); + #if EASTL_LIST_SIZE_CACHE + ++mSize; + #endif + return &pNode->mValue; + } + + + template + inline void 
list::pop_back() + { + #if EASTL_ASSERT_ENABLED + if(EASTL_UNLIKELY(static_cast(internalNode().mpNext) == &internalNode())) + EASTL_FAIL_MSG("list::pop_back -- empty container"); + #endif + + DoErase((ListNodeBase*)internalNode().mpPrev); + } + + + template + template + inline typename list::iterator + list::emplace(const_iterator position, Args&&... args) + { + DoInsertValue(position.mpNode, eastl::forward(args)...); + return iterator(position.mpNode->mpPrev); + } + + + template + inline typename list::iterator + list::insert(const_iterator position) + { + node_type* const pNode = DoCreateNode(value_type()); + ((ListNodeBase*)pNode)->insert((ListNodeBase*)position.mpNode); + #if EASTL_LIST_SIZE_CACHE + ++mSize; + #endif + return (ListNodeBase*)pNode; + } + + + template + inline typename list::iterator + list::insert(const_iterator position, const value_type& value) + { + node_type* const pNode = DoCreateNode(value); + ((ListNodeBase*)pNode)->insert((ListNodeBase*)position.mpNode); + #if EASTL_LIST_SIZE_CACHE + ++mSize; + #endif + return (ListNodeBase*)pNode; + } + + + template + inline typename list::iterator + list::insert(const_iterator position, value_type&& value) + { + return emplace(position, eastl::move(value)); + } + + template + inline typename list::iterator + list::insert(const_iterator position, size_type n, const value_type& value) + { + iterator itPrev(position.mpNode); + --itPrev; + DoInsertValues((ListNodeBase*)position.mpNode, n, value); + return ++itPrev; // Inserts in front of position, returns iterator to new elements. + } + + + template + template + inline typename list::iterator + list::insert(const_iterator position, InputIterator first, InputIterator last) + { + iterator itPrev(position.mpNode); + --itPrev; + DoInsert((ListNodeBase*)position.mpNode, first, last, is_integral()); + return ++itPrev; // Inserts in front of position, returns iterator to new elements. + } + + + template + inline typename list::iterator + list::insert(const_iterator position, std::initializer_list ilist) + { + iterator itPrev(position.mpNode); + --itPrev; + DoInsert((ListNodeBase*)position.mpNode, ilist.begin(), ilist.end(), false_type()); + return ++itPrev; // Inserts in front of position, returns iterator to new elements. + } + + + template + inline typename list::iterator + list::erase(const_iterator position) + { + ++position; + DoErase((ListNodeBase*)position.mpNode->mpPrev); + return iterator(position.mpNode); + } + + + template + typename list::iterator + list::erase(const_iterator first, const_iterator last) + { + while(first != last) + first = erase(first); + return iterator(last.mpNode); + } + + + template + inline typename list::reverse_iterator + list::erase(const_reverse_iterator position) + { + return reverse_iterator(erase((++position).base())); + } + + + template + typename list::reverse_iterator + list::erase(const_reverse_iterator first, const_reverse_iterator last) + { + // Version which erases in order from first to last. 
+ // difference_type i(first.base() - last.base()); + // while(i--) + // first = erase(first); + // return first; + + // Version which erases in order from last to first, but is slightly more efficient: + const_iterator itLastBase((++last).base()); + const_iterator itFirstBase((++first).base()); + + return reverse_iterator(erase(itLastBase, itFirstBase)); + } + + + template + void list::remove(const value_type& value) + { + iterator current((ListNodeBase*)internalNode().mpNext); + + while(current.mpNode != &internalNode()) + { + if(EASTL_LIKELY(!(*current == value))) + ++current; // We have duplicate '++current' statements here and below, but the logic here forces this. + else + { + ++current; + DoErase((ListNodeBase*)current.mpNode->mpPrev); + } + } + } + + + template + template + inline void list::remove_if(Predicate predicate) + { + for(iterator first((ListNodeBase*)internalNode().mpNext), last((ListNodeBase*)&internalNode()); first != last; ) + { + iterator temp(first); + ++temp; + if(predicate(first.mpNode->mValue)) + DoErase((ListNodeBase*)first.mpNode); + first = temp; + } + } + + + template + inline void list::reverse() EA_NOEXCEPT + { + ((ListNodeBase&)internalNode()).reverse(); + } + + + template + inline void list::splice(const_iterator position, this_type& x) + { + // Splicing operations cannot succeed if the two containers use unequal allocators. + // This issue is not addressed in the C++ 1998 standard but is discussed in the + // LWG defect reports, such as #431. There is no simple solution to this problem. + // One option is to throw an exception. Another option which probably captures the + // user intent most of the time is to copy the range from the source to the dest and + // remove it from the source. + + if(internalAllocator() == x.internalAllocator()) + { + #if EASTL_LIST_SIZE_CACHE + if(x.mSize) + { + ((ListNodeBase*)position.mpNode)->splice((ListNodeBase*)x.internalNode().mpNext, (ListNodeBase*)&x.internalNode()); + mSize += x.mSize; + x.mSize = 0; + } + #else + if(!x.empty()) + ((ListNodeBase*)position.mpNode)->splice((ListNodeBase*)x.internalNode().mpNext, (ListNodeBase*)&x.internalNode()); + #endif + } + else + { + insert(position, x.begin(), x.end()); + x.clear(); + } + } + + template + inline void list::splice(const_iterator position, this_type&& x) + { + return splice(position, x); // This will call splice(const_iterator, const this_type&); + } + + + template + inline void list::splice(const_iterator position, list& x, const_iterator i) + { + if(internalAllocator() == x.internalAllocator()) + { + iterator i2(i.mpNode); + ++i2; + if((position != i) && (position != i2)) + { + ((ListNodeBase*)position.mpNode)->splice((ListNodeBase*)i.mpNode, (ListNodeBase*)i2.mpNode); + + #if EASTL_LIST_SIZE_CACHE + ++mSize; + --x.mSize; + #endif + } + } + else + { + insert(position, *i); + x.erase(i); + } + } + + + template + inline void list::splice(const_iterator position, list&& x, const_iterator i) + { + return splice(position, x, i); // This will call splice(const_iterator, const this_type&, const_iterator); + } + + + template + inline void list::splice(const_iterator position, this_type& x, const_iterator first, const_iterator last) + { + if(internalAllocator() == x.internalAllocator()) + { + #if EASTL_LIST_SIZE_CACHE + const size_type n = (size_type)eastl::distance(first, last); + + if(n) + { + ((ListNodeBase*)position.mpNode)->splice((ListNodeBase*)first.mpNode, (ListNodeBase*)last.mpNode); + mSize += n; + x.mSize -= n; + } + #else + if(first != last) + 
((ListNodeBase*)position.mpNode)->splice((ListNodeBase*)first.mpNode, (ListNodeBase*)last.mpNode); + #endif + } + else + { + insert(position, first, last); + x.erase(first, last); + } + } + + + template + inline void list::splice(const_iterator position, list&& x, const_iterator first, const_iterator last) + { + return splice(position, x, first, last); // This will call splice(const_iterator, const this_type&, const_iterator, const_iterator); + } + + + template + inline void list::swap(this_type& x) + { + if(internalAllocator() == x.internalAllocator()) // If allocators are equivalent... + DoSwap(x); + else // else swap the contents. + { + const this_type temp(*this); // Can't call eastl::swap because that would + *this = x; // itself call this member swap function. + x = temp; + } + } + + + template + void list::merge(this_type& x) + { + if(this != &x) + { + iterator first(begin()); + iterator firstX(x.begin()); + const iterator last(end()); + const iterator lastX(x.end()); + + while((first != last) && (firstX != lastX)) + { + if(*firstX < *first) + { + iterator next(firstX); + + splice(first, x, firstX, ++next); + firstX = next; + } + else + ++first; + } + + if(firstX != lastX) + splice(last, x, firstX, lastX); + } + } + + + template + void list::merge(this_type&& x) + { + return merge(x); // This will call merge(this_type&) + } + + + template + template + void list::merge(this_type& x, Compare compare) + { + if(this != &x) + { + iterator first(begin()); + iterator firstX(x.begin()); + const iterator last(end()); + const iterator lastX(x.end()); + + while((first != last) && (firstX != lastX)) + { + if(compare(*firstX, *first)) + { + iterator next(firstX); + + splice(first, x, firstX, ++next); + firstX = next; + } + else + ++first; + } + + if(firstX != lastX) + splice(last, x, firstX, lastX); + } + } + + + template + template + void list::merge(this_type&& x, Compare compare) + { + return merge(x, compare); // This will call merge(this_type&, Compare) + } + + + template + void list::unique() + { + iterator first(begin()); + const iterator last(end()); + + if(first != last) + { + iterator next(first); + + while(++next != last) + { + if(*first == *next) + DoErase((ListNodeBase*)next.mpNode); + else + first = next; + next = first; + } + } + } + + + template + template + void list::unique(BinaryPredicate predicate) + { + iterator first(begin()); + const iterator last(end()); + + if(first != last) + { + iterator next(first); + + while(++next != last) + { + if(predicate(*first, *next)) + DoErase((ListNodeBase*)next.mpNode); + else + first = next; + next = first; + } + } + } + + + template + void list::sort() + { + eastl::less compare; + DoSort(begin(), end(), size(), compare); + } + + + template + template + void list::sort(Compare compare) + { + DoSort(begin(), end(), size(), compare); + } + + + template + template + typename list::iterator + list::DoSort(iterator i1, iterator end2, size_type n, Compare& compare) + { + // A previous version of this function did this by creating temporary lists, + // but that was incompatible with fixed_list because the sizes could be too big. + // We sort subsegments by recursive descent. Then merge as we ascend. + // Return an iterator to the beginning of the sorted subsegment. + // Start with a special case for small node counts. + switch (n) + { + case 0: + case 1: + return i1; + + case 2: + // Potentialy swap these two nodes and return the resulting first of them. 
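// (At this point end2 is one past the two-node range, so *--end2 is the second
// node; if it orders before the first, it is unlinked and re-inserted ahead of
// i1 and becomes the new front of the sub-range.)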
+ if(compare(*--end2, *i1)) + { + end2.mpNode->remove(); + end2.mpNode->insert(i1.mpNode); + return end2; + } + return i1; + + case 3: + { + // We do a list insertion sort. Measurements showed this improved performance 3-12%. + iterator lowest = i1; + + for(iterator current = i1.next(); current != end2; ++current) + { + if(compare(*current, *lowest)) + lowest = current; + } + + if(lowest == i1) + ++i1; + else + { + lowest.mpNode->remove(); + lowest.mpNode->insert(i1.mpNode); + } + + if(compare(*--end2, *i1)) // At this point, i1 refers to the second element in this three element segment. + { + end2.mpNode->remove(); + end2.mpNode->insert(i1.mpNode); + } + + return lowest; + } + } + + // Divide the range into two parts are recursively sort each part. Upon return we will have + // two halves that are each sorted but we'll need to merge the two together before returning. + iterator result; + size_type nMid = (n / 2); + iterator end1 = eastl::next(i1, (difference_type)nMid); + i1 = DoSort(i1, end1, nMid, compare); // Return the new beginning of the first sorted sub-range. + iterator i2 = DoSort(end1, end2, n - nMid, compare); // Return the new beginning of the second sorted sub-range. + + // If the start of the second list is before the start of the first list, insert the first list + // into the second at an appropriate starting place. + if(compare(*i2, *i1)) + { + // Find the position to insert the first list into the second list. + iterator ix = i2.next(); + while((ix != end2) && compare(*ix, *i1)) + ++ix; + + // Cut out the initial segment of the second list and move it to be in front of the first list. + ListNodeBase* i2Cut = i2.mpNode; + ListNodeBase* i2CutLast = ix.mpNode->mpPrev; + result = i2; + end1 = i2 = ix; + ListNodeBase::remove_range(i2Cut, i2CutLast); + i1.mpNode->insert_range(i2Cut, i2CutLast); + } + else + { + result = i1; + end1 = i2; + } + + // Merge the two segments. We do this by merging the second sub-segment into the first, by walking forward in each of the two sub-segments. + for(++i1; (i1 != end1) && (i2 != end2); ++i1) // while still working on either segment... + { + if(compare(*i2, *i1)) // If i2 is less than i1 and it needs to be merged in front of i1... + { + // Find the position to insert the i2 list into the i1 list. + iterator ix = i2.next(); + while((ix != end2) && compare(*ix, *i1)) + ++ix; + + // Cut this section of the i2 sub-segment out and merge into the appropriate place in the i1 list. + ListNodeBase* i2Cut = i2.mpNode; + ListNodeBase* i2CutLast = ix.mpNode->mpPrev; + if(end1 == i2) + end1 = ix; + i2 = ix; + ListNodeBase::remove_range(i2Cut, i2CutLast); + i1.mpNode->insert_range(i2Cut, i2CutLast); + } + } + + return result; + } + + + template + template + inline typename list::node_type* + list::DoCreateNode(Args&&... args) + { + node_type* const pNode = DoAllocateNode(); // pNode is of type node_type, but it's uninitialized memory. + + #if EASTL_EXCEPTIONS_ENABLED + try + { + ::new((void*)&pNode->mValue) value_type(eastl::forward(args)...); + } + catch(...) + { + DoFreeNode(pNode); + throw; + } + #else + ::new((void*)&pNode->mValue) value_type(eastl::forward(args)...); + #endif + + return pNode; + } + + + template + inline typename list::node_type* + list::DoCreateNode() + { + node_type* const pNode = DoAllocateNode(); + + #if EASTL_EXCEPTIONS_ENABLED + try + { + ::new((void*)&pNode->mValue) value_type(); + } + catch(...) 
+ { + DoFreeNode(pNode); + throw; + } + #else + ::new((void*)&pNode->mValue) value_type; + #endif + + return pNode; + } + + + template + template + inline void list::DoAssign(Integer n, Integer value, true_type) + { + DoAssignValues(static_cast(n), static_cast(value)); + } + + + template + template + void list::DoAssign(InputIterator first, InputIterator last, false_type) + { + node_type* pNode = static_cast(internalNode().mpNext); + + for(; (pNode != &internalNode()) && (first != last); ++first) + { + pNode->mValue = *first; + pNode = static_cast(pNode->mpNext); + } + + if(first == last) + erase(const_iterator((ListNodeBase*)pNode), (ListNodeBase*)&internalNode()); + else + DoInsert((ListNodeBase*)&internalNode(), first, last, false_type()); + } + + + template + void list::DoAssignValues(size_type n, const value_type& value) + { + node_type* pNode = static_cast(internalNode().mpNext); + + for(; (pNode != &internalNode()) && (n > 0); --n) + { + pNode->mValue = value; + pNode = static_cast(pNode->mpNext); + } + + if(n) + DoInsertValues((ListNodeBase*)&internalNode(), n, value); + else + erase(const_iterator((ListNodeBase*)pNode), (ListNodeBase*)&internalNode()); + } + + + template + template + inline void list::DoInsert(ListNodeBase* pNode, Integer n, Integer value, true_type) + { + DoInsertValues(pNode, static_cast(n), static_cast(value)); + } + + + template + template + inline void list::DoInsert(ListNodeBase* pNode, InputIterator first, InputIterator last, false_type) + { + for(; first != last; ++first) + DoInsertValue(pNode, *first); + } + + + template + inline void list::DoInsertValues(ListNodeBase* pNode, size_type n, const value_type& value) + { + for(; n > 0; --n) + DoInsertValue(pNode, value); + } + + + template + template + inline void list::DoInsertValue(ListNodeBase* pNode, Args&&... args) + { + node_type* const pNodeNew = DoCreateNode(eastl::forward(args)...); + ((ListNodeBase*)pNodeNew)->insert(pNode); + #if EASTL_LIST_SIZE_CACHE + ++mSize; + #endif + } + + + template + inline void list::DoErase(ListNodeBase* pNode) + { + pNode->remove(); + ((node_type*)pNode)->~node_type(); + DoFreeNode(((node_type*)pNode)); + #if EASTL_LIST_SIZE_CACHE + --mSize; + #endif + + /* Test version that uses union intermediates + union + { + ListNodeBase* mpBase; + node_type* mpNode; + } node = { pNode }; + + node.mpNode->~node_type(); + node.mpBase->remove(); + DoFreeNode(node.mpNode); + #if EASTL_LIST_SIZE_CACHE + --mSize; + #endif + */ + } + + + template + inline void list::DoSwap(this_type& x) + { + ListNodeBase::swap((ListNodeBase&)internalNode(), (ListNodeBase&)x.internalNode()); // We need to implement a special swap because we can't do a shallow swap. + eastl::swap(internalAllocator(), x.internalAllocator()); // We do this even if EASTL_ALLOCATOR_COPY_ENABLED is 0. + #if EASTL_LIST_SIZE_CACHE + eastl::swap(mSize, x.mSize); + #endif + } + + + template + inline bool list::validate() const + { + #if EASTL_LIST_SIZE_CACHE + size_type n = 0; + + for(const_iterator i(begin()), iEnd(end()); i != iEnd; ++i) + ++n; + + if(n != mSize) + return false; + #endif + + // To do: More validation. + return true; + } + + + template + inline int list::validate_iterator(const_iterator i) const + { + // To do: Come up with a more efficient mechanism of doing this. 
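// (The scan below classifies i as: found before end() -> valid, current and
// dereferenceable; equal to end() -> valid and current but not dereferenceable;
// otherwise it does not belong to this container and isf_none is returned.)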
+ + for(const_iterator temp = begin(), tempEnd = end(); temp != tempEnd; ++temp) + { + if(temp == i) + return (isf_valid | isf_current | isf_can_dereference); + } + + if(i == end()) + return (isf_valid | isf_current); + + return isf_none; + } + + + + /////////////////////////////////////////////////////////////////////// + // global operators + /////////////////////////////////////////////////////////////////////// + + template + bool operator==(const list& a, const list& b) + { + typename list::const_iterator ia = a.begin(); + typename list::const_iterator ib = b.begin(); + typename list::const_iterator enda = a.end(); + + #if EASTL_LIST_SIZE_CACHE + if(a.size() == b.size()) + { + while((ia != enda) && (*ia == *ib)) + { + ++ia; + ++ib; + } + return (ia == enda); + } + return false; + #else + typename list::const_iterator endb = b.end(); + + while((ia != enda) && (ib != endb) && (*ia == *ib)) + { + ++ia; + ++ib; + } + return (ia == enda) && (ib == endb); + #endif + } + + template + bool operator<(const list& a, const list& b) + { + return eastl::lexicographical_compare(a.begin(), a.end(), b.begin(), b.end()); + } + + template + bool operator!=(const list& a, const list& b) + { + return !(a == b); + } + + template + bool operator>(const list& a, const list& b) + { + return b < a; + } + + template + bool operator<=(const list& a, const list& b) + { + return !(b < a); + } + + template + bool operator>=(const list& a, const list& b) + { + return !(a < b); + } + + template + void swap(list& a, list& b) + { + a.swap(b); + } + + + /////////////////////////////////////////////////////////////////////// + // erase / erase_if + // + // https://en.cppreference.com/w/cpp/container/list/erase2 + /////////////////////////////////////////////////////////////////////// + template + void erase(list& c, const U& value) + { + // Erases all elements that compare equal to value from the container. + c.remove_if([&](auto& elem) { return elem == value; }); + } + + template + void erase_if(list& c, Predicate predicate) + { + // Erases all elements that satisfy the predicate pred from the container. + c.remove_if(predicate); + } + + +} // namespace eastl + + +EA_RESTORE_SN_WARNING() + +EA_RESTORE_VC_WARNING(); + + +#endif // Header include guard diff --git a/libkram/eastl/include/EASTL/map.h b/libkram/eastl/include/EASTL/map.h new file mode 100644 index 00000000..0e6c1d0f --- /dev/null +++ b/libkram/eastl/include/EASTL/map.h @@ -0,0 +1,684 @@ +/////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_MAP_H +#define EASTL_MAP_H + + +#include +#include +#include +#include + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + + + +namespace eastl +{ + + /// EASTL_MAP_DEFAULT_NAME + /// + /// Defines a default container name in the absence of a user-provided name. + /// + #ifndef EASTL_MAP_DEFAULT_NAME + #define EASTL_MAP_DEFAULT_NAME EASTL_DEFAULT_NAME_PREFIX " map" // Unless the user overrides something, this is "EASTL map". + #endif + + + /// EASTL_MULTIMAP_DEFAULT_NAME + /// + /// Defines a default container name in the absence of a user-provided name. 
+ /// + #ifndef EASTL_MULTIMAP_DEFAULT_NAME + #define EASTL_MULTIMAP_DEFAULT_NAME EASTL_DEFAULT_NAME_PREFIX " multimap" // Unless the user overrides something, this is "EASTL multimap". + #endif + + + /// EASTL_MAP_DEFAULT_ALLOCATOR + /// + #ifndef EASTL_MAP_DEFAULT_ALLOCATOR + #define EASTL_MAP_DEFAULT_ALLOCATOR allocator_type(EASTL_MAP_DEFAULT_NAME) + #endif + + /// EASTL_MULTIMAP_DEFAULT_ALLOCATOR + /// + #ifndef EASTL_MULTIMAP_DEFAULT_ALLOCATOR + #define EASTL_MULTIMAP_DEFAULT_ALLOCATOR allocator_type(EASTL_MULTIMAP_DEFAULT_NAME) + #endif + + + + /// map + /// + /// Implements a canonical map. + /// + /// The large majority of the implementation of this class is found in the rbtree + /// base class. We control the behaviour of rbtree via template parameters. + /// + /// Pool allocation + /// If you want to make a custom memory pool for a map container, your pool + /// needs to contain items of type map::node_type. So if you have a memory + /// pool that has a constructor that takes the size of pool items and the + /// count of pool items, you would do this (assuming that MemoryPool implements + /// the Allocator interface): + /// typedef map, MemoryPool> WidgetMap; // Delare your WidgetMap type. + /// MemoryPool myPool(sizeof(WidgetMap::node_type), 100); // Make a pool of 100 Widget nodes. + /// WidgetMap myMap(&myPool); // Create a map that uses the pool. + /// + template , typename Allocator = EASTLAllocatorType> + class map + : public rbtree, Compare, Allocator, eastl::use_first >, true, true> + { + public: + typedef rbtree, Compare, Allocator, + eastl::use_first >, true, true> base_type; + typedef map this_type; + typedef typename base_type::size_type size_type; + typedef typename base_type::key_type key_type; + typedef T mapped_type; + typedef typename base_type::value_type value_type; + typedef typename base_type::node_type node_type; + typedef typename base_type::iterator iterator; + typedef typename base_type::const_iterator const_iterator; + typedef typename base_type::allocator_type allocator_type; + typedef typename base_type::insert_return_type insert_return_type; + typedef typename base_type::extract_key extract_key; + // Other types are inherited from the base class. + + using base_type::begin; + using base_type::end; + using base_type::find; + using base_type::lower_bound; + using base_type::upper_bound; + using base_type::insert; + using base_type::erase; + + protected: + using base_type::compare; + using base_type::get_compare; + + public: + class value_compare + { + protected: + friend class map; + Compare compare; + value_compare(Compare c) : compare(c) {} + + public: + typedef bool result_type; + typedef value_type first_argument_type; + typedef value_type second_argument_type; + + bool operator()(const value_type& x, const value_type& y) const + { return compare(x.first, y.first); } + }; + + public: + map(const allocator_type& allocator = EASTL_MAP_DEFAULT_ALLOCATOR); + map(const Compare& compare, const allocator_type& allocator = EASTL_MAP_DEFAULT_ALLOCATOR); + map(const this_type& x); + map(this_type&& x); + map(this_type&& x, const allocator_type& allocator); + map(std::initializer_list ilist, const Compare& compare = Compare(), const allocator_type& allocator = EASTL_MAP_DEFAULT_ALLOCATOR); + + template + map(Iterator itBegin, Iterator itEnd); // allocator arg removed because VC7.1 fails on the default arg. To consider: Make a second version of this function without a default arg. 
+ + this_type& operator=(const this_type& x) { return (this_type&)base_type::operator=(x); } + this_type& operator=(std::initializer_list ilist) { return (this_type&)base_type::operator=(ilist); } + this_type& operator=(this_type&& x) { return (this_type&)base_type::operator=(eastl::move(x)); } + + public: + /// This is an extension to the C++ standard. We insert a default-constructed + /// element with the given key. The reason for this is that we can avoid the + /// potentially expensive operation of creating and/or copying a mapped_type + /// object on the stack. Note that C++11 move insertions and variadic emplace + /// support make this extension mostly no longer necessary. + insert_return_type insert(const Key& key); + + value_compare value_comp() const; + + size_type erase(const Key& key); + size_type count(const Key& key) const; + + eastl::pair equal_range(const Key& key); + eastl::pair equal_range(const Key& key) const; + + T& operator[](const Key& key); // Of map, multimap, set, and multimap, only map has operator[]. + T& operator[](Key&& key); + + T& at(const Key& key); + const T& at(const Key& key) const; + + }; // map + + + + + + + /// multimap + /// + /// Implements a canonical multimap. + /// + /// The large majority of the implementation of this class is found in the rbtree + /// base class. We control the behaviour of rbtree via template parameters. + /// + /// Pool allocation + /// If you want to make a custom memory pool for a multimap container, your pool + /// needs to contain items of type multimap::node_type. So if you have a memory + /// pool that has a constructor that takes the size of pool items and the + /// count of pool items, you would do this (assuming that MemoryPool implements + /// the Allocator interface): + /// typedef multimap, MemoryPool> WidgetMap; // Delare your WidgetMap type. + /// MemoryPool myPool(sizeof(WidgetMap::node_type), 100); // Make a pool of 100 Widget nodes. + /// WidgetMap myMap(&myPool); // Create a map that uses the pool. + /// + template , typename Allocator = EASTLAllocatorType> + class multimap + : public rbtree, Compare, Allocator, eastl::use_first >, true, false> + { + public: + typedef rbtree, Compare, Allocator, + eastl::use_first >, true, false> base_type; + typedef multimap this_type; + typedef typename base_type::size_type size_type; + typedef typename base_type::key_type key_type; + typedef T mapped_type; + typedef typename base_type::value_type value_type; + typedef typename base_type::node_type node_type; + typedef typename base_type::iterator iterator; + typedef typename base_type::const_iterator const_iterator; + typedef typename base_type::allocator_type allocator_type; + typedef typename base_type::insert_return_type insert_return_type; + typedef typename base_type::extract_key extract_key; + // Other types are inherited from the base class. 
+ + using base_type::begin; + using base_type::end; + using base_type::find; + using base_type::lower_bound; + using base_type::upper_bound; + using base_type::insert; + using base_type::erase; + + protected: + using base_type::compare; + using base_type::get_compare; + + public: + class value_compare + { + protected: + friend class multimap; + Compare compare; + value_compare(Compare c) : compare(c) {} + + public: + typedef bool result_type; + typedef value_type first_argument_type; + typedef value_type second_argument_type; + + bool operator()(const value_type& x, const value_type& y) const + { return compare(x.first, y.first); } + }; + + public: + multimap(const allocator_type& allocator = EASTL_MULTIMAP_DEFAULT_ALLOCATOR); + multimap(const Compare& compare, const allocator_type& allocator = EASTL_MULTIMAP_DEFAULT_ALLOCATOR); + multimap(const this_type& x); + multimap(this_type&& x); + multimap(this_type&& x, const allocator_type& allocator); + multimap(std::initializer_list ilist, const Compare& compare = Compare(), const allocator_type& allocator = EASTL_MULTIMAP_DEFAULT_ALLOCATOR); + + template + multimap(Iterator itBegin, Iterator itEnd); // allocator arg removed because VC7.1 fails on the default arg. To consider: Make a second version of this function without a default arg. + + this_type& operator=(const this_type& x) { return (this_type&)base_type::operator=(x); } + this_type& operator=(std::initializer_list ilist) { return (this_type&)base_type::operator=(ilist); } + this_type& operator=(this_type&& x) { return (this_type&)base_type::operator=(eastl::move(x)); } + + public: + /// This is an extension to the C++ standard. We insert a default-constructed + /// element with the given key. The reason for this is that we can avoid the + /// potentially expensive operation of creating and/or copying a mapped_type + /// object on the stack. Note that C++11 move insertions and variadic emplace + /// support make this extension mostly no longer necessary. + insert_return_type insert(const Key& key); + + value_compare value_comp() const; + + size_type erase(const Key& key); + size_type count(const Key& key) const; + + eastl::pair equal_range(const Key& key); + eastl::pair equal_range(const Key& key) const; + + /// equal_range_small + /// This is a special version of equal_range which is optimized for the + /// case of there being few or no duplicated keys in the tree. 
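+             /// A minimal usage sketch (illustrative only; Widget is a hypothetical mapped type and
+             /// process() a placeholder for user code):
+             ///     eastl::multimap<int, Widget> widgetMap;
+             ///     auto range = widgetMap.equal_range_small(37);   // expects at most a few duplicates of key 37
+             ///     for(auto it = range.first; it != range.second; ++it)
+             ///         process(it->second);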
+ eastl::pair equal_range_small(const Key& key); + eastl::pair equal_range_small(const Key& key) const; + + private: + // these base member functions are not included in multimaps + using base_type::try_emplace; + using base_type::insert_or_assign; + }; // multimap + + + + + + /////////////////////////////////////////////////////////////////////// + // map + /////////////////////////////////////////////////////////////////////// + + template + inline map::map(const allocator_type& allocator) + : base_type(allocator) + { + } + + + template + inline map::map(const Compare& compare, const allocator_type& allocator) + : base_type(compare, allocator) + { + } + + + template + inline map::map(const this_type& x) + : base_type(x) + { + } + + + template + inline map::map(this_type&& x) + : base_type(eastl::move(x)) + { + } + + template + inline map::map(this_type&& x, const allocator_type& allocator) + : base_type(eastl::move(x), allocator) + { + } + + + template + inline map::map(std::initializer_list ilist, const Compare& compare, const allocator_type& allocator) + : base_type(ilist.begin(), ilist.end(), compare, allocator) + { + } + + + template + template + inline map::map(Iterator itBegin, Iterator itEnd) + : base_type(itBegin, itEnd, Compare(), EASTL_MAP_DEFAULT_ALLOCATOR) + { + } + + + template + inline typename map::insert_return_type + map::insert(const Key& key) + { + return base_type::DoInsertKey(true_type(), key); + } + + + template + inline typename map::value_compare + map::value_comp() const + { + return value_compare(get_compare()); + } + + + template + inline typename map::size_type + map::erase(const Key& key) + { + const iterator it(find(key)); + + if(it != end()) // If it exists... + { + base_type::erase(it); + return 1; + } + return 0; + } + + + template + inline typename map::size_type + map::count(const Key& key) const + { + const const_iterator it(find(key)); + return (it != end()) ? 1 : 0; + } + + + template + inline eastl::pair::iterator, + typename map::iterator> + map::equal_range(const Key& key) + { + // The resulting range will either be empty or have one element, + // so instead of doing two tree searches (one for lower_bound and + // one for upper_bound), we do just lower_bound and see if the + // result is a range of size zero or one. + const iterator itLower(lower_bound(key)); + + if((itLower == end()) || compare(key, itLower.mpNode->mValue.first)) // If at the end or if (key is < itLower)... + return eastl::pair(itLower, itLower); + + iterator itUpper(itLower); + return eastl::pair(itLower, ++itUpper); + } + + + template + inline eastl::pair::const_iterator, + typename map::const_iterator> + map::equal_range(const Key& key) const + { + // See equal_range above for comments. + const const_iterator itLower(lower_bound(key)); + + if((itLower == end()) || compare(key, itLower.mpNode->mValue.first)) // If at the end or if (key is < itLower)... + return eastl::pair(itLower, itLower); + + const_iterator itUpper(itLower); + return eastl::pair(itLower, ++itUpper); + } + + + template + inline T& map::operator[](const Key& key) + { + iterator itLower(lower_bound(key)); // itLower->first is >= key. 
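+         // A single lower_bound walk either lands on the node already holding 'key' or on the
+         // position where it belongs; the check below reuses that result as an insertion hint,
+         // so the common "insert or find" path costs one tree search instead of two.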
+ + if((itLower == end()) || compare(key, (*itLower).first)) + { + itLower = base_type::DoInsertKey(true_type(), itLower, key); + } + + return (*itLower).second; + + // Reference implementation of this function, which may not be as fast: + //iterator it(base_type::insert(eastl::pair(key, T())).first); + //return it->second; + } + + + template + inline T& map::operator[](Key&& key) + { + iterator itLower(lower_bound(key)); // itLower->first is >= key. + + if((itLower == end()) || compare(key, (*itLower).first)) + { + itLower = base_type::DoInsertKey(true_type(), itLower, eastl::move(key)); + } + + return (*itLower).second; + + // Reference implementation of this function, which may not be as fast: + //iterator it(base_type::insert(eastl::pair(key, T())).first); + //return it->second; + } + + + template + inline T& map::at(const Key& key) + { + iterator itLower(lower_bound(key)); // itLower->first is >= key. + + if(itLower == end()) + { + #if EASTL_EXCEPTIONS_ENABLED + throw std::out_of_range("map::at key does not exist"); + #else + EASTL_FAIL_MSG("map::at key does not exist"); + #endif + } + + return (*itLower).second; + } + + + template + inline const T& map::at(const Key& key) const + { + const_iterator itLower(lower_bound(key)); // itLower->first is >= key. + + if(itLower == end()) + { + #if EASTL_EXCEPTIONS_ENABLED + throw std::out_of_range("map::at key does not exist"); + #else + EASTL_FAIL_MSG("map::at key does not exist"); + #endif + } + + return (*itLower).second; + } + + + /////////////////////////////////////////////////////////////////////// + // erase_if + // + // https://en.cppreference.com/w/cpp/container/map/erase_if + /////////////////////////////////////////////////////////////////////// + template + void erase_if(map& c, Predicate predicate) + { + for (auto i = c.begin(), last = c.end(); i != last;) + { + if (predicate(*i)) + { + i = c.erase(i); + } + else + { + ++i; + } + } + } + + + /////////////////////////////////////////////////////////////////////// + // multimap + /////////////////////////////////////////////////////////////////////// + + template + inline multimap::multimap(const allocator_type& allocator) + : base_type(allocator) + { + } + + + template + inline multimap::multimap(const Compare& compare, const allocator_type& allocator) + : base_type(compare, allocator) + { + } + + + template + inline multimap::multimap(const this_type& x) + : base_type(x) + { + } + + + template + inline multimap::multimap(this_type&& x) + : base_type(eastl::move(x)) + { + } + + template + inline multimap::multimap(this_type&& x, const allocator_type& allocator) + : base_type(eastl::move(x), allocator) + { + } + + + template + inline multimap::multimap(std::initializer_list ilist, const Compare& compare, const allocator_type& allocator) + : base_type(ilist.begin(), ilist.end(), compare, allocator) + { + } + + + template + template + inline multimap::multimap(Iterator itBegin, Iterator itEnd) + : base_type(itBegin, itEnd, Compare(), EASTL_MULTIMAP_DEFAULT_ALLOCATOR) + { + } + + + template + inline typename multimap::insert_return_type + multimap::insert(const Key& key) + { + return base_type::DoInsertKey(false_type(), key); + } + + + template + inline typename multimap::value_compare + multimap::value_comp() const + { + return value_compare(get_compare()); + } + + + template + inline typename multimap::size_type + multimap::erase(const Key& key) + { + const eastl::pair range(equal_range(key)); + const size_type n = (size_type)eastl::distance(range.first, range.second); + 
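+         // The count has to be taken before the erase call below, because erasing the
+         // [first, second) range invalidates both iterators held in 'range'.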
base_type::erase(range.first, range.second); + return n; + } + + + template + inline typename multimap::size_type + multimap::count(const Key& key) const + { + const eastl::pair range(equal_range(key)); + return (size_type)eastl::distance(range.first, range.second); + } + + + template + inline eastl::pair::iterator, + typename multimap::iterator> + multimap::equal_range(const Key& key) + { + // There are multiple ways to implement equal_range. The implementation mentioned + // in the C++ standard and which is used by most (all?) commercial STL implementations + // is this: + // return eastl::pair(lower_bound(key), upper_bound(key)); + // + // This does two tree searches -- one for the lower bound and one for the + // upper bound. This works well for the case whereby you have a large container + // and there are lots of duplicated values. We provide an alternative version + // of equal_range called equal_range_small for cases where the user is confident + // that the number of duplicated items is only a few. + + return eastl::pair(lower_bound(key), upper_bound(key)); + } + + + template + inline eastl::pair::const_iterator, + typename multimap::const_iterator> + multimap::equal_range(const Key& key) const + { + // See comments above in the non-const version of equal_range. + return eastl::pair(lower_bound(key), upper_bound(key)); + } + + + template + inline eastl::pair::iterator, + typename multimap::iterator> + multimap::equal_range_small(const Key& key) + { + // We provide alternative version of equal_range here which works faster + // for the case where there are at most small number of potential duplicated keys. + const iterator itLower(lower_bound(key)); + iterator itUpper(itLower); + + while((itUpper != end()) && !compare(key, itUpper.mpNode->mValue.first)) + ++itUpper; + + return eastl::pair(itLower, itUpper); + } + + + template + inline eastl::pair::const_iterator, + typename multimap::const_iterator> + multimap::equal_range_small(const Key& key) const + { + // We provide alternative version of equal_range here which works faster + // for the case where there are at most small number of potential duplicated keys. + const const_iterator itLower(lower_bound(key)); + const_iterator itUpper(itLower); + + while((itUpper != end()) && !compare(key, itUpper.mpNode->mValue.first)) + ++itUpper; + + return eastl::pair(itLower, itUpper); + } + + + + /////////////////////////////////////////////////////////////////////// + // erase_if + // + // https://en.cppreference.com/w/cpp/container/multimap/erase_if + /////////////////////////////////////////////////////////////////////// + template + void erase_if(multimap& c, Predicate predicate) + { + // Erases all elements that satisfy the predicate pred from the container. + for (auto i = c.begin(), last = c.end(); i != last;) + { + if (predicate(*i)) + { + i = c.erase(i); + } + else + { + ++i; + } + } + } + +} // namespace eastl + + +#endif // Header include guard + + + + diff --git a/libkram/eastl/include/EASTL/memory.h b/libkram/eastl/include/EASTL/memory.h new file mode 100644 index 00000000..cf24b41a --- /dev/null +++ b/libkram/eastl/include/EASTL/memory.h @@ -0,0 +1,1685 @@ +/////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+/////////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// This file implements the following functions from the C++ standard that +// are found in the header: +// +// Temporary memory: +// get_temporary_buffer +// return_temporary_buffer +// +// Utility: +// late_constructed - Extention to standard functionality. +// +// Uninitialized operations: +// These are the same as the copy, fill, and fill_n algorithms, except that +// they *construct* the destination with the source values rather than assign +// the destination with the source values. +// +// uninitialized_copy +// uninitialized_copy_n +// uninitialized_default_construct +// uninitialized_default_construct_n +// uninitialized_move +// uninitialized_move_if_noexcept - Extention to standard functionality. +// uninitialized_move_n +// uninitialized_fill +// uninitialized_fill_n +// uninitialized_value_construct +// uninitialized_value_construct_n +// uninitialized_default_fill - Extention to standard functionality. +// uninitialized_default_fill_n - Extention to standard functionality. +// uninitialized_relocate - Extention to standard functionality. +// uninitialized_copy_ptr - Extention to standard functionality. +// uninitialized_move_ptr - Extention to standard functionality. +// uninitialized_move_ptr_if_noexcept- Extention to standard functionality. +// uninitialized_fill_ptr - Extention to standard functionality. +// uninitialized_fill_n_ptr - Extention to standard functionality. +// uninitialized_copy_fill - Extention to standard functionality. +// uninitialized_fill_copy - Extention to standard functionality. +// uninitialized_copy_copy - Extention to standard functionality. +// +// In-place destructor helpers: +// destruct(T*) - Non-standard extension. +// destruct(first, last) - Non-standard extension. +// destroy_at(T*) +// destroy(first, last) +// destroy_n(first, n) +// +// Alignment +// align +// align_advance - Extention to standard functionality. +// +// Allocator-related +// uses_allocator +// allocator_arg_t +// allocator_arg +// +// Pointers +// pointer_traits +// +/////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_MEMORY_H +#define EASTL_MEMORY_H + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +EA_DISABLE_ALL_VC_WARNINGS() +#include +#include +EA_RESTORE_ALL_VC_WARNINGS() + + +// 4530 - C++ exception handler used, but unwind semantics are not enabled. Specify /EHsc +// 4146 - unary minus operator applied to unsigned type, result still unsigned +// 4571 - catch(...) semantics changed since Visual C++ 7.1; structured exceptions (SEH) are no longer caught. +EA_DISABLE_VC_WARNING(4530 4146 4571); + + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + + +namespace eastl +{ + + /// EASTL_TEMP_DEFAULT_NAME + /// + /// Defines a default container name in the absence of a user-provided name. + /// + #ifndef EASTL_TEMP_DEFAULT_NAME + #define EASTL_TEMP_DEFAULT_NAME EASTL_DEFAULT_NAME_PREFIX " temp" // Unless the user overrides something, this is "EASTL temp". + #endif + + + /// get_temporary_buffer + /// + /// From the C++ standard, section 20.4.3: + /// 1 Effects: Obtains a pointer to storage sufficient to store up to n adjacent T objects. 
+ /// 2 Returns: A pair containing the buffer's address and capacity (in the units of sizeof(T)), + /// or a pair of 0 values if no storage can be obtained. + /// + /// Note: The return value is space to hold T elements, but no T elements are constructed. + /// + /// Our implementation here differs slightly in that we have alignment, alignmentOffset, and pName arguments. + /// Note that you can use the EASTL_NAME_VAL macro to make names go away in release builds. + /// + /// Example usage: + /// pair pr = get_temporary_buffer(100, 0, 0, EASTL_NAME_VAL("Temp int array")); + /// memset(pr.first, 0, 100 * sizeof(int)); + /// return_temporary_buffer(pr.first); + /// + template + eastl::pair get_temporary_buffer(ptrdiff_t n, size_t alignment = 1, size_t alignmentOffset = 0, const char* pName = EASTL_TEMP_DEFAULT_NAME) + { + EASTLAllocatorType allocator(*EASTLAllocatorDefault(), pName); + return eastl::pair(static_cast(EASTLAllocAligned(allocator, n * sizeof(T), alignment, alignmentOffset)), n); + } + + + /// return_temporary_buffer + /// + /// From the C++ standard, section 20.4.3: + /// 3 Effects: Deallocates the buffer to which p points. + /// 4 Requires: The buffer shall have been previously allocated by get_temporary_buffer. + /// + /// Note: This function merely frees space and does not destruct any T elements. + /// + /// Example usage: + /// pair pr = get_temporary_buffer(300); + /// memset(pr.first, 0, 300 * sizeof(int)); + /// return_temporary_buffer(pr.first, pr.second); + /// + template + void return_temporary_buffer(T* p, ptrdiff_t n = 0) + { + EASTLAllocatorType& allocator(*EASTLAllocatorDefault()); + EASTLFree(allocator, p, n * sizeof(T)); + } + + + + /// late_constructed + /// + /// Implements a smart pointer type which separates the memory allocation of an object from + /// the object's construction. The primary use case is to declare a global variable of the + /// late_construction type, which allows the memory to be global but the constructor executes + /// at some point after main() begins as opposed to before main, which is often dangerous + /// for non-trivial types. + /// + /// The autoConstruct template parameter controls whether the object is automatically default + /// constructed upon first reference or must be manually constructed upon the first use of + /// operator * or ->. autoConstruct is convenient but it causes * and -> to be slightly slower + /// and may result in construction at an inconvenient time. + /// + /// The autoDestruct template parameter controls whether the object, if constructed, is automatically + /// destructed when ~late_constructed() is called or must be manually destructed via a call to + /// destruct(). + /// + /// While construction can be automatic or manual, automatic destruction support is always present. + /// Thus you aren't required in any case to manually call destruct. However, you may safely manually + /// destruct the object at any time before the late_constructed destructor is executed. + /// + /// You may still use late_constructed after calling destruct(), including calling construct() + /// again to reconstruct the instance. destruct returns the late_constructed instance to a + /// state equivalent to before construct was called. 
+ /// + /// Caveat: While late_constructed instances can be declared in global scope and initialize + /// prior to main() executing, you cannot otherwise use such globally declared instances prior + /// to main with guaranteed behavior unless you can ensure that the late_constructed instance + /// is itself constructed prior to your use of it. + /// + /// Example usage (demonstrating manual-construction): + /// late_constructed gWidget; + /// + /// void main(){ + /// gWidget.construct(kScrollbarType, kVertical, "MyScrollbar"); + /// gWidget->SetValue(15); + /// gWidget.destruct(); + /// } + /// + /// Example usage (demonstrating auto-construction): + /// late_constructed gWidget; + /// + /// void main(){ + /// gWidget->SetValue(15); + /// // You may want to call destruct here, but aren't required to do so unless the Widget type requires it. + /// } + /// + template + class late_constructed + { + public: + using this_type = late_constructed; + using value_type = T; + using storage_type = eastl::aligned_storage_t>; + + late_constructed() EA_NOEXCEPT // In the case of the late_constructed instance being at global scope, we rely on the + : mStorage(), mpValue(nullptr) {} // compiler executing this constructor or placing the instance in auto-zeroed-at-startup memory. + + ~late_constructed() + { + if (autoDestruct && mpValue) + (*mpValue).~value_type(); + } + + template + void construct(Args&&... args) + { + if(!mpValue) + mpValue = new (&mStorage) value_type(eastl::forward(args)...); + } + + bool is_constructed() const EA_NOEXCEPT + { return mpValue != nullptr; } + + void destruct() + { + if(mpValue) + { + (*mpValue).~value_type(); + mpValue = nullptr; + } + } + + value_type& operator*() EA_NOEXCEPT + { + if(!mpValue) + construct(); + + EA_ANALYSIS_ASSUME(mpValue); + return *mpValue; + } + + const value_type& operator*() const EA_NOEXCEPT + { + if(!mpValue) + construct(); + + EA_ANALYSIS_ASSUME(mpValue); + return *mpValue; + } + + value_type* operator->() EA_NOEXCEPT + { + if(!mpValue) + construct(); + return mpValue; + } + + const value_type* operator->() const EA_NOEXCEPT + { + if(!mpValue) + construct(); + return mpValue; + } + + value_type* get() EA_NOEXCEPT + { + if(!mpValue) + construct(); + return mpValue; + } + + const value_type* get() const EA_NOEXCEPT + { + if(!mpValue) + construct(); + return mpValue; + } + + protected: + storage_type mStorage; // Declared first because it may have aligment requirements, and it would be more space-efficient if it was first. + value_type* mpValue; + }; + + + // Specialization that doesn't auto-construct on demand. 
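+     // In this specialization operator*, operator-> and get() assert instead of constructing on
+     // demand, so a late_constructed<Widget, false> instance (Widget being a hypothetical type)
+     // must have had construct() called on it before the first dereference.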
+ template + class late_constructed : public late_constructed + { + public: + typedef late_constructed base_type; + + typename base_type::value_type& operator*() EA_NOEXCEPT + { EASTL_ASSERT(base_type::mpValue); return *base_type::mpValue; } + + const typename base_type::value_type& operator*() const EA_NOEXCEPT + { EASTL_ASSERT(base_type::mpValue); return *base_type::mpValue; } + + typename base_type::value_type* operator->() EA_NOEXCEPT + { EASTL_ASSERT(base_type::mpValue); return base_type::mpValue; } + + const typename base_type::value_type* operator->() const EA_NOEXCEPT + { EASTL_ASSERT(base_type::mpValue); return base_type::mpValue; } + + typename base_type::value_type* get() EA_NOEXCEPT + { return base_type::mpValue; } + + const typename base_type::value_type* get() const EA_NOEXCEPT + { return base_type::mpValue; } + }; + + + + /// raw_storage_iterator + /// + /// From the C++11 Standard, section 20.6.10 p1 + /// raw_storage_iterator is provided to enable algorithms to store their results into uninitialized memory. + /// The formal template parameter OutputIterator is required to have its operator* return an object for + /// which operator& is defined and returns a pointer to T, and is also required to satisfy the requirements + /// of an output iterator (24.2.4). + + template + class raw_storage_iterator : public iterator + { + protected: + OutputIterator mIterator; + + public: + explicit raw_storage_iterator(OutputIterator iterator) + : mIterator(iterator) + { + } + + raw_storage_iterator& operator*() + { + return *this; + } + + raw_storage_iterator& operator=(const T& value) + { + ::new(eastl::addressof(*mIterator)) T(value); + return *this; + } + + raw_storage_iterator& operator++() + { + ++mIterator; + return *this; + } + + raw_storage_iterator operator++(int) + { + raw_storage_iterator tempIterator = *this; + ++mIterator; + return tempIterator; + } + }; + + + /// uninitialized_relocate (formerly named uninitialized_move prior to C++11) + /// + /// This utility is deprecated in favor of C++11 rvalue move functionality. + /// + /// uninitialized_relocate takes a constructed sequence of objects and an + /// uninitialized destination buffer. In the case of any exception thrown + /// while moving the objects, any newly constructed objects are guaranteed + /// to be destructed and the input left fully constructed. + /// + /// In the case where you need to do multiple moves atomically, split the + /// calls into uninitialized_relocate_start/abort/commit. + /// + /// uninitialized_relocate_start can possibly throw an exception. If it does, + /// you don't need to do anything. However, if it returns without throwing + /// an exception you need to guarantee that either uninitialized_relocate_abort + /// or uninitialized_relocate_commit is called. + /// + /// Both uninitialized_relocate_abort and uninitialize_move_commit are + /// guaranteed to not throw C++ exceptions. + namespace Internal + { + template + struct uninitialized_relocate_impl + { + template + static ForwardIteratorDest do_move_start(ForwardIterator first, ForwardIterator last, ForwardIteratorDest dest) + { + typedef typename eastl::iterator_traits::value_type value_type; + + #if EASTL_EXCEPTIONS_ENABLED + ForwardIteratorDest origDest(dest); + try + { + #endif + for(; first != last; ++first, ++dest) + ::new((void*)eastl::addressof(*dest)) value_type(*first); + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) 
+ { + for(; origDest < dest; ++origDest) + (*origDest).~value_type(); + throw; + } + #endif + + return dest; + } + + template + static ForwardIteratorDest do_move_commit(ForwardIterator first, ForwardIterator last, ForwardIteratorDest dest) //throw() + { + typedef typename eastl::iterator_traits::value_type value_type; + for(; first != last; ++first, ++dest) + (*first).~value_type(); + + return dest; + } + + template + static ForwardIteratorDest do_move_abort(ForwardIterator first, ForwardIterator last, ForwardIteratorDest dest) //throw() + { + typedef typename eastl::iterator_traits::value_type value_type; + for(; first != last; ++first, ++dest) + (*dest).~value_type(); + return dest; + } + }; + + template <> + struct uninitialized_relocate_impl + { + template + static T* do_move_start(T* first, T* last, T* dest) + { + return (T*)memcpy(dest, first, (size_t)((uintptr_t)last - (uintptr_t)first)) + (last - first); + } + + template + static T* do_move_commit(T* first, T* last, T* dest) + { + return dest + (last - first); + } + + template + static T* do_move_abort(T* first, T* last, T* dest) + { + return dest + (last - first); + } + }; + } + + + /// uninitialized_relocate_start, uninitialized_relocate_commit, uninitialized_relocate_abort + /// + /// This utility is deprecated in favor of C++11 rvalue move functionality. + /// + /// After calling uninitialized_relocate_start, if it doesn't throw an exception, + /// both the source and destination iterators point to undefined data. If it + /// does throw an exception, the destination remains uninitialized and the source + /// is as it was before. + /// + /// In order to make the iterators valid again you need to call either uninitialized_relocate_abort + /// or uninitialized_relocate_commit. The abort call makes the original source + /// iterator valid again, and commit makes the destination valid. Both abort + /// and commit are guaranteed to not throw C++ exceptions. + /// + /// Example usage: + /// iterator dest2 = uninitialized_relocate_start(first, last, dest); + /// try { + /// // some code here that might throw an exception + /// } + /// catch(...) 
+ /// { + /// uninitialized_relocate_abort(first, last, dest); + /// throw; + /// } + /// uninitialized_relocate_commit(first, last, dest); + /// + template + inline ForwardIteratorDest uninitialized_relocate_start(ForwardIterator first, ForwardIterator last, ForwardIteratorDest dest) + { + typedef typename eastl::iterator_traits::iterator_category IC; + typedef typename eastl::iterator_traits::value_type value_type_input; + typedef typename eastl::iterator_traits::value_type value_type_output; + + const bool bHasTrivialMove = type_and::value, + is_pointer::value, + is_pointer::value, + is_same::value>::value; + + return Internal::uninitialized_relocate_impl::do_move_start(first, last, dest); + } + + template + inline ForwardIteratorDest uninitialized_relocate_commit(ForwardIterator first, ForwardIterator last, ForwardIteratorDest dest) + { + typedef typename eastl::iterator_traits::iterator_category IC; + typedef typename eastl::iterator_traits::value_type value_type_input; + typedef typename eastl::iterator_traits::value_type value_type_output; + + const bool bHasTrivialMove = type_and::value, + is_pointer::value, + is_pointer::value, + is_same::value>::value; + + return Internal::uninitialized_relocate_impl::do_move_commit(first, last, dest); + } + + template + inline ForwardIteratorDest uninitialized_relocate_abort(ForwardIterator first, ForwardIterator last, ForwardIteratorDest dest) + { + typedef typename eastl::iterator_traits::iterator_category IC; + typedef typename eastl::iterator_traits::value_type value_type_input; + typedef typename eastl::iterator_traits::value_type value_type_output; + + const bool bHasTrivialMove = type_and::value, + is_pointer::value, + is_pointer::value, + is_same::value>::value; + + return Internal::uninitialized_relocate_impl::do_move_abort(first, last, dest); + } + + /// uninitialized_relocate + /// + /// See above for documentation. + /// + template + inline ForwardIteratorDest uninitialized_relocate(ForwardIterator first, ForwardIterator last, ForwardIteratorDest dest) + { + ForwardIteratorDest result = uninitialized_relocate_start(first, last, dest); + eastl::uninitialized_relocate_commit(first, last, dest); + + return result; + } + + + + + + // uninitialized_copy + // + namespace Internal + { + template + inline ForwardIterator uninitialized_copy_impl(InputIterator first, InputIterator last, ForwardIterator dest, true_type) + { + return eastl::copy(first, last, dest); // The copy() in turn will use memcpy for POD types. + } + + template + inline ForwardIterator uninitialized_copy_impl(InputIterator first, InputIterator last, ForwardIterator dest, false_type) + { + typedef typename eastl::iterator_traits::value_type value_type; + ForwardIterator currentDest(dest); + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + for(; first != last; ++first, ++currentDest) + ::new(static_cast(eastl::addressof(*currentDest))) value_type(*first); + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + for(; dest < currentDest; ++dest) + (*dest).~value_type(); + throw; + } + #endif + + return currentDest; + } + } + + /// uninitialized_copy + /// + /// Copies a source range to a destination, copy-constructing the destination with + /// the source values (and not *assigning* the destination with the source values). + /// Returns the end of the destination range (i.e. dest + (last - first)). 
+ /// + /// Declaration: + /// template + /// ForwardIterator uninitialized_copy(InputIterator sourceFirst, InputIterator sourceLast, ForwardIterator destination); + /// + /// Example usage: + /// SomeClass* pArray = malloc(10 * sizeof(SomeClass)); + /// uninitialized_copy(pSourceDataBegin, pSourceDataBegin + 10, pArray); + /// + template + inline ForwardIterator uninitialized_copy(InputIterator first, InputIterator last, ForwardIterator result) + { + typedef typename eastl::iterator_traits::value_type value_type; + + // We use is_trivial, which in the C++11 Standard means is_trivially_copyable and is_trivially_default_constructible. + return Internal::uninitialized_copy_impl(first, last, result, eastl::is_trivial()); + } + + + /// uninitialized_copy_n + /// + /// Copies count elements from a range beginning at first to an uninitialized memory area + /// beginning at dest. The elements in the uninitialized area are constructed using copy constructor. + /// If an exception is thrown during the initialization, the function has no final effects. + /// + /// first: Beginning of the range of the elements to copy. + /// dest: Beginning of the destination range. + /// return value: Iterator of dest type to the element past the last element copied. + /// + namespace Internal + { + template + struct uninitialized_copy_n_impl + { + static ForwardIterator impl(InputIterator first, Count n, ForwardIterator dest) + { + typedef typename eastl::iterator_traits::value_type value_type; + ForwardIterator currentDest(dest); + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + for(; n > 0; --n, ++first, ++currentDest) + ::new((void*)(eastl::addressof(*currentDest))) value_type(*first); + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + for(; dest < currentDest; ++dest) + (*dest).~value_type(); + throw; + } + #endif + + return currentDest; + } + }; + + template + struct uninitialized_copy_n_impl + { + static inline ForwardIterator impl(InputIterator first, Count n, ForwardIterator dest) + { + return eastl::uninitialized_copy(first, first + n, dest); + } + }; + } + + template + inline ForwardIterator uninitialized_copy_n(InputIterator first, Count n, ForwardIterator dest) + { + typedef typename eastl::iterator_traits::iterator_category IC; + return Internal::uninitialized_copy_n_impl::impl(first, n, dest); + } + + + + /// uninitialized_copy_ptr + /// + /// This is a specialization of uninitialized_copy for iterators that are pointers. We use it because + /// internally it uses generic_iterator to make pointers act like regular eastl::iterator. + /// + template + inline Result uninitialized_copy_ptr(First first, Last last, Result result) + { + typedef typename eastl::iterator_traits >::value_type value_type; + const generic_iterator i(Internal::uninitialized_copy_impl(eastl::generic_iterator(first), // generic_iterator makes a pointer act like an iterator. + eastl::generic_iterator(last), + eastl::generic_iterator(result), + eastl::is_trivially_copy_assignable())); + return i.base(); + } + + + + /// uninitialized_move_ptr + /// + /// This is a specialization of uninitialized_move for iterators that are pointers. We use it because + /// internally it uses generic_iterator to make pointers act like regular eastl::iterator. + /// + namespace Internal + { + template + inline ForwardIterator uninitialized_move_impl(InputIterator first, InputIterator last, ForwardIterator dest, true_type) + { + return eastl::copy(first, last, dest); // The copy() in turn will use memcpy for is_trivially_copy_assignable (e.g. 
POD) types. + } + + template + inline ForwardIterator uninitialized_move_impl(InputIterator first, InputIterator last, ForwardIterator dest, false_type) + { + typedef typename eastl::iterator_traits::value_type value_type; + ForwardIterator currentDest(dest); + + // We must run a loop over every element and move-construct it at the new location. + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + for(; first != last; ++first, ++currentDest) + ::new((void*)eastl::addressof(*currentDest)) value_type(eastl::move(*first)); // If value_type has a move constructor then it will be used here. + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + // We have a problem here: If an exception occurs while doing the loop below then we will + // have values that were moved from the source to the dest that may need to be moved back + // in the catch. What does the C++11 Standard say about this? And what happens if there's an + // exception while moving them back? We may want to trace through a conforming C++11 Standard + // Library to see what it does and do something similar. Given that rvalue references are + // objects that are going away, we may not need to move the values back, though that has the + // side effect of a certain kind of lost elements problem. + for(; dest < currentDest; ++dest) + (*dest).~value_type(); + throw; + } + #endif + + return currentDest; + } + } + + template + inline Result uninitialized_move_ptr(First first, Last last, Result dest) + { + typedef typename eastl::iterator_traits >::value_type value_type; + const generic_iterator i(Internal::uninitialized_move_impl(eastl::generic_iterator(first), // generic_iterator makes a pointer act like an iterator. + eastl::generic_iterator(last), + eastl::generic_iterator(dest), + eastl::is_trivially_copy_assignable())); // is_trivially_copy_assignable identifies if copy assignment would be as valid as move assignment, which means we have the opportunity to memcpy/memmove optimization. + return i.base(); + } + + + + + /// uninitialized_move + /// + /// Moves a source range to a destination, move-constructing the destination with + /// the source values (and not *assigning* the destination with the source values). + /// Returns the end of the destination range (i.e. dest + (last - first)). + /// + /// uninitialized_move is not part of any current C++ Standard, up to C++14. + /// + /// Declaration: + /// template + /// ForwardIterator uninitialized_move(InputIterator sourceFirst, InputIterator sourceLast, ForwardIterator destination); + /// + /// Example usage: + /// SomeClass* pArray = malloc(10 * sizeof(SomeClass)); + /// uninitialized_move(pSourceDataBegin, pSourceDataBegin + 10, pArray); + /// + template + inline ForwardIterator uninitialized_move(InputIterator first, InputIterator last, ForwardIterator dest) + { + return eastl::uninitialized_copy(eastl::make_move_iterator(first), eastl::make_move_iterator(last), dest); + } + + + /// uninitialized_move_if_noexcept + /// + /// If the iterated type can be moved without exceptions, move construct the dest with the input. Else copy-construct + /// the dest witih the input. If move isn't supported by the compiler, do regular copy. 
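+     /// A usage sketch in the style of the examples above (illustrative; Widget, pSource and
+     /// pArray are hypothetical):
+     ///     Widget* pArray = (Widget*)malloc(10 * sizeof(Widget));
+     ///     eastl::uninitialized_move_if_noexcept(pSource, pSource + 10, pArray);
+     ///     // Widgets whose move constructor is not noexcept are copy-constructed instead,
+     ///     // so a throw part-way through cannot leave the source range half-moved.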
+ /// + template + inline ForwardIterator uninitialized_move_if_noexcept(InputIterator first, InputIterator last, ForwardIterator dest) + { + return eastl::uninitialized_copy(eastl::make_move_if_noexcept_iterator(first), eastl::make_move_if_noexcept_iterator(last), dest); + } + + + /// uninitialized_move_ptr_if_noexcept + /// + template + inline Result uninitialized_move_ptr_if_noexcept(First first, Last last, Result dest) + { + #if EASTL_EXCEPTIONS_ENABLED + return eastl::uninitialized_move_if_noexcept(first, last, dest); + #else + return eastl::uninitialized_move_ptr(first, last, dest); + #endif + } + + + /// uninitialized_move_n + /// + /// Moves count elements from a range beginning at first to an uninitialized memory area + /// beginning at dest. The elements in the uninitialized area are constructed using copy constructor. + /// If an exception is thrown during the initialization, the function has no final effects. + /// + /// first: Beginning of the range of the elements to move. + /// dest: Beginning of the destination range. + /// return value: Iterator of dest type to the element past the last element moved. + /// + template + inline ForwardIterator uninitialized_move_n(InputIterator first, Count n, ForwardIterator dest) + { + return eastl::uninitialized_copy_n(eastl::make_move_iterator(first), n, dest); + } + + // Disable warning C4345 - behavior change: an object of POD type constructed with an initializer of the form () + // will be default-initialized. + // This is the behavior we intend below. + EA_DISABLE_VC_WARNING(4345) + /// uninitialized_default_fill + /// + /// Default-constructs the elements in the destination range. + /// Returns void. It wouldn't be useful to return the end of the destination range, + /// as that is the same as the 'last' input parameter. + /// + /// Declaration: + /// template + /// void uninitialized_default_fill(ForwardIterator destinationFirst, ForwardIterator destinationLast); + /// + template + inline void uninitialized_default_fill(ForwardIterator first, ForwardIterator last) + { + typedef typename eastl::iterator_traits::value_type value_type; + ForwardIterator currentDest(first); + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + for (; currentDest != last; ++currentDest) + ::new (eastl::addressof(*currentDest)) value_type(); + #if EASTL_EXCEPTIONS_ENABLED + } + catch (...) + { + for (; first < currentDest; ++first) + (*first).~value_type(); + throw; + } + #endif + } + + /// uninitialized_default_fill_n + /// + /// Default-constructs the range of [first, first + n). + /// Returns void as per the C++ standard, though returning the end input iterator + /// value may be of use. + /// + /// Declaration: + /// template + /// void uninitialized_default_fill_n(ForwardIterator destination, Count n); + /// + namespace Internal + { + template + inline void uninitialized_default_fill_n_impl(ForwardIterator first, Count n, false_type) + { + typedef typename eastl::iterator_traits::value_type value_type; + ForwardIterator currentDest(first); + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + for (; n > 0; --n, ++currentDest) + ::new (eastl::addressof(*currentDest)) value_type(); + #if EASTL_EXCEPTIONS_ENABLED + } + catch (...) 
+ { + for (; first < currentDest; ++first) + (*first).~value_type(); + throw; + } + #endif + } + + template + inline void uninitialized_default_fill_n_impl(ForwardIterator first, Count n, true_type) + { + typedef typename eastl::iterator_traits::value_type value_type; + memset(first, 0, sizeof(value_type) * n); + } + } + + template + inline void uninitialized_default_fill_n(ForwardIterator first, Count n) + { + typedef typename eastl::iterator_traits::value_type value_type; + Internal::uninitialized_default_fill_n_impl(first, n, is_scalar()); + } + EA_RESTORE_VC_WARNING() + + /// uninitialized_default_construct + /// + /// Constructs objects in the uninitialized storage designated by the range [first, last) by default-initialization. + /// + /// Default-initialization: + /// If T is a class, the default constructor is called; otherwise, no initialization is done, resulting in + /// indeterminate values. + /// + /// http://en.cppreference.com/w/cpp/memory/uninitialized_default_construct + /// + template + inline void uninitialized_default_construct(ForwardIterator first, ForwardIterator last) + { + typedef typename eastl::iterator_traits::value_type value_type; + ForwardIterator currentDest(first); + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + for (; currentDest != last; ++currentDest) + ::new (eastl::addressof(*currentDest)) value_type; + #if EASTL_EXCEPTIONS_ENABLED + } + catch (...) + { + for (; first < currentDest; ++first) + (*first).~value_type(); + throw; + } + #endif + } + + /// uninitialized_default_construct_n + /// + /// Constructs n objects in the uninitialized storage starting at first by default-initialization. + /// + /// http://en.cppreference.com/w/cpp/memory/uninitialized_default_construct_n + /// + template + inline ForwardIterator uninitialized_default_construct_n(ForwardIterator first, Count n) + { + typedef typename eastl::iterator_traits::value_type value_type; + ForwardIterator currentDest(first); + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + for (; n > 0; --n, ++currentDest) + ::new (eastl::addressof(*currentDest)) value_type; + return currentDest; + #if EASTL_EXCEPTIONS_ENABLED + } + catch (...) + { + for (; first < currentDest; ++first) + (*first).~value_type(); + throw; + } + #endif + } + + /// uninitialized_fill + /// + /// Copy-constructs the elements in the destination range with the given input value. + /// Returns void. It wouldn't be useful to return the end of the destination range, + /// as that is the same as the 'last' input parameter. + /// + /// Declaration: + /// template + /// void uninitialized_fill(ForwardIterator destinationFirst, ForwardIterator destinationLast, const T& value); + /// + namespace Internal + { + template + inline void uninitialized_fill_impl(ForwardIterator first, ForwardIterator last, const T& value, true_type) + { + eastl::fill(first, last, value); + } + + template + void uninitialized_fill_impl(ForwardIterator first, ForwardIterator last, const T& value, false_type) + { + typedef typename eastl::iterator_traits::value_type value_type; + ForwardIterator currentDest(first); + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + for(; currentDest != last; ++currentDest) + ::new((void*)eastl::addressof(*currentDest)) value_type(value); + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) 
+ { + for(; first < currentDest; ++first) + (*first).~value_type(); + throw; + } + #endif + } + } + + template + inline void uninitialized_fill(ForwardIterator first, ForwardIterator last, const T& value) + { + typedef typename eastl::iterator_traits::value_type value_type; + Internal::uninitialized_fill_impl(first, last, value, eastl::is_trivially_copy_assignable()); + } + + /// uninitialized_value_construct + /// + /// Constructs objects in the uninitialized storage range [first, last) by value-initialization. + /// + /// Value-Initialization: + /// If T is a class, the object is default-initialized (after being zero-initialized if T's default + /// constructor is not user-provided/deleted); otherwise, the object is zero-initialized. + /// + /// http://en.cppreference.com/w/cpp/memory/uninitialized_value_construct + /// + template + void uninitialized_value_construct(ForwardIterator first, ForwardIterator last) + { + typedef typename eastl::iterator_traits::value_type value_type; + ForwardIterator currentDest(first); + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + for (; currentDest != last; ++currentDest) + ::new (eastl::addressof(*currentDest)) value_type(); + #if EASTL_EXCEPTIONS_ENABLED + } + catch (...) + { + for (; first < currentDest; ++first) + (*first).~value_type(); + throw; + } + #endif + } + + /// uninitialized_value_construct_n + /// + /// Constructs n objects in the uninitialized storage starting at first by value-initialization. + /// + /// Value-Initialization: + /// If T is a class, the object is default-initialized (after being zero-initialized if T's default + /// constructor is not user-provided/deleted); otherwise, the object is zero-initialized. + /// + /// http://en.cppreference.com/w/cpp/memory/uninitialized_value_construct_n + /// + template + ForwardIterator uninitialized_value_construct_n(ForwardIterator first, Count n) + { + typedef typename eastl::iterator_traits::value_type value_type; + ForwardIterator currentDest(first); + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + for (; n > 0; --n, ++currentDest) + ::new (eastl::addressof(*currentDest)) value_type(); + return currentDest; + #if EASTL_EXCEPTIONS_ENABLED + } + catch (...) + { + for (; first < currentDest; ++first) + (*first).~value_type(); + throw; + } + #endif + } + + /// uninitialized_fill_ptr + /// + /// This is a specialization of uninitialized_fill for iterators that are pointers. + /// It exists so that we can declare a value_type for the iterator, which you + /// can't do with a pointer by itself. + /// + template + inline void uninitialized_fill_ptr(T* first, T* last, const T& value) + { + typedef typename eastl::iterator_traits >::value_type value_type; + Internal::uninitialized_fill_impl(eastl::generic_iterator(first), + eastl::generic_iterator(last), value, + eastl::is_trivially_copy_assignable()); + } + + /// uninitialized_fill_n + /// + /// Copy-constructs the range of [first, first + n) with the given input value. + /// Returns void as per the C++ standard, though returning the end input iterator + /// value may be of use. 
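+     /// A brief sketch (illustrative; Widget and pArray are hypothetical):
+     ///     Widget* pArray = (Widget*)malloc(10 * sizeof(Widget));
+     ///     const Widget prototype;
+     ///     eastl::uninitialized_fill_n(pArray, 10, prototype);   // copy-constructs ten Widgets from 'prototype'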
+ /// + /// Declaration: + /// template + /// void uninitialized_fill_n(ForwardIterator destination, Count n, const T& value); + /// + namespace Internal + { + template + inline void uninitialized_fill_n_impl(ForwardIterator first, Count n, const T& value, true_type) + { + eastl::fill_n(first, n, value); + } + + template + void uninitialized_fill_n_impl(ForwardIterator first, Count n, const T& value, false_type) + { + typedef typename eastl::iterator_traits::value_type value_type; + ForwardIterator currentDest(first); + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + for(; n > 0; --n, ++currentDest) + ::new((void*)eastl::addressof(*currentDest)) value_type(value); + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + for(; first < currentDest; ++first) + (*first).~value_type(); + throw; + } + #endif + } + } + + template + inline void uninitialized_fill_n(ForwardIterator first, Count n, const T& value) + { + typedef typename eastl::iterator_traits::value_type value_type; + Internal::uninitialized_fill_n_impl(first, n, value, eastl::is_trivially_copy_assignable()); + } + + + + /// uninitialized_fill_n_ptr + /// + /// This is a specialization of uninitialized_fill_n for iterators that are pointers. + /// It exists so that we can declare a value_type for the iterator, which you + /// can't do with a pointer by itself. + /// + template + inline void uninitialized_fill_n_ptr(T* first, Count n, const T& value) + { + typedef typename eastl::iterator_traits >::value_type value_type; + Internal::uninitialized_fill_n_impl(eastl::generic_iterator(first), n, value, eastl::is_trivially_copy_assignable()); + } + + + + + /// uninitialized_copy_fill + /// + /// Copies [first1, last1) into [first2, first2 + (last1 - first1)) then + /// fills [first2 + (last1 - first1), last2) with value. + /// + template + inline void uninitialized_copy_fill(InputIterator first1, InputIterator last1, + ForwardIterator first2, ForwardIterator last2, const T& value) + { + const ForwardIterator mid(eastl::uninitialized_copy(first1, last1, first2)); + + #if EASTL_EXCEPTIONS_ENABLED + typedef typename eastl::iterator_traits::value_type value_type; + try + { + #endif + eastl::uninitialized_fill(mid, last2, value); + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + for(; first2 < mid; ++first2) + (*first2).~value_type(); + throw; + } + #endif + } + + + /// uninitialized_move_fill + /// + /// Moves [first1, last1) into [first2, first2 + (last1 - first1)) then + /// fills [first2 + (last1 - first1), last2) with value. + /// + template + inline void uninitialized_move_fill(InputIterator first1, InputIterator last1, + ForwardIterator first2, ForwardIterator last2, const T& value) + { + const ForwardIterator mid(eastl::uninitialized_move(first1, last1, first2)); + + #if EASTL_EXCEPTIONS_ENABLED + typedef typename eastl::iterator_traits::value_type value_type; + try + { + #endif + eastl::uninitialized_fill(mid, last2, value); + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + for(; first2 < mid; ++first2) + (*first2).~value_type(); + throw; + } + #endif + } + + + + + + /// uninitialized_fill_copy + /// + /// Fills [result, mid) with value then copies [first, last) into [mid, mid + (last - first)). 
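+     /// A small sketch (illustrative; Widget, pArray and pSource are hypothetical), given raw
+     /// storage for 10 Widgets at pArray and 6 constructed source Widgets at pSource:
+     ///     eastl::uninitialized_fill_copy(pArray, pArray + 4, Widget(), pSource, pSource + 6);
+     ///     // slots [0, 4) are copy-constructed from Widget(), slots [4, 10) from the source range.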
+ /// + template + inline ForwardIterator + uninitialized_fill_copy(ForwardIterator result, ForwardIterator mid, const T& value, InputIterator first, InputIterator last) + { + eastl::uninitialized_fill(result, mid, value); + + #if EASTL_EXCEPTIONS_ENABLED + typedef typename eastl::iterator_traits::value_type value_type; + try + { + #endif + return eastl::uninitialized_copy(first, last, mid); + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + for(; result < mid; ++result) + (*result).~value_type(); + throw; + } + #endif + } + + + /// uninitialized_fill_move + /// + /// Fills [result, mid) with value then copies [first, last) into [mid, mid + (last - first)). + /// + template + inline ForwardIterator + uninitialized_fill_move(ForwardIterator result, ForwardIterator mid, const T& value, InputIterator first, InputIterator last) + { + eastl::uninitialized_fill(result, mid, value); + + #if EASTL_EXCEPTIONS_ENABLED + typedef typename eastl::iterator_traits::value_type value_type; + try + { + #endif + return eastl::uninitialized_move(first, last, mid); + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + for(; result < mid; ++result) + (*result).~value_type(); + throw; + } + #endif + } + + + + /// uninitialized_copy_copy + /// + /// Copies [first1, last1) into [result, result + (last1 - first1)) then + /// copies [first2, last2) into [result, result + (last1 - first1) + (last2 - first2)). + /// + template + inline ForwardIterator + uninitialized_copy_copy(InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, InputIterator2 last2, + ForwardIterator result) + { + const ForwardIterator mid(eastl::uninitialized_copy(first1, last1, result)); + + #if EASTL_EXCEPTIONS_ENABLED + typedef typename eastl::iterator_traits::value_type value_type; + try + { + #endif + return eastl::uninitialized_copy(first2, last2, mid); + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + for(; result < mid; ++result) + (*result).~value_type(); + throw; + } + #endif + } + + + + /// destruct + /// + /// Calls the destructor of a given object. + /// + /// Note that we don't have a specialized version of this for objects + /// with trivial destructors, such as integers. This is because the + /// compiler can already see in our version here that the destructor + /// is a no-op. + /// + template + inline void destruct(T* p) + { + // https://msdn.microsoft.com/query/dev14.query?appId=Dev14IDEF1&l=EN-US&k=k(C4100)&rd=true + // "C4100 can also be issued when code calls a destructor on a otherwise unreferenced parameter + // of primitive type. This is a limitation of the Visual C++ compiler." + EA_UNUSED(p); + p->~T(); + } + + + + // destruct(first, last) + // + template + inline void destruct_impl(ForwardIterator /*first*/, ForwardIterator /*last*/, true_type) // true means the type has a trivial destructor. + { + // Empty. The type has a trivial destructor. + } + + template + inline void destruct_impl(ForwardIterator first, ForwardIterator last, false_type) // false means the type has a significant destructor. + { + typedef typename eastl::iterator_traits::value_type value_type; + + for(; first != last; ++first) + (*first).~value_type(); + } + + /// destruct + /// + /// Calls the destructor on a range of objects. + /// + /// We have a specialization for objects with trivial destructors, such as + /// PODs. In this specialization the destruction of the range is a no-op. 
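+     /// A usage sketch (illustrative; pairs with the uninitialized_* examples above):
+     ///     eastl::destruct(pArray, pArray + 10);   // compiles to nothing when the element type is trivially destructible
+     ///     free(pArray);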
+ /// + template + inline void destruct(ForwardIterator first, ForwardIterator last) + { + typedef typename eastl::iterator_traits::value_type value_type; + destruct_impl(first, last, eastl::has_trivial_destructor()); + } + + + /// destroy_at + /// + /// Calls the destructor of a given object. + /// + /// Note that we don't have a specialized version of this for objects + /// with trivial destructors, such as integers. This is because the + /// compiler can already see in our version here that the destructor + /// is a no-op. + /// + /// This is the same as eastl::destruct but we included for C++17 compliance. + /// + /// http://en.cppreference.com/w/cpp/memory/destroy_at + /// + template + inline void destroy_at(T* p) + { + EA_UNUSED(p); + p->~T(); + } + + + /// destroy + /// + /// Calls the destructor on a range of objects. + /// + /// http://en.cppreference.com/w/cpp/memory/destroy + /// + template + inline void destroy(ForwardIterator first, ForwardIterator last) + { + for (; first != last; ++first) + eastl::destroy_at(eastl::addressof(*first)); + } + + + /// destroy_n + /// + /// Calls the destructor on the n objects in the range. + /// + /// http://en.cppreference.com/w/cpp/memory/destroy_n + /// + template + ForwardIterator destroy_n(ForwardIterator first, Size n) + { + for (; n > 0; ++first, --n) + eastl::destroy_at(eastl::addressof(*first)); + + return first; + } + + + /// align + /// + /// Same as C++11 std::align. http://en.cppreference.com/w/cpp/memory/align + /// If it is possible to fit size bytes of storage aligned by alignment into the buffer pointed to by + /// ptr with length space, the function updates ptr to point to the first possible address of such storage, + /// decreases space by the number of bytes used for alignment, and returns the new ptr value. Otherwise, + /// the function returns NULL and leaves ptr and space unmodified. + /// + /// Example usage: + /// char buffer[512]; + /// size_t space = sizeof(buffer); + /// void* p = buffer; + /// void* p1 = eastl::align(16, 3, p, space); p = (char*)p + 3; space -= 3; + /// void* p2 = eastl::align(32, 78, p, space); p = (char*)p + 78; space -= 78; + /// void* p3 = eastl::align(64, 9, p, space); p = (char*)p + 9; space -= 9; + + inline void* align(size_t alignment, size_t size, void*& ptr, size_t& space) + { + if(space >= size) + { + char* ptrAligned = (char*)(((size_t)ptr + (alignment - 1)) & -alignment); + size_t offset = (size_t)(ptrAligned - (char*)ptr); + + if((space - size) >= offset) // Have to implement this in terms of subtraction instead of addition in order to handle possible overflow. + { + ptr = ptrAligned; + space -= offset; + + return ptrAligned; + } + } + + return NULL; + } + + + /// align_advance + /// + /// Same as align except ptr and space can be adjusted to reflect remaining space. + /// Not present in the C++ Standard. + /// Note that the example code here is similar to align but simpler. + /// + /// Example usage: + /// char buffer[512]; + /// size_t space = sizeof(buffer); + /// void* p = buffer; + /// void* p1 = eastl::align_advance(16, 3, p, space, &p, &space); // p is advanced and space reduced accordingly. 
+ /// align
+ ///
+ /// Same as C++11 std::align. http://en.cppreference.com/w/cpp/memory/align
+ /// If it is possible to fit size bytes of storage aligned by alignment into the buffer pointed to by
+ /// ptr with length space, the function updates ptr to point to the first possible address of such storage,
+ /// decreases space by the number of bytes used for alignment, and returns the new ptr value. Otherwise,
+ /// the function returns NULL and leaves ptr and space unmodified.
+ ///
+ /// Example usage:
+ ///     char   buffer[512];
+ ///     size_t space = sizeof(buffer);
+ ///     void*  p  = buffer;
+ ///     void*  p1 = eastl::align(16,  3, p, space); p = (char*)p +  3; space -=  3;
+ ///     void*  p2 = eastl::align(32, 78, p, space); p = (char*)p + 78; space -= 78;
+ ///     void*  p3 = eastl::align(64,  9, p, space); p = (char*)p +  9; space -=  9;
+ ///
+ inline void* align(size_t alignment, size_t size, void*& ptr, size_t& space)
+ {
+     if(space >= size)
+     {
+         char*  ptrAligned = (char*)(((size_t)ptr + (alignment - 1)) & -alignment);
+         size_t offset     = (size_t)(ptrAligned - (char*)ptr);
+
+         if((space - size) >= offset) // Have to implement this in terms of subtraction instead of addition in order to handle possible overflow.
+         {
+             ptr    = ptrAligned;
+             space -= offset;
+
+             return ptrAligned;
+         }
+     }
+
+     return NULL;
+ }
+
+
+ /// align_advance
+ ///
+ /// Same as align, except that the advanced pointer and remaining space are returned through the
+ /// optional ptrAdvanced/spaceReduced out-parameters instead of modifying ptr and space in place.
+ /// Not present in the C++ Standard.
+ /// The example usage here parallels align's but is simpler, since the caller no longer has to
+ /// advance p and reduce space manually.
+ ///
+ /// Example usage:
+ ///     char   buffer[512];
+ ///     size_t space = sizeof(buffer);
+ ///     void*  p  = buffer;
+ ///     void*  p1 = eastl::align_advance(16,  3, p, space, &p, &space); // p is advanced and space reduced accordingly.
+ ///     void*  p2 = eastl::align_advance(32, 78, p, space, &p, &space);
+ ///     void*  p3 = eastl::align_advance(64,  9, p, space, &p, &space);
+ ///     void*  p4 = eastl::align_advance(16, 33, p, space);
+ ///
+ inline void* align_advance(size_t alignment, size_t size, void* ptr, size_t space, void** ptrAdvanced = NULL, size_t* spaceReduced = NULL)
+ {
+     if(space >= size)
+     {
+         char*  ptrAligned = (char*)(((size_t)ptr + (alignment - 1)) & -alignment);
+         size_t offset     = (size_t)(ptrAligned - (char*)ptr);
+
+         if((space - size) >= offset) // Have to implement this in terms of subtraction instead of addition in order to handle possible overflow.
+         {
+             if(ptrAdvanced)
+                 *ptrAdvanced = (ptrAligned + size);
+             if(spaceReduced)
+                 *spaceReduced = (space - (offset + size));
+
+             return ptrAligned;
+         }
+     }
+
+     return NULL;
+ }
+
+
+ ///////////////////////////////////////////////////////////////////////
+ // uses_allocator
+ //
+ // Determines if the class T has an allocator_type member typedef
+ // to which Allocator is convertible.
+ //
+ // http://en.cppreference.com/w/cpp/memory/uses_allocator
+ //
+ // A program may specialize this template to derive from true_type for a
+ // user-defined type T that does not have a nested allocator_type but
+ // nonetheless can be constructed with an allocator where either:
+ //     - the first argument of a constructor has type allocator_arg_t and
+ //       the second argument has type Allocator.
+ //     or
+ //     - the last argument of a constructor has type Allocator.
+ //
+ // Example behavior:
+ //     uses_allocator<vector, allocator>::value => true
+ //     uses_allocator<int, allocator>::value    => false
+ //
+ // This is useful for writing generic code for containers when you can't
+ // know ahead of time that the container has an allocator_type.
+ ///////////////////////////////////////////////////////////////////////
+
+ template <typename T>
+ struct has_allocator_type_helper
+ {
+ private:
+     template <typename U>
+     static eastl::no_type test(...);
+
+     template <typename U>
+     static eastl::yes_type test(typename U::allocator_type* = NULL);
+
+ public:
+     static const bool value = sizeof(test<T>(NULL)) == sizeof(eastl::yes_type);
+ };
+
+
+ template <typename T, typename Allocator, bool = has_allocator_type_helper<T>::value>
+ struct uses_allocator_impl
+     : public integral_constant<bool, eastl::is_convertible<Allocator, typename T::allocator_type>::value>
+ {
+ };
+
+ template <typename T, typename Allocator>
+ struct uses_allocator_impl<T, Allocator, false>
+     : public eastl::false_type
+ {
+ };
+
+ template <typename T, typename Allocator>
+ struct uses_allocator
+     : public uses_allocator_impl<T, Allocator>{ };
+
+
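+ // Illustrative usage sketch for uses_allocator (an addition for this patch, not from the
+ // upstream EASTL sources). MyContainer and MyAllocator are hypothetical names; the trait
+ // reports true only when the nested allocator_type is convertible from the queried allocator.
+ //
+ //     struct MyAllocator { /* EASTL-style allocator interface */ };
+ //
+ //     struct MyContainer
+ //     {
+ //         typedef MyAllocator allocator_type; // detected by has_allocator_type_helper
+ //     };
+ //
+ //     static_assert( eastl::uses_allocator<MyContainer, MyAllocator>::value, "detected via allocator_type");
+ //     static_assert(!eastl::uses_allocator<int, MyAllocator>::value, "int has no allocator_type");
+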
+ ///////////////////////////////////////////////////////////////////////
+ // pointer_traits
+ //
+ // C++11 Standard section 20.6.3
+ // Provides information about a pointer type, mostly for the purpose
+ // of handling the case where the pointer type isn't a built-in T* but
+ // rather is a class that acts like a pointer.
+ //
+ // A user-defined Pointer has the following properties, by example:
+ //     template <class T1, class T2>
+ //     struct Pointer
+ //     {
+ //         typedef Pointer pointer;            // required for use by pointer_traits.
+ //         typedef T1      element_type;       // optional for use by pointer_traits.
+ //         typedef T2      difference_type;    // optional for use by pointer_traits.
+ //
+ //         template <class Other>
+ //         using rebind = typename Ptr<Other>; // optional for use by pointer_traits.
+ //
+ //         static pointer pointer_to(element_type& obj); // required for use by pointer_traits.
+ //     };
+ //
+ //
+ // Example usage:
+ //     template <typename Pointer>
+ //     typename pointer_traits<Pointer>::element_type& GetElementPointedTo(Pointer p)
+ //         { return *p; }
+ //
+ ///////////////////////////////////////////////////////////////////////
+
+ namespace Internal
+ {
+     // pointer_element_type
+     template <typename T>
+     struct has_element_type // has_element_type<T>::value is true if T has an element_type member typedef.
+     {
+     private:
+         template <typename U> static eastl::no_type  test(...);
+         template <typename U> static eastl::yes_type test(typename U::element_type* = 0);
+     public:
+         static const bool value = sizeof(test<T>(0)) == sizeof(eastl::yes_type);
+     };
+
+     template <typename Pointer, bool = has_element_type<Pointer>::value>
+     struct pointer_element_type
+     {
+         using type = Pointer;
+     };
+
+     template <typename Pointer>
+     struct pointer_element_type<Pointer, true>
+         { typedef typename Pointer::element_type type; };
+
+     template